# Predicción del Rendimiento Estudiantil
Este proyecto utiliza un modelo de clasificación para predecir si un estudiante aprobará o no, con base en características personales, familiares y académicas.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [None]:
# Load the dataset from the CSV file (path is relative to the working directory).
csv_path = "mat2.csv"
df = pd.read_csv(csv_path)

# Preview the first rows; as the last expression of the cell it is rendered
# as the cell output.
df.head()

In [None]:
# General overview of the DataFrame (columns, dtypes, non-null counts).
# DataFrame.info() writes its report straight to stdout and returns None,
# so it is called bare: wrapping it in print() would append a stray "None".
df.info()

# Summary statistics.
# This expression is not the last one in the cell, so it has to be printed
# explicitly; otherwise its result is silently discarded.
print(df.describe())

# Correlation heatmap, restricted to the numeric columns.
plt.figure(figsize=(12, 8))
sns.heatmap(df.corr(numeric_only=True), annot=True, cmap='coolwarm')
plt.title("Correlación entre variables numéricas")
plt.show()

In [None]:
# Remove the irrelevant "Unnamed: 0" column (presumably a saved index --
# confirm against the CSV). errors="ignore" keeps this cell re-runnable:
# without it a second execution raises KeyError because the column is gone.
df.drop(columns=["Unnamed: 0"], inplace=True, errors="ignore")

# Binary target: 1 when the final grade G3 is at least 10 (pass), else 0.
df["pass"] = (df["G3"] >= 10).astype(int)

# Replace every object-dtype (categorical) column with integer codes.
# The same encoder object is refitted for each column, so after the loop it
# only holds the classes of the last column processed.
cat_cols = df.select_dtypes(include='object').columns
le = LabelEncoder()
for col in cat_cols:
    df[col] = le.fit_transform(df[col])

# Separate features and target. G3 is dropped from the features because the
# target is derived directly from it.
X = df.drop(columns=["G3", "pass"])
y = df["pass"]

# Split BEFORE scaling so the held-out rows never influence the scaler's
# mean/std (fitting on the full data leaks test-set statistics into training).
# The split depends only on the number of rows and random_state, so the same
# rows land in train/test as when splitting the pre-scaled array.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then apply it to both partitions.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so later code that refers to X_scaled keeps working; it is now scaled
# with the training-set statistics.
X_scaled = scaler.transform(X)

In [None]:
# Train a random forest with a fixed seed; fit() returns the estimator
# itself, so construction and training can be chained.
model = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Predict the labels of the held-out test set.
y_pred = model.predict(X_test)

# Compute the evaluation metrics first, then report them.
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print(conf_matrix)
print(report)

In [None]:
# Feature importances reported by the fitted model, paired with the feature
# column names (every column of df except the target columns G3 and pass).
importances = model.feature_importances_
feat_names = df.drop(columns=["G3", "pass"]).columns

# Rank the features from most to least important so the chart reads
# top-down; in raw column order the ranking is hard to see.
ranked = pd.Series(importances, index=feat_names).sort_values(ascending=False)

# Horizontal bar chart: one bar per feature, most important first.
plt.figure(figsize=(10, 6))
sns.barplot(x=ranked.values, y=ranked.index)
plt.title("Importancia de las características")
plt.show()