# In [None]:

# Caso 3: Rendimiento Escolar

# --- Teoría ---
"""
Problema de regresión: predecir nota final G3.
Modelos: Regresión Lineal, Random Forest, XGBRegressor, AdaBoostRegressor.
Métricas: MAE, RMSE, R².
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from xgboost import XGBRegressor

# Load the semicolon-delimited student dataset and one-hot encode its
# categorical columns, dropping the first level of each encoded column.
data = pd.get_dummies(pd.read_csv("student-mat.csv", sep=';'), drop_first=True)

# G3 (the final grade) is the regression target; every other column is a feature.
y = data['G3']
X = data.drop(columns='G3')

# Hold out 30% of the rows for evaluation, using a fixed seed for the split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Candidate regressors, all fitted and scored on the same train/test split.
models = {
    'Linear Regression': LinearRegression(),
    'Random Forest': RandomForestRegressor(random_state=42),
    # Seeded like the other ensemble models so results stay reproducible
    # if sampling options are enabled later.
    'XGBRegressor': XGBRegressor(random_state=42),
    'AdaBoost': AdaBoostRegressor(random_state=42),
}

# results maps each model name to its {'MAE', 'RMSE', 'R2'} test-set scores.
results = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    scores = {
        'MAE': mean_absolute_error(y_test, y_pred),
        # RMSE is the square root of the mean squared error.
        'RMSE': np.sqrt(mean_squared_error(y_test, y_pred)),
        'R2': r2_score(y_test, y_pred),
    }
    results[name] = scores
    print(f"\nModelo {name}: MAE={scores['MAE']:.2f}, RMSE={scores['RMSE']:.2f}, R²={scores['R2']:.2f}")

import pandas as pd
# Tabulate the collected scores: after transposing, each row is a model and
# each column is a metric.
df = pd.DataFrame(results).transpose()
print(df)
