In [39]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import Ridge, ElasticNet, BayesianRidge, Lasso, QuantileRegressor
from sklearn.metrics import root_mean_squared_error, r2_score, mean_absolute_error, mean_squared_log_error, median_absolute_error
import kagglehub
import tensorflow as tf
import random
from sklearn.model_selection import cross_val_score


In [40]:
RANDOM_SEED = 6
tf.random.set_seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
random.seed(RANDOM_SEED)

In [41]:
# Cargar datos

path_student = kagglehub.dataset_download("lainguyn123/student-performance-factors")
path_boston = kagglehub.dataset_download("willianleite/boston-housing-dataset")

student_data = pd.read_csv(path_student+'/StudentPerformanceFactors.csv')
boston_data = pd.read_csv(path_boston+'/Boston.csv')



In [42]:
boston_data

Unnamed: 0.1,Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,black,lstat,medv
0,1,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.0900,1,296,15.3,396.90,4.98,24.0
1,2,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.90,9.14,21.6
2,3,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,4,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,5,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.90,5.33,36.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,502,0.06263,0.0,11.93,0,0.573,6.593,69.1,2.4786,1,273,21.0,391.99,9.67,22.4
502,503,0.04527,0.0,11.93,0,0.573,6.120,76.7,2.2875,1,273,21.0,396.90,9.08,20.6
503,504,0.06076,0.0,11.93,0,0.573,6.976,91.0,2.1675,1,273,21.0,396.90,5.64,23.9
504,505,0.10959,0.0,11.93,0,0.573,6.794,89.3,2.3889,1,273,21.0,393.45,6.48,22.0


In [43]:
# Preprocesamiento para StudentPerformanceFactors
X_student = student_data.drop('Exam_Score', axis=1)
y_student = student_data['Exam_Score']

In [44]:
# Preprocesamiento para Boston
X_boston = boston_data.drop(['medv', 'Unnamed: 0'], axis=1)  # Eliminar la columna de índice y la columna objetivo
y_boston = boston_data['medv']  # Columna objetivo


In [45]:
# Función para preprocesar datos
def preprocess_data(X):
    numeric_features = X.select_dtypes(include=['int64', 'float64']).columns
    categorical_features = X.select_dtypes(include=['object']).columns

    preprocessor = ColumnTransformer(
        transformers=[
            ('num', StandardScaler(), numeric_features),
            ('cat', OneHotEncoder(), categorical_features)
        ])
    return preprocessor

In [46]:
# Dividir datos
def split_data(X, y):
    return train_test_split(X, y, test_size=0.2, random_state=42)

In [47]:
# Evaluar métricas
def evaluate_model(model, X_test, y_test, X_train, y_train):
    y_pred = model.predict(X_test)
    n = X_test.shape[0]  # Número de muestras
    p = X_test.shape[1]  # Número de características

    # Reemplazar valores negativos por 0
    y_test_sin_negativo = np.maximum(y_test, 0)
    y_pred_sin_negativo  = np.maximum(y_pred, 0)

    # Calcular métricas
    mae = mean_absolute_error(y_test, y_pred)
    mse = root_mean_squared_error(y_test, y_pred) ** 2  # MSE es el cuadrado del RMSE
    rmse = root_mean_squared_error(y_test, y_pred)  # RMSE
    r2 = r2_score(y_test, y_pred)
    r2_adjusted = 1 - (1 - r2) * (n - 1) / (n - p - 1)  # R² ajustado
    rmsle = mean_squared_log_error(y_test_sin_negativo,y_pred_sin_negativo) # RMSLE
    validacion_cruzada_errores = cross_val_score(estimator = model, X = X_train, y = y_train, cv = 10)
    vc = validacion_cruzada_errores.mean()
    medae = median_absolute_error(y_test, y_pred)  # Mediana del Error Absoluto
    evs = 1 - np.var(y_test - y_pred) / np.var(y_test)  # Explained Variance Score

    return mae, mse, rmse, r2, r2_adjusted, rmsle, medae, evs, vc


In [48]:
# Función para entrenar y evaluar modelos
def train_and_evaluate(X_train, X_test, y_train, y_test, preprocessor):
    results = {}

    # Ridge
    ridge = Pipeline(steps=[('preprocessor', preprocessor),
                            ('regressor', Ridge(random_state=RANDOM_SEED))])
    ridge.fit(X_train, y_train)
    results['Ridge'] = evaluate_model(ridge, X_test, y_test, X_train,y_train)  # Pasar X_train

    # ElasticNet
    elastic_net = Pipeline(steps=[('preprocessor', preprocessor),
                                  ('regressor', ElasticNet(random_state=RANDOM_SEED))])
    elastic_net.fit(X_train, y_train)
    results['ElasticNet'] = evaluate_model(elastic_net, X_test, y_test, X_train,y_train)  # Pasar X_train

    # Bayesian Ridge
    bayesian_ridge = Pipeline(steps=[('preprocessor', preprocessor),
                                     ('regressor', BayesianRidge())])
    bayesian_ridge.fit(X_train, y_train)
    results['BayesianRidge'] = evaluate_model(bayesian_ridge, X_test, y_test, X_train, y_train)  # Pasar X_train

    # Lasso
    lasso = Pipeline(steps=[('preprocessor', preprocessor),
                            ('regressor', Lasso(random_state=RANDOM_SEED))])
    lasso.fit(X_train, y_train)
    results['Lasso'] = evaluate_model(lasso, X_test, y_test, X_train,y_train)  # Pasar X_train

    # Quantile Regression
    quantile_reg = Pipeline(steps=[('preprocessor', preprocessor),
                                   ('regressor', QuantileRegressor())])
    quantile_reg.fit(X_train, y_train)
    results['QuantileRegression'] = evaluate_model(quantile_reg, X_test, y_test, X_train,y_train)  # Pasar X_train

    return results

In [49]:
# Procesar y evaluar StudentPerformanceFactors
preprocessor_student = preprocess_data(X_student)
X_train_student, X_test_student, y_train_student, y_test_student = split_data(X_student, y_student)
results_student = train_and_evaluate(X_train_student, X_test_student, y_train_student, y_test_student, preprocessor_student)


In [None]:
def create_results_table(results):
    rows = []
    for model, metrics in results.items():
        rows.append({
            "Modelo": model,
            "MAE": metrics[0],
            "MSE": metrics[1],
            "RMSE": metrics[2],
            "R²": metrics[3],
            "R² Ajustado": metrics[4],
            "RMSLE": metrics[5],
            "MedAE": metrics[6],
            "EVS": metrics[7]
        })
    return pd.DataFrame(rows)

# Crear tabla para StudentPerformanceFactors


Resultados para StudentPerformanceFactors:
               Modelo       MAE        MSE      RMSE        R²  R² Ajustado  \
0               Ridge  0.449911   3.251946  1.803315  0.769938     0.766580   
1          ElasticNet  1.863991   7.698025  2.774531  0.455395     0.447448   
2       BayesianRidge  0.449748   3.251188  1.803105  0.769991     0.766635   
3               Lasso  1.905677   7.946270  2.818913  0.437833     0.429629   
4  QuantileRegression  2.807867  14.236006  3.773063 -0.007141    -0.021838   

      RMSLE     MedAE           EVS  
0  0.000534  0.286952  7.701033e-01  
1  0.001496  1.519590  4.555385e-01  
2  0.000534  0.284985  7.701542e-01  
3  0.001550  1.591434  4.379466e-01  
4  0.002889  2.000000  1.776357e-15  


In [58]:
display(create_results_table(results_student))

Unnamed: 0,Modelo,MAE,MSE,RMSE,R²,R² Ajustado,RMSLE,MedAE,EVS
0,Ridge,0.449911,3.251946,1.803315,0.769938,0.76658,0.000534,0.286952,0.7701033
1,ElasticNet,1.863991,7.698025,2.774531,0.455395,0.447448,0.001496,1.51959,0.4555385
2,BayesianRidge,0.449748,3.251188,1.803105,0.769991,0.766635,0.000534,0.284985,0.7701542
3,Lasso,1.905677,7.94627,2.818913,0.437833,0.429629,0.00155,1.591434,0.4379466
4,QuantileRegression,2.807867,14.236006,3.773063,-0.007141,-0.021838,0.002889,2.0,1.776357e-15


In [51]:
# Preprocesar y evaluar Boston
preprocessor_boston = preprocess_data(X_boston)

X_train_boston, X_test_boston, y_train_boston, y_test_boston = split_data(X_boston, y_boston)
results_boston = train_and_evaluate(X_train_boston, X_test_boston, y_train_boston, y_test_boston, preprocessor_boston)


In [59]:
display(create_results_table(results_boston))

Unnamed: 0,Modelo,MAE,MSE,RMSE,R²,R² Ajustado,RMSLE,MedAE,EVS
0,Ridge,3.185724,24.312904,4.930812,0.668462,0.619485,0.164751,2.332025,0.6692343
1,ElasticNet,3.431002,28.358648,5.325284,0.613294,0.556166,0.067259,2.382668,0.6132936
2,BayesianRidge,3.176569,24.41729,4.941385,0.667039,0.617852,0.164923,2.274389,0.6678657
3,Lasso,3.47377,27.577692,5.251447,0.623943,0.568389,0.095538,2.528627,0.6244501
4,QuantileRegression,5.958824,73.346275,8.564244,-0.00017,-0.147923,0.143983,3.5,-2.220446e-16


In [52]:
# Mostrar resultados
print("Resultados para StudentPerformanceFactors:")
for model, metrics in results_student.items():
        print(f"{model} - MAE: {metrics[0]}, MSE: {metrics[1]}, RMSE: {metrics[2]}, R²: {metrics[3]}, R² ajustado: {metrics[4]}, RMSLE: {metrics[5]}, MedAE: {metrics[6]}, EVS: {metrics[7]}")


print("\nResultados para Boston:")
for model, metrics in results_boston.items():
    print(f"{model} - MAE: {metrics[0]}, MSE: {metrics[1]}, RMSE: {metrics[2]}, R²: {metrics[3]}, R² ajustado: {metrics[4]}, RMSLE: {metrics[5]}, MedAE: {metrics[6]}, EVS: {metrics[7]}")


Resultados para StudentPerformanceFactors:
Ridge - MAE: 0.4499114659209326, MSE: 3.251946128292267, RMSE: 1.8033153158259003, R²: 0.7699377817488297, R² ajustado: 0.7665804989940123, RMSLE: 0.0005337062763047074, MedAE: 0.28695246515937356, EVS: 0.7701032930012612
ElasticNet - MAE: 1.8639914899727328, MSE: 7.69802497894548, RMSE: 2.774531488187777, R²: 0.45539543616635647, R² ajustado: 0.4474480577386766, RMSLE: 0.0014964692987051243, MedAE: 1.5195899039433272, EVS: 0.4555384615284057
BayesianRidge - MAE: 0.44974786158245617, MSE: 3.2511881720487095, RMSE: 1.8031051472525692, R²: 0.7699914041299672, R² ajustado: 0.7666349038830159, RMSLE: 0.0005335526496618186, MedAE: 0.2849845670925859, EVS: 0.7701542450401425
Lasso - MAE: 1.9056770790141107, MSE: 7.946270181394098, RMSE: 2.8189129432095092, R²: 0.4378330782144024, R² ajustado: 0.42962941345716255, RMSLE: 0.0015501493726449139, MedAE: 1.5914340239843412, EVS: 0.43794656461983084
QuantileRegression - MAE: 2.8078668683812404, MSE: 14.23

In [53]:
# Ajuste de hiperparámetros
def train_with_hyperparameters(model_class, hyperparams, X_train, X_test, y_train, y_test, preprocessor):
    results = {}
    for param_name, values in hyperparams.items():
        for value in values:
            # Crear modelo con el hiperparámetro ajustado
            model = model_class(**{param_name: value})
            pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                                       ('regressor', model)])
            pipeline.fit(X_train, y_train)
            # Evaluar modelo
            metrics = evaluate_model(pipeline, X_test, y_test, X_train, y_train)
            results[f"{param_name}={value}"] = metrics
    return results

In [54]:
# Hiperparámetros a probar
ridge_hyperparams = {'alpha': [0.1, 10.0], 'solver': ['auto', 'saga']}
elasticnet_hyperparams = {'alpha': [0.1, 1.0], 'l1_ratio': [0.2, 0.8]}
bayesian_hyperparams = {'alpha_1': [1e-6, 1e-4], 'alpha_2': [1e-6, 1e-4]}
lasso_hyperparams = {'alpha': [0.1, 1.0], 'max_iter': [1000, 5000]}
quantile_hyperparams = {'quantile': [0.25, 0.75], 'alpha': [0.1, 1.0]}


In [55]:
# Entrenar y evaluar modelos para StudentPerformanceFactors
print("Resultados para StudentPerformanceFactors:")

print("\nRidge:")
ridge_results_student = train_with_hyperparameters(Ridge, ridge_hyperparams, X_train_student, X_test_student, y_train_student, y_test_student, preprocessor_student)
for config, metrics in ridge_results_student.items():
    print(f"{config} - MAE: {metrics[0]}, MSE: {metrics[1]}, RMSE: {metrics[2]}, R²: {metrics[3]}, R² ajustado: {metrics[4]}, RMSLE: {metrics[5]}, MedAE: {metrics[6]}, EVS: {metrics[7]}")

print("\nElasticNet:")
elasticnet_results_student = train_with_hyperparameters(ElasticNet, elasticnet_hyperparams, X_train_student, X_test_student, y_train_student, y_test_student, preprocessor_student)
for config, metrics in elasticnet_results_student.items():
        print(f"{config} - MAE: {metrics[0]}, MSE: {metrics[1]}, RMSE: {metrics[2]}, R²: {metrics[3]}, R² ajustado: {metrics[4]}, RMSLE: {metrics[5]}, MedAE: {metrics[6]}, EVS: {metrics[7]}")

print("\nBayesian Ridge:")
bayesian_results_student = train_with_hyperparameters(BayesianRidge, bayesian_hyperparams, X_train_student, X_test_student, y_train_student, y_test_student, preprocessor_student)
for config, metrics in bayesian_results_student.items():
        print(f"{config} - MAE: {metrics[0]}, MSE: {metrics[1]}, RMSE: {metrics[2]}, R²: {metrics[3]}, R² ajustado: {metrics[4]}, RMSLE: {metrics[5]}, MedAE: {metrics[6]}, EVS: {metrics[7]}")


print("\nLasso:")
lasso_results_student = train_with_hyperparameters(Lasso, lasso_hyperparams, X_train_student, X_test_student, y_train_student, y_test_student, preprocessor_student)
for config, metrics in lasso_results_student.items():
    print(f"{config} - MAE: {metrics[0]}, MSE: {metrics[1]}, RMSE: {metrics[2]}, R²: {metrics[3]}, R² ajustado: {metrics[4]}, RMSLE: {metrics[5]}, MedAE: {metrics[6]}, EVS: {metrics[7]}")

print("\nQuantile Regression:")
quantile_results_student = train_with_hyperparameters(QuantileRegressor, quantile_hyperparams, X_train_student, X_test_student, y_train_student, y_test_student, preprocessor_student)
for config, metrics in quantile_results_student.items():
        print(f"{config} - MAE: {metrics[0]}, MSE: {metrics[1]}, RMSE: {metrics[2]}, R²: {metrics[3]}, R² ajustado: {metrics[4]}, RMSLE: {metrics[5]}, MedAE: {metrics[6]}, EVS: {metrics[7]}")

Resultados para StudentPerformanceFactors:

Ridge:
alpha=0.1 - MAE: 0.44994553991882885, MSE: 3.2520675087655904, RMSE: 1.8033489703231569, R²: 0.7699291945644657, R² ajustado: 0.7665717864974342, RMSLE: 0.0005337311081390005, MedAE: 0.2875952863782487, EVS: 0.7700950813259124
alpha=10.0 - MAE: 0.4497375972213049, MSE: 3.2509296180317193, RMSE: 1.8030334489497746, R²: 0.7700096957954314, R² ajustado: 0.7666534624775461, RMSLE: 0.0005335010225225078, MedAE: 0.2856103007516211, EVS: 0.7701714491376719
solver=auto - MAE: 0.4499114659209326, MSE: 3.251946128292267, RMSE: 1.8033153158259003, R²: 0.7699377817488297, R² ajustado: 0.7665804989940123, RMSLE: 0.0005337062763047074, MedAE: 0.28695246515937356, EVS: 0.7701032930012612
solver=saga - MAE: 0.44992130057867674, MSE: 3.251953902828113, RMSE: 1.8033174714475855, R²: 0.7699372317314271, R² ajustado: 0.7665799409502421, RMSLE: 0.000533708400239778, MedAE: 0.2868692686775205, EVS: 0.770102761770906

ElasticNet:
alpha=0.1 - MAE: 0.728777514

In [56]:
# Entrenar y evaluar modelos para Housing
print("\nResultados para Housing:")

print("\nResultados para Housing:")

print("\nRidge:")
ridge_results_housing = train_with_hyperparameters(Ridge, ridge_hyperparams, X_train_boston, X_test_boston, y_train_boston, y_test_boston, preprocessor_boston)
for config, metrics in ridge_results_housing.items():
    print(f"{config} - MAE: {metrics[0]}, MSE: {metrics[1]}, RMSE: {metrics[2]}, R²: {metrics[3]}, R² ajustado: {metrics[4]}, RMSLE: {metrics[5]}")

print("\nElasticNet:")
elasticnet_results_housing = train_with_hyperparameters(ElasticNet, elasticnet_hyperparams, X_train_boston, X_test_boston, y_train_boston, y_test_boston, preprocessor_boston)
for config, metrics in elasticnet_results_housing.items():
    print(f"{config} - MAE: {metrics[0]}, MSE: {metrics[1]}, RMSE: {metrics[2]}, R²: {metrics[3]}, R² ajustado: {metrics[4]}, RMSLE: {metrics[5]}")

print("\nBayesian Ridge:")
bayesian_results_housing = train_with_hyperparameters(BayesianRidge, bayesian_hyperparams, X_train_boston, X_test_boston, y_train_boston, y_test_boston, preprocessor_boston)
for config, metrics in bayesian_results_housing.items():
    print(f"{config} - MAE: {metrics[0]}, MSE: {metrics[1]}, RMSE: {metrics[2]}, R²: {metrics[3]}, R² ajustado: {metrics[4]}, RMSLE: {metrics[5]}")

print("\nLasso:")
lasso_results_housing = train_with_hyperparameters(Lasso, lasso_hyperparams, X_train_boston, X_test_boston, y_train_boston, y_test_boston, preprocessor_boston)
for config, metrics in lasso_results_housing.items():
        print(f"{config} - MAE: {metrics[0]}, MSE: {metrics[1]}, RMSE: {metrics[2]}, R²: {metrics[3]}, R² ajustado: {metrics[4]}, RMSLE: {metrics[5]}")


print("\nQuantile Regression:")
quantile_results_housing = train_with_hyperparameters(QuantileRegressor, quantile_hyperparams, X_train_boston, X_test_boston, y_train_boston, y_test_boston, preprocessor_boston)
for config, metrics in quantile_results_housing.items():
        print(f"{config} - MAE: {metrics[0]}, MSE: {metrics[1]}, RMSE: {metrics[2]}, R²: {metrics[3]}, R² ajustado: {metrics[4]}, RMSLE: {metrics[5]}")


Resultados para Housing:

Resultados para Housing:

Ridge:
alpha=0.1 - MAE: 3.188723109256336, MSE: 24.293294309665953, RMSE: 4.928822811753934, R²: 0.6687298368808311, R² ajustado: 0.6197921991473174, RMSLE: 0.16471838301500022
alpha=10.0 - MAE: 3.1722813116860684, MSE: 24.49584561966741, RMSE: 4.9493277947280285, R²: 0.6659677905050341, R² ajustado: 0.6166221231932778, RMSLE: 0.16504232482870368
solver=auto - MAE: 3.185723807244597, MSE: 24.312903830491617, RMSE: 4.930811680696356, R²: 0.6684624359643558, R² ajustado: 0.6194852958227266, RMSLE: 0.16475095063474984
solver=saga - MAE: 3.1857942440773224, MSE: 24.314173781607646, RMSE: 4.930940456100402, R²: 0.6684451185553624, R² ajustado: 0.6194654201601318, RMSLE: 0.16475490900318668

ElasticNet:
alpha=0.1 - MAE: 3.1920909142714446, MSE: 25.203552956840408, RMSE: 5.02031402970376, R²: 0.6563172951034016, R² ajustado: 0.6055459864254951, RMSLE: 0.16670906910722955
alpha=1.0 - MAE: 3.4310018130449818, MSE: 28.358648122255975, RMSE: 5.