In [11]:
from ucimlrepo import fetch_ucirepo
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from itertools import combinations

# Smallest feature-subset size to evaluate. The search does NOT start at 1:
# the number of subsets grows combinatorially, and every subset costs one
# Lasso fit plus one Random Forest fit per target, so only the largest
# subsets are searched.
MIN_SUBSET_SIZE = 37

# Load the student performance dataset (UCI repository id 320).
student_performance = fetch_ucirepo(id=320)

# Split into predictor columns and target (grade) columns.
X = student_performance.data.features
y = student_performance.data.targets

# One-hot encode the categorical predictors; drop_first drops one level per
# category so the dummies are not perfectly collinear.
X_dummies = pd.get_dummies(X, drop_first=True)

# The three targets, each modelled independently.
G1 = y['G1']
G2 = y['G2']
G3 = y['G3']

# One record per (feature subset, model, target).
results = []

# NOTE(review): subsets are ranked below by R² on the held-out split, so the
# same split both selects and scores them; the top scores are likely
# optimistic. Consider cross-validating on the training split instead.
for g, target in zip(['G1', 'G2', 'G3'], [G1, G2, G3]):
    X_train, X_test, y_train, y_test = train_test_split(
        X_dummies, target, test_size=0.3, random_state=42
    )

    for subset_size in range(MIN_SUBSET_SIZE, len(X_train.columns) + 1):
        # combinations() is consumed lazily: subsets are evaluated one at a
        # time instead of being collected into a list first. The iteration
        # order is the same as the list-based version.
        for combination in combinations(X_train.columns, subset_size):
            columns = list(combination)
            combination_name = ', '.join(combination)

            X_train_subset = X_train[columns]
            X_test_subset = X_test[columns]

            # Fresh estimators for every subset; Lasso first, then Random
            # Forest, so the order of the result records is stable.
            estimators = (
                ('Lasso', Lasso(alpha=0.1)),
                ('Random Forest', RandomForestRegressor(random_state=42)),
            )
            for label, model in estimators:
                model.fit(X_train_subset, y_train)
                predictions = model.predict(X_test_subset)
                results.append({
                    'Combinación de Características': combination_name,
                    'Modelo': f'{label} - {g}',
                    'Puntaje R²': r2_score(y_test, predictions)
                })

# Build the results table, best R² first (reassigned rather than sorted in
# place, so re-running the cell is idempotent).
results_df = pd.DataFrame(results).sort_values('Puntaje R²', ascending=False)

# Show the 15 best-scoring (subset, model, target) rows.
print(results_df.head(15))


KeyboardInterrupt: 

In [12]:
from ucimlrepo import fetch_ucirepo
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from itertools import combinations

# Size of the feature subsets to evaluate. Only this single size is searched
# (not every size from 1 up to the number of columns).
SUBSET_SIZE = 36

# Load the student performance dataset (UCI repository id 320).
student_performance = fetch_ucirepo(id=320)

# Split into predictor columns and target (grade) columns.
X = student_performance.data.features
y = student_performance.data.targets

# One-hot encode the categorical predictors; drop_first drops one level per
# category so the dummies are not perfectly collinear.
X_dummies = pd.get_dummies(X, drop_first=True)

# This cell models a single target: G1.
G1 = y['G1']

# One record per (feature subset, model).
results = []

X_train, X_test, y_train, y_test = train_test_split(
    X_dummies, G1, test_size=0.3, random_state=42
)

# NOTE(review): subsets are ranked below by R² on the held-out split, so the
# same split both selects and scores them; the top scores are likely
# optimistic. Consider cross-validating on the training split instead.
#
# combinations() is consumed lazily: subsets are evaluated one at a time
# instead of being collected into a list first.
for combination in combinations(X_train.columns, SUBSET_SIZE):
    columns = list(combination)
    combination_name = ', '.join(combination)

    X_train_subset = X_train[columns]
    X_test_subset = X_test[columns]

    # Fresh estimators for every subset; Lasso first, then Random Forest, so
    # the order of the result records is stable.
    estimators = (
        ('Lasso - G1', Lasso(alpha=0.1)),
        ('Random Forest - G1', RandomForestRegressor(random_state=42)),
    )
    for model_name, model in estimators:
        model.fit(X_train_subset, y_train)
        predictions = model.predict(X_test_subset)
        results.append({
            'Combinación de Características': combination_name,
            'Modelo': model_name,
            'Puntaje R²': r2_score(y_test, predictions)
        })

# Build the results table, best R² first (reassigned rather than sorted in
# place, so re-running the cell is idempotent).
results_df = pd.DataFrame(results).sort_values('Puntaje R²', ascending=False)

# Show the 15 best-scoring (subset, model) rows.
print(results_df.head(15))


                          Combinación de Características      Modelo  \
17920  Medu, Fedu, traveltime, studytime, failures, f...  Lasso - G1   
17924  Medu, Fedu, traveltime, studytime, failures, f...  Lasso - G1   
17902  Medu, Fedu, traveltime, studytime, failures, f...  Lasso - G1   
17910  Medu, Fedu, traveltime, studytime, failures, f...  Lasso - G1   
17890  Medu, Fedu, traveltime, studytime, failures, f...  Lasso - G1   
17864  Medu, Fedu, traveltime, studytime, failures, f...  Lasso - G1   
17866  Medu, Fedu, traveltime, studytime, failures, f...  Lasso - G1   
17870  Medu, Fedu, traveltime, studytime, failures, f...  Lasso - G1   
17872  Medu, Fedu, traveltime, studytime, failures, f...  Lasso - G1   
17874  Medu, Fedu, traveltime, studytime, failures, f...  Lasso - G1   
17876  Medu, Fedu, traveltime, studytime, failures, f...  Lasso - G1   
17880  Medu, Fedu, traveltime, studytime, failures, f...  Lasso - G1   
17882  Medu, Fedu, traveltime, studytime, failures, f...  Lasso 

In [13]:
# Render the top-ranked row as one plain-text string so the whole
# feature-combination name can be read.
best_row = results_df.iloc[:1]
texto = best_row.to_string()
texto

'                                                                                                                                                                                                                                                                                                                                                                                                                       Combinación de Características      Modelo  Puntaje R²\n17920  Medu, Fedu, traveltime, studytime, failures, freetime, goout, Dalc, health, absences, school_MS, sex_M, address_U, famsize_LE3, Pstatus_T, Mjob_health, Mjob_other, Mjob_services, Mjob_teacher, Fjob_health, Fjob_other, Fjob_services, Fjob_teacher, reason_home, reason_other, reason_reputation, guardian_mother, guardian_other, schoolsup_yes, famsup_yes, paid_yes, activities_yes, nursery_yes, higher_yes, internet_yes, romantic_yes  Lasso - G1    0.244352'