# Explore here

## Modelo de Regresión Lineal

In [1]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
import os

In [2]:
# 1. Define paths and file names
BASE_PATH = "../data/processed"

# Each dataset variant is stored as a matching pair of workbooks,
# X_train_<variant>.xlsx and X_test_<variant>.xlsx. Both lists are derived from
# this single list so that they always stay aligned position by position, which
# is what the training cell relies on when it pairs them with zip().
DATASET_VARIANTS = [
    "con_outliers",
    "sin_outliers",
    "con_outliers_norm",
    "sin_outliers_norm",
    "con_outliers_scal",
    "sin_outliers_scal",
]
TRAIN_FILES = [f"X_train_{variant}.xlsx" for variant in DATASET_VARIANTS]
TEST_FILES = [f"X_test_{variant}.xlsx" for variant in DATASET_VARIANTS]

In [5]:
# 2. Load data
def load_data(base_path, filenames):
    """Read a series of Excel workbooks and return them as DataFrames.

    Args:
        base_path: Directory that contains the workbooks.
        filenames: Indexable sequence of file names located under ``base_path``.

    Returns:
        A list with one DataFrame per entry of ``filenames``, in the same order.

    A one-line summary (file name, row count, column count) is printed for
    each workbook once all of them have been read.
    """
    frames = []
    for name in filenames:
        frames.append(pd.read_excel(os.path.join(base_path, name)))

    # Reporting happens only after every read succeeded, so a failing file
    # produces no partial summary.
    for position in range(len(frames)):
        n_rows, n_cols = frames[position].shape
        print(f"Cargado: {filenames[position]}, Filas: {n_rows}, Columnas: {n_cols}")

    return frames

# Feature matrices: one DataFrame per dataset variant, train first, then test.
X_train_list = load_data(BASE_PATH, TRAIN_FILES)
X_test_list = load_data(BASE_PATH, TEST_FILES)

# Targets: a single pair of workbooks that is reused for every variant.
y_train_path = os.path.join(BASE_PATH, "y_train.xlsx")
y_test_path = os.path.join(BASE_PATH, "y_test.xlsx")
y_train = pd.read_excel(y_train_path)
y_test = pd.read_excel(y_test_path)

# Show the target shapes so they can be compared with the row counts above.
print("\nDatos de y_train:", y_train.shape)
print("Datos de y_test:", y_test.shape)


Cargado: X_train_con_outliers.xlsx, Filas: 1069, Columnas: 6
Cargado: X_train_sin_outliers.xlsx, Filas: 1069, Columnas: 6
Cargado: X_train_con_outliers_norm.xlsx, Filas: 1069, Columnas: 6
Cargado: X_train_sin_outliers_norm.xlsx, Filas: 1069, Columnas: 6
Cargado: X_train_con_outliers_scal.xlsx, Filas: 1069, Columnas: 6
Cargado: X_train_sin_outliers_scal.xlsx, Filas: 1069, Columnas: 6
Cargado: X_test_con_outliers.xlsx, Filas: 268, Columnas: 6
Cargado: X_test_sin_outliers.xlsx, Filas: 268, Columnas: 6
Cargado: X_test_con_outliers_norm.xlsx, Filas: 268, Columnas: 6
Cargado: X_test_sin_outliers_norm.xlsx, Filas: 268, Columnas: 6
Cargado: X_test_con_outliers_scal.xlsx, Filas: 268, Columnas: 6
Cargado: X_test_sin_outliers_scal.xlsx, Filas: 268, Columnas: 6

Datos de y_train: (1069, 1)
Datos de y_test: (268, 1)


In [8]:
# 3. Train and evaluate one linear regression per dataset variant
#
# zip() stops at the shortest input, so a missing workbook would silently drop
# a variant from the comparison; fail loudly instead.
if not (len(TRAIN_FILES) == len(X_train_list) == len(X_test_list)):
    raise ValueError(
        "Train/test dataset lists are misaligned: "
        f"{len(TRAIN_FILES)} file names, {len(X_train_list)} train frames, "
        f"{len(X_test_list)} test frames"
    )

results = []
for dataset_name, X_train, X_test in zip(TRAIN_FILES, X_train_list, X_test_list):
    # A fresh estimator per variant so no fitted state carries over.
    model = LinearRegression()
    model.fit(X_train, y_train)
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    # R^2 on the data the model was fitted on and on the held-out split.
    train_r2 = r2_score(y_train, y_train_pred)
    test_r2 = r2_score(y_test, y_test_pred)

    results.append({
        # Name of the train workbook, so each score can be traced back to the
        # dataset variant that produced it.
        "dataset": dataset_name,
        "train_r2": train_r2,
        "test_r2": test_r2,
    })
    

In [9]:
# 4. Show a summary of the results
print("\nResumen de todos los modelos:")
# Models are numbered from 1, following the order in which they were trained.
for model_number, scores in enumerate(results, start=1):
    print(f"Modelo {model_number}:")
    print(f"  Train: {scores['train_r2']:.4f}")
    print(f"  Test: {scores['test_r2']:.4f}")


Resumen de todos los modelos:
Modelo 1:
  Train: 0.7297
  Test: 0.8068
Modelo 2:
  Train: 0.1139
  Test: 0.1407
Modelo 3:
  Train: 0.7297
  Test: 0.8068
Modelo 4:
  Train: 0.1139
  Test: 0.1407
Modelo 5:
  Train: 0.7297
  Test: 0.8068
Modelo 6:
  Train: 0.1139
  Test: 0.1407
