In [12]:
from sklearn.linear_model import LinearRegression, SGDRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd

df = pd.read_csv("../data/ml/ml.csv").copy()

X = df.drop(columns=["total_cost", "user_id"])
y = df["total_cost"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

linear_model = LinearRegression()
linear_model.fit(X_train, y_train)
y_pred_linear = linear_model.predict(X_test)

sgd_model = SGDRegressor(max_iter=1000, tol=1e-3, random_state=42)
sgd_model.fit(X_train, y_train)
y_pred_sgd = sgd_model.predict(X_test)

sgd_stochastic_model = SGDRegressor(max_iter=1000, tol=1e-3, random_state=42, penalty='elasticnet')
sgd_stochastic_model.fit(X_train, y_train)
y_pred_sgd_stochastic = sgd_stochastic_model.predict(X_test)

def evaluate_model(y_true, y_pred):
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    return rmse, mae, r2

rmse_linear, mae_linear, r2_linear = evaluate_model(y_test, y_pred_linear)
rmse_sgd, mae_sgd, r2_sgd = evaluate_model(y_test, y_pred_sgd)
rmse_sgd_stochastic, mae_sgd_stochastic, r2_sgd_stochastic = evaluate_model(y_test, y_pred_sgd_stochastic)

models_performance = {
    'Regresión Lineal': {'RMSE': rmse_linear, 'MAE': mae_linear, 'R²': r2_linear},
    'Gradient Descent': {'RMSE': rmse_sgd, 'MAE': mae_sgd, 'R²': r2_sgd},
    'Stochastic Gradient Descent': {'RMSE': rmse_sgd_stochastic, 'MAE': mae_sgd_stochastic, 'R²': r2_sgd_stochastic},
}

best_model = min(models_performance, key=lambda x: models_performance[x]['RMSE'])

print("Evaluación de Modelos - Métricas de Error (RMSE, MAE, R²):\n")
print(f"Regresión Lineal:")
print(f"  RMSE: {rmse_linear:.4f}")
print(f"  MAE: {mae_linear:.4f}")
print(f"  R²: {r2_linear:.4f}\n")

print(f"Gradient Descent:")
print(f"  RMSE: {rmse_sgd:.4f}")
print(f"  MAE: {mae_sgd:.4f}")
print(f"  R²: {r2_sgd:.4f}\n")

print(f"Stochastic Gradient Descent:")
print(f"  RMSE: {rmse_sgd_stochastic:.4f}")
print(f"  MAE: {mae_sgd_stochastic:.4f}")
print(f"  R²: {r2_sgd_stochastic:.4f}\n")

print(f"\nEl mejor modelo es: {best_model}")
print("Métricas de este modelo:")
print(f"  RMSE: {models_performance[best_model]['RMSE']:.4f}")
print(f"  MAE: {models_performance[best_model]['MAE']:.4f}")
print(f"  R²: {models_performance[best_model]['R²']:.4f}")


Evaluación de Modelos - Métricas de Error (RMSE, MAE, R²):

Regresión Lineal:
  RMSE: 15.8641
  MAE: 11.7274
  R²: 0.8108

Gradient Descent:
  RMSE: 45382430172060229632.0000
  MAE: 30654650613038391296.0000
  R²: -1548028433496653695661258423431856128.0000

Stochastic Gradient Descent:
  RMSE: 45374724068306960384.0000
  MAE: 30649770913883811840.0000
  R²: -1547502756274894965059878095956213760.0000


El mejor modelo es: Regresión Lineal
Métricas de este modelo:
  RMSE: 15.8641
  MAE: 11.7274
  R²: 0.8108
