# 0.0 imports

In [45]:
import numpy as np
import pandas as pd
import sklearn

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

# 1.0 load dataset

In [46]:
# Directory holding the pre-split regression CSVs. Every load below is built
# from this one constant, so repointing the notebook is a single-line change.
DATA_DIR = '../projetoaluno/dataregre'

# Training split: the data every model is fitted on.
X_train = pd.read_csv(f'{DATA_DIR}/X_training.csv')
y_train = pd.read_csv(f'{DATA_DIR}/y_training.csv')

# Validation split. Note the target file is named y_val.csv, which does not
# follow the X_validation.csv naming of its feature file.
X_val = pd.read_csv(f'{DATA_DIR}/X_validation.csv')
y_val = pd.read_csv(f'{DATA_DIR}/y_val.csv')

# Test split.
X_test = pd.read_csv(f'{DATA_DIR}/X_test.csv')
y_test = pd.read_csv(f'{DATA_DIR}/y_test.csv')

# 2.0 def metrics

In [47]:
def calculate_metrics(y_true, y_pred):
    """Score predictions against the ground truth with five regression metrics.

    Parameters
    ----------
    y_true : array-like
        Observed target values.
    y_pred : array-like
        Predicted target values, aligned with ``y_true``.

    Returns
    -------
    tuple
        ``(r2, mse, rmse, mae, mape)`` in that order. ``rmse`` is the square
        root of ``mse``; the other four come directly from the matching
        ``sklearn.metrics`` function.
    """
    # Computed once up front because both the MSE and the RMSE entries use it.
    squared_error = mean_squared_error(y_true, y_pred)
    return (
        r2_score(y_true, y_pred),
        squared_error,
        np.sqrt(squared_error),
        mean_absolute_error(y_true, y_pred),
        mean_absolute_percentage_error(y_true, y_pred),
    )

# 3.0 model training and prediction

## 3.1 train dataset

In [39]:
def run_regression_models(X_train, y_train, random_state=42):
    """Fit ten regressors on the training data and score each one in-sample.

    Every model is fitted on ``X_train`` / ``y_train`` and then evaluated on
    that same data, so the returned numbers are training metrics.

    Parameters
    ----------
    X_train : array-like or DataFrame
        Training features.
    y_train : array-like, Series or DataFrame
        Training target. A single-column 2-D target is flattened to 1-D
        before fitting.
    random_state : int or None, default 42
        Seed passed to the decision tree and the random forest so repeated
        runs give the same metrics. Pass ``None`` for unseeded behaviour.

    Returns
    -------
    pandas.DataFrame
        One row per model with the columns
        ``Model, R², MSE, RMSE, MAE, MAPE``.
    """
    # A single-column 2-D target makes some estimators warn that a column
    # vector was passed where a 1-D array was expected; flatten that case
    # and leave any other shape untouched.
    y_fit = np.asarray(y_train)
    if y_fit.ndim == 2 and y_fit.shape[1] == 1:
        y_fit = y_fit.ravel()

    # Degree-2 expansion shared by the four "Polynomial ..." models.
    X_poly = PolynomialFeatures(degree=2).fit_transform(X_train)

    # (label, estimator, uses_polynomial_features) in reporting order.
    model_specs = [
        ('Linear Regression', LinearRegression(n_jobs=-1), False),
        ('Decision Tree',
         DecisionTreeRegressor(max_depth=100, random_state=random_state), False),
        ('Random Forest',
         RandomForestRegressor(n_estimators=100, max_depth=5,
                               random_state=random_state), False),
        ('Polynomial Regression', LinearRegression(), True),
        ('Lasso Regression', Lasso(alpha=0.1, max_iter=1000), False),
        ('Ridge Regression', Ridge(alpha=0.1, max_iter=1000), False),
        ('Elastic Net',
         ElasticNet(alpha=1.0, l1_ratio=0.5, max_iter=5000), False),
        ('Polynomial Regression Lasso', Lasso(alpha=0.1, max_iter=1000), True),
        ('Polynomial Regression Ridge', Ridge(alpha=0.1, max_iter=1000), True),
        ('Polynomial Regression Elastic Net',
         ElasticNet(alpha=0.1, l1_ratio=0.5, max_iter=1000), True),
    ]

    results = []
    for label, estimator, uses_poly in model_specs:
        features = X_poly if uses_poly else X_train
        estimator.fit(features, y_fit)
        y_pred = estimator.predict(features)
        results.append([label, *calculate_metrics(y_fit, y_pred)])

    columns = ['Model', 'R²', 'MSE', 'RMSE', 'MAE', 'MAPE']
    return pd.DataFrame(results, columns=columns)


In [40]:
# In-sample evaluation: each model is scored on the same training data it was
# fitted on.
results_df = run_regression_models(X_train, y_train)
results_df

  return fit_method(estimator, *args, **kwargs)


Unnamed: 0,Model,R²,MSE,RMSE,MAE,MAPE
0,Linear Regression,0.046058,455.996112,21.354065,16.998249,8.653186
1,Decision Tree,0.991757,3.940403,1.985045,0.214099,0.082628
2,Random Forest,0.14099,410.617337,20.263695,16.111761,8.032265
3,Polynomial Regression,0.094195,432.98621,20.808321,16.458032,8.35054
4,Lasso Regression,0.041219,458.309397,21.408162,17.046776,8.667964
5,Ridge Regression,0.046058,455.996115,21.354066,16.998255,8.653209
6,Elastic Net,0.007832,474.268889,21.777715,17.299507,8.7323
7,Polynomial Regression Lasso,0.067909,445.55132,21.108087,16.743258,8.556924
8,Polynomial Regression Ridge,0.094092,433.035331,20.809501,16.459988,8.354608
9,Polynomial Regression Elastic Net,0.061135,448.789471,21.184652,16.802298,8.555597


## 3.2 test dataset

In [43]:
def run_regression_models_test(X_train, y_train, X_eval=None, y_eval=None,
                               random_state=42):
    """Fit ten regressors on the training data and score them on the test set.

    Parameters
    ----------
    X_train : array-like or DataFrame
        Training features.
    y_train : array-like, Series or DataFrame
        Training target. A single-column 2-D target is flattened to 1-D
        before fitting.
    X_eval, y_eval : optional
        Features and target to evaluate on. When omitted, the notebook-level
        ``X_test`` and ``y_test`` variables are used, so existing two-argument
        calls keep working.
    random_state : int or None, default 42
        Seed passed to the decision tree and the random forest so repeated
        runs give the same metrics. Pass ``None`` for unseeded behaviour.

    Returns
    -------
    pandas.DataFrame
        One row per model with the columns
        ``Model, R², MSE, RMSE, MAE, MAPE``.
    """
    # Fall back to the notebook-level test split when no evaluation data is
    # passed explicitly.
    if X_eval is None:
        X_eval = X_test
    if y_eval is None:
        y_eval = y_test

    # A single-column 2-D target makes some estimators warn that a column
    # vector was passed where a 1-D array was expected; flatten that case
    # and leave any other shape untouched.
    y_fit = np.asarray(y_train)
    if y_fit.ndim == 2 and y_fit.shape[1] == 1:
        y_fit = y_fit.ravel()

    # Fit the polynomial expansion on the training data only and reuse that
    # fitted transformer for the evaluation data (transform, not
    # fit_transform), so the held-out set never drives a fit.
    poly = PolynomialFeatures(degree=2)
    X_train_poly = poly.fit_transform(X_train)
    X_eval_poly = poly.transform(X_eval)

    # (label, estimator, uses_polynomial_features) in reporting order.
    model_specs = [
        ('Linear Regression', LinearRegression(), False),
        ('Decision Tree',
         DecisionTreeRegressor(max_depth=100, random_state=random_state), False),
        ('Random Forest',
         RandomForestRegressor(n_estimators=100, max_depth=5,
                               random_state=random_state), False),
        ('Polynomial Regression', LinearRegression(), True),
        ('Lasso Regression', Lasso(alpha=0.1, max_iter=1000), False),
        ('Ridge Regression', Ridge(alpha=0.1, max_iter=1000), False),
        ('Elastic Net',
         ElasticNet(alpha=1.0, l1_ratio=0.5, max_iter=5000), False),
        ('Polynomial Regression Lasso', Lasso(alpha=0.1, max_iter=1000), True),
        ('Polynomial Regression Ridge', Ridge(alpha=0.1, max_iter=1000), True),
        ('Polynomial Regression Elastic Net',
         ElasticNet(alpha=0.1, l1_ratio=0.5, max_iter=1000), True),
    ]

    results = []
    for label, estimator, uses_poly in model_specs:
        fit_features = X_train_poly if uses_poly else X_train
        eval_features = X_eval_poly if uses_poly else X_eval
        estimator.fit(fit_features, y_fit)
        y_pred = estimator.predict(eval_features)
        results.append([label, *calculate_metrics(y_eval, y_pred)])

    columns = ['Model', 'R²', 'MSE', 'RMSE', 'MAE', 'MAPE']
    return pd.DataFrame(results, columns=columns)


In [44]:
# Fit on the training split and score on the test split; the function reads
# X_test / y_test from the notebook-level variables.
results_df_test = run_regression_models_test(X_train, y_train)
results_df_test

  return fit_method(estimator, *args, **kwargs)


Unnamed: 0,Model,R²,MSE,RMSE,MAE,MAPE
0,Linear Regression,0.052317,461.427719,21.480869,17.129965,8.521859
1,Decision Tree,-0.257611,612.332029,24.745344,17.183673,6.308271
2,Random Forest,0.111445,432.638377,20.799961,16.608437,8.025412
3,Polynomial Regression,0.090079,443.041256,21.048545,16.720535,8.242464
4,Lasso Regression,0.044728,465.122726,21.566704,17.1756,8.592947
5,Ridge Regression,0.052317,461.428015,21.480876,17.129936,8.521956
6,Elastic Net,0.00794,483.03485,21.978054,17.470259,8.744585
7,Polynomial Regression Lasso,0.070407,452.61997,21.274867,16.91231,8.454671
8,Polynomial Regression Ridge,0.09018,442.992268,21.047381,16.720196,8.252362
9,Polynomial Regression Elastic Net,0.059057,458.145857,21.404342,16.991344,8.535398


## 3.3 validation dataset

In [54]:
def run_regression_models_val(X_train, y_train, X_eval=None, y_eval=None,
                              random_state=42):
    """Fit ten regressors on the training data and score them on validation data.

    Parameters
    ----------
    X_train : array-like or DataFrame
        Training features.
    y_train : array-like, Series or DataFrame
        Training target. A single-column 2-D target is flattened to 1-D
        before fitting.
    X_eval, y_eval : optional
        Features and target to evaluate on. When omitted, the notebook-level
        ``X_val`` and ``y_val`` variables are used, so existing two-argument
        calls keep working.
    random_state : int or None, default 42
        Seed passed to the decision tree and the random forest so repeated
        runs give the same metrics. Pass ``None`` for unseeded behaviour.

    Returns
    -------
    pandas.DataFrame
        One row per model with the columns
        ``Model, R², MSE, RMSE, MAE, MAPE``.
    """
    # Fall back to the notebook-level validation split when no evaluation
    # data is passed explicitly.
    if X_eval is None:
        X_eval = X_val
    if y_eval is None:
        y_eval = y_val

    # A single-column 2-D target makes some estimators warn that a column
    # vector was passed where a 1-D array was expected; flatten that case
    # and leave any other shape untouched.
    y_fit = np.asarray(y_train)
    if y_fit.ndim == 2 and y_fit.shape[1] == 1:
        y_fit = y_fit.ravel()

    # The polynomial expansion is fitted on the training data and only
    # applied (transform) to the evaluation data.
    poly = PolynomialFeatures(degree=2)
    X_train_poly = poly.fit_transform(X_train)
    X_eval_poly = poly.transform(X_eval)

    # (label, estimator, uses_polynomial_features) in reporting order.
    model_specs = [
        ('Linear Regression', LinearRegression(), False),
        ('Decision Tree',
         DecisionTreeRegressor(max_depth=100, random_state=random_state), False),
        ('Random Forest',
         RandomForestRegressor(n_estimators=100, max_depth=5,
                               random_state=random_state), False),
        ('Polynomial Regression', LinearRegression(), True),
        ('Lasso Regression', Lasso(alpha=0.1, max_iter=1000), False),
        ('Ridge Regression', Ridge(alpha=0.1, max_iter=1000), False),
        ('Elastic Net',
         ElasticNet(alpha=1.0, l1_ratio=0.5, max_iter=5000), False),
        ('Polynomial Regression Lasso', Lasso(alpha=0.1, max_iter=1000), True),
        ('Polynomial Regression Ridge', Ridge(alpha=0.1, max_iter=1000), True),
        ('Polynomial Regression Elastic Net',
         ElasticNet(alpha=0.1, l1_ratio=0.5, max_iter=1000), True),
    ]

    results = []
    for label, estimator, uses_poly in model_specs:
        fit_features = X_train_poly if uses_poly else X_train
        eval_features = X_eval_poly if uses_poly else X_eval
        estimator.fit(fit_features, y_fit)
        y_pred = estimator.predict(eval_features)
        results.append([label, *calculate_metrics(y_eval, y_pred)])

    columns = ['Model', 'R²', 'MSE', 'RMSE', 'MAE', 'MAPE']
    return pd.DataFrame(results, columns=columns)

In [55]:
# Fit on the training split and score on the validation split; the function
# reads X_val / y_val from the notebook-level variables.
results_df_val = run_regression_models_val(X_train, y_train)
results_df_val

  return fit_method(estimator, *args, **kwargs)


Unnamed: 0,Model,R²,MSE,RMSE,MAE,MAPE
0,Linear Regression,0.039925,458.447042,21.411376,17.039754,8.682542
1,Decision Tree,-0.277629,610.082866,24.699856,16.973513,7.01039
2,Random Forest,0.099787,429.862158,20.733117,16.485752,8.387781
3,Polynomial Regression,0.066477,445.768223,21.113224,16.749939,8.547931
4,Lasso Regression,0.037195,459.750411,21.441791,17.047448,8.686903
5,Ridge Regression,0.039925,458.446874,21.411373,17.039725,8.682529
6,Elastic Net,0.008117,473.635616,21.763171,17.262903,8.694035
7,Polynomial Regression Lasso,0.058898,449.38696,21.198749,16.818893,8.667648
8,Polynomial Regression Ridge,0.067212,445.417065,21.104906,16.741767,8.554125
9,Polynomial Regression Elastic Net,0.053482,451.973428,21.259667,16.851165,8.643853
