# 0.0 - Import Libraries

In [9]:
import warnings

import pandas as pd
import numpy  as np

import matplotlib.pyplot as plt

from sklearn import metrics as mt
from sklearn.linear_model import LinearRegression

In [10]:
warnings.filterwarnings('ignore')

# 1.0 - Load Dataset

In [2]:
# Each split is read from two CSV files: the X_* file is later passed to the
# model as features and the y_* file as the target.

# Training split
df_X_train, df_y_train = map(
    pd.read_csv,
    ( '../dataset/regressao/X_training.csv', '../dataset/regressao/y_training.csv' ),
)

# Validation split
df_X_val, df_y_val = map(
    pd.read_csv,
    ( '../dataset/regressao/X_validation.csv', '../dataset/regressao/y_val.csv' ),
)

# Test split
df_X_test, df_y_test = map(
    pd.read_csv,
    ( '../dataset/regressao/X_test.csv', '../dataset/regressao/y_test.csv' ),
)

# 2.0 - Linear Regression

## 2.1 - Performance for Training Dataset

In [4]:
# Fit a linear regression on the training split and score it on that same
# split (in-sample performance).

# definition
model_lr_train = LinearRegression()

# training
model_lr_train.fit( df_X_train, df_y_train )

# in-sample predictions
y_pred_train = model_lr_train.predict( df_X_train )

# performance metrics
r2_train = mt.r2_score( df_y_train, y_pred_train )
mse_train = mt.mean_squared_error( df_y_train, y_pred_train )
# RMSE is derived from the MSE instead of passing `squared=False`: that
# keyword was deprecated in scikit-learn 1.4 and removed in 1.6.
# NOTE(review): equal to the previous value when y has a single target
# column — confirm against y_training.csv.
rmse_train = np.sqrt( mse_train )
mae_train = mt.mean_absolute_error( df_y_train, y_pred_train )
# scikit-learn returns MAPE as a ratio, not multiplied by 100
mape_train = mt.mean_absolute_percentage_error( df_y_train, y_pred_train )

print( f'Train R2: {r2_train}' )
print( f'Train MSE: {mse_train}' )
print( f'Train RMSE: {rmse_train}' )
print( f'Train MAE: {mae_train}' )
print( f'Train MAPE: {mape_train}' )

Train R2: 0.04605830473391903
Train MSE: 455.99611182562677
Train RMSE: 21.35406546364478
Train MAE: 16.998249066011095
Train MAPE: 8.653185943804512


## 2.2 - Performance for Validation Dataset

In [6]:
# Fit a linear regression on the training split only and score it on the
# held-out validation split.

# definition
model_lr_val = LinearRegression()

# training (training split only; the validation data is never seen by fit)
model_lr_val.fit( df_X_train, df_y_train )

# out-of-sample predictions on the validation features
y_pred_val = model_lr_val.predict( df_X_val )

# performance metrics
r2_val = mt.r2_score( df_y_val, y_pred_val )
mse_val = mt.mean_squared_error( df_y_val, y_pred_val )
# RMSE is derived from the MSE instead of passing `squared=False`: that
# keyword was deprecated in scikit-learn 1.4 and removed in 1.6.
# NOTE(review): equal to the previous value when y has a single target
# column — confirm against y_val.csv.
rmse_val = np.sqrt( mse_val )
mae_val = mt.mean_absolute_error( df_y_val, y_pred_val )
# scikit-learn returns MAPE as a ratio, not multiplied by 100
mape_val = mt.mean_absolute_percentage_error( df_y_val, y_pred_val )

print( f'Validation R2: {r2_val}' )
print( f'Validation MSE: {mse_val}' )
print( f'Validation RMSE: {rmse_val}' )
print( f'Validation MAE: {mae_val}' )
print( f'Validation MAPE: {mape_val}' )

Validation R2: 0.03992483038154071
Validation MSE: 458.4470418439312
Validation RMSE: 21.41137645841414
Validation MAE: 17.039753759960327
Validation MAPE: 8.682541883735295


## 2.3 - Performance for Test Dataset

In [11]:
# Refit a linear regression on training + validation data combined and score
# it on the test split.

# definition
model_lr_test = LinearRegression()

# Stack the training and validation rows. pd.concat (rather than
# np.concatenate) keeps the column names, so the model is fitted with feature
# names and predict() on the test DataFrame is checked against them; columns
# are also aligned by name instead of by position.
# NOTE(review): assumes the train / validation / test files share the same
# columns — confirm.
X_train_val = pd.concat( [ df_X_train, df_X_val ], ignore_index=True )
y_train_val = pd.concat( [ df_y_train, df_y_val ], ignore_index=True )

# training
model_lr_test.fit( X_train_val, y_train_val )

# out-of-sample predictions on the test features
y_pred_test = model_lr_test.predict( df_X_test )

# performance metrics
r2_test = mt.r2_score( df_y_test, y_pred_test )
mse_test = mt.mean_squared_error( df_y_test, y_pred_test )
# RMSE is derived from the MSE instead of passing `squared=False`: that
# keyword was deprecated in scikit-learn 1.4 and removed in 1.6.
# NOTE(review): equal to the previous value when y has a single target
# column — confirm against y_test.csv.
rmse_test = np.sqrt( mse_test )
mae_test = mt.mean_absolute_error( df_y_test, y_pred_test )
# scikit-learn returns MAPE as a ratio, not multiplied by 100
mape_test = mt.mean_absolute_percentage_error( df_y_test, y_pred_test )

print( f'Test R2: {r2_test}' )
print( f'Test MSE: {mse_test}' )
print( f'Test RMSE: {rmse_test}' )
print( f'Test MAE: {mae_test}' )
print( f'Test MAPE: {mape_test}' )

Test R2: 0.05116551777115075
Test MSE: 461.98843535255924
Test RMSE: 21.493916240475098
Test MAE: 17.1441970869877
Test MAPE: 8.531355027820084
