# Polynomial Regression

## Imports

In [1]:
import pandas as pd
from matplotlib import pyplot as plt
from sklearn import metrics as mt
from sklearn import linear_model as lm
import numpy as np
from matplotlib import pyplot as plt
from sklearn import preprocessing as pp

## Load datasets

In [2]:
# Feature tables for the train / test / validation partitions, read from CSV
# files located next to the notebook.
x_train, x_test, x_val = (
    pd.read_csv(csv_path)
    for csv_path in ('./X_training.csv', './X_test.csv', './X_validation.csv')
)

# Target tables for the same three partitions, in the same order.
# NOTE(review): the validation target file is 'y_val.csv' while the feature
# file is 'X_validation.csv' - confirm both names match the files on disk.
y_train, y_test, y_val = (
    pd.read_csv(csv_path)
    for csv_path in ('./y_training.csv', './y_test.csv', './y_val.csv')
)


## Model training

### Training dataset

In [3]:
# Degree-2 polynomial feature expansion.
poly = pp.PolynomialFeatures(degree=2)

# Fit the expansion on the training features, then expand those same features.
x_train_poly = poly.fit(x_train).transform(x_train)

In [24]:
# Candidate regressors, fitted and evaluated on the polynomial training features.

# Ordinary least squares (no regularisation).
lr = lm.LinearRegression()

# Lasso (L1 penalty) - tends to drive coefficients to zero.
lasso = lm.Lasso(alpha=1)

# Ridge (L2 penalty) - tends to smooth (shrink) the coefficients.
# NOTE(review): the validation and test sections build Ridge with alpha=1;
# confirm which value is intended before comparing metrics across sections.
ridge = lm.Ridge(alpha=10)

# Elastic net - L1 and L2 penalties combined.
elastic = lm.ElasticNet(alpha=10)

# RANSAC - relies on random sampling, so it is seeded to make the fitted model
# (and the metrics reported for it) reproducible across reruns.
ransac = lm.RANSACRegressor(random_state=42)

# fit on the training partition
lr.fit(x_train_poly, y_train)
lasso.fit(x_train_poly, y_train)
ridge.fit(x_train_poly, y_train)
elastic.fit(x_train_poly, y_train)
ransac.fit(x_train_poly, y_train)

# predict on the same training partition (in-sample predictions)
yhat_linear_model = lr.predict(x_train_poly)
yhat_lasso = lasso.predict(x_train_poly)
yhat_ridge = ridge.predict(x_train_poly)
yhat_elastic = elastic.predict(x_train_poly)
yhat_ransac = ransac.predict(x_train_poly)

In [5]:
# Training-set scores for the plain linear regression.

# error metrics
mse_lm = mt.mean_squared_error(y_train, yhat_linear_model)
rmse_lm = np.sqrt(mse_lm)
mae_lm = mt.mean_absolute_error(y_train, yhat_linear_model)
mape_lm = mt.mean_absolute_percentage_error(y_train, yhat_linear_model)
# coefficient of determination
r2_lm = mt.r2_score(y_train, yhat_linear_model)

# one metric per line, rounded to three decimals
print(
    f'r2: {r2_lm:.3f}',
    f'mse: {mse_lm:.3f}',
    f'rmse: {rmse_lm:.3f}',
    f'mae: {mae_lm:.3f}',
    f'mape: {mape_lm:.3f}',
    sep='\n',
)

r2: 0.094
mse: 432.986
rmse: 20.808
mae: 16.458
mape: 8.351


In [25]:
# Training-set scores for the Lasso model.

# error metrics
mse_lasso = mt.mean_squared_error(y_train, yhat_lasso)
rmse_lasso = np.sqrt(mse_lasso)
mae_lasso = mt.mean_absolute_error(y_train, yhat_lasso)
mape_lasso = mt.mean_absolute_percentage_error(y_train, yhat_lasso)
# coefficient of determination
r2_lasso = mt.r2_score(y_train, yhat_lasso)

# one metric per line, rounded to three decimals
print(
    f'r2: {r2_lasso:.3f}',
    f'mse: {mse_lasso:.3f}',
    f'rmse: {rmse_lasso:.3f}',
    f'mae: {mae_lasso:.3f}',
    f'mape: {mape_lasso:.3f}',
    sep='\n',
)

r2: 0.009
mse: 473.639
rmse: 21.763
mae: 17.285
mape: 8.700


In [7]:
# Training-set scores for the Ridge model.

# error metrics
mse_ridge = mt.mean_squared_error(y_train, yhat_ridge)
rmse_ridge = np.sqrt(mse_ridge)
mae_ridge = mt.mean_absolute_error(y_train, yhat_ridge)
mape_ridge = mt.mean_absolute_percentage_error(y_train, yhat_ridge)
# coefficient of determination
r2_ridge = mt.r2_score(y_train, yhat_ridge)

# one metric per line, rounded to three decimals
print(
    f'r2: {r2_ridge:.3f}',
    f'mse: {mse_ridge:.3f}',
    f'rmse: {rmse_ridge:.3f}',
    f'mae: {mae_ridge:.3f}',
    f'mape: {mape_ridge:.3f}',
    sep='\n',
)

r2: 0.089
mse: 435.302
rmse: 20.864
mae: 16.517
mape: 8.416


In [8]:
# Training-set scores for the elastic-net model.

# error metrics
mse_en = mt.mean_squared_error(y_train, yhat_elastic)
rmse_en = np.sqrt(mse_en)
mae_en = mt.mean_absolute_error(y_train, yhat_elastic)
mape_en = mt.mean_absolute_percentage_error(y_train, yhat_elastic)
# coefficient of determination
r2_en = mt.r2_score(y_train, yhat_elastic)

# one metric per line, rounded to three decimals
print(
    f'r2: {r2_en:.3f}',
    f'mse: {mse_en:.3f}',
    f'rmse: {rmse_en:.3f}',
    f'mae: {mae_en:.3f}',
    f'mape: {mape_en:.3f}',
    sep='\n',
)

r2: 0.000
mse: 478.013
rmse: 21.863
mae: 17.365
mape: 8.742


In [9]:
# Training-set scores for the RANSAC model.

# error metrics
mse_ransac = mt.mean_squared_error(y_train, yhat_ransac)
rmse_ransac = np.sqrt(mse_ransac)
mae_ransac = mt.mean_absolute_error(y_train, yhat_ransac)
mape_ransac = mt.mean_absolute_percentage_error(y_train, yhat_ransac)
# coefficient of determination
r2_ransac = mt.r2_score(y_train, yhat_ransac)

# one metric per line, rounded to three decimals
print(
    f'r2: {r2_ransac:.3f}',
    f'mse: {mse_ransac:.3f}',
    f'rmse: {rmse_ransac:.3f}',
    f'mae: {mae_ransac:.3f}',
    f'mape: {mape_ransac:.3f}',
    sep='\n',
)

r2: -39163130671637.703
mse: 18720468349174144.000
rmse: 136822762.540
mae: 23375889.307
mape: 7749900.787


### Validation dataset

In [10]:
# Expand the validation features with the transformer that was already fitted
# on the training features: transform only, never refit on held-out data.
x_val_poly = poly.transform(x_val)

In [11]:
# Candidate regressors: fitted on the training features, scored on validation.

# Ordinary least squares (no regularisation).
lr = lm.LinearRegression()

# Lasso (L1 penalty) - tends to drive coefficients to zero.
lasso = lm.Lasso(alpha=1)

# Ridge (L2 penalty) - tends to smooth (shrink) the coefficients.
# NOTE(review): the training-section cell builds Ridge with alpha=10;
# confirm which value is intended before comparing metrics across sections.
ridge = lm.Ridge(alpha=1)

# Elastic net - L1 and L2 penalties combined.
# NOTE(review): the test-section cell builds ElasticNet with alpha=1; confirm.
elastic = lm.ElasticNet(alpha=10)

# RANSAC - relies on random sampling, so it is seeded to make the fitted model
# (and the metrics reported for it) reproducible across reruns.
ransac = lm.RANSACRegressor(random_state=42)

# fit on the training partition only
lr.fit(x_train_poly, y_train)
lasso.fit(x_train_poly, y_train)
ridge.fit(x_train_poly, y_train)
elastic.fit(x_train_poly, y_train)
ransac.fit(x_train_poly, y_train)

# predict on the validation partition
yhat_linear_model = lr.predict(x_val_poly)
yhat_lasso = lasso.predict(x_val_poly)
yhat_ridge = ridge.predict(x_val_poly)
yhat_elastic = elastic.predict(x_val_poly)
yhat_ransac = ransac.predict(x_val_poly)


In [12]:
# Validation-set scores for the plain linear regression.

# error metrics
mse_lm = mt.mean_squared_error(y_val, yhat_linear_model)
rmse_lm = np.sqrt(mse_lm)
mae_lm = mt.mean_absolute_error(y_val, yhat_linear_model)
mape_lm = mt.mean_absolute_percentage_error(y_val, yhat_linear_model)
# coefficient of determination
r2_lm = mt.r2_score(y_val, yhat_linear_model)

# one metric per line, rounded to three decimals
print(
    f'r2: {r2_lm:.3f}',
    f'mse: {mse_lm:.3f}',
    f'rmse: {rmse_lm:.3f}',
    f'mae: {mae_lm:.3f}',
    f'mape: {mape_lm:.3f}',
    sep='\n',
)

r2: 0.066
mse: 445.768
rmse: 21.113
mae: 16.750
mape: 8.548


In [13]:
# Validation-set scores for the Lasso model.

# error metrics
mse_lasso = mt.mean_squared_error(y_val, yhat_lasso)
rmse_lasso = np.sqrt(mse_lasso)
mae_lasso = mt.mean_absolute_error(y_val, yhat_lasso)
mape_lasso = mt.mean_absolute_percentage_error(y_val, yhat_lasso)
# coefficient of determination
r2_lasso = mt.r2_score(y_val, yhat_lasso)

# one metric per line, rounded to three decimals
print(
    f'r2: {r2_lasso:.3f}',
    f'mse: {mse_lasso:.3f}',
    f'rmse: {rmse_lasso:.3f}',
    f'mae: {mae_lasso:.3f}',
    f'mape: {mape_lasso:.3f}',
    sep='\n',
)

r2: 0.010
mse: 472.913
rmse: 21.747
mae: 17.238
mape: 8.682


In [14]:
# Validation-set scores for the Ridge model.

# error metrics
mse_ridge = mt.mean_squared_error(y_val, yhat_ridge)
rmse_ridge = np.sqrt(mse_ridge)
mae_ridge = mt.mean_absolute_error(y_val, yhat_ridge)
mape_ridge = mt.mean_absolute_percentage_error(y_val, yhat_ridge)
# coefficient of determination
r2_ridge = mt.r2_score(y_val, yhat_ridge)

# one metric per line, rounded to three decimals
print(
    f'r2: {r2_ridge:.3f}',
    f'mse: {mse_ridge:.3f}',
    f'rmse: {rmse_ridge:.3f}',
    f'mae: {mae_ridge:.3f}',
    f'mape: {mape_ridge:.3f}',
    sep='\n',
)

r2: 0.068
mse: 445.184
rmse: 21.099
mae: 16.739
mape: 8.569


In [15]:
# Validation-set scores for the elastic-net model.

# error metrics
mse_en = mt.mean_squared_error(y_val, yhat_elastic)
rmse_en = np.sqrt(mse_en)
mae_en = mt.mean_absolute_error(y_val, yhat_elastic)
mape_en = mt.mean_absolute_percentage_error(y_val, yhat_elastic)
# coefficient of determination
r2_en = mt.r2_score(y_val, yhat_elastic)

# one metric per line, rounded to three decimals
print(
    f'r2: {r2_en:.3f}',
    f'mse: {mse_en:.3f}',
    f'rmse: {rmse_en:.3f}',
    f'mae: {mae_en:.3f}',
    f'mape: {mape_en:.3f}',
    sep='\n',
)

r2: -0.000
mse: 477.512
rmse: 21.852
mae: 17.353
mape: 8.679


In [16]:
# Validation-set scores for the RANSAC model.

# error metrics
mse_ransac = mt.mean_squared_error(y_val, yhat_ransac)
rmse_ransac = np.sqrt(mse_ransac)
mae_ransac = mt.mean_absolute_error(y_val, yhat_ransac)
mape_ransac = mt.mean_absolute_percentage_error(y_val, yhat_ransac)
# coefficient of determination
r2_ransac = mt.r2_score(y_val, yhat_ransac)

# one metric per line, rounded to three decimals
print(
    f'r2: {r2_ransac:.3f}',
    f'mse: {mse_ransac:.3f}',
    f'rmse: {rmse_ransac:.3f}',
    f'mae: {mae_ransac:.3f}',
    f'mape: {mape_ransac:.3f}',
    sep='\n',
)

r2: -42559758960029.188
mse: 20322779105484576.000
rmse: 142557985.064
mae: 23205679.760
mape: 1919985.059


### Test dataset

In [17]:
# Expand the test features with the transformer that was already fitted on the
# training features: transform only, never refit on held-out data.
x_test_poly = poly.transform(x_test)

In [18]:
# Final models: refitted on training + validation rows, then scored on test.

# Ordinary least squares (no regularisation).
lr = lm.LinearRegression()

# Lasso (L1 penalty) - tends to drive coefficients to zero.
lasso = lm.Lasso(alpha=1)

# Ridge (L2 penalty) - tends to smooth (shrink) the coefficients.
# NOTE(review): the training-section cell builds Ridge with alpha=10; confirm.
ridge = lm.Ridge(alpha=1)

# Elastic net - L1 and L2 penalties combined.
# NOTE(review): the training and validation sections use alpha=10; confirm.
elastic = lm.ElasticNet(alpha=1)

# RANSAC - relies on random sampling, so it is seeded to make the fitted model
# (and the metrics reported for it) reproducible across reruns.
ransac = lm.RANSACRegressor(random_state=42)

# Stack the training and validation rows once, instead of rebuilding the same
# two arrays for every estimator.
x_train_val_poly = np.concatenate((x_train_poly, x_val_poly))
y_train_val = np.concatenate((y_train, y_val))

# fit on training + validation
lr.fit(x_train_val_poly, y_train_val)
lasso.fit(x_train_val_poly, y_train_val)
ridge.fit(x_train_val_poly, y_train_val)
elastic.fit(x_train_val_poly, y_train_val)
ransac.fit(x_train_val_poly, y_train_val)

# predict on the test partition
yhat_linear_model = lr.predict(x_test_poly)
yhat_lasso = lasso.predict(x_test_poly)
yhat_ridge = ridge.predict(x_test_poly)
yhat_elastic = elastic.predict(x_test_poly)
yhat_ransac = ransac.predict(x_test_poly)

In [19]:
# Test-set scores for the plain linear regression.

# error metrics
mse_lm = mt.mean_squared_error(y_test, yhat_linear_model)
rmse_lm = np.sqrt(mse_lm)
mae_lm = mt.mean_absolute_error(y_test, yhat_linear_model)
mape_lm = mt.mean_absolute_percentage_error(y_test, yhat_linear_model)
# coefficient of determination
r2_lm = mt.r2_score(y_test, yhat_linear_model)

# one metric per line, rounded to three decimals
print(
    f'r2: {r2_lm:.3f}',
    f'mse: {mse_lm:.3f}',
    f'rmse: {rmse_lm:.3f}',
    f'mae: {mae_lm:.3f}',
    f'mape: {mape_lm:.3f}',
    sep='\n',
)

r2: 0.091
mse: 442.641
rmse: 21.039
mae: 16.736
mape: 8.277


In [20]:
# Test-set scores for the Lasso model.

# error metrics
mse_lasso = mt.mean_squared_error(y_test, yhat_lasso)
rmse_lasso = np.sqrt(mse_lasso)
mae_lasso = mt.mean_absolute_error(y_test, yhat_lasso)
mape_lasso = mt.mean_absolute_percentage_error(y_test, yhat_lasso)
# coefficient of determination
r2_lasso = mt.r2_score(y_test, yhat_lasso)

# one metric per line, rounded to three decimals
print(
    f'r2: {r2_lasso:.3f}',
    f'mse: {mse_lasso:.3f}',
    f'rmse: {rmse_lasso:.3f}',
    f'mae: {mae_lasso:.3f}',
    f'mape: {mape_lasso:.3f}',
    sep='\n',
)

r2: 0.009
mse: 482.615
rmse: 21.968
mae: 17.456
mape: 8.756


In [21]:
# Test-set scores for the Ridge model.

# error metrics
mse_ridge = mt.mean_squared_error(y_test, yhat_ridge)
rmse_ridge = np.sqrt(mse_ridge)
mae_ridge = mt.mean_absolute_error(y_test, yhat_ridge)
mape_ridge = mt.mean_absolute_percentage_error(y_test, yhat_ridge)
# coefficient of determination
r2_ridge = mt.r2_score(y_test, yhat_ridge)

# one metric per line, rounded to three decimals
print(
    f'r2: {r2_ridge:.3f}',
    f'mse: {mse_ridge:.3f}',
    f'rmse: {rmse_ridge:.3f}',
    f'mae: {mae_ridge:.3f}',
    f'mape: {mape_ridge:.3f}',
    sep='\n',
)

r2: 0.090
mse: 442.967
rmse: 21.047
mae: 16.742
mape: 8.309


In [22]:
# Test-set scores for the elastic-net model.

# error metrics
mse_en = mt.mean_squared_error(y_test, yhat_elastic)
rmse_en = np.sqrt(mse_en)
mae_en = mt.mean_absolute_error(y_test, yhat_elastic)
mape_en = mt.mean_absolute_percentage_error(y_test, yhat_elastic)
# coefficient of determination
r2_en = mt.r2_score(y_test, yhat_elastic)

# one metric per line, rounded to three decimals
print(
    f'r2: {r2_en:.3f}',
    f'mse: {mse_en:.3f}',
    f'rmse: {rmse_en:.3f}',
    f'mae: {mae_en:.3f}',
    f'mape: {mape_en:.3f}',
    sep='\n',
)

r2: 0.011
mse: 481.537
rmse: 21.944
mae: 17.427
mape: 8.754


In [23]:
# Test-set scores for the RANSAC model.

# error metrics
mse_ransac = mt.mean_squared_error(y_test, yhat_ransac)
rmse_ransac = np.sqrt(mse_ransac)
mae_ransac = mt.mean_absolute_error(y_test, yhat_ransac)
mape_ransac = mt.mean_absolute_percentage_error(y_test, yhat_ransac)
# coefficient of determination
r2_ransac = mt.r2_score(y_test, yhat_ransac)

# one metric per line, rounded to three decimals
print(
    f'r2: {r2_ransac:.3f}',
    f'mse: {mse_ransac:.3f}',
    f'rmse: {rmse_ransac:.3f}',
    f'mae: {mae_ransac:.3f}',
    f'mape: {mape_ransac:.3f}',
    sep='\n',
)

r2: -2285001956457.447
mse: 1112569682503553.875
rmse: 33355204.729
mae: 5940580.331
mape: 1207725.270
