## Polynomial Regression Ridge

In [1]:
#import bibliotecas

import numpy as np
import pandas as pd

from sklearn import preprocessing as pp
from sklearn import linear_model as lm
from sklearn import metrics as mt

### Datasets

In [2]:
X_train = pd.read_csv('X_training.csv')
y_train = pd.read_csv('y_training.csv') 
X_test = pd.read_csv('X_test.csv')
y_test = pd.read_csv('y_test.csv')
X_val = pd.read_csv('X_validation.csv')
y_val = pd.read_csv('y_val.csv') 

In [3]:
X_val.head(2)

Unnamed: 0,song_duration_ms,acousticness,danceability,energy,instrumentalness,key,liveness,loudness,audio_mode,speechiness,tempo,time_signature,audio_valence
0,0.143252,0.0259,0.616,0.933,0.0,0.0,0.359,0.962307,0,0.0513,0.309853,0.8,0.806
1,0.363603,0.000188,0.49,0.972,0.0299,0.909091,0.368,0.765216,0,0.111,-0.908089,0.8,0.376


In [4]:
# preparação dos dados 

y_train = y_train.values.ravel()
y_val = y_val.values.ravel()

### Dados de Treino

In [5]:
degrees = np.arange(1,3)
alph = np.arange(1,4)
degree_list= []
alpha_list =[]
r2_list = []


for i in degrees:
    #features
    poly = pp.PolynomialFeatures (degree= i)
    X_poly = poly.fit_transform(X_train)
    degree_list.append(i)
    for a in alph:
        #definition
        ridge = lm.Ridge( alpha = a , max_iter=100)
        alpha_list.append(a)
        #fit
        ridge.fit (X_poly, y_train)
        
        #predict
        ypred = ridge.predict(X_poly)
        
        #r2
        r2 = round(mt.r2_score(y_train , ypred),4)
        r2_list.append(r2)

        print ( 'Degree: {}, ALPHA:{}, R2:{}' .format(i, a, r2))

Degree: 1, ALPHA:1, R2:0.0461
Degree: 1, ALPHA:2, R2:0.0461
Degree: 1, ALPHA:3, R2:0.0461
Degree: 2, ALPHA:1, R2:0.0932
Degree: 2, ALPHA:2, R2:0.0925
Degree: 2, ALPHA:3, R2:0.092


**Maior R2 encontrado:**

In [6]:
max(r2_list)

0.0932

**Com o máximo R2 , temos o Degree = 2 e Alpha = 1**

In [7]:
#features
poly = pp.PolynomialFeatures (degree= 2)
X_poly = poly.fit_transform(X_train)

#definition
ridge = lm.Ridge( alpha = 1 , max_iter=100)

#fit
ridge.fit (X_poly, y_train)
        
#predict
ypred = ridge.predict(X_poly)
        
#r2
r2 = round(mt.r2_score(y_train , ypred),3)
print( 'R2: {}'.format(r2))

#mse
mse = round(mt.mean_squared_error( y_train , ypred),2)
print ('MSE: {}'.format(mse))

#rmse 
rmse = round(np.sqrt(mse),2)
print ('RMSE: {}'.format(rmse))

#mae
mae = np.round(mt.mean_absolute_error( y_train , ypred), 2)
print('MAE: {}'.format( mae ))

#mape
mape = np.round(mt.mean_absolute_percentage_error( y_train , ypred), 2) 
print('MAPE: {}%'.format( mape ) )


R2: 0.093
MSE: 433.48
RMSE: 20.82
MAE: 16.47
MAPE: 8.37%


### Dados de Validação

In [8]:
#features
poly = pp.PolynomialFeatures (degree= 2)
X_polytrain = poly.fit_transform(X_train)
X_polyval = poly.fit_transform(X_val)


# definition
ridge = lm.Ridge( alpha = 1 , max_iter=100)

# fit
ridge.fit( X_polytrain, y_train)

# performance
ypred_val = ridge.predict( X_polyval )

#R2
r2 = round(mt.r2_score( y_val , ypred_val),3)
print( 'R2: {}'.format(r2))

#mse
mse = round(mt.mean_squared_error( y_val , ypred_val),2)
print ('MSE: {}'.format(mse))

#rmse 
rmse = np.sqrt(mse)
print ('RMSE: {}'.format(rmse))

#mae
mae = np.round(mt.mean_absolute_error( y_val , ypred_val), 2)
print('MAE: {}'.format( mae ))

#mape
mape = np.round(mt.mean_absolute_percentage_error( y_val , ypred_val), 2) 
print('MAPE: {}%'.format( mape ) )

R2: 0.068
MSE: 445.18
RMSE: 21.099289087549845
MAE: 16.74
MAPE: 8.57%


### Dados de Teste

In [9]:
#features
poly = pp.PolynomialFeatures (degree= 2)
X_polytrain = poly.fit_transform(X_train)
X_polyval = poly.fit_transform(X_val)
X_polytest = poly.fit_transform(X_test)

# definition
ridge = lm.Ridge( alpha = 1 , max_iter=100)

# fit
ridge.fit(np.concatenate(( X_polytrain ,X_polyval) ),
          np.concatenate((y_train , y_val)))

# performance
ypred_test = ridge.predict( X_polytest )

#R2
r2 = round(mt.r2_score( y_test , ypred_test),3)
print( 'R2: {}'.format(r2))

#mse
mse = round(mt.mean_squared_error( y_test , ypred_test),2)
print ('MSE: {}'.format(mse))

#rmse 
rmse = np.sqrt(mse)
print ('RMSE: {}'.format(rmse))

#mae
mae = np.round(mt.mean_absolute_error(  y_test , ypred_test), 2)
print('MAE: {}'.format( mae ))

#mape
mape = np.round(mt.mean_absolute_percentage_error( y_test, ypred_test), 2) 
print('MAPE: {}%'.format( mape ) )

R2: 0.09
MSE: 442.97
RMSE: 21.046852496276017
MAE: 16.74
MAPE: 8.31%
