In [1]:
import pandas as pd
import sklearn

import numpy as np

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.linear_model import ElasticNet



from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

import matplotlib.pyplot as plt

In [2]:
if __name__ == '__main__':
    dataset = pd.read_csv('./data/rawData/felicidad.csv')
    print(dataset.describe())

             rank       score        high         low         gdp      family  \
count  155.000000  155.000000  155.000000  155.000000  155.000000  155.000000   
mean    78.000000    5.354019    5.452326    5.255713    0.984718    1.188898   
std     44.888751    1.131230    1.118542    1.145030    0.420793    0.287263   
min      1.000000    2.693000    2.864884    2.521116    0.000000    0.000000   
25%     39.500000    4.505500    4.608172    4.374955    0.663371    1.042635   
50%     78.000000    5.279000    5.370032    5.193152    1.064578    1.253918   
75%    116.500000    6.101500    6.194600    6.006527    1.318027    1.414316   
max    155.000000    7.537000    7.622030    7.479556    1.870766    1.610574   

           lifexp     freedom  generosity  corruption    dystopia  
count  155.000000  155.000000  155.000000  155.000000  155.000000  
mean     0.551341    0.408786    0.246883    0.123120    1.850238  
std      0.237073    0.149997    0.134780    0.101661    0.500028 

In [3]:
x = dataset[['gdp', 'family', 'lifexp', 'freedom', 'corruption', 'generosity', 'dystopia']]
y = dataset[['score']]

print(x.shape)
print(y.shape)

(155, 7)
(155, 1)


In [4]:
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.25)

In [5]:
modelLinear = LinearRegression().fit(X_train, y_train)

y_predict_linear = modelLinear.predict(X_test)

modelLasso = Lasso(alpha=0.02).fit(X_train, y_train)

y_predict_lasso = modelLasso.predict(X_test)

modelRidge = Ridge(alpha=1).fit(X_train, y_train)

y_predict_ridge = modelRidge.predict(X_test)

linear_loss = mean_squared_error(y_test, y_predict_linear)

print('Linear Loss:', linear_loss)

lasso_loss = mean_squared_error(y_test, y_predict_lasso)

print('Lasso Loss:', lasso_loss)

ridge_loss = mean_squared_error(y_test, y_predict_ridge)

print('Ridge Loss:', ridge_loss)


Linear Loss: 1.0258312503429065e-07
Lasso Loss: 0.09102565288866861
Ridge Loss: 0.012351698471630793


In [8]:
print("Linear Coef")
print(f"{modelLinear.coef_}")
print("Coef Lasso")
print(f"{modelLasso.coef_}")
print('=='*32)
print('Coef Ridge')
print(f"{modelRidge.coef_}")

Linear Coef
[[1.00000637 0.99988946 1.00006468 1.00007267 1.00000875 1.00007761
  0.99997318]]
Coef Lasso
[1.32631108 0.81308157 0.49107475 0.72792102 0.         0.04633982
 0.88149314]
Coef Ridge
[[1.09157461 0.93206564 0.86273242 0.88957397 0.54830302 0.72533212
  0.95300529]]


In [7]:

modelElasticNet = ElasticNet(random_state=0).fit(X_train, y_train)

y_pred_elastic = modelElasticNet.predict(X_test)

elastic_loss = mean_squared_error(y_test, y_pred_elastic)

print('ElasticNet Loss: ', elastic_loss)

print("Coef Elastic: ", modelElasticNet.coef_)

ElasticNet Loss:  1.4307915933591668
Coef Elastic:  [0. 0. 0. 0. 0. 0. 0.]
