In [1]:
from IPython.display import display
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

%matplotlib inline

In [2]:
# Load the dataset; the path is resolved relative to the notebook's working directory.
df = pd.read_csv('../data/felicidad.csv')

# DataFrame.info() prints its summary itself and returns None, so it is called
# bare here: wrapping it in display() rendered a stray "None" under the summary.
df.info()

# Rich preview of the first two rows.
display(df.head(2))

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 155 entries, 0 to 154
Data columns (total 12 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   country     155 non-null    object 
 1   rank        155 non-null    int64  
 2   score       155 non-null    float64
 3   high        155 non-null    float64
 4   low         155 non-null    float64
 5   gdp         155 non-null    float64
 6   family      155 non-null    float64
 7   lifexp      155 non-null    float64
 8   freedom     155 non-null    float64
 9   generosity  155 non-null    float64
 10  corruption  155 non-null    float64
 11  dystopia    155 non-null    float64
dtypes: float64(10), int64(1), object(1)
memory usage: 14.7+ KB


None

Unnamed: 0,country,rank,score,high,low,gdp,family,lifexp,freedom,generosity,corruption,dystopia
0,Norway,1,7.537,7.594445,7.479556,1.616463,1.533524,0.796667,0.635423,0.362012,0.315964,2.277027
1,Denmark,2,7.522,7.581728,7.462272,1.482383,1.551122,0.792566,0.626007,0.35528,0.40077,2.313707


In [3]:
# Feature matrix: the seven predictor columns, in the order their fitted
# coefficients are reported later in the notebook.
x = df.loc[:, ['gdp', 'family', 'lifexp', 'freedom', 'corruption', 'generosity', 'dystopia']]

# Target vector: the value the regressors are trained to predict.
y = df.loc[:, 'score']

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    random_state=1,
    test_size=0.3,
)

# Sanity check: shapes of the full data and of the train/test feature matrices.
tuple(part.shape for part in (x, y, x_train, x_test))


((155, 7), (155,), (108, 7), (47, 7))

In [4]:
# Ordinary least squares: the unregularised baseline.
model_linear = LinearRegression()
model_linear.fit(x_train, y_train)
y_hat_linear = model_linear.predict(x_test)

# Lasso (L1 penalty) with regularisation strength alpha=0.02.
model_lasso = Lasso(alpha=0.02)
model_lasso.fit(x_train, y_train)
y_hat_lasso = model_lasso.predict(x_test)

# Ridge (L2 penalty) with regularisation strength alpha=1.
model_ridge = Ridge(alpha=1)
model_ridge.fit(x_train, y_train)
y_hat_ridge = model_ridge.predict(x_test)


In [5]:
# Mean squared error of each model's predictions on the held-out test split.
loss_linear, loss_lasso, loss_ridge = (
    mean_squared_error(y_test, y_hat)
    for y_hat in (y_hat_linear, y_hat_lasso, y_hat_ridge)
)

# Each estimator's own .score() on the same test split (R^2 for regressors).
score_linear, score_lasso, score_ridge = (
    fitted.score(x_test, y_test)
    for fitted in (model_linear, model_lasso, model_ridge)
)

# Per-model report: loss, score and fitted coefficients (same column order as x).
print(f'Linear: \nLOSS: {loss_linear} \nSCORE: {score_linear} \nCOEF: {model_linear.coef_}\n')
print(f'Lasso: \nLOSS: {loss_lasso} \nSCORE: {score_lasso} \nCOEF: {model_lasso.coef_}\n')
print(f'Ridge: \nLOSS: {loss_ridge} \nSCORE: {score_ridge} \nCOEF: {model_ridge.coef_}\n')

Linear: 
LOSS: 9.750765185081905e-08 
SCORE: 0.9999999182374784 
COEF: [1.00014313 0.9999875  0.99969244 0.99991552 0.9999593  1.00022517
 0.99998714]

Lasso: 
LOSS: 0.03181524450935537 
SCORE: 0.9733221489205492 
COEF: [1.3139091  0.89008824 0.4497227  0.82012434 0.         0.32839722
 0.89557344]

Ridge: 
LOSS: 0.004325157856236762 
SCORE: 0.9963732506550478 
COEF: [1.08062593 0.95595475 0.85235651 0.88957098 0.66087502 0.76605749
 0.95616033]

