In [1]:
# Import the libraries
import pandas as pd 
import sklearn 

# Import the models
from sklearn.linear_model import LinearRegression 
from sklearn.linear_model import Lasso 
from sklearn.linear_model import Ridge

# Import the training metrics and mean square error 
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error 

In [2]:
# Import and show the data set 

df_17_report = pd.read_csv('../input/world-happiness/2017.csv')
df_17_report.head()

Unnamed: 0,Country,Happiness.Rank,Happiness.Score,Whisker.high,Whisker.low,Economy..GDP.per.Capita.,Family,Health..Life.Expectancy.,Freedom,Generosity,Trust..Government.Corruption.,Dystopia.Residual
0,Norway,1,7.537,7.594445,7.479556,1.616463,1.533524,0.796667,0.635423,0.362012,0.315964,2.277027
1,Denmark,2,7.522,7.581728,7.462272,1.482383,1.551122,0.792566,0.626007,0.35528,0.40077,2.313707
2,Iceland,3,7.504,7.62203,7.38597,1.480633,1.610574,0.833552,0.627163,0.47554,0.153527,2.322715
3,Switzerland,4,7.494,7.561772,7.426227,1.56498,1.516912,0.858131,0.620071,0.290549,0.367007,2.276716
4,Finland,5,7.469,7.527542,7.410458,1.443572,1.540247,0.809158,0.617951,0.245483,0.382612,2.430182


In [3]:
df_17_report.describe()

Unnamed: 0,Happiness.Rank,Happiness.Score,Whisker.high,Whisker.low,Economy..GDP.per.Capita.,Family,Health..Life.Expectancy.,Freedom,Generosity,Trust..Government.Corruption.,Dystopia.Residual
count,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0
mean,78.0,5.354019,5.452326,5.255713,0.984718,1.188898,0.551341,0.408786,0.246883,0.12312,1.850238
std,44.888751,1.13123,1.118542,1.14503,0.420793,0.287263,0.237073,0.149997,0.13478,0.101661,0.500028
min,1.0,2.693,2.864884,2.521116,0.0,0.0,0.0,0.0,0.0,0.0,0.377914
25%,39.5,4.5055,4.608172,4.374955,0.663371,1.042635,0.369866,0.303677,0.154106,0.057271,1.591291
50%,78.0,5.279,5.370032,5.193152,1.064578,1.253918,0.606042,0.437454,0.231538,0.089848,1.83291
75%,116.5,6.1015,6.1946,6.006527,1.318027,1.414316,0.723008,0.516561,0.323762,0.153296,2.144654
max,155.0,7.537,7.62203,7.479556,1.870766,1.610574,0.949492,0.658249,0.838075,0.464308,3.117485


In [4]:
df_17_report.columns

Index(['Country', 'Happiness.Rank', 'Happiness.Score', 'Whisker.high',
       'Whisker.low', 'Economy..GDP.per.Capita.', 'Family',
       'Health..Life.Expectancy.', 'Freedom', 'Generosity',
       'Trust..Government.Corruption.', 'Dystopia.Residual'],
      dtype='object')

In [5]:
# We are going to chosse the columns that we are going to use 

X = df_17_report[['Economy..GDP.per.Capita.','Family','Freedom','Trust..Government.Corruption.','Generosity','Dystopia.Residual']]

In [6]:
# Define the target
y = df_17_report[['Happiness.Score']]

In [7]:
# Show the shape 

X.shape

(155, 6)

In [8]:
y.shape

(155, 1)

In [9]:
# We'll separte train and test dataset 

X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.25) 

In [10]:
# Define the regressors and fit the model 

modelLinear = LinearRegression().fit(X_train, y_train )

# Config penalty with alpha

modelLasso = Lasso(alpha=0.2).fit(X_train, y_train)

modelRidge = Ridge(alpha=1).fit(X_train, y_train)

In [11]:
# Calculate the prediction 

y_predict_linear = modelLinear.predict(X_test)

y_predict_lasso = modelLasso.predict(X_test)

y_predict_ridge = modelRidge.predict(X_test)

In [12]:
# Calculate the loss

linear_loss = mean_squared_error(y_test, y_predict_lasso)
print('Linear Loss: ', linear_loss)

lasso_loss = mean_squared_error(y_test, y_predict_lasso)
print('Lasso Loss', lasso_loss)

ridge_loss = mean_squared_error(y_test, y_predict_ridge)
print("Ridge loss: ", ridge_loss)

Linear Loss:  0.5509719919498246
Lasso Loss 0.5509719919498246
Ridge loss:  0.019007982810996964


In [13]:
# Show coefficients

print('Lasso coefficients: ', modelLasso.coef_)
print('Ridge coefficients: ', modelRidge.coef_)

Lasso coefficients:  [1.03532449 0.         0.         0.         0.         0.23560632]
Ridge coefficients:  [[1.43412539 1.03127103 0.91797846 0.6695014  0.84012108 0.96158414]]
