In [1]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error as mse

In [2]:
def fit_predict_score(model, x_train, y_train, x_test, y_test, metric) -> float:
    """Fit ``model`` on the training split and score it on the test split.

    Args:
        model: Object exposing ``fit(x, y)`` and ``predict(x)``; its ``fit``
            method is called with the training data.
        x_train: Training features handed to ``model.fit``.
        y_train: Training targets handed to ``model.fit``.
        x_test: Held-out features handed to ``model.predict``.
        y_test: Held-out targets the predictions are compared against.
        metric: Callable invoked as ``metric(y_test, predictions)``.

    Returns:
        The value returned by ``metric`` for the test-set predictions.
    """
    model.fit(x_train, y_train)
    # Ground truth goes first, predictions second, when calling the metric.
    return metric(y_test, model.predict(x_test))

In [3]:
# Read the CSV into a DataFrame and print its descriptive statistics.
dataset = pd.read_csv("../inputs/felicidad.csv")
summary_stats = dataset.describe()
print(summary_stats)

             rank       score        high         low         gdp      family  \
count  155.000000  155.000000  155.000000  155.000000  155.000000  155.000000   
mean    78.000000    5.354019    5.452326    5.255713    0.984718    1.188898   
std     44.888751    1.131230    1.118542    1.145030    0.420793    0.287263   
min      1.000000    2.693000    2.864884    2.521116    0.000000    0.000000   
25%     39.500000    4.505500    4.608172    4.374955    0.663371    1.042635   
50%     78.000000    5.279000    5.370032    5.193152    1.064578    1.253918   
75%    116.500000    6.101500    6.194600    6.006527    1.318027    1.414316   
max    155.000000    7.537000    7.622030    7.479556    1.870766    1.610574   

           lifexp     freedom  generosity  corruption    dystopia  
count  155.000000  155.000000  155.000000  155.000000  155.000000  
mean     0.551341    0.408786    0.246883    0.123120    1.850238  
std      0.237073    0.149997    0.134780    0.101661    0.500028 

In [7]:
# Preview the first five rows (the default for head) to eyeball the columns.
dataset.head(n=5)

Unnamed: 0,country,rank,score,high,low,gdp,family,lifexp,freedom,generosity,corruption,dystopia
0,Norway,1,7.537,7.594445,7.479556,1.616463,1.533524,0.796667,0.635423,0.362012,0.315964,2.277027
1,Denmark,2,7.522,7.581728,7.462272,1.482383,1.551122,0.792566,0.626007,0.35528,0.40077,2.313707
2,Iceland,3,7.504,7.62203,7.38597,1.480633,1.610574,0.833552,0.627163,0.47554,0.153527,2.322715
3,Switzerland,4,7.494,7.561772,7.426227,1.56498,1.516912,0.858131,0.620071,0.290549,0.367007,2.276716
4,Finland,5,7.469,7.527542,7.410458,1.443572,1.540247,0.809158,0.617951,0.245483,0.382612,2.430182


In [4]:
# Select the feature columns that will be used as model inputs.
X = dataset[['gdp', 'family', 'lifexp', 'freedom',
                'corruption', 'generosity', 'dystopia']]
# The target is the `score` column only, kept as a single-column DataFrame
# (double brackets), so its shape prints as (n_rows, 1).
y = dataset[['score']]
print(X.shape)
print(y.shape)
# Pin the shuffle seed: without it every run produces a different split, so the
# losses and coefficients reported below cannot be reproduced.
RANDOM_STATE = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=RANDOM_STATE
)

(155, 7)
(155, 1)


In [5]:
# Candidate regressors keyed by the label used in the printed report.
models = {}
models["Linear Regression"] = LinearRegression()
models["Lasso"] = Lasso(alpha=0.2)
models["Ridge"] = Ridge(alpha=1)

# Fit each model on the training split and report its test-set MSE.
for name in models:
    model = models[name]
    score = fit_predict_score(model, X_train, y_train, X_test, y_test, mse)
    print(f"{name} loss: {score}")

Linear Regression loss: 8.427131777911863e-08
Lasso loss: 0.4833680603511786
Ridge loss: 0.005793411222176944


In [6]:
# Show the fitted coefficients of every model, preceded by a separator line.
# NOTE(review): in the recorded output the Linear Regression coefficients are
# all ~1 with a near-zero loss, which suggests `score` may simply be the sum of
# the selected feature columns - confirm before reading these as predictive.
print("=" * 32)
for entry in models.items():
    name, model = entry
    print(f"Coef {name}: {model.coef_}")

Coef Linear Regression: [[1.00004143 0.99990613 0.99999573 1.00003277 0.99985463 1.00021215
  0.99994836]]
Coef Lasso: [1.05963777 0.         0.         0.         0.         0.
 0.3294551 ]
Coef Ridge: [1.09511566 0.96407956 0.8037888  0.89687205 0.65917058 0.77061983
 0.96455814]
