In [1]:
from IPython.display import display

import pandas as pd

from sklearn.linear_model import RANSACRegressor, HuberRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

from sklearn.svm import SVR

In [2]:
df = pd.read_csv('../data/felicidad_corrupt.csv')

display(df.info())
display(df.head(2))

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 167 entries, 0 to 166
Data columns (total 12 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   country     167 non-null    object 
 1   rank        167 non-null    int64  
 2   score       167 non-null    float64
 3   high        167 non-null    float64
 4   low         167 non-null    float64
 5   gdp         167 non-null    float64
 6   family      167 non-null    float64
 7   lifexp      167 non-null    float64
 8   freedom     167 non-null    float64
 9   generosity  167 non-null    float64
 10  corruption  167 non-null    float64
 11  dystopia    167 non-null    float64
dtypes: float64(10), int64(1), object(1)
memory usage: 15.8+ KB


None

Unnamed: 0,country,rank,score,high,low,gdp,family,lifexp,freedom,generosity,corruption,dystopia
0,Norway,1,7.537,7.594445,7.479556,1.616463,1.533524,0.796667,0.635423,0.362012,0.315964,2.277027
1,Denmark,2,7.522,7.581728,7.462272,1.482383,1.551122,0.792566,0.626007,0.35528,0.40077,2.313707


In [3]:
x = df.drop(columns=['country', 'score'])
y = df['score']

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=1)
print(f'x \n   Train:  {x_train.shape} \n   Test:   {x_test.shape} \n')
print(f'Y \n   Train:  {y_train.shape} \n   Test:   {y_test.shape} \n')

x 
   Train:  (116, 10) 
   Test:   (51, 10) 

Y 
   Train:  (116,) 
   Test:   (51,) 



In [4]:
models = {
    'HUBER' : HuberRegressor(epsilon=1.35),
    'SVR' : SVR(gamma='auto', C=1.0, epsilon=0.1),
    'RANSAC' : RANSACRegressor()
}

for name, model in models.items():
    model.fit(x_train, y_train)
    y_hat = model.predict(x_test)
    error = mean_squared_error(y_test, y_hat)
    score = model.score(x_test, y_test)
    print('{name} \n   MSE:   {error} \n   SCORE: {score:.2%}\n'.format(error=error, name=name, score=score))

HUBER 
   MSE:   1.172500699185358e-06 
   SCORE: 100.00%

SVR 
   MSE:   0.04249215616531833 
   SCORE: 97.40%

RANSAC 
   MSE:   1.731465916095798e-19 
   SCORE: 100.00%

