In [15]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

In [16]:
path = "https://frenzy86.s3.eu-west-2.amazonaws.com/IFAO/boston_houses.csv"
df = pd.read_csv(path)

In [17]:
df.rename(columns={'MEDV':'Price'},inplace=True)

target = 'Price'
X = df.drop(target,axis=1).values
y = df[target].values

X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.3, random_state=667, )

In [18]:
polyfeats = PolynomialFeatures(degree=2)
X_train_poly = polyfeats.fit_transform(X_train)
X_test_poly = polyfeats.transform(X_test)

print("Numero di esempi nel test: "+str(X_train_poly.shape[0]))
print("Numero di features: "+str(X_train_poly.shape[1]))

Numero di esempi nel test: 354
Numero di features: 105


In [19]:
ss = StandardScaler()
X_train_poly = ss.fit_transform(X_train_poly)
X_test_poly = ss.transform(X_test_poly)

In [20]:
def overfit_eval(model, X, y):

    """
    model: il nostro modello predittivo già addestrato
    X: una tupla contenente le prorietà del train set e test set (X_train, X_test)
    y: una tupla contenente target del train set e test set (y_train, y_test)
    """

    y_pred_train = model.predict(X[0])
    y_pred_test = model.predict(X[1])

    mse_train = mean_squared_error(y[0], y_pred_train)
    mse_test = mean_squared_error(y[1], y_pred_test)

    r2_train = r2_score(y[0], y_pred_train)
    r2_test = r2_score(y[1], y_pred_test)

    print("Train set:  MSE="+str(mse_train)+" R2="+str(r2_train))
    print("Test set:  MSE="+str(mse_test)+" R2="+str(r2_test))

In [21]:
ll = LinearRegression()
ll.fit(X_train_poly, y_train)

overfit_eval(ll, (X_train_poly, X_test_poly),(y_train, y_test))

Train set:  MSE=15.034347840021729 R2=0.8179241488461418
Test set:  MSE=32.03088973401751 R2=0.6389761364104628


In [22]:
from sklearn.linear_model import Ridge

alphas = [0.0001, 0.001, 0.01, 0.1 ,1 ,10] #alpha corrispone a lambda

for alpha in alphas:
    print("Alpha="+str(alpha))
    ridge = Ridge(alpha=alpha)
    ridge.fit(X_train_poly, y_train)

    overfit_eval(ridge, (X_train_poly, X_test_poly),(y_train, y_test))

Alpha=0.0001
Train set:  MSE=3.994688160473648 R2=0.9516216962217479
Test set:  MSE=15.152641036909586 R2=0.8292128299851529
Alpha=0.001
Train set:  MSE=4.008703827486144 R2=0.9514519572661282
Test set:  MSE=15.124700794341678 R2=0.829527747691319
Alpha=0.01
Train set:  MSE=4.058274133286124 R2=0.9508516282251529
Test set:  MSE=15.105305007019956 R2=0.8297463598539656
Alpha=0.1
Train set:  MSE=4.402406182869467 R2=0.9466839625236576
Test set:  MSE=15.48999674800514 R2=0.8254104547394111
Alpha=1
Train set:  MSE=5.509493608081795 R2=0.9332764048835002
Test set:  MSE=16.52339795797641 R2=0.8137628701559049
Alpha=10
Train set:  MSE=8.278399484203595 R2=0.8997431316398323
Test set:  MSE=20.18929728795343 R2=0.7724440947291631


In [23]:
from sklearn.linear_model import Lasso

alphas = [0.0001, 0.001, 0.01, 0.1 ,1 ,10] #alpha corrispone a lambda

for alpha in alphas:
    print("Alpha="+str(alpha))
    ridge = Lasso(alpha=alpha)
    ridge.fit(X_train_poly, y_train)

    overfit_eval(ridge, (X_train_poly, X_test_poly),(y_train, y_test))

Alpha=0.0001
Train set:  MSE=4.879663235273144 R2=0.9409040653867655
Test set:  MSE=17.5886150638713 R2=0.801756684928917
Alpha=0.001
Train set:  MSE=4.909409855251362 R2=0.9405438142332658
Test set:  MSE=17.428184826002518 R2=0.803564912698827
Alpha=0.01
Train set:  MSE=5.916971830313477 R2=0.9283415753232851
Test set:  MSE=17.032217443861434 R2=0.8080279068692383
Alpha=0.1
Train set:  MSE=11.021977307534176 R2=0.8665165977917872
Test set:  MSE=25.793165540963034 R2=0.7092822473827975
Alpha=1
Train set:  MSE=18.70379818426357 R2=0.7734846891632877
Test set:  MSE=35.19362698600457 R2=0.603328558971542
Alpha=10
Train set:  MSE=82.57189377254302 R2=0.0
Test set:  MSE=88.72279474960983 R2=-4.883253304388546e-06


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


In [24]:
from sklearn.linear_model import ElasticNet

alphas = [0.0001, 0.001, 0.01, 0.1 ,1 ,10]

for alpha in alphas:
    print("Lambda is: "+str(alpha))
    elastic = ElasticNet(alpha=alpha, l1_ratio=0.5)
    elastic.fit(X_train_poly, y_train)
    overfit_eval(elastic, (X_train_poly, X_test_poly),(y_train, y_test))

Lambda is: 0.0001
Train set:  MSE=4.885168469233903 R2=0.9408373933787827
Test set:  MSE=17.536105176623636 R2=0.8023485299425342
Lambda is: 0.001
Train set:  MSE=5.022774408558439 R2=0.9391708948520129
Test set:  MSE=17.050013599981426 R2=0.807827324334915
Lambda is: 0.01
Train set:  MSE=6.258270634244147 R2=0.9242082220920896
Test set:  MSE=17.272043729307526 R2=0.8053247970623908
Lambda is: 0.1
Train set:  MSE=11.457395627997807 R2=0.8612433952458574

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(



Test set:  MSE=25.64089984015166 R2=0.7109984517110435
Lambda is: 1
Train set:  MSE=18.966995179357113 R2=0.770297200260362
Test set:  MSE=35.51343150683474 R2=0.5997240052216382
Lambda is: 10
Train set:  MSE=67.75245698455062 R2=0.17947313681353638
Test set:  MSE=74.68499614194536 R2=0.15821676877431756


In [25]:
elastic = ElasticNet(alpha=0.001, l1_ratio=0.5)
elastic.fit(X_train_poly, y_train)

  model = cd_fast.enet_coordinate_descent(


In [26]:
X_ = X.copy()
X_ = polyfeats.transform(X_)
X_ = ss.transform(X_)

In [27]:
from sklearn.model_selection import cross_val_score

cross = cross_val_score(elastic, X_, y)

cross.mean()

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


0.050590558660125004

In [28]:
import pickle