In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

In [2]:
boston = pd.read_csv("Dataset/BostonHousing.csv")

In [3]:
X = boston.drop("medv",axis=1).values

In [4]:
Y = boston["medv"].values

In [5]:
X_train, X_test, Y_train, Y_test = train_test_split(X,Y, test_size=0.3, random_state=0)

In [6]:
poly_feats = PolynomialFeatures(degree=2)

In [7]:
X_train_poly = poly_feats.fit_transform(X_train)

In [8]:
X_test_poly = poly_feats.transform(X_test)

In [9]:
ss = StandardScaler()

In [10]:
X_train_poly = ss.fit_transform(X_train_poly)

In [11]:
X_test_poly = ss.transform(X_test_poly)

In [12]:
ll = LinearRegression()

In [13]:
ll.fit(X_train_poly, Y_train)

## Calcoliamo errore sul Train Set

In [14]:
Y_pred_train = ll.predict(X_train_poly)

In [15]:
mse = mean_squared_error(Y_train, Y_pred_train)

In [16]:
r2 = r2_score(Y_train, Y_pred_train)

In [17]:
print("MSE="+str(mse))

MSE=4.0920343304759585


In [18]:
print("R2="+str(r2))

R2=0.9517246762476053


## Calcoliamo errore sul Test Set

In [19]:
Y_pred_test = ll.predict(X_test_poly)

In [20]:
mse = mean_squared_error(Y_test, Y_pred_test)

In [21]:
r2 = r2_score(Y_test, Y_pred_test)

In [22]:
print("MSE="+str(mse))

MSE=29.25250713919841


In [23]:
print("R2="+str(r2))

R2=0.6486839499987875


## Regolarizzazione L2

In [24]:
from sklearn.linear_model import Ridge

In [25]:
alphas = [0.0001, 0.001, 0.01, 0.1, 1., 10.]

In [27]:
for alpha in alphas:
    print("ALPHA:"+str(alpha))
    model = Ridge(alpha=alpha)
    model.fit(X_train_poly, Y_train)

    Y_pred_train = model.predict(X_train_poly)
    Y_pred_test = model.predict(X_test_poly)

    mse_train = mean_squared_error(Y_train, Y_pred_train)
    mse_test = mean_squared_error(Y_test, Y_pred_test)

    r2_train = r2_score(Y_train, Y_pred_train)
    r2_test = r2_score(Y_test, Y_pred_test)

    print("Train set: MSE="+str(mse_train)+" R2="+str(r2_train))
    print("Test set: MSE="+str(mse_test)+" R2="+str(r2_test))

ALPHA:0.0001
Train set: MSE=4.099263404861015 R2=0.9516393920397556
Test set: MSE=28.91761846388228 R2=0.6527058878806365
ALPHA:0.001
Train set: MSE=4.113502509943374 R2=0.9514714077678066
Test set: MSE=28.42000926765981 R2=0.6586820627236776
ALPHA:0.01
Train set: MSE=4.20820612723713 R2=0.9503541522865098
Test set: MSE=26.813295018285196 R2=0.6779783405054398
ALPHA:0.1
Train set: MSE=4.747028830953483 R2=0.9439974508597107
Test set: MSE=23.631755117381356 R2=0.7161879211608029
ALPHA:1.0
Train set: MSE=5.875947305341835 R2=0.9306791596529947
Test set: MSE=17.63458462753178 R2=0.7882125937009058
ALPHA:10.0
Train set: MSE=8.812755521737843 R2=0.8960324885854234
Test set: MSE=17.159715774774178 R2=0.7939156621191289


## Regolarizzazione L1

### Porta a 0 pesi meno importanti

In [29]:
from sklearn.linear_model import Lasso

In [30]:
for alpha in alphas:
    print("ALPHA:"+str(alpha))
    model = Lasso(alpha=alpha)
    model.fit(X_train_poly, Y_train)

    Y_pred_train = model.predict(X_train_poly)
    Y_pred_test = model.predict(X_test_poly)

    mse_train = mean_squared_error(Y_train, Y_pred_train)
    mse_test = mean_squared_error(Y_test, Y_pred_test)

    r2_train = r2_score(Y_train, Y_pred_train)
    r2_test = r2_score(Y_test, Y_pred_test)

    print("Train set: MSE="+str(mse_train)+" R2="+str(r2_train))
    print("Test set: MSE="+str(mse_test)+" R2="+str(r2_test))

ALPHA:0.0001
Train set: MSE=5.3911236526970985 R2=0.9363988132296843
Test set: MSE=29.701776720600673 R2=0.6432883230881478
ALPHA:0.001
Train set: MSE=5.407317548867132 R2=0.936207767525449
Test set: MSE=28.788018557306582 R2=0.6542623536919956
ALPHA:0.01
Train set: MSE=6.063858816900318 R2=0.9284622943178908
Test set: MSE=22.93324201265634 R2=0.7245769068863115
ALPHA:0.1
Train set: MSE=11.833211121207535 R2=0.8603989967405071
Test set: MSE=19.29615234281638 R2=0.7682575380960781
ALPHA:1.0
Train set: MSE=21.590985067091978 R2=0.7452827346818105
Test set: MSE=27.258043145129136 R2=0.6726370152499754
ALPHA:10.0
Train set: MSE=84.76451346994796 R2=0.0
Test set: MSE=83.76673764512785 R2=-0.0060197319476869016


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


## Regolarizzazione L2 è migliore della L1

### Meglio utilizzarle entrambi! Con ElasticNet

### I valori devono essere sulla stessa scala Normalizzazione o Standardizzazione