In [79]:
import pandas as pd
import numpy as np
import joblib as jb
from sklearn.model_selection import train_test_split

In [80]:
house=pd.read_csv('house.csv')
house=house.drop(columns='Unnamed: 0')
house


Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,Price
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.0900,1,296,15.3,396.90,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.90,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.90,5.33,36.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0,0.573,6.593,69.1,2.4786,1,273,21.0,391.99,9.67,22.4
502,0.04527,0.0,11.93,0,0.573,6.120,76.7,2.2875,1,273,21.0,396.90,9.08,20.6
503,0.06076,0.0,11.93,0,0.573,6.976,91.0,2.1675,1,273,21.0,396.90,5.64,23.9
504,0.10959,0.0,11.93,0,0.573,6.794,89.3,2.3889,1,273,21.0,393.45,6.48,22.0


In [81]:
target = 'Price'
X = house.drop(target,axis=1).values
y = house[target].values

X_train, X_test, y_train, y_test = train_test_split(X,y,
                                                    test_size=0.3,
                                                    random_state=667,
                                                    )

In [82]:
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import PolynomialFeatures

In [83]:
scaler = StandardScaler()
X_scaled= scaler.fit_transform(X)

In [84]:
def evaluate_model(model, X_train, X_test, y_train, y_test, model_name):
    """ fitto il modello e lo valido con le metrice di regressione
    """
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    # Misurazione errore
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, y_pred)
    # Cross validation
    cv_scores = cross_val_score(model, X_train, y_train, cv=5, scoring='r2')

    print(f"\nRisultati per {model_name}:")
    print(f"RMSE: {rmse:.2f}")
    print(f"R2 Score: {r2:.3f}")
    print(f"CV R2 Scores: {cv_scores.mean():.3f} (+/- {cv_scores.std() * 2:.3f})")

    return y_pred, r2

In [85]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled,y,
                                                    test_size=0.3,
                                                    random_state=667,
                                                    )

In [94]:
model = LinearRegression
model.fit(X_train,y_train)


AttributeError: 'numpy.ndarray' object has no attribute '_validate_params'

In [86]:
def overfit_eval(model, X, y):

    """
    model: il nostro modello predittivo già addestrato
    X: una tupla contenente le prorietà del train set e test set (X_train, X_test)
    y: una tupla contenente target del train set e test set (y_train, y_test)
    """

    y_pred_train = model.predict(X[0])
    y_pred_test = model.predict(X[1])

    mse_train = mean_squared_error(y[0], y_pred_train)
    mse_test = mean_squared_error(y[1], y_pred_test)

    r2_train = r2_score(y[0], y_pred_train)
    r2_test = r2_score(y[1], y_pred_test)

    print("Train set:  MSE="+str(mse_train)+" R2="+str(r2_train))
    print("Test set:  MSE="+str(mse_test)+" R2="+str(r2_test))

In [87]:
from sklearn.linear_model import ElasticNet
alphas=[0.0001,0.001,0.01,0.1,1,10]
for i in alphas:
    elastic = ElasticNet(alpha=i, l1_ratio=0.5)
    print("Alpha="+str(i))
    elastic.fit(X_train, y_train)
    overfit_eval(elastic, (X_train, X_test),(y_train, y_test))
    y_pred_elastic, r2_elastic = evaluate_model(elastic, X_train, X_test, y_train, y_test, "Elastic Net")

Alpha=0.0001
Train set:  MSE=17.210332042967053 R2=0.7915715474520233
Test set:  MSE=34.485931071546545 R2=0.611305081491078

Risultati per Elastic Net:
RMSE: 5.87
R2 Score: 0.611
CV R2 Scores: 0.751 (+/- 0.140)
Alpha=0.001
Train set:  MSE=17.210478708704674 R2=0.791569771233374
Test set:  MSE=34.48982001308396 R2=0.6112612487811293

Risultati per Elastic Net:
RMSE: 5.87
R2 Score: 0.611
CV R2 Scores: 0.751 (+/- 0.140)
Alpha=0.01
Train set:  MSE=17.222145438712808 R2=0.791428479451448
Test set:  MSE=34.53133654847361 R2=0.6107933111080366

Risultati per Elastic Net:
RMSE: 5.88
R2 Score: 0.611
CV R2 Scores: 0.751 (+/- 0.140)
Alpha=0.1
Train set:  MSE=17.77865625587115 R2=0.784688767041662
Test set:  MSE=35.42839322561281 R2=0.6006824815266891

Risultati per Elastic Net:
RMSE: 5.95
R2 Score: 0.601
CV R2 Scores: 0.746 (+/- 0.139)
Alpha=1
Train set:  MSE=24.49960165170899 R2=0.7032936931397393
Test set:  MSE=42.17596356124969 R2=0.5246298357012859

Risultati per Elastic Net:
RMSE: 6.49
R2 S

In [88]:
import warnings
warnings.filterwarnings('ignore')


In [89]:
elastic = ElasticNet(alpha=0.001, l1_ratio=0.5)

In [90]:
import numpy as np
def mape(actual,pred):
    return np.mean(np.abs((actual - pred) / actual)) * 100
#Calculate the MAPE
result = mape(y_test,y_pred_elastic)
result

35.52330869921908

In [91]:
from sklearn.model_selection import cross_val_score

scores = cross_val_score(elastic, X_poly_scaled, y)

print(scores)

[ 0.54097427 -0.15873748 -1.60256304  0.38419709 -2.06385491]


In [92]:
scores.mean()

-0.5799968142371414