In [13]:
#Exercise 5
import numpy as np
# imports
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
# Load the California housing dataset and pull out the feature matrix / target vector.
housing = fetch_california_housing()
X = housing['data']
y = housing['target']
# Hold out 10% of the rows for testing; the shuffle is seeded so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, shuffle=True, random_state=43
)

In [14]:
# Linear-regression pipeline: median imputation -> standardisation -> LinearRegression.
pipeline_1 = [('imputer', SimpleImputer(strategy='median')),
              ('scaler', StandardScaler()),
              ('lr', LinearRegression())]
pipe_1 = Pipeline(pipeline_1)
# The pipeline is fitted on the training split only.
pipe_1.fit(X_train, y_train)

print("Regression")
# Predict once per split and reuse the result for all three metrics,
# instead of re-running the whole pipeline for every metric call.
for split_name, X_split, y_split in (("Train", X_train, y_train),
                                     ("Test", X_test, y_test)):
    y_pred = pipe_1.predict(X_split)
    print("\n" + split_name)
    print("r2 score :", r2_score(y_split, y_pred))
    print("MAE: ", mean_absolute_error(y_split, y_pred))
    print("MSE:", mean_squared_error(y_split, y_pred))

Regression

Train
r2 score : 0.6054131599242079
MAE:  0.533092001261455
MSE: 0.5273648371379568

Test
r2 score : 0.6128959462132965
MAE:  0.5196420310323713
MSE: 0.49761195027083793


In [15]:

#pipeline svm
import numpy as np
from sklearn.svm import SVR
# Support-vector regression pipeline: median imputation -> standardisation -> SVR
# (SVR is constructed with no arguments, i.e. library defaults).
pipeline_2 = [('imputer', SimpleImputer(strategy='median')),
              ('scaler', StandardScaler()),
              ('svm', SVR())]
pipe_2 = Pipeline(pipeline_2)
pipe_2.fit(X_train, y_train)

print("\nSVM")
# Predict once per split and reuse the result for all three metrics;
# the original called predict() three times per split.
for split_name, X_split, y_split in (("Train", X_train, y_train),
                                     ("Test", X_test, y_test)):
    y_pred = pipe_2.predict(X_split)
    print("\n" + split_name)
    print("r2 score :", r2_score(y_split, y_pred))
    print("MAE: ", mean_absolute_error(y_split, y_pred))
    print("MSE:", mean_squared_error(y_split, y_pred))


SVM

Train
r2 score : 0.7496108582936638
MAE:  0.383564516332599
MSE: 0.33464478671339204

Test
r2 score : 0.729508064989969
MAE:  0.3897680598426777
MSE: 0.3477101776542996


In [16]:

#pipeline Decision Tree
from sklearn.tree import DecisionTreeRegressor
# Decision-tree pipeline: median imputation -> standardisation -> DecisionTreeRegressor.
# No depth or leaf-size constraint is passed to the tree; only the seed is fixed.
# NOTE(review): the scaler step is kept so results match the original run, although
# tree splits presumably do not need standardised features -- confirm before removing.
pipeline_3 = [('imputer', SimpleImputer(strategy='median')),
              ('scaler', StandardScaler()),
              ('tree', DecisionTreeRegressor(random_state=43))]

pipe_3 = Pipeline(pipeline_3)
pipe_3.fit(X_train, y_train)

print("\nDecision Tree")
# Predict once per split and reuse the result for all three metrics.
for split_name, X_split, y_split in (("Train", X_train, y_train),
                                     ("Test", X_test, y_test)):
    y_pred = pipe_3.predict(X_split)
    print("\n" + split_name)
    print("r2 score :", r2_score(y_split, y_pred))
    print("MAE: ", mean_absolute_error(y_split, y_pred))
    print("MSE:", mean_squared_error(y_split, y_pred))


Decision Tree

Train
r2 score : 1.0
MAE:  4.221907539810565e-17
MSE: 9.24499456646287e-32

Test
r2 score : 0.6228217144931267
MAE:  0.4403051356589147
MSE: 0.4848526395290697


In [17]:

#Random Forest
from sklearn.ensemble import RandomForestRegressor
# Random-forest pipeline: median imputation -> standardisation -> RandomForestRegressor.
# Only the seed is set on the forest; every other hyperparameter is left at its default.
pipeline_4 = [('imputer', SimpleImputer(strategy='median')),
              ('scaler', StandardScaler()),
              ('rf', RandomForestRegressor(random_state=43))]

pipe_4 = Pipeline(pipeline_4)
pipe_4.fit(X_train, y_train)

print("\nRandom Forest")
# Predict once per split and reuse the result for all three metrics,
# rather than running the fitted pipeline again for every metric.
for split_name, X_split, y_split in (("Train", X_train, y_train),
                                     ("Test", X_test, y_test)):
    y_pred = pipe_4.predict(X_split)
    print("\n" + split_name)
    print("r2 score :", r2_score(y_split, y_pred))
    print("MAE: ", mean_absolute_error(y_split, y_pred))
    print("MSE:", mean_squared_error(y_split, y_pred))


Random Forest

Train
r2 score : 0.9741263135396302
MAE:  0.12000198560508221
MSE: 0.03458015083247723

Test
r2 score : 0.8119778189909694
MAE:  0.3194169859011629
MSE: 0.24169750554364758


In [18]:

#Gradient Boosting
from sklearn.ensemble import GradientBoostingRegressor
# Gradient-boosting pipeline: median imputation -> standardisation -> GradientBoostingRegressor.
# Only the seed is set on the booster; every other hyperparameter is left at its default.
pipeline_5 = [('imputer', SimpleImputer(strategy='median')),
              ('scaler', StandardScaler()),
              ('gb', GradientBoostingRegressor(random_state=43))]

pipe_5 = Pipeline(pipeline_5)
pipe_5.fit(X_train, y_train)

print("\nGradient Boosting")
# Predict once per split and reuse the result for all three metrics.
for split_name, X_split, y_split in (("Train", X_train, y_train),
                                     ("Test", X_test, y_test)):
    y_pred = pipe_5.predict(X_split)
    print("\n" + split_name)
    print("r2 score :", r2_score(y_split, y_pred))
    print("MAE: ", mean_absolute_error(y_split, y_pred))
    print("MSE:", mean_squared_error(y_split, y_pred))


Gradient Boosting

Train
r2 score : 0.8042086499063386
MAE:  0.35656543036682264
MSE: 0.26167490389525294

Test
r2 score : 0.7895081234643192
MAE:  0.36455447680396397
MSE: 0.27058170064218096
