In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

In [3]:
from sklearn.datasets import fetch_california_housing
housing = fetch_california_housing()

In [4]:
df = pd.DataFrame(housing.data, columns=housing.feature_names)
df.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25


In [5]:
df.shape

(20640, 8)

In [6]:
from sklearn.preprocessing import StandardScaler

In [7]:
sc = StandardScaler()

In [8]:
from sklearn.model_selection import train_test_split

In [9]:
X_train, X_test, y_train, y_test = train_test_split(df, housing.target, test_size=0.2, random_state=42)

In [10]:
X_train_scaled = sc.fit_transform(X_train)
X_test_scaled = sc.transform(X_test)

In [11]:
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from xgboost import XGBRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import Lasso, Ridge, ElasticNet,LogisticRegression
from sklearn.model_selection import GridSearchCV

In [12]:
models = {'LinearRegression': LinearRegression(),
          'RandomForestRegressor': RandomForestRegressor(),
          'GradientBoostingRegressor': GradientBoostingRegressor(),
          'AdaBoostRegressor': AdaBoostRegressor(),
          'XGBRegressor': XGBRegressor(),
          'DecisionTreeRegressor': DecisionTreeRegressor(),
          'SVR': SVR(),
          'KNeighborsRegressor': KNeighborsRegressor(),
          'Lasso': Lasso(),
          'Ridge': Ridge(),
          'ElasticNet': ElasticNet(),
         }

In [13]:
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error, accuracy_score
for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    print(name)
    print('R2 Score:', r2_score(y_test, y_pred))
    print('MAE:', mean_absolute_error(y_test, y_pred))
    print('MSE:', mean_squared_error(y_test, y_pred))
    print('RMSE:', np.sqrt(mean_squared_error(y_test, y_pred)))
    print("-----------------------------------------------------")





LinearRegression
R2 Score: 0.575787706032451
MAE: 0.5332001304956566
MSE: 0.5558915986952442
RMSE: 0.7455813830127763
-----------------------------------------------------
RandomForestRegressor
R2 Score: 0.8074368384377422
MAE: 0.32590452298934125
MSE: 0.2523364957896356
RMSE: 0.5023310619398681
-----------------------------------------------------
GradientBoostingRegressor
R2 Score: 0.7756289545712209
MAE: 0.37170892371812675
MSE: 0.29401783238716883
RMSE: 0.5422341121574414
-----------------------------------------------------
AdaBoostRegressor
R2 Score: 0.36643646366857197
MAE: 0.8022567312503632
MSE: 0.8302273462947577
RMSE: 0.9111681218604817
-----------------------------------------------------
XGBRegressor
R2 Score: 0.8301370561019205
MAE: 0.30957335413783094
MSE: 0.2225899267544737
RMSE: 0.4717943691423984
-----------------------------------------------------
DecisionTreeRegressor
R2 Score: 0.6216717174967605
MAE: 0.4537404917635659
MSE: 0.49576477811470443
RMSE: 0.704105658345

In [14]:
from sklearn.model_selection import GridSearchCV

In [15]:
params = {'LinearRegression_params': {},
          'RandomForestRegressor_params' : {'n_estimators': [100,200,300,400,500],
                                'max_depth': [10,15,20,25,30],
                                'min_samples_split': [2,5,10],
                                'min_samples_leaf': [1,2,4]},

'GradientBoostingRegressor_params':{'n_estimators': [100,200,300,400,500],
                                    'learning_rate': [0.01,0.02,0.03,0.04],
                                    'max_depth': [1,2,3,4,5],
                                    'min_samples_split': [2,5,10],
                                    'min_samples_leaf': [1,2,4]},

'AdaBoostRegressor_params':{'n_estimators': [100,200,300,400,500],
                            'learning_rate': [0.01,0.02,0.03,0.04],
                            'loss': ['linear', 'square', 'exponential']},


'XGBRegressor_params': {'n_estimators': [100,200,300,400,500],
                       'learning_rate': [0.01,0.02,0.03,0.04,0.05],
                       'max_depth': [1,2,3,4,5],
                       'min_child_weight': [1,2,3,4,5],
                       'gamma': [0,0.1,0.2,0.3,0.4,0.5],
                       'subsample': [0.5,0.6,0.7,0.8,0.9],
                       'colsample_bytree': [0.5,0.6,0.7,0.8,0.9]},



'DecisionTreeRegressor_params' :{'max_depth': [10,15,20,25,30],
                                          'min_samples_split': [2,5,10],
                                          'min_samples_leaf': [1,2,4],
                                'criterion': ['squared_error', 'friedman_mse', 'absolute_error', 'poisson']},

'SVR_params': {'C': [0.1,1,10,100],
             'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
             'gamma': ['scale', 'auto']},

'KNeighborsRegressor_params': {'n_neighbors': [3,5,7,9,11],
                                        'weights': ['uniform', 'distance'],
                                        'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute']},

'Lasso_params': {'alpha': [0.001, 0.01, 0.1, 1, 10, 100],},
'Ridge_params' : {'alpha': [0.001, 0.01, 0.1, 1, 10, 100],},
'ElasticNet_params': {'alpha': [0.001, 0.01, 0.1, 1, 10, 100],
                                'l1_ratio': [0.1, 0.5, 0.9]}}

In [17]:
XGBRegressor_params = {'learning_rate': [0.05], 'max_depth': [5], 'n_estimators': [500]}


grid_search = GridSearchCV(XGBRegressor(), param_grid=XGBRegressor_params,cv=5,verbose=1)
grid_search.fit(X_train_scaled, y_train)


Fitting 5 folds for each of 1 candidates, totalling 5 fits


In [18]:
y_pred = grid_search.predict(X_test_scaled)

In [19]:
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score

In [20]:
print(mean_squared_error(y_pred,y_test))

0.2154035134123943


In [21]:
print(r2_score(y_pred,y_test))

0.8023145135005538


In [22]:
print(mean_absolute_error(y_pred,y_test))

0.3053534111611903
