In [31]:
import numpy as np
import pandas as pd

In [33]:
from sklearn.datasets import fetch_california_housing

In [34]:
# Load the California housing dataset. The returned object carries the
# 'data', 'target', 'frame', 'target_names', 'feature_names' and 'DESCR' entries.
housing = fetch_california_housing()

In [35]:
# Display the whole loaded object (feature matrix, target vector, names, description).
# NOTE(review): this prints every entry including the long DESCR text; a
# narrower view such as the feature names may be enough here.
housing

{'data': array([[   8.3252    ,   41.        ,    6.98412698, ...,    2.55555556,
           37.88      , -122.23      ],
        [   8.3014    ,   21.        ,    6.23813708, ...,    2.10984183,
           37.86      , -122.22      ],
        [   7.2574    ,   52.        ,    8.28813559, ...,    2.80225989,
           37.85      , -122.24      ],
        ...,
        [   1.7       ,   17.        ,    5.20554273, ...,    2.3256351 ,
           39.43      , -121.22      ],
        [   1.8672    ,   18.        ,    5.32951289, ...,    2.12320917,
           39.43      , -121.32      ],
        [   2.3886    ,   16.        ,    5.25471698, ...,    2.61698113,
           39.37      , -121.24      ]]),
 'target': array([4.526, 3.585, 3.521, ..., 0.923, 0.847, 0.894]),
 'frame': None,
 'target_names': ['MedHouseVal'],
 'feature_names': ['MedInc',
  'HouseAge',
  'AveRooms',
  'AveBedrms',
  'Population',
  'AveOccup',
  'Latitude',
  'Longitude'],
 'DESCR': '.. _california_housing_dataset:\n

In [69]:
# Feature matrix as a DataFrame labelled with the dataset's feature names,
# and the target values as a Series.
x = pd.DataFrame(data=housing.data, columns=housing.feature_names)
y = pd.Series(data=housing.target)

In [70]:
from sklearn.model_selection import train_test_split
# Hold out 33% of the rows for evaluation; the fixed random_state keeps the
# split reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.33,
    random_state=42,
)

In [71]:
from sklearn.linear_model import LinearRegression

In [56]:
# Baseline linear regression with default settings, trained on the unscaled features.
model = LinearRegression()

In [57]:
# Fit the baseline model on the training split.
model.fit(X=x_train, y=y_train)

In [58]:
# Baseline predictions for the held-out rows.
pred = model.predict(X=x_test)

In [51]:
from sklearn.metrics import mean_squared_error

In [59]:
# Mean squared error of the baseline predictions against the held-out targets.
mse = mean_squared_error(y_true=y_test, y_pred=pred)

In [60]:
# Show the baseline test MSE.
mse

0.5369686543372453

# Standardizing the dataset

In [72]:
from sklearn.preprocessing import StandardScaler

In [73]:
# Standardizer instance with default settings; later cells refer to it by
# the name `scalar`.
scalar = StandardScaler()

In [74]:
x_train = scalar.fit_transform(x_train)
x_test = scalar.transform(x_test)

In [75]:
standardized_model = LinearRegression()

In [76]:
standardized_model.fit(x_train,y_train)

In [77]:
prediction = standardized_model.predict(x_test)

In [78]:
# Test MSE of the standardized model; this reuses the name `mse`, replacing
# the baseline value computed earlier.
mse = mean_squared_error(y_true=y_test, y_pred=prediction)

In [79]:
# Show the test MSE of the model trained on standardized features.
mse

0.5369686543372459

# Hyperparameter tuning

In [80]:
# Rebuild the feature frame and target series from the loaded dataset for
# the tuning section.
a = pd.DataFrame(data=housing.data, columns=housing.feature_names)
b = pd.Series(data=housing.target)

In [81]:
from sklearn.model_selection import train_test_split

In [82]:
# Same 33% hold-out and seed as the earlier split, applied to a / b.
a_train, a_test, b_train, b_test = train_test_split(
    a,
    b,
    test_size=0.33,
    random_state=42,
)

In [83]:
from sklearn.linear_model import LinearRegression

In [84]:
# Fresh, unfitted linear regression for the tuning section.
hyper_tuned_model = LinearRegression()

In [85]:
# Every option in the grid is a boolean switch, so each one is searched over
# both True and False (2 x 2 x 2 = 8 combinations).
# NOTE(review): copy_X looks like a data-copy flag rather than a modelling
# choice; confirm whether it belongs in the search.
param_grid = {
    option: [True, False]
    for option in ('fit_intercept', 'positive', 'copy_X')
}

In [86]:
from sklearn.model_selection import GridSearchCV

In [97]:
# 5-fold cross-validated search over param_grid, scored by negative mean
# squared error. The base estimator is the one created for this section;
# the earlier code passed `model`, which had already been fitted in the
# baseline section and made this cell depend on that earlier state.
grid_search = GridSearchCV(hyper_tuned_model, param_grid, cv=5, scoring='neg_mean_squared_error')

In [98]:
# Display the configured (not yet fitted) search object.
grid_search

In [99]:
# Run the cross-validated search on the training split.
grid_search.fit(X=a_train, y=b_train)

In [100]:
# Parameter combination with the best mean cross-validation score.
grid_search.best_params_

{'copy_X': True, 'fit_intercept': True, 'positive': False}

In [101]:
# Best mean cross-validation score; it is negative because the search is
# scored with 'neg_mean_squared_error'.
grid_search.best_score_

-0.5230499763383937

In [102]:
# Tabulate the per-combination CV results, best mean test score first, and
# save the table next to the notebook.
df = (
    pd.DataFrame(grid_search.cv_results_)
    .sort_values(by='mean_test_score', ascending=False)
)
df.to_csv('grid_search_results.csv', index=False)