In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error  
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split

In [2]:
ds = pd.read_csv('../datasets/Boston.csv')
ds.head()

Unnamed: 0,crim,zn,indus,nox,rm,age,dis,rad,tax,ptratio,black,lstat,medv
0,0.00632,18.0,2.31,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


In [3]:
X = ds.iloc[:, :12].values
y = ds.iloc[:, 12].values

print(X.shape, y.shape)

(506, 12) (506,)


In [4]:
model = LinearRegression()

In [5]:
ncvals = cross_val_score(model, X, y, cv=5, scoring='neg_mean_squared_error')
ncvals

array([-12.6721495 , -24.28442268, -27.83877583, -82.08494898,
       -36.50537454])

In [6]:
cvals = -ncvals
cvals

array([12.6721495 , 24.28442268, 27.83877583, 82.08494898, 36.50537454])

In [7]:
cval_sqrts = np.sqrt(cvals)
cv_mean = np.mean(cval_sqrts)
cv_mean

5.773201524357144

In [8]:
train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.2)

print(train_X.shape, test_X.shape)
print(train_y.shape, test_y.shape)

(404, 12) (102, 12)
(404,) (102,)


In [9]:
k_fold = KFold(n_splits=5)

In [10]:
fold_metrics = {}

for no, (train, test) in enumerate(k_fold.split(train_X, train_y)):
    fold_metrics[no] = {'acc': None, 'mse': None}

    model.fit(train_X[train], train_y[train])
    fold_metrics[no]['acc'] = model.score(train_X[train], train_y[train]) * 100
    preds = model.predict(train_X[test])
    mse = mean_squared_error(preds, train_y[test])
    fold_metrics[no]['mse'] = mse #np.sqrt(mse)

In [11]:
fold_metrics

{0: {'acc': 72.57808840200096, 'mse': 19.91932777653326},
 1: {'acc': 73.39861360493309, 'mse': 33.01424752745549},
 2: {'acc': 75.41998221114916, 'mse': 33.434787898613756},
 3: {'acc': 71.87852392333527, 'mse': 15.78518855150348},
 4: {'acc': 71.84884608273877, 'mse': 26.70720280999867}}

In [12]:
test_preds = model.predict(test_X)
mse = mean_squared_error(test_preds, test_y)
mse

21.77012395676818