In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split, KFold, cross_val_predict
from sklearn.neighbors import KNeighborsRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR

In [None]:
# housing.csv is read as whitespace-separated with no header row, so the
# 14 column names are supplied explicitly; the last one (MEDV) is the target
# used by the modelling cells.
columns = [
    'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
    'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV',
]
data = pd.read_csv('housing.csv', sep=r"\s+", header=None, names=columns)
data.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222.0,18.7,396.9,5.33,36.2


In [None]:
# Predictor columns: everything loaded into `data` except the MEDV target.
cols = [
    'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
    'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
]
x = data[cols]
y = data['MEDV']

# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=101,
)

In [None]:
# Preview the first five rows of the feature matrix.
x.head(n=5)

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,396.9,9.14
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,392.83,4.03
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222.0,18.7,394.63,2.94
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222.0,18.7,396.9,5.33


In [None]:
# Preview the first five values of the MEDV target.
y.head(n=5)

0    24.0
1    21.6
2    34.7
3    33.4
4    36.2
Name: MEDV, dtype: float64

In [None]:
# Shared 5-fold splitter for every cross-validation run below; rows are
# shuffled with a fixed seed so all models are evaluated on the same folds.
kf = KFold(n_splits=5, shuffle=True, random_state=101)

In [None]:
#first model
# Ordinary least-squares baseline fitted on the training split.
lr = LinearRegression().fit(x_train, y_train)
lr_pred = lr.predict(x_test)
# The "accuracy" printed here is R^2 on the held-out split; it is the same
# value lr.score(x_test, y_test) reports, computed from the predictions above.
lr_acc = r2_score(y_test, lr_pred)
print("Linear regression accuracy: ", lr_acc)

#cross validation
# cross_val_predict returns one out-of-fold prediction per row of x: each row
# is predicted by a clone of the model trained on the folds that exclude it.
# Despite its name, lr_score holds those predictions; the R^2 is computed on
# the line below by comparing them with y.
lr_score = cross_val_predict(lr, x, y, cv=kf)
print("cross validtation score: ", r2_score(y, lr_score))

Linear regression accuracy:  0.7123963332666869
cross validtation score:  0.7124076445016578


In [None]:
#second model
# k-NN predicts from distances between feature rows, so the features are
# standardized first. On the raw data, wide-range columns such as TAX and B
# (hundreds) swamp small-range ones such as NOX (below 1) in the distance.
# `knn` is now a Pipeline; it exposes the same fit/predict/score methods.
knn = make_pipeline(StandardScaler(), KNeighborsRegressor())
knn.fit(x_train, y_train)
knn_pred = knn.predict(x_test)
# For regressors, score() is R^2 on the held-out split.
knn_acc = knn.score(x_test, y_test)
print("KNN accuracy: ", knn_acc)

#cross validation
# The whole pipeline is cloned and refit per fold, so the scaler only ever
# sees that fold's training rows (no leakage from the validation rows).
# knn_score holds the out-of-fold predictions, not a score; R^2 is computed
# on the next line.
knn_score = cross_val_predict(knn, x, y, cv=kf)
print("cross validtation score: ", r2_score(y, knn_score))

KNN accuracy:  0.5139991753741371
cross validtation score:  0.5047827112703703


In [None]:
#third model
# SVR (default RBF kernel) is not scale invariant: on the raw features the
# kernel is dominated by wide-range columns such as TAX and B, which is why
# the unscaled model scored so poorly. Standardize the features first.
# `svr` is now a Pipeline; it exposes the same fit/predict/score methods.
svr = make_pipeline(StandardScaler(), SVR())
svr.fit(x_train, y_train)
svr_pred = svr.predict(x_test)
# For regressors, score() is R^2 on the held-out split.
svr_acc = svr.score(x_test, y_test)
print("SVR accuracy: ", svr_acc)

#cross validation
# The whole pipeline is cloned and refit per fold, so the scaler only ever
# sees that fold's training rows (no leakage from the validation rows).
# svr_score holds the out-of-fold predictions, not a score; R^2 is computed
# on the next line.
svr_score = cross_val_predict(svr, x, y, cv=kf)
print("cross validtation score: ", r2_score(y, svr_score))

SVR accuracy:  0.13979861118195425
cross validtation score:  0.19809353181777167


In [None]:
#Regularization
# Compare L2 (Ridge) and L1 (Lasso) penalized linear models over the same
# small grid of penalty strengths. Each model is fitted on the training split
# and its R^2 on the held-out split is printed. The print order (all ridge
# results, then all lasso results) matches the original copy-pasted version.
alphas = (0.1, 0.3, 0.5)

for alpha in alphas:
    ridge = Ridge(alpha=alpha)
    ridge.fit(x_train, y_train)
    print("Regularized Linear score(ridge): ", ridge.score(x_test, y_test))

for alpha in alphas:
    lasso = Lasso(alpha=alpha)
    lasso.fit(x_train, y_train)
    print("Regularized Linear score(lasso): ", lasso.score(x_test, y_test))

# After the loops, `ridge` and `lasso` are the models fitted with the last
# alpha in the grid (0.5), as they were before the refactor.

Regularized Linear score(ridge):  0.7122925098276605
Regularized Linear score(ridge):  0.711893167784983
Regularized Linear score(ridge):  0.7114081364970006
Regularized Linear score(lasso):  0.7039522461898744
Regularized Linear score(lasso):  0.6944741183759
Regularized Linear score(lasso):  0.6775198121307753
