In [1]:
import pandas as pd
from sklearn.datasets import load_boston

In [2]:
# Load the Boston housing data and display its feature names.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell only runs on scikit-learn < 1.2 -- confirm the pinned version.
boston_dict = load_boston()
boston_dict.feature_names

array(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD',
       'TAX', 'PTRATIO', 'B', 'LSTAT'], dtype='<U7')

In [3]:
# List the fields available on the loaded dataset object.
boston_dict.keys()

dict_keys(['data', 'target', 'feature_names', 'DESCR', 'filename'])

In [4]:
# Assemble the feature matrix as a DataFrame, then append the regression
# target as the final column so features and label live in one frame.
boston_dataset = pd.DataFrame(
    data=boston_dict.data,
    columns=boston_dict.feature_names,
).assign(target=boston_dict.target)
boston_dataset.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,target
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [5]:
# Sanity-check the (rows, columns) of the assembled frame.
boston_dataset.shape

(506, 14)

In [6]:
# Separate features from the label by column name rather than by position,
# so the selection stays correct even if columns are reordered or new ones
# are appended after 'target'.
X = boston_dataset.drop(columns='target')
y = boston_dataset['target']

In [7]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed seed keeps the split
# identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=20,
)

In [8]:
from sklearn.preprocessing import MinMaxScaler

# Fit the scaler on the training split only, then apply the same mapping to
# both splits so the test set does not influence the scaling.
# NOTE(review): the scaler is fit on the whole training split before the grid
# searches run their internal cross-validation, so validation folds share
# scaling statistics with their training folds; a Pipeline would avoid that.
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [9]:
from sklearn.svm import SVR,LinearSVR
from sklearn.model_selection import GridSearchCV

In [10]:
# Linear kernel: only the regularisation parameter C is searched.
search_dic_linear = {
    'C': [0.01, 0.1, 1, 8, 10, 100, 1000],
}
svm_linear = SVR(kernel='linear')
grid_search_linear = GridSearchCV(estimator=svm_linear, param_grid=search_dic_linear)

In [11]:
# Run the grid search over C on the scaled training split.
grid_search_linear.fit(X_train_scaled, y_train)

GridSearchCV(estimator=SVR(kernel='linear'),
             param_grid={'C': [0.01, 0.1, 1, 8, 10, 100, 1000]})

In [12]:
# Predict on the scaled test split with the fitted linear-kernel grid search.
y_pred_linear = grid_search_linear.predict(X_test_scaled)

In [13]:
from sklearn import metrics
import numpy as np

In [14]:
# Test-set metrics for the tuned linear-kernel SVR; each is computed once and
# RMSE is derived from the MSE value.
mae_linear = metrics.mean_absolute_error(y_test, y_pred_linear)
mse_linear = metrics.mean_squared_error(y_test, y_pred_linear)
rmse_linear = np.sqrt(mse_linear)
r2_linear = metrics.r2_score(y_test, y_pred_linear)

# Banner
print('*'*50)
print(' '*15+' SVR (kernel=linear) '+' '*15)
print('*'*50)

# MAE, MSE, RMSE and R-squared, two decimals each
print('Mean Absolute Error(MAE) = {:.2f}'.format(mae_linear))
print('Mean Squared Error(MSE) = {:.2f}'.format(mse_linear))
print('Root Mean Squared Error(RMSE) = {:.2f}'.format(rmse_linear))
print('R-squared(r2_score) = {:.2f}'.format(r2_linear))
print()

# Hyper-parameters selected by the grid search
print("Best parameters set found on development set:")
print(grid_search_linear.best_params_)

**************************************************
                SVR (kernel=linear)                
**************************************************
Mean Absolute Error(MAE) = 3.28
Mean Squared Error(MSE) = 28.16
Root Mean Squared Error(RMSE) = 5.31
R-squared(r2_score) = 0.67

Best parameters set found on development set:
{'C': 10}


In [15]:
# Polynomial kernel: search over C, the polynomial degree and gamma.
search_dic_poly = {
    'C': [0.01, 0.1, 1, 8, 10, 100, 1000],
    'degree': [3, 5],
    'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
}
svm_poly = SVR(kernel='poly')
grid_search_poly = GridSearchCV(estimator=svm_poly, param_grid=search_dic_poly)

In [16]:
# Run the grid search over C, degree and gamma on the scaled training split.
grid_search_poly.fit(X_train_scaled, y_train)

GridSearchCV(estimator=SVR(kernel='poly'),
             param_grid={'C': [0.01, 0.1, 1, 8, 10, 100, 1000],
                         'degree': [3, 5],
                         'gamma': [1, 0.1, 0.01, 0.001, 0.0001]})

In [17]:
# Predict on the scaled test split with the fitted polynomial-kernel grid search.
y_pred_poly = grid_search_poly.predict(X_test_scaled)

In [18]:
# Test-set metrics for the tuned polynomial-kernel SVR; each is computed once
# and RMSE is derived from the MSE value.
mae_poly = metrics.mean_absolute_error(y_test, y_pred_poly)
mse_poly = metrics.mean_squared_error(y_test, y_pred_poly)
rmse_poly = np.sqrt(mse_poly)
r2_poly = metrics.r2_score(y_test, y_pred_poly)

# Banner
print('*'*50)
print(' '*15+' SVR (kernel=poly) '+' '*15)
print('*'*50)

# MAE, MSE, RMSE and R-squared, two decimals each
print('Mean Absolute Error(MAE) = {:.2f}'.format(mae_poly))
print('Mean Squared Error(MSE) = {:.2f}'.format(mse_poly))
print('Root Mean Squared Error(RMSE) = {:.2f}'.format(rmse_poly))
print('R-squared(r2_score) = {:.2f}'.format(r2_poly))
print()

# Hyper-parameters selected by the grid search
print("Best parameters set found on development set:")
print(grid_search_poly.best_params_)

**************************************************
                SVR (kernel=poly)                
**************************************************
Mean Absolute Error(MAE) = 2.65
Mean Squared Error(MSE) = 19.18
Root Mean Squared Error(RMSE) = 4.38
R-squared(r2_score) = 0.78

Best parameters set found on development set:
{'C': 1, 'degree': 5, 'gamma': 1}


In [19]:
# RBF kernel: search over C and gamma.
search_dic_rbf = {
    'C': [0.01, 0.1, 1, 8, 10, 100, 1000],
    'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
}
svm_rbf = SVR(kernel='rbf')
grid_search_rbf = GridSearchCV(estimator=svm_rbf, param_grid=search_dic_rbf)

In [20]:
# Run the grid search over C and gamma on the scaled training split.
grid_search_rbf.fit(X_train_scaled, y_train)

GridSearchCV(estimator=SVR(),
             param_grid={'C': [0.01, 0.1, 1, 8, 10, 100, 1000],
                         'gamma': [1, 0.1, 0.01, 0.001, 0.0001]})

In [21]:
# Predict on the scaled test split with the fitted RBF-kernel grid search.
y_pred_rbf = grid_search_rbf.predict(X_test_scaled)

In [22]:
# Test-set metrics for the tuned RBF-kernel SVR; each is computed once and
# RMSE is derived from the MSE value.
mae_rbf = metrics.mean_absolute_error(y_test, y_pred_rbf)
mse_rbf = metrics.mean_squared_error(y_test, y_pred_rbf)
rmse_rbf = np.sqrt(mse_rbf)
r2_rbf = metrics.r2_score(y_test, y_pred_rbf)

# Banner
print('*'*50)
print(' '*15+' SVR (kernel=rbf) '+' '*15)
print('*'*50)

# MAE, MSE, RMSE and R-squared, two decimals each
print('Mean Absolute Error(MAE) = {:.2f}'.format(mae_rbf))
print('Mean Squared Error(MSE) = {:.2f}'.format(mse_rbf))
print('Root Mean Squared Error(RMSE) = {:.2f}'.format(rmse_rbf))
print('R-squared(r2_score) = {:.2f}'.format(r2_rbf))
print()

# Hyper-parameters selected by the grid search
print("Best parameters set found on development set:")
print(grid_search_rbf.best_params_)

**************************************************
                SVR (kernel=rbf)                
**************************************************
Mean Absolute Error(MAE) = 2.04
Mean Squared Error(MSE) = 9.71
Root Mean Squared Error(RMSE) = 3.12
R-squared(r2_score) = 0.89

Best parameters set found on development set:
{'C': 100, 'gamma': 1}


In [23]:
# Sigmoid kernel: search over C and gamma.
search_dic_sigmoid = {
    'C': [0.01, 0.1, 1, 8, 10, 100, 1000],
    'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
}
svm_sigmoid = SVR(kernel='sigmoid')
grid_search_sigmoid = GridSearchCV(estimator=svm_sigmoid, param_grid=search_dic_sigmoid)

In [24]:
# Run the grid search over C and gamma on the scaled training split.
grid_search_sigmoid.fit(X_train_scaled, y_train)

GridSearchCV(estimator=SVR(kernel='sigmoid'),
             param_grid={'C': [0.01, 0.1, 1, 8, 10, 100, 1000],
                         'gamma': [1, 0.1, 0.01, 0.001, 0.0001]})

In [25]:
# Predict on the scaled test split with the fitted sigmoid-kernel grid search.
y_pred_sigmoid = grid_search_sigmoid.predict(X_test_scaled)

In [26]:
# Test-set metrics for the tuned sigmoid-kernel SVR; each is computed once and
# RMSE is derived from the MSE value.
mae_sigmoid = metrics.mean_absolute_error(y_test, y_pred_sigmoid)
mse_sigmoid = metrics.mean_squared_error(y_test, y_pred_sigmoid)
rmse_sigmoid = np.sqrt(mse_sigmoid)
r2_sigmoid = metrics.r2_score(y_test, y_pred_sigmoid)

# Banner
print('*'*50)
print(' '*15+' SVR (kernel=sigmoid) '+' '*15)
print('*'*50)

# MAE, MSE, RMSE and R-squared, two decimals each
print('Mean Absolute Error(MAE) = {:.2f}'.format(mae_sigmoid))
print('Mean Squared Error(MSE) = {:.2f}'.format(mse_sigmoid))
print('Root Mean Squared Error(RMSE) = {:.2f}'.format(rmse_sigmoid))
print('R-squared(r2_score) = {:.2f}'.format(r2_sigmoid))
print()

# Hyper-parameters selected by the grid search
print("Best parameters set found on development set:")
print(grid_search_sigmoid.best_params_)

**************************************************
                SVR (kernel=sigmoid)                
**************************************************
Mean Absolute Error(MAE) = 3.29
Mean Squared Error(MSE) = 28.18
Root Mean Squared Error(RMSE) = 5.31
R-squared(r2_score) = 0.67

Best parameters set found on development set:
{'C': 1000, 'gamma': 0.01}


In [37]:
# LinearSVR baseline with default hyper-parameters (no grid search).
# LinearSVR's random_state controls the data shuffling done by its solver, so
# pin it (same seed as the train/test split) to make the fitted model and the
# reported metrics reproducible after a kernel restart.
linear_SVR = LinearSVR(random_state=20)
linear_SVR.fit(X_train_scaled, y_train)
y_pred_linear_SVR = linear_SVR.predict(X_test_scaled)

In [39]:
# Test-set metrics for the LinearSVR baseline; each is computed once and RMSE
# is derived from the MSE value.
mae_linear_SVR = metrics.mean_absolute_error(y_test, y_pred_linear_SVR)
mse_linear_SVR = metrics.mean_squared_error(y_test, y_pred_linear_SVR)
rmse_linear_SVR = np.sqrt(mse_linear_SVR)
r2_linear_SVR = metrics.r2_score(y_test, y_pred_linear_SVR)

# Banner
print('*'*50)
print(' '*15+' LinearSVR '+' '*15)
print('*'*50)

# MAE, MSE, RMSE and R-squared, two decimals each
print('Mean Absolute Error(MAE) = {:.2f}'.format(mae_linear_SVR))
print('Mean Squared Error(MSE) = {:.2f}'.format(mse_linear_SVR))
print('Root Mean Squared Error(RMSE) = {:.2f}'.format(rmse_linear_SVR))
print('R-squared(r2_score) = {:.2f}'.format(r2_linear_SVR))

**************************************************
                LinearSVR                
**************************************************
Mean Absolute Error(MAE) = 3.74
Mean Squared Error(MSE) = 35.54
Root Mean Squared Error(RMSE) = 5.96
R-squared(r2_score) = 0.58
