In [1]:
import pandas as pd
import numpy as np
from sklearn import metrics
import matplotlib.pyplot as plt
import seaborn as sns

### Importing the Boston Housing dataset

In [2]:
# Load the Boston Housing dataset bundled with scikit-learn.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell only runs on an older scikit-learn release — confirm the pinned version.
from sklearn.datasets import load_boston
boston = load_boston()

In [3]:
# Build the feature frame with named columns in one step,
# then append the regression target as the PRICE column
data = pd.DataFrame(boston.data, columns=boston.feature_names)
data['PRICE'] = boston.target

In [22]:
# Separate the target (PRICE) from the predictor columns.
# Note: X is replaced by the CSV loaded in the next cell; y is what the split uses.
y = data['PRICE']
X = data.drop(columns=['PRICE'])

In [36]:
# Replace the feature matrix built in the previous cell with an externally prepared one.
# NOTE(review): assumes NewBoston.csv has exactly one row per entry of `y`, in the same
# order, and holds only predictor columns — nothing in this cell checks that; verify.
X = pd.read_csv("..//..//Bayesian_optimization/Boston_Housing_Bayesian_optimization/NewBoston.csv")

In [37]:
# Hold out 30% of the rows for testing; the fixed random_state makes the
# partition identical on every run
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=4,
)

### Linear Regression

In [38]:
# Import library for Linear Regression
from sklearn.linear_model import LinearRegression

In [39]:
# Linear-regression baseline with default settings, fitted on the training
# split (fit() returns the estimator, so construction and training are chained)
lm = LinearRegression().fit(X_train, y_train)

In [40]:
# Predict on both partitions: y_pred is the in-sample (training) fit,
# y_test_pred the held-out predictions that the evaluation cell scores
y_pred, y_test_pred = lm.predict(X_train), lm.predict(X_test)

In [41]:
# Score the linear model on the held-out test split
acc_linreg = metrics.r2_score(y_test, y_test_pred)
test_mse = metrics.mean_squared_error(y_test, y_test_pred)
# Sample count and predictor count feed the adjusted R^2 correction
n_test, n_features = len(y_test), X_test.shape[1]
# Per-row relative error; MAPE and RMSPE are both derived from it
rel_err = (y_test - y_test_pred) / y_test

print('Linear Regression')
print('R^2:', acc_linreg)
print('Adjusted R^2:', 1 - (1 - acc_linreg) * (n_test - 1) / (n_test - n_features - 1))
print('MAE:', metrics.mean_absolute_error(y_test, y_test_pred))
print('MSE:', test_mse)
print('RMSE:', np.sqrt(test_mse))
print('MAPE: ', np.mean(np.abs(rel_err)) * 100)
print('RMSPE: ', np.sqrt(np.mean(np.square(rel_err))) * 100)

Linear Regression
R^2: 0.7602431009382478
Adjusted R^2: 0.7149347105643733
MAE: 3.604826700317402
MSE: 25.035432730101817
RMSE: 5.003542018420733
MAPE:  17.601425834426536
RMSPE:  26.555833958290826


### Random Forest

In [42]:
from sklearn.ensemble import RandomForestRegressor

In [46]:
# Random forest with fixed hyper-parameters, trained on the training split.
# random_state pins the forest's randomness so reruns give the same model.
reg = RandomForestRegressor(
    n_estimators=66,
    max_depth=16,
    random_state=1,
).fit(X_train, y_train)

In [47]:
# Random-forest predictions for the training rows (y_pred) and for the
# held-out rows (y_test_pred); both names are rebound from the previous model
y_pred, y_test_pred = reg.predict(X_train), reg.predict(X_test)

In [48]:
# Score the random forest on the held-out test split
acc_rf = metrics.r2_score(y_test, y_test_pred)
test_mse = metrics.mean_squared_error(y_test, y_test_pred)
# Sample count and predictor count feed the adjusted R^2 correction
n_test, n_features = len(y_test), X_test.shape[1]
# Per-row relative error; MAPE and RMSPE are both derived from it
rel_err = (y_test - y_test_pred) / y_test

print('Sklearn Random Forest')
print('R^2:', acc_rf)
print('Adjusted R^2:', 1 - (1 - acc_rf) * (n_test - 1) / (n_test - n_features - 1))
print('MAE:', metrics.mean_absolute_error(y_test, y_test_pred))
print('MSE:', test_mse)
print('RMSE:', np.sqrt(test_mse))
print('MAPE: ', np.mean(np.abs(rel_err)) * 100)
print('RMSPE: ', np.sqrt(np.mean(np.square(rel_err))) * 100)

Sklearn Random Forest
R^2: 0.8465329641920181
Adjusted R^2: 0.8175313196298797
MAE: 2.3661688376605516
MSE: 16.025038971951645
RMSE: 4.0031286479392145
MAPE:  10.962647612566494
RMSPE:  17.391232633286275


### XGBoost Regressor

In [49]:
# Import XGBoost Regressor
from xgboost import XGBRegressor

In [51]:
# Gradient-boosted trees with a squared-error objective and fixed
# hyper-parameters, trained on the training split.
# Note: `reg` is rebound here, so the random-forest model is no longer reachable.
reg = XGBRegressor(
    objective="reg:squarederror",
    eta=0.073548,
    max_depth=18,
    n_estimators=158,
).fit(X_train, y_train)

In [52]:
# XGBoost predictions for the training rows (y_pred) and for the
# held-out rows (y_test_pred) that the evaluation cell scores
y_pred, y_test_pred = reg.predict(X_train), reg.predict(X_test)

In [53]:
# Score the XGBoost model on the held-out test split
acc_xgb = metrics.r2_score(y_test, y_test_pred)
test_mse = metrics.mean_squared_error(y_test, y_test_pred)
# Sample count and predictor count feed the adjusted R^2 correction
n_test, n_features = len(y_test), X_test.shape[1]
# Per-row relative error; MAPE and RMSPE are both derived from it
rel_err = (y_test - y_test_pred) / y_test

print('XGBoost Regressor')
print('R^2:', acc_xgb)
print('Adjusted R^2:', 1 - (1 - acc_xgb) * (n_test - 1) / (n_test - n_features - 1))
print('MAE:', metrics.mean_absolute_error(y_test, y_test_pred))
print('MSE:', test_mse)
print('RMSE:', np.sqrt(test_mse))
print('MAPE: ', np.mean(np.abs(rel_err)) * 100)
print('RMSPE: ', np.sqrt(np.mean(np.square(rel_err))) * 100)

XGBoost Regressor
R^2: 0.8729506787774601
Adjusted R^2: 0.8489413582314683
MAE: 2.18065034213819
MSE: 13.266499305418357
RMSE: 3.6423205934429164
MAPE:  9.80846013027885
RMSPE:  15.149711491811917


### SVR

In [55]:
# Creating scaled set to be used in model to improve our results
from sklearn.preprocessing import StandardScaler
# Import SVM Regressor
from sklearn import svm

In [56]:
# Learn the scaling statistics from the training split only, then apply the
# same transform to both partitions.
# Note: X_train / X_test are rebound to the scaled arrays, so every later
# cell sees scaled features rather than the original frames.
sc = StandardScaler().fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

In [57]:
# Support-vector regressor with fixed C / epsilon / gamma (kernel left at the
# library default), trained on the scaled training split
reg = svm.SVR(
    C=193.724604,
    epsilon=0.271362,
    gamma=0.028334,
).fit(X_train, y_train)

In [58]:
# SVR predictions for the scaled training rows (y_pred) and the scaled
# held-out rows (y_test_pred)
y_pred, y_test_pred = reg.predict(X_train), reg.predict(X_test)

In [59]:
# Score the SVR model on the held-out test split
acc_svm = metrics.r2_score(y_test, y_test_pred)
test_mse = metrics.mean_squared_error(y_test, y_test_pred)
# Sample count and predictor count feed the adjusted R^2 correction
n_test, n_features = len(y_test), X_test.shape[1]
# Per-row relative error; MAPE and RMSPE are both derived from it
rel_err = (y_test - y_test_pred) / y_test

print('Sklearn SVR')
print('R^2:', acc_svm)
print('Adjusted R^2:', 1 - (1 - acc_svm) * (n_test - 1) / (n_test - n_features - 1))
print('MAE:', metrics.mean_absolute_error(y_test, y_test_pred))
print('MSE:', test_mse)
print('RMSE:', np.sqrt(test_mse))
print('MAPE', np.mean(np.abs(rel_err)) * 100)
print('RMSPE', np.sqrt(np.mean(np.square(rel_err))) * 100)

Sklearn SVR
R^2: 0.9091565112983999
Adjusted R^2: 0.8919892378429795
MAE: 1.9471490199897266
MSE: 9.485883656557043
RMSE: 3.0799161768718712
MAPE 10.003938540092483
RMSPE 17.456826994271275


### SVR LS Percentage

In [60]:
# StandardScaler is used in the next cell to scale the features
# (the same import also appears in the SVR section above)
from sklearn.preprocessing import StandardScaler
# Execute the SVR-LS percentage library notebook from this kernel.
# NOTE(review): presumably this is what defines SVR_PerLs used below — confirm;
# the relative path also requires the kernel to be started from this notebook's folder.
%run ..//..//Library//SVRLSPercent_Library.ipynb 

In [61]:
# Fit a fresh scaler on the current training matrix and rescale both partitions.
# NOTE(review): when the notebook is run top to bottom, X_train / X_test were
# already standardised in the SVR section, so this scaler is fitted on
# already-scaled data — confirm the second pass is intended.
sc = StandardScaler().fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

In [65]:
# Instantiate the SVR-LS percentage regressor with an RBF kernel and fixed C / gamma
model = SVR_PerLs(kernel="rbf", C=32638.266572, gamma=0.034856)
# Fit on the scaled training split; the trailing ';' hides the call's return value
model.fit(X_train, y_train);

In [66]:
# SVR-LS predictions for the training rows (y_pred) and for the
# held-out rows (y_test_pred) that the evaluation cell scores
y_pred, y_test_pred = model.predict(X_train), model.predict(X_test)

In [67]:
# Score the SVR-LS percentage model on the held-out test split.
# NOTE(review): this rebinds acc_svm, replacing the R^2 stored by the plain SVR
# evaluation cell — consider a distinct name if both scores are needed later.
acc_svm = metrics.r2_score(y_test, y_test_pred)
test_mse = metrics.mean_squared_error(y_test, y_test_pred)
# Sample count and predictor count feed the adjusted R^2 correction
n_test, n_features = len(y_test), X_test.shape[1]
# Per-row relative error; MAPE and RMSPE are both derived from it
rel_err = (y_test - y_test_pred) / y_test

print('SVR-LS Percentage')
print('R^2:', acc_svm)
print('Adjusted R^2:', 1 - (1 - acc_svm) * (n_test - 1) / (n_test - n_features - 1))
print('MAE:', metrics.mean_absolute_error(y_test, y_test_pred))
print('MSE:', test_mse)
print('RMSE:', np.sqrt(test_mse))
print('MAPE', np.mean(np.abs(rel_err)) * 100)
print('RMSPE', np.sqrt(np.mean(np.square(rel_err))) * 100)

SVR-LS Percentage
R^2: 0.913583815614505
Adjusted R^2: 0.8972531980928367
MAE: 1.8904181923052978
MSE: 9.02358421985558
RMSE: 3.0039281316062776
MAPE 9.589497013515224
RMSPE 17.16547298656573
