In [1]:
import pandas as pd
import numpy as np
from sklearn import metrics

### Importing the Diabetes dataset

In [2]:
# Load scikit-learn's bundled diabetes dataset. The following cell reads the
# `data`, `feature_names` and `target` attributes of the returned object.
from sklearn.datasets import load_diabetes
diabetes = load_diabetes()

In [3]:
# Build the working frame in a single expression: the feature matrix with its
# column names, plus the regression target appended as the 'diabetes' column.
data = pd.DataFrame(diabetes.data, columns=diabetes.feature_names).assign(
    diabetes=diabetes.target
)

In [4]:
# Splitting the target variable from the independent variables
y = data['diabetes']
X = data.drop(columns=['diabetes'])

In [107]:
# Replace the predictors built in the previous cell with the feature table stored
# in newDiabetes.csv.
# NOTE(review): presumably exported by the Bayesian-optimisation notebook that
# lives in the same folder -- confirm provenance and column layout.
X = pd.read_csv("..//..//Bayesian_optimization//Diabetes_Bayesian_optimization//newDiabetes.csv")

# `y` still comes from the original dataset, so features and target are paired
# purely by row position. Fail here, with a clear message, if the counts differ.
if len(X) != len(y):
    raise ValueError(
        f"newDiabetes.csv has {len(X)} rows but the target has {len(y)}; "
        "features and target would be misaligned"
    )

In [108]:
# Hold out 30% of the rows as the test set; the fixed seed keeps the split reproducible
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=4,
)

### Linear Regression

In [109]:
# Import library for Linear Regression
from sklearn.linear_model import LinearRegression

In [110]:
# Ordinary least-squares regressor with scikit-learn's default settings
lm = LinearRegression()

# Fit on the training split (the trailing ';' hides the estimator repr in the cell output)
lm.fit(X=X_train, y=y_train);

In [111]:
# Predict on the training split first, then on the held-out test split
y_pred, y_test_pred = (lm.predict(features) for features in (X_train, X_test))

In [112]:
# Model Evaluation -- every metric below is computed on the held-out test split
acc_linreg = metrics.r2_score(y_test, y_test_pred)
n_obs, n_feat = len(y_test), X_test.shape[1]
mse = metrics.mean_squared_error(y_test, y_test_pred)
rel_err = (y_test - y_test_pred) / y_test  # per-sample relative error

print('Linear Regression')
print('R^2:', acc_linreg)
# Adjusted R^2 penalises R^2 by the number of predictors relative to the sample size
print('Adjusted R^2:', 1 - (1 - acc_linreg) * (n_obs - 1) / (n_obs - n_feat - 1))
print('MAE:', metrics.mean_absolute_error(y_test, y_test_pred))
print('MSE:', mse)
print('RMSE:', np.sqrt(mse))
print('RMSPE:', np.sqrt(np.mean(np.square(rel_err))) * 100)
print('MAPE', np.mean(np.abs(rel_err)) * 100)

Linear Regression
R^2: 0.4388468563326121
Adjusted R^2: 0.33268274807121445
MAE: 43.244666200209515
MSE: 3003.776800790004
RMSE: 54.806722222643494
RMSPE: 58.75354451452921
MAPE 38.30408347847433


### Random Forest

In [113]:
from sklearn.ensemble import RandomForestRegressor

In [114]:
# Random forest hyperparameters (seeded so the ensemble is reproducible)
rf_params = {"random_state": 5, "max_depth": 8, "n_estimators": 22}
reg = RandomForestRegressor(**rf_params)

# Fit the forest on the training split
reg.fit(X=X_train, y=y_train);

In [115]:
# Predictions for the training split and the test split, in that order
y_pred, y_test_pred = map(reg.predict, (X_train, X_test))

In [116]:
# Model Evaluation on the held-out test split
acc_rf = metrics.r2_score(y_test, y_test_pred)
n_obs, n_feat = len(y_test), X_test.shape[1]
mse = metrics.mean_squared_error(y_test, y_test_pred)
pct_err = (y_test - y_test_pred) / y_test  # per-sample relative error

print('Sklearn Random Forest')
# (label, value) pairs are printed in the same order and format as the other models
for label, value in [
    ('R^2:', acc_rf),
    ('Adjusted R^2:', 1 - (1 - acc_rf) * (n_obs - 1) / (n_obs - n_feat - 1)),
    ('MAE:', metrics.mean_absolute_error(y_test, y_test_pred)),
    ('MSE:', mse),
    ('RMSE:', np.sqrt(mse)),
    ('RMSPE:', np.sqrt(np.mean(np.square(pct_err))) * 100),
    ('MAPE', np.mean(np.abs(pct_err)) * 100),
]:
    print(label, value)

Sklearn Random Forest
R^2: 0.4310018449687165
Adjusted R^2: 0.3233535453682034
MAE: 43.600219107785264
MSE: 3045.77008444658
RMSE: 55.18849594296424
RMSPE: 55.17768069821551
MAPE 37.79118671487843


### XGBoost Regressor

In [117]:
# Import XGBoost Regressor
from xgboost import XGBRegressor

In [118]:
# Gradient-boosted trees with a squared-error objective; hyperparameters are fixed
# here (`eta` is XGBoost's alias for the learning rate).
xgb_params = dict(
    objective="reg:squarederror",
    random_state=1,
    eta=0.31,
    max_depth=6,
    n_estimators=7,
)
reg = XGBRegressor(**xgb_params)

# Fit the booster on the training split
reg.fit(X_train, y_train);

In [119]:
# Score both splits with the fitted booster: training rows first, then test rows
train_features, test_features = X_train, X_test
y_pred = reg.predict(train_features)
y_test_pred = reg.predict(test_features)

In [120]:
# Model Evaluation on the held-out test split
acc_xgb = metrics.r2_score(y_test, y_test_pred)
sample_count = len(y_test)
feature_count = X_test.shape[1]
adj_r2 = 1 - (1 - acc_xgb) * (sample_count - 1) / (sample_count - feature_count - 1)
mae = metrics.mean_absolute_error(y_test, y_test_pred)
mse = metrics.mean_squared_error(y_test, y_test_pred)
rel_err = (y_test - y_test_pred) / y_test  # per-sample relative error

print('XGBoost Regressor')
print('R^2:', acc_xgb)
print('Adjusted R^2:', adj_r2)
print('MAE:', mae)
print('MSE:', mse)
print('RMSE:', np.sqrt(mse))
print('RMSPE:', np.sqrt(np.mean(np.square(rel_err))) * 100)
print('MAPE', np.mean(np.abs(rel_err)) * 100)

XGBoost Regressor
R^2: 0.3647281628710862
Adjusted R^2: 0.24454159908994033
MAE: 42.61623250631462
MSE: 3400.524131598081
RMSE: 58.314013166631575
RMSPE: 48.041525213872504
MAPE 32.67055441465551


### SVR

In [121]:
# Creating scaled set to be used in model to improve our results
from sklearn.preprocessing import StandardScaler
# Import SVM Regressor
from sklearn import svm

In [122]:
# Standardise the features: the scaling statistics are learned from the training
# split only and then applied unchanged to the test split.
# Note: this rebinds X_train / X_test, so every later cell sees the scaled values.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [123]:
# Support-vector regressor with fixed hyperparameters
svr_params = {"C": 933.645625, "epsilon": 0.143973, "gamma": 0.000212}
reg = svm.SVR(**svr_params)

# Fit on the (standardised) training split
reg.fit(X_train, y_train);

In [124]:
# Predictions of the fitted SVR for the training split and for the test split
y_pred, y_test_pred = [reg.predict(split) for split in (X_train, X_test)]

In [125]:
# Model Evaluation on the held-out test split
acc_svm = metrics.r2_score(y_test, y_test_pred)
n_obs, n_feat = len(y_test), X_test.shape[1]
mse = metrics.mean_squared_error(y_test, y_test_pred)
rel_err = (y_test - y_test_pred) / y_test  # per-sample relative error

svr_report = {
    'R^2:': acc_svm,
    'Adjusted R^2:': 1 - (1 - acc_svm) * (n_obs - 1) / (n_obs - n_feat - 1),
    'MAE:': metrics.mean_absolute_error(y_test, y_test_pred),
    'MSE:': mse,
    'RMSE:': np.sqrt(mse),
    'RMSPE:': np.sqrt(np.mean(np.square(rel_err))) * 100,
    'MAPE': np.mean(np.abs(rel_err)) * 100,
}

print('Sklearn SVR')
# dicts preserve insertion order, so the lines come out in the order listed above
for label, value in svr_report.items():
    print(label, value)

Sklearn SVR
R^2: 0.426518303245474
Adjusted R^2: 0.3180217660216448
MAE: 42.972679564588994
MSE: 3069.769876242511
RMSE: 55.40550402480345
RMSPE: 52.45550088519351
MAPE 35.498756583854785


### SVR Extended MAPE

In [126]:
# Creating scaled set to be used in model to improve our results
from sklearn.preprocessing import StandardScaler
# Import SVR Extended MAPE
%run ..//..//Library//MapeExtended_Library.ipynb 

In [127]:
# Standardise the features using statistics learned from the training split.
# NOTE(review): when the notebook is run top to bottom, X_train / X_test were already
# standardised in the SVR section, so this second pass should be close to a no-op --
# confirm that re-scaling here is intended.
sc = StandardScaler()
X_train, X_test = sc.fit_transform(X_train), sc.transform(X_test)

In [128]:
# Extended-MAPE SVR with fixed hyperparameters.
# SVR_mapext is presumably defined by the %run of MapeExtended_Library.ipynb in the
# import cell of this section -- confirm.
mapext_params = {
    "kernel": "rbf",
    "C": 185.977814,
    "gamma": 0.006246,
    "epsilon": 35.910829,
    "lamda": 0.039905,  # keyword is spelled 'lamda', exactly as the call requires
}
model = SVR_mapext(**mapext_params)

# Fit on the training split
model.fit(X_train, y_train);

In [129]:
# Predictions of the extended-MAPE SVR: training split first, then test split
y_pred, y_test_pred = (model.predict(split) for split in (X_train, X_test))

In [130]:
# Model Evaluation on the held-out test split.
# The score gets its own variable so that it no longer overwrites `acc_svm`, which
# holds the R^2 of the plain scikit-learn SVR evaluated in the previous section.
acc_svr_mapext = metrics.r2_score(y_test, y_test_pred)
n_obs, n_feat = len(y_test), X_test.shape[1]
mse = metrics.mean_squared_error(y_test, y_test_pred)
rel_err = (y_test - y_test_pred) / y_test  # per-sample relative error

print('SVR-Extended MAPE')
print('R^2:', acc_svr_mapext)
# Adjusted R^2 penalises R^2 by the number of predictors relative to the sample size
print('Adjusted R^2:', 1 - (1 - acc_svr_mapext) * (n_obs - 1) / (n_obs - n_feat - 1))
print('MAE:', metrics.mean_absolute_error(y_test, y_test_pred))
print('MSE:', mse)
print('RMSE:', np.sqrt(mse))
print('MAPE', np.mean(np.abs(rel_err)) * 100)
print('RMSPE', np.sqrt(np.mean(np.square(rel_err))) * 100)

SVR-Extended MAPE
R^2: 0.2553766507982641
Adjusted R^2: 0.11450196311144922
MAE: 48.99028340899621
MSE: 3985.8679700892426
RMSE: 63.133730842468374
MAPE 34.24792242337358
RMSPE 43.20640394885827
