In [16]:
import pandas as pd
import numpy as np
from sklearn import metrics

### Importing the Diabetes dataset

In [36]:
from sklearn.datasets import load_diabetes
# Load the diabetes dataset object; the cells below read its
# .data, .feature_names and .target attributes.
diabetes = load_diabetes()

In [37]:
# Build the feature table with named columns, then append the target
# values as an extra 'diabetes' column.
data = pd.DataFrame(diabetes.data, columns=diabetes.feature_names)
data = data.assign(diabetes=diabetes.target)

In [38]:
# Separate the predictors (X) from the response column (y)
target_col = 'diabetes'
X = data.drop(columns=[target_col])
y = data[target_col]

In [39]:
# Hold out 30% of the rows for testing; the fixed random_state keeps the
# partition identical from run to run.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=4,
)

### Linear Regression

In [40]:
# Import library for Linear Regression
from sklearn.linear_model import LinearRegression

In [41]:
# Instantiate the linear regressor and fit it on the training split.
# fit() returns the estimator itself, so both steps are chained.
lm = LinearRegression().fit(X_train, y_train)

In [42]:
# Predictions for the training split (y_pred) and the held-out split (y_test_pred)
y_pred, y_test_pred = lm.predict(X_train), lm.predict(X_test)

In [43]:
# Model Evaluation on the held-out split.
# Shared intermediates are computed once and reused by the prints below.
n_obs, n_feat = len(y_test), X_test.shape[1]
rel_err = (y_test - y_test_pred) / y_test  # per-row relative error
mse_test = metrics.mean_squared_error(y_test, y_test_pred)

acc_linreg = metrics.r2_score(y_test, y_test_pred)
print('Linear Regression')
print('R^2:', acc_linreg)
print('Adjusted R^2:', 1 - (1 - acc_linreg) * (n_obs - 1) / (n_obs - n_feat - 1))
print('MAE:', metrics.mean_absolute_error(y_test, y_test_pred))
print('MSE:', mse_test)
print('RMSE:', np.sqrt(mse_test))
print('RMSPE:', np.sqrt(np.mean(np.square(rel_err))) * 100)
print('MAPE', np.mean(np.abs(rel_err)) * 100)

Linear Regression
R^2: 0.44639570493224057
Adjusted R^2: 0.4010183036971783
MAE: 43.68985512129272
MSE: 2963.3688363116294
RMSE: 54.436833452283295
RMSPE: 59.96229909080583
MAPE 38.763655375881605


### Random Forest

In [44]:
from sklearn.ensemble import RandomForestRegressor

In [45]:
# Random forest with a fixed seed, a depth cap and a set number of trees,
# fitted on the training split (fit() returns the estimator itself).
reg = RandomForestRegressor(
    n_estimators=34,
    max_depth=12,
    random_state=2,
).fit(X_train, y_train)

In [46]:
# Predictions for the training split (y_pred) and the held-out split (y_test_pred)
y_pred, y_test_pred = reg.predict(X_train), reg.predict(X_test)

In [47]:
# Model Evaluation on the held-out split.
# Shared intermediates are computed once and reused by the prints below.
n_obs, n_feat = len(y_test), X_test.shape[1]
rel_err = (y_test - y_test_pred) / y_test  # per-row relative error
mse_test = metrics.mean_squared_error(y_test, y_test_pred)

acc_rf = metrics.r2_score(y_test, y_test_pred)
print('Sklearn Random Forest')
print('R^2:', acc_rf)
print('Adjusted R^2:', 1 - (1 - acc_rf) * (n_obs - 1) / (n_obs - n_feat - 1))
print('MAE:', metrics.mean_absolute_error(y_test, y_test_pred))
print('MSE:', mse_test)
print('RMSE:', np.sqrt(mse_test))
print('RMSPE:', np.sqrt(np.mean(np.square(rel_err))) * 100)
print('MAPE', np.mean(np.abs(rel_err)) * 100)

Sklearn Random Forest
R^2: 0.41614836295635427
Adjusted R^2: 0.36829167139539964
MAE: 44.53568584043778
MSE: 3125.278762573367
RMSE: 55.904192710148024
RMSPE: 54.22701258556424
MAPE 38.337041868839464


### XGBoost Regressor

In [32]:
# Import XGBoost Regressor
from xgboost import XGBRegressor

In [33]:
# Configure the gradient-boosted regressor (squared-error objective, fixed seed)
reg = XGBRegressor(
    objective="reg:squarederror",
    n_estimators=4,
    max_depth=2,
    eta=0.31,
    random_state=1,
)

# Fit on the training split; the trailing ';' suppresses the estimator repr
reg.fit(X_train, y_train);

In [34]:
# Predictions for the training split (y_pred) and the held-out split (y_test_pred)
y_pred, y_test_pred = reg.predict(X_train), reg.predict(X_test)

In [35]:
# Model Evaluation on the held-out split.
# Shared intermediates are computed once and reused by the prints below.
n_obs, n_feat = len(y_test), X_test.shape[1]
rel_err = (y_test - y_test_pred) / y_test  # per-row relative error
mse_test = metrics.mean_squared_error(y_test, y_test_pred)

acc_xgb = metrics.r2_score(y_test, y_test_pred)
print('XGBoost Regressor')
print('R^2:', acc_xgb)
print('Adjusted R^2:', 1 - (1 - acc_xgb) * (n_obs - 1) / (n_obs - n_feat - 1))
print('MAE:', metrics.mean_absolute_error(y_test, y_test_pred))
print('MSE:', mse_test)
print('RMSE:', np.sqrt(mse_test))
print('RMSPE:', np.sqrt(np.mean(np.square(rel_err))) * 100)
print('MAPE', np.mean(np.abs(rel_err)) * 100)

XGBoost Regressor
R^2: 0.22237703627323646
Adjusted R^2: 0.1586374490825182
MAE: 49.62502174090622
MSE: 4162.510438662927
RMSE: 64.51752040076344
RMSPE: 41.09686551707037
MAPE 33.53415958600321


### SVR

In [48]:
# Creating scaled set to be used in model to improve our results
from sklearn.preprocessing import StandardScaler
# Import SVM Regressor
from sklearn import svm

In [49]:
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [50]:
# Create a SVM Regressor
reg = svm.SVR(C = 27.638650, epsilon = 1.282185, gamma = 0.008259)
# Train the model using the training sets 
reg.fit(X_train, y_train);

In [51]:
# Model prediction on train data
y_pred = reg.predict(X_train)
# Predicting Test data with the model
y_test_pred = reg.predict(X_test)

In [52]:
# Model Evaluation on the held-out split.
# Shared intermediates are computed once and reused by the prints below.
n_obs, n_feat = len(y_test), X_test.shape[1]
rel_err = (y_test - y_test_pred) / y_test  # per-row relative error
mse_test = metrics.mean_squared_error(y_test, y_test_pred)

acc_svm = metrics.r2_score(y_test, y_test_pred)
print('Sklearn SVR')
print('R^2:', acc_svm)
print('Adjusted R^2:', 1 - (1 - acc_svm) * (n_obs - 1) / (n_obs - n_feat - 1))
print('MAE:', metrics.mean_absolute_error(y_test, y_test_pred))
print('MSE:', mse_test)
print('RMSE:', np.sqrt(mse_test))
print('RMSPE:', np.sqrt(np.mean(np.square(rel_err))) * 100)
print('MAPE', np.mean(np.abs(rel_err)) * 100)

Sklearn SVR
R^2: 0.4260165744725033
Adjusted R^2: 0.3789687527079544
MAE: 44.28338531869277
MSE: 3072.4555624327177
RMSE: 55.42973536318496
RMSPE: 54.47048230732927
MAPE 36.78353922149706


### SVR LS Percentage

In [65]:
# Creating scaled set to be used in model to improve our results
from sklearn.preprocessing import StandardScaler
# Import SVR least_square percentage
%run ..//..//Library//SVRLSPercent_Library.ipynb

In [66]:
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [67]:
# Create a SVM Regressor
model = SVR_PerLs(
    kernel = "rbf", 
    C = 1103860.1485,
    gamma = 0.0025, 
)
# Train the model using the training sets 
model.fit(X_train,y_train);

In [68]:
# Model prediction on train data
y_pred = model.predict(X_train)
# Predicting Test data with the model
y_test_pred = model.predict(X_test)

In [69]:
# Model Evaluation on the held-out split.
# The R^2 is stored under its own name: assigning it to `acc_svm` would
# overwrite the score of the scikit-learn SVR evaluated earlier in the notebook.
acc_svr_ls = metrics.r2_score(y_test, y_test_pred)

# Shared intermediates, computed once and reused by the prints below
n_obs, n_feat = len(y_test), X_test.shape[1]
rel_err = (y_test - y_test_pred) / y_test  # per-row relative error
mse_test = metrics.mean_squared_error(y_test, y_test_pred)

print('SVR MAPE extendido')
print('R^2:', acc_svr_ls)
print('Adjusted R^2:', 1 - (1 - acc_svr_ls) * (n_obs - 1) / (n_obs - n_feat - 1))
print('MAE:', metrics.mean_absolute_error(y_test, y_test_pred))
print('MSE:', mse_test)
print('RMSE:', np.sqrt(mse_test))
print('RMSPE:', np.sqrt(np.mean(np.square(rel_err))) * 100)
print('MAPE', np.mean(np.abs(rel_err)) * 100)

SVR MAPE extendido
R^2: 0.11304000839604955
Adjusted R^2: 0.040338369739987945
MAE: 52.81612077147305
MSE: 4747.77674521594
RMSE: 68.90411268724051
RMSPE: 41.677852731792704
MAPE 34.19295983966329
