In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
from sklearn import ensemble
from sklearn.ensemble import VotingRegressor
from sklearn.ensemble import StackingRegressor

In [2]:
data=pd.read_csv('../_data/8_features_c2db.csv')

In [3]:
df=data.values
df=np.array(df)

for i in range(8):
  df[:,i]=(df[:,i]-df[:,i].min())/(df[:,i].max()- df[:,i].min())
  
data_train, data_test = train_test_split(df, test_size=0.1, random_state=0)
x_train=data_train[:,:8]
x_test=data_test[:,:8]
y_train=data_train[:,8]
y_test=data_test[:,8]

In [4]:
params = {'n_estimators':21000, 'max_depth': 21, 'min_samples_split': 5,
          'max_features':0.8, 'learning_rate': 0.001, 'loss': 'squared_error',
          'random_state':0, 'subsample': 0.85}

In [None]:
kernel = 'rbf'
C = [50]
gamma = [50] 
epsilon = [0.2]

params_dict = {
    'C': C,
    'gamma': gamma, 
    'epsilon': epsilon
}

svr = SVR()

gsCV = GridSearchCV(
    estimator=svr, 
    param_grid=params_dict, 
    n_jobs=2,
    scoring='r2',
    cv=6 
)

gsCV.fit(x_train, y_train)

In [8]:
estimators = [('svr', SVR(C=gsCV.best_params_['C'], kernel=kernel, 
                          gamma=gsCV.best_params_['gamma'],
                          epsilon=gsCV.best_params_['epsilon'])),
              ('rfr', RandomForestRegressor(criterion = 'squared_error', 
                                              bootstrap=False, max_features=0.8, 
                                              max_depth=20, min_samples_split=5, 
                                              n_estimators=15000, min_samples_leaf=3, 
                                              random_state=0)),
              ('gbr', ensemble.GradientBoostingRegressor(**params))]

In [11]:
final_estimator = RandomForestRegressor(n_estimators=100, max_leaf_nodes=3, random_state=36)

reg = StackingRegressor(
    estimators=estimators,
    final_estimator=final_estimator)

In [13]:
reg.fit(x_train, y_train)
reg_train_pred = reg.predict(x_train)
reg_test_pred = reg.predict(x_test)

In [14]:
print("Model evaluation - Test Set:")
print('r^2:',r2_score(y_test, reg_test_pred))
print('RSE', mean_squared_error(y_test, reg_test_pred)) 
print('MAE', mean_absolute_error(y_test, reg_test_pred)) 
print('RMSE:',np.sqrt(mean_squared_error(y_test,reg_test_pred)))

Model evaluation - Test Set:
r^2: 0.780716575234559
RSE 0.14526942600040327
MAE 0.16246159180036912
RMSE: 0.3811422647783938
