In [43]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [17]:
training = pd.read_csv("splits/training.csv")
training.drop(["index_number"], axis=1, inplace=True)
training

#training, val = train_test_split(training, test_size=0.2, random_state=1337)

X_train = training.iloc[:, :4]
y_train = training.iloc[:, -1]
#X_val = val.iloc[:, :4] 
#y_val = val.iloc[:, -1]

In [None]:
parameters = {
               "criterion": ["friedman_mse"],
              "loss":["squared_error"],
              "max_features":["log2","sqrt"],
              'learning_rate': [0.01,0.1,0.5],
              'max_depth': [3, 4, 5, 6, 7],
              'n_estimators': [250, 500, 1000]
              }

gbr = GradientBoostingRegressor()
gs = GridSearchCV(gbr, parameters, n_jobs=-1,cv = 5, verbose=1)

gs.fit(X_train, y_train)

Fitting 5 folds for each of 90 candidates, totalling 450 fits


In [71]:
gs.best_score_

0.9655956000414637

In [72]:
gs.best_params_

{'criterion': 'friedman_mse',
 'learning_rate': 0.1,
 'loss': 'squared_error',
 'max_depth': 5,
 'max_features': 'sqrt',
 'n_estimators': 250}

In [73]:
gs.best

AttributeError: 'GridSearchCV' object has no attribute 'best'

## Testing

In [74]:
interpolation = pd.read_csv("splits/inter_total.csv") 
interpolation.drop(["index_number"], axis=1, inplace=True)
extrapolation = pd.read_csv("splits/extrapolation.csv")
extrapolation.drop(["index_number"], axis=1, inplace=True)

In [75]:
X_test_inter = interpolation.iloc[:, :4]
X_test_extra = extrapolation.iloc[:, :4]
y_test_inter = interpolation.iloc[:, -1]
y_test_extra = extrapolation.iloc[:, -1]

In [76]:
gs.best_estimator_.score(X_test_inter, y_test_inter)

0.9972527922176481

In [77]:
pred_inter = gs.best_estimator_.predict(X_test_inter)

In [78]:
print("Test Inter")
print(f"MSE: {mean_squared_error(y_pred=pred_inter, y_true=y_test_inter)}, MAE: {mean_absolute_error(y_pred=pred_inter, y_true=y_test_inter)}, R2: {r2_score(y_pred=pred_inter, y_true=y_test_inter)}")

Test Inter
MSE: 0.0008981306587055064, MAE: 0.023623548620465014, R2: 0.9972527922176481


In [79]:
gs.best_estimator_.score(X_test_extra, y_test_extra)

0.4440279926920183

In [80]:
pred = gs.best_estimator_.predict(X_test_extra)

In [81]:
print("Test Extra")
print(f"MSE: {mean_squared_error(y_pred=pred, y_true=y_test_extra)}, MAE: {mean_absolute_error(y_pred=pred, y_true=y_test_extra)}, R2: {r2_score(y_pred=pred, y_true=y_test_extra)}")

Test Extra
MSE: 3.7524433183144277, MAE: 0.2954383027864377, R2: 0.4440279926920183
