Philippe JOly
This is to further tune hyperparams using a simple grid search (instead of Byaesian search)

In [1]:
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.preprocessing import StandardScaler
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from datetime import datetime as dt
import pickle

In [2]:
train = pd.read_csv('../../data/train_static.csv')
val = pd.read_csv('../../data/val_static.csv')
test = pd.read_csv('../../data/test_static.csv')

In [3]:

train.drop(columns=['Date/Time (UTC)','Year'], inplace=True)
val.drop(columns=['Date/Time (UTC)','Year'], inplace=True)
test.drop(columns=['Date/Time (UTC)','Year'], inplace=True)

X = train.drop(columns=["Average Power Output (MW)"]).values
y = train["Average Power Output (MW)"].values

X_val = val.drop(columns=["Average Power Output (MW)"]).values
y_val = val["Average Power Output (MW)"].values

X_test = test.drop(columns=["Average Power Output (MW)"]).values
y_test = test["Average Power Output (MW)"].values

In [4]:
y = y.reshape(len(y), 1)
y_val = y_val.reshape(len(y_val), 1)
y_test = y_test.reshape(len(y_test), 1)

x_scaler = StandardScaler().fit(X)
y_scaler = StandardScaler().fit(y)

X = x_scaler.transform(X)
y = y_scaler.transform(y)

X_val = x_scaler.transform(X_val)
y_val = y_scaler.transform(y_val)

X_test = x_scaler.transform(X_test)
y_test = y_scaler.transform(y_test)

y = y.reshape(y.shape[0])
y_val = y_val.reshape(y_val.shape[0])
y_test = y_test.reshape(y_test.shape[0])

# Hype-Parameter Tuning

In [5]:
svr = SVR()

In [6]:
opt_params=[{
    'C': np.logspace(-4,4,10),
    'gamma': np.logspace(-6,1,10),
    'epsilon': [0.01, 0.1, 0.2, 0.5],
    'coef0': [0.0, 0.1, 0.5, 1.0],
    'kernel': ('sigmoid','rbf'),
    'max_iter': [1000]
}]

In [7]:
clf = GridSearchCV(estimator=svr, param_grid=opt_params, verbose=4, n_jobs=-1)
clf.fit(X,y)

Fitting 5 folds for each of 3200 candidates, totalling 16000 fits


In [None]:
results = pd.DataFrame(clf.cv_results_).sort_values(by="rank_test_score", ascending=True)

In [None]:
clf.best_score_

In [None]:
results.head()