In [1]:
# automatic svm hyperparameter tuning using skopt 
import pandas as pd
import numpy as np 
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC
from sklearn.impute import SimpleImputer
from sklearn.model_selection import RepeatedStratifiedKFold
from skopt import BayesSearchCV

# `display` and `HTML` come from the public `IPython.display` module; importing
# `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
# Widen the Jupyter Notebook container so cells use the whole available window width.
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
# Dataset selection: base names (without extension) of the CSV files in ./datasets/
train_set_name = "weather_data_2000_2019_3c"
test_set_name = "weather_data_2020_2021_3c"

# header=None: the files are read without a header row, so columns are addressed by position.
train_dataset = pd.read_csv(f"./datasets/{train_set_name}.csv", header=None)
test_dataset = pd.read_csv(f"./datasets/{test_set_name}.csv", header=None)

# Show the user which datasets were loaded.
print("Your Train Dataset is: ", train_set_name)
print("Your Test Dataset is: ", test_set_name)

# Number of feature columns selected below (not referenced elsewhere in the visible cells).
features_selected = 5

# Each feature is kept as a single-column DataFrame (slice c:c+1) so the pieces can be
# concatenated column-wise later. Column positions, in order of the names on the left:
# 4, 7, 8, 9, 19. The UV index (column 24) is currently left out.

# Training features
(temperature_train, feels_like_train, dew_point_train,
 humidity_train, pressure_train) = (
    train_dataset.iloc[:, col:col + 1] for col in (4, 7, 8, 9, 19)
)

# Testing features (same column positions as for training)
(temperature_test, feels_like_test, dew_point_test,
 humidity_test, pressure_test) = (
    test_dataset.iloc[:, col:col + 1] for col in (4, 7, 8, 9, 19)
)

# Assemble the training feature matrix from the single-column feature frames.
X_train = pd.concat([temperature_train, feels_like_train, dew_point_train, humidity_train, pressure_train], axis=1)
# Replace any infinite values with NaN. `to_replace` and `value` must be two separate
# arguments; previously both were wrapped in one tuple, so no replacement value was passed.
X_train = X_train.replace([np.inf, -np.inf], np.nan)
X_train = X_train.to_numpy().astype('float64')

# Labels are read from column 30 of each dataset, as (n_samples, 1) int32 arrays.
y_train = train_dataset.iloc[:, 30:31].to_numpy().astype('int32')
y_true = test_dataset.iloc[:, 30:31].to_numpy().astype('int32')

#https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html
# Assemble the test feature matrix the same way as the training one.
test_data = pd.concat([temperature_test, feels_like_test, dew_point_test, humidity_test, pressure_test], axis=1)
test_data = test_data.replace([np.inf, -np.inf], np.nan)                                                              #replace any infinite values with nan
X_test = test_data.to_numpy().astype('float64')

# Replace NaN feature values with the most frequent value of each feature column.
# The imputer is fitted on the training features only and the learned values are then
# applied to both feature matrices: calling fit() alone (as before) only learns the
# statistics and never fills anything in, and each further fit() discards the previous one.
# The label arrays are int32 and therefore cannot contain NaN, so they are not imputed.
imp = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
X_train = imp.fit_transform(X_train)
X_test = imp.transform(X_test)

Your Train Dataset is:  weather_data_2000_2019_3c
Your Test Dataset is:  weather_data_2020_2021_3c


SimpleImputer(strategy='most_frequent')

In [None]:
#https://machinelearningmastery.com/scikit-optimize-for-hyperparameter-tuning-in-machine-learning/
def main(X=None, y=None, random_state=1):
    """Tune SVC hyperparameters with Bayesian optimisation (skopt ``BayesSearchCV``).

    Parameters
    ----------
    X : array-like, optional
        Feature matrix to fit on. Defaults to the notebook-level ``X_train``.
    y : array-like, optional
        Class labels; flattened to 1-D before fitting. Defaults to the
        notebook-level ``y_train``.
    random_state : int or None, optional
        Seed used for both the cross-validation splitter and the optimiser so
        that repeated runs explore the same candidates. Defaults to 1.

    Returns
    -------
    BayesSearchCV
        The fitted search object (``best_score_``, ``best_params_``, ...).
    """
    # Fall back to the notebook-level training data when called without arguments.
    features = X_train if X is None else X
    labels = y_train if y is None else y

    # define search space: C and gamma are sampled log-uniformly over [1e-6, 100],
    # degree is an integer in [1, 5], kernel is one of the four listed choices
    params = {
        'C': (1e-6, 100.0, 'log-uniform'),
        'gamma': (1e-6, 100.0, 'log-uniform'),
        'degree': (1, 5),
        'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    }

    # define evaluation: 5 stratified folds repeated 3 times = 15 fits per candidate
    cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=random_state)

    # define the search; seeding the optimiser makes the sampled candidates reproducible
    search = BayesSearchCV(
        estimator=SVC(),
        search_spaces=params,
        n_jobs=-1,
        cv=cv,
        verbose=10,
        random_state=random_state,
    )

    # perform the search
    print("Running Bayesian Optimisation!")
    search.fit(features, np.asarray(labels).ravel())

    # report the best result
    print(search.best_score_)
    print(search.best_params_)

    # hand the fitted search back so later cells can inspect or reuse it
    return search


if __name__ == "__main__":
    search = main()

Running Bayesian Optimisation!
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
