# Support Vector Machine Experiments

In [1]:
from src.svr2 import MultiOutputSVR
from src.GridSearch2 import GridSearch
from data.load_data import load_monk, load_MLCup
import numpy as np
import os
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split


In [2]:
# Dataset locations, resolved against the current working directory.
# Each file name starts with '/' so that it can be appended directly to `path`.
path = os.getcwd()
file_train = '/data/ML-CUP22-TR.csv'
file_test = '/data/ML-CUP22-TS.csv'

# Column labels: x1..x9 for both files; the training labels add y1 and y2.
labels_test = [f'x{k}' for k in range(1, 10)]
labels_train = [*labels_test, 'y1', 'y2']

# Load the labelled data and hold out 10% of it as an internal test set.
# The fixed random_state keeps the split reproducible across runs.
X, y = load_MLCup(f'{path}{file_train}', labels_train)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=42
)

# The blind-test file is loaded with the x-labels only.
X_blind_test = load_MLCup(f'{path}{file_test}', labels_test)

In [3]:
# Grid search over the hyperparameters of the RBF-kernel SVR.
svr_rbf = MultiOutputSVR(n_outputs=2, kernel='rbf')

# Candidate values per hyperparameter: 4 * 3 * 3 * 3 * 3 = 324 combinations.
params_grid = dict(
    C=[50, 75, 100, 500],
    epsilon=[0.001, 0.01, 0.1],
    gamma=[0.5, 0.75, 1],
    max_iter=[10, 50, 100],
    tolerance=[0.0001, 0.001, 0.01],
)

# Run the search with 5 folds on the training split only.
grid_svr_rbf = GridSearch(svr_rbf)
grid_svr_rbf.fit(
    X_train,
    y_train,
    params_grid,
    n_folds=5,
    parallel=True,
    verbose=False,
)
print(30 * '*')
grid_svr_rbf.print_grid_results(n_results=10)

Grid search of 324 combinations.
Parallelisation activated


In [None]:
# Grid search over the hyperparameters of the polynomial-kernel SVR.
svr_poly = MultiOutputSVR(n_outputs = 2, kernel = 'polynomial')

# Candidate values per hyperparameter; `degree` and `offset` are specific to
# this polynomial search (the RBF search uses `gamma` instead).
params_grid = {
    "C" : [50, 75, 100, 500],
    "epsilon" : [0.001, 0.01, 0.1],
    "max_iter" : [10, 50, 100],
    "tolerance" : [0.0001, 0.001, 0.01],
    "degree" : [2, 3, 5, 6, 9],
    "offset" : [0.5, 1, 2, 5]
}

# The search must wrap the polynomial estimator created above. Wrapping
# `svr_rbf` here would silently re-search the RBF model with this grid.
grid_svr_poly = GridSearch(svr_poly)
grid_svr_poly.fit(X_train, y_train, params_grid, n_folds = 5, parallel = True, verbose = False)
print('*' * 30)
grid_svr_poly.print_grid_results(n_results = 10)

In [None]:
# Keep the grid search whose best score is lower. The score is reported below
# as a mean validation error, so the smaller value is taken to be the better one.
# NOTE(review): confirm that GridSearch.best_score is an error (lower is better).
# On a tie the polynomial search is kept.
if grid_svr_rbf.best_score < grid_svr_poly.best_score:
    best_kernel = grid_svr_rbf
else:
    best_kernel = grid_svr_poly

best_model = best_kernel.best_model
best_params = best_kernel.get_best_parameters()

print(f"Best kernel : {best_model.kernel}")
print(f"Best parameters : {best_params}")
# best_score is read from the GridSearch object, the same attribute compared above.
print(f"Best mean validation error : {best_kernel.best_score}")

In [None]:
# Refit the selected model on the training split using the best
# hyperparameters, then predict on the held-out test split.
best_model.fit(X_train, y_train, **best_params)
y_pred = best_model.predict(X_test)

# Report the 'mee' metric on the training and held-out test splits.
train_mee = best_model.evaluate_model(X_train, y_train, 'mee')
print(f"Train MEE : {train_mee}")
test_mee = best_model.evaluate_model(X_test, y_test, 'mee')
print(f"Test MEE : {test_mee}")

In [None]:
# Two side-by-side panels that share the same axis bounds so they can be
# compared visually: held-out test predictions vs. ground truth (left) and
# blind-test predictions (right).
fig, (ax1, ax2) = plt.subplots(1,2, figsize = (12,4))


# Scatterplot of y_test and y_pred: column 1 on the x-axis, column 0 on the y-axis.
ax1.scatter(y_test[:,1], y_test[:,0], label = 'Ground Truth')
ax1.scatter(y_pred[:,1], y_pred[:,0], color = 'red', label = 'Predicted')
ax1.legend()
ax1.set_ybound(0,27)
ax1.set_xbound(-37,-12)
ax1.set_title('Test predictions and ground truth')

# Predict on the blind test inputs; there are no targets to compare against here.
y_cup = best_model.predict(X_blind_test)
ax2.scatter(y_cup[:,1], y_cup[:,0], c = 'k', label = 'Blind test predictions')
ax2.legend()
ax2.set_ybound(0,27)
ax2.set_xbound(-37,-12)
ax2.set_title('Blind test predictions')