## Imports

In [None]:
import os
import sys

import joblib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import uniform

from sklearn.svm import SVC, SVR
from sklearn.metrics import accuracy_score, make_scorer, mean_squared_error
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, StratifiedKFold, KFold, train_test_split, ValidationCurveDisplay, validation_curve
from sklearn.multioutput import MultiOutputRegressor
from sklearn.preprocessing import PolynomialFeatures

# Locate the repository root: keep the components of the current working
# directory up to (and including) the 'ML-B' folder, then make the project's
# code folder importable.
dir_parts = os.getcwd().split(os.sep)
root_index = dir_parts.index('ML-B')  # raises ValueError when run outside the repository
root_path = os.sep.join(dir_parts[:root_index + 1])
sys.path.append(root_path + '/code/')
from data.data_config import Dataset
from data.data_utils import load_monk, load_cup, store_monk_result, store_cup_result
from hyperparameter_tuning import tuning_search_top_configs
from training.metrics import mean_euclidean_error

%load_ext autoreload
%autoreload 2

# SVM
In this notebook we test an **SVC** and an **SVR** on the tasks at hand, i.e. the three MONK's problems and the CUP dataset, respectively.

## Settings

In [None]:
# Notebook-wide configuration, referenced by the cells below
MODEL_NAME = 'SVM'          # sub-folder name used under the results directory
INTERNAL_TEST_SPLIT = 0.1   # internal test split percentage
RANDOM_STATE = 128          # reproducibility
N_SPLITS = 5                # for cross-validation
POLY_DEGREE = 3             # polynomial features pre-processing

## Path

In [None]:
# Directory where this model's results are stored
results_dir = f'{root_path}/results/{MODEL_NAME}'

# Development / test file paths for the three MONK problems
m1_dev_path = Dataset.MONK_1.dev_path
m1_test_path = Dataset.MONK_1.test_path
m2_dev_path = Dataset.MONK_2.dev_path
m2_test_path = Dataset.MONK_2.test_path
m3_dev_path = Dataset.MONK_3.dev_path
m3_test_path = Dataset.MONK_3.test_path

# Development / (blind) test file paths for the CUP dataset
cup_dev_path = Dataset.CUP.dev_path
cup_test_path = Dataset.CUP.test_path

# MONK-1

In [None]:
# Load MONK-1: development and test inputs/targets, in the order returned by
# the project's load_monk helper
x_dev_m1, y_dev_m1, x_test_m1, y_test_m1 = load_monk(m1_dev_path, m1_test_path)

Let's perform a grid-search to identify promising hyper-parameters for the task.

In [None]:
# Grid-search spaces for the MONK classifiers: one sub-grid per SVC kernel, so
# kernel-specific hyper-parameters are only combined with the kernel they belong to
clf_hparams_spaces = [
    # RBF kernel
    {
        'kernel': ['rbf'],
        'gamma': ['scale', 'auto'],
        'C': [0.1, 1, 10, 50, 100],
    },
    # Linear kernel
    {
        'kernel': ['linear'],
        'C': [1, 10, 100, 1000],
    },
    # Polynomial kernel
    {
        'kernel': ['poly'],
        'degree': [2, 3, 4],
        'coef0': [0.0],
        'gamma': ['scale', 'auto'],
        'C': [0.1, 1, 10, 50, 100],
    },
]

In [None]:
# Exhaustive search over the SVC grids on MONK-1, scored by accuracy with
# stratified, shuffled cross-validation
grid_search_m1 = GridSearchCV(
    SVC(),
    param_grid=clf_hparams_spaces,
    # Use the notebook-wide settings rather than repeating the literals 5 / 128,
    # so that changing N_SPLITS or RANDOM_STATE affects every search consistently
    cv=StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=RANDOM_STATE),
    scoring='accuracy',
    verbose=1
)

grid_search_m1.fit(x_dev_m1, y_dev_m1)

In [None]:
# Show the top-k configurations from the MONK-1 grid-search results
tuning_search_top_configs(grid_search_m1.cv_results_) # top k config

## Training - Testing

In [None]:
# Alternative: build the SVC directly from the grid-search winner
#svc_m1 = SVC(**grid_search_m1.best_params_)

# Build the SVC with the manually selected configuration and fit it on the
# whole MONK-1 development set
svc_m1 = SVC(kernel='poly', degree=2, C=10)
svc_m1.fit(x_dev_m1, y_dev_m1)

In [None]:
print('-- DEVELOPMENT --')
# Predict once and reuse the predictions for both metrics (the model was
# previously queried twice on the same data)
preds_dev_m1 = svc_m1.predict(x_dev_m1)
acc_dev_m1 = accuracy_score(y_dev_m1, preds_dev_m1)
mse_dev_m1 = mean_squared_error(y_dev_m1, preds_dev_m1)
print(f'Loss (MSE): {mse_dev_m1:.4f} - Accuracy: {acc_dev_m1:.4f}')

In [None]:
print('-- TEST --')
# Predict once and reuse the predictions for both metrics (the model was
# previously queried twice on the same data)
preds_test_m1 = svc_m1.predict(x_test_m1)
acc_test_m1 = accuracy_score(y_test_m1, preds_test_m1)
mse_test_m1 = mean_squared_error(y_test_m1, preds_test_m1)
print(f'Loss (MSE): {mse_test_m1:.4f} - Accuracy: {acc_test_m1:.4f}')

## Store results

In [None]:
# Gather the MONK-1 metrics per split and store them together with the
# hyper-parameters of the fitted classifier
report_m1 = dict(
    dev=dict(mse=mse_dev_m1, accuracy=acc_dev_m1),
    test=dict(mse=mse_test_m1, accuracy=acc_test_m1),
)

store_monk_result(results_dir + '/MONK1/', svc_m1.get_params(), report_m1)

# MONK-2

In [None]:
# Load MONK-2: development and test inputs/targets, in the order returned by
# the project's load_monk helper
x_dev_m2, y_dev_m2, x_test_m2, y_test_m2 = load_monk(m2_dev_path, m2_test_path)

Let's perform a grid-search to identify promising hyper-parameters for the task.

## GridSearch

In [None]:
# Exhaustive search over the SVC grids on MONK-2, scored by accuracy with
# stratified, shuffled cross-validation
grid_search_m2 = GridSearchCV(
    SVC(),
    param_grid=clf_hparams_spaces,
    # Use the notebook-wide settings rather than repeating the literals 5 / 128,
    # so that changing N_SPLITS or RANDOM_STATE affects every search consistently
    cv=StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=RANDOM_STATE),
    scoring='accuracy',
    verbose=1
)

grid_search_m2.fit(x_dev_m2, y_dev_m2)

In [None]:
# Show the top-k configurations from the MONK-2 grid-search results
tuning_search_top_configs(grid_search_m2.cv_results_) # top k config

## Training - Testing

In [None]:
# Alternative: build the SVC directly from the grid-search winner
#svc_m2 = SVC(**grid_search_m2.best_params_)

# Build the SVC with the manually selected configuration and fit it on the
# whole MONK-2 development set
svc_m2 = SVC(kernel='poly', degree=2, C=50)
svc_m2.fit(x_dev_m2, y_dev_m2)

In [None]:
print('-- DEVELOPMENT --')
# Predict once and reuse the predictions for both metrics (the model was
# previously queried twice on the same data)
preds_dev_m2 = svc_m2.predict(x_dev_m2)
acc_dev_m2 = accuracy_score(y_dev_m2, preds_dev_m2)
mse_dev_m2 = mean_squared_error(y_dev_m2, preds_dev_m2)
print(f'MSE: {mse_dev_m2:.4f} - Accuracy: {acc_dev_m2:.4f}')

In [None]:
print('-- TEST --')
# Predict once and reuse the predictions for both metrics (the model was
# previously queried twice on the same data)
preds_test_m2 = svc_m2.predict(x_test_m2)
acc_test_m2 = accuracy_score(y_test_m2, preds_test_m2)
mse_test_m2 = mean_squared_error(y_test_m2, preds_test_m2)
print(f'MSE: {mse_test_m2:.4f} - Accuracy: {acc_test_m2:.4f}')

## Store results

In [None]:
# Gather the MONK-2 metrics per split and store them together with the
# hyper-parameters of the fitted classifier
report_m2 = dict(
    dev=dict(mse=mse_dev_m2, accuracy=acc_dev_m2),
    test=dict(mse=mse_test_m2, accuracy=acc_test_m2),
)

store_monk_result(results_dir + '/MONK2/', svc_m2.get_params(), report_m2)

# MONK-3

In [None]:
# Load MONK-3: development and test inputs/targets, in the order returned by
# the project's load_monk helper
x_dev_m3, y_dev_m3, x_test_m3, y_test_m3 = load_monk(m3_dev_path, m3_test_path)

Let's perform a grid-search to identify promising hyper-parameters for the task.

In [None]:
# Exhaustive search over the SVC grids on MONK-3, scored by accuracy with
# stratified, shuffled cross-validation
grid_search_m3 = GridSearchCV(
    SVC(),
    param_grid=clf_hparams_spaces,
    # Use the notebook-wide settings rather than repeating the literals 5 / 128,
    # so that changing N_SPLITS or RANDOM_STATE affects every search consistently
    cv=StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=RANDOM_STATE),
    scoring='accuracy',
    verbose=1
)

grid_search_m3.fit(x_dev_m3, y_dev_m3)

In [None]:
# Show the top-k configurations from the MONK-3 grid-search results
tuning_search_top_configs(grid_search_m3.cv_results_) # top k config

## Training - Testing

In [None]:
# Alternative: build the SVC directly from the grid-search winner
#svc_m3 = SVC(**grid_search_m3.best_params_)

# Build the SVC with the manually selected configuration and fit it on the
# whole MONK-3 development set
svc_m3 = SVC(kernel='poly', degree=2, gamma='auto', C=10)
svc_m3.fit(x_dev_m3, y_dev_m3)

In [None]:
print('-- DEVELOPMENT --')
# Predict once and reuse the predictions for both metrics (the model was
# previously queried twice on the same data)
preds_dev_m3 = svc_m3.predict(x_dev_m3)
acc_dev_m3 = accuracy_score(y_dev_m3, preds_dev_m3)
mse_dev_m3 = mean_squared_error(y_dev_m3, preds_dev_m3)
print(f'MSE: {mse_dev_m3:.4f} - Accuracy: {acc_dev_m3:.4f}')

In [None]:
print('-- TEST --')
# Predict once and reuse the predictions for both metrics (the model was
# previously queried twice on the same data)
preds_test_m3 = svc_m3.predict(x_test_m3)
acc_test_m3 = accuracy_score(y_test_m3, preds_test_m3)
mse_test_m3 = mean_squared_error(y_test_m3, preds_test_m3)
print(f'MSE: {mse_test_m3:.4f} - Accuracy: {acc_test_m3:.4f}')

## Storing results

In [None]:
# Gather the MONK-3 metrics per split and store them together with the
# hyper-parameters of the fitted classifier
report_m3 = dict(
    dev=dict(mse=mse_dev_m3, accuracy=acc_dev_m3),
    test=dict(mse=mse_test_m3, accuracy=acc_test_m3),
)

store_monk_result(results_dir + '/MONK3/', svc_m3.get_params(), report_m3)

# CUP

In [None]:
# Load CUP: development inputs/targets plus the test inputs (no test targets
# are returned by load_cup here)
x_dev_cup, y_dev_cup, x_test_cup = load_cup(cup_dev_path, cup_test_path)

## Dev - Internal Test Split 
The development dataset is split between training and internal test ($90-10$).

In [None]:
# Hold out an internal test set from the development data.
# The seed comes from the notebook-wide RANDOM_STATE setting (previously the
# literal 128 was repeated here), so the split follows the configuration cell.
x_train_cup, x_internal_test_cup, y_train_cup, y_internal_test_cup = train_test_split(
    x_dev_cup, y_dev_cup, test_size=INTERNAL_TEST_SPLIT, random_state=RANDOM_STATE
)

## Polynomial features pre-processing
We create a version of our dataset to which `PolynomialFeatures` pre-processing is applied with a fixed degree.

In [None]:
# --- COMMENT TO USE NON-ENHANCED DATASET ---
# Polynomial features pre-processing: the transformer is fitted on the training
# split only and then applied unchanged to the internal test and blind test inputs.
# NOTE(review): this cell overwrites its own inputs, so it is not idempotent.
# Running it a second time without re-running the split cell expands the
# already-expanded features again.
# NOTE(review): later cells also reference `poly` (final re-training) and store
# results with is_poly=True, so they need adjusting too if this cell is disabled.
poly = PolynomialFeatures(degree=POLY_DEGREE)
x_train_cup = poly.fit_transform(x_train_cup)
x_internal_test_cup = poly.transform(x_internal_test_cup)
x_test_cup = poly.transform(x_test_cup)

## Hyper-parameters Tuning
A common approach is to start with a coarse search across a wide range of values to find promising sub-ranges of our parameter space. Then, you would zoom into these ranges and perform another search to fine-tune the configurations.

Here, we proceed as follows:
1. (coarse) Grid-search across a wide range of hyper-parameters and values;
2. (fine-tune) Random-search into zoomed intervals w.r.t. best configuration found by grid-search.

Then, we perform a single run of grid-search and a single run of random-search, both on the dataset pre-processed with PolynomialFeatures at a fixed degree. The configuration used for the final re-training and for the evaluation on the internal test is the one with the best mean MEE across the cross-validation folds.

Note that the polynomial degree was not tuned, because doing so would be very expensive. We therefore simply use a fixed degree value.

### Grid Search

In [None]:
# Grid-search spaces for the CUP regressor. Keys carry the 'estimator__' prefix
# because the SVR is wrapped in a MultiOutputRegressor.
svr_C_grid = [0.1, 1, 10, 100, 500, 1000]
svr_epsilon_grid = [0.01, 0.03, 0.05, 0.07, 0.09]

grid_search_spaces_cup = [
    # RBF kernel
    {
        'estimator__kernel': ['rbf'],
        'estimator__gamma': ['scale', 'auto'],
        'estimator__C': list(svr_C_grid),
        'estimator__epsilon': list(svr_epsilon_grid),
    },
    # Linear kernel
    {
        'estimator__kernel': ['linear'],
        'estimator__C': list(svr_C_grid),
        'estimator__epsilon': list(svr_epsilon_grid),
    },
    # Polynomial kernel
    {
        'estimator__kernel': ['poly'],
        'estimator__degree': [2, 3, 4],
        'estimator__coef0': [0.0],
        'estimator__gamma': ['scale', 'auto'],
        'estimator__C': list(svr_C_grid),
        'estimator__epsilon': list(svr_epsilon_grid),
    },
]

In [None]:
# Coarse search over the CUP grids. make_scorer(..., greater_is_better=False)
# negates the MEE, so the search maximizes the negated error.
grid_search_cup = GridSearchCV(
    estimator=MultiOutputRegressor(SVR()),
    param_grid=grid_search_spaces_cup,
    scoring=make_scorer(mean_euclidean_error, greater_is_better=False),
    cv=KFold(n_splits=N_SPLITS, shuffle=True, random_state=RANDOM_STATE),
    verbose=1,
)

grid_search_cup.fit(x_train_cup, y_train_cup)

In [None]:
# Show the top-k configurations from the CUP grid-search results
tuning_search_top_configs(grid_search_cup.cv_results_) # top k config

### Random Search

In [None]:
# Best grid-search configuration (keys still carry the 'estimator__' prefix);
# the random-search spaces below are built around it
best_params = grid_search_cup.best_params_

In [None]:
# Random-search spaces: zoom around the best configuration found by grid-search
best_epsilon = best_params['estimator__epsilon']
epsilon_low = max(0.01, best_epsilon * 0.8)
epsilon_high = max(best_epsilon * 1.3, epsilon_low)  # keeps the interval width non-negative

random_search_spaces_cup = {
    'estimator__C': [700, 1000, 1300, 1500, 1800, 2000],
    # scipy's uniform(loc, scale) samples from [loc, loc + scale], so the second
    # argument must be the interval WIDTH, not its upper bound. Passing the
    # upper bound (as before) searched [0.8*eps, 2.1*eps] instead of [0.8*eps, 1.3*eps].
    'estimator__epsilon': uniform(epsilon_low, epsilon_high - epsilon_low),
    'estimator__kernel': [best_params['estimator__kernel']],
}

# Carry over kernel-specific settings only when the winning sub-grid defined them:
# the linear sub-grid has no gamma (indexing it raised KeyError), and the poly
# sub-grid also tunes degree/coef0, which would otherwise fall back to SVR defaults.
for hparam in ('estimator__gamma', 'estimator__degree', 'estimator__coef0'):
    if hparam in best_params:
        random_search_spaces_cup[hparam] = [best_params[hparam]]

In [None]:
# Fine-tuning search: 50 random draws from the zoomed spaces, evaluated with
# the same folds and the same (negated) MEE scorer as the grid search
random_search_cup = RandomizedSearchCV(
    estimator=MultiOutputRegressor(SVR()),
    param_distributions=random_search_spaces_cup,
    n_iter=50,
    scoring=make_scorer(mean_euclidean_error, greater_is_better=False),
    cv=KFold(n_splits=N_SPLITS, shuffle=True, random_state=RANDOM_STATE),
    random_state=RANDOM_STATE,
    verbose=1,
)

random_search_cup.fit(x_train_cup, y_train_cup)

In [None]:
# Show the top-k configurations from the CUP random-search results
tuning_search_top_configs(random_search_cup.cv_results_) # top k config

### Save tuning results

In [None]:
best_score_grid = grid_search_cup.best_score_
best_score_random = random_search_cup.best_score_

# Check if best result is from GridSearch or RandomSearch.
# Both scores are negated MEE values, so the larger score is the better one.
if best_score_random > best_score_grid:
    print("Best configuration from RandomizedSearch:\n")
    best_params = random_search_cup.best_params_
    print(best_params)
else:
    print("GridSearchCV resulted in the best configuration.")
    # Select the grid-search winner explicitly instead of relying on whatever
    # `best_params` currently holds from a previously executed cell
    best_params = grid_search_cup.best_params_

## Training and Internal test assessment
Let's re-train our model with the selected configuration on the entire training split (i.e., the $90\%$ of the development set that was used for model selection). In this way, we're able to leverage all the training data, with no folds held out for validation. Finally, we predict on the (untouched) internal test to perform model assessment and estimate our performance on the blind test set.

Note that, in this phase, we don't use the internal test in any way (i.e., no training and no validation). We only estimate its errors and plot its curves.

In [None]:
# --- UNCOMMENT IF YOU USE HPARAMS TUNING ---
# (drops the 'estimator__' prefix so the keys can be passed straight to SVR)
#best_params = {k.replace('estimator__', ''): v for k, v in best_params.items()}

# --- UNCOMMENT TO TEST ---
# Best configuration (fixed by hand; overrides any tuning result above)
best_params = dict(C=2000, epsilon=0.07, kernel='rbf', gamma='scale')

# Train SVR: MultiOutputRegressor fits one SVR per target, here on the training split
multi_svr = MultiOutputRegressor(SVR(**best_params))
multi_svr.fit(x_train_cup, y_train_cup)

In [None]:
print('-- TRAINING --')
# Errors of the fitted model on the data it was trained on
train_preds = multi_svr.predict(x_train_cup)
mee_train_cup, mse_train_cup = (
    mean_euclidean_error(y_train_cup, train_preds),
    mean_squared_error(y_train_cup, train_preds),
)
print(f'Loss (MSE): {mse_train_cup:.4f} - MEE: {mee_train_cup:.4f}')

In [None]:
print('-- INTERNAL TEST --')
# Errors of the fitted model on the held-out internal test split
internal_test_preds = multi_svr.predict(x_internal_test_cup)
mee_internal_test_cup, mse_internal_test_cup = (
    mean_euclidean_error(y_internal_test_cup, internal_test_preds),
    mean_squared_error(y_internal_test_cup, internal_test_preds),
)
print(f'Loss (MSE): {mse_internal_test_cup:.4f} - MEE: {mee_internal_test_cup:.4f}')

In [None]:
# Blind test set predictions from the model trained on the training split
# (no targets are loaded for this set, so no error is computed here)
blind_test_preds_cup = multi_svr.predict(x_test_cup)

### Store results

In [None]:
# Disabled: cross-validation statistics of the best grid-search configuration
#best_index = grid_search_cup.best_index_
#mee_mean_score = -grid_search_cup.cv_results_['mean_test_score'][best_index]
#mee_std_dev = std_validation_error = grid_search_cup.cv_results_['std_test_score'][best_index]

# Gather the CUP metrics per split and store them with the configuration used
# and the blind test predictions
report_cup = dict(
    train=dict(mse=mse_train_cup, mee=mee_train_cup),
    internal_test=dict(mse=mse_internal_test_cup, mee=mee_internal_test_cup),
)

store_cup_result(results_dir + '/CUP/', best_params, report_cup, blind_test_preds_cup, is_poly=True)

In [None]:
# Team info header written at the top of every predictions file
team_info = (
    "# Matteo Pinna, Leonardo Caridi, Marco Sanna\n"
    "# ACD-TEAM\n"
    "# ML-CUP23 v2\n"
    "# 20/01/2024\n"
)

# Store train and internal test predictions, one CSV each. A single loop
# replaces the two copy-pasted writer blocks; file names and contents are unchanged.
for preds_filename, preds in (
    ('train_preds_poly.csv', train_preds),
    ('internal_test_preds_poly.csv', internal_test_preds),
):
    with open(results_dir + '/CUP/' + preds_filename, 'w') as outf:
        outf.write(team_info)
        # One row per sample: 1-based id followed by the predicted values
        outf.writelines(
            f"{i},{','.join(map(str, pred))}\n" for i, pred in enumerate(preds, 1)
        )

# Final re-training
Since the test error has already been estimated on the (untouched) internal test, we now perform a final re-training with all the development data. This does not violate the rules, since the internal test has never been used for any model selection.

Thus, we train on the entire development data, i.e. $90\%$ train/val + $10\%$ internal test.

In [None]:
# Apply polynomial to the entire development set, reusing the transformer
# fitted on the training split.
# NOTE(review): x_dev_cup is overwritten in place, so this cell is not idempotent.
# Running it twice feeds already-expanded features to `poly` — re-load the CUP
# data before re-running.
x_dev_cup = poly.transform(x_dev_cup)

This time - since the internal test estimate has already been performed - we're able to use the internal test set as training data as well.

In [None]:
# Train SVR on the whole development set with the same configuration.
# Note: this rebinds `multi_svr`, replacing the model fitted on the training split.
multi_svr = MultiOutputRegressor(SVR(**best_params))
multi_svr.fit(x_dev_cup, y_dev_cup)

In [None]:
print('-- DEVELOPMENT --')
# Errors of the re-trained model on the full development set it was fitted on
final_train_preds = multi_svr.predict(x_dev_cup)
loss_dev_cup = mean_squared_error(y_dev_cup, final_train_preds)
mee_dev_cup = mean_euclidean_error(y_dev_cup, final_train_preds)
print(f'Mean Loss (MSE): {loss_dev_cup:.4f} - Mean MEE: {mee_dev_cup:.4f}')

In [None]:
# Final blind test set predictions, from the model re-trained on the whole
# development set
final_blind_test_preds = multi_svr.predict(x_test_cup)

In [None]:
# Team info header written at the top of every predictions file
team_info = (
    "# Matteo Pinna, Leonardo Caridi, Marco Sanna\n"
    "# ACD-TEAM\n"
    "# ML-CUP23 v2\n"
    "# 20/01/2024\n"
)

# Store the final re-training predictions (development set and blind test),
# one CSV each. A single loop replaces the two copy-pasted writer blocks;
# file names and contents are unchanged.
for preds_filename, preds in (
    ('final_train.csv', final_train_preds),
    ('final_blind_test.csv', final_blind_test_preds),
):
    with open(results_dir + '/CUP/' + preds_filename, 'w') as outf:
        outf.write(team_info)
        # One row per sample: 1-based id followed by the predicted values
        outf.writelines(
            f"{i},{','.join(map(str, pred))}\n" for i, pred in enumerate(preds, 1)
        )

# Validation curves

In [None]:
# Validation curves with respect to C with four different values of epsilon
param_name = "estimator__C"
param_range = 2.5*np.logspace(0, 3, 20)
epsilon_range = [0.007, 0.07, 0.1, 0.7]
figure, axis = plt.subplots(2, 2)
min_train_scores=[]
min_test_scores=[]

# Build the splitter and the scorer once, from the notebook-wide settings
# (previously the literals 5 / 128 were repeated and both objects were
# re-created on every iteration)
curve_cv = KFold(n_splits=N_SPLITS, shuffle=True, random_state=RANDOM_STATE)
mee_scorer = make_scorer(mean_euclidean_error, greater_is_better=False)

# axis.flat walks the 2x2 grid row by row: (0,0), (0,1), (1,0), (1,1)
for epsilon, ax in zip(epsilon_range, axis.flat):
    train_scores, test_scores = validation_curve(
        MultiOutputRegressor(SVR(kernel='rbf', gamma='scale', epsilon=epsilon)),
        x_train_cup,
        y_train_cup,
        param_name=param_name,
        param_range=param_range,
        cv=curve_cv,
        scoring=mee_scorer,
        verbose=1,
    )

    # Scores are negated MEE values, so the maximum score is the smallest error.
    # The stored (and printed) values keep the negative sign.
    min_train_scores.append(round(np.max(train_scores),4))
    min_test_scores.append(round(np.max(test_scores),4))

    # Named `curve_display` so that IPython's built-in `display` is not shadowed
    curve_display = ValidationCurveDisplay(
        param_name=param_name,
        param_range=param_range,
        train_scores=train_scores,
        test_scores=test_scores,
        score_name="MEE",
    )
    curve_display.plot(ax=ax, negate_score=True)  # plot positive MEE values
    ax.set_title('epsilon = %1.3f' %epsilon)

figure.tight_layout()
plt.show()
print(f"min_train_scores = {min_train_scores}")
print(f"min_test_scores = {min_test_scores}")