In [54]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import KFold
from sklearn.metrics import make_scorer
from sklearn.model_selection import GridSearchCV
from joblib import dump, load

In [43]:
def load_dataset(path="../data/ML-CUP22-INTERNAL-TR.csv", return_scaler=False):
    """Load the training CSV and standardize its input features.

    The file is read without a header row. The first column (label ``0``) is
    discarded, the next nine columns are used as inputs and every remaining
    column is used as a regression target. Inputs are standardized with a
    ``StandardScaler`` fitted on all loaded rows; targets are left unscaled.

    NOTE(review): because the scaler is fitted on every row, any later
    cross-validation on the returned array uses validation folds that already
    influenced the scaling statistics. Wrapping the scaler and the model in a
    ``sklearn.pipeline.Pipeline`` would avoid that.

    Parameters
    ----------
    path : str or path-like, default "../data/ML-CUP22-INTERNAL-TR.csv"
        Location of the comma-separated training file.
    return_scaler : bool, default False
        When True, the fitted scaler is returned as a third element so the
        identical transformation can be applied to unseen data or persisted
        next to a trained model.

    Returns
    -------
    x_train : numpy.ndarray of float64
        Standardized input features (the nine columns after the dropped one).
    y_train : numpy.ndarray of float64
        Unscaled target columns.
    x_scaler : StandardScaler
        Only returned when ``return_scaler`` is True.
    """
    # ``drop`` returns a new frame, so the freshly read data is never mutated in place.
    train = pd.read_csv(path, header=None, sep=",").drop(columns=0)

    x_raw = train.iloc[:, :9].to_numpy().astype(np.float64)
    y_train = train.iloc[:, 9:].to_numpy().astype(np.float64)

    x_scaler = StandardScaler()
    x_train = x_scaler.fit_transform(x_raw)

    if return_scaler:
        return x_train, y_train, x_scaler
    return x_train, y_train


def mee(y_true, y_pred):
    """Return the Mean Euclidean Error between targets and predictions.

    For each row, the Euclidean distance between the true and the predicted
    vector is computed (reduction over ``axis=1``); the result is the average
    of those per-row distances.
    """
    residuals = y_true - y_pred
    per_row_distance = np.sqrt((residuals ** 2).sum(axis=1))
    return per_row_distance.mean()

In [48]:
def ParameterTune_Regression(x_train, y_train):
    """Grid-search SVR hyper-parameters for a multi-target regression problem.

    An ``SVR`` wrapped in ``MultiOutputRegressor`` is tuned over three
    kernel-specific grids (rbf, linear, poly). Each candidate is evaluated
    with 5-fold shuffled cross-validation (fixed seed 42) and scored with the
    negated ``mee`` so that the search, which maximizes, picks the lowest error.

    Parameters
    ----------
    x_train : array-like
        Input features, forwarded unchanged to ``GridSearchCV.fit``.
    y_train : array-like
        Target values, forwarded unchanged to ``GridSearchCV.fit``.

    Returns
    -------
    GridSearchCV
        The fitted search object; with ``refit=True`` its best estimator has
        been refitted on the full data passed in.
    """
    # The same epsilon-tube widths are explored for every kernel.
    epsilon_values = [0.1, 0.3, 0.5, 0.7, 0.9]

    rbf_grid = {
        'estimator__kernel': ['rbf'],
        'estimator__C': [0.1, 0.5, 1, 10, 20, 30, 40, 50],
        'estimator__gamma': [0.01, 0.05, 0.1, 0.5, 1],
        'estimator__epsilon': epsilon_values,
    }
    linear_grid = {
        'estimator__kernel': ['linear'],
        'estimator__C': [5, 10, 15, 20, 25, 30],
        'estimator__epsilon': epsilon_values,
    }
    poly_grid = {
        'estimator__kernel': ['poly'],
        'estimator__degree': [2, 3, 4, 5, 6],
        'estimator__C': [1, 10, 25, 50, 75, 100],
        'estimator__epsilon': epsilon_values,
    }

    search = GridSearchCV(
        estimator=MultiOutputRegressor(SVR()),
        param_grid=[rbf_grid, linear_grid, poly_grid],
        # greater_is_better=False makes the scorer return the negated error.
        scoring=make_scorer(mee, greater_is_better=False),
        cv=KFold(n_splits=5, shuffle=True, random_state=42),
        refit=True,
        n_jobs=-1,  # -1 --> use every available processor
        verbose=4,
    )

    return search.fit(x_train, y_train)

In [49]:
# Load the standardized inputs and the raw targets, then print both shapes
# as a quick sanity check on the number of samples, features and targets.
x_train, y_train = load_dataset()
print(x_train.shape, y_train.shape)

(1194, 9) (1194, 2)


In [50]:
# Run the full hyper-parameter search; `res` is the fitted GridSearchCV object.
# The grids expand to 380 candidates, each evaluated on 5 folds (1900 fits).
res = ParameterTune_Regression(x_train, y_train)

Fitting 5 folds for each of 380 candidates, totalling 1900 fits
[CV 3/5] END estimator__C=0.1, estimator__epsilon=0.1, estimator__gamma=0.01, estimator__kernel=rbf;, score=-2.878 total time=   0.2s
[CV 5/5] END estimator__C=0.1, estimator__epsilon=0.1, estimator__gamma=0.01, estimator__kernel=rbf;, score=-2.916 total time=   0.2s
[CV 4/5] END estimator__C=0.1, estimator__epsilon=0.1, estimator__gamma=0.05, estimator__kernel=rbf;, score=-2.157 total time=   0.2s
[CV 3/5] END estimator__C=0.1, estimator__epsilon=0.1, estimator__gamma=0.1, estimator__kernel=rbf;, score=-1.939 total time=   0.2s
[CV 2/5] END estimator__C=0.1, estimator__epsilon=0.1, estimator__gamma=0.5, estimator__kernel=rbf;, score=-3.409 total time=   0.3s
[CV 2/5] END estimator__C=0.1, estimator__epsilon=0.1, estimator__gamma=1, estimator__kernel=rbf;, score=-5.503 total time=   0.2s
[CV 2/5] END estimator__C=0.1, estimator__epsilon=0.3, estimator__gamma=0.01, estimator__kernel=rbf;, score=-3.024 total time=   0.2s
[CV

[CV 4/5] END estimator__C=0.1, estimator__epsilon=0.1, estimator__gamma=0.01, estimator__kernel=rbf;, score=-3.035 total time=   0.2s
[CV 3/5] END estimator__C=0.1, estimator__epsilon=0.1, estimator__gamma=0.05, estimator__kernel=rbf;, score=-2.037 total time=   0.2s
[CV 2/5] END estimator__C=0.1, estimator__epsilon=0.1, estimator__gamma=0.1, estimator__kernel=rbf;, score=-2.000 total time=   0.2s
[CV 1/5] END estimator__C=0.1, estimator__epsilon=0.1, estimator__gamma=0.5, estimator__kernel=rbf;, score=-3.506 total time=   0.2s
[CV 5/5] END estimator__C=0.1, estimator__epsilon=0.1, estimator__gamma=0.5, estimator__kernel=rbf;, score=-3.394 total time=   0.3s
[CV 4/5] END estimator__C=0.1, estimator__epsilon=0.1, estimator__gamma=1, estimator__kernel=rbf;, score=-5.513 total time=   0.2s
[CV 3/5] END estimator__C=0.1, estimator__epsilon=0.3, estimator__gamma=0.01, estimator__kernel=rbf;, score=-2.898 total time=   0.2s
[CV 2/5] END estimator__C=0.1, estimator__epsilon=0.3, estimator__ga

[CV 2/5] END estimator__C=0.5, estimator__epsilon=0.9, estimator__gamma=1, estimator__kernel=rbf;, score=-3.348 total time=   0.2s
[CV 5/5] END estimator__C=1, estimator__epsilon=0.1, estimator__gamma=0.01, estimator__kernel=rbf;, score=-1.991 total time=   0.2s
[CV 1/5] END estimator__C=1, estimator__epsilon=0.1, estimator__gamma=0.05, estimator__kernel=rbf;, score=-1.460 total time=   0.2s
[CV 3/5] END estimator__C=1, estimator__epsilon=0.1, estimator__gamma=0.1, estimator__kernel=rbf;, score=-1.446 total time=   0.4s
[CV 4/5] END estimator__C=1, estimator__epsilon=0.1, estimator__gamma=0.1, estimator__kernel=rbf;, score=-1.532 total time=   0.2s
[CV 1/5] END estimator__C=1, estimator__epsilon=0.1, estimator__gamma=1, estimator__kernel=rbf;, score=-2.627 total time=   0.2s
[CV 2/5] END estimator__C=1, estimator__epsilon=0.1, estimator__gamma=1, estimator__kernel=rbf;, score=-2.733 total time=   0.2s
[CV 4/5] END estimator__C=1, estimator__epsilon=0.3, estimator__gamma=0.01, estimator

[CV 1/5] END estimator__C=0.5, estimator__epsilon=0.9, estimator__gamma=1, estimator__kernel=rbf;, score=-3.289 total time=   0.2s
[CV 1/5] END estimator__C=1, estimator__epsilon=0.1, estimator__gamma=0.01, estimator__kernel=rbf;, score=-1.853 total time=   0.2s
[CV 2/5] END estimator__C=1, estimator__epsilon=0.1, estimator__gamma=0.01, estimator__kernel=rbf;, score=-1.907 total time=   0.2s
[CV 4/5] END estimator__C=1, estimator__epsilon=0.1, estimator__gamma=0.05, estimator__kernel=rbf;, score=-1.589 total time=   0.3s
[CV 5/5] END estimator__C=1, estimator__epsilon=0.1, estimator__gamma=0.05, estimator__kernel=rbf;, score=-1.683 total time=   0.2s
[CV 4/5] END estimator__C=1, estimator__epsilon=0.1, estimator__gamma=0.5, estimator__kernel=rbf;, score=-2.002 total time=   0.2s
[CV 5/5] END estimator__C=1, estimator__epsilon=0.1, estimator__gamma=0.5, estimator__kernel=rbf;, score=-2.097 total time=   0.2s
[CV 2/5] END estimator__C=1, estimator__epsilon=0.3, estimator__gamma=0.01, est

[CV 1/5] END estimator__C=10, estimator__epsilon=0.9, estimator__gamma=1, estimator__kernel=rbf;, score=-2.254 total time=   0.2s
[CV 3/5] END estimator__C=20, estimator__epsilon=0.1, estimator__gamma=0.01, estimator__kernel=rbf;, score=-1.645 total time=   0.3s
[CV 4/5] END estimator__C=20, estimator__epsilon=0.1, estimator__gamma=0.01, estimator__kernel=rbf;, score=-1.683 total time=   0.3s
[CV 4/5] END estimator__C=20, estimator__epsilon=0.1, estimator__gamma=0.05, estimator__kernel=rbf;, score=-1.524 total time=   0.4s
[CV 5/5] END estimator__C=20, estimator__epsilon=0.1, estimator__gamma=0.05, estimator__kernel=rbf;, score=-1.617 total time=   0.4s
[CV 2/5] END estimator__C=20, estimator__epsilon=0.1, estimator__gamma=0.5, estimator__kernel=rbf;, score=-1.929 total time=   0.8s
[CV 3/5] END estimator__C=20, estimator__epsilon=0.1, estimator__gamma=0.5, estimator__kernel=rbf;, score=-1.728 total time=   1.3s
[CV 4/5] END estimator__C=20, estimator__epsilon=0.3, estimator__gamma=0.0

[CV 1/5] END estimator__C=30, estimator__epsilon=0.9, estimator__gamma=0.5, estimator__kernel=rbf;, score=-1.797 total time=   0.2s
[CV 5/5] END estimator__C=30, estimator__epsilon=0.9, estimator__gamma=0.5, estimator__kernel=rbf;, score=-2.157 total time=   0.2s
[CV 3/5] END estimator__C=30, estimator__epsilon=0.9, estimator__gamma=1, estimator__kernel=rbf;, score=-2.292 total time=   0.2s
[CV 2/5] END estimator__C=40, estimator__epsilon=0.1, estimator__gamma=0.01, estimator__kernel=rbf;, score=-1.629 total time=   0.3s
[CV 5/5] END estimator__C=40, estimator__epsilon=0.1, estimator__gamma=0.01, estimator__kernel=rbf;, score=-1.740 total time=   0.3s
[CV 4/5] END estimator__C=40, estimator__epsilon=0.1, estimator__gamma=0.05, estimator__kernel=rbf;, score=-1.536 total time=   0.6s
[CV 3/5] END estimator__C=40, estimator__epsilon=0.1, estimator__gamma=0.1, estimator__kernel=rbf;, score=-1.445 total time=   0.8s
[CV 2/5] END estimator__C=40, estimator__epsilon=0.1, estimator__gamma=0.5,

[CV 4/5] END estimator__C=30, estimator__epsilon=0.9, estimator__gamma=0.5, estimator__kernel=rbf;, score=-1.876 total time=   0.3s
[CV 4/5] END estimator__C=30, estimator__epsilon=0.9, estimator__gamma=1, estimator__kernel=rbf;, score=-2.463 total time=   0.2s
[CV 3/5] END estimator__C=40, estimator__epsilon=0.1, estimator__gamma=0.01, estimator__kernel=rbf;, score=-1.613 total time=   0.4s
[CV 2/5] END estimator__C=40, estimator__epsilon=0.1, estimator__gamma=0.05, estimator__kernel=rbf;, score=-1.501 total time=   0.6s
[CV 1/5] END estimator__C=40, estimator__epsilon=0.1, estimator__gamma=0.1, estimator__kernel=rbf;, score=-1.351 total time=   0.7s
[CV 5/5] END estimator__C=40, estimator__epsilon=0.1, estimator__gamma=0.1, estimator__kernel=rbf;, score=-1.722 total time=   0.8s
[CV 4/5] END estimator__C=40, estimator__epsilon=0.1, estimator__gamma=0.5, estimator__kernel=rbf;, score=-2.075 total time=   1.0s
[CV 3/5] END estimator__C=40, estimator__epsilon=0.1, estimator__gamma=1, es

[CV 1/5] END estimator__C=50, estimator__epsilon=0.9, estimator__gamma=0.01, estimator__kernel=rbf;, score=-1.556 total time=   0.2s
[CV 3/5] END estimator__C=50, estimator__epsilon=0.9, estimator__gamma=0.01, estimator__kernel=rbf;, score=-1.641 total time=   0.2s
[CV 2/5] END estimator__C=50, estimator__epsilon=0.9, estimator__gamma=0.05, estimator__kernel=rbf;, score=-1.532 total time=   0.3s
[CV 2/5] END estimator__C=50, estimator__epsilon=0.9, estimator__gamma=0.1, estimator__kernel=rbf;, score=-1.547 total time=   0.3s
[CV 1/5] END estimator__C=50, estimator__epsilon=0.9, estimator__gamma=0.5, estimator__kernel=rbf;, score=-1.859 total time=   0.3s
[CV 5/5] END estimator__C=50, estimator__epsilon=0.9, estimator__gamma=0.5, estimator__kernel=rbf;, score=-2.242 total time=   0.3s
[CV 4/5] END estimator__C=50, estimator__epsilon=0.9, estimator__gamma=1, estimator__kernel=rbf;, score=-2.468 total time=   0.2s
[CV 3/5] END estimator__C=5, estimator__epsilon=0.1, estimator__kernel=line

[CV 3/5] END estimator__C=50, estimator__epsilon=0.9, estimator__gamma=0.1, estimator__kernel=rbf;, score=-1.538 total time=   0.3s
[CV 3/5] END estimator__C=50, estimator__epsilon=0.9, estimator__gamma=0.5, estimator__kernel=rbf;, score=-1.806 total time=   0.3s
[CV 2/5] END estimator__C=50, estimator__epsilon=0.9, estimator__gamma=1, estimator__kernel=rbf;, score=-2.471 total time=   0.2s
[CV 1/5] END estimator__C=5, estimator__epsilon=0.1, estimator__kernel=linear;, score=-2.099 total time=   0.5s
[CV 5/5] END estimator__C=5, estimator__epsilon=0.1, estimator__kernel=linear;, score=-2.217 total time=   0.5s
[CV 4/5] END estimator__C=5, estimator__epsilon=0.3, estimator__kernel=linear;, score=-2.205 total time=   0.5s
[CV 3/5] END estimator__C=5, estimator__epsilon=0.5, estimator__kernel=linear;, score=-2.190 total time=   0.4s
[CV 2/5] END estimator__C=5, estimator__epsilon=0.7, estimator__kernel=linear;, score=-2.099 total time=   0.5s
[CV 2/5] END estimator__C=5, estimator__epsilo

[CV 1/5] END estimator__C=1, estimator__degree=5, estimator__epsilon=0.7, estimator__kernel=poly;, score=-3.984 total time=   0.2s
[CV 5/5] END estimator__C=1, estimator__degree=5, estimator__epsilon=0.7, estimator__kernel=poly;, score=-3.821 total time=   0.2s
[CV 4/5] END estimator__C=1, estimator__degree=5, estimator__epsilon=0.9, estimator__kernel=poly;, score=-3.622 total time=   0.2s
[CV 3/5] END estimator__C=1, estimator__degree=6, estimator__epsilon=0.1, estimator__kernel=poly;, score=-4.686 total time=   0.3s
[CV 2/5] END estimator__C=1, estimator__degree=6, estimator__epsilon=0.3, estimator__kernel=poly;, score=-5.208 total time=   0.3s
[CV 2/5] END estimator__C=1, estimator__degree=6, estimator__epsilon=0.5, estimator__kernel=poly;, score=-5.209 total time=   0.3s
[CV 1/5] END estimator__C=1, estimator__degree=6, estimator__epsilon=0.7, estimator__kernel=poly;, score=-5.333 total time=   0.3s
[CV 5/5] END estimator__C=1, estimator__degree=6, estimator__epsilon=0.7, estimator

[CV 3/5] END estimator__C=1, estimator__degree=6, estimator__epsilon=0.7, estimator__kernel=poly;, score=-4.681 total time=   0.2s
[CV 1/5] END estimator__C=1, estimator__degree=6, estimator__epsilon=0.9, estimator__kernel=poly;, score=-5.336 total time=   0.2s
[CV 5/5] END estimator__C=1, estimator__degree=6, estimator__epsilon=0.9, estimator__kernel=poly;, score=-5.025 total time=   0.2s
[CV 3/5] END estimator__C=10, estimator__degree=2, estimator__epsilon=0.1, estimator__kernel=poly;, score=-4.485 total time=   0.3s
[CV 2/5] END estimator__C=10, estimator__degree=2, estimator__epsilon=0.3, estimator__kernel=poly;, score=-4.832 total time=   0.3s
[CV 1/5] END estimator__C=10, estimator__degree=2, estimator__epsilon=0.5, estimator__kernel=poly;, score=-5.209 total time=   0.3s
[CV 5/5] END estimator__C=10, estimator__degree=2, estimator__epsilon=0.5, estimator__kernel=poly;, score=-4.812 total time=   0.2s
[CV 4/5] END estimator__C=10, estimator__degree=2, estimator__epsilon=0.7, esti

[CV 2/5] END estimator__C=25, estimator__degree=5, estimator__epsilon=0.7, estimator__kernel=poly;, score=-4.001 total time=   1.7s
[CV 2/5] END estimator__C=25, estimator__degree=5, estimator__epsilon=0.9, estimator__kernel=poly;, score=-3.981 total time=   1.5s
[CV 2/5] END estimator__C=25, estimator__degree=6, estimator__epsilon=0.1, estimator__kernel=poly;, score=-5.622 total time=   1.8s
[CV 5/5] END estimator__C=25, estimator__degree=6, estimator__epsilon=0.1, estimator__kernel=poly;, score=-5.265 total time=   2.0s
[CV 4/5] END estimator__C=25, estimator__degree=6, estimator__epsilon=0.3, estimator__kernel=poly;, score=-5.222 total time=   1.3s
[CV 2/5] END estimator__C=25, estimator__degree=6, estimator__epsilon=0.5, estimator__kernel=poly;, score=-5.590 total time=   1.6s
[CV 2/5] END estimator__C=25, estimator__degree=6, estimator__epsilon=0.7, estimator__kernel=poly;, score=-5.585 total time=   1.5s
[CV 1/5] END estimator__C=25, estimator__degree=6, estimator__epsilon=0.9, e

[CV 4/5] END estimator__C=25, estimator__degree=6, estimator__epsilon=0.5, estimator__kernel=poly;, score=-5.221 total time=   1.5s
[CV 3/5] END estimator__C=25, estimator__degree=6, estimator__epsilon=0.7, estimator__kernel=poly;, score=-4.946 total time=   1.4s
[CV 2/5] END estimator__C=25, estimator__degree=6, estimator__epsilon=0.9, estimator__kernel=poly;, score=-5.571 total time=   1.7s
[CV 1/5] END estimator__C=50, estimator__degree=2, estimator__epsilon=0.1, estimator__kernel=poly;, score=-5.272 total time=   0.6s
[CV 4/5] END estimator__C=50, estimator__degree=2, estimator__epsilon=0.1, estimator__kernel=poly;, score=-4.950 total time=   0.6s
[CV 4/5] END estimator__C=50, estimator__degree=2, estimator__epsilon=0.3, estimator__kernel=poly;, score=-4.953 total time=   0.6s
[CV 3/5] END estimator__C=50, estimator__degree=2, estimator__epsilon=0.5, estimator__kernel=poly;, score=-4.473 total time=   0.5s
[CV 3/5] END estimator__C=50, estimator__degree=2, estimator__epsilon=0.7, e

In [58]:
# Show the hyper-parameter combination that achieved the best mean CV score.
print(f"Best parameters:\n{res.best_params_}")

Best parameters:
{'estimator__C': 10, 'estimator__epsilon': 0.3, 'estimator__gamma': 0.1, 'estimator__kernel': 'rbf'}


In [52]:
# The search scorer was built with greater_is_better=False, so best_score_
# holds the negated MEE; flip the sign to report the error itself.
print(f"Validation MEE: {-res.best_score_}")

# best_estimator_ was refitted on all of x_train (refit=True), so the error
# below is measured on data the model has already seen and is optimistic.
model = res.best_estimator_
y_pred = model.predict(x_train)
print(f"Training MEE: {mee(y_train, y_pred)}")

Validation MEE: 1.4738955623607497
Training MEE: 1.2392860132257646


In [55]:
# Persist the refitted best estimator with joblib.
# NOTE(review): only the regressor is saved here; the StandardScaler fitted
# inside load_dataset() is not, so inputs must be standardized the same way
# before this model is used for prediction.
dump(model, './../models/svm.z') 

['./../models/svm.z']