In [3]:
import dataset_utils as dataset

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.multioutput import MultiOutputRegressor
from sklearn.svm import SVR
import itertools
from svm import *

from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler, MinMaxScaler

#column_names = ['ID', 
#                'INPUT_1', 'INPUT_2', 'INPUT_3', 'INPUT_4', 'INPUT_5', 'INPUT_6', 'INPUT_7', 'INPUT_8', 'INPUT_9', 'INPUT_10',
#                'TARGET_x', 'TARGET_y', 'TARGET_z']
#dataframe = pd.read_csv("../data/ML-CUP23-TR.csv", sep=',', comment='#', names=column_names, index_col= 'ID')
#dataframe.head()

from dataset_utils import skl_arange_dataset

# Read the two ML-CUP24 CSV files: "TR" is kept as the development data and
# "TS" as the blind data.
dev_data = dataset.load_dataset("../data/ML-CUP24-TR.csv")
blind_data = dataset.load_dataset("../data/ML-CUP24-TS.csv")

# Arrange the datasets into inputs and labels.
# NOTE(review): presumably this holds out an internal test split, applies the
# given scaler and builds 5 cross-validation folds -- confirm against
# dataset_utils.skl_arange_dataset. train_folds / val_folds are consumed
# pairwise further down, each element being unpacked as (inputs, targets).
(
    X_dev, y_dev,
    X_test, y_test,
    X_blind,
    train_folds, val_folds,
) = skl_arange_dataset(
    dev_data,
    blind_data,
    scaler=StandardScaler(),
    folds=5,
)

In [5]:
# Manual grid search over (C, epsilon) for a linear-kernel SVR, scored with the
# Mean Euclidean Error (MEE) averaged over the pre-built cross-validation folds.
C_values = [0.01, 0.1, 1, 10, 100, 1000]
epsilon_values = [0.1, 0.01, 0.001, 0.0001]

# One record per (C, epsilon) configuration. Keeping the scores (instead of only
# printing them) lets later cells pick the best configuration programmatically,
# e.g. min(svr_linear_cv_results, key=lambda r: r["mee"]).
svr_linear_cv_results = []

for C, epsilon in itertools.product(C_values, epsilon_values):
    print(f"C: {C}, epsilon: {epsilon}")
    mee = []  # validation MEE of each fold for this configuration
    for t_fold, v_fold in zip(train_folds, val_folds):
        # max_iter bounds the solver's iterations (see the scikit-learn SVR docs).
        # NOTE(review): the blank gaps in the recorded output for larger C look
        # like stripped ConvergenceWarnings -- confirm whether the cap is hit.
        svr = SVR(kernel='linear', C=C, epsilon=epsilon, max_iter=10000)  # SVR instance

        inputs, targets = t_fold
        # SVR predicts a single output, so wrap it in MultiOutputRegressor,
        # which fits one regressor per target column.
        mor = MultiOutputRegressor(svr)

        # Train on the training part of the fold
        mor.fit(inputs, targets)

        val_inputs, val_targets = v_fold
        # Predict outputs for the validation part of the fold
        predictions = mor.predict(val_inputs)

        # Per-sample Euclidean distance between predicted and actual target vectors
        euclidean_distances = np.sqrt(np.sum((predictions - val_targets) ** 2, axis=1))

        # Mean Euclidean Error of this fold
        mean_euclidean_error = np.mean(euclidean_distances)
        mee.append(mean_euclidean_error)

    # Average the per-fold scores, report them, and keep them for later use
    mean_mee = np.mean(mee)
    print(f"Mean Euclidean Error: {mean_mee}")
    svr_linear_cv_results.append(
        {"C": C, "epsilon": epsilon, "mee": mean_mee, "fold_mee": mee}
    )


C: 0.01, epsilon: 0.1
Mean Euclidean Error: 6.043379246147235
C: 0.01, epsilon: 0.01
Mean Euclidean Error: 6.038163194957839
C: 0.01, epsilon: 0.001
Mean Euclidean Error: 6.038716807587248
C: 0.01, epsilon: 0.0001
Mean Euclidean Error: 6.038731369204222
C: 0.1, epsilon: 0.1
Mean Euclidean Error: 1.080429600164028
C: 0.1, epsilon: 0.01
Mean Euclidean Error: 1.0823633545099411
C: 0.1, epsilon: 0.001
Mean Euclidean Error: 1.0818295584834694
C: 0.1, epsilon: 0.0001
Mean Euclidean Error: 1.0819099830724348
C: 1, epsilon: 0.1
Mean Euclidean Error: 0.9580897690074931
C: 1, epsilon: 0.01
Mean Euclidean Error: 0.9643590868748966
C: 1, epsilon: 0.001
Mean Euclidean Error: 0.9636868052192034
C: 1, epsilon: 0.0001




Mean Euclidean Error: 0.9637369676555352
C: 10, epsilon: 0.1
Mean Euclidean Error: 0.9598365318580839
C: 10, epsilon: 0.01




Mean Euclidean Error: 0.9723412331704893
C: 10, epsilon: 0.001
Mean Euclidean Error: 0.9762970045950325
C: 10, epsilon: 0.0001




Mean Euclidean Error: 0.9897685330493318
C: 100, epsilon: 0.1




Mean Euclidean Error: 1.2499588849204568
C: 100, epsilon: 0.01




Mean Euclidean Error: 1.3313165088989751
C: 100, epsilon: 0.001




Mean Euclidean Error: 1.3893037742455192
C: 100, epsilon: 0.0001




Mean Euclidean Error: 1.3197775287598477
C: 1000, epsilon: 0.1




Mean Euclidean Error: 2.2921135689185297
C: 1000, epsilon: 0.01




Mean Euclidean Error: 2.7369591558241746
C: 1000, epsilon: 0.001




Mean Euclidean Error: 2.714502828931578
C: 1000, epsilon: 0.0001




Mean Euclidean Error: 2.5231920622138824


