# FINAL EVALUATION

In [1]:
import numpy as np
import pandas as pd
import mglearn
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

In [2]:
# RBF-kernel SVR sweep over C and gamma on the extended Boston data from mglearn.
# 80/20 train/test split with a fixed seed so the numbers are repeatable.
x, y = mglearn.datasets.load_extended_boston()
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Apply log(1 + x) to the features of both splits.
x_train, x_test = np.log1p(x_train), np.log1p(x_test)

# NOTE(review): every (C, gamma) pair below is scored on the test split itself,
# so the best score reported is an optimistic estimate. Consider a separate
# validation split or cross-validation if these numbers drive model selection.
c_grid = [0.1, 1.0, 1000]
gamma_grid = [0.1, 1.0, 10]

for C in c_grid:
    for gamma in gamma_grid:
        svm_model = svm.SVR(kernel='rbf', C=C, gamma=gamma).fit(x_train, y_train)
        y_pred = svm_model.predict(x_test)

        # Both metrics are reported as percentages rounded to two decimals.
        r2_pct = round(r2_score(y_test, y_pred)*100, 2)
        relative_errors = np.abs((y_test - y_pred) / y_test)
        mre_pct = round(np.mean(relative_errors) * 100, 2)

        print(f"Tuning: C = {C}, gamma = {gamma}")
        print("R2 score               : ", r2_pct, "%")
        print("Mean Relative Error (%): ", mre_pct, "%")
    # Blank line between groups that share the same C.
    print()

Tuning: C = 0.1, gamma = 0.1
R2 score               :  23.56 %
Mean Relative Error (%):  25.44 %
Tuning: C = 0.1, gamma = 1.0
R2 score               :  19.39 %
Mean Relative Error (%):  26.6 %
Tuning: C = 0.1, gamma = 10
R2 score               :  2.91 %
Mean Relative Error (%):  33.24 %

Tuning: C = 1.0, gamma = 0.1
R2 score               :  51.78 %
Mean Relative Error (%):  15.45 %
Tuning: C = 1.0, gamma = 1.0
R2 score               :  56.41 %
Mean Relative Error (%):  15.7 %
Tuning: C = 1.0, gamma = 10
R2 score               :  18.55 %
Mean Relative Error (%):  26.5 %

Tuning: C = 1000, gamma = 0.1
R2 score               :  84.56 %
Mean Relative Error (%):  9.86 %
Tuning: C = 1000, gamma = 1.0
R2 score               :  78.31 %
Mean Relative Error (%):  15.66 %
Tuning: C = 1000, gamma = 10
R2 score               :  51.73 %
Mean Relative Error (%):  20.42 %



In [3]:
# QUESTION 5
# Compare SVR kernels (all other parameters left at their defaults) on two
# mglearn datasets, each split 60/40 with a fixed seed.

# BOSTON DATASET
x, y = mglearn.datasets.load_extended_boston()
boston_split = train_test_split(x, y, test_size=0.4, random_state=42)
x_train1, x_test1, y_train1, y_test1 = boston_split

# WAVE DATASET
x, y = mglearn.datasets.make_wave(n_samples=100)
wave_split = train_test_split(x, y, test_size=0.4, random_state=42)
x_train2, x_test2, y_train2, y_test2 = wave_split

# One (label, x_train, x_test, y_train, y_test) tuple per dataset.
datasets = [
    ("*** BOSTON DATASET ***", *boston_split),
    ("*** WAVES DATASET ***", *wave_split),
]

# Note: this loop rebinds x_train / x_test / y_train / y_test, replacing any
# values those names held before this cell ran.
for dataset_name, x_train, x_test, y_train, y_test in datasets:
    print(f"Dataset: {dataset_name}")
    for kernel in ('linear', 'rbf', 'poly'):
        print(kernel.upper() + ' MODEL')
        svm_model = svm.SVR(kernel=kernel).fit(x_train, y_train)
        y_pred = svm_model.predict(x_test)

        # R2 is shown as a fraction here; the relative error as a percentage.
        r2 = round(r2_score(y_test, y_pred), 2)
        # NOTE(review): the relative error divides by y_test, so targets close
        # to zero inflate it - presumably why the wave figures are so large;
        # confirm before comparing this metric across datasets.
        relative_errors = np.abs((y_test - y_pred) / y_test)
        mre_pct = round(np.mean(relative_errors) * 100, 2)

        print("R2 score               : ", r2)
        print("Mean Relative Error (%): ", mre_pct)
        print()

Dataset: *** BOSTON DATASET ***
LINEAR MODEL
R2 score               :  0.73
Mean Relative Error (%):  13.55

RBF MODEL
R2 score               :  0.56
Mean Relative Error (%):  16.6

POLY MODEL
R2 score               :  0.67
Mean Relative Error (%):  13.97

Dataset: *** WAVES DATASET ***
LINEAR MODEL
R2 score               :  0.65
Mean Relative Error (%):  93.25

RBF MODEL
R2 score               :  0.65
Mean Relative Error (%):  99.75

POLY MODEL
R2 score               :  0.45
Mean Relative Error (%):  75.83

