# MULTIVARIATE REGRESSION

Import the Computer Hardware Data Set

In [None]:
import matplotlib.pyplot as plt
import numpy as np

In [None]:
import pandas as pd
# Read the raw file from the UCI repository; header=None because the columns
# are labelled explicitly right below.
data = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/cpu-performance/machine.data',
    header=None,
)

# Label the ten columns. The leading space in ' MMIN' and ' MMAX' is kept
# on purpose: these exact labels are used to select the features later on.
data.columns = [
    'Vendor Name',
    'Model Name',
    'MYCT',
    ' MMIN',
    ' MMAX',
    'CACH',
    'CHMIN',
    'CHMAX',
    'PRP-class',
    'ERP',
]

# Keep only the six hardware columns plus the 'PRP-class' target.
data = data.drop(columns=['Vendor Name', 'Model Name', 'ERP'])
data.head()

Unnamed: 0,MYCT,MMIN,MMAX,CACH,CHMIN,CHMAX,PRP-class
0,125,256,6000,256,16,128,198
1,29,8000,32000,32,8,32,269
2,29,8000,32000,32,8,32,220
3,29,8000,32000,32,8,32,172
4,29,8000,16000,32,8,16,132


Separating features from the target variable

In [None]:
# Feature matrix: the six hardware-configuration columns, in their original
# order (labels must match the frame exactly, leading spaces included).
x_data = data.loc[
    :,
    ['MYCT', ' MMIN', ' MMAX', 'CACH', 'CHMIN', 'CHMAX'],
]

# Regression target.
y_data = data.loc[:, 'PRP-class']

The goal is to predict the performance index (the `PRP-class` column) from the CPU configuration features.

### SVM and GP MULTIVARIATE REGRESSION

In [None]:
from sklearn.model_selection import train_test_split, KFold
from sklearn import svm
from sklearn.gaussian_process.kernels import RBF
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.metrics import mean_squared_error
import statistics
from math import sqrt


In [None]:
# 10-fold cross-validation of an RBF-kernel SVR and a Gaussian-process (GP)
# regressor. For every fold both models are fitted on the training part and
# scored on the training and validation parts; the per-fold scores are then
# averaged and printed.
#
# NOTE: despite the "MSE" prefix (kept so that any existing reference to these
# names keeps working), every list below stores one *root* mean squared error
# (RMSE) per fold, and the MEAN_MSE_* values are means of those RMSEs.
MSE_svm_train_vec = []
MSE_svm_val_vec = []
MSE_GP_train_vec = []
MSE_GP_val_vec = []

# Fixed seed: with shuffle=True and no random_state the fold assignment, and
# therefore every score printed below, changed from one run to the next.
kf = KFold(n_splits=10, shuffle=True, random_state=0)

# Iterating through each of the folds in K-Fold
for train_index, val_index in kf.split(y_data):

    # Splitting the training set from the validation set for this specific
    # fold (the indices produced by KFold are positional, hence .iloc).
    X_train, X_val = x_data.iloc[train_index, :], x_data.iloc[val_index, :]
    y_train, y_val = y_data.iloc[train_index], y_data.iloc[val_index]

    # NOTE(review): both models receive the raw, unscaled features.
    # scikit-learn's SVM guide recommends standardising inputs for kernel
    # methods; consider a StandardScaler pipeline here - TODO confirm.

    # SVR training
    regr = svm.SVR(kernel='rbf', C=1).fit(X_train, y_train)

    # GP training (kernel=None lets scikit-learn use its default kernel)
    gpr = GaussianProcessRegressor(kernel=None, random_state=0).fit(X_train, y_train)

    # SVR prediction
    svm_pred_train = regr.predict(X_train)
    svm_pred_val = regr.predict(X_val)

    # GP prediction: the predictive standard deviation is returned alongside
    # the mean, but only the mean is used for scoring below.
    mean_prediction_train, std_prediction_train = gpr.predict(X_train, return_std=True)
    mean_prediction_val, std_prediction_val = gpr.predict(X_val, return_std=True)

    # SVR errors: the MSE is computed first, its square root (RMSE) is stored.
    MSE_svm_train = mean_squared_error(y_train, svm_pred_train)
    MSE_svm_train_vec.append(sqrt(MSE_svm_train))

    MSE_svm_val = mean_squared_error(y_val, svm_pred_val)
    MSE_svm_val_vec.append(sqrt(MSE_svm_val))

    # GP errors, same convention.
    MSE_GP_train = mean_squared_error(y_train, mean_prediction_train)
    MSE_GP_train_vec.append(sqrt(MSE_GP_train))

    MSE_GP_val = mean_squared_error(y_val, mean_prediction_val)
    MSE_GP_val_vec.append(sqrt(MSE_GP_val))


# Average the per-fold RMSE values.
MEAN_MSE_svm_train = statistics.mean(MSE_svm_train_vec)
MEAN_MSE_svm_val = statistics.mean(MSE_svm_val_vec)

MEAN_MSE_GP_train = statistics.mean(MSE_GP_train_vec)
MEAN_MSE_GP_val = statistics.mean(MSE_GP_val_vec)

print('SVM REGRESSION')
# RMSE: how far, on average, the points lie from the fitted regression.
print('RMSE train:', MEAN_MSE_svm_train)
print('RMSE validation:', MEAN_MSE_svm_val)
print('-----------------------------------')
print('GP REGRESSION')
print('RMSE train:', MEAN_MSE_GP_train)
print('RMSE validation:', MEAN_MSE_GP_val)

SVM REGRESSION
RMSE train: 157.89962455065668
RMSE validation: 137.5320143938734
-----------------------------------
GP REGRESSION
RMSE train: 9.352379173043268
RMSE validation: 161.5531208421878
