## Importing necessary modules

In [64]:
from sklearn import preprocessing
import numpy as np
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.metrics import mean_squared_error

## Import data as pandas dataframe

In [65]:
data = pd.read_csv("data.csv")

## Splitting data and normalising data after splitting

In [66]:
y = data[['Y1','Y2']]
X =  data[['X1','X2','X3','X4','X5','X6','X7','X8']]
y = np.array(y)
X = np.array(X)



In [67]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
scaler = MinMaxScaler()
normalisedX_train = scaler.fit_transform(X_train)
normalisedX_test = scaler.fit_transform(X_test)

## Regression function

In [68]:
def regressor(model,x_train,y_train,x_test,y_test):
    model.fit(x_train,y_train)
    outputs = model.predict(x_test)
    return mean_squared_error(outputs,y_test)

## Neural Network Regression

In [69]:
from sklearn.neural_network import MLPRegressor

MLP = MLPRegressor(max_iter=10000)
print(regressor(MLP,normalisedX_train,y_train,normalisedX_test,y_test))

10.324876097531902


## Random Forest Regression

In [70]:
from sklearn.ensemble import RandomForestRegressor

RFR = RandomForestRegressor()
print(regressor(RFR,normalisedX_train,y_train,normalisedX_test,y_test))

2.0715639090909144


## SVM Regression

In [71]:
from sklearn.svm import SVR

ytrain1,ytrain2=np.split(y_train,2,axis=1)
ytest1,ytest2=np.split(y_test,2,axis=1)

ytrain1 = ytrain1.flatten()
ytrain2 = ytrain2.flatten()
ytest1 = ytest1.flatten()
ytest2 = ytest2.flatten()


SVR1 = SVR()
print(regressor(SVR1,normalisedX_train,ytrain1,normalisedX_test,ytest1))

SVR2 = SVR()
print(regressor(SVR2,normalisedX_train,ytrain2,normalisedX_test,ytest2))

8.504729910815652
14.415513857421846


## Cross Validation

In [72]:
from sklearn.model_selection import cross_val_score

X = scaler.fit_transform(X)
RFRvalues = cross_val_score(RFR, X, y, cv=5, scoring='neg_mean_squared_error')
MLPvalues = cross_val_score(MLP, X, y, cv=5, scoring='neg_mean_squared_error')

y1,y2=np.split(y,2,axis=1)
SVMvaluesY1 = cross_val_score(MLP, X, y1.flatten(), cv=5, scoring='neg_mean_squared_error')
SVMvaluesY2 = cross_val_score(MLP, X, y2.flatten(), cv=5, scoring='neg_mean_squared_error')


print("Random forest values are:",RFRvalues)
print("Neural Network values are:",MLPvalues)
print("Support Vector Machine values for y1 are:",SVMvaluesY1)
print("Support Vector Machine values for y2 are:",SVMvaluesY2)


Random forest values are: [-10.23951144  -2.31207814  -1.51835618  -2.23648824  -2.52669106]
Neural Network values are: [-11.63293261  -9.61325679  -5.11536483  -8.3739203   -7.0016948 ]
Support Vector Machine values for y1 are: [-11.55157989  -7.44591161  -5.53531637  -5.28080389  -6.97393723]
Support Vector Machine values for y2 are: [-10.9200483  -10.60668932  -8.70812916  -9.49556164  -8.47711567]


In [73]:
print("Random forest mean:",RFRvalues.mean())
print("Neural Network mean:",MLPvalues.mean())
print("Support Vector Machine values mean:",SVMvaluesY1.mean())
print("Support Vector Machine values mean:",SVMvaluesY2.mean())

Random forest mean: -3.7666250131131016
Neural Network mean: -8.347433867222282
Support Vector Machine values mean: -7.357509800810954
Support Vector Machine values mean: -9.641508820031216


## KFold 

In [74]:
from sklearn.model_selection import KFold

kf5 = KFold(n_splits=5,random_state = 4, shuffle=True)
MLP = MLPRegressor(max_iter=5000)
RFR = RandomForestRegressor()
SVR1 = SVR()
SVR2= SVR()

RFRvalues = []
MLPvalues = []
SVMvaluesY1 = []
SVMvaluesY2 = []


for train_index, test_index in kf5.split(X):
    
    X_train, X_test, y_train, y_test = X[train_index], X[test_index],y[train_index], y[test_index]
    
    

    normalisedX_train = scaler.fit_transform(X_train)
    normalisedX_test = scaler.fit_transform(X_test)

    MLPvalues.append(regressor(MLP,normalisedX_train,y_train,normalisedX_test,y_test))
    RFRvalues.append(regressor(RFR,normalisedX_train,y_train,normalisedX_test,y_test))
    
    ytrain1,ytrain2=np.split(y_train,2,axis=1)
    ytest1,ytest2=np.split(y_test,2,axis=1)

    ytrain1 = ytrain1.flatten()
    ytrain2 = ytrain2.flatten()
    ytest1 = ytest1.flatten()
    ytest2 = ytest2.flatten()
    
    SVMvaluesY1.append(regressor(SVR1,normalisedX_train,ytrain1,normalisedX_test,ytest1))
    SVMvaluesY2.append(regressor(SVR2,normalisedX_train,ytrain2,normalisedX_test,ytest2))
    
    
print("Random forest values are:",RFRvalues)
print("Neural Network values are:",MLPvalues)
print("Support Vector Machine values for y1 are:",SVMvaluesY1)
print("Support Vector Machine values for y2 are:",SVMvaluesY2)


Random forest values are: [1.447013123441557, 1.6594315740909085, 1.6991209703896186, 2.0369087556535885, 2.205296934117647]
Neural Network values are: [7.096557384370879, 6.375449521353525, 7.686284018584752, 8.135937573747636, 5.924770908785142]
Support Vector Machine values for y1 are: [7.762967785789221, 7.53251878907466, 8.36644555775056, 9.957125148285282, 6.088576926687808]
Support Vector Machine values for y2 are: [8.860262283180703, 7.969576037525045, 12.10326599168046, 13.157286152808897, 8.596092059312433]


In [75]:
print("Random forest mean:",sum(RFRvalues)/len(RFRvalues))
print("Neural Network mean:",sum(MLPvalues)/len(MLPvalues))
print("Support Vector Machine values for y1 mean:",sum(SVMvaluesY1)/len(SVMvaluesY1))
print("Support Vector Machine values for y2 mean:",sum(SVMvaluesY2)/len(SVMvaluesY2))

Random forest mean: 1.8095542715386634
Neural Network mean: 7.043799881368386
Support Vector Machine values for y1 mean: 7.941526841517506
Support Vector Machine values for y2 mean: 10.137296504901506
