### required libraries

In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import pylab as pl
from sklearn import preprocessing


### open file

In [2]:
# Load the Nelson performance-degradation measurements. The path is relative,
# so the CSV has to sit in the notebook's working directory.
D_electric_df = pd.read_csv(
    filepath_or_buffer="Performance-Degradation Data Nelson.csv"
)
# Preview the first five rows as a quick sanity check of the parsed columns.
D_electric_df.head(n=5)

Unnamed: 0,x1,x2,y
0,1.0,180.0,15.0
1,1.0,180.0,17.0
2,1.0,180.0,15.5
3,1.0,180.0,16.5
4,1.0,225.0,15.5


In [3]:
# Show the dtype pandas inferred for each column, to confirm they were parsed as numbers.
D_electric_df.dtypes

x1    float64
x2    float64
y     float64
dtype: object

### Define X and y

In [4]:
# The two predictor columns, in the order they appear in the feature matrix.
feature_columns = ['x1', 'x2']
feature_df = D_electric_df[feature_columns]

# Plain NumPy matrix (one row per observation) for scikit-learn.
X = np.asarray(feature_df)
X[:5]

array([[  1., 180.],
       [  1., 180.],
       [  1., 180.],
       [  1., 180.],
       [  1., 225.]])

In [5]:
# Store the response column as float in the frame itself, then expose it as a
# 1-D NumPy array for the estimators below.
target_column = D_electric_df['y'].astype('float')
D_electric_df['y'] = target_column
y = np.asarray(D_electric_df['y'])
y[:20]

array([15. , 17. , 15.5, 16.5, 15.5, 15. , 16. , 14.5, 15. , 14.5, 12.5,
       11. , 14. , 13. , 14. , 11.5, 14. , 16. , 13. , 13.5])

### normalizing x

In [6]:
# Standardise each feature column with StandardScaler and print the first ten
# rows before and after, so the effect of the transform is visible.
# NOTE(review): the scaler is fitted on every row, i.e. before the train/test
# split performed later in this notebook, so statistics of the held-out rows
# influence the scaling (data leakage). Consider fitting on the training rows
# only, or wrapping scaler + model in a Pipeline.
print(X[:10])
Scaler = preprocessing.StandardScaler()
Scaler.fit(X)
X = Scaler.transform(X.astype(float))
print(X[:10])

[[  1. 180.]
 [  1. 180.]
 [  1. 180.]
 [  1. 180.]
 [  1. 225.]
 [  1. 225.]
 [  1. 225.]
 [  1. 225.]
 [  1. 250.]
 [  1. 250.]]
[[-0.94101309 -1.49618805]
 [-0.94101309 -1.49618805]
 [-0.94101309 -1.49618805]
 [-0.94101309 -1.49618805]
 [-0.94101309 -0.21374115]
 [-0.94101309 -0.21374115]
 [-0.94101309 -0.21374115]
 [-0.94101309 -0.21374115]
 [-0.94101309  0.49872935]
 [-0.94101309  0.49872935]]


### Data separation

In [7]:
from sklearn.model_selection import train_test_split
# Hold out 25% of the rows for testing; the fixed random_state keeps the
# partition reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=50,
)

# Report the shapes of both partitions as a sanity check.
for split_label, split_X, split_y in (
    ('Train set:', X_train, y_train),
    ('Test set:', X_test, y_test),
):
    print(split_label, split_X.shape, split_y.shape)

Train set: (96, 2) (96,)
Test set: (32, 2) (32,)


### Calculate R2 score and mean absolute error with K-fold cross-validation

In [8]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
import numpy as np


def _cv_mae_and_r2(model, features, targets, folds=4):
    """Cross-validate ``model`` once and return its per-fold MAE and R2.

    Both metrics are computed from the same fitted fold models, so each
    estimator is trained ``folds`` times instead of ``2 * folds`` times as
    it would be with one ``cross_val_score`` call per metric.

    Parameters
    ----------
    model : estimator
        Unfitted scikit-learn regressor.
    features, targets : array-like
        Full feature matrix and target vector to split into folds.
    folds : int, default 4
        Number of cross-validation folds.

    Returns
    -------
    (mae, r2) : tuple of ndarray
        Per-fold mean absolute error (positive) and per-fold R2 score.
    """
    fold_scores = cross_validate(
        model,
        features,
        targets,
        cv=folds,
        scoring=('neg_mean_absolute_error', 'r2'),
    )
    # scikit-learn reports the error negated ("greater is better"); flip the
    # sign back so the value reads as an ordinary MAE.
    return -1 * fold_scores['test_neg_mean_absolute_error'], fold_scores['test_r2']


# One support-vector regressor per kernel, all with default hyper-parameters.
model_linear = SVR(kernel='linear')
model_rbf = SVR(kernel='rbf')
model_poly = SVR(kernel='poly')
model_sigmoid = SVR(kernel='sigmoid')

# 4-fold cross-validation on the full (already scaled) data set.
mae_linear, r2_linear = _cv_mae_and_r2(model_linear, X, y)
mae_rbf, r2_rbf = _cv_mae_and_r2(model_rbf, X, y)
mae_poly, r2_poly = _cv_mae_and_r2(model_poly, X, y)
mae_sigmoid, r2_sigmoid = _cv_mae_and_r2(model_sigmoid, X, y)

# Report the fold-averaged scores for every kernel.
print("MAE (linear kernel):", np.mean(mae_linear))
print("MAE (RBF kernel):", np.mean(mae_rbf))
print("MAE (polynomial kernel):", np.mean(mae_poly))
print("MAE (sigmoid kernel):", np.mean(mae_sigmoid))
print("R2 score (linear kernel):", np.mean(r2_linear))
print("R2 score (RBF kernel):", np.mean(r2_rbf))
print("R2 score (polynomial kernel):", np.mean(r2_poly))
print("R2 score (sigmoid kernel):", np.mean(r2_sigmoid))

MAE (linear kernel): 2.0696381245514797
MAE (RBF kernel): 2.2985670505536637
MAE (polynomial kernel): 3.762904507074092
MAE (sigmoid kernel): 3.00050725303824
R2 score (linear kernel): 0.07312273727223464
R2 score (RBF kernel): 0.10895320219817875
R2 score (polynomial kernel): -1.2982322540950209
R2 score (sigmoid kernel): -0.157566767233079


### Calculate R2 score and mean absolute error for L2-regularized kernel ridge regression with alpha=1

In [9]:
from sklearn.kernel_ridge import KernelRidge
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import numpy as np

# Kernel ridge regression with L2 penalty alpha=1: one model per kernel,
# trained on the training split and scored on the held-out test split.
model_linear, model_rbf, model_poly, model_sigmoid = (
    KernelRidge(kernel=kernel_name, alpha=1)
    for kernel_name in ('linear', 'rbf', 'polynomial', 'sigmoid')
)
ridge_models = (model_linear, model_rbf, model_poly, model_sigmoid)

# Fit every model on the same training rows.
for ridge_model in ridge_models:
    ridge_model.fit(X_train, y_train)

# Predictions for the held-out rows, in the same kernel order.
yhat_linear, yhat_rbf, yhat_poly, yhat_sigmoid = (
    ridge_model.predict(X_test) for ridge_model in ridge_models
)
test_predictions = (yhat_linear, yhat_rbf, yhat_poly, yhat_sigmoid)

mae_linear, mae_rbf, mae_poly, mae_sigmoid = (
    mean_absolute_error(y_test, predicted) for predicted in test_predictions
)
r2_linear, r2_rbf, r2_poly, r2_sigmoid = (
    r2_score(y_test, predicted) for predicted in test_predictions
)

# NOTE(review): the linear kernel scores far worse than the others (R2 around
# -8). Presumably this is because KernelRidge fits no intercept while X is
# standardised and y is not centred - confirm, and consider centring y.
print("MAE (linear kernel with regularization):", mae_linear)
print("MAE (RBF kernel with regularization):", mae_rbf)
print("MAE (polynomial kernel with regularization):", mae_poly)
print("MAE (sigmoid kernel with regularization):", mae_sigmoid)
print("R2 score (linear kernel with regularization):", r2_linear)
print("R2 score (RBF kernel with regularization):", r2_rbf)
print("R2 score (polynomial kernel with regularization):", r2_poly)
print("R2 score (sigmoid kernel with regularization):", r2_sigmoid)

MAE (linear kernel with regularization): 11.29929843954404
MAE (RBF kernel with regularization): 1.1016312676690652
MAE (polynomial kernel with regularization): 1.02141057479098
MAE (sigmoid kernel with regularization): 1.2015347058745174
R2 score (linear kernel with regularization): -8.033753233858183
R2 score (RBF kernel with regularization): 0.8540177945904703
R2 score (polynomial kernel with regularization): 0.8740271562889432
R2 score (sigmoid kernel with regularization): 0.8564279597644999




### Calculate R2 score and mean absolute error for L2-regularized kernel ridge regression with alpha=2

In [10]:
from sklearn.kernel_ridge import KernelRidge
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import numpy as np

# Same experiment with a stronger L2 penalty (alpha=2): one kernel ridge model
# per kernel, trained on the training split and scored on the test split.
model_linear, model_rbf, model_poly, model_sigmoid = (
    KernelRidge(kernel=kernel_name, alpha=2)
    for kernel_name in ('linear', 'rbf', 'polynomial', 'sigmoid')
)
ridge_models = (model_linear, model_rbf, model_poly, model_sigmoid)

# Fit every model on the same training rows.
for ridge_model in ridge_models:
    ridge_model.fit(X_train, y_train)

# Predictions for the held-out rows, in the same kernel order.
yhat_linear, yhat_rbf, yhat_poly, yhat_sigmoid = (
    ridge_model.predict(X_test) for ridge_model in ridge_models
)
test_predictions = (yhat_linear, yhat_rbf, yhat_poly, yhat_sigmoid)

mae_linear, mae_rbf, mae_poly, mae_sigmoid = (
    mean_absolute_error(y_test, predicted) for predicted in test_predictions
)
r2_linear, r2_rbf, r2_poly, r2_sigmoid = (
    r2_score(y_test, predicted) for predicted in test_predictions
)

# NOTE(review): the linear kernel again scores far worse than the others;
# presumably the missing intercept in KernelRidge with an uncentred y - confirm.
print("MAE  (linear kernel with regularization):", mae_linear)
print("MAE  (RBF kernel with regularization):", mae_rbf)
print("MAE  (polynomial kernel with regularization):", mae_poly)
print("MAE  (sigmoid kernel with regularization):", mae_sigmoid)
print("R2 score (linear kernel with regularization):", r2_linear)
print("R2 score (RBF kernel with regularization):", r2_rbf)
print("R2 score (polynomial kernel with regularization):", r2_poly)
print("R2 score (sigmoid kernel with regularization):", r2_sigmoid)

MAE  (linear kernel with regularization): 11.3030567335848
MAE  (RBF kernel with regularization): 1.2911341613149134
MAE  (polynomial kernel with regularization): 1.0801986667755576
MAE  (sigmoid kernel with regularization): 2.2035119653940543
R2 score (linear kernel with regularization): -8.039870553817783
R2 score (RBF kernel with regularization): 0.8005220564348796
R2 score (polynomial kernel with regularization): 0.8642112090064145
R2 score (sigmoid kernel with regularization): 0.4784939846338221




### Finding the best R2 score with a grid search

In [11]:
from sklearn.kernel_ridge import KernelRidge
from sklearn.model_selection import GridSearchCV
import numpy as np

# Exhaustive search over kernel ridge hyper-parameters, scored by the mean R2
# of a 4-fold cross-validation on the training split.
model = KernelRidge()

alpha_values = [0.2, 0.8, 1, 5, 10, 20, 50, 300]

# `degree` only affects the polynomial kernel; KernelRidge ignores it for the
# other kernels. Sweeping it for 'linear' and 'rbf' would fit three identical
# candidates per alpha, so it is varied for 'polynomial' only
# (40 candidates instead of 72).
param_grid = [
    {'alpha': alpha_values, 'kernel': ['linear', 'rbf']},
    {'alpha': alpha_values, 'kernel': ['polynomial'], 'degree': [2, 3, 4]},
]

grid_search = GridSearchCV(model, param_grid=param_grid, cv=4, scoring='r2')

grid_search.fit(X_train, y_train)

# Best hyper-parameter combination and its cross-validated R2.
print("Best parameters: ", grid_search.best_params_)
print("Best R2 score: ", grid_search.best_score_)

Best parameters:  {'alpha': 0.2, 'degree': 3, 'kernel': 'polynomial'}
Best R2 score:  0.7923436501510474
