In [1]:
import os 
import sys 
from copy import deepcopy
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
from sklearn import datasets
from sklearn import svm
from sklearn.utils import shuffle 
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import random
import seaborn as sns
np.random.seed(seed=0)
# Set seed so that consistency is maintained and can debug properly 

In [6]:
df = pd.read_csv(r"Dataset.csv")
df.shape

(173, 6)

In [7]:
df.head()

Unnamed: 0,Diameter,porosity,shape factor,Kinematic Viscosity,a,b
0,0.00101,0.4,7.0,7.09e-07,99.0,2630.0
1,0.00101,0.381,7.0,7.37e-07,115.0,3450.0
2,0.0017,0.436,7.0,7.19e-07,32.5,1100.0
3,0.0017,0.417,7.0,7.41e-07,47.5,1990.0
4,0.0017,0.403,7.0,7.37e-07,40.0,1640.0


In [8]:
x = df.iloc[:, 0:4]
x.head()

Unnamed: 0,Diameter,porosity,shape factor,Kinematic Viscosity
0,0.00101,0.4,7.0,7.09e-07
1,0.00101,0.381,7.0,7.37e-07
2,0.0017,0.436,7.0,7.19e-07
3,0.0017,0.417,7.0,7.41e-07
4,0.0017,0.403,7.0,7.37e-07


In [9]:
ya = df.iloc[:, 4]
yb = df.iloc[:, 5]

ya.head()

0     99.0
1    115.0
2     32.5
3     47.5
4     40.0
Name: a, dtype: float64

In [10]:
ya.shape

(173,)

In [13]:
from sklearn.preprocessing import StandardScaler
sc_x = StandardScaler()
sc_ya = StandardScaler()
sc_yb = StandardScaler()
x = sc_x.fit_transform(x)
ya = sc_ya.fit_transform(ya.values.reshape(-1, 1))
yb = sc_yb.fit_transform(yb.values.reshape(-1, 1))

In [37]:
X_train, X_test, yb_train, yb_test = train_test_split(x, ya, test_size=0.2, train_size=0.8, random_state=42, shuffle=True)

In [38]:
X_train


array([[-4.81836573e-01,  2.54423291e+00, -4.82131531e-01,
        -1.11705482e-01],
       [-3.32304484e-01,  9.17085413e-01,  1.16265110e+00,
         6.68136583e-02],
       [ 6.68303145e+00,  1.36718758e-01, -1.39589966e+00,
         6.68136583e-02],
       [-2.86684525e-01, -1.10854718e+00, -8.47638783e-01,
        -1.17328132e-01],
       [ 2.75863043e-02,  7.01239317e-01,  1.16265110e+00,
        -5.12619932e-02],
       [-4.62321368e-01,  5.37010287e-02,  1.16265110e+00,
         1.08983534e-01],
       [ 3.74297994e-01,  6.77889339e-03,  1.13523806e+00,
        -4.42036174e-01],
       [-4.64095477e-01,  3.19357762e-01, -4.82131531e-01,
        -3.56290760e-01],
       [-4.10872192e-01, -1.93872447e+00,  1.16265110e+00,
        -2.53677396e-01],
       [ 5.95301351e-01,  1.04991378e+00,  1.16265110e+00,
         3.87004080e-02],
       [ 4.56170335e+00, -8.26286901e-01, -1.39589966e+00,
         6.68136583e-02],
       [-4.08436344e-02, -5.65611231e-01, -1.39589966e+00,
      

In [39]:
X_train.shape

(138, 4)

In [40]:
X_test.shape

(35, 4)

In [42]:
regr = svm.SVR(kernel = 'rbf')
regr.fit(X_train, yb_train.ravel())



SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
    gamma='auto_deprecated', kernel='rbf', max_iter=-1, shrinking=True,
    tol=0.001, verbose=False)

In [44]:
yb_pred = regr.predict(X_test)
yb_pred

array([-0.26103506, -0.37855141,  0.21812848,  0.07171888, -0.13636204,
       -0.25514503, -0.37768713, -0.1837672 , -0.15952263,  0.1853181 ,
       -0.23841665,  0.15027558,  0.3187022 ,  0.03857932, -0.26796281,
       -0.16008745,  0.03928963, -0.22819429, -0.06227064, -0.19234852,
        0.3639033 , -0.25026364, -0.21116065, -0.38385817, -0.3106398 ,
       -0.19306385, -0.27357421, -0.19114152, -0.17737897, -0.23456855,
       -0.39909606, -0.18551262, -0.17345239, -0.19056427, -0.17261964])

In [47]:
check = pd.DataFrame({'Real Values':sc_yb.inverse_transform(yb_test.reshape(-1)), 'Predicted Values':sc_yb.inverse_transform(yb_pred)})
check

Unnamed: 0,Real Values,Predicted Values
0,505.820769,573.318914
1,512.196909,327.02393
2,3697.814924,1577.567125
3,919.779445,1270.71661
4,501.406517,834.613121
5,502.877934,585.663459
6,511.706437,328.835318
7,965.883847,735.259697
8,501.484993,786.072341
9,651.000589,1508.801957


In [50]:
from sklearn.metrics import mean_squared_error
import math
MSE = mean_squared_error(yb_test, yb_pred)
RMSE = math.sqrt(MSE)
print("Root Mean Square Error:\n")
print(RMSE)

Root Mean Square Error:

0.42353059832614603


In [51]:
print(MSE)

0.17937816771850323
