# Support Vector Regression 

In [1]:
#Importing all libraries 
import os 
import csv
import pandas as pd 
import matplotlib.pyplot as plt 
import numpy as np
import joblib 
import sklearn 
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error as MSE
from sklearn.svm import SVR

In [2]:
# Data Files 
testFile='./data/test.csv'
trainFile='./data/train.csv'
df=pd.read_csv(trainFile)
df.head()

Unnamed: 0,latitude,longitude,year,month,day,cp,e,ilspf,lsp,pev,tcrw,tp,swvl1,swvl2,ro
0,17.75,77.25,2019,5,30,0.0,-0.000114,0.0,0.0,-0.001029,0.0,0.0,0.206655,0.219927,0.0
1,17.0,77.25,2018,10,23,1e-05,-0.000437,0.0,0.0,-0.000713,0.00196,1e-05,0.334029,0.33247,4.875474e-07
2,17.25,77.0,2019,11,25,0.0,-0.000361,0.0,0.0,-0.000659,0.0,0.0,0.279484,0.327522,3.046822e-06
3,17.0,77.25,2018,10,2,4e-05,-0.000458,0.0,1.945253e-05,-0.000642,0.002063,6e-05,0.38543,0.34014,4.875474e-07
4,17.75,78.0,2018,11,3,2e-06,-0.000173,0.0,4.139729e-07,-0.000341,0.0,2e-06,0.238017,0.265713,1.95019e-06


In [14]:
def preProcessing(dataframe):
    #Split into X,y 
    #Scale them
    df1=dataframe[['cp','e','ilspf','lsp','pev','tcrw','tp','swvl1','swvl2','ro']]
    X = df1.iloc[:,:8].values.astype(float)
    y = df1.iloc[:,-1].values.astype(float)
    X=np.reshape(X,(-1,8))
    y=np.reshape(y,(-1,1))
    sc_X = StandardScaler()
    sc_y = StandardScaler()
    X = sc_X.fit_transform(X)
    y = sc_y.fit_transform(y)
    return(X,y,sc_X,sc_y)

In [15]:
# Training it on the file.
X,y,sx,sy=preProcessing(df)
print(X.shape)
print(y.shape)
print(X[0])
print(y[0])

(17337, 8)
(17337, 1)
[-0.37698086  1.86727592 -0.38875237 -0.14678389 -1.59503255 -0.20562039
 -0.24838408 -0.96510798]
[-0.07059821]


In [16]:
# Testing 
X_test,y_test,sxt,syt=preProcessing(pd.read_csv(testFile))
print(X_test.shape)
print(y_test.shape)

(913, 8)
(913, 1)


# Kernel-RBF

In [17]:
# Fitting the SVR 
rbf_reg = SVR(kernel='rbf')
rbf_reg.fit(X,y.ravel())

SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
  gamma='auto_deprecated', kernel='rbf', max_iter=-1, shrinking=True,
  tol=0.001, verbose=False)

In [27]:
# Loss Generation. 
y_pred_test = sy.inverse_transform((rbf_reg.predict(X_test)))
loss=1/913*(MSE(sy.inverse_transform(y_test),y_pred_test))
print(loss)

1.0722267439661577e-11


In [19]:
# Storing the files 
joblib.dump(rbf_reg,'./model_files/'+str('SVR_')+str('RBF')+'.sav')

['./model_files/SVR_RBF.sav']

## Kernel - linear


In [20]:
# Fitting the SVR 
lin_reg = SVR(kernel='linear')
lin_reg.fit(X,y.ravel())

SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
  gamma='auto_deprecated', kernel='linear', max_iter=-1, shrinking=True,
  tol=0.001, verbose=False)

In [26]:
# Loss Generation. 
y_pred_test = sy.inverse_transform((lin_reg.predict(X_test)))
loss=1/913*(MSE(sy.inverse_transform(y_test),y_pred_test))
print(loss)

9.888033234263935e-12


In [22]:
# Storing the files 
joblib.dump(lin_reg,'./model_files/'+str('SVR_')+str('LINEAR')+'.sav')

['./model_files/SVR_LINEAR.sav']

# Kernel-Polynomial(Degree=3)


In [23]:
# Fitting the SVR 
poly_reg = SVR(kernel='poly',degree=2)
poly_reg.fit(X,y.ravel())

SVR(C=1.0, cache_size=200, coef0=0.0, degree=2, epsilon=0.1,
  gamma='auto_deprecated', kernel='poly', max_iter=-1, shrinking=True,
  tol=0.001, verbose=False)

In [28]:
# Loss Generation. 
y_pred_test = sy.inverse_transform((poly_reg.predict(X_test)))
loss=1/913*(MSE(sy.inverse_transform(y_test),y_pred_test))
print(loss)

1.702653648407953e-12


In [25]:
# Storing the files 
joblib.dump(poly_reg,'./model_files/'+str('SVR_')+str('POLYNOMIAL')+'.sav')

['./model_files/SVR_POLYNOMIAL.sav']

In [38]:
svr_linear=joblib.load('/home/niharika/Desktop/Hydrological_Modelling/Final_Hydro/Model_Codes/model_files/SVR_LINEAR.sav')
svr_poly=joblib.load('/home/niharika/Desktop/Hydrological_Modelling/Final_Hydro/Model_Codes/model_files/SVR_POLYNOMIAL.sav')
svr_rbf=joblib.load('/home/niharika/Desktop/Hydrological_Modelling/Final_Hydro/Model_Codes/model_files/SVR_RBF.sav')


In [39]:
df_test=pd.read_csv('/home/niharika/Desktop/Hydrological_Modelling/Final_Hydro/Model_Codes/data/graph_Test.csv')
df_test_vals=pd.read_csv('/home/niharika/Desktop/Hydrological_Modelling/Final_Hydro/Model_Codes/data/graph_test_gd.csv')

In [40]:
print(df_test.shape)

(87, 6)


In [51]:
X_test=df_test[['cp','lsp','swvl1','ro','sd']].values.astype(float)
X_test=np.reshape(X_test,(-1,5))
X_test=sx.transform(X_test)
print(X_test.shape)

y_test=df_test_vals['dis'].values.astype('float')
print(y_test.shape)

(87, 5)
(87,)


In [53]:
# Loss Generation. 
y_pred_test = sy.inverse_transform((rbf_reg.predict(X_test)))
loss=1/1461*(MSE((y_test),y_pred_test))
print(loss)

21.689333433982164
