#Importing Libraries

In [1]:
import pandas as pd
import numpy as np
import sklearn
import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder 
from sklearn.metrics import confusion_matrix 
from sklearn.metrics import accuracy_score 
from sklearn.metrics import classification_report

#Importing Data

In [2]:
# Load the CSV dataset into a DataFrame. The path is relative, so the file
# must be present in the kernel's current working directory.
df = pd.read_csv('RiverDataResult.csv')

In [3]:
# Print (row count, column count) as a quick sanity check on the loaded data.
print(df.shape)

(2326, 22)


In [4]:
# Preview the first rows to see which columns are available.
df.head()

Unnamed: 0,Date,Time,Temp,pH,ORP,EC,RES,TDS,Salinity,Pressure,D.O.,Turbidity,QpH,QTur,QTDS,QDO,WpH,WTur,WTDS,WDO,WQI,Result
0,17-02-2020,17:04:52,20.67,8.4,88.6,470,2128,306,0.23,14.568,101.5,36.7,65,47,55,95,16.575,9.165,9.625,35.625,70.99,4
1,17-02-2020,17:05:02,20.67,8.4,88.5,470,2128,305,0.23,14.569,101.4,42.4,65,38,55,95,16.575,7.41,9.625,35.625,69.235,3
2,17-02-2020,17:05:12,20.67,8.4,88.2,470,2128,305,0.23,14.569,101.5,38.2,65,47,55,95,16.575,9.165,9.625,35.625,70.99,4
3,17-02-2020,17:05:22,20.67,8.4,87.9,470,2128,305,0.23,14.567,101.7,38.0,65,47,55,95,16.575,9.165,9.625,35.625,70.99,4
4,17-02-2020,17:05:32,20.68,8.41,87.8,469,2132,305,0.23,14.566,101.7,46.8,65,38,55,95,16.575,7.41,9.625,35.625,69.235,3


#Feature Selection and Train/Test Split

In [5]:
# Feature matrix: the four water-quality measurements used as model inputs.
# Columns are selected by name instead of by position, so the selection fails
# loudly (KeyError) rather than silently picking the wrong columns if the CSV
# layout ever changes.
X = df[['pH', 'TDS', 'D.O.', 'Turbidity']]

In [6]:
# Show the selected feature matrix; pandas elides the middle rows of a long frame.
print(X)

       pH   TDS   D.O.  Turbidity
0     8.40  306  101.5       36.7
1     8.40  305  101.4       42.4
2     8.40  305  101.5       38.2
3     8.40  305  101.7       38.0
4     8.41  305  101.7       46.8
...    ...  ...    ...        ...
2321  8.94  299  124.9       12.3
2322  8.96  297  125.6       11.9
2323  8.98  296  125.9       11.8
2324  8.95  297  125.7       11.6
2325  8.93  299  125.7       13.6

[2326 rows x 4 columns]


In [7]:
# Target vector: the class label stored in the 'Result' column.
Y = df.loc[:, 'Result']

In [8]:
# Hold out 33% of the rows for testing (the fixed seed keeps the split
# reproducible) and unwrap each resulting pandas object into a NumPy array.
X_train, X_test, Y_train, Y_test = (
    part.values
    for part in train_test_split(X, Y, test_size=0.33, random_state=45)
)

#Scaling

In [9]:
from sklearn.preprocessing import StandardScaler

In [10]:
# Fit the scaler on the training rows only, then apply that same fitted
# transformation to both splits so the test set does not influence the scaling.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

#Model

In [11]:
from sklearn import svm

In [12]:
# Train one support-vector classifier per kernel on the scaled training data.
# Every model shares C=1 and the 'ovo' decision function shape; only the kernel
# and its kernel-specific parameters differ.
linear, rbf, poly, sig = (
    svm.SVC(C=1, decision_function_shape='ovo', **kernel_args).fit(X_train, Y_train)
    for kernel_args in (
        dict(kernel='linear'),
        dict(kernel='rbf', gamma=1),
        dict(kernel='poly', degree=3),
        dict(kernel='sigmoid'),
    )
)

In [13]:
# Predict the held-out test labels with each of the four fitted classifiers.
linear_pred, poly_pred, rbf_pred, sig_pred = (
    model.predict(X_test) for model in (linear, poly, rbf, sig)
)

#Metrics

In [14]:
# Build one confusion matrix per kernel (true labels first, predictions second)
# and print them in the order: linear, poly, rbf, sigmoid.
cm_lin, cm_poly, cm_rbf, cm_sig = (
    confusion_matrix(Y_test, predicted)
    for predicted in (linear_pred, poly_pred, rbf_pred, sig_pred)
)
print(cm_lin, cm_poly, cm_rbf, cm_sig, sep='\n')

[[  2   0   0]
 [  2 361   8]
 [  0  75 320]]
[[  1   1   0]
 [  1 364   6]
 [  0 173 222]]
[[  0   2   0]
 [  0 365   6]
 [  0   9 386]]
[[  0   0   2]
 [  0 301  70]
 [  0 100 295]]


In [15]:
# Test-set accuracy for each kernel, printed in the order: linear, poly, rbf,
# sigmoid. The predictions computed earlier are reused through accuracy_score
# instead of calling model.score(X_test, Y_test), which would run inference on
# the whole test set a second time only to produce the same number.
accuracy_lin, accuracy_poly, accuracy_rbf, accuracy_sig = (
    accuracy_score(Y_test, predicted)
    for predicted in (linear_pred, poly_pred, rbf_pred, sig_pred)
)
print(accuracy_lin, accuracy_poly, accuracy_rbf, accuracy_sig, sep='\n')

0.8893229166666666
0.7643229166666666
0.9778645833333334
0.7760416666666666


#Training Accuracy (Linear Kernel)

In [16]:
# Score the linear-kernel model on the rows it was trained on, for comparison
# with its test accuracy.
# Note: despite its name, acc_lin_train holds the predicted training labels,
# not an accuracy value; the accuracy itself is stored in acc3.
acc_lin_train = linear.predict(X_train)
acc3 = accuracy_score(y_true=Y_train, y_pred=acc_lin_train)
print(acc3)

0.9030808729139923


#Error

In [17]:
from sklearn.metrics import mean_absolute_error

In [18]:
# Mean absolute difference between the true test labels and the linear-kernel
# predictions.
# NOTE(review): this treats the class labels in 'Result' as numeric values, so
# the figure is only meaningful if the classes are ordinal — confirm.
mean_absolute_error(Y_test, linear_pred)

0.11067708333333333