In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score, recall_score, precision_score
from sklearn.svm import SVC
import joblib

## Data Cleaning

In [2]:
df = pd.read_csv('../dataSet/final_datasetBSL.csv')
df.columns = [i for i in range(df.shape[1])]
df = df.rename(columns={63: 'Output'})
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,54,55,56,57,58,59,60,61,62,Output
0,0.682996,0.77364117,-9.367768e-07,0.692656,0.729596,-0.174701,0.691045,0.625183,-0.228506,0.649930,...,0.507712,0.494957,-0.017714,0.517896,0.548844,-0.029763,0.564395,0.574762,-0.016216,0.0
1,0.762095,0.7272415,-1.369458e-06,0.584345,0.742350,-0.059305,0.438985,0.684447,-0.083520,0.375221,...,0.589935,0.492781,-0.009994,0.582887,0.531607,-0.003973,0.616653,0.555095,-0.000574,0.0
2,0.762095,0.7272415,-1.369458e-06,0.584345,0.742350,-0.059305,0.438985,0.684447,-0.083520,0.375221,...,0.589935,0.492781,-0.009994,0.582887,0.531607,-0.003973,0.616653,0.555095,-0.000574,0.0
3,0.726126,0.69589,-1.350590e-06,0.546200,0.697156,-0.000828,0.419077,0.640116,-0.003470,0.368921,...,0.622228,0.466040,0.000354,0.615332,0.495862,0.020312,0.638481,0.523002,0.029026,0.0
4,0.726126,0.69589,-1.350590e-06,0.546200,0.697156,-0.000828,0.419077,0.640116,-0.003470,0.368921,...,0.622228,0.466040,0.000354,0.615332,0.495862,0.020312,0.638481,0.523002,0.029026,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1796,0.768670,0.73333734,-7.503206e-07,0.643080,0.677789,0.003755,0.578414,0.598121,-0.018650,0.525291,...,0.569929,0.554485,-0.099221,0.587647,0.612358,-0.087410,0.619529,0.620216,-0.075388,9.0
1797,0.756091,0.7364298,-7.255918e-07,0.630323,0.680481,0.004235,0.566116,0.601193,-0.018907,0.514663,...,0.566694,0.557956,-0.101260,0.585304,0.616279,-0.089827,0.618410,0.626375,-0.077260,9.0
1798,0.751026,0.74793875,-8.269635e-07,0.618629,0.689210,-0.001354,0.550726,0.607424,-0.025242,0.500948,...,0.571145,0.565462,-0.105240,0.592090,0.622349,-0.095353,0.628224,0.626882,-0.083943,9.0
1799,0.740190,0.76102793,-8.382949e-07,0.609556,0.705677,0.000564,0.535308,0.624422,-0.022166,0.481659,...,0.570382,0.576960,-0.107344,0.593714,0.628928,-0.093116,0.631174,0.626211,-0.078013,9.0


In [3]:
print("Uncleaned dataset shape =", df.shape)

Uncleaned dataset shape = (1801, 64)


In [4]:
# removing null values from our dataset

all_null_values = df[df.iloc[:, 0] == 0]
print("Number of null values =", len(all_null_values.index))

Number of null values = 0


In [5]:
# dropping those null values from our dataset

df.drop(all_null_values.index, inplace=True)
df = df.dropna()

In [6]:
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,54,55,56,57,58,59,60,61,62,Output
0,0.682996,0.77364117,-9.367768e-07,0.692656,0.729596,-0.174701,0.691045,0.625183,-0.228506,0.649930,...,0.507712,0.494957,-0.017714,0.517896,0.548844,-0.029763,0.564395,0.574762,-0.016216,0.0
1,0.762095,0.7272415,-1.369458e-06,0.584345,0.742350,-0.059305,0.438985,0.684447,-0.083520,0.375221,...,0.589935,0.492781,-0.009994,0.582887,0.531607,-0.003973,0.616653,0.555095,-0.000574,0.0
2,0.762095,0.7272415,-1.369458e-06,0.584345,0.742350,-0.059305,0.438985,0.684447,-0.083520,0.375221,...,0.589935,0.492781,-0.009994,0.582887,0.531607,-0.003973,0.616653,0.555095,-0.000574,0.0
3,0.726126,0.69589,-1.350590e-06,0.546200,0.697156,-0.000828,0.419077,0.640116,-0.003470,0.368921,...,0.622228,0.466040,0.000354,0.615332,0.495862,0.020312,0.638481,0.523002,0.029026,0.0
4,0.726126,0.69589,-1.350590e-06,0.546200,0.697156,-0.000828,0.419077,0.640116,-0.003470,0.368921,...,0.622228,0.466040,0.000354,0.615332,0.495862,0.020312,0.638481,0.523002,0.029026,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1796,0.768670,0.73333734,-7.503206e-07,0.643080,0.677789,0.003755,0.578414,0.598121,-0.018650,0.525291,...,0.569929,0.554485,-0.099221,0.587647,0.612358,-0.087410,0.619529,0.620216,-0.075388,9.0
1797,0.756091,0.7364298,-7.255918e-07,0.630323,0.680481,0.004235,0.566116,0.601193,-0.018907,0.514663,...,0.566694,0.557956,-0.101260,0.585304,0.616279,-0.089827,0.618410,0.626375,-0.077260,9.0
1798,0.751026,0.74793875,-8.269635e-07,0.618629,0.689210,-0.001354,0.550726,0.607424,-0.025242,0.500948,...,0.571145,0.565462,-0.105240,0.592090,0.622349,-0.095353,0.628224,0.626882,-0.083943,9.0
1799,0.740190,0.76102793,-8.382949e-07,0.609556,0.705677,0.000564,0.535308,0.624422,-0.022166,0.481659,...,0.570382,0.576960,-0.107344,0.593714,0.628928,-0.093116,0.631174,0.626211,-0.078013,9.0


In [7]:
print("Cleaned dataset shape =", df.shape)

Cleaned dataset shape = (1543, 64)


## Data Preparation

In [8]:
X = df.iloc[:, :-1]
print("Features shape =", X.shape)

Y = df.iloc[:, -1]
print("Labels shape =", Y.shape)

Features shape = (1543, 63)
Labels shape = (1543,)


## Data Split

In [9]:
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=0)

## Model Making

In [10]:
svm = SVC(C=100, gamma=0.5, kernel='rbf')

In [11]:
svm.fit(x_train, y_train)

In [12]:
print("Training score =", svm.score(x_train, y_train))

Training score = 1.0


In [13]:
joblib.dump(svm, "../model/model.pkl")

['../model/model.pkl']

In [14]:
y_pred = svm.predict(x_test)

In [15]:
print("Testing score =", accuracy_score(y_test, y_pred))

Testing score = 0.9935275080906149


## Visualization

In [16]:
cf_matrix = confusion_matrix(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='micro')
recall = recall_score(y_test, y_pred, average='micro')
precision = precision_score(y_test, y_pred, average='micro')

In [17]:
f1, recall, precision

(0.9935275080906149, 0.9935275080906149, 0.9935275080906149)