In [1]:
from pycaret.classification import *
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.metrics import accuracy_score, roc_auc_score, recall_score, precision_score, f1_score, cohen_kappa_score, matthews_corrcoef
import time

## 4 sensors

In [None]:
data = pd.read_excel('/home/soham/Desktop/GitHub/BTP/data/4sensors/polynomial_data.xlsx')
data = data[['Sensor1', 'Sensor2', 'Sensor3', 'Sensor4', 'Label']]

clf_setup = setup(data=data, target='Label', session_id=123, preprocess=True, verbose=False)
comparison_df = pull()
best_model = compare_models()

In [12]:
# ADDING SVM - POLYNOMIAL MODEL

svm_poly = SVC(kernel='poly', degree=3, C=1, random_state=123)
svm_poly.fit(data[['Sensor1', 'Sensor2', 'Sensor3', 'Sensor4']], data['Label'])
start_time = time.time()
y_pred = svm_poly.predict(data[['Sensor1', 'Sensor2', 'Sensor3', 'Sensor4']])
end_time = time.time()

accuracy = accuracy_score(data['Label'], y_pred)
auc = None
recall = recall_score(data['Label'], y_pred, average='weighted')
precision = precision_score(data['Label'], y_pred, average='weighted')
f1 = f1_score(data['Label'], y_pred, average='weighted')
kappa = cohen_kappa_score(data['Label'], y_pred)
mcc = matthews_corrcoef(data['Label'], y_pred)
tt = end_time - start_time

svm_poly_metrics = {'Model': 'SVM - Polynomial Kernel', 'Accuracy': accuracy, 'AUC': auc, 'Recall': recall, 'Prec.': precision, 'F1': f1, 'Kappa': kappa, 'MCC': mcc, 'TT (Sec)': tt}

# ADDING SVM - RBF MODEL

svm_rbf = SVC(kernel='rbf', C=1, random_state=123)
svm_rbf.fit(data[['Sensor1', 'Sensor2', 'Sensor3', 'Sensor4']], data['Label'])

start_time = time.time()
y_pred = svm_rbf.predict(data[['Sensor1', 'Sensor2', 'Sensor3', 'Sensor4']])
end_time = time.time()

accuracy = accuracy_score(data['Label'], y_pred)
auc = None
recall = recall_score(data['Label'], y_pred, average='weighted')
precision = precision_score(data['Label'], y_pred, average='weighted')
f1 = f1_score(data['Label'], y_pred, average='weighted')
kappa = cohen_kappa_score(data['Label'], y_pred)
mcc = matthews_corrcoef(data['Label'], y_pred)
tt = end_time - start_time

svm_rbf_metrics = {'Model': 'SVM - RBF Kernel', 'Accuracy': accuracy, 'AUC': auc, 'Recall': recall, 'Prec.': precision, 'F1': f1, 'Kappa': kappa, 'MCC': mcc, 'TT (Sec)': tt}

In [None]:
comparison_df = pull()
comparison_df.loc['svm-poly'] = svm_poly_metrics
comparison_df.loc['svm-rbf'] = svm_rbf_metrics
comparison_df = comparison_df.sort_values('Accuracy', ascending=False)
comparison_df

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lr,Logistic Regression,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.008
knn,K Neighbors Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.008
nb,Naive Bayes,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.005
dt,Decision Tree Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.005
ridge,Ridge Classifier,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.006
rf,Random Forest Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.025
ada,Ada Boost Classifier,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.013
gbc,Gradient Boosting Classifier,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.025
lda,Linear Discriminant Analysis,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.007
et,Extra Trees Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.02


## 4 Sensors with PCA

In [None]:
data = pd.read_excel('/home/soham/Desktop/GitHub/BTP/data/4sensors/PCA.xlsx')

clf2 = setup(data, target='Label', session_id=123, verbose=False)
comparison_df = pull()
best_model = compare_models()

In [28]:
# ADDING SVM - POLYNOMIAL MODEL

svm_poly = SVC(kernel='poly', degree=3, C=1, random_state=123)
svm_poly.fit(data[['PC1','PC2']], data['Label'])
start_time = time.time()
y_pred = svm_poly.predict(data[['PC1','PC2']])
end_time = time.time()

accuracy = accuracy_score(data['Label'], y_pred)
auc = None
recall = recall_score(data['Label'], y_pred, average='weighted')
precision = precision_score(data['Label'], y_pred, average='weighted')
f1 = f1_score(data['Label'], y_pred, average='weighted')
kappa = cohen_kappa_score(data['Label'], y_pred)
mcc = matthews_corrcoef(data['Label'], y_pred)
tt = end_time - start_time

svm_poly_metrics = {'Model': 'SVM - Polynomial Kernel', 'Accuracy': accuracy, 'AUC': auc, 'Recall': recall, 'Prec.': precision, 'F1': f1, 'Kappa': kappa, 'MCC': mcc, 'TT (Sec)': tt}

# ADDING SVM - RBF MODEL

svm_rbf = SVC(kernel='rbf', C=1, random_state=123)
svm_rbf.fit(data[['PC1','PC2']], data['Label'])

start_time = time.time()
y_pred = svm_rbf.predict(data[['PC1','PC2']])
end_time = time.time()

accuracy = accuracy_score(data['Label'], y_pred)
auc = None
recall = recall_score(data['Label'], y_pred, average='weighted')
precision = precision_score(data['Label'], y_pred, average='weighted')
f1 = f1_score(data['Label'], y_pred, average='weighted')
kappa = cohen_kappa_score(data['Label'], y_pred)
mcc = matthews_corrcoef(data['Label'], y_pred)
tt = end_time - start_time

svm_rbf_metrics = {'Model': 'SVM - RBF Kernel', 'Accuracy': accuracy, 'AUC': auc, 'Recall': recall, 'Prec.': precision, 'F1': f1, 'Kappa': kappa, 'MCC': mcc, 'TT (Sec)': tt}

In [29]:
comparison_df = pull()
comparison_df.loc['svm-poly'] = svm_poly_metrics
comparison_df.loc['svm-rbf'] = svm_rbf_metrics
comparison_df = comparison_df.sort_values('Accuracy', ascending=False)
comparison_df

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lr,Logistic Regression,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.196
knn,K Neighbors Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.1
nb,Naive Bayes,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.005
dt,Decision Tree Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.006
ridge,Ridge Classifier,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.006
rf,Random Forest Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.028
qda,Quadratic Discriminant Analysis,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.005
ada,Ada Boost Classifier,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.013
gbc,Gradient Boosting Classifier,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.025
lda,Linear Discriminant Analysis,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.005


## 4 Sensors LDA

In [None]:
data = pd.read_excel('/home/soham/Desktop/GitHub/BTP/data/4sensors/LDA.xlsx')

clf2 = setup(data, target='Label', session_id=123, verbose=False)
comparison_df = pull()
best_model = compare_models()

In [34]:
# ADDING SVM - POLYNOMIAL MODEL

svm_poly = SVC(kernel='poly', degree=3, C=1, random_state=123)
svm_poly.fit(data[['LD1','LD2']], data['Label'])
start_time = time.time()
y_pred = svm_poly.predict(data[['LD1','LD2']])
end_time = time.time()

accuracy = accuracy_score(data['Label'], y_pred)
auc = None
recall = recall_score(data['Label'], y_pred, average='weighted')
precision = precision_score(data['Label'], y_pred, average='weighted')
f1 = f1_score(data['Label'], y_pred, average='weighted')
kappa = cohen_kappa_score(data['Label'], y_pred)
mcc = matthews_corrcoef(data['Label'], y_pred)
tt = end_time - start_time

svm_poly_metrics = {'Model': 'SVM - Polynomial Kernel', 'Accuracy': accuracy, 'AUC': auc, 'Recall': recall, 'Prec.': precision, 'F1': f1, 'Kappa': kappa, 'MCC': mcc, 'TT (Sec)': tt}

# ADDING SVM - RBF MODEL

svm_rbf = SVC(kernel='rbf', C=1, random_state=123)
svm_rbf.fit(data[['LD1', 'LD2']], data['Label'])

start_time = time.time()
y_pred = svm_rbf.predict(data[['LD1', 'LD2']])
end_time = time.time()

accuracy = accuracy_score(data['Label'], y_pred)
auc = None
recall = recall_score(data['Label'], y_pred, average='weighted')
precision = precision_score(data['Label'], y_pred, average='weighted')
f1 = f1_score(data['Label'], y_pred, average='weighted')
kappa = cohen_kappa_score(data['Label'], y_pred)
mcc = matthews_corrcoef(data['Label'], y_pred)
tt = end_time - start_time

svm_rbf_metrics = {'Model': 'SVM - RBF Kernel', 'Accuracy': accuracy, 'AUC': auc, 'Recall': recall, 'Prec.': precision, 'F1': f1, 'Kappa': kappa, 'MCC': mcc, 'TT (Sec)': tt}

In [35]:
comparison_df = pull()
comparison_df.loc['svm-poly'] = svm_poly_metrics
comparison_df.loc['svm-rbf'] = svm_rbf_metrics
comparison_df = comparison_df.sort_values('Accuracy', ascending=False)
comparison_df

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lr,Logistic Regression,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.007
knn,K Neighbors Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.008
svm,SVM - Linear Kernel,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.006
ridge,Ridge Classifier,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.006
rf,Random Forest Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.025
lda,Linear Discriminant Analysis,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.005
et,Extra Trees Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.021
lightgbm,Light Gradient Boosting Machine,1.0,1.0,1.0,1.0,1.0,1.0,1.0,19.789
svm-poly,SVM - Polynomial Kernel,1.0,,1.0,1.0,1.0,1.0,1.0,0.000919
svm-rbf,SVM - RBF Kernel,1.0,,1.0,1.0,1.0,1.0,1.0,0.000887


## 3 Sensors

In [None]:
data = pd.read_csv('/home/soham/Desktop/GitHub/BTP/data/smote/labelled.csv')
data = data[['Sensor1', 'Sensor2', 'Sensor3', 'label']]

clf3 = setup(data, target='label', session_id=123, verbose=False)
comparison_df = pd.DataFrame(pull())
best_model = compare_models()

In [39]:
# ADDING SVM - POLYNOMIAL MODEL

svm_poly = SVC(kernel='poly', degree=3, C=1, random_state=123)
svm_poly.fit(data[['Sensor1', 'Sensor2', 'Sensor3']], data['label'])
start_time = time.time()
y_pred = svm_poly.predict(data[['Sensor1', 'Sensor2', 'Sensor3']])
end_time = time.time()

accuracy = accuracy_score(data['label'], y_pred)
auc = None
recall = recall_score(data['label'], y_pred, average='weighted')
precision = precision_score(data['label'], y_pred, average='weighted')
f1 = f1_score(data['label'], y_pred, average='weighted')
kappa = cohen_kappa_score(data['label'], y_pred)
mcc = matthews_corrcoef(data['label'], y_pred)
tt = end_time - start_time

svm_poly_metrics = {'Model': 'SVM - Polynomial Kernel', 'Accuracy': accuracy, 'AUC': auc, 'Recall': recall, 'Prec.': precision, 'F1': f1, 'Kappa': kappa, 'MCC': mcc, 'TT (Sec)': tt}

# ADDING SVM - RBF MODEL

svm_rbf = SVC(kernel='rbf', C=1, random_state=123)
svm_rbf.fit(data[['Sensor1', 'Sensor2', 'Sensor3']], data['label'])

start_time = time.time()
y_pred = svm_rbf.predict(data[['Sensor1', 'Sensor2', 'Sensor3']])
end_time = time.time()

accuracy = accuracy_score(data['label'], y_pred)
auc = None
recall = recall_score(data['label'], y_pred, average='weighted')
precision = precision_score(data['label'], y_pred, average='weighted')
f1 = f1_score(data['label'], y_pred, average='weighted')
kappa = cohen_kappa_score(data['label'], y_pred)
mcc = matthews_corrcoef(data['label'], y_pred)
tt = end_time - start_time

svm_rbf_metrics = {'Model': 'SVM - RBF Kernel', 'Accuracy': accuracy, 'AUC': auc, 'Recall': recall, 'Prec.': precision, 'F1': f1, 'Kappa': kappa, 'MCC': mcc, 'TT (Sec)': tt}

In [40]:
comparison_df = pull()
comparison_df.loc['svm-poly'] = svm_poly_metrics
comparison_df.loc['svm-rbf'] = svm_rbf_metrics
comparison_df = comparison_df.sort_values('Accuracy', ascending=False)
comparison_df

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lda,Linear Discriminant Analysis,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.005
lr,Logistic Regression,0.9833,0.0,0.9833,0.9889,0.9822,0.975,0.9783,0.007
knn,K Neighbors Classifier,0.9833,0.9833,0.9833,0.9889,0.9822,0.975,0.9783,0.008
dt,Decision Tree Classifier,0.9833,0.9875,0.9833,0.9889,0.9822,0.975,0.9783,0.006
rf,Random Forest Classifier,0.9833,1.0,0.9833,0.9889,0.9822,0.975,0.9783,0.026
qda,Quadratic Discriminant Analysis,0.9833,0.0,0.9833,0.9889,0.9822,0.975,0.9783,0.006
gbc,Gradient Boosting Classifier,0.9833,0.0,0.9833,0.9889,0.9822,0.975,0.9783,0.028
et,Extra Trees Classifier,0.9833,1.0,0.9833,0.9889,0.9822,0.975,0.9783,0.02
ada,Ada Boost Classifier,0.9033,0.0,0.9033,0.8756,0.8769,0.8438,0.8683,0.012
svm-poly,SVM - Polynomial Kernel,0.88,,0.88,0.911765,0.875982,0.82,0.838307,0.001145


## 3 Sensors PCA

In [None]:
data = pd.read_excel('/home/soham/Desktop/GitHub/BTP/data/mixture/3 solutions/PCA.xlsx')
data = data[['PC1', 'PC2', 'Labels']]

clf4 = setup(data, target='Labels', session_id=123, verbose=False)
comparison_df = pd.DataFrame(pull())
best_model = compare_models()

In [42]:
# ADDING SVM - POLYNOMIAL MODEL

svm_poly = SVC(kernel='poly', degree=3, C=1, random_state=123)
svm_poly.fit(data[['PC1','PC2']], data['Labels'])
start_time = time.time()
y_pred = svm_poly.predict(data[['PC1','PC2']])
end_time = time.time()

accuracy = accuracy_score(data['Labels'], y_pred)
auc = None
recall = recall_score(data['Labels'], y_pred, average='weighted')
precision = precision_score(data['Labels'], y_pred, average='weighted')
f1 = f1_score(data['Labels'], y_pred, average='weighted')
kappa = cohen_kappa_score(data['Labels'], y_pred)
mcc = matthews_corrcoef(data['Labels'], y_pred)
tt = end_time - start_time

svm_poly_metrics = {'Model': 'SVM - Polynomial Kernel', 'Accuracy': accuracy, 'AUC': auc, 'Recall': recall, 'Prec.': precision, 'F1': f1, 'Kappa': kappa, 'MCC': mcc, 'TT (Sec)': tt}

# ADDING SVM - RBF MODEL

svm_rbf = SVC(kernel='rbf', C=1, random_state=123)
svm_rbf.fit(data[['PC1','PC2']], data['Labels'])

start_time = time.time()
y_pred = svm_rbf.predict(data[['PC1','PC2']])
end_time = time.time()

accuracy = accuracy_score(data['Labels'], y_pred)
auc = None
recall = recall_score(data['Labels'], y_pred, average='weighted')
precision = precision_score(data['Labels'], y_pred, average='weighted')
f1 = f1_score(data['Labels'], y_pred, average='weighted')
kappa = cohen_kappa_score(data['Labels'], y_pred)
mcc = matthews_corrcoef(data['Labels'], y_pred)
tt = end_time - start_time

svm_rbf_metrics = {'Model': 'SVM - RBF Kernel', 'Accuracy': accuracy, 'AUC': auc, 'Recall': recall, 'Prec.': precision, 'F1': f1, 'Kappa': kappa, 'MCC': mcc, 'TT (Sec)': tt}

In [43]:
comparison_df = pull()
comparison_df.loc['svm-poly'] = svm_poly_metrics
comparison_df.loc['svm-rbf'] = svm_rbf_metrics
comparison_df = comparison_df.sort_values('Accuracy', ascending=False)
comparison_df

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
dt,Decision Tree Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.005
gbc,Gradient Boosting Classifier,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.025
lr,Logistic Regression,0.9833,0.0,0.9833,0.9889,0.9822,0.975,0.9783,0.007
knn,K Neighbors Classifier,0.9833,0.9833,0.9833,0.9889,0.9822,0.975,0.9783,0.009
rf,Random Forest Classifier,0.9833,1.0,0.9833,0.9889,0.9822,0.975,0.9783,0.027
et,Extra Trees Classifier,0.9833,1.0,0.9833,0.9889,0.9822,0.975,0.9783,0.023
ada,Ada Boost Classifier,0.9667,0.0,0.9667,0.9778,0.9644,0.95,0.9567,0.013
qda,Quadratic Discriminant Analysis,0.9633,0.0,0.9633,0.9756,0.9609,0.9438,0.9518,0.005
lightgbm,Light Gradient Boosting Machine,0.96,0.9942,0.96,0.9533,0.9507,0.9354,0.9457,11.105
lda,Linear Discriminant Analysis,0.8867,0.0,0.8867,0.9044,0.8724,0.8229,0.8493,0.005


## 3 Sensors LDA

In [None]:
data = pd.read_csv('/home/soham/Desktop/GitHub/BTP/data/smote/labelled.csv')
data = data[['Sensor1', 'Sensor2', 'Sensor3', 'label']]

lda = LDA(n_components=2)
X = data[['Sensor1', 'Sensor2', 'Sensor3']]
y = data['label']
lda_data = lda.fit_transform(X, y)
data['LD1'] = lda_data[:, 0]
data['LD2'] = lda_data[:, 1]


data = data[['LD1', 'LD2', 'label']]

clf5 = setup(data, target='label', session_id=123, verbose=False)
comparison_df = pd.DataFrame(pull())
best_model = compare_models()

In [47]:
# ADDING SVM - POLYNOMIAL MODEL

svm_poly = SVC(kernel='poly', degree=3, C=1, random_state=123)
svm_poly.fit(data[['LD1', 'LD2']], data['label'])
start_time = time.time()
y_pred = svm_poly.predict(data[['LD1', 'LD2']])
end_time = time.time()

accuracy = accuracy_score(data['label'], y_pred)
auc = None
recall = recall_score(data['label'], y_pred, average='weighted')
precision = precision_score(data['label'], y_pred, average='weighted')
f1 = f1_score(data['label'], y_pred, average='weighted')
kappa = cohen_kappa_score(data['label'], y_pred)
mcc = matthews_corrcoef(data['label'], y_pred)
tt = end_time - start_time

svm_poly_metrics = {'Model': 'SVM - Polynomial Kernel', 'Accuracy': accuracy, 'AUC': auc, 'Recall': recall, 'Prec.': precision, 'F1': f1, 'Kappa': kappa, 'MCC': mcc, 'TT (Sec)': tt}

# ADDING SVM - RBF MODEL

svm_rbf = SVC(kernel='rbf', C=1, random_state=123)
svm_rbf.fit(data[['LD1', 'LD2']], data['label'])

start_time = time.time()
y_pred = svm_rbf.predict(data[['LD1', 'LD2']])
end_time = time.time()

accuracy = accuracy_score(data['label'], y_pred)
auc = None
recall = recall_score(data['label'], y_pred, average='weighted')
precision = precision_score(data['label'], y_pred, average='weighted')
f1 = f1_score(data['label'], y_pred, average='weighted')
kappa = cohen_kappa_score(data['label'], y_pred)
mcc = matthews_corrcoef(data['label'], y_pred)
tt = end_time - start_time

svm_rbf_metrics = {'Model': 'SVM - RBF Kernel', 'Accuracy': accuracy, 'AUC': auc, 'Recall': recall, 'Prec.': precision, 'F1': f1, 'Kappa': kappa, 'MCC': mcc, 'TT (Sec)': tt}

In [48]:
comparison_df = pull()
comparison_df.loc['svm-poly'] = svm_poly_metrics
comparison_df.loc['svm-rbf'] = svm_rbf_metrics
comparison_df = comparison_df.sort_values('Accuracy', ascending=False)
comparison_df

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lr,Logistic Regression,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.006
knn,K Neighbors Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.01
dt,Decision Tree Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.005
rf,Random Forest Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.026
qda,Quadratic Discriminant Analysis,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.005
ada,Ada Boost Classifier,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.016
gbc,Gradient Boosting Classifier,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.025
lda,Linear Discriminant Analysis,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.005
lightgbm,Light Gradient Boosting Machine,1.0,1.0,1.0,1.0,1.0,1.0,1.0,10.736
svm-rbf,SVM - RBF Kernel,0.986667,,0.986667,0.987179,0.986661,0.98,0.980261,0.000929
