In [30]:
# Importing required modules

# data processing
import pandas as pd

# ensemble modelling
import time
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report
from sklearn.preprocessing import LabelEncoder

In [31]:
# Loading prebuilt model structures stored as '[MODEL].h5'

rf = pickle.load(open('../models/h5s/random-forest.h5', 'rb'))
lr = pickle.load(open('../models/h5s/logistic-regression.h5', 'rb'))
lda = pickle.load(open('../models/h5s/linear-discriminant-analysis.h5', 'rb'))
knn = pickle.load(open('../models/h5s/kNN.h5', 'rb'))
cart = pickle.load(open('../models/h5s/CART.h5', 'rb'))
svm = pickle.load(open('../models/h5s/support-vector-machine.h5', 'rb'))

In [32]:
columns = ["date", "time", "FC1_Read_Input_Register", "FC2_Read_Discrete_Value", "FC3_Read_Holding_Register", "FC4_Read_Coil", "label", "type"]

IoT = pd.read_csv("../data/Test_Modbus.csv", low_memory=False)
IoT = IoT.dropna()

# encoding string type variables to numeric type (required for model training)
encoder = LabelEncoder()
IoT['type'] = encoder.fit_transform(IoT['type'])

x = IoT.drop(['label', 'date', 'time', 'type'], axis=1)
y = IoT['type']

49999


In [33]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

In [34]:
voting = VotingClassifier(estimators=[('RF', rf), ('LR', lr), ('LDA', lda), ('KNN', knn), ('CART', cart), ('SVM', svm)], voting='hard')

In [35]:
filename = '../models/h5s/VC.h5'
pickle.dump(voting, open(filename, 'wb'))

In [36]:
start = time.time()
voting.fit(x_train, y_train)
training_time = time.time() - start

In [37]:
start = time.time()
y_pred = voting.predict(x_test)
test_time = time.time() - start
y_pred

array([2, 2, 2, ..., 2, 2, 2])

In [38]:
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted', labels=np.unique(y_pred))
recall = recall_score(y_test, y_pred, average='weighted', labels=np.unique(y_pred))
f1 = f1_score(y_test, y_pred, average='weighted', labels=np.unique(y_pred))

print('Accuracy:', accuracy)
print('Precision:', precision)
print('Recall:', recall)
print('F1_Score:', f1)
print(f'Training Time: {training_time}s')
print(f'Testing Time: {test_time}s')
print(classification_report(y_test, y_pred))

# 0.455891         0.65163        0.571981        0.366899     1607.571023

Accuracy: 0.5988
Precision: 0.6775604502971861
Recall: 0.8314357123021383
F1_Score: 0.6891665951286284
Training Time: 180.80464911460876s
Testing Time: 53.95462608337402s
              precision    recall  f1-score   support

           0       0.92      0.43      0.59      1664
           1       0.98      0.71      0.83       750
           2       0.55      0.99      0.70      4788
           3       0.00      0.00      0.00      2725
           4       0.00      0.00      0.00        73

    accuracy                           0.60     10000
   macro avg       0.49      0.43      0.42     10000
weighted avg       0.49      0.60      0.50     10000



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
