In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, f1_score, precision_score, recall_score, confusion_matrix
from time import time
import pickle

In [2]:
# Load the Modbus capture, discard incomplete rows, and replace the textual
# attack category in 'type' with integer class ids.
# (For a quick smoke run, slice the loaded frame with .iloc[0:500] first.)
encoder = LabelEncoder()

IoT = (
    pd.read_csv('../../data/IoT_Modbus.csv')
    .dropna()
    # LabelEncoder is fitted on the cleaned column so that rows removed by
    # dropna() cannot contribute a class id.
    .pipe(lambda frame: frame.assign(type=encoder.fit_transform(frame['type'])))
)

# Preview the first rows as the cell output.
IoT.head()

Unnamed: 0,date,time,FC1_Read_Input_Register,FC2_Read_Discrete_Value,FC3_Read_Holding_Register,FC4_Read_Coil,label,type
0,31-Mar-19,12:36:55,53287,1463,33518,23014,0,2
1,31-Mar-19,12:36:58,41029,55891,26004,50645,0,2
2,31-Mar-19,12:36:58,41029,55891,26004,50645,0,2
3,31-Mar-19,12:37:00,64661,40232,33460,44046,0,2
4,31-Mar-19,12:37:01,64661,40232,33460,44046,0,2


In [3]:
# Build the feature matrix / target vector and carve out a 20% hold-out set.

# Columns that must not be fed to the model:
#   - 'type'  : the target we predict.
#   - 'label' : a second target-like column (presumably the binary attack
#               flag -- confirm); keeping it would leak the answer.
#   - 'date', 'time' : raw timestamp strings, excluded as before.
NON_FEATURE_COLUMNS = ['type', 'label', 'date', 'time']

# The CSV also carries an 'Unnamed: 0' column whose values mirror the row
# index (see the IoT.head() preview). It is a row counter, not a sensor
# reading; if the file is ordered by capture time or attack type, row
# position alone can predict the class and inflate every reported metric.
# Matching on the prefix keeps this cell working if the loader is later
# switched to index_col=0 and the column disappears.
index_artifacts = [col for col in IoT.columns if str(col).startswith('Unnamed:')]

x = IoT.drop(columns=NON_FEATURE_COLUMNS + index_artifacts)
y = IoT['type']

# Number of input features actually used for training.
features = len(x.columns)

# random_state pins the shuffle so Restart & Run All reproduces the same split.
# stratify=y keeps the class proportions equal in both folds; the rarest
# classes have only ~100 test rows, so an unlucky random split would make
# their scores very noisy.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42, stratify=y
)

In [4]:
# Random forest with 100 trees. random_state fixes the bootstrap samples and
# per-split feature sub-sampling, so re-running the notebook yields the same
# fitted forest (and therefore the same metrics) instead of a new one each time.
model3 = RandomForestClassifier(n_estimators=100, random_state=42)

In [5]:
filename = '../h5s/random-forest.h5'
pickle.dump(model3, open(filename, 'wb'))

In [6]:
start = time()
model3.fit(x_train, y_train)
training_time = time() - start

In [7]:
# Time inference on the held-out split only: the clock brackets just the
# predict() call, so test_time is elapsed wall-clock seconds for x_test.
start = time()
y_pred = model3.predict(x_test)
test_time = time() - start
# Bare last expression so the notebook renders the predicted class ids.
y_pred

array([2, 0, 2, ..., 2, 0, 3])

In [8]:
# Confusion matrix for the hold-out set: row i = true class i, column j =
# predicted class j (each row sums to that class's support in the
# classification report). Off-diagonal counts are misclassifications.
confusion_matrix(y_test, y_pred)

array([[ 7579,     0,   442,     0,     0,     0],
       [    0,   948,    76,     0,     0,     0],
       [  216,     3, 44356,    12,     5,     1],
       [    4,     1,  1013,  2579,     0,     0],
       [    0,     0,    34,     0,    66,     0],
       [    0,     0,    30,     0,     0,    74]], dtype=int64)

In [9]:
# Summary metrics on the held-out split.
#
# The averaged scores are computed over every class present in the data.
# Passing labels=np.unique(y_pred) (as was done before) restricts the average
# to classes the model actually predicted: a class the model never outputs
# would be dropped from the average instead of counting as a miss, which
# silently inflates precision/recall/F1.
#
# 'weighted' averaging weights each class by its support, so the dominant
# class drives these numbers; the per-class rows and the macro average in the
# classification report below show how the rare classes really perform.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print('Accuracy:', accuracy)
print('Precision:', precision)
print('Recall:', recall)
print('F1_Score:', f1)
print(f'Training Time: {training_time}s')
print(f'Testing Time: {test_time}s')
# Per-class precision / recall / F1 plus macro and weighted averages.
print(classification_report(y_test, y_pred))

Accuracy: 0.9680182454429916
Precision: 0.9686012012398135
Recall: 0.9680182454429916
F1_Score: 0.9665922608213019
Training Time: 194.22159481048584s
Testing Time: 3.8215675354003906s
              precision    recall  f1-score   support

           0       0.97      0.94      0.96      8021
           1       1.00      0.93      0.96      1024
           2       0.97      0.99      0.98     44593
           3       1.00      0.72      0.83      3597
           4       0.93      0.66      0.77       100
           5       0.99      0.71      0.83       104

    accuracy                           0.97     57439
   macro avg       0.97      0.83      0.89     57439
weighted avg       0.97      0.97      0.97     57439

