Import necessary dependencies

In [1]:
import os
import numpy as np
import pandas as pd
import random
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils.class_weight import compute_class_weight
import torch
from torch import optim
import torch.nn as nn
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
from matplotlib import pyplot as plt
from tqdm import tqdm
from datetime import datetime

In [2]:
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

In [3]:
if torch.cuda.is_available():
    device = torch.device("cuda")
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
# elif torch.backends.mps.is_available():
#     device = torch.device("mps:0")
else:
    device = torch.device("cpu")

Load data

In [4]:
data = pd.read_csv('datasets/IoT_Modbus.csv')

# TODO: Complete EDA

Combine 'date' and 'time' into a single datetime column

In [5]:
data['datetime'] = pd.to_datetime(data['date'] + ' ' + data['time'])

  data['datetime'] = pd.to_datetime(data['date'] + ' ' + data['time'])


Extract time features

In [6]:
data['year'] = data['datetime'].dt.year
data['month'] = data['datetime'].dt.month
data['day'] = data['datetime'].dt.day
data['hour'] = data['datetime'].dt.hour
data['minute'] = data['datetime'].dt.minute
data['second'] = data['datetime'].dt.second
data['dayofweek'] = data['datetime'].dt.dayofweek

Time series models need to ensure that the data set is arranged in time order

In [7]:
# Sort the data by datetime
data = data.sort_values(by='datetime')

# Drop the original date, time, and timestamp columns
data.drop(['date', 'time', 'datetime', 'type'], axis=1, inplace=True)

# Adjust feature order
order = ['year', 'month', 'day', 'hour', 'minute', 'second', 'dayofweek', 'FC1_Read_Input_Register', 'FC2_Read_Discrete_Value', 'FC3_Read_Holding_Register', 'FC4_Read_Coil', 'label']
data = data[order].astype('int32')

# Split the dataset (Sequential Split)

In [8]:
# Calculate split points
split_idx = int(len(data) * 0.8)

# Split the data set, keeping order
train_data = data.iloc[:split_idx]
test_data = data.iloc[split_idx:]

# Separate features and labels
X_train = train_data.drop('label', axis=1)
y_train = train_data['label']
X_test = test_data.drop('label', axis=1)
y_test = test_data['label']

# Data preprocessing (Normalization)

In [9]:
feature_columns = [col for col in X_train.columns if col != 'label']
scaler = MinMaxScaler()
X_train[feature_columns] = scaler.fit_transform(X_train[feature_columns]).astype('float32')
X_test[feature_columns] = scaler.transform(X_test[feature_columns]).astype('float32')
X_train.info()

<class 'pandas.core.frame.DataFrame'>
Index: 229755 entries, 541 to 184368
Data columns (total 11 columns):
 #   Column                     Non-Null Count   Dtype  
---  ------                     --------------   -----  
 0   year                       229755 non-null  float32
 1   month                      229755 non-null  float32
 2   day                        229755 non-null  float32
 3   hour                       229755 non-null  float32
 4   minute                     229755 non-null  float32
 5   second                     229755 non-null  float32
 6   dayofweek                  229755 non-null  float32
 7   FC1_Read_Input_Register    229755 non-null  float32
 8   FC2_Read_Discrete_Value    229755 non-null  float32
 9   FC3_Read_Holding_Register  229755 non-null  float32
 10  FC4_Read_Coil              229755 non-null  float32
dtypes: float32(11)
memory usage: 11.4 MB


# Execution model
## Create model

In [10]:
class LightweightLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1, width_multiplier=1.0):
        super(LightweightLSTM, self).__init__()
        # Adjust hidden size based on the width multiplier
        adjusted_hidden_size = int(hidden_size * width_multiplier)

        # Define the LSTM layer
        self.lstm = nn.LSTM(input_size, adjusted_hidden_size, num_layers=num_layers, batch_first=True)

        self.fc = nn.Linear(adjusted_hidden_size, output_size)

    def forward(self, x):
        # LSTM layer
        lstm_out, _ = self.lstm(x)

        # Take the output of the last time step
        last_time_step_out = lstm_out[:, -1, :]

        # Output layer
        out = self.fc(last_time_step_out)
        return out

Initialize model

In [11]:
features_num = X_train.shape[1]
hidden_neurons_num = 512
output_neurons_num = 1
lstm_num_layers = 2
multiplier = 0.5

model = LightweightLSTM(features_num, hidden_neurons_num, output_neurons_num, lstm_num_layers, multiplier).to(device)

In [12]:
class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(y_train), y=y_train)
class_weights = torch.tensor(class_weights, dtype=torch.float).to(device=device)

Build loss functions and optimizers

In [13]:
weights = torch.tensor([1, class_weights[1]], dtype=torch.float)
criterion = nn.BCEWithLogitsLoss(torch.FloatTensor ([weights[1] / weights[0]])).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.0005)

Construct Data Loader

In [14]:
batch_size = 128
X_train_tensor = torch.tensor(X_train.values).float().unsqueeze(1).to(device)
y_train_tensor = torch.tensor(y_train.values).float().unsqueeze(1).to(device)
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)

Training model

In [None]:
num_epochs = 100
pbar = tqdm(total=num_epochs)
loss_list = [None] * num_epochs
acc_list = [None] * num_epochs
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    running_accuracy = 0.0
    times = 0

    for inputs, labels in train_loader:
        # FP
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # BP and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Calculate indicators
        model.eval()
        with torch.no_grad():
            probabilities = torch.sigmoid(outputs)
            predictions = (probabilities > 0.5).float().cpu().numpy()

            # Calculate indicators
            y = labels.cpu().numpy()
            running_loss += loss.item() * inputs.size(0)
            running_accuracy += accuracy_score(y, predictions)
            times += 1
            
    epoch_loss = running_loss / len(train_loader.dataset)
    accuracy = running_accuracy / times
    loss_list[epoch] = epoch_loss
    acc_list[epoch] = accuracy
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss}, Accuracy: {accuracy}')

    X_test_tensor = torch.tensor(X_test.values).float().unsqueeze(1).to(device)
    model.eval()
    outputs = model(X_test_tensor)
    with torch.no_grad():
        probabilities = torch.sigmoid(outputs)
        predictions = (probabilities > 0.5).float().cpu().numpy()
    
        # Calculate indicators
        acc = accuracy_score(y_test, predictions)
        precision = precision_score(y_test, predictions)
        recall = recall_score(y_test, predictions)
        f1 = f1_score(y_test, predictions)
    
        print("Accuracy: ", acc, ", Precision: ", precision, ", Recall: ", recall, ", F1: ", f1)
    pbar.update(1)
pbar.reset()

  0%|          | 0/100 [00:00<?, ?it/s]

Epoch [1/100], Loss: 0.5964092749277182, Accuracy: 0.8649068593314764


  1%|          | 1/100 [00:12<20:22, 12.35s/it]

Accuracy:  0.1377461306777625 , Precision:  0.1377461306777625 , Recall:  1.0 , F1:  0.24213860537711743
Epoch [2/100], Loss: 0.6697538053469778, Accuracy: 0.8352324164345404


  2%|▏         | 2/100 [00:24<19:53, 12.17s/it]

Accuracy:  0.1377461306777625 , Precision:  0.1377461306777625 , Recall:  1.0 , F1:  0.24213860537711743
Epoch [3/100], Loss: 0.6841559864119013, Accuracy: 0.8446204735376045


  3%|▎         | 3/100 [00:36<19:20, 11.97s/it]

Accuracy:  0.1377461306777625 , Precision:  0.1377461306777625 , Recall:  1.0 , F1:  0.24213860537711743
Epoch [4/100], Loss: 0.6319667941687636, Accuracy: 0.8554535167130919


  4%|▍         | 4/100 [00:47<18:55, 11.83s/it]

Accuracy:  0.1377461306777625 , Precision:  0.1377461306777625 , Recall:  1.0 , F1:  0.24213860537711743
Epoch [5/100], Loss: 0.6062442797480067, Accuracy: 0.8665389972144847


  5%|▌         | 5/100 [00:59<18:36, 11.75s/it]

Accuracy:  0.1377461306777625 , Precision:  0.1377461306777625 , Recall:  1.0 , F1:  0.24213860537711743
Epoch [6/100], Loss: 0.5614399757185735, Accuracy: 0.8753830083565459


  6%|▌         | 6/100 [01:10<18:15, 11.66s/it]

Accuracy:  0.1377461306777625 , Precision:  0.1377461306777625 , Recall:  1.0 , F1:  0.24213860537711743
Epoch [7/100], Loss: 0.5116179810095153, Accuracy: 0.8955562325905293


  7%|▋         | 7/100 [01:22<17:59, 11.60s/it]

Accuracy:  0.1377461306777625 , Precision:  0.1377461306777625 , Recall:  1.0 , F1:  0.24213860537711743
Epoch [8/100], Loss: 0.5134656784693081, Accuracy: 0.8870778203342619


  8%|▊         | 8/100 [01:34<17:53, 11.67s/it]

Accuracy:  0.1377461306777625 , Precision:  0.1377461306777625 , Recall:  1.0 , F1:  0.24213860537711743
Epoch [9/100], Loss: 0.6041127104192278, Accuracy: 0.8602759401114206


  9%|▉         | 9/100 [01:45<17:44, 11.70s/it]

Accuracy:  0.1377461306777625 , Precision:  0.1377461306777625 , Recall:  1.0 , F1:  0.24213860537711743
Epoch [10/100], Loss: 0.6575821128615906, Accuracy: 0.8291303969359332


 10%|█         | 10/100 [01:57<17:34, 11.71s/it]

Accuracy:  0.1377461306777625 , Precision:  0.1377461306777625 , Recall:  1.0 , F1:  0.24213860537711743


Visualizing the training process

In [None]:
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(loss_list, label='Training Loss')
plt.title('Training Loss per Epoch')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

# Draw accuracy curve
plt.subplot(1, 2, 2)
plt.plot(acc_list, label='Training Accuracy')
plt.title('Training Accuracy per Epoch')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

plt.show()

Unseen test set performance

In [None]:
X_test_tensor = torch.tensor(X_test.values).float().unsqueeze(1).to(device)

model.eval()
outputs = model(X_test_tensor)
with torch.no_grad():
    probabilities = torch.sigmoid(outputs)
    predictions = (probabilities > 0.5).float().cpu().numpy()

    # Calculate indicators
    acc = accuracy_score(y_test, predictions)
    precision = precision_score(y_test, predictions)
    recall = recall_score(y_test, predictions)
    f1 = f1_score(y_test, predictions)

    print("Accuracy: ", acc, ", Precision: ", precision, ", Recall: ", recall, ", F1: ", f1)

Save model

In [None]:
save_folder = "save_model"

if not os.path.exists(save_folder):
    os.makedirs(save_folder)

current_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
model_filename = f"model_rnn_{current_time}.pt"
full_path = os.path.join(save_folder, model_filename)
torch.save(model.state_dict(), full_path)

print("Model saved as:", full_path)