Import necessary dependencies

In [16]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils.class_weight import compute_class_weight
import torch
from torch import optim
import torch.nn as nn
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
from matplotlib import pyplot as plt
from tqdm import tqdm

In [17]:
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)

In [18]:
if torch.cuda.is_available():
    device = torch.device("cuda")
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
# elif torch.backends.mps.is_available():
#     device = torch.device("mps:0")
else:
    device = torch.device("cpu")

Load data

In [19]:
data = pd.read_csv('IoT_Modbus.csv')

# TODO: Complete EDA

Combine 'date' and 'time' into a single datetime column

In [20]:
data['datetime'] = pd.to_datetime(data['date'] + ' ' + data['time'])

  data['datetime'] = pd.to_datetime(data['date'] + ' ' + data['time'])


Extract time features

In [21]:
data['year'] = data['datetime'].dt.year
data['month'] = data['datetime'].dt.month
data['day'] = data['datetime'].dt.day
data['hour'] = data['datetime'].dt.hour
data['minute'] = data['datetime'].dt.minute
data['second'] = data['datetime'].dt.second
data['dayofweek'] = data['datetime'].dt.dayofweek

Time series models need to ensure that the data set is arranged in time order

In [22]:
# Sort the data by datetime
data = data.sort_values(by='datetime')

# Drop the original date, time, and timestamp columns
data.drop(['date', 'time', 'datetime', 'type'], axis=1, inplace=True)

# Adjust feature order
order = ['year', 'month', 'day', 'hour', 'minute', 'second', 'dayofweek', 'FC1_Read_Input_Register', 'FC2_Read_Discrete_Value', 'FC3_Read_Holding_Register', 'FC4_Read_Coil', 'label']
data = data[order].astype('int32')

# Split the dataset (Sequential Split)

In [23]:
# Calculate split points
split_idx = int(len(data) * 0.8)

# Split the data set, keeping order
train_data = data.iloc[:split_idx]
test_data = data.iloc[split_idx:]

# Separate features and labels
X_train = train_data.drop('label', axis=1)
y_train = train_data['label']
X_test = test_data.drop('label', axis=1)
y_test = test_data['label']

# Data preprocessing (Normalization)

In [24]:
feature_columns = [col for col in X_train.columns if col != 'label']
scaler = MinMaxScaler()
X_train[feature_columns] = scaler.fit_transform(X_train[feature_columns]).astype('float32')
X_test[feature_columns] = scaler.transform(X_test[feature_columns]).astype('float32')
X_train.info()

<class 'pandas.core.frame.DataFrame'>
Index: 229755 entries, 541 to 184368
Data columns (total 11 columns):
 #   Column                     Non-Null Count   Dtype  
---  ------                     --------------   -----  
 0   year                       229755 non-null  float32
 1   month                      229755 non-null  float32
 2   day                        229755 non-null  float32
 3   hour                       229755 non-null  float32
 4   minute                     229755 non-null  float32
 5   second                     229755 non-null  float32
 6   dayofweek                  229755 non-null  float32
 7   FC1_Read_Input_Register    229755 non-null  float32
 8   FC2_Read_Discrete_Value    229755 non-null  float32
 9   FC3_Read_Holding_Register  229755 non-null  float32
 10  FC4_Read_Coil              229755 non-null  float32
dtypes: float32(11)
memory usage: 11.4 MB


# Execution model
## Create model

In [25]:
class LightweightLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1, width_multiplier=1.0):
        super(LightweightLSTM, self).__init__()
        # Adjust hidden size based on the width multiplier
        adjusted_hidden_size = int(hidden_size * width_multiplier)

        # Define the LSTM layer
        self.lstm = nn.LSTM(input_size, adjusted_hidden_size, num_layers=num_layers, batch_first=True)

        self.linear_1 = nn.Linear(adjusted_hidden_size, hidden_size)
        self.linear_2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # LSTM layer
        lstm_out, _ = self.lstm(x)

        # Take the output of the last time step
        last_time_step_out = lstm_out[:, -1, :]

        # Output layer
        x = self.linear_1(last_time_step_out)
        out = self.linear_2(x)
        return out

Initialize model

In [26]:
features_num = X_train.shape[1]
hidden_neurons_num = 512
output_neurons_num = 1
lstm_num_layers = 2
multiplier = 0.5

model = LightweightLSTM(features_num, hidden_neurons_num, output_neurons_num, lstm_num_layers, multiplier).to(device)

In [27]:
class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(y_train), y=y_train)
class_weights = torch.tensor(class_weights, dtype=torch.float).to(device=device)

Build loss functions and optimizers

In [28]:
weights = torch.tensor([1, class_weights[1]], dtype=torch.float)
criterion = nn.BCEWithLogitsLoss(torch.FloatTensor ([weights[1] / weights[0]])).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.0005)

Construct Data Loader

In [29]:
batch_size = 128
X_train_tensor = torch.tensor(X_train.values).float().unsqueeze(1).to(device)
y_train_tensor = torch.tensor(y_train.values).float().unsqueeze(1).to(device)
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)

Training model

In [None]:
num_epochs = 100
pbar = tqdm(total=num_epochs)
loss_list = [None] * num_epochs
acc_list = [None] * num_epochs
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    running_accuracy = 0.0
    times = 0

    for inputs, labels in train_loader:
        # FP
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # BP and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Calculate indicators
        model.eval()
        with torch.no_grad():
            probabilities = torch.sigmoid(outputs)
            predictions = (probabilities > 0.5).float().cpu().numpy()

            # Calculate indicators
            y = labels.cpu().numpy()
            running_loss += loss.item() * inputs.size(0)
            running_accuracy += accuracy_score(y, predictions)
            times += 1
            
    epoch_loss = running_loss / len(train_loader.dataset)
    accuracy = running_accuracy / times
    loss_list[epoch] = epoch_loss
    acc_list[epoch] = accuracy
    
    pbar.update(1)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss}, Accuracy: {accuracy}')
pbar.reset()


  1%|          | 1/100 [01:26<2:22:19, 86.26s/it]

  1%|          | 1/100 [00:11<19:47, 11.99s/it][A

Epoch [1/100], Loss: 0.5191381459612797, Accuracy: 0.8886316155988858



  2%|▏         | 2/100 [00:24<19:37, 12.01s/it][A

Epoch [2/100], Loss: 0.6318073102902988, Accuracy: 0.8426270891364902



  3%|▎         | 3/100 [00:36<19:26, 12.03s/it][A

Epoch [3/100], Loss: 0.570751524707785, Accuracy: 0.871661733983287



  4%|▍         | 4/100 [00:48<19:15, 12.04s/it][A

Epoch [4/100], Loss: 0.5494877114544434, Accuracy: 0.8769237465181059



  5%|▌         | 5/100 [01:00<19:00, 12.00s/it][A

Epoch [5/100], Loss: 0.5486913711521415, Accuracy: 0.8846056754874652



  6%|▌         | 6/100 [01:12<18:46, 11.98s/it][A

Epoch [6/100], Loss: 0.538357495194683, Accuracy: 0.8904726671309192



  7%|▋         | 7/100 [01:24<18:36, 12.00s/it][A

Epoch [7/100], Loss: 0.5312699786801272, Accuracy: 0.8893671657381615



  8%|▊         | 8/100 [01:36<18:32, 12.09s/it][A

Epoch [8/100], Loss: 0.5429557446774748, Accuracy: 0.8848755222841226



  9%|▉         | 9/100 [01:48<18:17, 12.06s/it][A

Epoch [9/100], Loss: 0.5166464149413522, Accuracy: 0.8970186281337047



 10%|█         | 10/100 [02:00<18:05, 12.06s/it][A

Epoch [10/100], Loss: 0.5374927606624922, Accuracy: 0.8880614554317549



 11%|█         | 11/100 [02:11<17:34, 11.85s/it][A

Epoch [11/100], Loss: 0.5129982957812427, Accuracy: 0.8898763927576602



 12%|█▏        | 12/100 [02:23<17:10, 11.71s/it][A

Epoch [12/100], Loss: 0.4926084902648893, Accuracy: 0.8989597841225627



 13%|█▎        | 13/100 [02:34<16:54, 11.66s/it][A

Epoch [13/100], Loss: 0.4773069024250208, Accuracy: 0.8964092966573816



 14%|█▍        | 14/100 [02:46<16:41, 11.64s/it][A

Epoch [14/100], Loss: 0.4892477450217079, Accuracy: 0.903525417827298



 15%|█▌        | 15/100 [02:57<16:22, 11.56s/it][A

Epoch [15/100], Loss: 0.4628682955570198, Accuracy: 0.9028203342618384



 16%|█▌        | 16/100 [03:08<16:05, 11.50s/it][A

Epoch [16/100], Loss: 0.47295919306160783, Accuracy: 0.9079865947075209



 17%|█▋        | 17/100 [03:20<15:57, 11.53s/it][A

Epoch [17/100], Loss: 0.4545608324814424, Accuracy: 0.9098711699164346



 18%|█▊        | 18/100 [03:32<15:42, 11.50s/it][A

Epoch [18/100], Loss: 0.47582321239585373, Accuracy: 0.9118253830083566



 19%|█▉        | 19/100 [03:43<15:28, 11.46s/it][A

Epoch [19/100], Loss: 0.4488834859166241, Accuracy: 0.9142017757660167



 20%|██        | 20/100 [03:54<15:15, 11.44s/it][A

Epoch [20/100], Loss: 0.4450644303231953, Accuracy: 0.9149982590529248



 21%|██        | 21/100 [04:06<15:02, 11.42s/it][A

Epoch [21/100], Loss: 0.41812666550824107, Accuracy: 0.9121692200557103



 22%|██▏       | 22/100 [04:17<14:51, 11.44s/it][A

Epoch [22/100], Loss: 0.4268518593841211, Accuracy: 0.9224538649025069



 23%|██▎       | 23/100 [04:29<14:39, 11.43s/it][A

Epoch [23/100], Loss: 0.40286830263604506, Accuracy: 0.9182625348189415



 24%|██▍       | 24/100 [04:40<14:28, 11.42s/it][A

Epoch [24/100], Loss: 0.40285957130159206, Accuracy: 0.9246866295264624



 25%|██▌       | 25/100 [04:51<14:16, 11.41s/it][A

Epoch [25/100], Loss: 0.4026595334508867, Accuracy: 0.9268628133704735



 26%|██▌       | 26/100 [05:03<14:09, 11.48s/it][A

Epoch [26/100], Loss: 0.39968321009205604, Accuracy: 0.9276592966573816



 27%|██▋       | 27/100 [05:14<13:57, 11.47s/it][A

Epoch [27/100], Loss: 0.4012412878143133, Accuracy: 0.9308844011142061



 28%|██▊       | 28/100 [05:26<13:43, 11.44s/it][A

Epoch [28/100], Loss: 0.3773492969757913, Accuracy: 0.9362769846796657



 29%|██▉       | 29/100 [05:37<13:32, 11.45s/it][A

Epoch [29/100], Loss: 0.36561799232171105, Accuracy: 0.9393715181058496



 30%|███       | 30/100 [05:49<13:22, 11.46s/it][A

Epoch [30/100], Loss: 0.3571116034932184, Accuracy: 0.9415128830083566



 31%|███       | 31/100 [06:01<13:16, 11.54s/it][A

Epoch [31/100], Loss: 0.35657230171279103, Accuracy: 0.9359157381615599



 32%|███▏      | 32/100 [06:13<13:30, 11.93s/it][A

Epoch [32/100], Loss: 0.3552905861328782, Accuracy: 0.9413561977715877



 33%|███▎      | 33/100 [06:25<13:19, 11.93s/it][A

Epoch [33/100], Loss: 0.3452960621517089, Accuracy: 0.9426096796657382



 34%|███▍      | 34/100 [06:37<13:10, 11.97s/it][A

Epoch [34/100], Loss: 0.3417511639593692, Accuracy: 0.9412604456824513



 35%|███▌      | 35/100 [06:49<12:58, 11.98s/it][A

Epoch [35/100], Loss: 0.3397601993224455, Accuracy: 0.9394716225626741



 36%|███▌      | 36/100 [07:01<12:46, 11.98s/it][A

Epoch [36/100], Loss: 0.33932743458692227, Accuracy: 0.9396761838440112



 37%|███▋      | 37/100 [07:13<12:34, 11.98s/it][A

Epoch [37/100], Loss: 0.3374240632810785, Accuracy: 0.9424399373259053



 38%|███▊      | 38/100 [07:25<12:23, 11.99s/it][A

Epoch [38/100], Loss: 0.3369600003225584, Accuracy: 0.940616295264624



 39%|███▉      | 39/100 [07:37<12:11, 11.99s/it][A

Epoch [39/100], Loss: 0.33585078440238497, Accuracy: 0.9415259401114207



 40%|████      | 40/100 [07:49<11:59, 11.98s/it][A

Epoch [40/100], Loss: 0.3337274481343424, Accuracy: 0.9407512186629526



 41%|████      | 41/100 [08:01<11:48, 12.01s/it][A

Epoch [41/100], Loss: 0.3333738021271995, Accuracy: 0.9374912952646239



 42%|████▏     | 42/100 [08:13<11:37, 12.03s/it][A

Epoch [42/100], Loss: 0.33339068800121957, Accuracy: 0.9400591922005571



 43%|████▎     | 43/100 [08:25<11:24, 12.02s/it][A

Epoch [43/100], Loss: 0.3326381942149867, Accuracy: 0.9406859331476323



 44%|████▍     | 44/100 [08:37<11:11, 11.99s/it][A

Epoch [44/100], Loss: 0.330909174617504, Accuracy: 0.9422614902506964



 45%|████▌     | 45/100 [08:50<11:05, 12.10s/it][A

Epoch [45/100], Loss: 0.32946632127089204, Accuracy: 0.9413649025069638



 46%|████▌     | 46/100 [09:02<10:56, 12.16s/it][A

Epoch [46/100], Loss: 0.3312102573053084, Accuracy: 0.9420308147632313



 47%|████▋     | 47/100 [09:14<10:43, 12.14s/it][A

Epoch [47/100], Loss: 0.3278599637563332, Accuracy: 0.9372823816155988



 48%|████▊     | 48/100 [09:26<10:32, 12.17s/it][A

Epoch [48/100], Loss: 0.32609902252687123, Accuracy: 0.9376175139275766



 49%|████▉     | 49/100 [09:38<10:17, 12.11s/it][A

Epoch [49/100], Loss: 0.3256909649332738, Accuracy: 0.9372910863509749



 50%|█████     | 50/100 [09:51<10:08, 12.16s/it][A

Epoch [50/100], Loss: 0.3252631263130425, Accuracy: 0.9372083913649025



 51%|█████     | 51/100 [10:03<09:59, 12.24s/it][A

Epoch [51/100], Loss: 0.32510948667407963, Accuracy: 0.9374608286908078



 52%|█████▏    | 52/100 [10:15<09:49, 12.29s/it][A

Epoch [52/100], Loss: 0.3249288578164875, Accuracy: 0.9370604108635098



 53%|█████▎    | 53/100 [10:27<09:31, 12.15s/it][A

Epoch [53/100], Loss: 0.3249292135929208, Accuracy: 0.9369690111420613



 54%|█████▍    | 54/100 [10:39<09:17, 12.12s/it][A

Epoch [54/100], Loss: 0.3244644424720125, Accuracy: 0.9366121169916435



 55%|█████▌    | 55/100 [10:51<09:01, 12.04s/it][A

Epoch [55/100], Loss: 0.3245966085950986, Accuracy: 0.9362595752089137



 56%|█████▌    | 56/100 [11:03<08:42, 11.88s/it][A

Epoch [56/100], Loss: 0.32442402511576407, Accuracy: 0.9358286908077994



 57%|█████▋    | 57/100 [11:15<08:33, 11.93s/it][A

Epoch [57/100], Loss: 0.32435944286369306, Accuracy: 0.9353891016713092



 58%|█████▊    | 58/100 [11:27<08:27, 12.09s/it][A

Epoch [58/100], Loss: 0.3241930267800873, Accuracy: 0.9355370821727019



 59%|█████▉    | 59/100 [11:39<08:14, 12.07s/it][A

Epoch [59/100], Loss: 0.32377699611950006, Accuracy: 0.9360898328690808



 60%|██████    | 60/100 [11:51<08:02, 12.07s/it][A

Epoch [60/100], Loss: 0.32317509530872784, Accuracy: 0.9369341922005571



 61%|██████    | 61/100 [12:03<07:45, 11.93s/it][A

Epoch [61/100], Loss: 0.3224939238440934, Accuracy: 0.9377219707520892



 62%|██████▏   | 62/100 [12:14<07:29, 11.82s/it][A

Epoch [62/100], Loss: 0.3218112513697726, Accuracy: 0.9385924442896936



 63%|██████▎   | 63/100 [12:26<07:16, 11.80s/it][A

Epoch [63/100], Loss: 0.32086909645558553, Accuracy: 0.9396108983286908



 64%|██████▍   | 64/100 [12:38<07:07, 11.88s/it][A

Epoch [64/100], Loss: 0.3196554617888329, Accuracy: 0.9405553621169916



 65%|██████▌   | 65/100 [12:50<06:56, 11.90s/it][A

Epoch [65/100], Loss: 0.3183141807765537, Accuracy: 0.9415128830083566



 66%|██████▌   | 66/100 [13:03<06:50, 12.06s/it][A

Epoch [66/100], Loss: 0.3168324243728142, Accuracy: 0.9424878133704735



 67%|██████▋   | 67/100 [13:15<06:37, 12.05s/it][A

Epoch [67/100], Loss: 0.31559054794992214, Accuracy: 0.9426880222841225



 68%|██████▊   | 68/100 [13:27<06:27, 12.11s/it][A

Epoch [68/100], Loss: 0.31502146495968325, Accuracy: 0.9425095752089137



 69%|██████▉   | 69/100 [13:39<06:11, 11.97s/it][A

Epoch [69/100], Loss: 0.3154727010706384, Accuracy: 0.9422571378830084



 70%|███████   | 70/100 [13:50<05:56, 11.88s/it][A

Epoch [70/100], Loss: 0.316476674313447, Accuracy: 0.9419437674094707



 71%|███████   | 71/100 [14:02<05:41, 11.78s/it][A

Epoch [71/100], Loss: 0.31715411839039664, Accuracy: 0.941608635097493



 72%|███████▏  | 72/100 [14:13<05:26, 11.67s/it][A

Epoch [72/100], Loss: 0.3182002365099931, Accuracy: 0.9414345403899721



 73%|███████▎  | 73/100 [14:25<05:19, 11.84s/it][A

Epoch [73/100], Loss: 0.32119966310570813, Accuracy: 0.9418436629526462



 74%|███████▍  | 74/100 [14:37<05:09, 11.89s/it][A

Epoch [74/100], Loss: 0.3289776947129276, Accuracy: 0.9437935236768802



 75%|███████▌  | 75/100 [14:49<04:57, 11.91s/it][A

Epoch [75/100], Loss: 0.33494619634951395, Accuracy: 0.9432973537604457



 76%|███████▌  | 76/100 [15:01<04:46, 11.92s/it][A

Epoch [76/100], Loss: 0.32944153195374953, Accuracy: 0.9428664693593315



 77%|███████▋  | 77/100 [15:13<04:32, 11.87s/it][A

Epoch [77/100], Loss: 0.30857233174241505, Accuracy: 0.9444594359331476



 78%|███████▊  | 78/100 [15:26<04:28, 12.19s/it][A

Epoch [78/100], Loss: 0.3246399418424128, Accuracy: 0.9401941155988858



 79%|███████▉  | 79/100 [15:38<04:15, 12.18s/it][A

Epoch [79/100], Loss: 0.3389038483556069, Accuracy: 0.9414432451253482



 80%|████████  | 80/100 [15:50<04:01, 12.06s/it][A

Epoch [80/100], Loss: 0.33225553451895334, Accuracy: 0.9458347841225627



 81%|████████  | 81/100 [16:02<03:49, 12.07s/it][A

Epoch [81/100], Loss: 0.39286134327218775, Accuracy: 0.9354630919220056



 82%|████████▏ | 82/100 [16:14<03:36, 12.05s/it][A

Epoch [82/100], Loss: 0.33516982641023507, Accuracy: 0.936329213091922



 83%|████████▎ | 83/100 [16:26<03:26, 12.13s/it][A

Epoch [83/100], Loss: 0.31429099776530384, Accuracy: 0.9444594359331476



 84%|████████▍ | 84/100 [16:39<03:14, 12.16s/it][A

Epoch [84/100], Loss: 0.29803926171607825, Accuracy: 0.9488988509749303



 85%|████████▌ | 85/100 [16:51<03:04, 12.27s/it][A

Epoch [85/100], Loss: 0.28856929726876857, Accuracy: 0.9504657033426184



 86%|████████▌ | 86/100 [17:04<02:54, 12.50s/it][A

Epoch [86/100], Loss: 0.2940266723645743, Accuracy: 0.9513013579387186



 87%|████████▋ | 87/100 [17:17<02:43, 12.57s/it][A

Epoch [87/100], Loss: 0.3810376981643594, Accuracy: 0.9375739902506964



 88%|████████▊ | 88/100 [17:30<02:31, 12.62s/it][A

Epoch [88/100], Loss: 0.3283477534265523, Accuracy: 0.9419220055710307



 89%|████████▉ | 89/100 [17:42<02:19, 12.67s/it][A

Epoch [89/100], Loss: 0.29411156491801643, Accuracy: 0.9481589484679666



 90%|█████████ | 90/100 [17:55<02:05, 12.51s/it][A

Epoch [90/100], Loss: 0.2937980138342313, Accuracy: 0.9486072423398328



 91%|█████████ | 91/100 [18:07<01:51, 12.44s/it][A

Epoch [91/100], Loss: 0.2940989491292988, Accuracy: 0.9491425835654597



 92%|█████████▏| 92/100 [18:19<01:39, 12.38s/it][A

Epoch [92/100], Loss: 0.27545991332696623, Accuracy: 0.9540738161559889


Visualizing the training process

In [None]:
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(loss_list, label='Training Loss')
plt.title('Training Loss per Epoch')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

# Draw accuracy curve
plt.subplot(1, 2, 2)
plt.plot(acc_list, label='Training Accuracy')
plt.title('Training Accuracy per Epoch')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

plt.show()

Unseen test set performance

In [None]:
X_test_tensor = torch.tensor(X_test.values).float().unsqueeze(1).to(device)

model.eval()
outputs = model(X_test_tensor)
with torch.no_grad():
    probabilities = torch.sigmoid(outputs)
    predictions = (probabilities > 0.5).float().cpu().numpy()

    # Calculate indicators
    acc = accuracy_score(y_test, predictions)
    precision = precision_score(y_test, predictions)
    recall = recall_score(y_test, predictions)
    f1 = f1_score(y_test, predictions)

    print("Accuracy: ", acc, ", Precision: ", precision, ", Recall: ", recall, ", F1: ", f1)