Import necessary dependencies

In [1]:
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import MinMaxScaler
import torch
from torch import optim
import torch.nn as nn
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
from tqdm import tqdm

In [2]:
if torch.cuda.is_available():
    device = torch.device("cuda")
# elif torch.backends.mps.is_available():
#     device = torch.device("mps:0")
else:
    device = torch.device("cpu")

Load data

In [3]:
data = pd.read_csv('IoT_Modbus.csv')

# TODO: Complete EDA

Combine 'date' and 'time' into a single datetime column

In [4]:
data['datetime'] = pd.to_datetime(data['date'] + ' ' + data['time'])

  data['datetime'] = pd.to_datetime(data['date'] + ' ' + data['time'])


Extract time features

In [5]:
data['year'] = data['datetime'].dt.year
data['month'] = data['datetime'].dt.month
data['day'] = data['datetime'].dt.day
data['hour'] = data['datetime'].dt.hour
data['minute'] = data['datetime'].dt.minute
data['second'] = data['datetime'].dt.second
data['dayofweek'] = data['datetime'].dt.dayofweek

Time series models need to ensure that the data set is arranged in time order

In [6]:
# Sort the data by datetime
data = data.sort_values(by='datetime')

# Drop the original date, time, and timestamp columns
data.drop(['date', 'time', 'datetime', 'type'], axis=1, inplace=True)

# Adjust feature order
order = ['year', 'month', 'day', 'hour', 'minute', 'second', 'dayofweek', 'FC1_Read_Input_Register', 'FC2_Read_Discrete_Value', 'FC3_Read_Holding_Register', 'FC4_Read_Coil', 'label']
data = data[order].astype('int32')

# Split the dataset (Sequential Split)

In [7]:
# Calculate split points
split_idx = int(len(data) * 0.8)

# Split the data set, keeping order
train_data = data.iloc[:split_idx]
test_data = data.iloc[split_idx:]

# Separate features and labels
X_train = train_data.drop('label', axis=1)
y_train = train_data['label']
X_test = test_data.drop('label', axis=1)
y_test = test_data['label']

# Data preprocessing (Normalization)

In [8]:
feature_columns = [col for col in X_train.columns if col != 'label']
scaler = MinMaxScaler()
X_train[feature_columns] = scaler.fit_transform(X_train[feature_columns]).astype('float32')
X_test[feature_columns] = scaler.transform(X_test[feature_columns]).astype('float32')
X_train.info()

<class 'pandas.core.frame.DataFrame'>
Index: 229755 entries, 541 to 184368
Data columns (total 11 columns):
 #   Column                     Non-Null Count   Dtype  
---  ------                     --------------   -----  
 0   year                       229755 non-null  float32
 1   month                      229755 non-null  float32
 2   day                        229755 non-null  float32
 3   hour                       229755 non-null  float32
 4   minute                     229755 non-null  float32
 5   second                     229755 non-null  float32
 6   dayofweek                  229755 non-null  float32
 7   FC1_Read_Input_Register    229755 non-null  float32
 8   FC2_Read_Discrete_Value    229755 non-null  float32
 9   FC3_Read_Holding_Register  229755 non-null  float32
 10  FC4_Read_Coil              229755 non-null  float32
dtypes: float32(11)
memory usage: 11.4 MB


# Execution model
## Create model

In [9]:
class LightweightLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1, width_multiplier=1.0):
        super(LightweightLSTM, self).__init__()
        # Adjust hidden size based on the width multiplier
        adjusted_hidden_size = int(hidden_size * width_multiplier)

        # Define the LSTM layer
        self.lstm = nn.LSTM(input_size, adjusted_hidden_size, num_layers=num_layers, batch_first=True)

        self.linear_1 = nn.Linear(adjusted_hidden_size, hidden_size)
        self.linear_2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # LSTM layer
        lstm_out, _ = self.lstm(x)

        # Take the output of the last time step
        last_time_step_out = lstm_out[:, -1, :]

        # Output layer
        x = self.linear_1(last_time_step_out)
        out = self.linear_2(x)
        return out

Initialize model

In [10]:
features_num = X_train.shape[1]
hidden_neurons_num = 512
output_neurons_num = 1
lstm_num_layers = 2
multiplier = 0.5

model = LightweightLSTM(features_num, hidden_neurons_num, output_neurons_num, lstm_num_layers, multiplier).to(device)

Build loss functions and optimizers

In [11]:
criterion = nn.BCEWithLogitsLoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.0005)

Construct Data Loader

In [12]:
batch_size = 128
X_train_tensor = torch.tensor(X_train.values).float().unsqueeze(1).to(device)
y_train_tensor = torch.tensor(y_train.values).float().unsqueeze(1).to(device)
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)

Training model

In [13]:
num_epochs = 100
pbar = tqdm(total=num_epochs)
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    running_accuracy = 0.0
    loss_list = [None] * num_epochs
    acc_list = [None] * num_epochs
    times = 0

    for inputs, labels in train_loader:
        # FP
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # BP and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Calculate indicators
        model.eval()
        with torch.no_grad():
            probabilities = torch.sigmoid(outputs)
            predictions = (probabilities > 0.5).float().cpu().numpy()

            # Calculate indicators
            y = labels.cpu().numpy()
            running_loss += loss.item() * inputs.size(0)
            running_accuracy += accuracy_score(y, predictions)
            times += 1
            
    epoch_loss = running_loss / len(train_loader.dataset)
    accuracy = running_accuracy / times
    loss_list[epoch] = epoch_loss
    acc_list[epoch] = accuracy
    
    pbar.update(1)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss}, Accuracy: {accuracy}')

  1%|          | 1/100 [00:12<20:58, 12.71s/it]

Epoch [1/100], Loss: 0.2574290825284579, Accuracy: 0.8851845403899722


  2%|▏         | 2/100 [00:25<21:05, 12.91s/it]

Epoch [2/100], Loss: 0.29370461751966465, Accuracy: 0.8557842966573816


  3%|▎         | 3/100 [00:38<20:49, 12.88s/it]

Epoch [3/100], Loss: 0.269282754191629, Accuracy: 0.8714397632311978


  4%|▍         | 4/100 [00:51<20:26, 12.78s/it]

Epoch [4/100], Loss: 0.2951190477711532, Accuracy: 0.868549791086351


  5%|▌         | 5/100 [01:03<20:03, 12.67s/it]

Epoch [5/100], Loss: 0.292952868387471, Accuracy: 0.8667740250696379


  6%|▌         | 6/100 [01:16<19:56, 12.73s/it]

Epoch [6/100], Loss: 0.2762979987388829, Accuracy: 0.8758617688022284


  7%|▋         | 7/100 [01:29<19:52, 12.82s/it]

Epoch [7/100], Loss: 0.27715755739967357, Accuracy: 0.8843053621169916


  8%|▊         | 8/100 [01:42<19:50, 12.94s/it]

Epoch [8/100], Loss: 0.27227395797000575, Accuracy: 0.8866599930362117


  9%|▉         | 9/100 [01:55<19:39, 12.97s/it]

Epoch [9/100], Loss: 0.26154141735238456, Accuracy: 0.8904073816155988


 10%|█         | 10/100 [02:08<19:32, 13.03s/it]

Epoch [10/100], Loss: 0.2603205396728657, Accuracy: 0.8893192896935933


 11%|█         | 11/100 [02:22<19:37, 13.23s/it]

Epoch [11/100], Loss: 0.24550178406792686, Accuracy: 0.8963831824512535


 12%|█▏        | 12/100 [02:36<19:33, 13.33s/it]

Epoch [12/100], Loss: 0.25943489376628703, Accuracy: 0.8903682103064067


 13%|█▎        | 13/100 [02:50<19:36, 13.52s/it]

Epoch [13/100], Loss: 0.24251842916966054, Accuracy: 0.896879352367688


 14%|█▍        | 14/100 [03:03<19:28, 13.59s/it]

Epoch [14/100], Loss: 0.24721376422536043, Accuracy: 0.897845577994429


 15%|█▌        | 15/100 [03:17<19:16, 13.60s/it]

Epoch [15/100], Loss: 0.241257071713897, Accuracy: 0.9028769150417827


 16%|█▌        | 16/100 [03:30<18:50, 13.46s/it]

Epoch [16/100], Loss: 0.23278212864587314, Accuracy: 0.9056711350974931


 17%|█▋        | 17/100 [03:43<18:27, 13.35s/it]

Epoch [17/100], Loss: 0.22507448450227188, Accuracy: 0.9099930362116991


 18%|█▊        | 18/100 [03:56<18:02, 13.20s/it]

Epoch [18/100], Loss: 0.21453972027912158, Accuracy: 0.9107982242339833


 19%|█▉        | 19/100 [04:09<17:47, 13.18s/it]

Epoch [19/100], Loss: 0.21915187846448217, Accuracy: 0.9170612813370473


 20%|██        | 20/100 [04:22<17:33, 13.17s/it]

Epoch [20/100], Loss: 0.20448000099619115, Accuracy: 0.917174442896936


 21%|██        | 21/100 [04:35<17:00, 12.92s/it]

Epoch [21/100], Loss: 0.2001851721631447, Accuracy: 0.9240511838440112


 22%|██▏       | 22/100 [04:48<16:56, 13.03s/it]

Epoch [22/100], Loss: 0.1963293581448919, Accuracy: 0.9278638579387186


 23%|██▎       | 23/100 [05:02<16:58, 13.23s/it]

Epoch [23/100], Loss: 0.1932266088004793, Accuracy: 0.9297658426183844


 24%|██▍       | 24/100 [05:15<16:42, 13.19s/it]

Epoch [24/100], Loss: 0.1893409627563946, Accuracy: 0.932229282729805


 25%|██▌       | 25/100 [05:28<16:28, 13.18s/it]

Epoch [25/100], Loss: 0.18686708076953404, Accuracy: 0.9329778899721448


 26%|██▌       | 26/100 [05:41<16:09, 13.11s/it]

Epoch [26/100], Loss: 0.1834342093955085, Accuracy: 0.9357721100278552


 27%|██▋       | 27/100 [05:54<16:02, 13.18s/it]

Epoch [27/100], Loss: 0.17850440751921273, Accuracy: 0.9393584610027855


 28%|██▊       | 28/100 [06:07<15:40, 13.06s/it]

Epoch [28/100], Loss: 0.17268594127598516, Accuracy: 0.937299791086351


 29%|██▉       | 29/100 [06:20<15:20, 12.96s/it]

Epoch [29/100], Loss: 0.16963244694929072, Accuracy: 0.9365598885793872


 30%|███       | 30/100 [06:33<15:02, 12.90s/it]

Epoch [30/100], Loss: 0.1665072558940777, Accuracy: 0.937504352367688


 31%|███       | 31/100 [06:45<14:47, 12.86s/it]

Epoch [31/100], Loss: 0.1648617425460168, Accuracy: 0.9380048746518106


 32%|███▏      | 32/100 [06:57<14:20, 12.66s/it]

Epoch [32/100], Loss: 0.16343126590576681, Accuracy: 0.9385619777158775


 33%|███▎      | 33/100 [07:10<13:59, 12.53s/it]

Epoch [33/100], Loss: 0.16253363817234834, Accuracy: 0.9382050835654596


 34%|███▍      | 34/100 [07:22<13:40, 12.43s/it]

Epoch [34/100], Loss: 0.16117724812580303, Accuracy: 0.938866643454039


 35%|███▌      | 35/100 [07:34<13:24, 12.37s/it]

Epoch [35/100], Loss: 0.16075882033011157, Accuracy: 0.9380440459610028


 36%|███▌      | 36/100 [07:46<13:10, 12.34s/it]

Epoch [36/100], Loss: 0.16003220131735862, Accuracy: 0.9381223885793872


 37%|███▋      | 37/100 [07:59<12:55, 12.32s/it]

Epoch [37/100], Loss: 0.15939361320473244, Accuracy: 0.9378220752089137


 38%|███▊      | 38/100 [08:11<12:41, 12.29s/it]

Epoch [38/100], Loss: 0.15859898898369343, Accuracy: 0.9378830083565459


 39%|███▉      | 39/100 [08:23<12:29, 12.28s/it]

Epoch [39/100], Loss: 0.15807050308692372, Accuracy: 0.9371692200557104


 40%|████      | 40/100 [08:35<12:15, 12.26s/it]

Epoch [40/100], Loss: 0.15734279870798998, Accuracy: 0.9376305710306406


 41%|████      | 41/100 [08:48<12:02, 12.24s/it]

Epoch [41/100], Loss: 0.15700533100080427, Accuracy: 0.9370168871866296


 42%|████▏     | 42/100 [09:00<11:48, 12.22s/it]

Epoch [42/100], Loss: 0.15599842953224202, Accuracy: 0.9374347144846796


 43%|████▎     | 43/100 [09:12<11:36, 12.21s/it]

Epoch [43/100], Loss: 0.1557976741598522, Accuracy: 0.9374434192200557


 44%|████▍     | 44/100 [09:24<11:23, 12.21s/it]

Epoch [44/100], Loss: 0.15471332987453892, Accuracy: 0.9377654944289694


 45%|████▌     | 45/100 [09:36<11:10, 12.20s/it]

Epoch [45/100], Loss: 0.1550573346837428, Accuracy: 0.9382007311977716


 46%|████▌     | 46/100 [09:49<10:58, 12.20s/it]

Epoch [46/100], Loss: 0.15426871733447242, Accuracy: 0.9381006267409471


 47%|████▋     | 47/100 [10:01<10:46, 12.20s/it]

Epoch [47/100], Loss: 0.15291941485869753, Accuracy: 0.9382181406685237


 48%|████▊     | 48/100 [10:13<10:33, 12.19s/it]

Epoch [48/100], Loss: 0.15385366181345508, Accuracy: 0.9395238509749304


 49%|████▉     | 49/100 [10:25<10:21, 12.19s/it]

Epoch [49/100], Loss: 0.15441910850612012, Accuracy: 0.9398676880222842


 50%|█████     | 50/100 [10:37<10:09, 12.18s/it]

Epoch [50/100], Loss: 0.15595273814333221, Accuracy: 0.9371387534818941


 51%|█████     | 51/100 [10:49<09:57, 12.19s/it]

Epoch [51/100], Loss: 0.1553587356148452, Accuracy: 0.9367992688022284


 52%|█████▏    | 52/100 [11:02<09:44, 12.19s/it]

Epoch [52/100], Loss: 0.15501302766624417, Accuracy: 0.9380571030640669


 53%|█████▎    | 53/100 [11:14<09:32, 12.19s/it]

Epoch [53/100], Loss: 0.15432092965185387, Accuracy: 0.9387143105849582


 54%|█████▍    | 54/100 [11:26<09:20, 12.19s/it]

Epoch [54/100], Loss: 0.1564326983281734, Accuracy: 0.9397153551532034


 55%|█████▌    | 55/100 [11:39<09:13, 12.31s/it]

Epoch [55/100], Loss: 0.15797913257142357, Accuracy: 0.9383400069637883


 56%|█████▌    | 56/100 [11:51<09:05, 12.39s/it]

Epoch [56/100], Loss: 0.15731915629928855, Accuracy: 0.9381093314763231


 57%|█████▋    | 57/100 [12:04<08:55, 12.45s/it]

Epoch [57/100], Loss: 0.16030529281276068, Accuracy: 0.937887360724234


 58%|█████▊    | 58/100 [12:16<08:44, 12.50s/it]

Epoch [58/100], Loss: 0.16097935125385884, Accuracy: 0.9379134749303621


 59%|█████▉    | 59/100 [12:29<08:33, 12.53s/it]

Epoch [59/100], Loss: 0.16064024533559496, Accuracy: 0.9378786559888579


 60%|██████    | 60/100 [12:42<08:22, 12.56s/it]

Epoch [60/100], Loss: 0.16042067691604833, Accuracy: 0.9383356545961002


 61%|██████    | 61/100 [12:54<08:12, 12.62s/it]

Epoch [61/100], Loss: 0.16027887956861608, Accuracy: 0.9388318245125348


 62%|██████▏   | 62/100 [13:07<08:04, 12.75s/it]

Epoch [62/100], Loss: 0.16025532653672042, Accuracy: 0.9393671657381616


 63%|██████▎   | 63/100 [13:21<07:56, 12.87s/it]

Epoch [63/100], Loss: 0.16043209325264726, Accuracy: 0.9397110027855153


 64%|██████▍   | 64/100 [13:34<07:46, 12.95s/it]

Epoch [64/100], Loss: 0.16032958067475073, Accuracy: 0.9401549442896936


 65%|██████▌   | 65/100 [13:47<07:34, 13.00s/it]

Epoch [65/100], Loss: 0.16048431613552486, Accuracy: 0.9402115250696379


 66%|██████▌   | 66/100 [14:00<07:20, 12.94s/it]

Epoch [66/100], Loss: 0.16062346923212006, Accuracy: 0.940420438718663


 67%|██████▋   | 67/100 [14:13<07:09, 13.03s/it]

Epoch [67/100], Loss: 0.16062832796280038, Accuracy: 0.9411211699164346


 68%|██████▊   | 68/100 [14:26<06:56, 13.01s/it]

Epoch [68/100], Loss: 0.16059805502947344, Accuracy: 0.9418871866295264


 69%|██████▉   | 69/100 [14:39<06:42, 12.98s/it]

Epoch [69/100], Loss: 0.16051708452446567, Accuracy: 0.9422484331476323


 70%|███████   | 70/100 [14:52<06:29, 12.98s/it]

Epoch [70/100], Loss: 0.16036979471392998, Accuracy: 0.9424791086350975


 71%|███████   | 71/100 [15:05<06:17, 13.02s/it]

Epoch [71/100], Loss: 0.16021340950174823, Accuracy: 0.942387708913649


 72%|███████▏  | 72/100 [15:18<06:04, 13.00s/it]

Epoch [72/100], Loss: 0.1606230695211451, Accuracy: 0.9422484331476323


 73%|███████▎  | 73/100 [15:30<05:45, 12.79s/it]

Epoch [73/100], Loss: 0.14572310168281594, Accuracy: 0.9476845403899722


 74%|███████▍  | 74/100 [15:43<05:32, 12.80s/it]

Epoch [74/100], Loss: 0.3598666628991248, Accuracy: 0.9328908426183844


 75%|███████▌  | 75/100 [15:56<05:21, 12.86s/it]

Epoch [75/100], Loss: 0.2642841329144844, Accuracy: 0.9230675487465181


 76%|███████▌  | 76/100 [16:09<05:09, 12.88s/it]

Epoch [76/100], Loss: 0.1845602390027463, Accuracy: 0.9265146239554317


 77%|███████▋  | 77/100 [16:22<04:55, 12.85s/it]

Epoch [77/100], Loss: 0.1614183067758799, Accuracy: 0.942579213091922


 78%|███████▊  | 78/100 [16:35<04:43, 12.89s/it]

Epoch [78/100], Loss: 0.15807589258756022, Accuracy: 0.9453125


 79%|███████▉  | 79/100 [16:47<04:30, 12.88s/it]

Epoch [79/100], Loss: 0.16153942888148032, Accuracy: 0.942587917827298


 80%|████████  | 80/100 [17:00<04:17, 12.87s/it]

Epoch [80/100], Loss: 0.15480162409229356, Accuracy: 0.9440329038997215


 81%|████████  | 81/100 [17:13<04:05, 12.92s/it]

Epoch [81/100], Loss: 0.1555105276181281, Accuracy: 0.9432842966573816


 82%|████████▏ | 82/100 [17:26<03:52, 12.93s/it]

Epoch [82/100], Loss: 0.15378035410841698, Accuracy: 0.9433452298050139


 83%|████████▎ | 83/100 [17:39<03:39, 12.91s/it]

Epoch [83/100], Loss: 0.14998189282194455, Accuracy: 0.9452950905292479


 84%|████████▍ | 84/100 [17:52<03:26, 12.88s/it]

Epoch [84/100], Loss: 0.14702161381945988, Accuracy: 0.9498737813370474


 85%|████████▌ | 85/100 [18:05<03:13, 12.87s/it]

Epoch [85/100], Loss: 0.14665694702893184, Accuracy: 0.9483809192200557


 86%|████████▌ | 86/100 [18:18<03:01, 12.94s/it]

Epoch [86/100], Loss: 0.14526060596464482, Accuracy: 0.9504831128133705


 87%|████████▋ | 87/100 [18:31<02:48, 12.98s/it]

Epoch [87/100], Loss: 0.15102376665963071, Accuracy: 0.9462090877437326


 88%|████████▊ | 88/100 [18:44<02:35, 12.99s/it]

Epoch [88/100], Loss: 0.14444926172821565, Accuracy: 0.9502480849582172


 89%|████████▉ | 89/100 [18:57<02:21, 12.88s/it]

Epoch [89/100], Loss: 0.14892810589913874, Accuracy: 0.9500913997214485


 90%|█████████ | 90/100 [19:09<02:08, 12.83s/it]

Epoch [90/100], Loss: 0.14637445891566853, Accuracy: 0.9514667479108635


 91%|█████████ | 91/100 [19:22<01:54, 12.74s/it]

Epoch [91/100], Loss: 0.14820257344144946, Accuracy: 0.949991295264624


 92%|█████████▏| 92/100 [19:35<01:43, 12.89s/it]

Epoch [92/100], Loss: 0.15107994197246405, Accuracy: 0.9486159470752089


 93%|█████████▎| 93/100 [19:48<01:29, 12.82s/it]

Epoch [93/100], Loss: 0.1543859725334745, Accuracy: 0.9468010097493036


 94%|█████████▍| 94/100 [20:01<01:16, 12.80s/it]

Epoch [94/100], Loss: 0.16310324969809764, Accuracy: 0.9456693941504178


 95%|█████████▌| 95/100 [20:14<01:04, 12.86s/it]

Epoch [95/100], Loss: 0.16653966355517957, Accuracy: 0.9411342270194986


 96%|█████████▌| 96/100 [20:26<00:50, 12.75s/it]

Epoch [96/100], Loss: 0.15278287164968576, Accuracy: 0.946875


 97%|█████████▋| 97/100 [20:38<00:37, 12.65s/it]

Epoch [97/100], Loss: 0.15238240760335742, Accuracy: 0.9457651462395543


 98%|█████████▊| 98/100 [20:51<00:25, 12.52s/it]

Epoch [98/100], Loss: 0.1484641190645874, Accuracy: 0.9472144846796657


 99%|█████████▉| 99/100 [21:03<00:12, 12.45s/it]

Epoch [99/100], Loss: 0.15355829224067083, Accuracy: 0.9452602715877437


100%|██████████| 100/100 [21:15<00:00, 12.39s/it]

Epoch [100/100], Loss: 0.1497416898364087, Accuracy: 0.9453560236768802


Unseen test set performance

In [14]:
X_test_tensor = torch.tensor(X_test.values).float().unsqueeze(1).to(device)

model.eval()
outputs = model(X_test_tensor)
with torch.no_grad():
    probabilities = torch.sigmoid(outputs)
    predictions = (probabilities > 0.5).float().cpu().numpy()

    # 计算指标
    acc = accuracy_score(y_test, predictions)
    precision = precision_score(y_test, predictions)
    recall = recall_score(y_test, predictions)
    f1 = f1_score(y_test, predictions)

    print("Accuracy: ", acc, ", Precision: ", precision, ", Recall: ", recall, ", F1: ", f1)

Accuracy:  0.7851808005014015 , Precision:  0.38861772253811705 , Recall:  0.9761122345803842 , F1:  0.5559114630196149
