LSTM v2 (modified to work with the new dataset, separate attack types, and simplified normalization)

In [None]:
import pandas as pd
import numpy as np
from numpy import array
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Input
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

# parameters
# Feature columns per time step; this is the last dimension of the model input.
num_features = 20
# NOTE(review): presumably the number of scenarios stored in dataset.csv - confirm.
num_scenarios = 6
# Rows occupied by one scenario; scenario s starts at row s * scenario_length.
scenario_length = 1000
# Which scenarios to use -> 0: all, 1: DoS, 2: FDI, 3: Replay
attack_type = 0

# hyper-parameter
# Time steps in one input window.
seq_len = 20
# Rows shared by two consecutive windows (the window stride is seq_len - seq_overlap).
seq_overlap = seq_len - 1
# Units in the LSTM layer.
lstm_blocks = 64
# Training epochs and mini-batch size passed to model.fit().
epoch_val = 20
batch_size_val = 4

# 1 data preprocessing
data = pd.read_csv('dataset.csv')

# 1.1 normalization
# The two label columns are set aside before scaling so they keep their raw
# values; the 'time' column is discarded entirely.
label_columns = ['label_1', 'label_2']
labels = data[label_columns]
data = data.drop(columns=['time'] + label_columns)

# NOTE(review): the scaler is fitted on every row, i.e. before the train/test
# split further down, so test-set statistics influence the scaling - confirm
# this is acceptable for the reported metrics.
scaler = StandardScaler()
data_normalized = pd.DataFrame(scaler.fit_transform(data), columns=data.columns)

# Re-attach the unscaled labels; they become the last two columns.
data_normalized[label_columns] = labels

# 1.2 sequence generation
# Sliding windows of seq_len rows are cut separately inside every selected
# scenario, so no window ever spans two scenarios.
step_len = seq_len - seq_overlap
if step_len < 1:
    raise ValueError(
        f'seq_overlap ({seq_overlap}) must be smaller than seq_len ({seq_len})'
    )

# Scenario indices used for each attack_type value.
scenario_ranges = {
    0: range(0, num_scenarios),  # all scenarios
    1: range(0, 2),              # DoS
    2: range(2, 4),              # FDI
    3: range(4, 6),              # Replay
}
if attack_type not in scenario_ranges:
    # Previously an unknown value left scenario_range undefined (NameError).
    raise ValueError(
        f'attack_type must be one of {sorted(scenario_ranges)}, got {attack_type!r}'
    )
scenario_range = scenario_ranges[attack_type]

# Convert once to a NumPy array; slicing the DataFrame for every window is
# far slower and yields the same values.
values = data_normalized.to_numpy()
rows_needed = scenario_range.stop * scenario_length
if len(values) < rows_needed:
    raise ValueError(
        f'dataset has {len(values)} rows but the selected scenarios need {rows_needed}'
    )

sequences = []
for s in scenario_range:
    scenario_start = s * scenario_length
    # Start index of the last window that still fits inside the scenario.
    last_start = scenario_start + scenario_length - seq_len
    # "+ 1" keeps that final window; without it the last row of every
    # scenario was never used.
    for i in range(scenario_start, last_start + 1, step_len):
        sequences.append(values[i:i + seq_len])

# Shape: (number of windows, seq_len, number of columns incl. the 2 labels).
data_sequences = array(sequences)

# 2 data preparation
# 2.1 train-test split
# Every window is flattened to one row for the split and restored to
# (window length, columns) afterwards.
# NOTE(review): consecutive windows share seq_overlap rows, so a random split
# places near-identical windows in both sets - confirm this is intended.
data_reshaped = data_sequences.reshape(data_sequences.shape[0], -1)
X_train, X_test = train_test_split(data_reshaped, test_size=0.25, random_state=42)

window_len = data_sequences.shape[1]
num_columns = data_sequences.shape[2]
X_train = X_train.reshape(len(X_train), window_len, num_columns)
X_test = X_test.reshape(len(X_test), window_len, num_columns)

# 2.2 reshape data for LSTM network
# The last two columns are the labels: the target is the label pair at the
# final time step of a window, the input is every remaining column.
y_train, y_test = X_train[:, -1, -2:], X_test[:, -1, -2:]
X_train, X_test = X_train[:, :, :-2], X_test[:, :, :-2]

# 3. model creation
# 3.1 model architecture
# One LSTM layer over windows of shape (seq_len, num_features), followed by
# two sigmoid outputs - one per label column.
model = Sequential()
model.add(Input(shape=(seq_len, num_features)))
model.add(LSTM(lstm_blocks))
model.add(Dense(2, activation='sigmoid'))

# 3.2 model compile
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None, so it must not be
# wrapped in print() (that printed a stray "None" line).
model.summary()

# 3.3 model train
model.fit(X_train, y_train, epochs=epoch_val, batch_size=batch_size_val)

# 3.4 model test
# Sigmoid outputs above 0.5 are counted as a positive prediction.
y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype(int)

# 3.5 print results for each column
# Each label column is scored on its own as a separate binary problem.
num_labels = y_test.shape[1]
metric_functions = (accuracy_score, precision_score, recall_score, f1_score)

for i in range(num_labels):
    y_test_column, y_pred_column = y_test[:, i], y_pred[:, i]

    accuracy, precision, recall, f1 = (
        metric(y_test_column, y_pred_column) for metric in metric_functions
    )

    print(f'Quad {i+1}:')
    print(f'  Accuracy: {accuracy:.3f}')
    print(f'  Precision: {precision:.3f}')
    print(f'  Recall: {recall:.3f}')
    print(f'  F1-score: {f1:.3f}')

Network LSTM v1

In [None]:
import pandas as pd
import numpy as np
from numpy import array
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

# parameters
# Feature columns per time step; this is the last dimension of the model input.
num_features = 20
# Number of scenarios that are cut into windows.
num_scenarios = 6
# Rows occupied by one scenario; scenario s starts at row s * scenario_length.
scenario_length = 1000

# hyper-parameter
# Time steps in one input window.
seq_len = 20
# Rows shared by two consecutive windows (the window stride is seq_len - seq_overlap).
seq_overlap = seq_len - 1
# Units in the LSTM layer.
lstm_blocks = 64
# Training epochs and mini-batch size passed to model.fit().
epoch_val = 20
batch_size_val = 4
# Feature scaling -> 0: none, 1: StandardScaler, 2: MinMaxScaler
data_normalization = 1

# 1. data preprocessing
data = pd.read_csv('dataset.csv')

# 1.0.5 drop extra columns
data = data.drop(columns=['T_2', 'tau_phi_2', 'tau_theta_2', 'tau_psi_2'])

# 1.1 normalization
# data_normalization selects the scaling -> 0: none, 1: StandardScaler,
# 2: MinMaxScaler.
# In every mode the label columns are set aside and re-attached at the end,
# because the later X/y separation takes the last two columns as labels.
# Previously mode 0 left the labels wherever the CSV happened to place them.
label_columns = ['label_1', 'label_2']
labels = data[label_columns]
data = data.drop(columns=['time'] + label_columns)

if data_normalization == 0:
    # Copy so that attaching the labels below does not modify `data`.
    data_normalized = data.copy()
elif data_normalization in (1, 2):
    scaler = StandardScaler() if data_normalization == 1 else MinMaxScaler()
    # NOTE(review): the scaler is fitted on every row, i.e. before the
    # train/test split further down - confirm this is acceptable.
    data_normalized = pd.DataFrame(scaler.fit_transform(data), columns=data.columns)
else:
    # Previously an unknown value left `scaler` undefined (NameError).
    raise ValueError(
        f'data_normalization must be 0, 1 or 2, got {data_normalization!r}'
    )

# Labels are never scaled; they become the last two columns.
data_normalized[label_columns] = labels

# 1.2 sequence generation
# Sliding windows of seq_len rows are cut separately inside every scenario,
# so no window ever spans two scenarios.
step_len = seq_len - seq_overlap
if step_len < 1:
    raise ValueError(
        f'seq_overlap ({seq_overlap}) must be smaller than seq_len ({seq_len})'
    )

# Convert once to a NumPy array; slicing the DataFrame for every window is
# far slower and yields the same values.
values = data_normalized.to_numpy()
rows_needed = num_scenarios * scenario_length
if len(values) < rows_needed:
    raise ValueError(
        f'dataset has {len(values)} rows but {num_scenarios} scenarios need {rows_needed}'
    )

sequences = []
for s in range(num_scenarios):
    scenario_start = s * scenario_length
    # Start index of the last window that still fits inside the scenario.
    last_start = scenario_start + scenario_length - seq_len
    # "+ 1" keeps that final window; without it the last row of every
    # scenario was never used.
    for i in range(scenario_start, last_start + 1, step_len):
        sequences.append(values[i:i + seq_len])

# Shape: (number of windows, seq_len, number of columns incl. the 2 labels).
data_sequences = array(sequences)

# 2 data preparation
# 2.1 train-test split
# Every window is flattened to one row for the split and restored to
# (window length, columns) afterwards.
# NOTE(review): consecutive windows share seq_overlap rows, so a random split
# places near-identical windows in both sets - confirm this is intended.
data_reshaped = data_sequences.reshape(data_sequences.shape[0], -1)
X_train, X_test = train_test_split(data_reshaped, test_size=0.25, random_state=42)

window_len = data_sequences.shape[1]
num_columns = data_sequences.shape[2]
X_train = X_train.reshape(len(X_train), window_len, num_columns)
X_test = X_test.reshape(len(X_test), window_len, num_columns)

# 2.2 reshape data for LSTM network
# Assumes the two label columns are the last two columns: the target is the
# label pair at the final time step of a window, the input is every
# remaining column.
y_train, y_test = X_train[:, -1, -2:], X_test[:, -1, -2:]
X_train, X_test = X_train[:, :, :-2], X_test[:, :, :-2]

# 3. model creation
# 3.1 model architecture
# One LSTM layer over windows of shape (seq_len, num_features), followed by
# two sigmoid outputs - one per label column.
# NOTE(review): the v2 cell declares the input with an explicit Input layer;
# input_shape is kept here because Input is not imported in this cell.
model = Sequential()
model.add(LSTM(lstm_blocks, input_shape=(seq_len, num_features)))
model.add(Dense(2, activation='sigmoid'))

# 3.2 model compile
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None, so it must not be
# wrapped in print() (that printed a stray "None" line).
model.summary()

# 3.3 model train
model.fit(X_train, y_train, epochs=epoch_val, batch_size=batch_size_val)

# 3.4 model test
# Sigmoid outputs above 0.5 are counted as a positive prediction.
y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype(int)

# 3.5 print results for each column
# Each label column is scored on its own as a separate binary problem.
num_labels = y_test.shape[1]
metric_functions = (accuracy_score, precision_score, recall_score, f1_score)

for i in range(num_labels):
    y_test_column, y_pred_column = y_test[:, i], y_pred[:, i]

    accuracy, precision, recall, f1 = (
        metric(y_test_column, y_pred_column) for metric in metric_functions
    )

    print(f'Quad {i+1}:')
    print(f'Accuracy: {accuracy:.3f}')
    print(f'Precision: {precision:.3f}')
    print(f'Recall: {recall:.3f}')
    print(f'F1-score: {f1:.3f}')

