# Bahdanau Attention

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, Dataset
import torch
import torch.nn as nn
import torch.optim as optim

data = pd.read_csv('processed_iot_data.csv')

features = data.drop(columns=['label']).values
labels = data['label'].values

X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

# Normalize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Define the IoTDataset class
class IoTDataset(Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return torch.tensor(self.data[idx], dtype=torch.float32), torch.tensor(self.labels[idx], dtype=torch.float32)

# Create DataLoader for training and test sets
train_dataset = IoTDataset(X_train, y_train)
test_dataset = IoTDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Define LSTM Model
class BahdanauAttention(nn.Module):
    def __init__(self, hidden_size):
        super(BahdanauAttention, self).__init__()
        self.hidden_size = hidden_size
        self.attn = nn.Linear(self.hidden_size, hidden_size)
        self.v = nn.Parameter(torch.rand(hidden_size))

    def forward(self, hidden, encoder_outputs):
        timestep = encoder_outputs.size(1)
        h = hidden.repeat(timestep, 1, 1).transpose(0, 1)
        attn_energies = self.score(h, encoder_outputs)
        return torch.softmax(attn_energies, dim=1).unsqueeze(1)

    def score(self, hidden, encoder_outputs):
        energy = torch.tanh(self.attn(encoder_outputs))
        energy = energy.transpose(1, 2)
        v = self.v.repeat(encoder_outputs.size(0), 1).unsqueeze(1)
        energy = torch.bmm(v, energy)
        return energy.squeeze(1)

class LSTMWithAttention(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, dropout=0.5):
        super(LSTMWithAttention, self).__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.attention = BahdanauAttention(hidden_size)
        self.fc = nn.Linear(hidden_size, output_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        lstm_out, (h_n, c_n) = self.lstm(x)
        attn_weights = self.attention(h_n[-1], lstm_out)
        context = attn_weights.bmm(lstm_out)
        context = context.squeeze(1)
        context = self.dropout(context)  # Apply dropout
        out = self.fc(context)
        return out

# Hyperparameters
input_size = X_train.shape[1]
hidden_size = 256  # Increased hidden size
output_size = 1
dropout = 0.5  # Dropout rate
learning_rate = 0.0001  # Lower learning rate
batch_size = 64  # Increased batch size

# Model initialization
model = LSTMWithAttention(input_size, hidden_size, output_size, dropout)

# DataLoader
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Training
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(20):  # Increase number of epochs
    model.train()
    for inputs, targets in train_loader:
        inputs = inputs.unsqueeze(1)  # Ensure inputs are of shape (batch_size, sequence_length, input_size)
        outputs = model(inputs)
        loss = criterion(outputs.squeeze(), targets)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'Epoch {epoch+1}, Loss: {loss.item()}')

# Save the trained model
torch.save(model.state_dict(), 'lstm_with_attention_model.pth')


In [9]:
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
)

# Function to calculate and print metrics
def print_metrics(y_true, y_pred, y_pred_prob):
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    specificity = recall_score(y_true, y_pred, pos_label=0)
    roc_auc = roc_auc_score(y_true, y_pred_prob)
    conf_matrix = confusion_matrix(y_true, y_pred)

    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1 Score:", f1)
    print("Specificity:", specificity)
    print("ROC AUC:", roc_auc)
    print("Confusion Matrix:\n", conf_matrix)

# Evaluate the model
def evaluate(model, data_loader):
    model.eval()
    y_true = []
    y_pred = []
    y_pred_prob = []
    with torch.no_grad():
        for inputs, targets in data_loader:
            inputs = inputs.unsqueeze(1)  # Ensure inputs are of shape (batch_size, sequence_length, input_size)
            outputs = model(inputs)
            probs = torch.sigmoid(outputs.squeeze())
            predictions = torch.round(probs)
            y_true.extend(targets.tolist())
            y_pred.extend(predictions.tolist())
            y_pred_prob.extend(probs.tolist())
    return y_true, y_pred, y_pred_prob

# Load the trained model
model = LSTMWithAttention(input_size, hidden_size, output_size, dropout)
model.load_state_dict(torch.load('lstm_with_attention_model.pth'))

# Evaluate the model and print metrics
y_true, y_pred, y_pred_prob = evaluate(model, test_loader)
print_metrics(y_true, y_pred, y_pred_prob)

Accuracy: 0.8943932605540481
Precision: 0.8978575851393189
Recall: 0.7418981836786902
F1 Score: 0.8124611290096653
Specificity: 0.9623748374883105
ROC AUC: 0.9474732139850601
Confusion Matrix:
 [[210967   8248]
 [ 25223  72502]]


# SELF ATTENTION

In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset
import torch
import torch.nn as nn
import torch.optim as optim

data = pd.read_csv('processed_iot_data.csv')

features = data.drop(columns=['label']).values
labels = data['label'].values

X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

from sklearn.preprocessing import StandardScaler

# Normalizing features to have mean of 0 and variance of 1
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Defining IoTDataset class for easy data loading to model
class IoTDataset(Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return torch.tensor(self.data[idx], dtype=torch.float32), torch.tensor(self.labels[idx], dtype=torch.float32)

# Dataloader for easy batch processing
train_dataset = IoTDataset(X_train, y_train)
test_dataset = IoTDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Defining the Self-Attention mechanism for LSTM with given hidden size, using softmax function, and standard 
# input forwarding functionality
class SelfAttention(nn.Module):
    def __init__(self, hidden_size):
        super(SelfAttention, self).__init__()
        self.hidden_size = hidden_size
        self.attention = nn.Linear(hidden_size, hidden_size)
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, lstm_output):
        attention_scores = self.attention(lstm_output)  
        attention_weights = self.softmax(attention_scores)  
        weighted_output = torch.bmm(attention_weights.transpose(1, 2), lstm_output)  
        return weighted_output.sum(dim=1)  

# Defining the LSTM model with integrated Self-Attention mecahnism, with standard forwarding functionality, and 
# standard dropout regularization so model doesn't overfit and generalizes well to the unseen data
class LSTMWithSelfAttention(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, dropout=0.5):
        super(LSTMWithSelfAttention, self).__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.self_attention = SelfAttention(hidden_size)
        self.fc = nn.Linear(hidden_size, output_size)   # Mapping output the output from the self-attention mechanism to the final output
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        lstm_out, (h_n, c_n) = self.lstm(x)
        attention_output = self.self_attention(lstm_out)
        attention_output = self.dropout(attention_output)
        out = self.fc(attention_output)
        return out

# Hyperparameters
input_size = X_train.shape[1]
hidden_size = 256
output_size = 1
dropout = 0.5
learning_rate = 0.0001

# Model initialization
model = LSTMWithSelfAttention(input_size, hidden_size, output_size, dropout)

# Applying BCE with Logits Loss function for binary classification, computing loss 
# based on 'label's.criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(20):  
    model.train()
    for inputs, targets in train_loader:
        inputs = inputs.unsqueeze(1)  
        outputs = model(inputs)
        loss = criterion(outputs.squeeze(), targets)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'Epoch {epoch+1}, Loss: {loss.item()}')

# Saving the trained model for metric scores 
torch.save(model.state_dict(), 'lstm_with_self_attention_model.pth')



Epoch 1, Loss: 0.6915909647941589
Epoch 2, Loss: 0.25700071454048157
Epoch 3, Loss: 0.1986079216003418
Epoch 4, Loss: 0.33182841539382935
Epoch 5, Loss: 0.4346599578857422
Epoch 6, Loss: 0.22414802014827728
Epoch 7, Loss: 0.2908736765384674
Epoch 8, Loss: 0.358585000038147
Epoch 9, Loss: 0.23139211535453796
Epoch 10, Loss: 0.39025577902793884
Epoch 11, Loss: 0.1071011945605278
Epoch 12, Loss: 0.23711292445659637
Epoch 13, Loss: 0.12072049081325531
Epoch 14, Loss: 0.4184686839580536
Epoch 15, Loss: 0.10816724598407745
Epoch 16, Loss: 0.3082788288593292
Epoch 17, Loss: 0.08810606598854065
Epoch 18, Loss: 0.232331782579422
Epoch 19, Loss: 0.1703435778617859
Epoch 20, Loss: 0.12610751390457153


In [11]:
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
)

# Function to calculate and print metrics
def print_metrics(y_true, y_pred, y_pred_prob):
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    specificity = recall_score(y_true, y_pred, pos_label=0)
    roc_auc = roc_auc_score(y_true, y_pred_prob)
    conf_matrix = confusion_matrix(y_true, y_pred)

    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1 Score:", f1)
    print("Specificity:", specificity)
    print("ROC AUC:", roc_auc)
    print("Confusion Matrix:\n", conf_matrix)

def evaluate(model, data_loader):
    model.eval()
    y_true = []
    y_pred = []
    y_pred_prob = []
    with torch.no_grad():
        for inputs, targets in data_loader:
            inputs = inputs.unsqueeze(1)  
            outputs = model(inputs)
            probs = torch.sigmoid(outputs.squeeze())
            predictions = torch.round(probs)
            y_true.extend(targets.tolist())
            y_pred.extend(predictions.tolist())
            y_pred_prob.extend(probs.tolist())
    return y_true, y_pred, y_pred_prob

model = LSTMWithSelfAttention(input_size, hidden_size, output_size, dropout)
model.load_state_dict(torch.load('lstm_with_self_attention_model.pth'))

y_true, y_pred, y_pred_prob = evaluate(model, test_loader)
print_metrics(y_true, y_pred, y_pred_prob)

Accuracy: 0.8942544330157127
Precision: 0.8921077700969735
Recall: 0.7474443591711435
F1 Score: 0.8133939856238481
Specificity: 0.9597016627511803
ROC AUC: 0.9479894074234172
Confusion Matrix:
 [[210381   8834]
 [ 24681  73044]]


# ML Models

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, classification_report
)
from sklearn.model_selection import GridSearchCV


# Merging the benign and spoofing datasets
data = pd.read_csv("./processed_iot_data.csv")

# Split the dataset into features and labels
features = data.drop(columns=['label']).values
labels = data['label'].values

 #Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

1. Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint
from tqdm import tqdm
import numpy as np

rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train, y_train)
y_pred = rf_classifier.predict(X_test)
y_pred_prob = rf_classifier.predict_proba(X_test)[:, 1]

print("Random Forest Classifier:")
print_metrics(y_test, y_pred, y_pred_prob)

param_dist = {
    'n_estimators': randint(100, 300), 
    'max_depth': randint(5, 20), 
    'min_samples_split': randint(2, 10), 
    'min_samples_leaf': randint(1, 10),  
    'bootstrap': [True, False]  
}

rf = RandomForestClassifier(random_state=42)

# Initialize Randomized Search for finidng best hyper-parameters
random_search = RandomizedSearchCV(
    rf, 
    param_distributions=param_dist,
    n_iter=50,  # Number of parameter settings that are sampled
    scoring='accuracy',  # Scoring metric to evaluate the model
    cv=5,  # 5-fold cross-validation
    verbose=0,  # Set to 0 to use custom tqdm tracker
    random_state=42,
    n_jobs=-1  # Use all available cores
)

# Wrap the fit method with tqdm progress bar
class TqdmCallback:
    def __init__(self, total):
        self.progress_bar = tqdm(total=total)

    def __call__(self, n):
        self.progress_bar.update(n)

n_iter = random_search.n_iter
callback = TqdmCallback(total=n_iter)

# Wrap fit function with the progress bar
for i in tqdm(range(n_iter), desc="Tuning progress"):
    random_search.fit(X_train, y_train)
    callback(1)

# Print the best parameters and the best score
print("Best Parameters:", random_search.best_params_)
print("Best Accuracy Score:", random_search.best_score_)

# Use the best estimator to make predictions
best_rf = random_search.best_estimator_
y_pred = best_rf.predict(X_test)
y_pred_prob = best_rf.predict_proba(X_test)[:, 1]

# Print metrics or any other analysis you want to perform
print_metrics(y_test, y_pred, y_pred_prob)

2. XGBoost Classifier

In [None]:
# XGBoost Classifier
xgb_classifier = XGBClassifier()
xgb_classifier.fit(X_train, y_train)
y_pred = xgb_classifier.predict(X_test)
y_pred_prob = xgb_classifier.predict_proba(X_test)[:, 1]

print("XGBoost Classifier:")
print_metrics(y_test, y_pred, y_pred_prob)

# Grid Search with XGBoost
param_grid = {
    'learning_rate': [1.0, 0.1, 0.01],
    'max_depth': [3, 5, 7],
    'n_estimators': [100, 200, 300],
}
grid_search = GridSearchCV(XGBClassifier(), param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)
best_params = grid_search.best_params_
best_xgb_classifier = XGBClassifier(**best_params)
best_xgb_classifier.fit(X_train, y_train)
y_pred = best_xgb_classifier.predict(X_test)
y_pred_prob = best_xgb_classifier.predict_proba(X_test)[:, 1]

print("XGBoost Classifier with Grid Search:")
print("Best Hyperparameters:", best_params)
print_metrics(y_test, y_pred, y_pred_prob)

