In [8]:
#mount drive to save models
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [13]:
#import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
%matplotlib inline
from keras.layers import Dense, Dropout, BatchNormalization, Activation
from keras.optimizers import Adam
from keras.optimizers import Adagrad
from keras.callbacks import EarlyStopping
from keras.regularizers import l2
from keras.models import Sequential

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, roc_curve, auc, precision_score, recall_score, f1_score, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Load data
file_path1 = '/content/drive/MyDrive/Final year project/datafinal/processed_train.csv'
data = pd.read_csv(file_path1)

# Split into train and test sets
X = data.drop(labels='class', axis=1)
y = data['class']
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, test_size=0.2, random_state=4)

# Convert data to PyTorch tensors
train_data = torch.tensor(X_train.values, dtype=torch.float32)
labels = torch.tensor(y_train.values, dtype=torch.long)
test_data = torch.tensor(X_test.values, dtype=torch.float32)
test_labels = torch.tensor(y_test.values, dtype=torch.long)

# Define SNN model with added hidden layer and dropout
class SNN(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size, dropout_rate=0.2):
        super(SNN, self).__init__()
        self.input_layer = nn.Linear(input_size, hidden_size1)
        self.spike = nn.Hardtanh(0, 1)
        self.hidden_layer1 = nn.Linear(hidden_size1, hidden_size2)
        self.hidden_layer2 = nn.Linear(hidden_size2, hidden_size2)
        self.dropout = nn.Dropout(p=dropout_rate)
        self.synaptic_weights = nn.Parameter(torch.rand(hidden_size2, output_size))
        self.threshold = nn.Parameter(torch.rand(1))

    def forward(self, x):
        x = self.input_layer(x)
        x = self.spike(x - self.threshold)
        x = F.relu(self.hidden_layer1(x))
        x = F.relu(self.hidden_layer2(x))
        x = self.dropout(x)
        x = torch.mm(x, self.synaptic_weights)
        return x

# Training loop with test loss and early stopping
def train_snn(model, train_data, labels, test_data, test_labels, num_epochs, learning_rate, patience=5):
    criterion = nn.CrossEntropyLoss()  # Define the loss function
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-4)  # Add L2 regularization

    history = {'train_loss': [], 'test_loss': []}
    best_test_loss = float('inf')
    no_improvement = 0

    for epoch in range(num_epochs):
        # Training
        model.train()
        outputs = model(train_data)
        loss_train = criterion(outputs, labels)
        optimizer.zero_grad()
        loss_train.backward()
        optimizer.step()

        # Testing
        model.eval()
        with torch.no_grad():
            outputs_test = model(test_data)
            loss_test = criterion(outputs_test, test_labels)

        # Store training and test loss for visualization
        history['train_loss'].append(loss_train.item())
        history['test_loss'].append(loss_test.item())

        # Early stopping
        if loss_test < best_test_loss:
            best_test_loss = loss_test
            no_improvement = 0
        else:
            no_improvement += 1
            if no_improvement >= patience:
                print(f'Early stopping at epoch {epoch + 1}')
                break

        print(f'Epoch [{epoch + 1}/{num_epochs}], Train Loss: {loss_train.item()}, Test Loss: {loss_test.item()}')

    return history

# Initialize SNN model and train
input_size = X_train.shape[1]
hidden_size1 = 128
hidden_size2 = 64
output_size = 2
dropout_rate = 0.3
num_epochs = 100
learning_rate = 0.001
patience = 10

snn_model = SNN(input_size, hidden_size1, hidden_size2, output_size, dropout_rate)
import time
s = time.time()
history = train_snn(snn_model, train_data, labels, test_data, test_labels, num_epochs, learning_rate, patience)
t = time.time() - s
print("training time:", t)

# Plot the training and test loss
plt.figure(figsize=(10, 6))
plt.plot(history['train_loss'], label='Training Loss')
plt.plot(history['test_loss'], label='Test Loss')
plt.title('Training and Test Loss over Epochs')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

# SNN on the test set
with torch.no_grad():
    test_outputs = snn_model(test_data)
    _, predicted = torch.max(test_outputs, 1)

# Calculate confusion matrix
conf_mat = confusion_matrix(test_labels, predicted)

# Plot Confusion Matrix Heatmap
plt.figure(figsize=(6, 6))
sns.heatmap(conf_mat, annot=True, fmt='d', cmap='Blues', cbar=False)
plt.title('Confusion Matrix Heatmap')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()

# Calculate specificity and FAR from confusion matrix
TN = conf_mat[0, 0]
FP = conf_mat[0, 1]
FN = conf_mat[1, 0]
TP = conf_mat[1, 1]

specificity = TN / (TN + FP)
far = FP / (FP + TN)

# Calculate additional performance metrics
precision = precision_score(test_labels, predicted)
recall = recall_score(test_labels, predicted)
f1 = f1_score(test_labels, predicted)
accuracy = accuracy_score(test_labels, predicted)

# ROC curve
fpr, tpr, thresholds = roc_curve(test_labels, test_outputs[:, 1])
roc_auc = auc(fpr, tpr)

# Plot ROC curve
plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc="lower right")
plt.show()

# Print performance metrics
print("\nPerformance Metrics:")
print("Specificity: {:.4f}".format(specificity))
print("FAR: {:.2f}".format(far))
print("Precision: {:.4f}".format(precision))
print("Recall: {:.4f}".format(recall))
print("F1 Score: {:.4f}".format(f1))
print("Accuracy: {:.4f}".format(accuracy))

Epoch [1/100], Train Loss: 0.7338631749153137, Test Loss: 0.6421011090278625
Epoch [2/100], Train Loss: 0.6557450294494629, Test Loss: 0.582107424736023
Epoch [3/100], Train Loss: 0.5954012870788574, Test Loss: 0.531035304069519
Epoch [4/100], Train Loss: 0.5445520877838135, Test Loss: 0.4870692789554596
Epoch [5/100], Train Loss: 0.503165602684021, Test Loss: 0.447680801153183
Epoch [6/100], Train Loss: 0.4631696343421936, Test Loss: 0.41122519969940186
Epoch [7/100], Train Loss: 0.4288637340068817, Test Loss: 0.3772095739841461
Epoch [8/100], Train Loss: 0.3960428833961487, Test Loss: 0.3463188409805298
Epoch [9/100], Train Loss: 0.36623185873031616, Test Loss: 0.3192221224308014
Epoch [10/100], Train Loss: 0.3385574519634247, Test Loss: 0.29532116651535034
Epoch [11/100], Train Loss: 0.31656619906425476, Test Loss: 0.2740699350833893
Epoch [12/100], Train Loss: 0.2972395122051239, Test Loss: 0.25501129031181335
Epoch [13/100], Train Loss: 0.2764638662338257, Test Loss: 0.23834733664

In [None]:
snn_model.save("/content/drive/My Drive/Final year project/models/final")

AttributeError: 'SNN' object has no attribute 'save'

In [None]:
torch.save(snn_model.state_dict(), "/content/drive/My Drive/Final year project/models/final_model")

In [None]:
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.metrics import confusion_matrix, roc_curve, auc, precision_score, recall_score, f1_score, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np

# Load data
file_path1 = '/content/drive/MyDrive/Final year project/datafinal/processed_train.csv'
data = pd.read_csv(file_path1)

# Split into train and test sets
X = data.drop(labels='class', axis=1)
y = data['class']
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, test_size=0.2, random_state=4)

# Convert data to PyTorch tensors
train_data = torch.tensor(X_train.values, dtype=torch.float32)
labels = torch.tensor(y_train.values, dtype=torch.long)
test_data = torch.tensor(X_test.values, dtype=torch.float32)
test_labels = torch.tensor(y_test.values, dtype=torch.long)

# Define SNN model with added hidden layer and dropout
class SNN(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size, dropout_rate=0.2):
        super(SNN, self).__init__()
        self.input_layer = nn.Linear(input_size, hidden_size1)
        self.spike = nn.Hardtanh(0, 1)
        self.hidden_layer1 = nn.Linear(hidden_size1, hidden_size2)
        self.hidden_layer2 = nn.Linear(hidden_size2, hidden_size2)
        self.dropout = nn.Dropout(p=dropout_rate)
        self.synaptic_weights = nn.Parameter(torch.rand(hidden_size2, output_size))
        self.threshold = nn.Parameter(torch.rand(1))

    def forward(self, x):
        x = self.input_layer(x)
        x = self.spike(x - self.threshold)
        x = F.relu(self.hidden_layer1(x))
        x = F.relu(self.hidden_layer2(x))
        x = self.dropout(x)
        x = torch.mm(x, self.synaptic_weights)
        return x

# Training loop with test loss and early stopping
def train_snn(model, train_data, labels, num_epochs, learning_rate, patience=5):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-4)  # Add L2 regularization

    history = {'train_loss': [], 'test_loss': []}
    best_test_loss = float('inf')
    no_improvement = 0

    for epoch in range(num_epochs):
        # Training
        model.train()
        outputs = model(train_data)
        loss_train = criterion(outputs, labels)
        optimizer.zero_grad()
        loss_train.backward()
        optimizer.step()

        # Testing
        model.eval()
        with torch.no_grad():
            outputs_test = model(test_data)
            loss_test = criterion(outputs_test, test_labels)

        # Store training and test loss for visualization
        history['train_loss'].append(loss_train.item())
        history['test_loss'].append(loss_test.item())

        # Early stopping
        if loss_test < best_test_loss:
            best_test_loss = loss_test
            no_improvement = 0
        else:
            no_improvement += 1
            if no_improvement >= patience:
                print(f'Early stopping at epoch {epoch + 1}')
                break

        print(f'Epoch [{epoch + 1}/{num_epochs}], Train Loss: {loss_train.item()}, Test Loss: {loss_test.item()}')

    return history

# Cross-validation with FAR calculation
def cross_validate_snn(X, y, num_folds, input_size, hidden_size1, hidden_size2, output_size, dropout_rate, num_epochs, learning_rate, patience):
    kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)
    roc_auc_scores = []
    precision_scores = []
    recall_scores = []
    f1_scores = []
    accuracy_scores = []
    far_scores = []

    for train_index, test_index in kf.split(X):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        # Convert data to PyTorch tensors
        train_data = torch.tensor(X_train.values, dtype=torch.float32)
        labels = torch.tensor(y_train.values, dtype=torch.long)
        test_data = torch.tensor(X_test.values, dtype=torch.float32)
        test_labels = torch.tensor(y_test.values, dtype=torch.long)

        snn_model = SNN(input_size, hidden_size1, hidden_size2, output_size, dropout_rate)
        history = train_snn(snn_model, train_data, labels, num_epochs, learning_rate, patience)

        with torch.no_grad():
            test_outputs = snn_model(test_data)
            _, predicted = torch.max(test_outputs, 1)

        # Calculate performance metrics
        fpr, tpr, thresholds = roc_curve(test_labels, test_outputs[:, 1])
        roc_auc = auc(fpr, tpr)
        roc_auc_scores.append(roc_auc)

        precision = precision_score(test_labels, predicted)
        recall = recall_score(test_labels, predicted)
        f1 = f1_score(test_labels, predicted)
        accuracy = accuracy_score(test_labels, predicted)

        precision_scores.append(precision)
        recall_scores.append(recall)
        f1_scores.append(f1)
        accuracy_scores.append(accuracy)

        # Calculate False Alarm Rate (FAR)
        TN, FP, FN, TP = confusion_matrix(test_labels, predicted).ravel()
        far = FP / (FP + TN)
        far_scores.append(far)

    # Calculate mean and standard deviation of performance metrics
    roc_auc_mean = np.mean(roc_auc_scores)
    roc_auc_std = np.std(roc_auc_scores)
    precision_mean = np.mean(precision_scores)
    recall_mean = np.mean(recall_scores)
    f1_mean = np.mean(f1_scores)
    accuracy_mean = np.mean(accuracy_scores)
    far_mean = np.mean(far_scores)
    far_std = np.std(far_scores)

    print("\nPerformance Metrics:")
    print(f"ROC AUC: {roc_auc_mean:.4f} ± {roc_auc_std:.4f}")
    print(f"Precision: {precision_mean:.4f}")
    print(f"Recall: {recall_mean:.4f}")
    print(f"F1 Score: {f1_mean:.4f}")
    print(f"Accuracy: {accuracy_mean:.4f}")
    print(f"False Alarm Rate (FAR): {far_mean:.4f} ± {far_std:.4f}")

# Initialize SNN model and cross-validate
input_size = X_train.shape[1]
hidden_size1 = 128
hidden_size2 = 64
output_size = 2
dropout_rate = 0.3
num_epochs = 100
learning_rate = 0.001
patience = 10
num_folds = 5

cross_validate_snn(X, y, num_folds, input_size, hidden_size1, hidden_size2, output_size, dropout_rate, num_epochs, learning_rate, patience)

Epoch [1/100], Train Loss: 0.7520098090171814, Test Loss: 0.6582658886909485
Epoch [2/100], Train Loss: 0.6719823479652405, Test Loss: 0.5941689014434814
Epoch [3/100], Train Loss: 0.6078024506568909, Test Loss: 0.5433616638183594
Epoch [4/100], Train Loss: 0.5578403472900391, Test Loss: 0.5000085830688477
Epoch [5/100], Train Loss: 0.5168921947479248, Test Loss: 0.4600018858909607
Epoch [6/100], Train Loss: 0.4769808351993561, Test Loss: 0.42323195934295654
Epoch [7/100], Train Loss: 0.4419533908367157, Test Loss: 0.38957563042640686
Epoch [8/100], Train Loss: 0.40875253081321716, Test Loss: 0.35831359028816223
Epoch [9/100], Train Loss: 0.37963181734085083, Test Loss: 0.3298010230064392
Epoch [10/100], Train Loss: 0.35299256443977356, Test Loss: 0.3039620518684387
Epoch [11/100], Train Loss: 0.3291572034358978, Test Loss: 0.28078967332839966
Epoch [12/100], Train Loss: 0.30547448992729187, Test Loss: 0.2602701783180237
Epoch [13/100], Train Loss: 0.28672829270362854, Test Loss: 0.242