In [1]:
# !pip install --upgrade scipy

In [2]:
import torch 
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader

import os, pathlib, glob, random
import numpy as np
import matplotlib.pyplot as plt 

from sklearn.metrics import confusion_matrix
import scipy
from scipy import io

In [3]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [4]:
# Global hyper-parameters.
# Mini-batch size; assigned again (same value) right before the loaders are built.
batch_size = 32
# NOTE(review): not referenced anywhere else in the visible notebook — confirm it is still needed.
output_nodes = 10
# Step size handed to the Adam optimizer.
learning_rate = 0.001

In [5]:
import os
import random
import scipy.io
import numpy as np
from torch.utils.data import Dataset, DataLoader
# ======================
# Root directories of the two feature sets (train / dev / test splits)
# ======================
# NOTE(review): the *_mgdcc variables point at an "mfcc" directory tree —
# confirm which feature type these files actually contain.
train_mgdcc = r"/kaggle/input/mfcc-new/mfcc/train"
val_mgdcc   = r"/kaggle/input/mfcc-new/mfcc/dev"
test_mgdcc  = r"/kaggle/input/mfcc-new/mfcc/test"

train_lfcc = r"/kaggle/input/lfcc-feature/LFCC_Features/train"
val_lfcc   = r"/kaggle/input/lfcc-feature/LFCC_Features/dev"
test_lfcc  = r"/kaggle/input/lfcc-feature/LFCC_Features/test"

# ======================
# Dataset Class
# ======================
class MixedFeatureDataset(Dataset):
    """Paired feature dataset read from two parallel directory trees.

    Both roots are expected to hold one sub-directory per class. A sample is
    kept only when a file with the same name exists under the same class in
    both trees. Class indices follow the sorted order of the class
    sub-directories found in ``mgdcc_dir``.

    Args:
        mgdcc_dir: root directory of the first feature set.
        lfcc_dir: root directory of the second feature set.
        max_len: number of rows every returned sample is zero-padded or
            truncated to.
    """

    def __init__(self, mgdcc_dir, lfcc_dir, max_len=10):
        """Load MGDCC + LFCC features for the same audio files."""
        self.files = []
        self.class_to_idx = {}
        self.max_len = max_len

        classes = sorted(entry.name for entry in os.scandir(mgdcc_dir) if entry.is_dir())

        for c in classes:
            if c not in self.class_to_idx:
                self.class_to_idx[c] = len(self.class_to_idx)

            mgdcc_cdir = os.path.join(mgdcc_dir, c)
            lfcc_cdir = os.path.join(lfcc_dir, c)

            for f in os.listdir(mgdcc_cdir):
                mgdcc_file = os.path.join(mgdcc_cdir, f)
                lfcc_file = os.path.join(lfcc_cdir, f)

                if os.path.exists(lfcc_file):   # only keep if both exist
                    self.files.append((mgdcc_file, lfcc_file, self.class_to_idx[c]))

        # Uses the global `random` state; seed it beforehand for a reproducible order.
        random.shuffle(self.files)

    def __len__(self):
        """Number of file pairs found in both trees."""
        return len(self.files)

    @staticmethod
    def _pad_rows(x, target_rows):
        """Zero-pad ``x`` at the bottom so that it has at least ``target_rows`` rows."""
        missing = target_rows - x.shape[0]
        if missing > 0:
            return np.pad(x, pad_width=((0, missing), (0, 0)), mode='constant')
        return x

    def __getitem__(self, idx):
        """Return ``(data, label)`` for sample ``idx``.

        ``data`` is a 2-D numpy array with exactly ``self.max_len`` rows whose
        columns are the columns of the first feature matrix followed by the
        columns of the second one (both read from the ``'final'`` variable of
        the .mat file and transposed).

        Returns ``None`` (after printing the error) when either file cannot be
        loaded or combined.
        """
        mgdcc_file, lfcc_file, label = self.files[idx]
        try:
            mgdcc_vals = scipy.io.loadmat(mgdcc_file)['final'].T
            lfcc_vals = scipy.io.loadmat(lfcc_file)['final'].T

            # Give both matrices the same row count so they can be joined column-wise.
            max_rows = max(mgdcc_vals.shape[0], lfcc_vals.shape[0])
            data = np.hstack([
                self._pad_rows(mgdcc_vals, max_rows),
                self._pad_rows(lfcc_vals, max_rows),
            ])

            # Fix the row count to the length requested at construction time
            # (previously a hard-coded 10 silently overrode `max_len`).
            data = self._pad_rows(data, self.max_len)[:self.max_len, :]

        except Exception as e:
            print(f"Error loading files {mgdcc_file}, {lfcc_file}: {str(e)}")
            # NOTE(review): a None sample cannot be batched by DataLoader's
            # default collate function; use a collate_fn that drops None or
            # re-raise here if failures should be fatal.
            return None

        return data, label
    
    
    
# ======================
# DataLoader wrapper
# ======================
class PtDataLoader(DataLoader):
    """DataLoader that builds its own ``MixedFeatureDataset`` from two feature roots.

    Args:
        mgdcc_dir: root directory of the first feature set.
        lfcc_dir: root directory of the second feature set.
        batch_size: number of samples per batch.
        shuffle: whether the loader reshuffles the samples every epoch.
        max_len: row count per sample, forwarded to ``MixedFeatureDataset``.
        **loader_kwargs: any further ``torch.utils.data.DataLoader`` options
            (e.g. ``num_workers``, ``pin_memory``, ``collate_fn``).
    """

    def __init__(self, mgdcc_dir, lfcc_dir, batch_size, shuffle=True, max_len=10, **loader_kwargs):
        dataset = MixedFeatureDataset(mgdcc_dir, lfcc_dir, max_len=max_len)
        super().__init__(dataset, batch_size=batch_size, shuffle=shuffle, **loader_kwargs)

# ======================
# Load datasets
# ======================
batch_size = 32

# Only the training loader reshuffles every epoch; the order in which the
# evaluation samples are visited does not affect accuracy.
train_dataloader = PtDataLoader(train_mgdcc, train_lfcc, batch_size=batch_size)
val_dataloader   = PtDataLoader(val_mgdcc, val_lfcc, batch_size=batch_size, shuffle=False)
test_dataloader  = PtDataLoader(test_mgdcc, test_lfcc, batch_size=batch_size, shuffle=False)

# Reuse the dataset each loader already built instead of scanning every
# directory tree a second time.
train_dataset = train_dataloader.dataset
val_dataset   = val_dataloader.dataset
test_dataset  = test_dataloader.dataset

train_count = len(train_dataset)
val_count   = len(val_dataset)
test_count  = len(test_dataset)

print(f"Training samples: {train_count}\nValidation samples: {val_count}\nTesting samples: {test_count}")


Training samples: 111633
Validation samples: 37152
Testing samples: 37971


In [6]:
# Dropout probability. None of the classes defined in this cell read it; the
# same value is assigned again before BiLSTMWithCrossAttention is defined.
drop_amount = 0.255

import torch
import torch.nn as nn
import torch.nn.functional as F
from collections import OrderedDict
from torch.nn.parameter import Parameter
from torch.utils import data

class Res2NetBlock(nn.Module):
    """1-D convolutional block that convolves channel groups separately.

    Pipeline: 1x1 conv -> BatchNorm -> ReLU -> split the channels into
    ``scale`` equal groups -> the first group is passed through unchanged and
    each remaining group goes through its own convolution -> concatenate ->
    1x1 conv -> BatchNorm -> ReLU.

    The groups are processed independently of one another and no skip
    connection is added around the block.

    Args:
        in_channels: channels of the input tensor.
        out_channels: channels produced by the block; must be divisible by ``scale``.
        scale: number of channel groups.
        kernel_size, stride, padding: settings of the per-group convolutions.
    """

    def __init__(self, in_channels, out_channels, scale=4, kernel_size=3, stride=1, padding=1):
        super().__init__()
        assert out_channels % scale == 0, "Output channels must be divisible by scale"
        group_width = out_channels // scale
        self.scale = scale
        self.split_channels = group_width

        # Point-wise projection to the working channel count.
        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size=1, stride=1, bias=False)
        self.bn1 = nn.BatchNorm1d(out_channels)

        # One convolution per group, except for the first (identity) group.
        group_convs = []
        for _ in range(scale - 1):
            group_convs.append(
                nn.Conv1d(group_width, group_width, kernel_size,
                          stride=stride, padding=padding, bias=False)
            )
        self.convs = nn.ModuleList(group_convs)

        self.bn2 = nn.BatchNorm1d(out_channels)
        self.conv3 = nn.Conv1d(out_channels, out_channels, kernel_size=1, stride=1, bias=False)

        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Apply the block to ``x`` and return the activated feature map."""
        hidden = self.relu(self.bn1(self.conv1(x)))

        groups = torch.chunk(hidden, self.scale, dim=1)
        # Build a fresh list rather than accumulating in place.
        pieces = [groups[0]]
        pieces.extend(conv(group) for conv, group in zip(self.convs, groups[1:]))
        merged = torch.cat(pieces, dim=1)

        return self.relu(self.bn2(self.conv3(merged)))


class AMSoftmaxLoss(nn.Module):
    """Additive-margin cosine classification head.

    Despite its name, ``forward`` returns scaled logits, not a loss value.

    Args:
        in_features: size of the incoming embedding.
        out_features: number of classes.
        s: scale applied to the logits.
        m: margin subtracted from the cosine of the target class.
    """

    def __init__(self, in_features, out_features, s=30.0, m=0.4):
        super().__init__()
        self.s = s
        self.m = m
        # Class weights are double precision, one row per class.
        class_weights = torch.DoubleTensor(out_features, in_features)
        self.weight = Parameter(class_weights)
        nn.init.xavier_uniform_(self.weight)

    def forward(self, x, labels):
        """Return ``s * (cos - m)`` for each sample's target class and ``s * cos`` elsewhere."""
        cosine = F.linear(F.normalize(x), F.normalize(self.weight))
        target_index = labels.view(-1, 1).long()
        # 1.0 at each sample's target class, 0.0 elsewhere.
        target_mask = torch.zeros_like(cosine).scatter_(1, target_index, 1)
        return self.s * (cosine - self.m * target_mask)


class AASIST2(nn.Module):
    """Three Res2Net blocks -> GRU -> two linear layers, plus an AM-softmax head.

    ``d_args`` keys read: ``'in_channels'``, ``'filts'`` (three entries),
    ``'gru_node'``, ``'nb_gru_layer'``, ``'nb_fc_node'``, ``'nb_classes'``.

    NOTE(review): when ``labels`` is given the output comes from the AM-softmax
    head applied to the ``fc1`` output and ``fc2`` does not contribute; without
    labels the output is a softmax over ``fc2``. The two paths therefore use
    different classifier weights — confirm this is intended.
    NOTE(review): the AM-softmax weight is created in double precision while
    the other layers use the default dtype, so the model presumably has to be
    cast (e.g. ``.double()``) before use — verify.
    """

    def __init__(self, d_args):
        super().__init__()
        in_channels = d_args['in_channels']
        filts = d_args['filts']

        self.res2net_block1 = Res2NetBlock(in_channels, filts[0], scale=4)
        self.res2net_block2 = Res2NetBlock(filts[0], filts[1], scale=4)
        self.res2net_block3 = Res2NetBlock(filts[1], filts[2], scale=4)

        gru_width = d_args['gru_node']
        self.gru = nn.GRU(input_size=filts[2],
                          hidden_size=gru_width,
                          num_layers=d_args['nb_gru_layer'],
                          batch_first=True)

        fc_width = d_args['nb_fc_node']
        n_classes = d_args['nb_classes']
        # fc1 consumes the GRU hidden state; fc2 maps to the class scores.
        self.fc1 = nn.Linear(gru_width, fc_width)
        self.fc2 = nn.Linear(fc_width, n_classes)

        # The margin head works on the fc1 output.
        self.am_softmax = AMSoftmaxLoss(fc_width, n_classes)

    def forward(self, x, labels=None, is_test=False):
        """Return AM-softmax logits when ``labels`` is given, else class probabilities.

        ``is_test`` is accepted but not read.
        """
        for block in (self.res2net_block1, self.res2net_block2, self.res2net_block3):
            x = block(x)

        # The GRU is batch_first, so move the channel axis to the end.
        x = x.permute(0, 2, 1)
        self.gru.flatten_parameters()
        seq_out, _ = self.gru(x)
        embedding = self.fc1(seq_out[:, -1, :])  # last time step only
        logits = self.fc2(embedding)

        if labels is None:
            return F.softmax(logits, dim=1)
        return self.am_softmax(embedding, labels)

# Configuration consumed by AASIST2.__init__.
d_args = {
    'in_channels': 1,
    'filts': [64, 128, 256],
    'gru_node': 128,
    'nb_gru_layer': 2,
    'nb_fc_node': 64,
    'nb_classes': 2
}
# NOTE(review): `model` is reassigned to a BiLSTMWithCrossAttention instance
# further down, so this AASIST2 instance is not the one that gets trained.
model = AASIST2(d_args)


In [7]:
import torch
from torch import nn
from torch.nn import Parameter
import torch.nn.functional as F

In [8]:
# drop_amount = 0.255

# class BiLSTMClassifier(nn.Module):
#     def __init__(self, input_size, hidden_size, num_layers, num_classes):
#         super(BiLSTMClassifier, self).__init__()
#         self.hidden_size = hidden_size
#         self.num_layers = num_layers
#         self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, bidirectional=True)
#         self.dropout = nn.Dropout(p=drop_amount)
#         self.fc = nn.Linear(hidden_size*2, num_classes)

#     def forward(self, x):
#         h0 = torch.zeros(self.num_layers*2, x.size(0), self.hidden_size).to(device=x.device, dtype=torch.double)
#         c0 = torch.zeros(self.num_layers*2, x.size(0), self.hidden_size).to(device=x.device, dtype=torch.double)
#         out, _ = self.lstm(x, (h0, c0))
#         out = self.dropout(out)
#         # Extract the output of the last time step from both directions
#         last_hidden_state = torch.cat((out[:, -1, :self.hidden_size], out[:, 0, self.hidden_size:]), dim=1)
#         output = self.fc(last_hidden_state)
#         return output
import torch
import torch.nn as nn
import torch.nn.functional as F

drop_amount = 0.255  # dropout probability shared by the attention layer and the fused features


class BiLSTMWithCrossAttention(nn.Module):
    """Bidirectional LSTM whose outputs attend back onto the projected input.

    Queries are the BiLSTM outputs (width ``2 * hidden_size``); keys and
    values are linear projections of the raw input to the same width. The
    attention result is added to the LSTM output, passed through dropout and
    reduced to a single vector per sample before the final linear classifier.

    Args:
        input_size: feature width of every input step.
        hidden_size: LSTM hidden width per direction.
        num_layers: number of stacked LSTM layers.
        num_classes: number of output scores.
        num_heads: attention heads.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, num_heads=4):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.lstm = nn.LSTM(input_size, hidden_size, num_layers,
                            batch_first=True, bidirectional=True)

        # Width shared by queries, keys and values.
        attn_width = 2 * hidden_size
        self.k_proj = nn.Linear(input_size, attn_width)
        self.v_proj = nn.Linear(input_size, attn_width)
        self.cross_attn = nn.MultiheadAttention(attn_width, num_heads,
                                                dropout=drop_amount, batch_first=True)

        self.dropout = nn.Dropout(p=drop_amount)
        self.fc = nn.Linear(attn_width, num_classes)

    def forward(self, x):
        """Map ``x`` of shape [batch, seq_len, input_size] to [batch, num_classes] scores."""
        # No explicit (h0, c0): nn.LSTM starts from zero states when none are passed.
        lstm_out, _ = self.lstm(x)                                   # [B, T, 2H]

        keys = self.k_proj(x)                                        # [B, T, 2H]
        values = self.v_proj(x)                                      # [B, T, 2H]
        attended, _ = self.cross_attn(lstm_out, keys, values)        # [B, T, 2H]

        # Residual connection followed by dropout.
        fused = self.dropout(lstm_out + attended)                    # [B, T, 2H]

        # First half of the channels at the last step + second half at the first step.
        half = self.hidden_size
        forward_last = fused[:, -1, :half]
        backward_first = fused[:, 0, half:]
        summary = torch.cat((forward_last, backward_first), dim=1)   # [B, 2H]

        return self.fc(summary)                                      # [B, num_classes]


In [9]:
# Define the parameters
# Model hyper-parameters (single source of truth for the constructor below).
input_size = 40    # feature columns per sequence step
hidden_size = 256  # LSTM hidden width per direction
num_layers = 2
num_classes = 2

model = BiLSTMWithCrossAttention(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    num_classes=num_classes,
    num_heads=4,
)

# Move to the target device in double precision once; as the last expression
# of the cell this also displays the model.
model.to(device, dtype=torch.double)

BiLSTMWithCrossAttention(
  (lstm): LSTM(40, 256, num_layers=2, batch_first=True, bidirectional=True)
  (k_proj): Linear(in_features=40, out_features=512, bias=True)
  (v_proj): Linear(in_features=40, out_features=512, bias=True)
  (cross_attn): MultiheadAttention(
    (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
  )
  (dropout): Dropout(p=0.255, inplace=False)
  (fc): Linear(in_features=512, out_features=2, bias=True)
)

In [10]:
import torch
import torch.nn as nn
import torch.optim as optim

In [11]:
# Cross-entropy on the raw class scores returned by the model.
loss_function = nn.CrossEntropyLoss()
# Adam over every model parameter, using the learning rate set near the top of the notebook.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [12]:
# Show the layer structure of the model that is about to be trained.
print(model)

BiLSTMWithCrossAttention(
  (lstm): LSTM(40, 256, num_layers=2, batch_first=True, bidirectional=True)
  (k_proj): Linear(in_features=40, out_features=512, bias=True)
  (v_proj): Linear(in_features=40, out_features=512, bias=True)
  (cross_attn): MultiheadAttention(
    (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
  )
  (dropout): Dropout(p=0.255, inplace=False)
  (fc): Linear(in_features=512, out_features=2, bias=True)
)


In [13]:
from tqdm import tqdm

# Model training and validation
n_total_steps = len(train_dataloader)
train_accuracy_list = []
train_loss_list = []
val_accuracy_list = []
max_acc = 0
num_epochs = 40
pred_labels = []
act_labels = []
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Normalise by the size of the datasets the loaders actually iterate over.
n_train_samples = len(train_dataloader.dataset)
n_val_samples = len(val_dataloader.dataset)

for epoch in range(num_epochs):
    model.train()
    train_accuracy = 0.0
    train_loss = 0.0

    # Training loop with tqdm
    train_loop = tqdm(enumerate(train_dataloader), total=len(train_dataloader), desc=f"Epoch {epoch+1}/{num_epochs} [Train]")
    for batch_idx, (images, labels) in train_loop:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()

        # Running statistics: the loss is a batch mean, so weight it by the batch size.
        train_loss += loss.item() * images.size(0)
        prediction = outputs.detach().argmax(dim=1)
        train_accuracy += int((prediction == labels).sum().item())

        # Update tqdm bar
        train_loop.set_postfix(loss=loss.item())

    train_accuracy /= n_train_samples
    train_loss /= n_train_samples
    train_accuracy_list.append(train_accuracy)
    train_loss_list.append(train_loss)

    # Validation loop with tqdm
    model.eval()
    val_accuracy = 0.0
    pred = []
    lab = []

    val_loop = tqdm(val_dataloader, total=len(val_dataloader), desc=f"Epoch {epoch+1}/{num_epochs} [Val]")
    with torch.no_grad():
        for images, labels in val_loop:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            prediction = outputs.argmax(dim=1)
            val_accuracy += int((prediction == labels).sum().item())
            pred.extend(prediction.cpu().tolist())
            lab.extend(labels.cpu().tolist())

    val_accuracy /= n_val_samples
    val_accuracy_list.append(val_accuracy)

    # Keep the predictions and the checkpoint of the best validation epoch so far.
    if max_acc < val_accuracy:
        max_acc = val_accuracy
        pred_labels = pred
        act_labels = lab              # previously written to `actual_labels`, leaving act_labels empty
        actual_labels = act_labels    # old name kept as an alias for backward compatibility
        # The whole module is pickled (not just its state_dict), so the class
        # definition must be importable when the checkpoint is loaded.
        torch.save(model, "best_accuracy_model_BiLSTM.pth")

    print(f"Epoch: {epoch + 1}/{num_epochs}, Train Loss: {train_loss:.4f}, "
          f"Train Accuracy: {train_accuracy:.4f}, Val Accuracy: {val_accuracy:.4f}")

print("Max Accuracy:", max_acc)


Epoch 1/40 [Train]: 100%|██████████| 3489/3489 [28:24<00:00,  2.05it/s, loss=0.508]
Epoch 1/40 [Val]: 100%|██████████| 1161/1161 [09:18<00:00,  2.08it/s]


Epoch: 1/40, Train Loss: 0.3159, Train Accuracy: 0.8696, Val Accuracy: 0.8597


Epoch 2/40 [Train]: 100%|██████████| 3489/3489 [08:52<00:00,  6.55it/s, loss=0.18]
Epoch 2/40 [Val]: 100%|██████████| 1161/1161 [03:41<00:00,  5.25it/s]


Epoch: 2/40, Train Loss: 0.2202, Train Accuracy: 0.9191, Val Accuracy: 0.8980


Epoch 3/40 [Train]: 100%|██████████| 3489/3489 [07:21<00:00,  7.90it/s, loss=0.166]
Epoch 3/40 [Val]: 100%|██████████| 1161/1161 [02:44<00:00,  7.06it/s]


Epoch: 3/40, Train Loss: 0.1451, Train Accuracy: 0.9439, Val Accuracy: 0.9058


Epoch 4/40 [Train]: 100%|██████████| 3489/3489 [09:38<00:00,  6.03it/s, loss=0.263]
Epoch 4/40 [Val]: 100%|██████████| 1161/1161 [03:08<00:00,  6.16it/s]


Epoch: 4/40, Train Loss: 0.1139, Train Accuracy: 0.9564, Val Accuracy: 0.9204


Epoch 5/40 [Train]: 100%|██████████| 3489/3489 [08:25<00:00,  6.90it/s, loss=0.0258]
Epoch 5/40 [Val]: 100%|██████████| 1161/1161 [02:42<00:00,  7.13it/s]


Epoch: 5/40, Train Loss: 0.0901, Train Accuracy: 0.9657, Val Accuracy: 0.9184


Epoch 6/40 [Train]: 100%|██████████| 3489/3489 [08:00<00:00,  7.26it/s, loss=0.0374]
Epoch 6/40 [Val]: 100%|██████████| 1161/1161 [01:52<00:00, 10.35it/s]


Epoch: 6/40, Train Loss: 0.0730, Train Accuracy: 0.9725, Val Accuracy: 0.9286


Epoch 7/40 [Train]: 100%|██████████| 3489/3489 [07:32<00:00,  7.70it/s, loss=0.0497]
Epoch 7/40 [Val]: 100%|██████████| 1161/1161 [01:53<00:00, 10.27it/s]


Epoch: 7/40, Train Loss: 0.0599, Train Accuracy: 0.9779, Val Accuracy: 0.9244


Epoch 8/40 [Train]: 100%|██████████| 3489/3489 [08:47<00:00,  6.62it/s, loss=0.00756]
Epoch 8/40 [Val]: 100%|██████████| 1161/1161 [03:04<00:00,  6.30it/s]


Epoch: 8/40, Train Loss: 0.0516, Train Accuracy: 0.9812, Val Accuracy: 0.9291


Epoch 9/40 [Train]: 100%|██████████| 3489/3489 [09:24<00:00,  6.18it/s, loss=0.0131]
Epoch 9/40 [Val]: 100%|██████████| 1161/1161 [03:18<00:00,  5.85it/s]


Epoch: 9/40, Train Loss: 0.0425, Train Accuracy: 0.9845, Val Accuracy: 0.9370


Epoch 10/40 [Train]: 100%|██████████| 3489/3489 [08:07<00:00,  7.16it/s, loss=0.0937]
Epoch 10/40 [Val]: 100%|██████████| 1161/1161 [01:58<00:00,  9.79it/s]


Epoch: 10/40, Train Loss: 0.0375, Train Accuracy: 0.9862, Val Accuracy: 0.9342


Epoch 11/40 [Train]: 100%|██████████| 3489/3489 [05:50<00:00,  9.96it/s, loss=0.00917]
Epoch 11/40 [Val]: 100%|██████████| 1161/1161 [01:54<00:00, 10.12it/s]


Epoch: 11/40, Train Loss: 0.0337, Train Accuracy: 0.9880, Val Accuracy: 0.9287


Epoch 12/40 [Train]: 100%|██████████| 3489/3489 [05:56<00:00,  9.78it/s, loss=0.131]
Epoch 12/40 [Val]: 100%|██████████| 1161/1161 [01:53<00:00, 10.23it/s]


Epoch: 12/40, Train Loss: 0.0301, Train Accuracy: 0.9894, Val Accuracy: 0.9250


Epoch 13/40 [Train]: 100%|██████████| 3489/3489 [07:58<00:00,  7.28it/s, loss=0.00164]
Epoch 13/40 [Val]: 100%|██████████| 1161/1161 [03:11<00:00,  6.07it/s]


Epoch: 13/40, Train Loss: 0.0277, Train Accuracy: 0.9902, Val Accuracy: 0.9274


Epoch 14/40 [Train]: 100%|██████████| 3489/3489 [09:38<00:00,  6.03it/s, loss=0.0037]
Epoch 14/40 [Val]: 100%|██████████| 1161/1161 [02:58<00:00,  6.49it/s]


Epoch: 14/40, Train Loss: 0.0249, Train Accuracy: 0.9912, Val Accuracy: 0.9331


Epoch 15/40 [Train]: 100%|██████████| 3489/3489 [09:01<00:00,  6.44it/s, loss=0.00484]
Epoch 15/40 [Val]: 100%|██████████| 1161/1161 [02:02<00:00,  9.46it/s]


Epoch: 15/40, Train Loss: 0.0233, Train Accuracy: 0.9921, Val Accuracy: 0.9361


Epoch 16/40 [Train]: 100%|██████████| 3489/3489 [06:47<00:00,  8.56it/s, loss=0.000359]
Epoch 16/40 [Val]: 100%|██████████| 1161/1161 [02:12<00:00,  8.74it/s]


Epoch: 16/40, Train Loss: 0.0323, Train Accuracy: 0.9903, Val Accuracy: 0.9315


Epoch 17/40 [Train]: 100%|██████████| 3489/3489 [06:44<00:00,  8.62it/s, loss=0.000171]
Epoch 17/40 [Val]: 100%|██████████| 1161/1161 [02:02<00:00,  9.51it/s]


Epoch: 17/40, Train Loss: 0.0194, Train Accuracy: 0.9932, Val Accuracy: 0.9353


Epoch 18/40 [Train]: 100%|██████████| 3489/3489 [06:36<00:00,  8.79it/s, loss=0.0229]
Epoch 18/40 [Val]: 100%|██████████| 1161/1161 [01:59<00:00,  9.75it/s]


Epoch: 18/40, Train Loss: 0.0190, Train Accuracy: 0.9935, Val Accuracy: 0.9365


Epoch 19/40 [Train]: 100%|██████████| 3489/3489 [06:05<00:00,  9.55it/s, loss=0.000291]
Epoch 19/40 [Val]: 100%|██████████| 1161/1161 [01:56<00:00,  9.93it/s]


Epoch: 19/40, Train Loss: 0.0187, Train Accuracy: 0.9938, Val Accuracy: 0.9337


Epoch 20/40 [Train]: 100%|██████████| 3489/3489 [06:19<00:00,  9.21it/s, loss=0.003]
Epoch 20/40 [Val]: 100%|██████████| 1161/1161 [01:58<00:00,  9.82it/s]


Epoch: 20/40, Train Loss: 0.0193, Train Accuracy: 0.9933, Val Accuracy: 0.9358


Epoch 21/40 [Train]: 100%|██████████| 3489/3489 [07:08<00:00,  8.15it/s, loss=0.000149]
Epoch 21/40 [Val]: 100%|██████████| 1161/1161 [01:49<00:00, 10.63it/s]


Epoch: 21/40, Train Loss: 0.0175, Train Accuracy: 0.9940, Val Accuracy: 0.9360


Epoch 22/40 [Train]: 100%|██████████| 3489/3489 [06:20<00:00,  9.17it/s, loss=0.0364]
Epoch 22/40 [Val]: 100%|██████████| 1161/1161 [01:48<00:00, 10.67it/s]


Epoch: 22/40, Train Loss: 0.0173, Train Accuracy: 0.9940, Val Accuracy: 0.9346


Epoch 23/40 [Train]: 100%|██████████| 3489/3489 [06:00<00:00,  9.68it/s, loss=0.000938]
Epoch 23/40 [Val]: 100%|██████████| 1161/1161 [01:48<00:00, 10.65it/s]


Epoch: 23/40, Train Loss: 0.0181, Train Accuracy: 0.9944, Val Accuracy: 0.9362


Epoch 24/40 [Train]: 100%|██████████| 3489/3489 [05:49<00:00,  9.99it/s, loss=0.0062]
Epoch 24/40 [Val]: 100%|██████████| 1161/1161 [01:46<00:00, 10.90it/s]


Epoch: 24/40, Train Loss: 0.0152, Train Accuracy: 0.9946, Val Accuracy: 0.9348


Epoch 25/40 [Train]: 100%|██████████| 3489/3489 [07:14<00:00,  8.04it/s, loss=0.000872]
Epoch 25/40 [Val]: 100%|██████████| 1161/1161 [02:13<00:00,  8.67it/s]


Epoch: 25/40, Train Loss: 0.0151, Train Accuracy: 0.9950, Val Accuracy: 0.9354


Epoch 26/40 [Train]: 100%|██████████| 3489/3489 [09:08<00:00,  6.36it/s, loss=0.0019]
Epoch 26/40 [Val]: 100%|██████████| 1161/1161 [03:21<00:00,  5.76it/s]


Epoch: 26/40, Train Loss: 0.0140, Train Accuracy: 0.9952, Val Accuracy: 0.9341


Epoch 27/40 [Train]: 100%|██████████| 3489/3489 [09:40<00:00,  6.01it/s, loss=0.0008]
Epoch 27/40 [Val]: 100%|██████████| 1161/1161 [03:12<00:00,  6.04it/s]


Epoch: 27/40, Train Loss: 0.0139, Train Accuracy: 0.9954, Val Accuracy: 0.9326


Epoch 28/40 [Train]: 100%|██████████| 3489/3489 [09:07<00:00,  6.38it/s, loss=0.00427]
Epoch 28/40 [Val]: 100%|██████████| 1161/1161 [03:24<00:00,  5.68it/s]


Epoch: 28/40, Train Loss: 0.0145, Train Accuracy: 0.9950, Val Accuracy: 0.9320


Epoch 29/40 [Train]: 100%|██████████| 3489/3489 [09:18<00:00,  6.25it/s, loss=0.00376]
Epoch 29/40 [Val]: 100%|██████████| 1161/1161 [03:17<00:00,  5.89it/s]


Epoch: 29/40, Train Loss: 0.0157, Train Accuracy: 0.9946, Val Accuracy: 0.9302


Epoch 30/40 [Train]: 100%|██████████| 3489/3489 [08:07<00:00,  7.15it/s, loss=0.000348]
Epoch 30/40 [Val]: 100%|██████████| 1161/1161 [01:48<00:00, 10.74it/s]


Epoch: 30/40, Train Loss: 0.0133, Train Accuracy: 0.9953, Val Accuracy: 0.9336


Epoch 31/40 [Train]: 100%|██████████| 3489/3489 [06:35<00:00,  8.82it/s, loss=0.00102]
Epoch 31/40 [Val]: 100%|██████████| 1161/1161 [01:56<00:00,  9.93it/s]


Epoch: 31/40, Train Loss: 0.0126, Train Accuracy: 0.9959, Val Accuracy: 0.9327


Epoch 32/40 [Train]: 100%|██████████| 3489/3489 [06:30<00:00,  8.94it/s, loss=0.00426]
Epoch 32/40 [Val]: 100%|██████████| 1161/1161 [01:52<00:00, 10.28it/s]


Epoch: 32/40, Train Loss: 0.0128, Train Accuracy: 0.9956, Val Accuracy: 0.9325


Epoch 33/40 [Train]: 100%|██████████| 3489/3489 [06:48<00:00,  8.55it/s, loss=0.0469]
Epoch 33/40 [Val]: 100%|██████████| 1161/1161 [02:55<00:00,  6.63it/s]


Epoch: 33/40, Train Loss: 0.0125, Train Accuracy: 0.9958, Val Accuracy: 0.9337


Epoch 34/40 [Train]: 100%|██████████| 3489/3489 [10:22<00:00,  5.60it/s, loss=0.000945]
Epoch 34/40 [Val]: 100%|██████████| 1161/1161 [03:10<00:00,  6.09it/s]


Epoch: 34/40, Train Loss: 0.0116, Train Accuracy: 0.9960, Val Accuracy: 0.9330


Epoch 35/40 [Train]: 100%|██████████| 3489/3489 [09:23<00:00,  6.19it/s, loss=0.00087]
Epoch 35/40 [Val]: 100%|██████████| 1161/1161 [02:50<00:00,  6.83it/s]


Epoch: 35/40, Train Loss: 0.0112, Train Accuracy: 0.9961, Val Accuracy: 0.9363


Epoch 36/40 [Train]: 100%|██████████| 3489/3489 [08:31<00:00,  6.82it/s, loss=0.00835]
Epoch 36/40 [Val]: 100%|██████████| 1161/1161 [03:29<00:00,  5.55it/s]


Epoch: 36/40, Train Loss: 0.0121, Train Accuracy: 0.9960, Val Accuracy: 0.9342


Epoch 37/40 [Train]: 100%|██████████| 3489/3489 [09:02<00:00,  6.44it/s, loss=0.000204]
Epoch 37/40 [Val]: 100%|██████████| 1161/1161 [01:57<00:00,  9.87it/s]


Epoch: 37/40, Train Loss: 0.0117, Train Accuracy: 0.9961, Val Accuracy: 0.9338


Epoch 38/40 [Train]: 100%|██████████| 3489/3489 [06:34<00:00,  8.84it/s, loss=0.257]
Epoch 38/40 [Val]: 100%|██████████| 1161/1161 [01:47<00:00, 10.79it/s]


Epoch: 38/40, Train Loss: 0.0106, Train Accuracy: 0.9962, Val Accuracy: 0.9318


Epoch 39/40 [Train]: 100%|██████████| 3489/3489 [06:34<00:00,  8.84it/s, loss=0.000172]
Epoch 39/40 [Val]: 100%|██████████| 1161/1161 [01:51<00:00, 10.45it/s]


Epoch: 39/40, Train Loss: 0.0109, Train Accuracy: 0.9964, Val Accuracy: 0.9334


Epoch 40/40 [Train]: 100%|██████████| 3489/3489 [05:49<00:00,  9.97it/s, loss=0.00176]
Epoch 40/40 [Val]: 100%|██████████| 1161/1161 [01:48<00:00, 10.70it/s]

Epoch: 40/40, Train Loss: 0.0110, Train Accuracy: 0.9964, Val Accuracy: 0.9275
Max Accuracy: 0.9369616709732989





In [14]:
# Reload the checkpoint saved at the best validation epoch.
# NOTE: weights_only=False unpickles arbitrary Python objects — only load
# checkpoint files you produced yourself.
best_model = torch.load("best_accuracy_model_BiLSTM.pth", map_location=device, weights_only=False)
best_model.to(device, dtype=torch.double)
best_model.eval()

# Accumulators for the test-set results
testing_accuracy = 0.0
pred_labels = []   # hard class predictions
act_labels = []    # ground-truth labels
pred_scores = []   # softmax probability of class index 1, for threshold-based metrics

# Iterate over the test dataloader
with torch.no_grad():
    for images, labels in test_dataloader:
        images, labels = images.to(device), labels.to(device)

        # Forward pass (only input x)
        outputs = best_model(images)

        # Predicted class = index of the highest score
        prediction = outputs.argmax(dim=1)

        # Number of correct predictions in this batch
        testing_accuracy += (prediction == labels).sum().item()

        # Store predicted and actual labels
        pred_labels.extend(prediction.cpu().tolist())
        act_labels.extend(labels.cpu().tolist())
        pred_scores.extend(torch.softmax(outputs, dim=1)[:, 1].cpu().tolist())

# Calculate the testing accuracy
testing_accuracy /= len(test_dataloader.dataset)

# Print the testing accuracy
print(f"Testing Accuracy: {testing_accuracy * 100:.2f}%")

Testing Accuracy: 95.03%


In [15]:
# # Calculate the confusion matrix
# import seaborn as sns
# conf_mat = confusion_matrix(act_labels, pred_labels)
# # Plot confusion matrix heat map
# sns.heatmap(conf_mat, cmap="flare",annot=True, fmt = "g", 
#             cbar_kws={"label":"color bar"},
#             xticklabels=train_dataset.classes,
#             yticklabels=train_dataset.classes)
# plt.xlabel("Predicted")
# plt.ylabel("Actual")
# plt.title("Confusion Matrix")
# plt.savefig("ConfusionMatrix_BiLSTM.png")
# plt.show()
# from sklearn.metrics import f1_score
# f1_score = f1_score(pred_labels, act_labels, average='macro')
# print('F1 Score : ', f1_score)

In [16]:
import numpy as np
import sklearn.metrics

def compute_eer(label, pred, positive_label=None):
    """Compute the equal error rate (EER) of a binary classifier.

    The EER is read off the ROC curve at the operating point where the false
    positive rate and the false negative rate are closest to each other.
    ONLY tested on binary classification.

    :param label: ground-truth labels, a 1-d list or np.array with one entry per sample
    :param pred: model predictions (scores or labels), a 1-d list or np.array with one entry per sample
    :param positive_label: the class treated as positive; ``None`` leaves the
        choice to ``sklearn.metrics.roc_curve``
    :return: equal error rate (EER)
    """
    # fpr, tpr and threshold are aligned np.arrays, one entry per operating point
    fpr, tpr, threshold = sklearn.metrics.roc_curve(label, pred, pos_label=positive_label)
    fnr = 1 - tpr

    # operating point where fnr and fpr are closest
    eer_index = np.nanargmin(np.absolute(fnr - fpr))

    # in theory fpr and fnr coincide at the EER; on a discrete curve they can
    # differ slightly, so return their mean
    eer = (fpr[eer_index] + fnr[eer_index]) / 2
    return eer

# NOTE(review): pred_labels holds hard class predictions (argmax of the model
# output), so the ROC curve built from it has at most one non-trivial operating
# point. EER is normally computed from continuous scores — confirm this is intended.
eer = compute_eer(act_labels, pred_labels)
print('The equal error rate is {:.3f}'.format(eer))

The equal error rate is 0.050
