In [1]:
# !pip install --upgrade scipy

In [2]:
import torch 
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader

import os, pathlib, glob, random
import numpy as np
import matplotlib.pyplot as plt 

from sklearn.metrics import confusion_matrix
import scipy
from scipy import io

In [3]:
# Run on the GPU when CUDA is usable, otherwise stay on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

cuda


In [4]:
# Hyperparameters consumed by later cells.
batch_size = 32         # samples per mini-batch handed to the data loaders
learning_rate = 1e-3    # step size handed to the optimizer
output_nodes = 10       # NOTE(review): not referenced by any later cell visible here

In [5]:
import os
import random
import scipy.io
import numpy as np
from torch.utils.data import Dataset, DataLoader

# Root directories of the feature files for each split. Every split is a list
# so that several corpora can be mixed; only the first corpus is enabled.
# The second corpus is kept here, disabled, for reference:
#   train: r"/kaggle/input/singfox-t2-gfcc-icassp/GFCC_T2/gfcc/train"
#   val:   r"/kaggle/input/singfox-t2-gfcc-icassp/GFCC_T2/gfcc/val"
#   test:  r"/kaggle/input/singfox-t2-gfcc-icassp/GFCC_T2/gfcc/test"
train_data_paths = [r"/kaggle/input/mfcc-fornew/mfcc/train"]
validation_data_paths = [r"/kaggle/input/mfcc-fornew/mfcc/dev"]
testing_data_paths = [r"/kaggle/input/mfcc-fornew/mfcc/test"]

class MixedPtDataset(Dataset):
    """Dataset of ``.mat`` feature files gathered from one or more directory trees.

    Every directory in ``directories`` must contain one sub-directory per
    class. Class names are mapped to integer labels in order of first
    appearance (alphabetical inside each directory); a class name that occurs
    in several directories shares a single label. The collected
    ``(path, label)`` pairs are shuffled once with the ``random`` module.

    Args:
        directories: Iterable of root directories to scan.
        max_len: Number of rows every sample is zero-padded or truncated to,
            counted after the stored ``'final'`` matrix has been transposed.
    """

    def __init__(self, directories, max_len=10):
        self.files = []
        self.class_to_idx = {}
        self.max_len = max_len

        for directory in directories:
            classes = sorted(entry.name for entry in os.scandir(directory) if entry.is_dir())

            # Assign a new index only to class names not seen in an earlier directory.
            for c in classes:
                self.class_to_idx.setdefault(c, len(self.class_to_idx))

            for c in classes:
                c_dir = os.path.join(directory, c)
                label = self.class_to_idx[c]
                # Sorted listing keeps the pre-shuffle order independent of the
                # file system; nested directories are not samples and are skipped.
                for name in sorted(os.listdir(c_dir)):
                    path = os.path.join(c_dir, name)
                    if os.path.isfile(path):
                        self.files.append((path, label))

        random.shuffle(self.files)

    def __len__(self):
        """Return the number of collected feature files."""
        return len(self.files)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(data, label)``.

        ``data`` is the transposed ``'final'`` matrix of the ``.mat`` file,
        padded with zero rows or cut to exactly ``max_len`` rows.

        Raises:
            RuntimeError: If the file cannot be read or processed. The message
                names the offending file; the original error is chained.
        """
        filepath, label = self.files[idx]
        try:
            data = scipy.io.loadmat(filepath)['final'].T
            n_rows = data.shape[0]
            if n_rows < self.max_len:
                missing = self.max_len - n_rows
                data = np.pad(data, pad_width=((0, missing), (0, 0)), mode='constant')
            else:
                data = data[:self.max_len, :]
        except Exception as e:
            # Returning None here would only fail later inside the DataLoader's
            # collate step without naming the file, so fail loudly instead.
            raise RuntimeError(f"Error loading file {filepath}: {str(e)}") from e
        return data, label

# Build one dataset per split, in the order train -> validation -> test.
train_dataset, val_dataset, test_dataset = (
    MixedPtDataset(split_dirs)
    for split_dirs in (train_data_paths, validation_data_paths, testing_data_paths)
)

class PtDataLoader(DataLoader):
    """``DataLoader`` that builds its own ``MixedPtDataset`` from feature directories.

    Args:
        directories: Root directories forwarded to ``MixedPtDataset``.
        batch_size: Number of samples per batch.
        shuffle: Whether the loader reshuffles the samples every epoch.
        **loader_kwargs: Any further ``torch.utils.data.DataLoader`` keyword
            arguments (for example ``num_workers`` or ``pin_memory``).
    """

    def __init__(self, directories, batch_size, shuffle=True, **loader_kwargs):
        dataset = MixedPtDataset(directories)
        super().__init__(dataset, batch_size=batch_size, shuffle=shuffle, **loader_kwargs)

# Load mixed datasets. `batch_size` comes from the hyperparameter cell; it is
# deliberately not re-assigned here so there is a single place to change it.
train_dataloader = PtDataLoader(directories=train_data_paths, batch_size=batch_size)
# Evaluation loaders do not need to be reshuffled every epoch: the metrics are
# order-independent and a fixed order keeps collected predictions comparable.
val_dataloader = PtDataLoader(directories=validation_data_paths, batch_size=batch_size, shuffle=False)
test_dataloader = PtDataLoader(directories=testing_data_paths, batch_size=batch_size, shuffle=False)

# Sample counts per split, used to normalise accuracy and loss later on.
train_count = len(train_dataset)
val_count = len(val_dataset)
test_count = len(test_dataset)

print(f"Training samples: {train_count}\nValidation samples: {val_count}\nTesting samples: {test_count}")

Training samples: 111633
Validation samples: 37152
Testing samples: 37971


In [6]:
# train_dataset = PtDataset(train_data_path)
# val_dataset = PtDataset(val_data_path)
# test_dataset = PtDataset(test_data_path)

In [7]:
# class PtDataLoader(DataLoader):
#     def __init__(self, directory, batch_size, shuffle=True):
#         dataset = PtDataset(directory)
#         super().__init__(dataset, batch_size=batch_size, shuffle=shuffle)

In [8]:
# train_dataloader = PtDataLoader(directory=train_data_path, batch_size=batch_size)
# val_dataloader = PtDataLoader(directory=val_data_path, batch_size=batch_size)
# test_dataloader = PtDataLoader(directory=test_data_path, batch_size=batch_size)

In [9]:
# train_count = len(train_dataset) 
# val_count = len(val_dataset)
# test_count = len(test_dataset)

In [10]:
# Show the three split sizes, one per line.
print(train_count, val_count, test_count, sep="\n")

111633
37152
37971


In [11]:
# Dropout probability. Not read by the classes defined in this cell; it is
# assigned the same value again in the cell that defines
# BiLSTMWithCrossAttention, which is where it is used.
drop_amount = 0.255

import torch
import torch.nn as nn
import torch.nn.functional as F
from collections import OrderedDict
from torch.nn.parameter import Parameter
from torch.utils import data

class Res2NetBlock(nn.Module):
    """1-D Res2Net-style block: 1x1 conv, hierarchical split convolutions, 1x1 conv.

    The output of the first 1x1 convolution is split into ``scale`` channel
    groups. The first group is passed through unchanged. Every other group is
    convolved, and from the third group onwards the previous group's
    convolution output is added to the input first, so later groups see a
    progressively larger receptive field.

    Args:
        in_channels: Channels of the input, which ``nn.Conv1d`` expects as
            ``[batch, in_channels, length]``.
        out_channels: Channels of the output; must be divisible by ``scale``.
        scale: Number of channel groups.
        kernel_size, stride, padding: Settings of the per-group convolutions.
            They must preserve the sequence length (as the defaults do),
            because the untouched first group is concatenated with the
            convolved groups.
    """

    def __init__(self, in_channels, out_channels, scale=4, kernel_size=3, stride=1, padding=1):
        super(Res2NetBlock, self).__init__()
        assert out_channels % scale == 0, "Output channels must be divisible by scale"
        self.scale = scale
        self.split_channels = out_channels // scale

        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size=1, stride=1, bias=False)
        self.bn1 = nn.BatchNorm1d(out_channels)

        # One convolution per group except the first, which is an identity path.
        self.convs = nn.ModuleList([
            nn.Conv1d(self.split_channels, self.split_channels, kernel_size, stride=stride, padding=padding, bias=False)
            for _ in range(scale - 1)
        ])

        self.bn2 = nn.BatchNorm1d(out_channels)
        self.conv3 = nn.Conv1d(out_channels, out_channels, kernel_size=1, stride=1, bias=False)

        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Apply the block and return a tensor with ``out_channels`` channels."""
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        splits = torch.chunk(out, self.scale, dim=1)
        result = [splits[0]]
        previous = None
        for conv, split in zip(self.convs, splits[1:]):
            # Hierarchical connection: feed the previous group's output into
            # this group. The addition is out-of-place so the chunk views that
            # autograd saved are never modified.
            group_input = split if previous is None else split + previous
            previous = conv(group_input)
            result.append(previous)
        out = torch.cat(result, dim=1)  # Concatenate all groups

        out = self.conv3(out)
        out = self.bn2(out)
        return self.relu(out)


class AMSoftmaxLoss(nn.Module):
    """Additive-margin softmax head.

    Despite the name, ``forward`` returns scaled margin logits rather than a
    scalar loss; the caller is expected to apply a loss such as cross-entropy.

    Args:
        in_features: Size of each input embedding.
        out_features: Number of classes.
        s: Scale applied to the logits.
        m: Margin subtracted from the cosine of the target class.
    """

    def __init__(self, in_features, out_features, s=30.0, m=0.4):
        super().__init__()
        self.s = s
        self.m = m
        # Class weights are stored in double precision (float64).
        self.weight = nn.Parameter(torch.empty(out_features, in_features, dtype=torch.float64))
        nn.init.xavier_uniform_(self.weight)

    def forward(self, x, labels):
        """Return ``s * (cos - m)`` for the target class and ``s * cos`` elsewhere."""
        unit_x = F.normalize(x)
        unit_w = F.normalize(self.weight)
        cosine = F.linear(unit_x, unit_w)

        # 1 at each row's target class, 0 everywhere else.
        target_mask = torch.zeros_like(cosine)
        target_mask.scatter_(1, labels.view(-1, 1).long(), 1)

        margin_logits = target_mask * (cosine - self.m) + (1.0 - target_mask) * cosine
        return margin_logits * self.s


class AASIST2(nn.Module):
    """Three stacked Res2Net blocks, a GRU, and two linear layers.

    ``d_args`` is a dict with the keys ``in_channels``, ``filts`` (at least
    three channel widths), ``gru_node``, ``nb_gru_layer``, ``nb_fc_node`` and
    ``nb_classes``.
    """

    def __init__(self, d_args):
        super().__init__()
        filts = d_args['filts']
        gru_width = d_args['gru_node']
        fc_width = d_args['nb_fc_node']
        n_classes = d_args['nb_classes']

        self.res2net_block1 = Res2NetBlock(d_args['in_channels'], filts[0], scale=4)
        self.res2net_block2 = Res2NetBlock(filts[0], filts[1], scale=4)
        self.res2net_block3 = Res2NetBlock(filts[1], filts[2], scale=4)

        self.gru = nn.GRU(
            input_size=filts[2],
            hidden_size=gru_width,
            num_layers=d_args['nb_gru_layer'],
            batch_first=True,
        )

        # fc1 turns the GRU state into an embedding; fc2 maps it to class scores.
        self.fc1 = nn.Linear(gru_width, fc_width)
        self.fc2 = nn.Linear(fc_width, n_classes)

        # The margin head works on the fc1 embedding, not on the fc2 scores.
        self.am_softmax = AMSoftmaxLoss(fc_width, n_classes)

    def forward(self, x, labels=None, is_test=False):
        """Return margin logits when ``labels`` is given, else softmax probabilities.

        ``is_test`` is accepted but not used by this method.
        """
        for block in (self.res2net_block1, self.res2net_block2, self.res2net_block3):
            x = block(x)

        # The GRU is batch_first, so move channels last: [batch, sequence, feature].
        self.gru.flatten_parameters()
        gru_out, _ = self.gru(x.permute(0, 2, 1))

        embedding = self.fc1(gru_out[:, -1, :])  # last time step only
        logits = self.fc2(embedding)

        if labels is None:
            return F.softmax(logits, dim=1)
        return self.am_softmax(embedding, labels)

# Configuration for the AASIST2 network. The `model` built here is replaced
# by a different model in a later cell.
d_args = dict(
    in_channels=1,
    filts=[64, 128, 256],
    gru_node=128,
    nb_gru_layer=2,
    nb_fc_node=64,
    nb_classes=2,
)
model = AASIST2(d_args)


In [12]:
import torch
from torch import nn
from torch.nn import Parameter
import torch.nn.functional as F

In [13]:
# drop_amount = 0.255

# class BiLSTMClassifier(nn.Module):
#     def __init__(self, input_size, hidden_size, num_layers, num_classes):
#         super(BiLSTMClassifier, self).__init__()
#         self.hidden_size = hidden_size
#         self.num_layers = num_layers
#         self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, bidirectional=True)
#         self.dropout = nn.Dropout(p=drop_amount)
#         self.fc = nn.Linear(hidden_size*2, num_classes)

#     def forward(self, x):
#         h0 = torch.zeros(self.num_layers*2, x.size(0), self.hidden_size).to(device=x.device, dtype=torch.double)
#         c0 = torch.zeros(self.num_layers*2, x.size(0), self.hidden_size).to(device=x.device, dtype=torch.double)
#         out, _ = self.lstm(x, (h0, c0))
#         out = self.dropout(out)
#         # Extract the output of the last time step from both directions
#         last_hidden_state = torch.cat((out[:, -1, :self.hidden_size], out[:, 0, self.hidden_size:]), dim=1)
#         output = self.fc(last_hidden_state)
#         return output
import torch
import torch.nn as nn
import torch.nn.functional as F

drop_amount = 0.255  # dropout probability for the attention layer and the output dropout


class BiLSTMWithCrossAttention(nn.Module):
    """Bidirectional LSTM whose outputs attend back to the raw input sequence.

    Queries are the BiLSTM outputs (width ``2 * hidden_size``). Keys and
    values are linear projections of the input to that same width. The
    attention result is added to the BiLSTM outputs, dropout is applied, and
    one vector per sequence is classified by a linear layer.

    Args:
        input_size: Feature dimension of each time step.
        hidden_size: Hidden width of each LSTM direction.
        num_layers: Number of stacked LSTM layers.
        num_classes: Number of output scores.
        num_heads: Attention heads; must divide ``2 * hidden_size``.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, num_heads=4):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.lstm = nn.LSTM(input_size, hidden_size, num_layers,
                            batch_first=True, bidirectional=True)

        # The attention width equals the concatenated forward/backward LSTM width.
        attn_dim = 2 * hidden_size
        self.k_proj = nn.Linear(input_size, attn_dim)
        self.v_proj = nn.Linear(input_size, attn_dim)
        self.cross_attn = nn.MultiheadAttention(attn_dim, num_heads,
                                                dropout=drop_amount, batch_first=True)

        self.dropout = nn.Dropout(p=drop_amount)
        self.fc = nn.Linear(attn_dim, num_classes)

    def forward(self, x):
        """Map ``x`` of shape ``[batch, seq_len, input_size]`` to ``[batch, num_classes]``."""
        # Zero initial states created with the dtype and device of the input.
        state_shape = (self.num_layers * 2, x.size(0), self.hidden_size)
        h0 = x.new_zeros(state_shape)
        c0 = x.new_zeros(state_shape)
        lstm_out, _ = self.lstm(x, (h0, c0))                     # [B, T, 2H]

        attn_out, _ = self.cross_attn(
            query=lstm_out,
            key=self.k_proj(x),
            value=self.v_proj(x),
        )                                                        # [B, T, 2H]

        fused = self.dropout(lstm_out + attn_out)                # residual + dropout

        # First H features at the last step, remaining H features at the first
        # step ("last forward + first backward"). After the attention residual
        # these halves are no longer purely one LSTM direction each.
        h = self.hidden_size
        summary = torch.cat((fused[:, -1, :h], fused[:, 0, h:]), dim=1)   # [B, 2H]
        return self.fc(summary)


In [14]:
# Define the parameters
input_size = 20     # feature dimension per frame
hidden_size = 256
num_layers = 2
num_classes = 2

# Build the model from the named hyperparameters above, so that editing them
# actually changes the network (they were previously repeated as literals).
model = BiLSTMWithCrossAttention(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    num_classes=num_classes,
    num_heads=4,
)

# The training loop feeds the loaded features to the model without casting,
# so the model is kept in double precision.
# NOTE(review): assumes the .mat features are float64 - confirm.
# Last expression of the cell, so the notebook also displays the model.
model.to(device, dtype=torch.double)

BiLSTMWithCrossAttention(
  (lstm): LSTM(20, 256, num_layers=2, batch_first=True, bidirectional=True)
  (k_proj): Linear(in_features=20, out_features=512, bias=True)
  (v_proj): Linear(in_features=20, out_features=512, bias=True)
  (cross_attn): MultiheadAttention(
    (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
  )
  (dropout): Dropout(p=0.255, inplace=False)
  (fc): Linear(in_features=512, out_features=2, bias=True)
)

In [15]:
import torch
import torch.nn as nn
import torch.optim as optim

In [16]:
# Adam over every model parameter, trained against a cross-entropy objective.
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)
loss_function = nn.CrossEntropyLoss()

In [17]:
print(model)

BiLSTMWithCrossAttention(
  (lstm): LSTM(20, 256, num_layers=2, batch_first=True, bidirectional=True)
  (k_proj): Linear(in_features=20, out_features=512, bias=True)
  (v_proj): Linear(in_features=20, out_features=512, bias=True)
  (cross_attn): MultiheadAttention(
    (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
  )
  (dropout): Dropout(p=0.255, inplace=False)
  (fc): Linear(in_features=512, out_features=2, bias=True)
)


In [18]:
from tqdm import tqdm

# Model training and validation
n_total_steps = len(train_dataloader)
train_accuracy_list = []
train_loss_list = []
val_accuracy_list = []
max_acc = 0
num_epochs = 40
pred_labels = []
act_labels = []
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Normalise by the datasets the loaders actually iterate over, rather than by
# counts taken from separately constructed dataset objects.
n_train_samples = len(train_dataloader.dataset)
n_val_samples = len(val_dataloader.dataset)

for epoch in range(num_epochs):
    # ---- training pass ----
    model.train()
    train_accuracy = 0.0
    train_loss = 0.0

    train_loop = tqdm(enumerate(train_dataloader), total=len(train_dataloader), desc=f"Epoch {epoch+1}/{num_epochs} [Train]")
    for batch_idx, (images, labels) in train_loop:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()

        # The loss is a batch mean; weight it by the batch size so that the
        # epoch average stays correct when the last batch is smaller.
        train_loss += loss.item() * images.size(0)
        prediction = outputs.argmax(dim=1)
        train_accuracy += int((prediction == labels).sum().item())

        train_loop.set_postfix(loss=loss.item())

    train_accuracy /= n_train_samples
    train_loss /= n_train_samples
    train_accuracy_list.append(train_accuracy)
    train_loss_list.append(train_loss)

    # ---- validation pass ----
    model.eval()
    val_accuracy = 0.0
    pred = []
    lab = []

    val_loop = tqdm(val_dataloader, total=len(val_dataloader), desc=f"Epoch {epoch+1}/{num_epochs} [Val]")
    with torch.no_grad():
        for images, labels in val_loop:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            prediction = outputs.argmax(dim=1)
            val_accuracy += int((prediction == labels).sum().item())
            pred.extend(prediction.cpu().tolist())
            lab.extend(labels.cpu().tolist())

    val_accuracy /= n_val_samples
    val_accuracy_list.append(val_accuracy)

    # Keep the predictions and a checkpoint of the best validation epoch.
    if max_acc < val_accuracy:
        max_acc = val_accuracy
        pred_labels = pred
        # Store into the list initialised above; the best-epoch labels used to
        # go to a differently named variable, leaving `act_labels` empty.
        act_labels = lab
        actual_labels = act_labels  # kept for compatibility with the old name
        # The whole module is pickled (not just a state_dict) because the
        # evaluation cell loads the file as a complete model object.
        torch.save(model, "best_accuracy_model_BiLSTM.pth")

    print(f"Epoch: {epoch + 1}/{num_epochs}, Train Loss: {train_loss:.4f}, "
          f"Train Accuracy: {train_accuracy:.4f}, Val Accuracy: {val_accuracy:.4f}")

print("Max Accuracy:", max_acc)


Epoch 1/40 [Train]: 100%|██████████| 3489/3489 [06:40<00:00,  8.71it/s, loss=0.128]
Epoch 1/40 [Val]: 100%|██████████| 1161/1161 [02:17<00:00,  8.42it/s]


Epoch: 1/40, Train Loss: 0.3200, Train Accuracy: 0.8613, Val Accuracy: 0.8645


Epoch 2/40 [Train]: 100%|██████████| 3489/3489 [02:40<00:00, 21.76it/s, loss=0.302]
Epoch 2/40 [Val]: 100%|██████████| 1161/1161 [00:30<00:00, 38.02it/s]


Epoch: 2/40, Train Loss: 0.2138, Train Accuracy: 0.9164, Val Accuracy: 0.8774


Epoch 3/40 [Train]: 100%|██████████| 3489/3489 [01:58<00:00, 29.51it/s, loss=0.058]
Epoch 3/40 [Val]: 100%|██████████| 1161/1161 [00:33<00:00, 34.88it/s]


Epoch: 3/40, Train Loss: 0.1596, Train Accuracy: 0.9370, Val Accuracy: 0.8915


Epoch 4/40 [Train]: 100%|██████████| 3489/3489 [01:56<00:00, 29.85it/s, loss=0.0216]
Epoch 4/40 [Val]: 100%|██████████| 1161/1161 [00:30<00:00, 38.45it/s]


Epoch: 4/40, Train Loss: 0.1229, Train Accuracy: 0.9528, Val Accuracy: 0.9138


Epoch 5/40 [Train]: 100%|██████████| 3489/3489 [01:53<00:00, 30.86it/s, loss=0.222]
Epoch 5/40 [Val]: 100%|██████████| 1161/1161 [00:30<00:00, 37.46it/s]


Epoch: 5/40, Train Loss: 0.1102, Train Accuracy: 0.9592, Val Accuracy: 0.8753


Epoch 6/40 [Train]: 100%|██████████| 3489/3489 [01:54<00:00, 30.51it/s, loss=0.0484]
Epoch 6/40 [Val]: 100%|██████████| 1161/1161 [00:30<00:00, 38.34it/s]


Epoch: 6/40, Train Loss: 0.0892, Train Accuracy: 0.9663, Val Accuracy: 0.9162


Epoch 7/40 [Train]: 100%|██████████| 3489/3489 [01:55<00:00, 30.20it/s, loss=0.0407]
Epoch 7/40 [Val]: 100%|██████████| 1161/1161 [00:31<00:00, 36.56it/s]


Epoch: 7/40, Train Loss: 0.0718, Train Accuracy: 0.9733, Val Accuracy: 0.9209


Epoch 8/40 [Train]: 100%|██████████| 3489/3489 [01:53<00:00, 30.73it/s, loss=0.104]
Epoch 8/40 [Val]: 100%|██████████| 1161/1161 [00:30<00:00, 37.78it/s]


Epoch: 8/40, Train Loss: 0.0973, Train Accuracy: 0.9634, Val Accuracy: 0.9242


Epoch 9/40 [Train]: 100%|██████████| 3489/3489 [01:54<00:00, 30.51it/s, loss=0.00877]
Epoch 9/40 [Val]: 100%|██████████| 1161/1161 [00:30<00:00, 37.62it/s]


Epoch: 9/40, Train Loss: 0.0708, Train Accuracy: 0.9736, Val Accuracy: 0.9178


Epoch 10/40 [Train]: 100%|██████████| 3489/3489 [01:51<00:00, 31.28it/s, loss=0.00295]
Epoch 10/40 [Val]: 100%|██████████| 1161/1161 [00:30<00:00, 38.06it/s]


Epoch: 10/40, Train Loss: 0.0800, Train Accuracy: 0.9698, Val Accuracy: 0.9197


Epoch 11/40 [Train]: 100%|██████████| 3489/3489 [01:53<00:00, 30.76it/s, loss=0.000942]
Epoch 11/40 [Val]: 100%|██████████| 1161/1161 [00:29<00:00, 39.90it/s]


Epoch: 11/40, Train Loss: 0.0639, Train Accuracy: 0.9772, Val Accuracy: 0.9158


Epoch 12/40 [Train]: 100%|██████████| 3489/3489 [01:51<00:00, 31.34it/s, loss=0.266]
Epoch 12/40 [Val]: 100%|██████████| 1161/1161 [00:31<00:00, 36.88it/s]


Epoch: 12/40, Train Loss: 0.0681, Train Accuracy: 0.9748, Val Accuracy: 0.9211


Epoch 13/40 [Train]: 100%|██████████| 3489/3489 [01:52<00:00, 30.91it/s, loss=0.0289]
Epoch 13/40 [Val]: 100%|██████████| 1161/1161 [00:30<00:00, 37.64it/s]


Epoch: 13/40, Train Loss: 0.0494, Train Accuracy: 0.9824, Val Accuracy: 0.9230


Epoch 14/40 [Train]: 100%|██████████| 3489/3489 [01:54<00:00, 30.38it/s, loss=0.0068]
Epoch 14/40 [Val]: 100%|██████████| 1161/1161 [00:30<00:00, 37.64it/s]


Epoch: 14/40, Train Loss: 0.0410, Train Accuracy: 0.9854, Val Accuracy: 0.9195


Epoch 15/40 [Train]: 100%|██████████| 3489/3489 [01:54<00:00, 30.37it/s, loss=0.0408]
Epoch 15/40 [Val]: 100%|██████████| 1161/1161 [00:31<00:00, 37.35it/s]


Epoch: 15/40, Train Loss: 0.0428, Train Accuracy: 0.9849, Val Accuracy: 0.9213


Epoch 16/40 [Train]: 100%|██████████| 3489/3489 [01:55<00:00, 30.11it/s, loss=0.02]
Epoch 16/40 [Val]: 100%|██████████| 1161/1161 [00:29<00:00, 39.35it/s]


Epoch: 16/40, Train Loss: 0.0327, Train Accuracy: 0.9881, Val Accuracy: 0.9289


Epoch 17/40 [Train]: 100%|██████████| 3489/3489 [01:56<00:00, 30.02it/s, loss=0.0305]
Epoch 17/40 [Val]: 100%|██████████| 1161/1161 [00:31<00:00, 37.16it/s]


Epoch: 17/40, Train Loss: 0.0320, Train Accuracy: 0.9888, Val Accuracy: 0.9286


Epoch 18/40 [Train]: 100%|██████████| 3489/3489 [01:55<00:00, 30.24it/s, loss=0.0219]
Epoch 18/40 [Val]: 100%|██████████| 1161/1161 [00:29<00:00, 39.16it/s]


Epoch: 18/40, Train Loss: 0.0262, Train Accuracy: 0.9909, Val Accuracy: 0.9301


Epoch 19/40 [Train]: 100%|██████████| 3489/3489 [01:54<00:00, 30.36it/s, loss=0.00584]
Epoch 19/40 [Val]: 100%|██████████| 1161/1161 [00:30<00:00, 38.04it/s]


Epoch: 19/40, Train Loss: 0.0329, Train Accuracy: 0.9894, Val Accuracy: 0.9268


Epoch 20/40 [Train]: 100%|██████████| 3489/3489 [01:59<00:00, 29.31it/s, loss=0.000722]
Epoch 20/40 [Val]: 100%|██████████| 1161/1161 [00:31<00:00, 36.95it/s]


Epoch: 20/40, Train Loss: 0.0258, Train Accuracy: 0.9911, Val Accuracy: 0.9260


Epoch 21/40 [Train]: 100%|██████████| 3489/3489 [02:06<00:00, 27.48it/s, loss=8.69e-5]
Epoch 21/40 [Val]: 100%|██████████| 1161/1161 [00:33<00:00, 34.85it/s]


Epoch: 21/40, Train Loss: 0.0214, Train Accuracy: 0.9923, Val Accuracy: 0.9312


Epoch 22/40 [Train]: 100%|██████████| 3489/3489 [02:02<00:00, 28.45it/s, loss=0.000447]
Epoch 22/40 [Val]: 100%|██████████| 1161/1161 [00:33<00:00, 34.46it/s]


Epoch: 22/40, Train Loss: 0.0219, Train Accuracy: 0.9925, Val Accuracy: 0.9273


Epoch 23/40 [Train]: 100%|██████████| 3489/3489 [02:05<00:00, 27.90it/s, loss=0.00019]
Epoch 23/40 [Val]: 100%|██████████| 1161/1161 [00:32<00:00, 35.91it/s]


Epoch: 23/40, Train Loss: 0.0194, Train Accuracy: 0.9934, Val Accuracy: 0.9287


Epoch 24/40 [Train]: 100%|██████████| 3489/3489 [02:01<00:00, 28.78it/s, loss=0.00225]
Epoch 24/40 [Val]: 100%|██████████| 1161/1161 [00:32<00:00, 35.64it/s]


Epoch: 24/40, Train Loss: 0.0265, Train Accuracy: 0.9917, Val Accuracy: 0.9291


Epoch 25/40 [Train]: 100%|██████████| 3489/3489 [01:55<00:00, 30.10it/s, loss=0.00143]
Epoch 25/40 [Val]: 100%|██████████| 1161/1161 [00:30<00:00, 37.80it/s]


Epoch: 25/40, Train Loss: 0.0389, Train Accuracy: 0.9877, Val Accuracy: 0.9255


Epoch 26/40 [Train]: 100%|██████████| 3489/3489 [01:58<00:00, 29.41it/s, loss=0.164]
Epoch 26/40 [Val]: 100%|██████████| 1161/1161 [00:31<00:00, 37.11it/s]


Epoch: 26/40, Train Loss: 0.0180, Train Accuracy: 0.9938, Val Accuracy: 0.9220


Epoch 27/40 [Train]: 100%|██████████| 3489/3489 [01:59<00:00, 29.15it/s, loss=2.09e-5]
Epoch 27/40 [Val]: 100%|██████████| 1161/1161 [00:31<00:00, 36.94it/s]


Epoch: 27/40, Train Loss: 0.0228, Train Accuracy: 0.9941, Val Accuracy: 0.9275


Epoch 28/40 [Train]: 100%|██████████| 3489/3489 [02:00<00:00, 28.98it/s, loss=0.000283]
Epoch 28/40 [Val]: 100%|██████████| 1161/1161 [00:31<00:00, 36.53it/s]


Epoch: 28/40, Train Loss: 0.0283, Train Accuracy: 0.9938, Val Accuracy: 0.9251


Epoch 29/40 [Train]: 100%|██████████| 3489/3489 [01:59<00:00, 29.18it/s, loss=0.00103]
Epoch 29/40 [Val]: 100%|██████████| 1161/1161 [00:30<00:00, 37.80it/s]


Epoch: 29/40, Train Loss: 0.0185, Train Accuracy: 0.9949, Val Accuracy: 0.9128


Epoch 30/40 [Train]: 100%|██████████| 3489/3489 [01:58<00:00, 29.52it/s, loss=0.000344]
Epoch 30/40 [Val]: 100%|██████████| 1161/1161 [00:30<00:00, 37.51it/s]


Epoch: 30/40, Train Loss: 0.0238, Train Accuracy: 0.9926, Val Accuracy: 0.9269


Epoch 31/40 [Train]: 100%|██████████| 3489/3489 [01:53<00:00, 30.63it/s, loss=0.00813]
Epoch 31/40 [Val]: 100%|██████████| 1161/1161 [00:31<00:00, 37.13it/s]


Epoch: 31/40, Train Loss: 0.0300, Train Accuracy: 0.9910, Val Accuracy: 0.9251


Epoch 32/40 [Train]: 100%|██████████| 3489/3489 [01:56<00:00, 29.92it/s, loss=0.172]
Epoch 32/40 [Val]: 100%|██████████| 1161/1161 [00:34<00:00, 33.69it/s]


Epoch: 32/40, Train Loss: 0.0337, Train Accuracy: 0.9946, Val Accuracy: 0.9278


Epoch 33/40 [Train]: 100%|██████████| 3489/3489 [02:20<00:00, 24.84it/s, loss=0.00264]
Epoch 33/40 [Val]: 100%|██████████| 1161/1161 [00:30<00:00, 37.63it/s]


Epoch: 33/40, Train Loss: 0.0223, Train Accuracy: 0.9924, Val Accuracy: 0.9225


Epoch 34/40 [Train]: 100%|██████████| 3489/3489 [01:57<00:00, 29.70it/s, loss=0.00729]
Epoch 34/40 [Val]: 100%|██████████| 1161/1161 [00:31<00:00, 36.95it/s]


Epoch: 34/40, Train Loss: 0.0339, Train Accuracy: 0.9934, Val Accuracy: 0.9191


Epoch 35/40 [Train]: 100%|██████████| 3489/3489 [01:55<00:00, 30.33it/s, loss=0.00013]
Epoch 35/40 [Val]: 100%|██████████| 1161/1161 [00:30<00:00, 38.39it/s]


Epoch: 35/40, Train Loss: 0.0286, Train Accuracy: 0.9920, Val Accuracy: 0.9296


Epoch 36/40 [Train]: 100%|██████████| 3489/3489 [01:59<00:00, 29.10it/s, loss=0.0146]
Epoch 36/40 [Val]: 100%|██████████| 1161/1161 [00:33<00:00, 34.76it/s]


Epoch: 36/40, Train Loss: 0.0278, Train Accuracy: 0.9907, Val Accuracy: 0.9215


Epoch 37/40 [Train]: 100%|██████████| 3489/3489 [01:58<00:00, 29.52it/s, loss=0.0336]
Epoch 37/40 [Val]: 100%|██████████| 1161/1161 [00:30<00:00, 38.50it/s]


Epoch: 37/40, Train Loss: 0.0289, Train Accuracy: 0.9898, Val Accuracy: 0.9156


Epoch 38/40 [Train]: 100%|██████████| 3489/3489 [01:57<00:00, 29.64it/s, loss=0.000826]
Epoch 38/40 [Val]: 100%|██████████| 1161/1161 [00:30<00:00, 37.54it/s]


Epoch: 38/40, Train Loss: 0.0230, Train Accuracy: 0.9920, Val Accuracy: 0.9270


Epoch 39/40 [Train]: 100%|██████████| 3489/3489 [01:56<00:00, 29.93it/s, loss=4.22e-5]
Epoch 39/40 [Val]: 100%|██████████| 1161/1161 [00:35<00:00, 32.35it/s]


Epoch: 39/40, Train Loss: 0.0161, Train Accuracy: 0.9947, Val Accuracy: 0.9258


Epoch 40/40 [Train]: 100%|██████████| 3489/3489 [02:11<00:00, 26.45it/s, loss=0.000962]
Epoch 40/40 [Val]: 100%|██████████| 1161/1161 [00:29<00:00, 38.76it/s]

Epoch: 40/40, Train Loss: 0.0161, Train Accuracy: 0.9951, Val Accuracy: 0.9280
Max Accuracy: 0.9312284668389319





In [19]:
# SECURITY NOTE: the checkpoint is a fully pickled model and is loaded with
# weights_only=False, which can execute arbitrary code. Only load files you
# produced yourself. map_location lets a checkpoint saved on a GPU be
# restored on a CPU-only machine.
best_model = torch.load("best_accuracy_model_BiLSTM.pth", map_location=device, weights_only=False)
best_model.to(device, dtype=torch.double)
best_model.eval()

# Running totals for the test split
testing_accuracy = 0.0
pred_labels = []
act_labels = []

with torch.no_grad():
    for images, labels in test_dataloader:
        # Tensors are moved with .to(device); the Variable wrapper is
        # deprecated and no longer needed.
        images, labels = images.to(device), labels.to(device)

        outputs = best_model(images)

        # Predicted class = index of the highest score
        prediction = outputs.argmax(dim=1)

        testing_accuracy += (prediction == labels).sum().item()

        pred_labels.extend(prediction.cpu().tolist())
        act_labels.extend(labels.cpu().tolist())

# Fraction of correctly classified test samples
testing_accuracy /= len(test_dataloader.dataset)

print(f"Testing Accuracy: {testing_accuracy * 100:.2f}%")

Testing Accuracy: 94.66%


In [20]:
# # Calculate the confusion matrix
# import seaborn as sns
# conf_mat = confusion_matrix(act_labels, pred_labels)
# # Plot confusion matrix heat map
# sns.heatmap(conf_mat, cmap="flare",annot=True, fmt = "g", 
#             cbar_kws={"label":"color bar"},
#             xticklabels=train_dataset.classes,
#             yticklabels=train_dataset.classes)
# plt.xlabel("Predicted")
# plt.ylabel("Actual")
# plt.title("Confusion Matrix")
# plt.savefig("ConfusionMatrix_BiLSTM.png")
# plt.show()
# from sklearn.metrics import f1_score
# f1_score = f1_score(pred_labels, act_labels, average='macro')
# print('F1 Score : ', f1_score)

In [21]:
import numpy as np
import sklearn.metrics

def compute_eer(label, pred, positive_label=None):
    """Compute the equal error rate (EER); only tested on binary classification.

    The EER is read off the ROC curve at the operating point where the false
    positive rate and the false negative rate are closest to each other.

    :param label: ground-truth labels, a 1-d list or np.array with one entry per sample
    :param pred: model predictions or scores, a 1-d list or np.array with one entry per sample
    :param positive_label: the class treated as positive when computing the ROC
        curve; ``None`` leaves the choice to ``sklearn.metrics.roc_curve``
    :return: equal error rate (EER), the mean of FPR and FNR at that operating point
    """
    # fpr, tpr and the thresholds are np.arrays of equal length
    fpr, tpr, _ = sklearn.metrics.roc_curve(label, pred, pos_label=positive_label)
    fnr = 1 - tpr

    # index of the operating point where fnr and fpr are closest
    eer_idx = np.nanargmin(np.absolute(fnr - fpr))

    # in theory fpr and fnr coincide at the EER point, but on a finite ROC
    # curve they can differ slightly, so return their mean
    return (fpr[eer_idx] + fnr[eer_idx]) / 2

# NOTE(review): `pred_labels` holds hard class decisions (the index of the
# highest score) collected in the test cell, so the ROC curve built inside
# compute_eer has only a few operating points. Passing continuous scores
# would give a threshold-swept EER - confirm which is intended.
eer = compute_eer(act_labels, pred_labels)
print('The equal error rate is {:.3f}'.format(eer))

The equal error rate is 0.053
