In [1]:
!pip install --upgrade scipy

Collecting scipy
  Downloading scipy-1.12.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.4/60.4 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
Downloading scipy-1.12.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (38.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m38.4/38.4 MB[0m [31m43.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: scipy
  Attempting uninstall: scipy
    Found existing installation: scipy 1.11.4
    Uninstalling scipy-1.11.4:
      Successfully uninstalled scipy-1.11.4
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
cuml 23.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.
cuml 23.8.0 requires dask==2023.7.1, but you have dask 2024.1.0 which is incompatible.
cuml 2

In [2]:
import torch 
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader

import os, pathlib, glob, random
import numpy as np
import matplotlib.pyplot as plt 

from sklearn.metrics import confusion_matrix
import scipy
from scipy import io

In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [4]:
batch_size = 32
output_nodes = 10
learning_rate = 0.001

In [5]:
train_data_path = r"/kaggle/input/emotion-challange-lfrcc/LFRCC/BP_Utthira/Train"
val_data_path = r"/kaggle/input/emotion-challange-lfrcc/LFRCC/BP_Utthira/Val"
test_data_path = r"/kaggle/input/emotion-challange-lfrcc/LFRCC/BP_Utthira/Test"

In [6]:
class PtDataset(Dataset):
    def __init__(self, directory):
        self.directory = directory
        self.classes = sorted(entry.name for entry in os.scandir(directory) if entry.is_dir())
        self.class_to_idx = {c: i for i, c in enumerate(self.classes)}
        self.files = []
        for c in self.classes:
            c_dir = os.path.join(directory, c)
            c_files = [(os.path.join(c_dir, f), self.class_to_idx[c]) for f in os.listdir(c_dir)]
            self.files.extend(c_files)
        random.shuffle(self.files)
        
    def __len__(self):
        return len(self.files)
    
    def __getitem__(self, idx):
        filepath, label = self.files[idx]
        try:
            mat_vals = scipy.io.loadmat(filepath)
            data = mat_vals['final']
            data = data.T
            max_len=900
            if (max_len > data.shape[0]):
                pad_width = max_len - data.shape[0]
                data = np.pad(data, pad_width=((0, pad_width),(0,0)), mode='constant')
            else:
                data = data[:max_len, :]
        except Exception as e:
            print(f"Error loading file {filepath}: {str(e)}")
            return None
        return data, label

In [7]:
train_dataset = PtDataset(train_data_path)
val_dataset = PtDataset(val_data_path)
test_dataset = PtDataset(test_data_path)

In [8]:
class PtDataLoader(DataLoader):
    def __init__(self, directory, batch_size, shuffle=True):
        dataset = PtDataset(directory)
        super().__init__(dataset, batch_size=batch_size, shuffle=shuffle)

In [9]:
train_dataloader = PtDataLoader(directory=train_data_path, batch_size=batch_size)
val_dataloader = PtDataLoader(directory=val_data_path, batch_size=batch_size)
test_dataloader = PtDataLoader(directory=test_data_path, batch_size=batch_size)

In [10]:
train_count = len(train_dataset) 
val_count = len(val_dataset)
test_count = len(test_dataset)

In [11]:
print(train_count)
print(val_count)
print(test_count)

53386
15341
2347


In [12]:
drop_amount = 0.255

class BiLSTMClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(BiLSTMClassifier, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, bidirectional=True)
        self.dropout = nn.Dropout(p=drop_amount)
        self.fc = nn.Linear(hidden_size*2, num_classes)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers*2, x.size(0), self.hidden_size).to(device=x.device, dtype=torch.double)
        c0 = torch.zeros(self.num_layers*2, x.size(0), self.hidden_size).to(device=x.device, dtype=torch.double)
        out, _ = self.lstm(x, (h0, c0))
        out = self.dropout(out)
        # Extract the output of the last time step from both directions
        last_hidden_state = torch.cat((out[:, -1, :self.hidden_size], out[:, 0, self.hidden_size:]), dim=1)
        output = self.fc(last_hidden_state)
        return output

In [13]:
import torch
from torch import nn
from torch.nn import Parameter
import torch.nn.functional as F

In [14]:
# Define the parameters
input_size = 20
hidden_size = 256
num_layers = 2
num_classes = 8

model = BiLSTMClassifier(input_size, hidden_size, num_layers, num_classes)
model.to(device, dtype=torch.double)

BiLSTMClassifier(
  (lstm): LSTM(20, 256, num_layers=2, batch_first=True, bidirectional=True)
  (dropout): Dropout(p=0.255, inplace=False)
  (fc): Linear(in_features=512, out_features=8, bias=True)
)

In [15]:
import torch
import torch.nn as nn
import torch.optim as optim

In [16]:
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [17]:
print(model)

BiLSTMClassifier(
  (lstm): LSTM(20, 256, num_layers=2, batch_first=True, bidirectional=True)
  (dropout): Dropout(p=0.255, inplace=False)
  (fc): Linear(in_features=512, out_features=8, bias=True)
)


In [18]:
#Model training and testing 
n_total_steps = len(train_dataloader) # n_total_steps * batch size will give total number of training files (consider that last batch may not be fully filled)
train_accuracy_list = []
train_loss_list = []
val_accuracy_list = []
max_acc=0
num_epochs = 40
pred_labels =[]
act_labels = []

for epoch in range(num_epochs):
    
    #Evaluation and training on training dataset
    model.train()
    train_accuracy=0.0
    train_loss=0.0
    
    for batch_idx, (images,labels) in enumerate(train_dataloader):
        if torch.cuda.is_available():
            images=Variable(images.cuda())
            labels=Variable(labels.cuda())
        ##images = images.unsqueeze(1)
        optimizer.zero_grad()
        outputs=model(images)
        loss=loss_function(outputs,labels)
        loss.backward()
        optimizer.step()
        
        
        train_loss+= loss.cpu().data*images.size(0)
        _,prediction=torch.max(outputs.data,1)
        
        train_accuracy+=int(torch.sum(prediction==labels.data))
        
    train_accuracy=train_accuracy/train_count
    train_loss=train_loss/train_count
    
    train_accuracy_list.append(train_accuracy)
    train_loss_list.append(train_loss)

    
    # Validation on Validation dataset
    model.eval()
    val_accuracy=0.0
    pred = []
    lab = []
    
    for i, (images,labels) in enumerate(val_dataloader):
        if torch.cuda.is_available():
            images=Variable(images.cuda())
            labels=Variable(labels.cuda())
        ##images = images.unsqueeze(1) 
#         print(i,images.shape)
        outputs=model(images)
        _,prediction=torch.max(outputs.data,1)
        val_accuracy+=int(torch.sum(prediction==labels.data))
        
        pred.extend(prediction.tolist())
        lab.extend(labels.tolist())
    
    val_accuracy=val_accuracy/val_count
    val_accuracy_list.append(val_accuracy)
    if max_acc < val_accuracy:
        max_acc = val_accuracy
        pred_labels = pred
        actual_labels = lab
        max_acc = val_accuracy
        torch.save(model,"best_accuracy_model_BiLSTM.pth")
    print('Epoch : '+str(epoch+1)+'/'+str(num_epochs)+'   Train Loss : '+str(train_loss)+'   Train Accuracy : '+str(train_accuracy)+'   Test Accuracy : '+str(val_accuracy))
print(max_acc)   
print('Finished Training and Testing')

Epoch : 1/40   Train Loss : tensor(1.5285, dtype=torch.float64)   Train Accuracy : 0.4682875660285468   Test Accuracy : 0.3694022553940421
Epoch : 2/40   Train Loss : tensor(1.5126, dtype=torch.float64)   Train Accuracy : 0.4703854943243547   Test Accuracy : 0.3740955609151946
Epoch : 3/40   Train Loss : tensor(1.4977, dtype=torch.float64)   Train Accuracy : 0.47291424718090885   Test Accuracy : 0.3763770288768659
Epoch : 4/40   Train Loss : tensor(1.4813, dtype=torch.float64)   Train Accuracy : 0.4815682014011164   Test Accuracy : 0.37200964735023795
Epoch : 5/40   Train Loss : tensor(1.4721, dtype=torch.float64)   Train Accuracy : 0.4839471022365414   Test Accuracy : 0.38387328075092886
Epoch : 6/40   Train Loss : tensor(1.4704, dtype=torch.float64)   Train Accuracy : 0.4884051998651332   Test Accuracy : 0.3700541033830911
Epoch : 7/40   Train Loss : tensor(1.4579, dtype=torch.float64)   Train Accuracy : 0.49113999925073987   Test Accuracy : 0.3768333224692002
Epoch : 8/40   Train Lo

In [19]:
# best_model = torch.load("best_accuracy_model_BiLSTM.pth")
# best_model.eval()
# testing_accuracy = 0.0
# pred_labels = []
# act_labels = []
# for i, (images, labels) in enumerate(test_dataloader):
#     if torch.cuda.is_available():
#         images = Variable(images.cuda())
#         labels = Variable(labels.cuda())
    
#     outputs = best_model(images)
#     _, prediction = torch.max(outputs.data, 1)
    
#     testing_accuracy += int(torch.sum(prediction == labels.data))
    
#     pred_labels.extend(prediction.tolist())
#     act_labels.extend(labels.tolist())

# testing_accuracy = testing_accuracy / len(test_dataloader.dataset)
# print("testing Accuracy:", testing_accuracy)
import torch
from torch.autograd import Variable

# Assuming you have defined your test_dataloader previously

best_model = torch.load("best_accuracy_model_BiLSTM.pth")
best_model.eval()

# Initialize lists to store filenames and predicted labels
file_predictions = []

for i, (images, _) in enumerate(test_dataloader):  # Assuming you don't have access to actual labels
    if torch.cuda.is_available():
        images = Variable(images.cuda())

    outputs = best_model(images)
    _, prediction = torch.max(outputs.data, 1)
    
    # Extract filenames from test dataset
    batch_filenames = test_dataloader.dataset.samples[i][0]
    
    # Append filename and predicted label to the list
    for filename, pred_label in zip(batch_filenames, prediction):
        # Assuming you have a mapping from label index to class name
        # Replace label_to_class with your actual mapping
        class_name = label_to_class[pred_label.item()]
        file_predictions.append((filename, class_name))

# Save the filename and predicted label pairs to a text file
with open("predicted_labels.txt", "w") as f:
    for filename, pred_class in file_predictions:
        f.write(f"{filename}, {pred_class}\n")

print("Prediction results saved to predicted_labels.txt")


AttributeError: 'PtDataset' object has no attribute 'samples'

In [None]:
plt.plot(train_accuracy_list, label='Train Accuracy')
plt.plot(test_accuracy_list, label='Test Accuracy')

plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Train and Test Accuracy')
plt.legend()
plt.savefig("TrainVsTest.png")
plt.show()

In [None]:
# Calculate the confusion matrix
import seaborn as sns
conf_mat = confusion_matrix(actual_labels, pred_labels)
# Plot confusion matrix heat map
sns.heatmap(conf_mat, cmap="flare",annot=True, fmt = "g", 
            cbar_kws={"label":"color bar"},
            xticklabels=train_dataset.classes,
            yticklabels=train_dataset.classes)
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.savefig("ConfusionMatrix.png")
plt.show()