In [1]:
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from sklearn.preprocessing import LabelEncoder
from PIL import Image

In [2]:
# Sanity check: report whether a CUDA device is visible and which PyTorch
# build this kernel is running (one value per line).
print(torch.cuda.is_available(), torch.__version__, sep="\n")

True
2.5.1+cu121


In [3]:
# Read the test-set index, replace the emotion names with integer class ids,
# and write a two-column (path, label) CSV for the Dataset class to consume.
# NOTE(review): the encoder is fitted on this test split; the ids only match
# the training ids if both splits contain the same set of emotion names --
# TODO confirm against the training notebook.
label_encoder = LabelEncoder()
df = pd.read_csv("../data/melspectrogram_test_dataset.csv")
df = df.assign(Emotion=label_encoder.fit_transform(df["Emotion"]))
print(df["Emotion"].nunique())

# Keep only the columns the Dataset reads positionally (0 = path, 1 = label).
df = df.loc[:, ['Melspectrogrampath', 'Emotion']]
df.to_csv('melspectrogram_test.csv', index=False)

16


In [4]:
# Preprocessing applied to every spectrogram image: resize, convert to a
# tensor, then normalise each channel with the mean/std listed below.
INPUT_SIZE = (224, 224)
CHANNEL_MEAN = [0.485, 0.456, 0.406]
CHANNEL_STD = [0.229, 0.224, 0.225]

_resize = transforms.Resize(INPUT_SIZE)
_to_tensor = transforms.ToTensor()
_normalize = transforms.Normalize(mean=CHANNEL_MEAN, std=CHANNEL_STD)

transform = transforms.Compose([_resize, _to_tensor, _normalize])

In [5]:
class MelSpectrogramDataset(Dataset):
    """Dataset of mel-spectrogram images listed in a CSV index file.

    The CSV is read with pandas and accessed positionally: column 0 holds the
    image path (relative to ``root_dir``) and column 1 holds the class label.

    Args:
        csv_file: Path of the CSV index file.
        transform: Optional callable applied to the RGB PIL image before it
            is returned. ``None`` returns the PIL image unchanged.
        root_dir: Prefix prepended *verbatim* to every path from the CSV, so
            it must end with a path separator. Defaults to ``"../data/"``,
            the location that was previously hard-coded.
    """

    def __init__(self, csv_file, transform=None, root_dir="../data/"):
        self.data = pd.read_csv(csv_file)
        self.transform = transform
        self.root_dir = root_dir

    def __len__(self):
        """Return the number of rows in the CSV index."""
        return len(self.data)

    def _image_path(self, idx):
        """Return the on-disk path for row ``idx`` (``root_dir`` + column 0)."""
        return self.root_dir + self.data.iloc[idx, 0]

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``.

        ``image`` is the RGB image after ``transform`` (if one was given);
        ``label`` is column 1 of the row as a ``torch.long`` tensor.
        """
        label = self.data.iloc[idx, 1]

        # convert() produces an independent, fully loaded image, so the
        # underlying file can be closed as soon as the block exits.
        with Image.open(self._image_path(idx)) as img:
            image = img.convert("RGB")

        # Compare against None explicitly: a transform that defines __len__
        # could be falsy and would otherwise be skipped by a truthiness test.
        if self.transform is not None:
            image = self.transform(image)

        return image, torch.tensor(label, dtype=torch.long)

In [6]:
# Wrap the exported test CSV in a Dataset and iterate it in fixed order
# (no shuffling), 32 samples per batch.
test_dataset = MelSpectrogramDataset(
    csv_file="melspectrogram_test.csv",
    transform=transform,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=32,
    shuffle=False,
)

In [7]:
# One output unit per distinct encoded emotion label.
num_classes = df["Emotion"].nunique()

# `pretrained=True` is deprecated since torchvision 0.13. IMAGENET1K_V1 is the
# checkpoint that flag used to select, so the loaded backbone is unchanged.
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

# Replace the 1000-way ImageNet classifier with a head sized for this task.
# NOTE(review): this new `fc` layer is randomly initialised and no fine-tuned
# state_dict is loaded in this cell, so evaluating the model as built here
# measures an untrained classifier. Load the trained checkpoint before
# evaluating -- TODO confirm where it is stored.
model.fc = nn.Linear(model.fc.in_features, num_classes)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
model.to(device)
# Inference mode: freezes BatchNorm statistics and disables dropout.
model.eval()



cuda


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [9]:
# Top-1 accuracy over the whole test loader. Nothing here needs gradients,
# so the entire pass runs under a single no_grad context.
correct = 0
total = 0

with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        predicted = outputs.argmax(dim=1)

        # Count matches in this batch and the number of samples seen.
        correct += int((predicted == labels).sum())
        total += labels.shape[0]

print(total)
accuracy = correct / total
print(f"Test Accuracy: {accuracy:.2f}")

# Show which integer id the encoder assigned to each emotion name.
class_names = label_encoder.classes_
class_ids = label_encoder.transform(class_names)
print("Label Mapping:", dict(zip(class_names, class_ids)))

14941
Test Accuracy: 0.03
Label Mapping: {'Anger': np.int64(0), 'Anxious': np.int64(1), 'Apologetic': np.int64(2), 'Assertive': np.int64(3), 'Bored': np.int64(4), 'Calm': np.int64(5), 'Concerned': np.int64(6), 'Disgust': np.int64(7), 'Encouraging': np.int64(8), 'Excited': np.int64(9), 'Fear': np.int64(10), 'Happy': np.int64(11), 'Neutral': np.int64(12), 'Question': np.int64(13), 'Sad': np.int64(14), 'Surprise': np.int64(15)}
