## Approach B

### Data (not augmented)

In [17]:
import os
import torch
import torchaudio
import torchaudio.transforms as T
from torch.utils.data import Dataset

class AccentSpectrogramDataset(Dataset):
    def __init__(self, folder_path,
                 target_sr: int = 16000,
                 use_mel: bool = False,
                 n_fft: int = 400,
                 hop_length: int = None,
                 n_mels: int = 64,
                 log_scale: bool = True):
        # store file paths only; transform per item
        self.file_paths = [
            os.path.join(folder_path, f)
            for f in os.listdir(folder_path)
            if f.endswith('.wav')
        ]
        self.target_sr = target_sr
        self.use_mel = use_mel
        self.n_fft = n_fft
        self.hop_length = hop_length or n_fft // 2
        self.n_mels = n_mels
        self.log_scale = log_scale

        # pre-configure transform funct
        if self.use_mel:
            self._transform = lambda w: T.MelSpectrogram(
                sample_rate=self.target_sr,
                n_fft=self.n_fft,
                hop_length=self.hop_length,
                n_mels=self.n_mels
            )(w)
        else:
            self._transform = lambda w: T.Spectrogram(
                n_fft=self.n_fft,
                hop_length=self.hop_length
            )(w)

    def __len__(self):
        return len(self.file_paths)

    def __getitem__(self, idx):
        path = self.file_paths[idx]
        waveform, sr = torchaudio.load(path)
        if sr != self.target_sr:
            waveform = T.Resample(sr, self.target_sr)(waveform)

        spec = self._transform(waveform)
        if self.log_scale:
            spec = torch.log(spec + 1e-6)

        fname = os.path.basename(path)
        accent = int(fname[0]) - 1          # classes 0–4
        gender = fname[1]  # 'm' or 'f' 
        return spec, accent, gender

In [2]:
# import torch.nn.functional as F

# def pad_collate(batch):
#     specs, accents = zip(*batch)

#     max_len = max([s.shape[-1] for s in specs])
#     padded_specs = []

#     for s in specs:
#         pad_amount = max_len - s.shape[-1]
#         padded = F.pad(s, (0, pad_amount))
#         padded_specs.append(padded)

    # return (
#         torch.stack(padded_specs),             # [B, 1, Freq, Time]
#         torch.tensor(accents),                # [B]
#     )

In [18]:
# #baseline
import torch.nn.functional as F
def pad_collate(batch, target_width=208):
    specs, accents, genders = zip(*batch)
    padded_specs = []
    for s in specs:
        pad_amount = target_width - s.shape[-1]
        if pad_amount > 0:
            padded = torch.nn.functional.pad(s, (0, pad_amount))
        else:
            padded = s[..., :target_width]
        padded_specs.append(padded)
    return (
        torch.stack(padded_specs),
        torch.tensor(accents),
        list(genders)   # <--- returns a list of 'm'/'f'
    )

In [19]:
#dataset = AccentSpectrogramDataset("/Users/larsheijnen/DL/Train")
dataset = AccentSpectrogramDataset("/Users/larsheijnen/DL/Train")
print(f"Total samples: {len(dataset)}")

# Look at shape of first spectrogram
x, y, z= dataset[6]
print(f"Spectrogram shape: {x.shape}")
print(f"Label: {y}")
print(f"Gender: {z}")

Total samples: 3166
Spectrogram shape: torch.Size([1, 201, 526])
Label: 1
Gender: m


In [20]:
from torch.utils.data import DataLoader

# Use batch_size=4 for low RAM, pin_memory is False for macOS/MPS
dataloader = DataLoader(dataset, batch_size=4, shuffle=True, collate_fn=pad_collate, pin_memory=False)

# Try again
for batch in dataloader:
    spectrograms, accents, gender = batch
    print(f"Spectrograms: {spectrograms.shape}")  # (B, 1, F, T)
    print(f"Accents: {accents}")                  # (B,)
    print(f"Gender: {gender}")
    break

Spectrograms: torch.Size([4, 1, 201, 208])
Accents: tensor([4, 2, 4, 3])
Gender: ['f', 'f', 'f', 'f']


In [6]:
import torch.nn as nn
import torch.nn.functional as F

#Model 1 (baseline)
class CNNBaseline(nn.Module):
    def __init__(self, num_classes: int = 5):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 8, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(8, 16, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
    
        self.pool = nn.AdaptiveAvgPool2d((16, 16))  
        self.fc = nn.Linear(32 * 16 * 16, num_classes)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = self.pool(x)
        x = x.view(x.size(0), -1)
        return self.fc(x)
    
#Model 2 (baseline + batch normalization)
class CNNBaseline_BatchNorm(nn.Module):
    def __init__(self, num_classes: int = 5):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 8, kernel_size=3, padding=1)
        self.bn1   = nn.BatchNorm2d(8)
        self.conv2 = nn.Conv2d(8, 16, kernel_size=3, padding=1)
        self.bn2   = nn.BatchNorm2d(16)
        self.conv3 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.bn3   = nn.BatchNorm2d(32)
        
        self.pool = nn.AdaptiveAvgPool2d((16, 16))  
        self.fc = nn.Linear(32 * 16 * 16, num_classes)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = self.pool(x)
        x = x.view(x.size(0), -1)
        return self.fc(x)
    
#Model 3 (baseline + dropout 0.3)
class CNNBaseline_Dropout3(nn.Module):
    def __init__(self, num_classes: int = 5, dropout_p: float = 0.3):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 8, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(8, 16, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        
        self.pool = nn.AdaptiveAvgPool2d((16, 16))  
        self.dropout = nn.Dropout(dropout_p)
        self.fc = nn.Linear(32 * 16 * 16, num_classes)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = self.pool(x)
        x = x.view(x.size(0), -1)
        return self.fc(x)
    
#Model 4 (baseline + dropout 0.5)
class CNNBaseline_Dropout5(nn.Module):
    def __init__(self, num_classes: int = 5, dropout_p: float = 0.5):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 8, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(8, 16, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        
        self.pool = nn.AdaptiveAvgPool2d((16, 16))  
        self.dropout = nn.Dropout(dropout_p) 
        self.fc = nn.Linear(32 * 16 * 16, num_classes)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = self.pool(x)
        x = x.view(x.size(0), -1)
        return self.fc(x)

#Model 5 (baseline + bacth normalization + dropout 0.3)
class CNNBaseline_Dropout3_BatchNorm(nn.Module):
    def __init__(self, num_classes: int = 5, dropout_p: float = 0.3):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 8, kernel_size=3, padding=1)
        self.bn1   = nn.BatchNorm2d(8)
        self.conv2 = nn.Conv2d(8, 16, kernel_size=3, padding=1)
        self.bn2   = nn.BatchNorm2d(16)
        self.conv3 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.bn3   = nn.BatchNorm2d(32)

        self.pool = nn.AdaptiveAvgPool2d((16, 16))  
        self.dropout = nn.Dropout(dropout_p) 
        self.fc = nn.Linear(32 * 16 * 16, num_classes)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = self.pool(x)
        x = x.view(x.size(0), -1)
        return self.fc(x)

#Model 6 (baseline + bacth normalization + dropout 0.5)
class CNNBaseline_Dropout5_BatchNorm(nn.Module):
    def __init__(self, num_classes: int = 5, dropout_p: float = 0.5):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 8, kernel_size=3, padding=1)
        self.bn1   = nn.BatchNorm2d(8)
        self.conv2 = nn.Conv2d(8, 16, kernel_size=3, padding=1)
        self.bn2   = nn.BatchNorm2d(16)
        self.conv3 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.bn3   = nn.BatchNorm2d(32)

        self.pool = nn.AdaptiveAvgPool2d((16, 16))  
        self.dropout = nn.Dropout(dropout_p) 
        self.fc = nn.Linear(32 * 16 * 16, num_classes)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = self.pool(x)
        x = x.view(x.size(0), -1)
        return self.fc(x)


In [7]:
class AccentCNN(nn.Module):
    def __init__(self, num_classes: int = 5, dropout_p: float = 0.3):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 8, kernel_size=3, padding=1)
        self.bn1   = nn.BatchNorm2d(8)
        self.pool1 = nn.MaxPool2d(2)

        self.conv2 = nn.Conv2d(8, 16, kernel_size=3, padding=1)
        self.bn2   = nn.BatchNorm2d(16)
        self.pool2 = nn.MaxPool2d(2)

        self.dropout = nn.Dropout(dropout_p)

        self.conv3 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.bn3   = nn.BatchNorm2d(32)
        self.pool3 = nn.AdaptiveAvgPool2d((1,1))

        self.fc = nn.Linear(32, num_classes) 

    def forward(self, x):
        x = self.pool1(F.relu(self.bn1(self.conv1(x))))
        x = self.pool2(F.relu(self.bn2(self.conv2(x))))
        x = self.dropout(x)
        x = self.pool3(F.relu(self.bn3(self.conv3(x))))
        x = x.view(x.size(0), -1)               # → (B, 32)
        return self.fc(x)

In [8]:
models_dict = {
    "Model1": CNNBaseline,
    "Model2": CNNBaseline_BatchNorm, 
    "Model3": CNNBaseline_Dropout3,
    "Model4": CNNBaseline_Dropout5,
    "Model5": CNNBaseline_Dropout3_BatchNorm,
    "Model6": CNNBaseline_Dropout5_BatchNorm,}

In [9]:
import torch
from torch.utils.data import DataLoader, random_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# Prepare dataset & split
dataset = AccentSpectrogramDataset(
    '/Users/larsheijnen/DL/Train',
    target_sr=16000,
    use_mel=True,
    n_fft=1024,
    hop_length=256,
    n_mels=64,
    log_scale=True)

train_len = int(0.8 * len(dataset))
test_len  = len(dataset) - train_len
train_ds, test_ds = random_split(dataset, [train_len, test_len], generator=torch.Generator().manual_seed(42))

train_loader = DataLoader(train_ds, batch_size=4, shuffle=True,  collate_fn=pad_collate)
test_loader  = DataLoader(test_ds,  batch_size=4, shuffle=False, collate_fn=pad_collate)

device    = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
criterion = nn.CrossEntropyLoss()

# General (not by gender) evaluation helper
def evaluate(loader, model, device):
    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for specs, labels, _ in loader:
            specs, labels = specs.to(device), labels.to(device)
            outputs = model(specs)
            preds = outputs.argmax(dim=1)
            all_preds.extend(preds.cpu().tolist())
            all_labels.extend(labels.cpu().tolist())
    acc    = accuracy_score(all_labels, all_preds)
    prec   = precision_score(all_labels, all_preds, average='macro', zero_division=0)
    recall = recall_score(all_labels, all_preds, average='macro')
    f1     = f1_score(all_labels, all_preds, average='macro')
    return acc, prec, recall, f1

# Gender-based evaluation helper
def evaluate_by_gender(loader, model, device):
    model.eval()
    all_preds, all_labels, all_genders = [], [], []
    with torch.no_grad():
        for specs, labels, genders in loader:
            specs, labels = specs.to(device), labels.to(device)
            outputs = model(specs)
            preds = outputs.argmax(dim=1)
            all_preds.extend(preds.cpu().tolist())
            all_labels.extend(labels.cpu().tolist())
            all_genders.extend(genders)
    results = {}
    for gender in ['m', 'f']:
        idxs = [i for i, g in enumerate(all_genders) if g == gender]
        gender_preds = [all_preds[i] for i in idxs]
        gender_labels = [all_labels[i] for i in idxs]
        acc = accuracy_score(gender_labels, gender_preds)
        prec = precision_score(gender_labels, gender_preds, average='macro', zero_division=0)
        recall = recall_score(gender_labels, gender_preds, average='macro')
        f1 = f1_score(gender_labels, gender_preds, average='macro')
        results[gender] = {'accuracy': acc, 'precision': prec, 'recall': recall, 'f1': f1}
    return results

def classification_report_for_model(model, loader, device):
    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for specs, labels, _ in loader:
            specs, labels = specs.to(device), labels.to(device)
            outputs = model(specs)
            preds = outputs.argmax(dim=1)
            all_preds.extend(preds.cpu().tolist())
            all_labels.extend(labels.cpu().tolist())
    print(classification_report(all_labels, all_preds, digits=3))

for model_name, model_class in models_dict.items():
    model = model_class().to(device)
    print(f"\n=== Training model: {type(model).__name__} ===")
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
    num_epochs = 10
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for specs, labels, genders in train_loader:
            specs, labels = specs.to(device), labels.to(device)
            optimizer.zero_grad()
            loss = criterion(model(specs), labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        # Compute and print general metrics for this epoch (not by gender)
        train_acc, train_prec, train_recall, train_f1 = evaluate(train_loader, model, device)
        test_acc, test_prec, test_recall, test_f1 = evaluate(test_loader, model, device)
        print(
            f"Epoch {epoch+1:2d} | "
            f"Train Loss: {running_loss:.3f} | "
            f"Train Acc: {train_acc*100:5.2f}% | "
            f"Train Prec: {train_prec*100:5.2f}% | "
            f"Train Recall: {train_recall*100:5.2f}% | "
            f"Train F1: {train_f1*100:5.2f}% || "
            f"Test Acc: {test_acc*100:5.2f}% | "
            f"Test Prec: {test_prec*100:5.2f}% | "
            f"Test Recall: {test_recall*100:5.2f}% | "
            f"Test F1: {test_f1*100:5.2f}%"
        )
        
    os.makedirs("saved_models", exist_ok=True)
    torch.save(model.state_dict(), f"saved_models/{type(model).__name__}_latest.pth")

    print(f"\nClassification Report for {type(model).__name__}:")
    classification_report_for_model(model, test_loader, device)

    print(f"\nGender breakdown for {type(model).__name__}:")
    gender_results = evaluate_by_gender(test_loader, model, device)
    for gender in gender_results:
        label = "Male" if gender == "m" else "Female"
        print(f"{label}: {gender_results[gender]}")


=== Training model: CNNBaseline ===
Epoch  1 | Train Loss: 813.565 | Train Acc: 71.45% | Train Prec: 76.42% | Train Recall: 71.24% | Train F1: 71.02% || Test Acc: 66.09% | Test Prec: 71.79% | Test Recall: 66.08% | Test F1: 65.65%
Epoch  2 | Train Loss: 373.368 | Train Acc: 84.87% | Train Prec: 86.01% | Train Recall: 82.65% | Train F1: 83.51% || Test Acc: 74.45% | Test Prec: 76.05% | Test Recall: 72.84% | Test F1: 72.98%
Epoch  3 | Train Loss: 267.581 | Train Acc: 90.64% | Train Prec: 90.77% | Train Recall: 89.27% | Train F1: 89.71% || Test Acc: 78.23% | Test Prec: 78.63% | Test Recall: 77.14% | Test F1: 77.37%
Epoch  4 | Train Loss: 184.139 | Train Acc: 95.77% | Train Prec: 95.39% | Train Recall: 95.69% | Train F1: 95.50% || Test Acc: 83.75% | Test Prec: 83.62% | Test Recall: 83.82% | Test F1: 83.55%
Epoch  5 | Train Loss: 150.532 | Train Acc: 93.96% | Train Prec: 94.97% | Train Recall: 92.36% | Train F1: 93.01% || Test Acc: 80.76% | Test Prec: 82.99% | Test Recall: 79.25% | Test F1: 

### Predicting acccent on Test data

In [21]:
testset_folder = "/Users/larsheijnen/DL/Test set"
test_dataset = AccentSpectrogramDataset(
    testset_folder,
    target_sr=16000,
    use_mel=True,
    n_fft=1024,
    hop_length=256,
    n_mels=64,
    log_scale=True
)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False, collate_fn=pad_collate)

In [26]:
import os
import torch

saved_models_dir = "/Users/larsheijnen/DL/saved_models"
model_files = [
    "CNNBaseline_latest.pth",
    "CNNBaseline_BatchNorm_latest.pth",
    "CNNBaseline_Dropout3_latest.pth",
    "CNNBaseline_Dropout5_latest.pth",
    "CNNBaseline_Dropout3_BatchNorm_latest.pth",
    "CNNBaseline_Dropout5_BatchNorm_latest.pth"
]

# Map model file names to their classes
model_classes = {
    "CNNBaseline_latest.pth": CNNBaseline,
    "CNNBaseline_BatchNorm_latest.pth": CNNBaseline_BatchNorm,
    "CNNBaseline_Dropout3_latest.pth": CNNBaseline_Dropout3,
    "CNNBaseline_Dropout5_latest.pth": CNNBaseline_Dropout5,
    "CNNBaseline_Dropout3_BatchNorm_latest.pth": CNNBaseline_Dropout3_BatchNorm,
    "CNNBaseline_Dropout5_BatchNorm_latest.pth": CNNBaseline_Dropout5_BatchNorm,
}

In [27]:
def predict_accent_on_testset(model, test_loader, device):
    model.eval()
    all_preds = []
    all_fnames = []
    with torch.no_grad():
        for i, (specs, _, _) in enumerate(test_loader):  # gender is ignored
            specs = specs.to(device)
            outputs = model(specs)
            preds = outputs.argmax(dim=1).cpu().tolist()
            all_preds.extend(preds)
            # Get filenames for this batch
            batch_indices = range(i * test_loader.batch_size, i * test_loader.batch_size + len(preds))
            fnames = [os.path.basename(test_dataset.file_paths[idx]) for idx in batch_indices]
            all_fnames.extend(fnames)
    return list(zip(all_fnames, all_preds))

In [28]:
import os
import torch

saved_models_dir = "/Users/larsheijnen/DL/saved_models"
model_files = [
    "CNNBaseline_latest.pth",
    "CNNBaseline_BatchNorm_latest.pth",
    "CNNBaseline_Dropout3_latest.pth",
    "CNNBaseline_Dropout5_latest.pth",
    "CNNBaseline_Dropout3_BatchNorm_latest.pth",
    "CNNBaseline_Dropout5_BatchNorm_latest.pth"
]

model_classes = {
    "CNNBaseline_latest.pth": CNNBaseline,
    "CNNBaseline_BatchNorm_latest.pth": CNNBaseline_BatchNorm,
    "CNNBaseline_Dropout3_latest.pth": CNNBaseline_Dropout3,
    "CNNBaseline_Dropout5_latest.pth": CNNBaseline_Dropout5,
    "CNNBaseline_Dropout3_BatchNorm_latest.pth": CNNBaseline_Dropout3_BatchNorm,
    "CNNBaseline_Dropout5_BatchNorm_latest.pth": CNNBaseline_Dropout5_BatchNorm,
}

device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

for model_file in model_files:
    model_class = model_classes[model_file]
    model = model_class().to(device)
    model_path = os.path.join(saved_models_dir, model_file)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()
    print(f"\nPredictions for model: {model_file}")
    results = predict_accent_on_testset(model, test_loader, device)
    for fname, pred in results:
        print(f"File: {fname} | Predicted Accent: {pred}")


Predictions for model: CNNBaseline_latest.pth
File: 9430.wav | Predicted Accent: 3
File: 4458.wav | Predicted Accent: 4
File: 1534.wav | Predicted Accent: 0
File: 8510.wav | Predicted Accent: 1
File: 7192.wav | Predicted Accent: 4
File: 2607.wav | Predicted Accent: 2
File: 1468.wav | Predicted Accent: 2
File: 5626.wav | Predicted Accent: 1
File: 9949.wav | Predicted Accent: 2
File: 5815.wav | Predicted Accent: 1
File: 6105.wav | Predicted Accent: 0
File: 4060.wav | Predicted Accent: 3
File: 4048.wav | Predicted Accent: 3
File: 8855.wav | Predicted Accent: 0
File: 7232.wav | Predicted Accent: 0
File: 8101.wav | Predicted Accent: 3
File: 8115.wav | Predicted Accent: 4
File: 7540.wav | Predicted Accent: 3
File: 8673.wav | Predicted Accent: 1
File: 2438.wav | Predicted Accent: 4
File: 9974.wav | Predicted Accent: 3
File: 7781.wav | Predicted Accent: 4
File: 8465.wav | Predicted Accent: 0
File: 9747.wav | Predicted Accent: 3
File: 8459.wav | Predicted Accent: 0
File: 4277.wav | Predicted A

## Check on train data

In [29]:
from torch.utils.data import Subset
import numpy as np

trainset_folder = "/Users/larsheijnen/DL/Train"
full_train_dataset = AccentSpectrogramDataset(
    trainset_folder,
    target_sr=16000,
    use_mel=True,
    n_fft=1024,
    hop_length=256,
    n_mels=64,
    log_scale=True
)

# Randomly select 100 indices
np.random.seed(42)
subset_indices = np.random.choice(len(full_train_dataset), size=100, replace=False)
subset_dataset = Subset(full_train_dataset, subset_indices)
subset_loader = DataLoader(subset_dataset, batch_size=4, shuffle=False, collate_fn=pad_collate)

In [30]:
def evaluate_on_subset(model, loader, device):
    model.eval()
    all_preds = []
    all_labels = []
    all_fnames = []
    with torch.no_grad():
        for i, (specs, labels, _) in enumerate(loader):  # ignore gender
            specs = specs.to(device)
            outputs = model(specs)
            preds = outputs.argmax(dim=1).cpu().tolist()
            all_preds.extend(preds)
            all_labels.extend(labels.tolist())
            # Get filenames for this batch
            batch_indices = range(i * loader.batch_size, i * loader.batch_size + len(preds))
            fnames = [os.path.basename(full_train_dataset.file_paths[idx]) for idx in subset_indices[batch_indices.start:batch_indices.stop]]
            all_fnames.extend(fnames)
    return list(zip(all_fnames, all_labels, all_preds))

In [33]:
for model_file in model_files:
    model_class = model_classes[model_file]
    model = model_class().to(device)
    model_path = os.path.join(saved_models_dir, model_file)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()
    print(f"\nEvaluation on subset for model: {model_file}")
    results = evaluate_on_subset(model, subset_loader, device)
    correct = 0
    for fname, true_label, pred_label in results:
        is_correct = true_label == pred_label
        correct += is_correct
        print(f"File: {fname} | True Accent: {true_label + 1} | Predicted Accent: {pred_label + 1} | {'✔️' if is_correct else '❌'}")
    print(f"Accuracy on subset: {correct/len(results)*100:.2f}%")


Evaluation on subset for model: CNNBaseline_latest.pth
File: 2f_7399.wav | True Accent: 2 | Predicted Accent: 2 | ✔️
File: 1m_5041.wav | True Accent: 1 | Predicted Accent: 1 | ✔️
File: 1f_4107.wav | True Accent: 1 | Predicted Accent: 1 | ✔️
File: 3m_3181.wav | True Accent: 3 | Predicted Accent: 3 | ✔️
File: 1m_8027.wav | True Accent: 1 | Predicted Accent: 1 | ✔️
File: 3f_4283.wav | True Accent: 3 | Predicted Accent: 3 | ✔️
File: 2m_2504.wav | True Accent: 2 | Predicted Accent: 2 | ✔️
File: 3m_6518.wav | True Accent: 3 | Predicted Accent: 3 | ✔️
File: 4m_2067.wav | True Accent: 4 | Predicted Accent: 4 | ✔️
File: 1m_8195.wav | True Accent: 1 | Predicted Accent: 1 | ✔️
File: 5f_2432.wav | True Accent: 5 | Predicted Accent: 5 | ✔️
File: 3m_8721.wav | True Accent: 3 | Predicted Accent: 3 | ✔️
File: 1f_6268.wav | True Accent: 1 | Predicted Accent: 1 | ✔️
File: 4m_7425.wav | True Accent: 4 | Predicted Accent: 4 | ✔️
File: 1m_5430.wav | True Accent: 1 | Predicted Accent: 1 | ✔️
File: 1f_9403.