In [6]:
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import torchvision
import torchvision.transforms.v2
import torchvision.transforms as transforms
from torch.utils.data import Dataset

In [9]:
import os
import pandas as pd
from torchvision.io import read_image
import torch
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader

In [58]:
class ImageDataset(Dataset):
    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
        image = read_image(img_path)
        label = self.img_labels.iloc[idx, 1]
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label

In [83]:
def evaluate_model(model, loader):
    classes = ("0", "1")
    correct = 0
    total = 0
    correct_pred = {classname: 0 for classname in classes}
    total_pred = {classname: 0 for classname in classes}
    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in loader:
            images, labels = data
            # calculate outputs by running images through the network
            outputs = model(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        
            _, predictions = torch.max(outputs, 1)
            # collect the correct predictions for each class
            for label, prediction in zip(labels, predictions):
                if label == prediction:
                    correct_pred[classes[label]] += 1
                total_pred[classes[label]] += 1

    print(f'Accuracy of the network on the 1000 test images: {100 * correct // total} %')
    # print accuracy for each class
    for classname, correct_count in correct_pred.items():
        accuracy = 100 * float(correct_count) / total_pred[classname]
        print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %, correct: {correct_count}, total: {total_pred[classname]}')

In [88]:
def macro_f1(correct_one, total_one, correct_zero, total_zero):
    # Calculate precision, recall, and F1 score for class 0
    true_positive_0 = correct_zero
    false_positive_0 = total_one - correct_one
    false_negative_0 = total_zero - correct_zero

    precision_0 = true_positive_0 / (true_positive_0 + false_positive_0)
    recall_0 = true_positive_0 / (true_positive_0 + false_negative_0)
    f1_score_0 = 2 * (precision_0 * recall_0) / (precision_0 + recall_0)

    # Calculate precision, recall, and F1 score for class 1
    true_positive_1 = correct_one
    false_positive_1 = false_negative_0 
    false_negative_1 = false_positive_0

    precision_1 = true_positive_1 / (true_positive_1 + false_positive_1)
    recall_1 = true_positive_1 / (true_positive_1 + false_negative_1)
    f1_score_1 = 2 * (precision_1 * recall_1) / (precision_1 + recall_1)

    # Calculate macro average F1 score
    macro_avg_f1_score = (f1_score_0 + f1_score_1) / 2

    return macro_avg_f1_score

In [59]:
transform = torchvision.transforms.v2.Compose([transforms.Resize((96, 194)),transforms.v2.ToImage(),\
                                               transforms.v2.ToDtype(torch.float32, scale=True),\
                                               transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])


trainset = ImageDataset(annotations_file = "..\\work_folder\\daps_Data\\Spectrograms_mel_cropped_training\\labels.csv", img_dir = "..\\work_folder\\daps_Data\\Spectrograms_mel_cropped_training\\Images"\
                       , transform = transform)

testset = ImageDataset(annotations_file = "..\\work_folder\\daps_Data\\Spectrograms_mel_cropped_test\\labels.csv", img_dir = "..\\work_folder\\daps_Data\\Spectrograms_mel_cropped_test\\Images"\
                      , transform = transform)

In [60]:
EPOCHS = 10  # Adjust based on training speed
LEARNING_RATE = 0.001  # Assuming square images, resize to 64x64
NUM_CLASSES = 2  # Change this based on your dataset

DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
batch_size = 64

trainloader = DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=0)

testloader = DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=0)
classes = ("0", "1")

In [61]:
class Net(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),  # (N, 32, 64, 64)
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # (N, 32, 32, 32)

            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),  # (N, 64, 32, 32)
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # (N, 64, 16, 16)

            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),  # (N, 128, 16, 16)
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)  # (N, 128, 8, 8)
        )
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(36864, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x

In [62]:
PATH = '../work_folder/model_4.pth'
net = Net(2)
net.load_state_dict(torch.load(PATH, weights_only=True))

<All keys matched successfully>

In [26]:
evaluate_model(net, testloader)

Accuracy of the network on the 1000 test images: 91 %
Accuracy for class: 0     is 97.9 %, correct: 9949, total: 10166
Accuracy for class: 1     is 77.4 %, correct: 3519, total: 4549


In [91]:
macro_f1(3519, 4549, 9949, 10166)

0.8952566360416545

In [63]:
evaluate_model(net, trainloader)

Accuracy of the network on the 81177 training images: 92 %
Accuracy for class: 0     is 98.8 %, correct: 55459, total: 56130
Accuracy for class: 1     is 78.3 %, correct: 19604, total: 25047


In [92]:
macro_f1(19604, 25047, 55459, 56130)

0.9064282528926193

In [64]:
trainset = ImageDataset(annotations_file = "..\\work_folder\\daps_Data\\Spectrograms2_cropped_training\\labels.csv", img_dir = "..\\work_folder\\daps_Data\\Spectrograms2_cropped_training\\Images"\
                       , transform = transform)

testset = ImageDataset(annotations_file = "..\\work_folder\\daps_Data\\Spectrograms2_cropped_test\\labels.csv", img_dir = "..\\work_folder\\daps_Data\\Spectrograms2_cropped_test\\Images"\
                      , transform = transform)

In [65]:
trainloader = DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=0)

testloader = DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=0)

In [66]:
PATH = '../work_folder/model_5.pth'
net = Net(2)
net.load_state_dict(torch.load(PATH, weights_only=True))

<All keys matched successfully>

In [30]:
evaluate_model(net, testloader)

Accuracy of the network on the 1000 test images: 94 %
Accuracy for class: 0     is 95.7 %, correct: 9662, total: 10096
Accuracy for class: 1     is 93.1 %, correct: 4236, total: 4549


In [93]:
macro_f1(4236, 4549, 9662, 10096)

0.9408769061825313

In [67]:
evaluate_model(net, trainloader)

Accuracy of the network on the 81177 training images: 96 %
Accuracy for class: 0     is 97.4 %, correct: 54696, total: 56141
Accuracy for class: 1     is 93.7 %, correct: 23455, total: 25036


In [94]:
macro_f1(23455, 25036, 54696, 56141)

0.9562425187735828

In [68]:
trainset = ImageDataset(annotations_file = "..\\work_folder\\daps_Data\\Spectrograms_cropped_training\\labels.csv", img_dir = "..\\work_folder\\daps_Data\\Spectrograms_cropped_training\\Images"\
                       , transform = transform)

testset = ImageDataset(annotations_file = "..\\work_folder\\daps_Data\\Spectrograms_cropped_test\\labels.csv", img_dir = "..\\work_folder\\daps_Data\\Spectrograms_cropped_test\\Images"\
                      , transform = transform)

In [69]:
trainloader = DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=0)

testloader = DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=0)

In [70]:
PATH = '../work_folder/model_6.pth'
net = Net(2)
net.load_state_dict(torch.load(PATH, weights_only=True))

<All keys matched successfully>

In [38]:
evaluate_model(net, testloader)

Accuracy of the network on the 1000 test images: 88 %
Accuracy for class: 0     is 90.0 %, correct: 9146, total: 10166
Accuracy for class: 1     is 84.9 %, correct: 3800, total: 4476


In [95]:
macro_f1(3800, 4476, 9146, 10166)

0.866352513745769

In [71]:
evaluate_model(net, trainloader)

Accuracy of the network on the 81328 training images: 88 %
Accuracy for class: 0     is 91.4 %, correct: 51614, total: 56477
Accuracy for class: 1     is 83.4 %, correct: 20729, total: 24851


In [96]:
macro_f1(20729, 24851, 51614, 56477)

0.8709036114921318

In [72]:
trainset = ImageDataset(annotations_file = "..\\work_folder\\daps_Data\\Spectrograms2_cropped_training\\labels.csv", img_dir = "..\\work_folder\\daps_Data\\Spectrograms2_cropped_training\\Images"\
                       , transform = transform)

testset = ImageDataset(annotations_file = "..\\work_folder\\daps_Data\\Spectrograms2_cropped_test\\labels.csv", img_dir = "..\\work_folder\\daps_Data\\Spectrograms2_cropped_test\\Images"\
                      , transform = transform)

In [73]:
trainloader = DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=0)

testloader = DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=0)

In [74]:
class Net(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),  # (N, 32, 64, 64)
            nn.BatchNorm2d(32),
            nn.LeakyReLU(0.1),
            nn.MaxPool2d(kernel_size=2, stride=2),  # (N, 32, 32, 32)

            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),  # (N, 64, 32, 32)
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.1),
            nn.MaxPool2d(kernel_size=2, stride=2),  # (N, 64, 16, 16)

            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),  # (N, 128, 16, 16)
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.1),
            nn.MaxPool2d(kernel_size=2, stride=2)  # (N, 128, 8, 8)
        )
        self.global_avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = self.global_avg_pool(x)
        x = x.view(x.size(0), -1)  # Flatten
        x = self.fc_layers(x)
        return x

In [75]:
PATH = '../work_folder/model_7.pth'
net = Net(2)
net.load_state_dict(torch.load(PATH, weights_only=True))

<All keys matched successfully>

In [45]:
evaluate_model(net, testloader)

Accuracy of the network on the 14645 test images: 80 %
Accuracy for class: 0     is 88.1 %, correct: 8892, total: 10096
Accuracy for class: 1     is 64.4 %, correct: 2928, total: 4549


In [97]:
macro_f1(2928, 4549, 8892, 10096)

0.7687503134664098

In [76]:
evaluate_model(net, trainloader)

Accuracy of the network on the 81177 training images: 95 %
Accuracy for class: 0     is 96.9 %, correct: 54393, total: 56141
Accuracy for class: 1     is 92.2 %, correct: 23089, total: 25036


In [98]:
macro_f1(23089, 25036, 54393, 56141)

0.946530911784752

In [77]:
transform = torchvision.transforms.v2.Compose([transforms.Resize((193, 388)),transforms.v2.ToImage(),\
                                               transforms.v2.ToDtype(torch.float32, scale=True),\
                                               transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
trainset = ImageDataset(annotations_file = "..\\work_folder\\daps_Data\\Spectrograms2_cropped_training\\labels.csv", img_dir = "..\\work_folder\\daps_Data\\Spectrograms2_cropped_training\\Images"\
                       , transform = transform)

testset = ImageDataset(annotations_file = "..\\work_folder\\daps_Data\\Spectrograms2_cropped_test\\labels.csv", img_dir = "..\\work_folder\\daps_Data\\Spectrograms2_cropped_test\\Images"\
                      , transform = transform)

In [78]:
trainloader = DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=0)

testloader = DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=0)

In [79]:
class Net(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),  # (N, 32, 64, 64)
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # (N, 32, 32, 32)

            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),  # (N, 64, 32, 32)
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # (N, 64, 16, 16)

            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),  # (N, 128, 16, 16)
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)  # (N, 128, 8, 8)
        )
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(147456, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x

In [80]:
PATH = '../work_folder/model_8.pth'
net = Net(2)
net.load_state_dict(torch.load(PATH, weights_only=True))

<All keys matched successfully>

In [57]:
evaluate_model(net, testloader)

Accuracy of the network on the 14645 test images: 94 %
Accuracy for class: 0     is 95.8 %, correct: 9676, total: 10096
Accuracy for class: 1     is 92.8 %, correct: 4223, total: 4549


In [99]:
macro_f1(4223, 4549, 9676, 10096)

0.9408621752542221

In [81]:
evaluate_model(net, trainloader)

Accuracy of the network on the 81177 training images: 98 %
Accuracy for class: 0     is 98.8 %, correct: 55469, total: 56141
Accuracy for class: 1     is 97.1 %, correct: 24310, total: 25036


In [100]:
macro_f1(24310, 25036, 55469, 56141)

0.9798026267729204