In [2]:
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import torchvision
import torchvision.transforms.v2
from torch.utils.data import Dataset
import os
import pandas as pd
from torchvision.io import read_image
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt

In [3]:
class ImageDataset(Dataset):
    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
        image = read_image(img_path)
        label = self.img_labels.iloc[idx, 1]
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label

Funkcja do oceniania dokładności modelu

In [4]:
def evaluate_model(model, loader):
    classes = ("0", "1")
    correct = 0
    total = 0
    correct_pred = {classname: 0 for classname in classes}
    total_pred = {classname: 0 for classname in classes}
    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in loader:
            images, labels = data
            # calculate outputs by running images through the network
            outputs = model(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        
            _, predictions = torch.max(outputs, 1)
            # collect the correct predictions for each class
            for label, prediction in zip(labels, predictions):
                if label == prediction:
                    correct_pred[classes[label]] += 1
                total_pred[classes[label]] += 1

    print(f'Accuracy of the network on the 1000 test images: {100 * correct // total} %')
    # print accuracy for each class
    for classname, correct_count in correct_pred.items():
        accuracy = 100 * float(correct_count) / total_pred[classname]
        print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %, correct: {correct_count}, total: {total_pred[classname]}')

Funkcja obliczająca macro-averaged f1

In [5]:
def macro_f1(correct_one, total_one, correct_zero, total_zero):
    # Calculate precision, recall, and F1 score for class 0
    true_positive_0 = correct_zero
    false_positive_0 = total_one - correct_one
    false_negative_0 = total_zero - correct_zero

    precision_0 = true_positive_0 / (true_positive_0 + false_positive_0)
    recall_0 = true_positive_0 / (true_positive_0 + false_negative_0)
    f1_score_0 = 2 * (precision_0 * recall_0) / (precision_0 + recall_0)

    # Calculate precision, recall, and F1 score for class 1
    true_positive_1 = correct_one
    false_positive_1 = false_negative_0 
    false_negative_1 = false_positive_0

    precision_1 = true_positive_1 / (true_positive_1 + false_positive_1)
    recall_1 = true_positive_1 / (true_positive_1 + false_negative_1)
    f1_score_1 = 2 * (precision_1 * recall_1) / (precision_1 + recall_1)

    # Calculate macro average F1 score
    macro_avg_f1_score = (f1_score_0 + f1_score_1) / 2

    return macro_avg_f1_score

In [6]:
batch_size = 64
classes = ("0", "1")

In [9]:
transform = torchvision.transforms.v2.Compose([transforms.Resize((96, 194)),transforms.v2.ToImage(),\
                                               transforms.v2.ToDtype(torch.float32, scale=True),\
                                               transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

In [6]:
trainset = ImageDataset(annotations_file = "..\\work_folder\\daps_Data\\Spectrograms_mel_cropped_training\\labels.csv", img_dir = "..\\work_folder\\daps_Data\\Spectrograms_mel_cropped_training\\Images"\
                       , transform = transform)

testset = ImageDataset(annotations_file = "..\\work_folder\\daps_Data\\Spectrograms_mel_cropped_test\\labels.csv", img_dir = "..\\work_folder\\daps_Data\\Spectrograms_mel_cropped_test\\Images"\
                      , transform = transform)

In [7]:
trainloader = DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=0)

testloader = DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=0)

In [7]:
class Net(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),  # (N, 32, 64, 64)
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # (N, 32, 32, 32)

            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),  # (N, 64, 32, 32)
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # (N, 64, 16, 16)

            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),  # (N, 128, 16, 16)
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)  # (N, 128, 8, 8)
        )
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(36864, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x

Spectrograms mel, 10 epochs

In [62]:
PATH = '../work_folder/model_4.pth'
net = Net(2)
net.load_state_dict(torch.load(PATH, weights_only=True))

<All keys matched successfully>

In [26]:
evaluate_model(net, testloader)

Accuracy of the network on the 1000 test images: 91 %
Accuracy for class: 0     is 97.9 %, correct: 9949, total: 10166
Accuracy for class: 1     is 77.4 %, correct: 3519, total: 4549


In [91]:
macro_f1(3519, 4549, 9949, 10166)

0.8952566360416545

In [63]:
evaluate_model(net, trainloader)

Accuracy of the network on the 81177 training images: 92 %
Accuracy for class: 0     is 98.8 %, correct: 55459, total: 56130
Accuracy for class: 1     is 78.3 %, correct: 19604, total: 25047


In [92]:
macro_f1(19604, 25047, 55459, 56130)

0.9064282528926193

In [64]:
trainset = ImageDataset(annotations_file = "..\\work_folder\\daps_Data\\Spectrograms2_cropped_training\\labels.csv", img_dir = "..\\work_folder\\daps_Data\\Spectrograms2_cropped_training\\Images"\
                       , transform = transform)

testset = ImageDataset(annotations_file = "..\\work_folder\\daps_Data\\Spectrograms2_cropped_test\\labels.csv", img_dir = "..\\work_folder\\daps_Data\\Spectrograms2_cropped_test\\Images"\
                      , transform = transform)

In [65]:
trainloader = DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=0)

testloader = DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=0)

Spectrograms 2 (logarithmic scale)

In [66]:
PATH = '../work_folder/model_5.pth'
net = Net(2)
net.load_state_dict(torch.load(PATH, weights_only=True))

<All keys matched successfully>

In [30]:
evaluate_model(net, testloader)

Accuracy of the network on the 1000 test images: 94 %
Accuracy for class: 0     is 95.7 %, correct: 9662, total: 10096
Accuracy for class: 1     is 93.1 %, correct: 4236, total: 4549


In [93]:
macro_f1(4236, 4549, 9662, 10096)

0.9408769061825313

In [67]:
evaluate_model(net, trainloader)

Accuracy of the network on the 81177 training images: 96 %
Accuracy for class: 0     is 97.4 %, correct: 54696, total: 56141
Accuracy for class: 1     is 93.7 %, correct: 23455, total: 25036


In [94]:
macro_f1(23455, 25036, 54696, 56141)

0.9562425187735828

In [68]:
trainset = ImageDataset(annotations_file = "..\\work_folder\\daps_Data\\Spectrograms_cropped_training\\labels.csv", img_dir = "..\\work_folder\\daps_Data\\Spectrograms_cropped_training\\Images"\
                       , transform = transform)

testset = ImageDataset(annotations_file = "..\\work_folder\\daps_Data\\Spectrograms_cropped_test\\labels.csv", img_dir = "..\\work_folder\\daps_Data\\Spectrograms_cropped_test\\Images"\
                      , transform = transform)

In [69]:
trainloader = DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=0)

testloader = DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=0)

Spectrograms (linear scale)

In [70]:
PATH = '../work_folder/model_6.pth'
net = Net(2)
net.load_state_dict(torch.load(PATH, weights_only=True))

<All keys matched successfully>

In [38]:
evaluate_model(net, testloader)

Accuracy of the network on the 1000 test images: 88 %
Accuracy for class: 0     is 90.0 %, correct: 9146, total: 10166
Accuracy for class: 1     is 84.9 %, correct: 3800, total: 4476


In [95]:
macro_f1(3800, 4476, 9146, 10166)

0.866352513745769

In [71]:
evaluate_model(net, trainloader)

Accuracy of the network on the 81328 training images: 88 %
Accuracy for class: 0     is 91.4 %, correct: 51614, total: 56477
Accuracy for class: 1     is 83.4 %, correct: 20729, total: 24851


In [96]:
macro_f1(20729, 24851, 51614, 56477)

0.8709036114921318

In [24]:
trainset = ImageDataset(annotations_file = "..\\work_folder\\daps_Data\\Spectrograms2_16_cropped_training\\labels.csv", img_dir = "..\\work_folder\\daps_Data\\Spectrograms2_16_cropped_training\\Images"\
                       , transform = transform)

testset = ImageDataset(annotations_file = "..\\work_folder\\daps_Data\\Spectrograms2_16_cropped_test\\labels.csv", img_dir = "..\\work_folder\\daps_Data\\Spectrograms2_16_cropped_test\\Images"\
                      , transform = transform)

In [25]:
trainloader = DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=0)

testloader = DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=0)

Spectrograms 2 (16000 sampling rate), 4 epochs

In [11]:
PATH = '../work_folder/model_9.pth'
net = Net(2)
net.load_state_dict(torch.load(PATH, weights_only=True))

<All keys matched successfully>

In [12]:
evaluate_model(net, testloader)

Accuracy of the network on the 1000 test images: 93 %
Accuracy for class: 0     is 95.1 %, correct: 9665, total: 10166
Accuracy for class: 1     is 91.1 %, correct: 4146, total: 4549


In [13]:
macro_f1(4146, 4549, 9665, 10166)

0.9285095569311865

In [14]:
evaluate_model(net, trainloader)

Accuracy of the network on the 1000 test images: 96 %
Accuracy for class: 0     is 97.0 %, correct: 54785, total: 56477
Accuracy for class: 1     is 94.9 %, correct: 23771, total: 25047


In [15]:
macro_f1(23771, 25047, 54785, 56477)

0.9574330223200017

Spectrograms 2 (16000 sampling rate), 10 epochs

In [26]:
PATH = '../work_folder/model_10.pth'
net = Net(2)
net.load_state_dict(torch.load(PATH, weights_only=True))

<All keys matched successfully>

In [27]:
evaluate_model(net, testloader)

Accuracy of the network on the 1000 test images: 95 %
Accuracy for class: 0     is 97.6 %, correct: 9918, total: 10166
Accuracy for class: 1     is 92.5 %, correct: 4207, total: 4549


In [28]:
macro_f1(4207, 4549, 9918, 10166)

0.9527944062895248

In [29]:
evaluate_model(net, trainloader)

Accuracy of the network on the 1000 test images: 98 %
Accuracy for class: 0     is 99.2 %, correct: 56022, total: 56477
Accuracy for class: 1     is 97.1 %, correct: 24320, total: 25047


In [30]:
macro_f1(24320, 25047, 56022, 56477)

0.9829181212656668

In [32]:
trainset = ImageDataset(annotations_file = "..\\work_folder\\daps_Data\\Spectrograms_mel_16_cropped_training\\labels.csv", img_dir = "..\\work_folder\\daps_Data\\Spectrograms_mel_16_cropped_training\\Images"\
                       , transform = transform)

testset = ImageDataset(annotations_file = "..\\work_folder\\daps_Data\\Spectrograms_mel_16_cropped_test\\labels.csv", img_dir = "..\\work_folder\\daps_Data\\Spectrograms_mel_16_cropped_test\\Images"\
                      , transform = transform)

In [33]:
trainloader = DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=0)

testloader = DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=0)

Spectrograms mel (16000 sampling rate), 10 epochs

In [34]:
PATH = '../work_folder/model_11.pth'
net = Net(2)
net.load_state_dict(torch.load(PATH, weights_only=True))

<All keys matched successfully>

In [35]:
evaluate_model(net, testloader)

Accuracy of the network on the 1000 test images: 96 %
Accuracy for class: 0     is 97.0 %, correct: 9856, total: 10166
Accuracy for class: 1     is 94.2 %, correct: 4286, total: 4549


In [36]:
macro_f1(4286, 4549, 9856, 10166)

0.9545476683881264

In [37]:
evaluate_model(net, trainloader)

Accuracy of the network on the 1000 test images: 98 %
Accuracy for class: 0     is 98.9 %, correct: 55864, total: 56477
Accuracy for class: 1     is 98.0 %, correct: 24538, total: 25047


In [38]:
macro_f1(24538, 25047, 55864, 56477)

0.9838530552375464

In [39]:
trainset = ImageDataset(annotations_file = "..\\work_folder\\daps_Data\\Spectrograms_mel_16_noise_cropped_training\\labels.csv", img_dir = "..\\work_folder\\daps_Data\\Spectrograms_mel_16_noise_cropped_training\\Images"\
                       , transform = transform)

testset = ImageDataset(annotations_file = "..\\work_folder\\daps_Data\\Spectrograms_mel_16_noise_cropped_test\\labels.csv", img_dir = "..\\work_folder\\daps_Data\\Spectrograms_mel_16_noise_cropped_test\\Images"\
                      , transform = transform)

In [40]:
trainloader = DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=0)

testloader = DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=0)

Spectrograms mel (16000 sampling rate), noise removed, 10 epochs

In [41]:
PATH = '../work_folder/model_12.pth'
net = Net(2)
net.load_state_dict(torch.load(PATH, weights_only=True))

<All keys matched successfully>

In [42]:
evaluate_model(net, testloader)

Accuracy of the network on the 1000 test images: 94 %
Accuracy for class: 0     is 96.0 %, correct: 9759, total: 10166
Accuracy for class: 1     is 91.3 %, correct: 4154, total: 4549


In [43]:
macro_f1(4154, 4549, 9759, 10166)

0.9362481849140427

In [44]:
evaluate_model(net, trainloader)

Accuracy of the network on the 1000 test images: 98 %
Accuracy for class: 0     is 98.7 %, correct: 55727, total: 56477
Accuracy for class: 1     is 97.1 %, correct: 24327, total: 25047


In [45]:
macro_f1(24327, 25047, 55727, 56477)

0.9788275634324223

In [46]:
trainset = ImageDataset(annotations_file = "..\\work_folder\\daps_Data\\Spectrograms2_16_noise_cropped_training\\labels.csv", img_dir = "..\\work_folder\\daps_Data\\Spectrograms2_16_noise_cropped_training\\Images"\
                       , transform = transform)

testset = ImageDataset(annotations_file = "..\\work_folder\\daps_Data\\Spectrograms2_16_noise_cropped_test\\labels.csv", img_dir = "..\\work_folder\\daps_Data\\Spectrograms2_16_noise_cropped_test\\Images"\
                      , transform = transform)

In [47]:
trainloader = DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=0)

testloader = DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=0)

Spectrograms 2 (16000 sampling rate), noise removed, 10 epochs

In [48]:
PATH = '../work_folder/model_13.pth'
net = Net(2)
net.load_state_dict(torch.load(PATH, weights_only=True))

<All keys matched successfully>

In [49]:
evaluate_model(net, testloader)

Accuracy of the network on the 1000 test images: 94 %
Accuracy for class: 0     is 95.7 %, correct: 9729, total: 10166
Accuracy for class: 1     is 91.3 %, correct: 4155, total: 4549


In [50]:
macro_f1(4155, 4549, 9729, 10166)

0.9340663772129099

In [51]:
evaluate_model(net, trainloader)

Accuracy of the network on the 1000 test images: 98 %
Accuracy for class: 0     is 98.7 %, correct: 55743, total: 56477
Accuracy for class: 1     is 97.2 %, correct: 24351, total: 25047


In [52]:
macro_f1(24351, 25047, 55743, 56477)

0.9794055118907778

In [72]:
trainset = ImageDataset(annotations_file = "..\\work_folder\\daps_Data\\Spectrograms2_cropped_training\\labels.csv", img_dir = "..\\work_folder\\daps_Data\\Spectrograms2_cropped_training\\Images"\
                       , transform = transform)

testset = ImageDataset(annotations_file = "..\\work_folder\\daps_Data\\Spectrograms2_cropped_test\\labels.csv", img_dir = "..\\work_folder\\daps_Data\\Spectrograms2_cropped_test\\Images"\
                      , transform = transform)

In [73]:
trainloader = DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=0)

testloader = DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=0)

In [None]:
class Net(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),  # (N, 32, 64, 64)
            nn.BatchNorm2d(32),
            nn.LeakyReLU(0.1),
            nn.MaxPool2d(kernel_size=2, stride=2),  # (N, 32, 32, 32)

            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),  # (N, 64, 32, 32)
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.1),
            nn.MaxPool2d(kernel_size=2, stride=2),  # (N, 64, 16, 16)

            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),  # (N, 128, 16, 16)
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.1),
            nn.MaxPool2d(kernel_size=2, stride=2)  # (N, 128, 8, 8)
        )
        self.global_avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = self.global_avg_pool(x)
        x = x.view(x.size(0), -1)  # Flatten
        x = self.fc_layers(x)
        return x

Spectrograms 2

In [75]:
PATH = '../work_folder/model_7.pth'
net = Net(2)
net.load_state_dict(torch.load(PATH, weights_only=True))

<All keys matched successfully>

In [45]:
evaluate_model(net, testloader)

Accuracy of the network on the 14645 test images: 80 %
Accuracy for class: 0     is 88.1 %, correct: 8892, total: 10096
Accuracy for class: 1     is 64.4 %, correct: 2928, total: 4549


In [97]:
macro_f1(2928, 4549, 8892, 10096)

0.7687503134664098

In [76]:
evaluate_model(net, trainloader)

Accuracy of the network on the 81177 training images: 95 %
Accuracy for class: 0     is 96.9 %, correct: 54393, total: 56141
Accuracy for class: 1     is 92.2 %, correct: 23089, total: 25036


In [98]:
macro_f1(23089, 25036, 54393, 56141)

0.946530911784752

In [77]:
transform = torchvision.transforms.v2.Compose([transforms.Resize((193, 388)),transforms.v2.ToImage(),\
                                               transforms.v2.ToDtype(torch.float32, scale=True),\
                                               transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
trainset = ImageDataset(annotations_file = "..\\work_folder\\daps_Data\\Spectrograms2_cropped_training\\labels.csv", img_dir = "..\\work_folder\\daps_Data\\Spectrograms2_cropped_training\\Images"\
                       , transform = transform)

testset = ImageDataset(annotations_file = "..\\work_folder\\daps_Data\\Spectrograms2_cropped_test\\labels.csv", img_dir = "..\\work_folder\\daps_Data\\Spectrograms2_cropped_test\\Images"\
                      , transform = transform)

In [78]:
trainloader = DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=0)

testloader = DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=0)

In [12]:
class Net(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),  # (N, 32, 64, 64)
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # (N, 32, 32, 32)

            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),  # (N, 64, 32, 32)
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # (N, 64, 16, 16)

            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),  # (N, 128, 16, 16)
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)  # (N, 128, 8, 8)
        )
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(147456, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x

Spectrograms 2

In [None]:
PATH = '../work_folder/model_8.pth'
net = Net(2)
net.load_state_dict(torch.load(PATH, weights_only=True))

In [57]:
evaluate_model(net, testloader)

Accuracy of the network on the 14645 test images: 94 %
Accuracy for class: 0     is 95.8 %, correct: 9676, total: 10096
Accuracy for class: 1     is 92.8 %, correct: 4223, total: 4549


In [99]:
macro_f1(4223, 4549, 9676, 10096)

0.9408621752542221

In [81]:
evaluate_model(net, trainloader)

Accuracy of the network on the 81177 training images: 98 %
Accuracy for class: 0     is 98.8 %, correct: 55469, total: 56141
Accuracy for class: 1     is 97.1 %, correct: 24310, total: 25036


In [100]:
macro_f1(24310, 25036, 55469, 56141)

0.9798026267729204

In [16]:
transform = torchvision.transforms.v2.Compose([transforms.Resize((96, 194)),transforms.v2.ToImage(),\
                                               transforms.v2.ToDtype(torch.float32, scale=True),\
                                               transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

batch_size = 64

trainset = ImageDataset(annotations_file = "..\\work_folder\\daps_Data\\Spectrograms_mel_16_cropped_training\\labels.csv", img_dir = "..\\work_folder\\daps_Data\\Spectrograms_mel_16_cropped_training\\Images"\
                       , transform = transform)

testset = ImageDataset(annotations_file = "..\\work_folder\\daps_Data\\Spectrograms_mel_16_cropped_test\\labels.csv", img_dir = "..\\work_folder\\daps_Data\\Spectrograms_mel_16_cropped_test\\Images"\
                      , transform = transform)

In [17]:
trainloader = DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=0)

testloader = DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=0)

In [13]:
class Net(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),  # (N, 32, 64, 64)
            nn.BatchNorm2d(num_features = 32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # (N, 32, 32, 32)

            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),  # (N, 64, 32, 32)
            nn.BatchNorm2d(num_features = 64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # (N, 64, 16, 16)

            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),  # (N, 128, 16, 16)
            nn.BatchNorm2d(num_features = 128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)  # (N, 128, 8, 8)
        )
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(36864, 256 * 4),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(256 * 4, 128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x

Spectrograms mel (16000 sampling rate), 10 epochs

In [19]:
PATH = '../work_folder/models/model_15.pth'
net = Net(2)
net.load_state_dict(torch.load(PATH, weights_only=True))

<All keys matched successfully>

In [20]:
evaluate_model(net, testloader)

Accuracy of the network on the 1000 test images: 97 %
Accuracy for class: 0     is 98.1 %, correct: 9975, total: 10166
Accuracy for class: 1     is 94.9 %, correct: 4315, total: 4549


In [21]:
macro_f1(4315, 4549, 9975, 10166)

0.9661028547424905

In [15]:
evaluate_model(net, trainloader)

Accuracy of the network on the 1000 test images: 99 %
Accuracy for class: 0     is 99.8 %, correct: 56352, total: 56477
Accuracy for class: 1     is 97.8 %, correct: 24495, total: 25047


In [16]:
macro_f1(24495, 25047, 56352, 56477)

0.9901991004633596

Spectrograms mel (16000 sampling rate), 30 epochs

In [22]:
PATH = '../work_folder/models/model_15_3.pth'
net = Net(2)
net.load_state_dict(torch.load(PATH, weights_only=True))

<All keys matched successfully>

In [23]:
evaluate_model(net, testloader)

Accuracy of the network on the 1000 test images: 97 %
Accuracy for class: 0     is 98.7 %, correct: 10031, total: 10166
Accuracy for class: 1     is 94.4 %, correct: 4293, total: 4549


In [28]:
macro_f1(4293, 4549, 10031, 10166)

0.9686636232281892

In [24]:
evaluate_model(net, trainloader)

Accuracy of the network on the 1000 test images: 99 %
Accuracy for class: 0     is 99.9 %, correct: 56447, total: 56477
Accuracy for class: 1     is 99.9 %, correct: 25027, total: 25047


In [29]:
macro_f1(25027, 25047, 56447, 56477)

0.9992796892083392

Spectrograms mel (16000 sampling rate), 60 epochs

In [25]:
PATH = '../work_folder/models/model_15_6.pth'
net = Net(2)
net.load_state_dict(torch.load(PATH, weights_only=True))

<All keys matched successfully>

In [26]:
evaluate_model(net, testloader)

Accuracy of the network on the 1000 test images: 97 %
Accuracy for class: 0     is 98.7 %, correct: 10033, total: 10166
Accuracy for class: 1     is 94.3 %, correct: 4291, total: 4549


In [30]:
macro_f1(4291, 4549, 10033, 10166)

0.9686557840407863

In [27]:
evaluate_model(net, trainloader)

Accuracy of the network on the 1000 test images: 99 %
Accuracy for class: 0     is 100.0 %, correct: 56463, total: 56477
Accuracy for class: 1     is 100.0 %, correct: 25039, total: 25047


In [31]:
macro_f1(25039, 25047, 56463, 56477)

0.9996830491723991

In [14]:
trainset = ImageDataset(annotations_file = "..\\work_folder\\daps_Data\\Spectrograms_mel_16_normalized_cropped_training\\labels.csv", img_dir = "..\\work_folder\\daps_Data\\Spectrograms_mel_16_normalized_cropped_training\\Images"\
                       , transform = transform)

testset = ImageDataset(annotations_file = "..\\work_folder\\daps_Data\\Spectrograms_mel_16_normalized_cropped_test\\labels.csv", img_dir = "..\\work_folder\\daps_Data\\Spectrograms_mel_16_normalized_cropped_test\\Images"\
                      , transform = transform)

In [15]:
trainloader = DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=0)

testloader = DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=0)

Spectrograms mel (16000 sampling rate), normalized audio

In [16]:
PATH = '../work_folder/model_20.pth'
net = Net(2)
net.load_state_dict(torch.load(PATH, weights_only=True))

<All keys matched successfully>

In [17]:
evaluate_model(net, testloader)

Accuracy of the network on the 1000 test images: 94 %
Accuracy for class: 0     is 96.7 %, correct: 9827, total: 10166
Accuracy for class: 1     is 89.2 %, correct: 4056, total: 4549


In [19]:
macro_f1(4056, 4549, 9827, 10166)

0.9331818212778324

In [18]:
evaluate_model(net, trainloader)

Accuracy of the network on the 1000 test images: 98 %
Accuracy for class: 0     is 99.0 %, correct: 55929, total: 56477
Accuracy for class: 1     is 96.4 %, correct: 24138, total: 25047


In [20]:
macro_f1(24138, 25047, 55929, 56477)

0.978922797338458

In [19]:
class Net(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),  # (N, 32, 64, 64)
            nn.BatchNorm2d(num_features = 32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # (N, 32, 32, 32)

            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),  # (N, 64, 32, 32)
            nn.BatchNorm2d(num_features = 64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # (N, 64, 16, 16)

            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),  # (N, 128, 16, 16)
            nn.BatchNorm2d(num_features = 128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # (N, 128, 8, 8)

            nn.Conv2d(128, 512, kernel_size=3, stride=1, padding=1),  # (N, 128, 16, 16)
            nn.BatchNorm2d(num_features = 512),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)  # (N, 128, 8, 8)
        )
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(36864, 2048),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(2048, 256),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(256, 64),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(64, num_classes)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x

In [20]:
PATH = '../work_folder/model_16.pth'
net = Net(2)
net.load_state_dict(torch.load(PATH, weights_only=True))

<All keys matched successfully>

In [21]:
evaluate_model(net, testloader)

Accuracy of the network on the 1000 test images: 96 %
Accuracy for class: 0     is 98.3 %, correct: 9990, total: 10166
Accuracy for class: 1     is 91.5 %, correct: 4162, total: 4549


In [22]:
macro_f1(4162, 4549, 9990, 10166)

0.9546215544468195

In [23]:
evaluate_model(net, trainloader)

Accuracy of the network on the 1000 test images: 99 %
Accuracy for class: 0     is 100.0 %, correct: 56458, total: 56477
Accuracy for class: 1     is 99.8 %, correct: 25003, total: 25047


In [17]:
macro_f1(25003, 25047, 56458, 56477)

0.9990920553251521

In [14]:
class Net(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),  # (N, 32, 64, 64)
            nn.BatchNorm2d(num_features = 32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # (N, 32, 32, 32)

            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),  # (N, 64, 32, 32)
            nn.BatchNorm2d(num_features = 64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # (N, 64, 16, 16)

            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),  # (N, 128, 16, 16)
            nn.BatchNorm2d(num_features = 128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # (N, 128, 8, 8)

            nn.Conv2d(128, 512, kernel_size=3, stride=1, padding=1),  # (N, 128, 16, 16)
            nn.BatchNorm2d(num_features = 512),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)  # (N, 128, 8, 8)
        )
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(36864, 2048),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(2048, 256),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(256, 64),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(64, num_classes)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x

In [15]:
PATH = '../work_folder/model_17.pth'
net = Net(2)
net.load_state_dict(torch.load(PATH, weights_only=True))

<All keys matched successfully>

In [16]:
evaluate_model(net, testloader)

Accuracy of the network on the 1000 test images: 96 %
Accuracy for class: 0     is 97.4 %, correct: 9903, total: 10166
Accuracy for class: 1     is 94.4 %, correct: 4296, total: 4549


In [18]:
macro_f1(4296, 4549, 9903, 10166)

0.9589776534552596

In [19]:
evaluate_model(net, trainloader)

Accuracy of the network on the 1000 test images: 99 %
Accuracy for class: 0     is 99.9 %, correct: 56403, total: 56477
Accuracy for class: 1     is 99.9 %, correct: 25014, total: 25047


In [20]:
macro_f1(25014, 25047, 56403, 56477)

0.9984590650967857

In [41]:
class Net(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),  # (N, 32, 64, 64)
            nn.BatchNorm2d(num_features = 32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # (N, 32, 32, 32)

            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),  # (N, 64, 32, 32)
            nn.BatchNorm2d(num_features = 64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # (N, 64, 16, 16)

            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),  # (N, 128, 16, 16)
            nn.BatchNorm2d(num_features = 128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # (N, 128, 8, 8)

            nn.Conv2d(128, 512, kernel_size=3, stride=1, padding=1),  # (N, 128, 16, 16)
            nn.BatchNorm2d(num_features = 512),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)  # (N, 128, 8, 8)
        )
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(36864, 2048),
            nn.ReLU(),
            nn.Linear(2048, 256),
            nn.ReLU(),
            nn.Linear(256, 32),
            nn.ReLU(),
            nn.Linear(32, num_classes)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x

In [42]:
PATH = '../work_folder/models/model_18.pth'
net = Net(2)
net.load_state_dict(torch.load(PATH, weights_only=True))

<All keys matched successfully>

In [43]:
evaluate_model(net, testloader)

Accuracy of the network on the 1000 test images: 96 %
Accuracy for class: 0     is 97.5 %, correct: 9911, total: 10166
Accuracy for class: 1     is 92.8 %, correct: 4223, total: 4549


In [45]:
macro_f1(4223, 4549, 9911, 10166)

0.9535806636845822

In [44]:
evaluate_model(net, trainloader)

Accuracy of the network on the 1000 test images: 99 %
Accuracy for class: 0     is 100.0 %, correct: 56462, total: 56477
Accuracy for class: 1     is 99.9 %, correct: 25021, total: 25047


In [46]:
macro_f1(25021, 25047, 56462, 56477)

0.9994092073346887

In [47]:
transform = torchvision.transforms.v2.Compose([transforms.Resize((64, 64)),transforms.v2.ToImage(),\
                                               transforms.v2.ToDtype(torch.float32, scale=True),\
                                               transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

batch_size = 64

trainset = ImageDataset(annotations_file = "..\\work_folder\\daps_Data\\Spectrograms_mel_16_cropped_training\\labels.csv", img_dir = "..\\work_folder\\daps_Data\\Spectrograms_mel_16_cropped_training\\Images"\
                       , transform = transform)

testset = ImageDataset(annotations_file = "..\\work_folder\\daps_Data\\Spectrograms_mel_16_cropped_test\\labels.csv", img_dir = "..\\work_folder\\daps_Data\\Spectrograms_mel_16_cropped_test\\Images"\
                      , transform = transform)

In [48]:
trainloader = DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=0)

testloader = DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=0)

In [49]:
class Net(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),  # (N, 32, 64, 64)
            nn.BatchNorm2d(num_features = 32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # (N, 32, 32, 32)

            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),  # (N, 64, 32, 32)
            nn.BatchNorm2d(num_features = 64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # (N, 64, 16, 16)

            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),  # (N, 128, 16, 16)
            nn.BatchNorm2d(num_features = 128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)  # (N, 128, 8, 8)
        )
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64*64*2, 256 * 4),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(256 * 4, 128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x

In [51]:
PATH = '../work_folder/models/model_19.pth'
net = Net(2)
net.load_state_dict(torch.load(PATH, weights_only=True))

<All keys matched successfully>

In [52]:
evaluate_model(net, testloader)

Accuracy of the network on the 1000 test images: 96 %
Accuracy for class: 0     is 97.7 %, correct: 9935, total: 10166
Accuracy for class: 1     is 93.0 %, correct: 4230, total: 4549


In [54]:
macro_f1(4230, 4549, 9935, 10166)

0.9560111683503256

In [55]:
evaluate_model(net, trainloader)

Accuracy of the network on the 1000 test images: 99 %
Accuracy for class: 0     is 99.8 %, correct: 56341, total: 56477
Accuracy for class: 1     is 99.6 %, correct: 24958, total: 25047


In [56]:
macro_f1(24958, 25047, 56341, 56477)

0.9967599318940576