**Actual Machine Learning**

In [38]:
import os
import IPython.display as ipd
from IPython.display import Audio
import random
from torchvision import datasets, transforms
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
from torchvision import datasets, models, transforms
# from torchinfo import summary
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from pathlib import Path
import torchvision
from torch.utils.tensorboard import SummaryWriter

In [153]:
def npy_loader(path):
    sample = torch.from_numpy(np.load(path))
    return sample


audio_train_dataset = datasets.DatasetFolder(root='./UrbanSounds8K/spectrograms/train/',
                                       loader=npy_loader,
                                       extensions=['.npy'])

audio_val_dataset = datasets.DatasetFolder(root='./UrbanSounds8K/spectrograms/val/',
                                       loader=npy_loader,
                                       extensions=['.npy'])

print("Training size:", len(audio_train_dataset))
print("Validation size:",len(audio_val_dataset))

train_dataloader = torch.utils.data.DataLoader(audio_train_dataset,
                                               batch_size=64,
                                               num_workers=0,
                                               shuffle=True)

val_dataloader = torch.utils.data.DataLoader(audio_val_dataset,
                                              batch_size=64,
                                              num_workers=0,
                                              shuffle=True,
                                             )

import torch.nn.functional as F
from torch.nn import init

model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet18')
model.conv1 = nn.Conv2d(1,
                        64,
                        kernel_size=(7, 7),
                        stride=(2, 2),
                        padding=(3, 3),
                        bias=False)
model.fc = nn.Linear(in_features=512, out_features=10, bias=True)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Check that it is on Cuda
next(model.parameters()).device

Training size: 18304
Validation size: 353


Using cache found in C:\Users\JianQuan/.cache\torch\hub\pytorch_vision_v0.10.0


device(type='cuda', index=0)

In [3]:
# rm -rf ./logsdir/ # remove all logs

In [158]:
# cost function used to determine best parameters
cost = torch.nn.CrossEntropyLoss()

# used to create optimal parameters
learning_rate = 0.001
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Create the training function
def train(dataloader, model, optimizer):
    train_size = len(dataloader.dataset)
    batch_size = len(next(iter(dataloader))[1])
    total_batch = len(dataloader)
    train_loss, train_accuracy = 0, 0

    model.train()

    for batch, (X, Y) in enumerate(dataloader):
        X, Y = X.to(device), Y.to(device)
        optimizer.zero_grad()
        pred = model(X)
        batch_loss = cost(pred, Y)
        batch_loss.backward()
        optimizer.step()
        batch_accuracy = (pred.argmax(1)==Y).type(torch.float).sum()
        train_loss += batch_loss.item()
        train_accuracy += batch_accuracy.item()
        if batch % 100 == 0:
            print(
                f"Training batch {batch}/{total_batch} -> Loss: {batch_loss.item()}  Accuracy: {batch_accuracy.item()/batch_size*100}%"
            )
    train_loss /= train_size
    train_accuracy /= train_size/100
    return(train_loss, train_accuracy)


# Create the validation function
def val(dataloader, model):
    val_size = len(dataloader.dataset)
    total_batch = len(dataloader)
    val_loss, val_accuracy = 0, 0

    model.eval()

    with torch.no_grad():
        for batch, (X, Y) in enumerate(dataloader):
            X, Y = X.to(device), Y.to(device)
            pred = model(X)
            batch_loss = cost(pred, Y)
            batch_accuracy = (pred.argmax(1)==Y).type(torch.float).sum()
            val_loss += batch_loss.item()
            val_accuracy += batch_accuracy.item()
        if batch % 10 == 0:
            print(
                f"Validation batch {batch}/{total_batch} -> Loss: {batch_loss.item()}  Accuracy: {batch_accuracy.item()/batch_size*100}%"
            )

    val_loss /= val_size
    val_accuracy /= val_size/100
    return(val_loss, val_accuracy)

Tensorboard model graph

In [148]:
first_batch = next(iter(train_dataloader))

In [156]:
import time
from datetime import datetime

title = datetime.now().strftime("%Y-%m-%d,%H-%M-%S")
# title="freqTimeFulldataset10out"

writer = SummaryWriter(f'./logs/{title}')
writer.add_graph(model, first_batch[0].to(device))
writer.close()

In [6]:
def tensorBoardLogging(train_loss, train_accuracy, val_loss, val_accuracy, epoch):
    writer.add_scalar('1 Training/1 Model loss', train_loss, epoch)
    writer.add_scalar('1 Training/2 Model accuracy', train_accuracy, epoch)
    writer.add_scalar('2 Validate/1 Model loss', val_loss, epoch)
    writer.add_scalar('2 Validate/2 Model accuracy', val_accuracy, epoch)
    writer.close()

In [159]:
import time

epochs = 20
for epoch in range(epochs):
    print(f'Epoch {epoch+1}/{epochs}\n-------------------------------')
    start = time.time()
    train_loss, train_accuracy = train(train_dataloader, model, optimizer)
    val_loss, val_accuracy = val(val_dataloader, model)
    end = time.time()
    print(f"\nEpoch duration: {end - start} seconds")
    print(f'Training | Loss: {train_loss} Accuracy: {train_accuracy}%')
    print(f'Validating  | Loss: {val_loss} Accuracy: {val_accuracy}% \n')
    tensorBoardLogging(train_loss, train_accuracy, val_loss, val_accuracy, epoch)

print('Done!')

Epoch 1/20
-------------------------------
Training batch 0/286 -> Loss: 2.346491813659668  Accuracy: 10.9375%


KeyboardInterrupt: 

In [46]:
torch.save(model.state_dict(), f"./model/model_t4,f4,tf4.pt")

Testing trained model

In [52]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet18')
model.conv1 = nn.Conv2d(1,
                        64,
                        kernel_size=(7, 7),
                        stride=(2, 2),
                        padding=(3, 3),
                        bias=False)
model.load_state_dict(torch.load("./model/model_t4,f4,tf4.pt"))

model = model.to(device)

Using cache found in C:\Users\JianQuan/.cache\torch\hub\pytorch_vision_v0.10.0


In [108]:
test_dataset = []
test_dataset = load_audio_files("./UrbanSounds8K/test set/", test_dataset)

audio_dataloader = torch.utils.data.DataLoader(test_dataset,
                                               batch_size=1,
                                               shuffle=False,
                                               num_workers=0)

num_test = create_spectrogram_images(audio_dataloader, "test")

def npy_loader(path):
    sample = torch.from_numpy(np.load(path))
    return sample


spec_test_dataset = datasets.DatasetFolder(
    root='./UrbanSounds8K/spectrograms/test/',
    loader=npy_loader,
    extensions=['.npy'])

test_dataloader = torch.utils.data.DataLoader(
    spec_test_dataset,
    batch_size=64,
    num_workers=0,
    shuffle=False,
)

test: 0/837


In [None]:
%%ipytest

def test_spectrogram_generation_test():
    sum = 0
    dir = [str(p) for p in Path('./UrbanSounds8K/spectrograms/test').glob('*')]
    for path in dir:
        num = len([str(p) for p in Path(path).glob('*')])
        sum += num

    assert sum == num_test

In [85]:
next(iter(spec_test_dataset))

(tensor([[[ 1.9782e+00, -1.5946e+00, -3.4768e+00,  ..., -3.4739e-01,
            3.9526e-01,  4.1237e-01],
          [ 1.2299e-01,  4.8282e-01,  2.8185e-02,  ..., -1.1781e-02,
            9.6032e-01, -1.5953e+00],
          [-3.3370e+00, -1.7491e+00,  2.0832e-01,  ..., -2.3191e+00,
           -2.1903e-01, -2.7251e+00],
          ...,
          [-2.0507e+01, -2.5278e+01, -2.7352e+01,  ..., -2.4447e+01,
           -2.4472e+01, -2.7562e+01],
          [-2.2551e+01, -2.3613e+01, -2.5990e+01,  ..., -2.9114e+01,
           -2.3208e+01, -3.0224e+01],
          [-2.6916e+01, -2.7990e+01, -3.0528e+01,  ..., -2.4957e+01,
           -2.3403e+01, -3.1538e+01]]]),
 0)

In [132]:
test_size = len(test_dataloader.dataset)
total_batch = len(test_dataloader)
test_loss, test_accuracy = 0, 0

model.eval()

cost = torch.nn.CrossEntropyLoss()

with torch.no_grad():
    for batch, (X, Y) in enumerate(test_dataloader):
        X, Y = X.to(device), Y.to(device)
        pred = model(X)
        batch_loss = cost(pred, Y)
        batch_accuracy = (pred.argmax(1) == Y).type(torch.float).sum()
        test_loss += batch_loss.item()
        test_accuracy += batch_accuracy.item()
        if batch == 0:
            first_batch_y = Y
            first_batch_p = pred.argmax(1)


test_loss /= test_size
test_accuracy /= test_size/100

print(f"Test loss: {test_loss}")
print(f"Test accuracy: {test_accuracy}%")


Test loss: 1.1381309649756592e-05
Test accuracy: 100.00000000000001%


In [138]:
for i in range(len(first_batch)):
    title
    writer.add_audio(
        f'Test/({test_dataset[i][2]})A:{class_map[first_batch_y[i].item()]} P:{class_map[first_batch_p[i].item()]}',
        test_dataset[i][0], 44100)

In [65]:
for i, randIndex in enumerate(randIndex_list):
    waveform, class_id, title = test_dataset[randIndex]
    spectrogram_tensor = (spectrogram(waveform) + 1e-12).log2()


    with torch.no_grad():
        model.eval()
        output = model(torch.reshape(spectrogram_tensor, (-1,1)))
    writer.add_audio(
        f'Test/({title})A:{class_map[class_id]} P:{class_map[output]}',
        waveform, 44100)

writer.close()

NameError: name 'create_spectrogram_images' is not defined

**Debugging**

In [None]:

# audio_dataset = []
# sum = 0

# main_path = './UrbanSounds8K/spectrograms/'

# dir = [str(p) for p in Path(main_path).glob('*')]
# for path in dir:
#   print('Loading: '+ path)
#   num = len([str(p) for p in Path(path).glob('*')])
#   sum += num
#   print(num)

# sum