**Actual Machine Learning**

In [1]:
import Augmentation
import torchaudio
from torch.utils.data import Dataset, DataLoader

# Dataset rooted at the UrbanSounds8K folder. The time and frequency masking
# transforms are handed to the project-local Augmentation.AudioDataset through
# its `transformList` argument.
audioDataset = Augmentation.AudioDataset(
    "./UrbanSounds8K/",
    transformList=[
        torchaudio.transforms.TimeMasking(time_mask_param=80),
        torchaudio.transforms.FrequencyMasking(freq_mask_param=80)
    ])

# This loader is the one passed to train() in the epoch loop, so reshuffle it
# every epoch (as train_dataloader already does) instead of always visiting
# the samples in the dataset's fixed order.
audioDataloader = DataLoader(audioDataset, batch_size=16, shuffle=True)

In [2]:
# Sanity check: number of items audioDataset reports (shown as the cell output).
len(audioDataset)

8732

In [None]:
import torch
import torch.nn as nn
from torchvision import datasets
import numpy as np
from torch.utils.data import Dataset, DataLoader
from datetime import datetime
import os
import time
from torch.utils.tensorboard import SummaryWriter

def npy_loader(path):
    """Read the `.npy` file at `path` and return its contents as a torch tensor."""
    array = np.load(path)
    tensor = torch.from_numpy(array)
    return tensor

def train(dataloader, model, optimizer):
    """Run one training epoch and return the epoch's loss and accuracy.

    Reads two notebook-level globals: `device` (where each batch is moved)
    and `cost` (the loss function, expected to return the mean loss of the
    batch, which is the default reduction of `torch.nn.CrossEntropyLoss`).

    Args:
        dataloader: iterable of `(X, Y)` batches; `Y` holds the class indices
            that `pred.argmax(1)` is compared against.
        model: network to optimise; it is switched to training mode.
        optimizer: optimizer stepped once per batch.

    Returns:
        `(train_loss, train_accuracy)`: the mean per-sample loss and the
        accuracy in percent, both over the samples seen in this epoch.

    Raises:
        ValueError: if the dataloader yields no samples.
    """
    total_batch = len(dataloader)
    train_loss, train_accuracy, samples_seen = 0.0, 0.0, 0

    model.train()

    for batch, (X, Y) in enumerate(dataloader):
        X, Y = X.to(device), Y.to(device)
        # Size of *this* batch: the last batch may be shorter, and reading it
        # here avoids loading an extra batch up front just to learn the size.
        n_samples = len(Y)
        optimizer.zero_grad()
        pred = model(X)
        batch_loss = cost(pred, Y)
        batch_loss.backward()
        optimizer.step()
        batch_accuracy = (pred.argmax(1)==Y).type(torch.float).sum()
        # batch_loss is a per-sample mean, so weight it by the batch size;
        # dividing the plain sum of batch means by the number of samples would
        # shrink the reported loss by roughly the batch size.
        train_loss += batch_loss.item() * n_samples
        train_accuracy += batch_accuracy.item()
        samples_seen += n_samples
        if batch % 100 == 0:
            print(
                f"Training batch {batch}/{total_batch} -> Loss: {batch_loss.item()}  Accuracy: {batch_accuracy.item()/n_samples*100}%"
            )

    if samples_seen == 0:
        raise ValueError("train() received a dataloader that yielded no samples")

    train_loss /= samples_seen
    train_accuracy /= samples_seen/100
    return(train_loss, train_accuracy)

def val(dataloader, model):
    """Evaluate `model` on `dataloader` without gradient tracking.

    Reads two notebook-level globals: `device` (where each batch is moved)
    and `cost` (the loss function, expected to return the mean loss of the
    batch, which is the default reduction of `torch.nn.CrossEntropyLoss`).

    Args:
        dataloader: iterable of `(X, Y)` batches; `Y` holds the class indices
            that `pred.argmax(1)` is compared against.
        model: network to evaluate; it is switched to evaluation mode.

    Returns:
        `(val_loss, val_accuracy)`: the mean per-sample loss and the accuracy
        in percent, both over the samples seen.

    Raises:
        ValueError: if the dataloader yields no samples.
    """
    total_batch = len(dataloader)
    val_loss, val_accuracy, samples_seen = 0.0, 0.0, 0

    model.eval()

    with torch.no_grad():
        for batch, (X, Y) in enumerate(dataloader):
            X, Y = X.to(device), Y.to(device)
            n_samples = len(Y)
            pred = model(X)
            batch_loss = cost(pred, Y)
            batch_accuracy = (pred.argmax(1)==Y).type(torch.float).sum()
            # batch_loss is a per-sample mean, so weight it by the batch size
            # before accumulating.
            val_loss += batch_loss.item() * n_samples
            val_accuracy += batch_accuracy.item()
            samples_seen += n_samples
            # Progress report belongs inside the loop and must use this
            # batch's own size (there is no `batch_size` name in this scope).
            if batch % 10 == 0:
                print(
                    f"Validation batch {batch}/{total_batch} -> Loss: {batch_loss.item()}  Accuracy: {batch_accuracy.item()/n_samples*100}%"
                )

    if samples_seen == 0:
        raise ValueError("val() received a dataloader that yielded no samples")

    val_loss /= samples_seen
    val_accuracy /= samples_seen/100
    return(val_loss, val_accuracy)

def tensorBoardLogging(train_loss, train_accuracy, val_loss, val_accuracy,
                       epoch):
    """Write one epoch's training and validation metrics to TensorBoard.

    Uses the notebook-level `writer`. Each metric is logged under its own tag
    with `epoch` as the step.

    Args:
        train_loss: training loss for the epoch.
        train_accuracy: training accuracy for the epoch.
        val_loss: validation loss for the epoch.
        val_accuracy: validation accuracy for the epoch.
        epoch: step index the four scalars are recorded at.
    """
    writer.add_scalar('1 Training/1 Model loss', train_loss, epoch)
    writer.add_scalar('1 Training/2 Model accuracy', train_accuracy, epoch)
    writer.add_scalar('2 Validate/1 Model loss', val_loss, epoch)
    writer.add_scalar('2 Validate/2 Model accuracy', val_accuracy, epoch)
    # Flush so the scalars reach disk now, but keep the writer open: this
    # function runs once per epoch, and closing it each time would force the
    # writer to be reopened for the next epoch.
    writer.flush()


In [None]:
# Spectrograms stored as .npy files, one sub-folder per class, read through
# npy_loader. `extensions` is given as a tuple: that is the documented type,
# and torchvision releases that pass it straight to str.endswith reject a list.
audio_train_dataset = datasets.DatasetFolder(root='./UrbanSounds8K/spectrograms/train/',
                                       loader=npy_loader,
                                       extensions=('.npy',))

audio_val_dataset = datasets.DatasetFolder(root='./UrbanSounds8K/spectrograms/val/',
                                       loader=npy_loader,
                                       extensions=('.npy',))

print("Training size:", len(audio_train_dataset))
print("Validation size:",len(audio_val_dataset))

# Loaders over the spectrogram folders: batches of 4, shuffled, loaded in the
# main process (num_workers=0).
train_dataloader = DataLoader(
    audio_train_dataset, batch_size=4, shuffle=True, num_workers=0)

val_dataloader = DataLoader(
    audio_val_dataset, batch_size=4, shuffle=True, num_workers=0)



# ResNet-18 definition fetched through torch.hub.
model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet18')

# Replace the first convolution with one that takes a single input channel
# (presumably because the spectrograms are single-channel; confirm against
# the saved .npy arrays), and the classifier head with a 10-output layer.
model.conv1 = nn.Conv2d(in_channels=1, out_channels=64,
                        kernel_size=7, stride=2, padding=3, bias=False)
model.fc = nn.Linear(512, 10)

# Prefer the first CUDA device when one is available, otherwise use the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
model = model.to(device)

# Check that it is on Cuda
next(model.parameters()).device

In [None]:
# rm -rf ./logs/  # remove all TensorBoard logs (the SummaryWriter in this notebook writes to ./logs/<title>)

TensorBoard model graph

In [None]:
# Fixed run name used as the TensorBoard log sub-directory. The timestamp that
# used to be assigned first was always overwritten by this value; use
# datetime.now().strftime("%Y-%m-%d,%H-%M-%S") here instead if every run
# should get its own directory.
title = "freqTimeFulldataset10out2"

# One batch from the training loader serves as the example input for tracing
# the model graph.
spec, label = next(iter(train_dataloader))
writer = SummaryWriter(f'./logs/{title}')
writer.add_graph(model, spec.to(device))
writer.close()

In [None]:
# Loss function read by train()/val() as the global `cost`, plus an Adam
# optimizer over all model parameters.
learning_rate = 0.001
cost = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

In [None]:
# Load previously saved weights into the model, remapping the stored tensors
# onto the active device.
checkpoint_path = "./model/model_t4,f4,tf4.pt"
model.load_state_dict(torch.load(checkpoint_path, map_location=device))

In [None]:
# NOTE(review): training reads audioDataloader (built from the dataset root)
# while validation reads ./UrbanSounds8K/spectrograms/val/ -- confirm the two
# do not share clips, otherwise the validation numbers are optimistic.
epochs = 1
for epoch in range(epochs):
    print(f'Epoch {epoch+1}/{epochs}\n-------------------------------')

    # Time one full train + validation pass.
    start = time.time()
    train_metrics = train(audioDataloader, model, optimizer)
    val_metrics = val(val_dataloader, model)
    end = time.time()

    train_loss, train_accuracy = train_metrics
    val_loss, val_accuracy = val_metrics

    print(f"\nEpoch duration: {end - start} seconds")
    print(f'Training | Loss: {train_loss} Accuracy: {train_accuracy}%')
    print(f'Validating  | Loss: {val_loss} Accuracy: {val_accuracy}% \n')
    tensorBoardLogging(train_loss, train_accuracy, val_loss, val_accuracy, epoch)

print('Done!')

In [None]:
# Held-out test spectrograms, read the same way as the train/val folders.
# `extensions` is given as a tuple: that is the documented type, and
# torchvision releases that pass it straight to str.endswith reject a list.
audio_test_dataset = datasets.DatasetFolder(root='./UrbanSounds8K/spectrograms/test/',
                                       loader=npy_loader,
                                       extensions=('.npy',))

test_dataloader = torch.utils.data.DataLoader(audio_test_dataset,
                                              batch_size=16,
                                              num_workers=0,
                                              shuffle=True,
                                             )

# Evaluate on the test loader built above; this cell previously re-ran the
# validation loader and never touched the test set.
val(test_dataloader, model)

In [None]:
# Save only the model's weights (its state_dict) to the checkpoint file.
weights = model.state_dict()
torch.save(weights, "./model/model_t4,f4,tf4.pt")

**Debugging**

In [None]:

# audio_dataset = []
# sum = 0

# main_path = './UrbanSounds8K/spectrograms/'

# dir = [str(p) for p in Path(main_path).glob('*')]
# for path in dir:
#   print('Loading: '+ path)
#   num = len([str(p) for p in Path(path).glob('*')])
#   sum += num
#   print(num)

# sum