In [1]:
import torch
import torch.nn as nn
from torchvision import datasets
import numpy as np
from torch.utils.data import Dataset, DataLoader
from datetime import datetime
from torch.utils.tensorboard import SummaryWriter
import os

def npy_loader(path):
    return torch.from_numpy(np.load(path))

def train(dataloader, model, optimizer):
    train_size = len(dataloader.dataset)
    batch_size = len(next(iter(dataloader))[1])
    total_batch = len(dataloader)
    train_loss, train_accuracy = 0, 0

    model.train()

    for batch, (X, Y) in enumerate(dataloader):
        X, Y = X.to(device), Y.to(device)
        optimizer.zero_grad()
        pred = model(X)
        batch_loss = cost(pred, Y)
        batch_loss.backward()
        optimizer.step()
        batch_accuracy = (pred.argmax(1)==Y).type(torch.float).sum()
        train_loss += batch_loss.item()
        train_accuracy += batch_accuracy.item()
        if batch % 100 == 0:
            print(
                f"Training batch {batch}/{total_batch} -> Loss: {batch_loss.item()}  Accuracy: {batch_accuracy.item()/batch_size*100}%"
            )
    train_loss /= train_size
    train_accuracy /= train_size/100
    return(train_loss, train_accuracy)

def val(dataloader, model):
    val_size = len(dataloader.dataset)
    total_batch = len(dataloader)
    val_loss, val_accuracy = 0, 0

    model.eval()

    with torch.no_grad():
        for batch, (X, Y) in enumerate(dataloader):
            X, Y = X.to(device), Y.to(device)
            pred = model(X)
            batch_loss = cost(pred, Y)
            batch_accuracy = (pred.argmax(1)==Y).type(torch.float).sum()
            val_loss += batch_loss.item()
            val_accuracy += batch_accuracy.item()
        if batch % 10 == 0:
            print(
                f"Validation batch {batch}/{total_batch} -> Loss: {batch_loss.item()}  Accuracy: {batch_accuracy.item()/batch_size*100}%"
            )

    val_loss /= val_size
    val_accuracy /= val_size/100
    return(val_loss, val_accuracy)

def tensorBoardLogging(train_loss, train_accuracy, val_loss, val_accuracy,
                       epoch):
    writer.add_scalar('1 Training/1 Model loss', train_loss, epoch)
    writer.add_scalar('1 Training/2 Model accuracy', train_accuracy, epoch)
    writer.add_scalar('2 Validate/1 Model loss', val_loss, epoch)
    writer.add_scalar('2 Validate/2 Model accuracy', val_accuracy, epoch)
    writer.close()


In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet18')
model.conv1 = nn.Conv2d(1,
                        64,
                        kernel_size=(7, 7),
                        stride=(2, 2),
                        padding=(3, 3),
                        bias=False)
model.fc = nn.Linear(in_features=512, out_features=10, bias=True)
model.load_state_dict(torch.load("./model/model_t4,f4,tf4_10.pt", map_location=device))

model = model.to(device)

Using cache found in C:\Users\JianQuan/.cache\torch\hub\pytorch_vision_v0.10.0


<bound method Module.parameters of ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)


In [36]:
spec_test_dataset = datasets.DatasetFolder(
    root='./UrbanSounds8K/spectrograms/test/',
    loader=npy_loader,
    extensions=['.npy'])

test_dataloader = torch.utils.data.DataLoader(
    spec_test_dataset,
    batch_size=12,
    num_workers=0,
    shuffle=False,
)

In [37]:
next(iter(spec_test_dataset))

(tensor([[[ 1.9782e+00, -1.5946e+00, -3.4768e+00,  ..., -3.4739e-01,
            3.9526e-01,  4.1237e-01],
          [ 1.2299e-01,  4.8282e-01,  2.8185e-02,  ..., -1.1781e-02,
            9.6032e-01, -1.5953e+00],
          [-3.3370e+00, -1.7491e+00,  2.0831e-01,  ..., -2.3191e+00,
           -2.1903e-01, -2.7251e+00],
          ...,
          [-2.0507e+01, -2.5279e+01, -2.7350e+01,  ..., -2.4447e+01,
           -2.4473e+01, -2.7562e+01],
          [-2.2551e+01, -2.3613e+01, -2.5989e+01,  ..., -2.9115e+01,
           -2.3208e+01, -3.0218e+01],
          [-2.6916e+01, -2.7990e+01, -3.0528e+01,  ..., -2.4957e+01,
           -2.3403e+01, -3.1538e+01]]]),
 0)

In [38]:
test_size = len(test_dataloader.dataset)
total_batch = len(test_dataloader)
test_loss, test_accuracy = 0, 0

model.eval()

cost = torch.nn.CrossEntropyLoss()

with torch.no_grad():
    for batch, (X, Y) in enumerate(test_dataloader):        
        if batch == 0:
            X, Y = X.to(device), Y.to(device)
            pred = model(X)
            batch_loss = cost(pred, Y)
            batch_accuracy = (pred.argmax(1) == Y).type(torch.float).sum()
            test_loss += batch_loss.item()
            test_accuracy += batch_accuracy.item()
            first_batch_y = Y
            first_batch_p = pred.argmax(1)


test_loss /= test_size
test_accuracy /= test_size/100

print(f"Test loss: {test_loss}")
print(f"Test accuracy: {test_accuracy}%")

Test loss: 0.007333646111163615
Test accuracy: 0.0%


In [30]:
pred.argmax(1)

tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 0, 5,
        5, 5, 5, 0, 0, 2, 0, 0, 0, 0, 0, 0, 5, 2, 2, 2])

In [None]:
for i in range(len(first_batch)):
    title
    writer.add_audio(
        f'Test/({test_dataset[i][2]})A:{class_map[first_batch_y[i].item()]} P:{class_map[first_batch_p[i].item()]}',
        test_dataset[i][0], 44100)

In [None]:
for i, randIndex in enumerate(randIndex_list):
    waveform, class_id, title = test_dataset[randIndex]
    spectrogram_tensor = (spectrogram(waveform) + 1e-12).log2()


    with torch.no_grad():
        model.eval()
        output = model(torch.reshape(spectrogram_tensor, (-1,1)))
    writer.add_audio(
        f'Test/({title})A:{class_map[class_id]} P:{class_map[output]}',
        waveform, 44100)

writer.close()