In [None]:
!pip install torch
!pip install torchvision
!pip install sklearn
!pip install tqdm

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting sklearn
  Downloading sklearn-0.0.tar.gz (1.1 kB)
Building wheels for collected packages: sklearn
  Building wheel for sklearn (setup.py) ... [?25l[?25hdone
  Created wheel for sklearn: filename=sklearn-0.0-py2.py3-none-any.whl size=1310 sha256=86129d78795263f71ff300d92db824b4e764d395ad52b03cf8d1ab4ec03ad4c3
  Stored in directory: /root/.cache/pip/wheels/46/ef/c3/157e41f5ee1372d1be90b09f74f82b10e391eaacca8f22d33e
Successfully built sklearn
Installing collected packages: sklearn
Successfully installed sklearn-0.0
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [1]:
import os
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np

import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split

import torchvision
import torchvision.transforms as transforms

from sklearn.metrics import confusion_matrix

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
root_path = "/content/drive/MyDrive/NUS/CS4243/CS4243_mini_project"
data_path = os.path.join(root_path, "spectrogram_data_split")
model_root_path = os.path.join(root_path, "models")

In [4]:
# helper function to display images
def imshow(img):
    npimg = img.cpu().numpy()
    plt.figure(figsize=(20, 20))
    plt.imshow(np.transpose(npimg, (1, 2, 0)))

In [5]:
input_size = (299, 299)
batch_size = 32
is_split = True
# train, validation, test
data_split = [0.8, 0.1, 0.1]

# Image transformations
transform = transforms.Compose([
    transforms.Resize(input_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

if is_split:
    # Load data that has already been split
    print("Reading from split data...")
    train_path = os.path.join(data_path, "train")
    validation_path = os.path.join(data_path, "validation")
    test_path = os.path.join(data_path, "test")
    datasets = (torchvision.datasets.ImageFolder(x, transform) for x in [train_path, validation_path, test_path])
else:
    # Load and split data
    print("Reading and splitting data...")
    dataset = torchvision.datasets.ImageFolder(data_path, transform)
    n_data = len(dataset)
    n_train = int(n_data * data_split[0])
    n_validation = int(n_data * data_split[1])
    n_test = n_data - n_train - n_validation
    datasets = random_split(dataset, (n_train, n_validation, n_test))
    train_dataloader, validation_dataloader, test_dataloader = (DataLoader(ds, batch_size=batch_size, shuffle=True, num_workers=2) for ds in datasets)

train_dataloader, validation_dataloader, test_dataloader = (DataLoader(ds, batch_size=batch_size, shuffle=True, num_workers=2) for ds in datasets)
num_train = len(train_dataloader) * batch_size
num_validation = len(validation_dataloader) * batch_size
num_test = len(test_dataloader) * batch_size
print(num_train, "training")
print(num_validation, "validation")
print(num_test, "testing")
print("Total:", num_train + num_validation + num_test)

Reading from split data...
1312 training
192 validation
192 testing
Total: 1696


In [8]:
model = torch.hub.load('pytorch/vision:v0.10.0', 'inception_v3', pretrained=True)
num_classes = len(next(os.walk(data_path))[1])
print(f"Found {num_classes} classes")
model.AuxLogits.fc = nn.Linear(768, num_classes)
model.fc = nn.Linear(2048, num_classes)

Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0


Found 3 classes


In [7]:
model_load_path = os.path.join(model_root_path, 'inception_spectrogram_classifier_e10_lr3')
model.load_state_dict(torch.load(model_load_path))

<All keys matched successfully>

In [None]:
# get some random training images
dataiter = iter(train_dataloader)
images, labels = dataiter.next()

print("Image batch dimensions:", images.shape)
print("Image label dimensions:", labels.shape)

out = model(images)
print("Model output dimensions:", out.shape)

## show images
imshow(torchvision.utils.make_grid(images))
print(labels)

In [13]:
learning_rate = 1e-3
lr_decay = 0.9
num_epochs = 10

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=lr_decay)

In [14]:
def get_labels(logit, size):
    return torch.max(logit, dim=1)[1].view(size)

def get_accuracy(logit, target, batch_size):
    corrects = (get_labels(logit, target.size()).data == target.data).sum()
    accuracy = 100.0 * corrects/batch_size
    return accuracy.item()

In [15]:
model_save_path = os.path.join(model_root_path, "inception_ensemble_spectrogram_classifier_lr3_e20_elr9")

for epoch in range(num_epochs):

    train_running_loss = 0.0
    train_acc = 0.0

    model = model.train()

    # training steps
    pbar = tqdm(total=len(train_dataloader))
    for i, (images, labels) in enumerate(train_dataloader):
        images = images.to(device)
        labels = labels.to(device)

        # forward + backprop + loss
        logit, aux_logit = model(images)
        loss = criterion(logit, labels)
        aux_loss = criterion(aux_logit, labels)
        loss = loss + 0.4 * aux_loss
        optimizer.zero_grad()
        loss.backward()

        # update model params
        optimizer.step()

        # calc training metrics
        train_running_loss += loss.detach().item()
        train_acc += get_accuracy(logit, labels, batch_size)

        # increment progress bar
        pbar.update(1)

    pbar.close()
    scheduler.step()

    val_running_loss = 0.0
    val_acc = 0.0
    model.eval()

    # validation step
    for j, (images, labels) in enumerate(validation_dataloader):
        images = images.to(device)
        labels = labels.to(device)

        # forward step and loss, no bacckprop
        logit = model(images)
        loss = criterion(logit, labels)

        # calc validation metrics
        val_running_loss += loss.detach().item()
        val_acc += get_accuracy(logit, labels, batch_size)


    print('Epoch: %d | Train Loss: %.4f | Train Accuracy: %.2f | Validation Loss: %.4f | Validation Accuracy: %.2f' \
          %(epoch, train_running_loss/i, train_acc/i, val_running_loss/j, val_acc/j))        

torch.save(model.state_dict(), model_save_path)

100%|██████████| 41/41 [00:19<00:00,  2.09it/s]


Epoch: 0 | Train Loss: 1.2744 | Train Accuracy: 58.59 | Validation Loss: 1.7654 | Validation Accuracy: 48.12


100%|██████████| 41/41 [00:19<00:00,  2.08it/s]


Epoch: 1 | Train Loss: 1.1883 | Train Accuracy: 61.80 | Validation Loss: 1.8622 | Validation Accuracy: 36.25


100%|██████████| 41/41 [00:19<00:00,  2.10it/s]


Epoch: 2 | Train Loss: 1.0807 | Train Accuracy: 64.53 | Validation Loss: 1.2104 | Validation Accuracy: 51.25


100%|██████████| 41/41 [00:19<00:00,  2.11it/s]


Epoch: 3 | Train Loss: 1.0565 | Train Accuracy: 66.72 | Validation Loss: 1.2938 | Validation Accuracy: 43.75


100%|██████████| 41/41 [00:19<00:00,  2.11it/s]


Epoch: 4 | Train Loss: 1.1052 | Train Accuracy: 64.38 | Validation Loss: 1.3018 | Validation Accuracy: 41.88


100%|██████████| 41/41 [00:19<00:00,  2.10it/s]


Epoch: 5 | Train Loss: 0.9130 | Train Accuracy: 69.92 | Validation Loss: 1.3573 | Validation Accuracy: 45.62


100%|██████████| 41/41 [00:19<00:00,  2.09it/s]


Epoch: 6 | Train Loss: 0.8323 | Train Accuracy: 74.06 | Validation Loss: 1.3811 | Validation Accuracy: 44.38


100%|██████████| 41/41 [00:19<00:00,  2.08it/s]


Epoch: 7 | Train Loss: 0.8152 | Train Accuracy: 71.48 | Validation Loss: 1.5214 | Validation Accuracy: 46.88


100%|██████████| 41/41 [00:19<00:00,  2.10it/s]


Epoch: 8 | Train Loss: 0.8798 | Train Accuracy: 71.41 | Validation Loss: 1.3989 | Validation Accuracy: 48.12


100%|██████████| 41/41 [00:19<00:00,  2.10it/s]


Epoch: 9 | Train Loss: 0.7513 | Train Accuracy: 74.61 | Validation Loss: 1.3920 | Validation Accuracy: 48.12


In [16]:
model.eval()

test_acc = 0
total_conf_table = np.zeros((3, 3))

for i, (images, labels) in enumerate(test_dataloader):
    images = images.to(device)
    labels = labels.to(device)

    # forward step
    logit = model(images)
    pred = get_labels(logit, labels.size())
    # calc validation metrics
    test_acc += get_accuracy(logit, labels, batch_size)
    pred_np = pred.cpu().detach().numpy()
    label_np = labels.cpu().detach().numpy()
    conf_table = confusion_matrix(label_np, pred_np, labels=[0, 1, 2])
    total_conf_table += conf_table

print("Test Accuracy: %.2f" %(test_acc/i)) 
print("Confusion Table:")
print(total_conf_table)

Test Accuracy: 56.88
Confusion Table:
[[24. 16. 10.]
 [ 2. 42. 10.]
 [ 5. 27. 25.]]
