In [1]:
%load_ext autoreload
%autoreload

import time
import datetime
import os
import random
import copy

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data.sampler import SubsetRandomSampler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import adabound

from trainer_helper import Trainer_Helper

# Run on the first CUDA GPU when one is available, otherwise on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Fixed seeds for Python's and PyTorch's random number generators.
random.seed(23)
torch.manual_seed(23)
# Check the device type rather than comparing the torch.device object with the
# string "cuda:0": that comparison is not a reliable equality test, which meant
# the CUDA generator seeding below could be skipped even on a GPU machine.
if device.type == "cuda":
    torch.cuda.manual_seed(23)

# Last expression of the cell: display the selected device.
device

device(type='cuda', index=0)

In [2]:
# Mini-batch size handed to the data loaders.
batch_size = 8

# Number of target classes (presumably the ten UrbanSound8K categories).
num_classes = 10

# Root folder of the bitmap dataset; the 'train/', 'val/' and 'test/' split
# folders are looked up underneath it.
data_dir = "UrbanSound8K/bitmap/"

In [15]:
def bitmap_loader(path, max_len=22050):
    """Load a bit-packed ``.npz`` sample and expand it to one value per bit.

    The array stored under ``'arr_0'`` is cropped or zero-padded along axis 1
    to exactly ``max_len`` columns and then unpacked with ``np.unpackbits``,
    so every stored byte becomes eight 0/1 entries.

    Args:
        path: Path of the ``.npz`` archive to read.
        max_len: Number of packed columns (bytes per row) to keep. Defaults to
            22050, the value that used to be hard-coded here.

    Returns:
        A 2-D ``uint8`` array of shape ``(rows, 8 * max_len)``.
    """
    with np.load(path) as data:
        # Index the archive only once: each lookup on the lazily loaded .npz
        # object reads the array from the file again.
        packed = data['arr_0']

    # Crop rows that are too long ...
    packed = packed[:, :max_len]
    # ... and right-pad rows that are too short with zero bytes.
    missing = max_len - packed.shape[1]
    if missing > 0:
        packed = np.pad(packed, ((0, 0), (0, missing)), 'constant')

    # Unpack every row in a single vectorised call instead of a Python loop.
    return np.unpackbits(packed, axis=1)


# One DatasetFolder per split; every file is decoded with bitmap_loader.
# `extensions` is passed as a tuple, the form documented by torchvision, so the
# check is a genuine ".npz" suffix match on every torchvision version.
train_dataset = datasets.DatasetFolder(data_dir + 'train/', loader=bitmap_loader, extensions=('.npz',))
val_dataset = datasets.DatasetFolder(data_dir + 'val/', loader=bitmap_loader, extensions=('.npz',))
test_dataset = datasets.DatasetFolder(data_dir + 'test/', loader=bitmap_loader, extensions=('.npz',))

# Settings shared by all three loaders. pin_memory used to be missing on the
# test loader only; it is now applied consistently.
_loader_kwargs = dict(batch_size=batch_size, pin_memory=True, num_workers=4)

# Only the training split is shuffled.
train_dataloader = torch.utils.data.DataLoader(train_dataset, shuffle=True, **_loader_kwargs)
val_dataloader = torch.utils.data.DataLoader(val_dataset, **_loader_kwargs)
test_dataloader = torch.utils.data.DataLoader(test_dataset, **_loader_kwargs)

In [10]:
# Sanity check: show the first training sample, a (bit array, class index) pair.
train_dataset[0]

(array([[1, 1, 1, ..., 0, 0, 0],
        [1, 1, 1, ..., 0, 0, 0],
        [1, 1, 1, ..., 0, 0, 0],
        ...,
        [1, 1, 1, ..., 0, 0, 0],
        [1, 1, 1, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]], dtype=uint8), 0)

In [5]:
class RNN(nn.Module):
    """1-D CNN feature extractor followed by an LSTM and a linear classifier.

    Three strided ``Conv1d -> ReLU -> BatchNorm1d`` stages shorten the input
    sequence, the result is squashed with ``tanh`` and fed to an LSTM, and the
    LSTM output at the last time step is mapped to class scores.

    Args:
        hidden_size: Hidden size of the LSTM.
        lstm_layers: Number of stacked LSTM layers.
        cnn_start_channels: Output channels of the first convolution; the
            second and third convolutions use 2x and 4x this value.
        num_classes: Size of the final linear layer's output. Defaults to 10,
            the value that used to be hard-coded.
    """

    def __init__(self, hidden_size=256, lstm_layers=2, cnn_start_channels=256, num_classes=10):
        super().__init__()
        # The last placeholder reports the LSTM hidden size; it used to be
        # filled with cnn_start_channels by mistake.
        self.name = "CNN({})_LSTM({}_hidden_{})".format(cnn_start_channels, lstm_layers, hidden_size)

        # Shape comments use C = cnn_start_channels and assume an input of
        # 16 x 176400 (channels x length) -- TODO confirm against the loader.
        self.conv_layers = nn.Sequential(
            nn.Conv1d(in_channels=16, out_channels=cnn_start_channels, kernel_size=30, stride=10),
            # output: C x 17638
            nn.ReLU(),
            nn.BatchNorm1d(cnn_start_channels),

            nn.Conv1d(in_channels=cnn_start_channels, out_channels=2*cnn_start_channels, kernel_size=30, stride=10),
            # output: 2C x 1761
            nn.ReLU(),
            nn.BatchNorm1d(2*cnn_start_channels),

            nn.Conv1d(in_channels=2*cnn_start_channels, out_channels=4*cnn_start_channels, kernel_size=30, stride=10),
            # output: 4C x 174
            nn.ReLU(),
            nn.BatchNorm1d(4*cnn_start_channels),
        )

        # Inter-layer dropout only exists between stacked layers; passing a
        # non-zero value with a single layer just triggers a PyTorch warning.
        self.rnn = nn.LSTM(input_size=4*cnn_start_channels,
                           hidden_size=hidden_size,
                           dropout=0.2 if lstm_layers > 1 else 0.0,
                           num_layers=lstm_layers)

        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, inputs, hidden=None):
        """Run the network on a batch.

        Args:
            inputs: Tensor of shape ``(batch, 16, length)``.
            hidden: Optional initial LSTM state; ``None`` lets the LSTM start
                from its default state.

        Returns:
            ``(scores, hidden)``, where ``scores`` has shape
            ``(batch, num_classes)`` and ``hidden`` is the state returned by
            the LSTM.
        """
        output = self.conv_layers(inputs)

        # (batch, channels, steps) -> (steps, batch, channels): the LSTM is
        # built without batch_first, so it expects the time axis first.
        output = output.transpose(1, 2).transpose(0, 1)

        output = torch.tanh(output)
        output, hidden = self.rnn(output, hidden)

        # Classify from the LSTM output at the last time step only.
        output = self.fc(output[-1, :, :])

        return output, hidden

    def get_name(self):
        """Return the descriptive model name built in ``__init__``."""
        return self.name


In [None]:
model = RNN()

# Label passed to the trainer helper for this run. The timestamp-based name
# that used to be assigned just before this line was overwritten immediately,
# so that dead assignment has been removed.
run_name = 'rnn-adabound-lr0-001'

#optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
optimizer = adabound.AdaBound(model.parameters(), lr=1e-3, final_lr=0.1)
criterion = nn.CrossEntropyLoss()

trainer = Trainer_Helper(run_name, device)
trainer.setup_dataloader(train_dataloader, val_dataloader, test_dataloader)
# NOTE(review): the last argument is presumably the number of epochs -- confirm
# against Trainer_Helper.train_rnn.
trainer.train_rnn(model, optimizer, criterion, 100)

# Alternative: use a CosineAnnealingScheduler with the SGD optimizer. It gives similar results to AdaBound but is much slower.
#optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
#lr = 1e-3
#lr_handler = CosineAnnealingScheduler(optimizer, 'lr', lr, lr*100, len(train_dataloader), cycle_mult=2)
#trainer.add_event_handler(Events.ITERATION_COMPLETED, lr_handler)




Training Results - Epoch: 1  Avg accuracy: 0.33 Avg loss: 1.72
Validation Results - Epoch: 1  Avg accuracy: 0.31 Avg loss: 1.80
Training Results - Epoch: 2  Avg accuracy: 0.40 Avg loss: 1.60
Validation Results - Epoch: 2  Avg accuracy: 0.35 Avg loss: 1.70
Training Results - Epoch: 3  Avg accuracy: 0.45 Avg loss: 1.52
Validation Results - Epoch: 3  Avg accuracy: 0.37 Avg loss: 1.84
Training Results - Epoch: 4  Avg accuracy: 0.50 Avg loss: 1.35
Validation Results - Epoch: 4  Avg accuracy: 0.39 Avg loss: 1.64
Training Results - Epoch: 5  Avg accuracy: 0.50 Avg loss: 1.40
Validation Results - Epoch: 5  Avg accuracy: 0.39 Avg loss: 1.76
Training Results - Epoch: 6  Avg accuracy: 0.57 Avg loss: 1.16
Validation Results - Epoch: 6  Avg accuracy: 0.40 Avg loss: 1.81
Training Results - Epoch: 7  Avg accuracy: 0.66 Avg loss: 0.96
Validation Results - Epoch: 7  Avg accuracy: 0.41 Avg loss: 1.53
Training Results - Epoch: 8  Avg accuracy: 0.72 Avg loss: 0.78
Validation Results - Epoch: 8  Avg accura