<h1>Classification Tasks on Audio</h1>

<h2>PyTorch</h2>
<h3>Predicting Waveforms</h3>

<p>We will remake the above tasks, more-or-less, with PyTorch.</p>

<p>These two libraries... could not be more different.</p>

<p>Let's start by loading our dataset. Then, we are going to build a Dataset class that will do the data processing for us, but to keep us honest (and reproducible) we will shuffle that mamma jamma with a fixed random state, as before.</p>

In [None]:
import pickle

import pandas as pd
import numpy as np

# Fixed seed for the row shuffle below, so the row order is repeatable.
RANDOM_STATE = 1

# NOTE: unpickling can execute arbitrary code; only load dataset files you trust.
with open('osc_dataset.pickle', 'rb') as handle:
    df = pickle.load(handle)

# frac=1 keeps every row; only the order changes.
df = df.sample(frac=1, random_state=RANDOM_STATE)

# Integer-encode both label columns and keep the code -> label lookups.
df['osc_cat'], osc_uniques = pd.factorize(df['osc'])
df['freq_cat'], freq_uniques = pd.factorize(df['freq'])

# Store the integer codes as float32 in both label columns.
df = df.astype({'osc_cat': 'float32', 'freq_cat': 'float32'})

In [None]:
from torch.utils.data import Dataset

class MakeDataset(Dataset):
    """Contiguous train/val/test slice of a data frame, served as (X, y) pairs.

    Args:
        df: Data frame holding an 'audio' column (the features) and the
            target column named by ``col_name``.
        col_name: Name of the target column.
        set: Which slice to expose: 'train', 'val' or 'test'.
        split: Mapping from each of 'train', 'val' and 'test' to the fraction
            of rows assigned to it.
    """

    def __init__(self, df, col_name, set, split):
        self.split = split
        self.num_samps = df.shape[0]
        self.get_splits()
        first_row, end_row = self.sample_splits[set]
        rows = slice(first_row, end_row)
        self.feature = df['audio'].iloc[rows]
        self.target = df[col_name].iloc[rows]

    def get_splits(self):
        """Compute the half-open row range (start, stop) of every slice.

        The ranges are laid out back to back in the order train, val, test.
        Each length is the truncated product of its fraction and the row
        count, so a few trailing rows can end up in no slice.
        """
        boundaries = {}
        lower = 0
        for name in ('train', 'val', 'test'):
            upper = lower + int(self.split[name] * self.num_samps)
            boundaries[name] = (lower, upper)
            lower = upper
        self.sample_splits = boundaries

    def __getitem__(self, index):
        """Return the (feature, target) pair at position ``index`` of the slice."""
        return self.feature.iloc[index], self.target.iloc[index]

    def __len__(self):
        """Return the number of rows in the selected slice."""
        return self.feature.shape[0]

<p>Up next, let's build our model class by subclassing nn.Module.</p>

<p>In Tensorflow, Conv1D layers by default expect the input to be $(batch\_size, \; length, \; num\_filters)$. In PyTorch, Conv1d layers expect the input to be $(batch\_size, \; num\_filters, \; length)$.</p>

<p>There is a collection of <em>lazy</em> PyTorch layers that infer their input and output sizes after running for the first time, which would have prevented some aggressive noodle scratching when having to algorithmically build out the network. Instead, the class below breaks out some good ol' fashioned dimensionality calculations.</p>

In [None]:
import torch
import torch.nn as nn

class OscClass(nn.Module):
    """1-D convolutional classifier with a configurable number of conv blocks.

    Every block is Conv1d -> activation -> MaxPool1d. Block ``i`` has
    ``base_filter * 2 ** i`` output channels. The flattened output of the
    last block feeds a single linear layer producing ``output_size`` logits.

    Args:
        output_size: Number of output logits.
        tensor_shape: Length (number of samples) of one input signal.
        num_conv: Number of conv blocks; 0 yields a purely linear model.
        base_filter: Output channels of the first conv layer.
        base_kernel: Kernel size shared by all conv layers.
        stride: Stride shared by all conv layers.
        pool: Kernel size (and stride) of the max-pooling layer.
        activation: Name of an activation class in ``torch.nn``, e.g. 'ReLU'.

    Raises:
        ValueError: If a conv block would shrink the signal to length <= 0.
    """

    def __init__(self, output_size,
                 tensor_shape, num_conv,
                 base_filter, base_kernel,
                 stride, pool, activation):
        super().__init__()
        self.num_conv = num_conv
        in_filter = 1
        # Stays at the single input channel when there are no conv blocks,
        # so the linear layer below is still well defined.
        out_filter = in_filter
        L_out = tensor_shape
        for i in range(self.num_conv):
            out_filter = base_filter * (2 ** i)
            # setattr registers each layer under a generated name
            # ('conv0', 'conv1', ...), so the depth can be chosen at run time.
            setattr(self, 'conv' + str(i),
                    nn.Conv1d(in_filter, out_filter, base_kernel, stride=stride))
            in_filter = out_filter
            L_out = self.calculate_conv_size(L_out, base_kernel, stride)
            L_out = self.calculate_pool_size(L_out, pool)
            if L_out <= 0:
                # Fail early with a readable message instead of an opaque
                # shape error from torch later on.
                raise ValueError(
                    'conv block {} reduces an input of length {} to a '
                    'non-positive length ({})'.format(i, tensor_shape, L_out))

        self.pool = nn.MaxPool1d(pool)
        # Look the activation class up by name in torch.nn and instantiate it.
        self.activation = getattr(nn, activation)()
        # The input width is computed by hand (rather than with a lazy layer)
        # from the length and channel count tracked above.
        self.output_layer = nn.Linear(L_out * out_filter, output_size)

    def calculate_conv_size(self, L_in, ker, strid, dil=1, pad=0):
        """Return the output length of a 1-D convolution over ``L_in`` samples."""
        return int((L_in + 2 * pad - dil * (ker - 1) - 1) // strid + 1)

    def calculate_pool_size(self, L_in, ker, strid=None, dil=1, pad=0):
        """Return the output length of 1-D max pooling over ``L_in`` samples.

        A missing (or zero) stride falls back to the kernel size.
        """
        strid = strid or ker
        return int((L_in + 2 * pad - dil * (ker - 1) - 1) // strid + 1)

    def forward(self, x):
        """Map a (batch_size, length) batch to (batch_size, output_size) logits."""
        # Insert the channel axis: (batch_size, length) -> (batch_size, 1, length)
        x = torch.unsqueeze(x, 1)
        for i in range(self.num_conv):
            conv = getattr(self, 'conv' + str(i))
            x = self.pool(self.activation(conv(x)))
        # Flatten everything except the batch dimension
        x = torch.flatten(x, start_dim=1)
        return self.output_layer(x)

<p>Like in our Tensorflow model, the following will create some training classes.</p>

In [None]:
def build_train_classes(max_epochs, learning_rate, model):
    """Create the optimizer, learning-rate scheduler and loss for ``model``.

    Args:
        max_epochs: Accepted for call-site symmetry; not used here.
        learning_rate: Initial learning rate handed to Adam.
        model: Module whose parameters will be optimized.

    Returns:
        Tuple ``(optimizer, scheduler, loss)``: an Adam optimizer, an
        ExponentialLR scheduler that multiplies the rate by e**-1 on every
        step, and a cross-entropy loss.
    """
    decay_per_step = np.exp(-1)
    adam = torch.optim.Adam(model.parameters(), lr=learning_rate)
    lr_schedule = torch.optim.lr_scheduler.ExponentialLR(adam, gamma=decay_per_step)
    criterion = nn.CrossEntropyLoss()
    return adam, lr_schedule, criterion

<p>Finally, let's make a class to handle the training, validation, and test loops!</p>

In [None]:
class TrainLoop():
    """Train a model, validate it every epoch and test it once at the end.

    Args:
        datasets: Mapping with 'train', 'val' and 'test' keys; each value is
            an iterable yielding ``(X, y)`` tensor batches.
        model: The ``nn.Module`` to fit.
        max_epochs: Number of train + validation epochs to run.
        optimizer: Optimizer already bound to the model parameters.
        scheduler: Learning-rate scheduler, stepped only during the second
            half of training.
        loss_fun: Callable ``loss_fun(pred, y)`` returning a scalar loss.

    Attributes:
        history: Per-split lists of mean loss ('loss') and accuracy ('acc'),
            plus 'lr', the learning rate seen at the start of each epoch.
    """

    def __init__(self, datasets, model, max_epochs,
                optimizer, scheduler, loss_fun):
        self.datasets = datasets
        self.model = model
        self.max_epochs = max_epochs
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.loss_fun = loss_fun

        use_cuda = torch.cuda.is_available()
        self.device = torch.device("cuda:0" if use_cuda else "cpu")

        if use_cuda:
            self.model = self.model.to(self.device)

        self.history = {
            'train': {'loss': [], 'acc': []},
            'val': {'loss': [], 'acc': []},
            'test': {'loss': [], 'acc': []},
            'lr': [],
        }

    def data_loop(self, set):
        """Run one pass over ``self.datasets[set]`` and record its metrics.

        Weights are updated only when ``set`` is 'train'. Any other split
        runs in eval mode with gradient tracking switched off, so no
        autograd graph is built for validation or test batches.
        """
        is_training = set == 'train'
        self.model.train(is_training)
        y_true = []
        y_pred = []
        batch_losses = []
        with torch.set_grad_enabled(is_training):
            for X, y in self.datasets[set]:
                # The loss expects integer class indices, so cast the
                # targets to long while moving the batch to the device.
                X = X.to(self.device)
                y = y.to(self.device, dtype=torch.long)
                # Call the module itself (not .forward) so hooks still run.
                pred = self.model(X)
                loss = self.loss_fun(pred, y)
                if is_training:
                    self.optimizer.zero_grad()
                    loss.backward()
                    self.optimizer.step()
                batch_losses.append(loss.item())
                y_pred.extend(torch.argmax(pred.detach(), -1).tolist())
                y_true.extend(y.tolist())
        self.history[set]['loss'].append(np.mean(batch_losses))
        # Accuracy is computed inline so this class does not depend on an
        # accuracy_score helper being imported elsewhere.
        hits = sum(int(p == t) for p, t in zip(y_pred, y_true))
        accuracy = hits / len(y_true) if y_true else float('nan')
        self.history[set]['acc'].append(accuracy)

    def collect_lr(self):
        """Append the current learning rate of the first parameter group."""
        self.history['lr'].append(self.optimizer.param_groups[0]['lr'])

    def full_loop(self):
        """Run all epochs plus one final test pass and return ``history``."""
        for epoch in range(self.max_epochs):
            self.collect_lr()

            self.data_loop('train')

            self.data_loop('val')

            # The learning rate starts to decay only after the epoch index
            # passes half of max_epochs.
            if epoch > int(self.max_epochs / 2):
                self.scheduler.step()

        self.data_loop('test')

        return self.history

<p>We will remake the hyperparameter search conditions but mostly because the activations have different names in Tensorflow vs. PyTorch.</p>

In [None]:
import itertools
import time

# Number of hyperparameter combinations drawn for the random search.
NUM_EXP = 100

# Candidate values for every searched hyperparameter.
batch_size = [32, 64, 128, 256]
learning_rate = [0.001, 0.0005, 0.002]
max_epochs = [3, 5, 10, 15]
base_filters = [2, 4, 8]
base_kernels = [4, 8, 10]
base_stride = [1, 2, 4]
base_pool = [3, 4, 5]
num_conv_blocks = [3, 4, 5]
activations = ['ReLU', 'SELU', 'ELU']

# Full grid of combinations; tuple positions follow the argument order below.
hyperparameters = list(itertools.product(
    batch_size, learning_rate, max_epochs,
    base_filters, base_kernels, base_stride,
    base_pool, num_conv_blocks, activations,
))

# Draw NUM_EXP distinct grid points and label each tuple position by name.
parameters = []
for i in np.random.choice(np.arange(len(hyperparameters)), size=NUM_EXP, replace=False):
    temp = dict(zip(
        ('batch_size', 'init_learning_rate', 'max_epochs',
         'base_filters', 'base_kernels', 'base_stride',
         'base_pool', 'num_conv_blocks', 'activations'),
        hyperparameters[i],
    ))
    parameters.append(temp)

# Group equal batch sizes together (the sort is stable within a batch size).
parameters.sort(key=lambda x: x['batch_size'])

# The full grid is no longer needed once the sample is drawn.
del hyperparameters

<p>Finally, let's do our stochastic hyperparameter search pattern!</p>

<p>Notice that we are using DataLoader!</p>

In [None]:
# Gate for the search cell below: its loop runs only while this is True, and
# that cell sets it back to False once it has finished.
train = True

In [None]:
from torch.utils import data

# Random hyperparameter search for the oscillator-type task ('osc_cat' labels).
# Each entry of `parameters` gets a fresh model that is trained, validated and
# tested; one result record per successful run is collected in search_history.
search_history = []
prev_batch_size = -1
data_col = 'osc_cat'
tensor_shape = df.audio.iloc[0].shape[0]
output_size = pd.unique(df[data_col]).shape[0]

if train:
    for i, params in enumerate(parameters):
        torch.cuda.empty_cache()
        print('{}'.format(i), '='*10)
        try:
            temp = {}
            # Rebuild the loaders only when the batch size changes.
            if params['batch_size'] != prev_batch_size:
                dataset_params = {'batch_size': params['batch_size']}
                # NOTE(review): SPLIT_DICT is not defined in this section; it
                # must already exist in the notebook namespace - confirm.
                datasets = {split_name: data.DataLoader(MakeDataset(df, data_col, split_name, SPLIT_DICT), **dataset_params)
                            for split_name in ('train', 'val', 'test')}
                prev_batch_size = params['batch_size']

            start = time.time()

            model = OscClass(output_size, tensor_shape,
                             params['num_conv_blocks'], params['base_filters'],
                             params['base_kernels'], params['base_stride'],
                             params['base_pool'], params['activations'])
            optimizer, scheduler, CEL_loss = build_train_classes(params['max_epochs'], params['init_learning_rate'], model)
            train_loop = TrainLoop(datasets, model, params['max_epochs'],
                       optimizer, scheduler, CEL_loss)

            history = train_loop.full_loop()
            # Stop the clock right after training so the figure excludes the
            # bookkeeping and printing below.
            temp['train_time'] = time.time() - start

            temp['train_acc'] = history['train']['acc']
            temp['train_loss'] = history['train']['loss']
            temp['val_acc'] = history['val']['acc']
            temp['val_loss'] = history['val']['loss']
            temp['test_acc'] = history['test']['acc']
            temp['test_loss'] = history['test']['loss']
            temp['learning_rates'] = history['lr']
            temp['realized_epochs'] = len(history['lr'])
            temp['num_params'] = sum(p.numel() for p in model.parameters() if p.requires_grad)

            print('<{} Filters> <{} Kernel> <{} Stride> <{} Pool> <{} Conv Blocks>'.format(params['base_filters'], params['base_kernels'],
                                                                                           params['base_stride'], params['base_pool'],
                                                                                           params['num_conv_blocks']))

            print('<{} Epochs> <{} Batch Size> <{} Initial Learning Rate>'.format(params['max_epochs'], params['batch_size'], params['init_learning_rate']))
            print('Accuracy = {} Parameters = {}'.format(temp['test_acc'], temp['num_params']))
            print('Training took {0:.2f} seconds'.format(temp['train_time']))

            temp.update(params)
            search_history.append(temp)
        except Exception as e:
            # Some sampled configurations cannot be built or trained; report
            # the error and move on to the next one.
            # NOTE(review): this failure record is never appended to
            # search_history, so failed runs are absent from the saved
            # results - confirm that is intended.
            print(e)
            temp['error'] = e
            temp['accuracy'] = -1
            print('Not trainable')

    training_df_osc = pd.DataFrame(search_history)

    # Despite the .xlsx name, this file holds a pickled DataFrame.
    with open('training_df_oscPT.xlsx', 'wb') as handle:
        pickle.dump(training_df_osc, handle)
else:
    # The search was skipped: reuse the results of an earlier run instead of
    # overwriting them with an empty table.
    try:
        with open('training_df_oscPT.xlsx', 'rb') as handle:
            training_df_osc = pickle.load(handle)
    except FileNotFoundError:
        training_df_osc = pd.DataFrame(search_history)

train = False

<h3>Predicting Pitch</h3>

<p>Then, we will run the search again for the pitch classification task with the above PyTorch loop.</p>

<p>The only change we have to make is to change the target data column of the input data frame in the MakeDataset class.</p>

In [None]:
# Re-arm the gate for the pitch search cell below: its loop runs only while
# this is True, and that cell sets it back to False once it has finished.
train = True

In [None]:
# Random hyperparameter search for the pitch task ('freq_cat' labels).
# Each entry of `parameters` gets a fresh model that is trained, validated and
# tested; one result record per successful run is collected in search_history.
search_history = []
prev_batch_size = -1
data_col = 'freq_cat'
tensor_shape = df.audio.iloc[0].shape[0]
output_size = pd.unique(df[data_col]).shape[0]

if train:
    for i, params in enumerate(parameters):
        torch.cuda.empty_cache()
        print('{}'.format(i), '='*10)
        try:
            temp = {}
            # Rebuild the loaders only when the batch size changes.
            if params['batch_size'] != prev_batch_size:
                dataset_params = {'batch_size': params['batch_size']}
                # MakeDataset requires the split mapping as its fourth
                # argument; without it every run fails with a TypeError.
                # NOTE(review): SPLIT_DICT is not defined in this section; it
                # must already exist in the notebook namespace - confirm.
                datasets = {split_name: data.DataLoader(MakeDataset(df, data_col, split_name, SPLIT_DICT), **dataset_params)
                            for split_name in ('train', 'val', 'test')}
                prev_batch_size = params['batch_size']

            start = time.time()

            model = OscClass(output_size, tensor_shape,
                             params['num_conv_blocks'], params['base_filters'],
                             params['base_kernels'], params['base_stride'],
                             params['base_pool'], params['activations'])
            optimizer, scheduler, CEL_loss = build_train_classes(params['max_epochs'], params['init_learning_rate'], model)
            train_loop = TrainLoop(datasets, model, params['max_epochs'],
                       optimizer, scheduler, CEL_loss)

            history = train_loop.full_loop()
            # Stop the clock right after training so the figure excludes the
            # bookkeeping and printing below.
            temp['train_time'] = time.time() - start

            temp['train_acc'] = history['train']['acc']
            temp['train_loss'] = history['train']['loss']
            temp['val_acc'] = history['val']['acc']
            temp['val_loss'] = history['val']['loss']
            temp['test_acc'] = history['test']['acc']
            temp['test_loss'] = history['test']['loss']
            temp['learning_rates'] = history['lr']
            temp['realized_epochs'] = len(history['lr'])
            temp['num_params'] = sum(p.numel() for p in model.parameters() if p.requires_grad)

            print('<{} Filters> <{} Kernel> <{} Stride> <{} Pool> <{} Conv Blocks>'.format(params['base_filters'], params['base_kernels'],
                                                                                           params['base_stride'], params['base_pool'],
                                                                                           params['num_conv_blocks']))

            print('<{} Epochs> <{} Batch Size> <{} Initial Learning Rate>'.format(params['max_epochs'], params['batch_size'], params['init_learning_rate']))
            print('Accuracy = {} Parameters = {}'.format(temp['test_acc'], temp['num_params']))
            print('Training took {0:.2f} seconds'.format(temp['train_time']))

            temp.update(params)
            search_history.append(temp)
        except Exception as e:
            # Some sampled configurations cannot be built or trained; report
            # the error and move on to the next one.
            # NOTE(review): this failure record is never appended to
            # search_history, so failed runs are absent from the saved
            # results - confirm that is intended.
            print(e)
            temp['error'] = e
            temp['accuracy'] = -1
            print('Not trainable')

    training_df_freq = pd.DataFrame(search_history)

    # Despite the .xlsx name, this file holds a pickled DataFrame.
    with open('training_df_freqPT.xlsx', 'wb') as handle:
        pickle.dump(training_df_freq, handle)
else:
    # The search was skipped: reuse the results of an earlier run instead of
    # overwriting them with an empty table.
    try:
        with open('training_df_freqPT.xlsx', 'rb') as handle:
            training_df_freq = pickle.load(handle)
    except FileNotFoundError:
        training_df_freq = pd.DataFrame(search_history)

train = False