In [1]:
import torch
from torch.utils.data import Dataset

import torchaudio
import torchaudio.transforms

import torchvision


import sys, os

from pprint import pprint

from tqdm.autonotebook import tqdm

import json

import numpy as np

import matplotlib.pylab as plt
import seaborn as sns

import librosa
import librosa.display

import pandas as pd

from pathlib import Path

import gc

MANUAL_SEED = 69

import torch.nn as nn
import torch.nn.functional as F

import torch.optim as optim

from datetime import date
from datetime import datetime

import os.path
from os import path
  
import json

import time

import copy

from matplotlib import pyplot as plt
plt.rcParams['figure.dpi'] = 150
plt.rcParams['savefig.dpi'] = 150

from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler

from sklearn.model_selection import KFold



In [2]:
!jupyter nbextension enable --py widgetsnbextension

Enabling notebook extension jupyter-js-widgets/extension...
      - Validating: [32mOK[0m


In [3]:
def make_dir_if_absent(dir_path):
  
  if not os.path.exists(dir_path):
    os.makedirs(dir_path)
    

In [4]:
class FMADataset(Dataset):

  def __init__(
    self, path, transforms, data_type, mean, std
  ):
    self.path = path
    self.transforms = transforms
    self.data_type = data_type,
    self.mean = mean
    self.std = std
    
    self.data_paths = self._load_audio_list()

  def __len__(self):
    return len(self.data_paths)

  def __getitem__(self, idx):

    data = (torch.load(self.data_paths[idx]) - self.mean) / self.std

    label_one_hot = self._label_from_str_to_one_hot(
      self.data_paths[idx].split("/")[-2]
    )

    return data, label_one_hot
  
  def _label_from_str_to_one_hot(self, label_str: str): 
  
    if label_str == "Pop":
      return torch.tensor([1, 0, 0, 0, 0, 0]).float()
    
    if label_str == "Hip-Hop":
      return torch.tensor([0, 1, 0, 0, 0, 0]).float()
    
    if label_str == "Electronic":
      return torch.tensor([0, 0, 1, 0, 0, 0]).float()
    
    if label_str == "Rock":
      return torch.tensor([0, 0, 0, 1, 0, 0]).float()

    if label_str == "Folk":
      return torch.tensor([0, 0, 0, 0, 1, 0]).float()

    if label_str == "Jazz":
      return torch.tensor([0, 0, 0, 0, 0, 1]).float()
    
  
  def _load_audio_list(self):
    
    audio_path_list = []
    
    for path, subdirs, files in os.walk(self.path):
      for name in files:
          
        file_audio_path = os.path.join(path, name)
        
        audio_path_list.append(file_audio_path)

    return sorted(audio_path_list, reverse=True)
        
        

In [5]:
DATASET_SIZE = "s"
DATASET_TYPE = "waveform"
DATASET_FOLDER = f"./data/{DATASET_TYPE}"

DATASET_NUM_SAMPLES_PER_SECOND = 8000
DATASET_NUM_CHANNELS = 1

DATASET_NAME = f"fma_{DATASET_SIZE}_resampled_{DATASET_NUM_SAMPLES_PER_SECOND}_rechanneled_{DATASET_NUM_CHANNELS}"

dataset_path = f"{DATASET_FOLDER}/{DATASET_NAME}"

SUMMARY_STATISTICS_PATH = f"./data/summary_statistics/{DATASET_NAME}/{DATASET_NAME}_summary_statistics.json"

In [6]:
summary_statistics_json = open(SUMMARY_STATISTICS_PATH)

summary_statistics_dict = json.load(summary_statistics_json)

In [7]:
fma_data_transforms = torch.nn.Sequential(
  # torchvision.transforms.Normalize(
  #   summary_statistics_dict[f"{DATASET_TYPE}_mean"],
  #   summary_statistics_dict[f"{DATASET_TYPE}_std"]
  # )
)

In [8]:
fma_dataset = FMADataset(
  path=dataset_path, 
  transforms=fma_data_transforms,
  data_type=DATASET_TYPE,
  mean=summary_statistics_dict[f"{DATASET_TYPE}_mean"],
  std=summary_statistics_dict[f"{DATASET_TYPE}_std"]
)

In [9]:
TRAIN_VAL_PERCENTAGE = 0.9

full_size = len(fma_dataset)
train_val_size = int(TRAIN_VAL_PERCENTAGE * full_size)
test_size = full_size - train_val_size

In [10]:
generator=torch.Generator().manual_seed(MANUAL_SEED)

fma_dataset_train_val, fma_dataset_test = torch.utils.data.random_split(
  fma_dataset, [train_val_size, test_size], generator
)

In [11]:
TRAIN_PERCENTAGE = 0.8

full_size = train_val_size
train_size = int(TRAIN_PERCENTAGE * full_size)
val_size = full_size - train_size

In [12]:
fma_dataset_train, fma_dataset_val = torch.utils.data.random_split(
  fma_dataset_train_val, [train_size, val_size], generator
)

In [13]:
# print(f"len(fma_dataset_train): {len(fma_dataset_train)}")
# print(f"len(fma_dataset_val)  : {len(fma_dataset_val)}")
# print(f"len(fma_dataset_test) : {len(fma_dataset_test)}")

# **TODO reflect on this and use it as first hypothesis**

"the learning rate and batch size are closely linked — small batch sizes perform best with smaller learning rates, while large batch sizes do best on larger learning rates"

In [14]:
BATCH_SIZE = 16
NUM_WORKERS = 16

data_logs = {
  "data_type": DATASET_TYPE,
  "dataset_size": DATASET_SIZE,
  "batch_size": BATCH_SIZE,
  "num_samples_per_second": DATASET_NUM_SAMPLES_PER_SECOND,
  "num_channels": DATASET_NUM_CHANNELS
}

In [15]:
fma_dataloader_train = torch.utils.data.DataLoader(
  fma_dataset_train, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, 
  generator=generator
)
fma_dataloader_val = torch.utils.data.DataLoader(
  fma_dataset_val, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, 
  generator=generator
)
fma_dataloader_test = torch.utils.data.DataLoader(
  fma_dataset_test, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, 
  generator=generator
)

In [16]:
def count_num_trainable_parameters(model):
  return sum(p.numel() for p in model.parameters() if p.requires_grad)

In [17]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(device)
print(
  torch.cuda.get_device_name(device) if torch.cuda.is_available() else "cpu"
)

cuda
NVIDIA GeForce RTX 2070


In [18]:
def gen_train_id():
  return datetime.now().strftime("%Y_%m_%d_%H_%M_%S")

In [19]:
def save_dict_to_disk(dict, full_path):

  make_dir_if_absent("/".join(full_path.split("/")[:-1]))

  with open(full_path, 'w') as fp:
    json.dump(dict, fp)

In [20]:
def store_ckp(
  model, optimizer, ckp_dir, ckp_name, epoch, loss_train, loss_val, loss_test
):

  model_copy = copy.deepcopy(model)
  
  full_path_pickle = f"{ckp_dir}/{ckp_name}_epoch_{epoch}.pth"
  
  make_dir_if_absent(dir_path="/".join(full_path_pickle.split('/')[:-1]))
  
  torch.save(
    {
      'epoch': epoch,
      'model_state_dict': model_copy.cpu().state_dict(),
      'optimizer_state_dict': optimizer.state_dict(),
      'loss_train': loss_train,
      'loss_val': loss_val,
      'loss_test': loss_test,
    }, 
    full_path_pickle
  )
  
  torch.save(
    model_copy.cpu(), 
    full_path_pickle
  )

In [21]:
def load_ckp(ckp_path, perform_loading_sanity_check):

  loaded_model = torch.load(ckp_path)

  if perform_loading_sanity_check:

    loaded_model.eval()

    sanity_check_out = loaded_model(torch.rand((16, 1, 238000)))

  return loaded_model

In [22]:
def get_num_correct_preds(outputs, labels):
  
  output_pred_ind = torch.argmax(outputs, dim=1)
  labels_ind = torch.argmax(labels, dim=1)
  
  matching_mask = (output_pred_ind == labels_ind).float()
  
  num_correct_preds = matching_mask.sum()
  
  return num_correct_preds

In [23]:
def train_model(
  model, optimizer, criterion,
  batch_size, train_dl, val_dl, test_dl, 
  num_epochs, 
  device, 
  print_freq, ckp_freq, 
  ckp_dir, ckp_name,
  should_close_tqdm_prog_bars_when_done
):

  train_id = gen_train_id()
  
  training_logs = {
    "train_id": train_id,
    "accuracies": {},
    "losses": {}
  }
  
  model = model.to(device)
  
  pbar_epochs = tqdm(range(num_epochs), colour="#9400d3")
  pbar_batches_train = tqdm(
    iter(train_dl), colour="#4169e1", leave=False,
  )
  pbar_batches_val = tqdm(
    iter(val_dl), colour="#008080", leave=False,
  )
  
  training_start_time = time.time()

  for epoch in range(num_epochs):

    running_loss_train = 0.0
    running_loss_val   = 0.0
    running_loss_test  = -1.0
    
    num_correct_preds_train = 0.0
    num_preds_train = 0.0
    accuracy_train = 0.0
    
    num_correct_preds_val = 0.0
    num_preds_val = 0.0
    accuracy_val = 0.0
    
    num_correct_preds_test = 0.0
    num_preds_test = 0.000000001
    accuracy_test = 0.0
        
    ## BEGIN training step
    
    model.train()
    
    pbar_batches_train.reset()
    pbar_batches_val.reset()
    
    pbar_epochs.set_description(f"epoch {epoch}")
    pbar_batches_train.set_description(f"epoch {epoch}")
    pbar_batches_val.set_description  (f"epoch {epoch}")
    
    for batch_x, batch_y in iter(train_dl):

      inputs, labels = batch_x, batch_y
      inputs, labels = inputs.to(device), labels.to(device)
      
      optimizer.zero_grad()

      outputs = model(inputs)
      outputs = outputs.squeeze(-1)
      
      loss = criterion(outputs, labels)
      
      loss.backward()
      optimizer.step()

      running_loss_train += loss.item() * batch_x.shape[0]
      
      num_correct_preds_train += get_num_correct_preds(outputs, labels)
      num_preds_train += outputs.shape[0]
      
      pbar_batches_train.update(1)
      
    
    ## END training step
    
    ## BEGIN validation step
    
    with torch.no_grad():
      
      model.eval()
      
      for batch_x, batch_y in iter(val_dl):

        inputs, labels = batch_x, batch_y
        inputs, labels = inputs.to(device), labels.to(device)
        
        outputs = model(inputs)
        outputs = outputs.squeeze(-1)
        
        loss = criterion(outputs, labels)
        
        running_loss_val += loss.item() * batch_x.shape[0]
        
        num_correct_preds_val += get_num_correct_preds(outputs, labels)
        num_preds_val += outputs.shape[0]
        
        pbar_batches_val.update(1)
        
    ## END validation step
    
    ## BEGIN test step
    
    if (epoch + 1 == num_epochs):
      
      pbar_batches_test = tqdm(
        iter(test_dl), colour="#808000", leave=False,
      )
      pbar_batches_test.set_description  (f"epoch {epoch}")
    
      with torch.no_grad():
        
        model.eval()
        
        for batch_x, batch_y in iter(test_dl):

          inputs, labels = batch_x, batch_y
          inputs, labels = inputs.to(device), labels.to(device)
          
          outputs = model(inputs)
          outputs = outputs.squeeze(-1)
          
          loss = criterion(outputs, labels)
          
          running_loss_test += loss.item() * batch_x.shape[0]
          
          num_correct_preds_test += get_num_correct_preds(outputs, labels)
          num_preds_test += outputs.shape[0]
          
          pbar_batches_test.update(1)
        
    ## END test step
    
    accuracy_train = num_correct_preds_train / num_preds_train
    accuracy_val = num_correct_preds_val / num_preds_val
    accuracy_test = num_correct_preds_test / num_preds_test
    
    training_logs["accuracies"][str(epoch)] = {
      "accuracy_train": accuracy_train.cpu().item(),
      "accuracy_val": accuracy_val.cpu().item(),
    }
    training_logs["losses"][str(epoch)] = {
      "loss_train": running_loss_train,
      "loss_val": running_loss_val,
    }
    
    pbar_epochs.update(1)
    
    if ((epoch + 1) % print_freq == 0):  
      tqdm.write(
        f"epoch: {epoch + 1}\n" + 
        f"      train loss: {running_loss_train}, train acc: {accuracy_train}\n" + 
        f"      val loss  : {running_loss_val}, val acc  : {accuracy_val}\n"
      )
    
    if ((epoch + 1) == num_epochs):
      tqdm.write(
        f"      test loss : {running_loss_test}, test acc : {accuracy_test}"
      )
      
      training_logs["accuracies"][str(epoch)][
        "accuracy_test"
      ] = accuracy_test.cpu().item()
      
      training_logs["losses"][str(epoch)][
        "loss_test"
      ] = running_loss_test
      
    if (ckp_freq != None and (epoch + 1) % ckp_freq == 0):
      
      store_ckp(
        model=model, optimizer=optimizer, 
        ckp_dir=ckp_dir, ckp_name=ckp_name, epoch=epoch, 
        loss_train=running_loss_train, 
        loss_val=running_loss_val, 
        loss_test=running_loss_test
      )
  
  training_end_time = time.time()

  training_logs["training_time_secs"] = training_end_time - training_start_time

  if (should_close_tqdm_prog_bars_when_done):
    pbar_epochs.container.close()
    pbar_batches_train.close()
    pbar_batches_val.close()
    pbar_batches_test.close()
  
  return training_logs

## CNN

### Design motivations

First layers --> neural compression layers --> dimensionality reduction to roughly match dimensions of this paper https://arxiv.org/pdf/1703.01789.pdf

Mid and final layers --> taken 1:1 from the paper linked above

Batch norm placed BEFORE the activation function, as described in the og paper https://arxiv.org/abs/1502.03167 and explained by Bengio in his DL book https://www.deeplearningbook.org/contents/optimization.html in section 8.7.1

Dropout placed according to the og paper: https://arxiv.org/pdf/1207.0580.pdf

In [24]:
class CNN(nn.Module):
  def __init__(
    self, 
    num_layers, 
    kernel_sizes, strides, 
    in_channels, num_filters,
    pool_sizes, pool_strides,
    dropout_p_conv, dropout_p_linear
  ):
    super().__init__()

    self.num_layers = num_layers 
    self.kernel_sizes = kernel_sizes 
    self.strides = strides 
    self.in_channels = in_channels 
    self.num_filters = num_filters
    self.pool_sizes = pool_sizes 
    self.pool_strides = pool_strides
    
    self.dropout_p_conv = dropout_p_conv
    self.dropout_p_linear = dropout_p_linear

    self.bns = {
      "4": nn.BatchNorm1d(num_features=4),
      "6": nn.BatchNorm1d(num_features=6),
      "8": nn.BatchNorm1d(num_features=8),
      "16": nn.BatchNorm1d(num_features=16),
      "32": nn.BatchNorm1d(num_features=32),
      "64": nn.BatchNorm1d(num_features=64),
      "128": nn.BatchNorm1d(num_features=128),
      "256": nn.BatchNorm1d(num_features=256),
      "512": nn.BatchNorm1d(num_features=512)
    }
    
    self.neural_network = nn.Sequential()
    
    for i in range(num_layers):
      
      conv_layer = nn.Conv1d(
        kernel_size=self.kernel_sizes[i],
        stride=self.strides[i],
        in_channels=in_channels,
        out_channels=self.num_filters[i]
      )
      torch.nn.init.xavier_uniform_(conv_layer.weight)

      
      pooling_layer = nn.MaxPool1d(
        kernel_size=self.pool_sizes[i],
        stride=self.pool_strides[i],
      )
      
      in_channels = self.num_filters[i]
      
      self.neural_network.add_module(name=f"conv_{i}", module=conv_layer)
      
      self.neural_network.add_module(name=f"pool_{i}", module=pooling_layer)
        
      self.neural_network.add_module(
        name=f"batchnorm_{i}", module=self.bns[str(self.num_filters[i])]
      )
      
      if (i < num_layers - 1):
        
        self.neural_network.add_module(name=f"activ_{i}", module=nn.ReLU())

      else:
        
        self.neural_network.add_module(name=f"activ_{i}", module=nn.Sigmoid())

      if (i < num_layers - 2):

        self.neural_network.add_module(
          name=f"dropout_{i}", module=nn.Dropout(p=self.dropout_p_conv))
        
      if (i == num_layers - 2):

        self.neural_network.add_module(
          name=f"dropout_{i}", module=nn.Dropout(p=self.dropout_p_linear)
        )
  
  def forward(self, x):    
    x = self.neural_network(x)

    return x

  def get_model_setup(self):
    
    return {
      "num_layers": self.num_layers, 
      "kernel_sizes": self.kernel_sizes, 
      "strides": self.strides, 
      "in_channels": self.in_channels, 
      "num_filters": self.num_filters,
      "pool_sizes": self.pool_sizes, 
      "pool_strides": self.pool_strides,
      "dropout_p_conv": self.dropout_p_conv,
      "dropout_p_linear": self.dropout_p_linear,
    }

In [25]:
def plot_loss_curves(stats):
  epochs = stats["training_logs"]["losses"].keys()
  
  loss_train = [
    j["loss_train"] for j in stats["training_logs"]["losses"].values()
  ]
  
  loss_val = [j["loss_val"] for j in stats["training_logs"]["losses"].values()]

  sns.lineplot(
    x=epochs,
    y=loss_train,
    legend="full",
    label="train loss"
  )

  sns.lineplot(
    x=epochs,
    y=loss_val,
    legend="full",
    label="val loss"
  )

## K-fold cross validation for hyperparameter search

In [26]:
k_fold_cv_num_layers = 5

k_fold_cv_kernel_sizes =[64, 32, 16,   8,   4]
k_fold_cv_pool_sizes   =[ 8,  8,  2,   2,   4]
k_fold_cv_strides      =[ 3,  3,  2,   2,   2]
k_fold_cv_pool_strides =[ 8,  8,  2,   4,   2]
k_fold_cv_num_filters  =[16, 32, 64, 128,   6]

k_fold_cv_in_channels   = 1 if DATASET_TYPE == "waveform" else 2

K_FOLD_CV_DROPOUT_P_CONV = 0.0
K_FOLD_CV_DROPOUT_P_LINEAR = 0.5

In [27]:
def cnn_factory(
  num_layers,
  kernel_sizes,
  strides,
  in_channels,
  num_filters,
  pool_sizes,
  pool_strides,
  dropout_p_conv, 
  dropout_p_linear
):
  return CNN(
  num_layers=num_layers,
  kernel_sizes=kernel_sizes, 
  strides=strides, 
  in_channels=in_channels, 
  num_filters=num_filters,
  pool_sizes=pool_sizes,
  pool_strides=pool_strides,
  dropout_p_conv=dropout_p_conv,
  dropout_p_linear=dropout_p_linear
)

In [28]:
LR = 0.001
# LR = 0.01
MOMENTUM = 0.9
WEIGHT_DECAY = 1e-6
OPTIMIZER_NAME = "SGD"

In [29]:
def optimizer_factory(optimizer_name, model, lr, momentum, weight_decay):

  if optimizer_name == "SGD":
    optimizer = optim.SGD(
      model.parameters(), 
      lr=lr, 
      momentum=momentum,
      nesterov=True,
      weight_decay=weight_decay
    )

    optimizer_config = {
    "lr": lr, 
    "momentum": momentum, 
    "weight_decay": weight_decay,
    "nesterov": True
  }  

  elif optimizer_name == "Adam":

    optimizer = optim.Adam(
      model.parameters(),
      lr=lr,
      weight_decay=weight_decay
    )

    optimizer_config = {
    "lr": lr, 
    "momentum": momentum, 
    "weight_decay": weight_decay
  }  
    

  return optimizer, optimizer_config


In [30]:
K_FOLD_CV_NUM_FOLDS = 3

K_FOLD_CV_BATCH_SIZE = 16

K_FOLD_CV_NUM_EPOCHS = 50

K_FOLD_CV_PRINT_FREQ = int(K_FOLD_CV_NUM_EPOCHS / 10)

K_FOLD_CV_CKP_FREQ = int(K_FOLD_CV_NUM_EPOCHS / 5)

K_FOLD_CV_LOGS_FOLDER = "./k_fold_cv"

K_FOLD_CV_CKP_FOLDER = K_FOLD_CV_LOGS_FOLDER

K_FOLD_CV_SHOULD_CLOSE_TQDM_PROG_BARS_WHEN_DONE=True

In [31]:
cv_models = [

  cnn_factory(
    num_layers=k_fold_cv_num_layers,
    kernel_sizes=k_fold_cv_kernel_sizes, 
    strides=k_fold_cv_strides, 
    in_channels=k_fold_cv_in_channels, 
    num_filters=k_fold_cv_num_filters,
    pool_sizes=k_fold_cv_pool_sizes,
    pool_strides=k_fold_cv_pool_strides,
    dropout_p_conv=K_FOLD_CV_DROPOUT_P_CONV,
    dropout_p_linear=K_FOLD_CV_DROPOUT_P_LINEAR
  ) for _ in range(0, K_FOLD_CV_NUM_FOLDS)

]

cv_criterions = [nn.CrossEntropyLoss() for _ in range(0, K_FOLD_CV_NUM_FOLDS)]

cv_opts = [
  optimizer_factory(
    optimizer_name=OPTIMIZER_NAME,
    model=cv_models[i],
    lr=LR,
    momentum=MOMENTUM,
    weight_decay=WEIGHT_DECAY
  ) for i in range(0, K_FOLD_CV_NUM_FOLDS)
]

cv_optimizers = [opt for opt, _ in cv_opts]
cv_optimizers_configs = [opt_conf for _, opt_conf in cv_opts]

cv_train_dls = []
cv_val_dls = []
cv_test_dls = []

In [32]:
k_fold = KFold(n_splits=K_FOLD_CV_NUM_FOLDS, shuffle=True)

cv_dataset = torch.utils.data.ConcatDataset(
  [fma_dataset_train, fma_dataset_val]
)

for fold, (train_idxs, val_idxs) in enumerate(k_fold.split(cv_dataset)):

  train_subsampler = torch.utils.data.SubsetRandomSampler(train_idxs)
  val_subsampler = torch.utils.data.SubsetRandomSampler(val_idxs)

  cv_train_dls.append(
    torch.utils.data.DataLoader(
      cv_dataset, batch_size=K_FOLD_CV_BATCH_SIZE, sampler=train_subsampler
    )
  )

  cv_val_dls.append(
    torch.utils.data.DataLoader(
      cv_dataset, batch_size=K_FOLD_CV_BATCH_SIZE, sampler=val_subsampler
    )
  )

  cv_test_dls.append(
    torch.utils.data.DataLoader(
      fma_dataset_test, batch_size=K_FOLD_CV_BATCH_SIZE
    )
  )

cv_data_logs = {
  "data_type": DATASET_TYPE,
  "dataset_size": DATASET_SIZE,
  "batch_size": K_FOLD_CV_BATCH_SIZE,
  "num_samples_per_second": DATASET_NUM_SAMPLES_PER_SECOND,
  "num_channels": DATASET_NUM_CHANNELS
}

In [33]:
def perform_k_fold_cv(
  cv_id,
  cv_num_folds,
  cv_models, cv_optimizers, cv_criterions,
  batch_size, 
  cv_train_dls, cv_val_dls, cv_test_dls, 
  cv_num_epochs, 
  cv_device, 
  cv_print_freq, cv_ckp_freq, 
  cv_ckp_dir,
  cv_should_close_tqdm_prog_bars_when_done
):

  cv_training_logs = {}

  pbar_folds = tqdm(range(cv_num_folds), colour="#b22222")

  for fold in pbar_folds:
    pbar_folds.set_description(f"fold {fold}")

    cv_ckp_fold_dir = f"{cv_ckp_dir}/fold_{fold}"

    training_log = train_model(
      model=cv_models[fold], 
      optimizer=cv_optimizers[fold], criterion=cv_criterions[fold],
      batch_size=batch_size,
      train_dl=cv_train_dls[fold], val_dl=cv_val_dls[fold], test_dl=cv_test_dls[fold],
      num_epochs=cv_num_epochs, 
      device=cv_device,
      print_freq=cv_print_freq, ckp_freq=cv_ckp_freq, 
      ckp_dir=cv_ckp_fold_dir, ckp_name=f"{cv_id}_fold_{fold}",
      should_close_tqdm_prog_bars_when_done=cv_should_close_tqdm_prog_bars_when_done,
    )

    cv_training_logs[str(fold)] = training_log

    pbar_folds.update(1)

  return cv_training_logs
  
  

In [34]:
k_fold_cv_id = gen_train_id()

k_fold_cv_training_logs = perform_k_fold_cv(
  cv_id=k_fold_cv_id,
  cv_num_folds=K_FOLD_CV_NUM_FOLDS,
  cv_models=cv_models, cv_optimizers=cv_optimizers, cv_criterions=cv_criterions,
  batch_size=K_FOLD_CV_BATCH_SIZE, 
  cv_train_dls=cv_train_dls, cv_val_dls=cv_val_dls, cv_test_dls=cv_test_dls,
  cv_num_epochs=K_FOLD_CV_NUM_EPOCHS, 
  cv_device=device, 
  cv_print_freq=K_FOLD_CV_PRINT_FREQ, cv_ckp_freq=K_FOLD_CV_CKP_FREQ, 
  cv_ckp_dir=f"{K_FOLD_CV_CKP_FOLDER}/{k_fold_cv_id}",
  cv_should_close_tqdm_prog_bars_when_done=K_FOLD_CV_SHOULD_CLOSE_TQDM_PROG_BARS_WHEN_DONE
)

k_fold_cv_stats = {
  "stats_type": "k_fold_cross_validation",
  "k_folds_cv_num_folds": K_FOLD_CV_NUM_FOLDS,
  "data_logs": cv_data_logs,
  "optimizer_config": cv_optimizers_configs[0], # all the same, one is enough
  "model_setup": cv_models[0].get_model_setup(), # all the same, one is enough
  "training_logs": k_fold_cv_training_logs,
}

save_dict_to_disk(
  dict=k_fold_cv_stats,
  full_path=f"{K_FOLD_CV_LOGS_FOLDER}/{k_fold_cv_id}/{k_fold_cv_id}_stats.json"
)

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/105 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

epoch: 5
      train loss: 2865.691900730133, train acc: 0.2767106890678406
      val loss  : 1435.3710670471191, val acc  : 0.2589928209781647

epoch: 10
      train loss: 2802.216941356659, train acc: 0.3529411554336548
      val loss  : 1425.364797592163, val acc  : 0.30215826630592346

epoch: 15
      train loss: 2760.5161814689636, train acc: 0.40096038579940796
      val loss  : 1417.3666973114014, val acc  : 0.3117506206035614

epoch: 20
      train loss: 2737.557002544403, train acc: 0.4213685393333435
      val loss  : 1404.5759601593018, val acc  : 0.3465227782726288

epoch: 25
      train loss: 2688.1250414848328, train acc: 0.47298917174339294
      val loss  : 1411.1642217636108, val acc  : 0.33693045377731323

epoch: 30
      train loss: 2648.9542214870453, train acc: 0.5036014318466187
      val loss  : 1400.5500745773315, val acc  : 0.3441247045993805

epoch: 35
      train loss: 2609.8220479488373, train acc: 0.5480191707611084
      val loss  : 1401.5773603916168, val

  0%|          | 0/18 [00:00<?, ?it/s]

epoch: 50
      train loss: 2509.552511692047, train acc: 0.6344537734985352
      val loss  : 1385.4403748512268, val acc  : 0.3609112799167633

      test loss : 467.13998651504517, test acc : 0.3057554066181183


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/105 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

epoch: 5
      train loss: 2843.6626554727554, train acc: 0.29274144768714905
      val loss  : 1415.6984040737152, val acc  : 0.31092435121536255

epoch: 10
      train loss: 2802.2284297943115, train acc: 0.33833232522010803
      val loss  : 1410.2507808208466, val acc  : 0.33253300189971924

epoch: 15
      train loss: 2764.7923772335052, train acc: 0.3833233118057251
      val loss  : 1396.6739814281464, val acc  : 0.34693875908851624

epoch: 20
      train loss: 2726.8604683876038, train acc: 0.4199160039424896
      val loss  : 1388.1588621139526, val acc  : 0.37575027346611023

epoch: 25
      train loss: 2696.170589566231, train acc: 0.4427114427089691
      val loss  : 1382.5210319757462, val acc  : 0.3697478771209717

epoch: 30
      train loss: 2659.1684885025024, train acc: 0.47150567173957825
      val loss  : 1387.9360805749893, val acc  : 0.394957959651947

epoch: 35
      train loss: 2607.6289899349213, train acc: 0.5410917401313782
      val loss  : 1386.2552857398987

  0%|          | 0/18 [00:00<?, ?it/s]

epoch: 50
      train loss: 2512.7059338092804, train acc: 0.6184762716293335
      val loss  : 1376.858373761177, val acc  : 0.38895556330680847

      test loss : 469.822368144989, test acc : 0.3381294906139374


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/105 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

epoch: 5
      train loss: 2848.1731390953064, train acc: 0.2939411997795105
      val loss  : 1427.7052655220032, val acc  : 0.304921954870224

epoch: 10
      train loss: 2798.194926261902, train acc: 0.3533293306827545
      val loss  : 1413.7291071414948, val acc  : 0.3313325345516205

epoch: 15
      train loss: 2762.613587975502, train acc: 0.3881223499774933
      val loss  : 1406.9612481594086, val acc  : 0.3481392562389374

epoch: 20
      train loss: 2730.4877676963806, train acc: 0.39772045612335205
      val loss  : 1406.8977580070496, val acc  : 0.33733493089675903

epoch: 25
      train loss: 2685.3711663484573, train acc: 0.4655068814754486
      val loss  : 1398.3675383329391, val acc  : 0.3601440489292145

epoch: 30
      train loss: 2673.1147676706314, train acc: 0.4649069905281067
      val loss  : 1385.7636787891388, val acc  : 0.3673469126224518

epoch: 35
      train loss: 2617.5842628479004, train acc: 0.5020995736122131
      val loss  : 1387.150158882141, val a

  0%|          | 0/18 [00:00<?, ?it/s]

epoch: 50
      train loss: 2524.9277789592743, train acc: 0.6088781952857971
      val loss  : 1384.5257527828217, val acc  : 0.3685474097728729

      test loss : 466.57611989974976, test acc : 0.34532374143600464


# Perform end-to-end training

(using hyperparams found via k-fold cross validation)

In [35]:
LR = 0.001
MOMENTUM = 0.9
WEIGHT_DECAY = 1e-6
OPTIMIZER_NAME = "SGD"