In [1]:
import torch, torchvision
import sys # Python system library needed to load custom functions
import math # module with access to mathematical functions
import os # for changing the directory

import numpy as np  # for performing calculations on numerical arrays
import pandas as pd  # home of the DataFrame construct, _the_ most important object for Data Science
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt  # allows creation of insightful plots

# Make the project's helper packages importable.
# The directories are written relative to the folder the notebook is started from, so
# they are converted to absolute paths before being registered: a relative sys.path
# entry is resolved against the *current* working directory at import time, and the
# os.chdir(PROJECT_DIR) call at the end of this cell changes that directory.
# NOTE(review): this assumes the cell is first executed from the notebook's own folder.
for _helper_dir in ('../../audio_preprocessing', '../../src', '../../model_training_utils'):
    _helper_dir = os.path.abspath(_helper_dir)
    if _helper_dir not in sys.path:  # avoid stacking duplicates when the cell is re-run
        sys.path.append(_helper_dir)


import preprocessing_func_3
from generator_to_dataset_3 import NormalisedDataSet
from gdsc_utils import PROJECT_DIR
import model_training
import model_eval

# Switch the working directory to the project root so that the relative 'data/...' and
# 'models/...' paths used by the later cells are resolved from there.
os.chdir(PROJECT_DIR)

In [2]:
# Read the two index CSVs (original clips and the second, "argumentation" set) and stack
# them into one frame with a fresh 0..n-1 index.
source_csvs = (
    'data/big_data_processed_train_val_one_sec.csv',
    'data/big_argumentation_data_train_val_one_sec.csv',
)
df_big_data, df_big_argumented_data = (pd.read_csv(csv_file) for csv_file in source_csvs)
df = pd.concat((df_big_data, df_big_argumented_data), ignore_index=True)
df.tail()

Unnamed: 0.1,Unnamed: 0,file_path,label
89379,65625,data/big_data_upsample_train_and_val/65625.wav,65
89380,65626,data/big_data_upsample_train_and_val/65626.wav,65
89381,65627,data/big_data_upsample_train_and_val/65627.wav,65
89382,65628,data/big_data_upsample_train_and_val/65628.wav,65
89383,65629,data/big_data_upsample_train_and_val/65629.wav,65


In [3]:
# Disabled alternative: read previously saved normalisation statistics and class weights
# from a JSON file (keys "mean", "std", "weights") instead of recomputing them.
# NOTE(review): while this cell stays commented out it assigns nothing, so it does not
# provide `mean`, `std` or `class_weights` to the cells below.
# import json

# with open('audio_preprocessing/saved_data/upsampled_data_size_128_512_train_val_one_sec.json') as f:
#     my_info = json.load(f)

# mean, std, class_weights = my_info["mean"], my_info["std"], my_info["weights"]

In [4]:
# Split every class 80/20 into training and validation rows, then stitch the per-class
# pieces back together. Splitting class by class keeps each label represented in both
# partitions.
#
# The seed is fixed so the partition is identical after every kernel restart; without it
# a resumed training run would validate on samples it had already trained on.
SPLIT_SEED = 42

train_df_list = []
val_df_list = []

# groupby walks the labels that actually occur, in ascending order, so no class count
# has to be hard-coded and an absent label cannot produce an empty frame (which
# train_test_split rejects with a ValueError).
for _, class_rows in df.groupby("label"):
    class_train_rows, class_val_rows = train_test_split(
        class_rows, test_size=0.2, random_state=SPLIT_SEED
    )
    train_df_list.append(class_train_rows)
    val_df_list.append(class_val_rows)

df_train = pd.concat(train_df_list, ignore_index=True)
df_val = pd.concat(val_df_list, ignore_index=True)

In [5]:
# Display (rows, columns) of the training and validation frames as a quick sanity check.
tuple(frame.shape for frame in (df_train, df_val))

((71481, 3), (17903, 3))

In [6]:
# Disabled: builds a generator of un-normalised samples from the training rows
# (file paths plus labels).
# The two keyword arguments prefixed with "# #" are commented out a second time, so they
# would stay disabled even if one level of '#' were removed from this cell.
# paths, labels = list(df_train["file_path"]), list(df_train["label"])

# non_normal_gen = preprocessing_func_3.non_normalised_data_generator(
#     paths=paths,
#     labels=labels,
# #     image_preprocess_fn=resize_function(output_shape=(40,256)),
# #     mel_transform_fn=calculate_melsp
# )

In [7]:
# Compute the normalisation statistics (and class weights) from the training rows.
# This computation is active, and builds its own generator, so that `mean` and `std`
# exist after "Restart Kernel -> Run All"; the cells below read both names.
# NOTE(review): this iterates over every training file and may take a while.
non_normal_gen = preprocessing_func_3.non_normalised_data_generator(
    paths=list(df_train["file_path"]),
    labels=list(df_train["label"]),
)

# (128, 512) is presumably the image shape the statistics are computed for -- confirm
# against preprocessing_func_3. The third name keeps its original spelling because the
# disabled save_as_json cell further down refers to it.
mean, std, class_wights = preprocessing_func_3.get_stats_and_class_weights_of_non_normalised_data_gen(
    non_normal_gen, (128, 512))

In [8]:
# Show the normalisation statistics that the datasets in the following cell are built with.
mean, std

(tensor([-1.1211]), tensor([0.7912]))

In [9]:
# Disabled helper: writes the normalisation statistics and class weights to a JSON file.
# NOTE(review): the file name used here ("..._train_and_val.json") differs from the one the
# disabled loading cell near the top of the notebook reads ("..._train_val_one_sec.json"),
# and the description mentions 1.5 seconds while the CSV names say "one_sec" -- confirm
# both before re-enabling.
# import json

# def save_as_json(path, description, mean, std, weights):
#     my_dict = {
#         "description": description,
#         "mean": float(mean),
#         "std": float(std),
#         "weights": list(weights.astype(float)),  # use the argument, not the global class_wights
#     }
#     with open(path, 'w') as f:
#         json.dump(my_dict, f)

# save_as_json(
#     "audio_preprocessing/saved_data/upsampled_data_size_128_512_train_and_val.json",
#     "seconds 1.5, image shape (128,512)", mean, std, class_wights)

In [10]:
# Keyword arguments that are identical for the training and the validation dataset:
# the two generator factories and the normalisation statistics.
_common_dataset_kwargs = {
    "non_normalised_data_generator_fn": preprocessing_func_3.non_normalised_data_generator,
    "normalised_data_generator_fn": preprocessing_func_3.normalised_data_generator,
    "mean": mean,
    "std": std,
}

# The shuffle flag is switched on for the training dataset only.
train_dataset = NormalisedDataSet(df=df_train, shuffle=True, **_common_dataset_kwargs)
val_dataset = NormalisedDataSet(df=df_val, shuffle=False, **_common_dataset_kwargs)

# Wrap both datasets in loaders that yield batches of 28 samples.
train_dataloader, val_dataloader = (
    torch.utils.data.DataLoader(dataset, batch_size=28)
    for dataset in (train_dataset, val_dataset)
)

In [11]:
# Ask the project's training utilities which device to use; the model is moved onto it
# in the next cell.
# NOTE(review): presumably prefers a GPU when one is available -- confirm in
# model_training.get_device.
device = model_training.get_device()

In [12]:
from torchvision.models import regnet_y_3_2gf
import torch.optim as optim
import torch.nn as nn

# Earlier experiment, kept for reference:
# resnet_model = resnet34(weights=ResNet34_Weights.DEFAULT)

# RegNetY-3.2GF with a 66-way output layer.
regnet_model = regnet_y_3_2gf(num_classes=66)

# Replace the first stem convolution with one that accepts single-channel input;
# all other settings match the layer shown in the printed architecture below.
single_channel_stem_conv = nn.Conv2d(
    in_channels=1,
    out_channels=32,
    kernel_size=(3, 3),
    stride=(2, 2),
    padding=(1, 1),
    bias=False,
)
regnet_model.stem[0] = single_channel_stem_conv

# Move all parameters and buffers onto the selected device.
regnet_model = regnet_model.to(device)

In [13]:
# Print the layer structure, e.g. to confirm that stem[0] now takes one input channel.
print(regnet_model)

RegNet(
  (stem): SimpleStemIN(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (trunk_output): Sequential(
    (block1): AnyStage(
      (block1-0): ResBottleneckBlock(
        (proj): Conv2dNormActivation(
          (0): Conv2d(32, 72, kernel_size=(1, 1), stride=(2, 2), bias=False)
          (1): BatchNorm2d(72, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
        (f): BottleneckTransform(
          (a): Conv2dNormActivation(
            (0): Conv2d(32, 72, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (1): BatchNorm2d(72, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): ReLU(inplace=True)
          )
          (b): Conv2dNormActivation(
            (0): Conv2d(72, 72, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=3, bias=False)
            

In [14]:
# Plain (unweighted) cross-entropy as the classification criterion.
loss = nn.CrossEntropyLoss()

# Adam with its default hyper-parameters, using the AMSGrad variant.
optimizer = optim.Adam(params=regnet_model.parameters(), amsgrad=True)

In [15]:
# Output path and epoch settings for this run, gathered in one place.
run_settings = {
    "model_path": "models/regnet_y_3_2gf_train_and_val",
    "start_epoch": 0,
    "epochs": 500,
    "early_stop_thresh": 50,
}

# Hand the model, optimiser, criterion and both loaders to the project's training routine.
model_training.training(
    model=regnet_model,
    optimizer=optimizer,
    loss_fn=loss,
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader,
    **run_settings,
)

End of epoch 0: training accuracy = 68.20%, training loss = 1.0768036263868288, training time taken = 889.99 seconds
End of epoch 0: validation accuracy = 87.39%, validation loss = 0.39692815522648744, validation time taken = 255.03 seconds
End of epoch 1: training accuracy = 90.09%, training loss = 0.3175352533715254, training time taken = 888.53 seconds
End of epoch 1: validation accuracy = 92.76%, validation loss = 0.22927062902401757, validation time taken = 193.67 seconds
End of epoch 2: training accuracy = 93.84%, training loss = 0.19049452508170825, training time taken = 891.19 seconds
End of epoch 2: validation accuracy = 95.10%, validation loss = 0.14930551547325416, validation time taken = 192.69 seconds
End of epoch 3: training accuracy = 95.68%, training loss = 0.1358780658182725, training time taken = 891.00 seconds
End of epoch 3: validation accuracy = 96.53%, validation loss = 0.11330742101380054, validation time taken = 191.91 seconds
End of epoch 4: training accuracy =

KeyboardInterrupt: 

In [16]:
# Persist the model as it currently stands in memory.
# Passing the whole nn.Module to torch.save pickles the object itself, so loading the file
# later requires the same class definitions to be importable; the PyTorch documentation
# recommends saving `model.state_dict()` for portability.
# NOTE(review): the epoch number in the file name is hard-coded -- confirm it matches the
# epoch the interrupted training run actually reached.
torch.save(regnet_model, 'models/regnet_y_3_2gf_train_and_val/regnet_model_epoch_11.pth')