In [None]:
# select a GPU
import os

# Both variables are set here, in the first cell, so they are already in the
# environment when TensorFlow is imported further down. Only device "0" is
# listed as visible, with the device order set to "PCI_BUS_ID".
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0",
})


In [None]:
#imports 
import sys
import numpy as np
import matplotlib.pyplot as plt
import h5py
import pandas as pd
from sklearn.metrics import confusion_matrix
import librosa
import soundfile as sound

import tensorflow
from tensorflow.keras.optimizers import SGD

# Report the versions of the audio and deep-learning libraries in use, so a
# saved run records the environment it was produced with.
for _label, _module in (("Librosa", librosa),
                        ("Pysoundfile", sound),
                        ("keras", tensorflow.keras),
                        ("tensorflow", tensorflow)):
    print(_label + " version = ", _module.__version__)

# Put the parent directory first on the module search path; the project
# modules imported right below are presumably located there.
sys.path.insert(0, "../")
from NNets import model_resnet_updated
from DCASE_training_functions import LR_WarmRestart, MixupGenerator
from DCASE_plots import plot_confusion_matrix



In [None]:
# Root folder of the development dataset (relative to this notebook).
BasePath = '../../Data/TAU-urban-acoustic-scenes-2020-3class-development/'

# Fold-1 split definitions: tab-separated files listing filename and scene label.
_SetupDir = BasePath + 'evaluation_setup/'
TrainFile = _SetupDir + 'fold1_train.csv'
ValFile = _SetupDir + 'fold1_evaluate.csv'

# Audio format constants used throughout the notebook.
sr = 48000               # sample rate (Hz)
num_audio_channels = 2   # number of audio channels fed to the network

In [None]:
# Length of each audio clip, in seconds.
SampleDuration = 10

# ---- log-mel spectrogram parameters ----
NumFreqBins = 256
NumFFTPoints = 4096
# The hop is a quarter of the FFT size.
HopLength = NumFFTPoints // 4
# Number of frames needed to span one clip, rounded up.
NumTimeBins = int(np.ceil((SampleDuration * sr) / HopLength))

# ---- training parameters ----
init_lr = 0.1        # initial learning rate for SGD and the warm-restart schedule
batch_size = 30      # divisible by 3, due to class balance strategy
num_epochs = 126
mixup_alpha = 0.4    # passed to MixupGenerator as `alpha`
crop_length = 400    # passed to MixupGenerator as `crop_length`


In [None]:
#load filenames and labels
# Both split files are tab-separated with (at least) 'filename' and
# 'scene_label' columns.
dev_train_df = pd.read_csv(TrainFile,sep='\t', encoding='ASCII')
dev_val_df = pd.read_csv(ValFile,sep='\t', encoding='ASCII')
wavpaths_train = dev_train_df['filename'].tolist()
wavpaths_val = dev_val_df['filename'].tolist()

# The class list is defined once, from the training fold (sorted unique
# labels), and is the single source of truth for the label -> integer mapping.
ClassNames = np.unique(dev_train_df['scene_label'])
NumClasses = len(ClassNames)

# Encode BOTH folds against the same category list. Encoding each frame
# independently (astype('category') per frame) only yields matching codes when
# the two folds happen to contain exactly the same set of labels.
y_train_labels = pd.Categorical(dev_train_df['scene_label'], categories=ClassNames).codes
y_val_labels = pd.Categorical(dev_val_df['scene_label'], categories=ClassNames).codes

# pd.Categorical encodes values outside `categories` as -1; fail loudly rather
# than letting a negative index be turned into a one-hot row further down.
if (y_val_labels < 0).any():
    raise ValueError("validation fold contains scene labels that are absent from the training fold")

# One-hot targets of shape (num_examples, NumClasses).
y_train = tensorflow.keras.utils.to_categorical(y_train_labels, NumClasses)
y_val = tensorflow.keras.utils.to_categorical(y_val_labels, NumClasses)



In [None]:
# Display the sorted unique scene labels found in the training fold.
ClassNames

In [None]:
# Load the precomputed feature arrays for the training and validation splits
# from .npy files in the current working directory.
# NOTE(review): the file names suggest 256 frequency bins and a 4096-point FFT,
# matching NumFreqBins / NumFFTPoints above — confirm against the script that
# produced them.
LM_train=np.load('Task1b_LM_train_256_4096.npy')
LM_val=np.load('Task1b_LM_val_256_4096.npy')

In [None]:
# Move both feature arrays to the log domain; the 1e-8 offset keeps the
# logarithm finite where an entry is exactly zero.
# Caution: the names are rebound to their own transformed values, so running
# this cell a second time without re-running the load cell applies the log twice.
LM_train, LM_val = (np.log(_features + 1e-8) for _features in (LM_train, LM_val))

In [None]:
# Sanity check: display the shapes of the training and validation feature arrays.
LM_train.shape,LM_val.shape

In [None]:
#create and compile the model
wd = 5e-4          # forwarded to the network builder as `wd` (presumably weight decay)
num_filters = 25   # forwarded to the network builder as `num_filters`

# The middle dimension of the input shape is left unspecified (None).
_input_shape = [NumFreqBins, None, num_audio_channels]
model = model_resnet_updated(
    NumClasses,
    input_shape=_input_shape,
    num_filters=num_filters,
    wd=wd,
    binarise_weights=True,
)

# Plain SGD with momentum; the optimizer's own decay is switched off (decay=0).
_sgd = SGD(lr=init_lr, decay=0, momentum=0.9, nesterov=False)
model.compile(optimizer=_sgd,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.summary()


In [None]:
#create data generator
# Training batches come from the project's MixupGenerator, configured with the
# mixup alpha and crop length from the parameter cell; UseBalance is off.
TrainDataGen = MixupGenerator(
    LM_train,
    y_train,
    alpha=mixup_alpha,
    batch_size=batch_size,
    crop_length=crop_length,
    UseBalance=False,
)

# The generator's own length is used as the number of batches per epoch.
steps_per_epoch = TrainDataGen.__len__()

# Restart epochs 1, 3, 7, ..., 255, i.e. 2**k - 1 for k = 1..8 (as floats).
_restart_epochs = [2.0**k - 1 for k in range(1, 9)]
lr_scheduler = LR_WarmRestart(
    nbatch=steps_per_epoch,
    initial_lr=init_lr,
    min_lr=init_lr*1e-4,
    epochs_restart=_restart_epochs,
)
callbacks = [lr_scheduler]


In [None]:
#train the model
# Fit from the training generator and evaluate on the full validation arrays
# after every epoch; the returned History object is kept for the plots below.
_fit_options = dict(
    steps_per_epoch=steps_per_epoch,
    epochs=num_epochs,
    validation_data=(LM_val, y_val),
    callbacks=callbacks,
    workers=1,
    max_queue_size=100,
    verbose=1,
)
history = model.fit_generator(TrainDataGen, **_fit_options)

In [None]:
# Save the trained weights without clobbering an earlier run: a previous
# version of this cell silently overwrote an existing weights file.
_weights_path = 'Models/Task1B_starter.h5'
os.makedirs(os.path.dirname(_weights_path), exist_ok=True)
if os.path.exists(_weights_path):
    # Pick the first unused numbered sibling, e.g. Models/Task1B_starter_1.h5
    _stem, _ext = os.path.splitext(_weights_path)
    _suffix = 1
    while os.path.exists('{}_{}{}'.format(_stem, _suffix, _ext)):
        _suffix += 1
    _weights_path = '{}_{}{}'.format(_stem, _suffix, _ext)
model.save_weights(_weights_path)
print("saved weights to", _weights_path)

In [None]:
# Plot validation and training accuracy per epoch.
# Depending on the Keras version, the metric compiled as 'accuracy' is logged
# either as 'acc'/'val_acc' or as 'accuracy'/'val_accuracy'; use whichever key
# this History object actually holds instead of failing with a KeyError.
_logs = history.history
_val_curve = _logs['val_acc'] if 'val_acc' in _logs else _logs['val_accuracy']
_train_curve = _logs['acc'] if 'acc' in _logs else _logs['accuracy']

plt.figure(figsize=(20,10))
plt.plot(_val_curve, label='validation')
plt.plot(_train_curve, label='training')
plt.ylim([0.8,0.98])
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend();

In [None]:
# Validation accuracy at selected epoch indices.
# NOTE(review): each index equals (restart epoch - 2) for the restarts listed
# in epochs_restart, so these look like the epochs just before a learning-rate
# restart — confirm against LR_WarmRestart.
# Indices beyond the number of epochs actually trained are skipped: with
# num_epochs = 126 the history has only 126 entries, so indexing [253] or
# [509] directly raises IndexError.
_logs = history.history
_val_acc = _logs['val_acc'] if 'val_acc' in _logs else _logs['val_accuracy']
_checkpoint_epochs = (13, 29, 61, 125, 253, 509)
tuple(_val_acc[i] for i in _checkpoint_epochs if i < len(_val_acc))

In [None]:
# Best validation accuracy reached in any epoch. The key is 'val_acc' or
# 'val_accuracy' depending on the Keras version, so accept either.
max(history.history['val_acc'] if 'val_acc' in history.history else history.history['val_accuracy'])

In [None]:
# Run the trained model on the validation features. The later cells take an
# argmax over the last axis of y_pred to obtain the predicted class index.
y_pred = model.predict(LM_val)

In [None]:
# Sanity check: display the shape of the prediction array.
y_pred.shape

In [None]:
# Confusion matrix of true vs. predicted class indices on the validation set.
# The class names must be listed in the same order as the integer class codes.
# Those codes follow the sorted label order, which is what ClassNames
# (np.unique of the training labels) holds; Series.unique() returns labels in
# order of first appearance and could attach the wrong name to a row/column.
plot_confusion_matrix(np.argmax(y_val,axis=-1), np.argmax(y_pred,axis=-1), ClassNames.tolist(),
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Blues)

In [None]:
#get metrics

# Per-class multipliers applied to the predicted scores before the argmax.
# Justification for transport to be higher is that each example was used less
# in training. All ones leaves the argmax unchanged.
weightings = [1,1,1]

y_pred_val = np.argmax(np.asarray(weightings)*y_pred, axis=-1)
y_val_labels = np.argmax(y_val, axis=-1)
Overall_accuracy = np.sum(y_pred_val==y_val_labels)/LM_val.shape[0]
print("overall accuracy: ", Overall_accuracy)

# Rows are true classes, columns are predicted classes. Passing `labels`
# keeps the matrix square over all classes even if one of them never occurs
# in either the targets or the predictions.
conf_matrix = confusion_matrix(y_val_labels, y_pred_val,
                               labels=np.arange(y_pred.shape[-1]))
_counts = conf_matrix.astype('float64')
_true_totals = _counts.sum(axis=1, keepdims=True)   # examples per true class (row sums)
_pred_totals = _counts.sum(axis=0, keepdims=True)   # examples per predicted class (column sums)

# Recall normalises each ROW by its total; precision normalises each COLUMN by
# its total (the column totals must broadcast along columns, not rows).
# Classes with a zero total get 0 instead of a division-by-zero NaN.
conf_mat_norm_recall = np.divide(_counts, _true_totals,
                                 out=np.zeros_like(_counts), where=_true_totals > 0)
conf_mat_norm_precision = np.divide(_counts, _pred_totals,
                                    out=np.zeros_like(_counts), where=_pred_totals > 0)
recall_by_class = np.diagonal(conf_mat_norm_recall)
precision_by_class = np.diagonal(conf_mat_norm_precision)
mean_recall = np.mean(recall_by_class)
mean_precision = np.mean(precision_by_class)

print("per-class accuracy (recall): ",recall_by_class)
print("per-class precision: ",precision_by_class)
print("mean per-class recall: ",mean_recall)
print("mean per-class precision: ",mean_precision)
