# **Model 2**

## 1. Python packages and custom functions

### 1.1 Python packages

In [None]:
%%capture
!pip install dcase_util
!pip install pytorch-ignite
!pip install torch_audiomentations

In [None]:
# Required functions/packages:

# General:
import numpy as np
import pandas as pd
import os
import ntpath
import random
import json
import math
import dcase_util
from sklearn.metrics import f1_score, classification_report, confusion_matrix
from tqdm import tqdm
from glob import glob

# PyTorch:
import torch
import torch.optim as optim
import torch.nn.functional as F
import torch.nn as nn
import torchaudio
from torchvision import transforms, utils
from torch.utils.data import Dataset, DataLoader
from ignite._utils import convert_tensor
from torch_audiomentations import AddColoredNoise, ApplyImpulseResponse
from torch.nn import init
from torch.optim.lr_scheduler import OneCycleLR

# Ignite Framework:
from ignite.engine import Engine, Events, create_supervised_trainer, create_supervised_evaluator
from ignite.metrics import Accuracy, Loss, MetricsLambda, Metric
from ignite.handlers import ModelCheckpoint, EarlyStopping
from ignite._utils import convert_tensor

# Ignore warnings:
import warnings
warnings.filterwarnings('ignore')

# Determinism Settings (comment out for a non-deterministic run):
seed = 1000
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

### 1.2 Custom Functions

In [None]:
class F1Score(Metric):
    """Macro-averaged F1 score over all batches seen since the last ``reset``.

    ``update`` receives ``(y_pred, y)`` where ``y_pred`` holds per-class scores
    along dimension 1 and ``y`` holds the integer class labels. Predictions and
    labels are collected and scored once in ``compute``.

    The score is computed on the concatenated epoch rather than as the mean of
    per-batch scores: a macro F1 averaged over small batches is not the macro
    F1 of the whole set, and would not be comparable with the dataset-level
    ``f1_score`` used when evaluating on the test set.
    """

    def __init__(self, *args, **kwargs):
        # Buffers are created before the base-class constructor runs, as the
        # base class presumably calls reset() during construction.
        self._predictions = []
        self._targets = []
        super().__init__(*args, **kwargs)

    def update(self, output):
        """Store the predicted classes and true labels of one batch."""
        y_pred, y = output[0].detach(), output[1].detach()

        _, predicted = torch.max(y_pred, 1)
        self._predictions.append(predicted.cpu())
        self._targets.append(y.cpu())

    def reset(self):
        """Forget everything accumulated so far."""
        self._predictions = []
        self._targets = []
        super(F1Score, self).reset()

    def compute(self):
        """Return the macro F1 of all accumulated predictions.

        Raises
        ------
        NotComputableError
            If ``update`` has not been called since the last ``reset``.
        """
        if not self._targets:
            from ignite.exceptions import NotComputableError
            raise NotComputableError('F1Score must have at least one example before it can be computed.')

        y_true = torch.cat(self._targets).numpy()
        y_hat = torch.cat(self._predictions).numpy()
        return f1_score(y_true, y_hat, average='macro', zero_division=1)

In [None]:
def classifier_calculator(n_bins=None, sub_spectrogram_size=None, hop_size=None):
    """Count the classifiers needed for a sub-spectrogram layout.

    A window of ``sub_spectrogram_size`` bins is slid over ``n_bins`` bins in
    steps of ``hop_size``; every window that fits gets one classifier, and one
    extra global classifier is always added. The bin range of every window is
    printed for reference.

    Parameters
    ----------
    n_bins : int
        Number of frequency bins of the full spectrogram.
    sub_spectrogram_size : int
        Number of bins per sub-spectrogram.
    hop_size : int
        Step, in bins, between the starts of consecutive sub-spectrograms.

    Returns
    -------
    int
        Number of sub-spectrogram classifiers plus one (the global classifier).

    Raises
    ------
    ValueError
        If ``hop_size`` is not positive while at least one window fits; the
        window would never advance and the count would never terminate.
    """
    last_start = n_bins - sub_spectrogram_size
    if hop_size <= 0 and last_start >= 0:
        raise ValueError('hop_size must be positive, got {!r}'.format(hop_size))

    bins_per_sub = []
    start = 0
    while start <= last_start:
        bins_per_sub.append([start, start + sub_spectrogram_size])
        start += hop_size

    # + 1 for global classifier:
    n_classifiers = len(bins_per_sub) + 1
    bins_per_sub.append(['GLOBAL'])
    print('Total number of classifiers (global included) =', n_classifiers)

    # Sub-spectrogram bin range:
    print(bins_per_sub)

    return n_classifiers

## 2. Loading and transforming data

In [None]:
"""
This script contains the basic building blocks of the DataLoader and Transforms for preprocessing the data in batches.

Updated February 2019
Sai Samarth R Phaye
"""

class ToTensor(object):
    """Turn the numpy arrays of a sample dictionary into torch tensors."""

    def __call__(self, sample):
        """Return a new dict whose 'data' and 'label' entries are tensors.

        The tensors are created with ``torch.from_numpy`` from the arrays
        stored under the same keys of ``sample``.
        """
        array = sample['data']
        target = sample['label']

        # Identity permutation of the four axes: the layout is left as it is.
        array = array.transpose(0, 1, 2, 3)

        converted = dict()
        converted['data'] = torch.from_numpy(array)
        converted['label'] = torch.from_numpy(target)
        return converted


In [None]:

class ToSubSpectrograms(object):
    """Cut a spectrogram into frequency bands and stack them on the last axis."""

    def __init__(self, sub_spectrogram_size=None, hop_size=None, n_bins=None):
        """
        Parameters
        ----------
        sub_spectrogram_size : int
            Number of bins in every sub-spectrogram.

        hop_size : int
            Step, in bins, between the starts of consecutive sub-spectrograms.

        n_bins : int
            Number of bins of the full spectrogram.
        """
        self.sub_spectrogram_size = sub_spectrogram_size
        self.hop_size = hop_size
        self.n_bins = n_bins

    def __call__(self, sample):
        """
        Parameters
        ----------
        sample : dict
            Holds the spectrogram tensor under 'data' and the label under 'label'.
            The spectrogram is sliced along its second axis and concatenated
            along its fourth axis.

        Returns
        -------
        sub_spectrograms : tensor
            The bands concatenated along axis 3. The band cut last (the one
            starting at the highest bin) comes first, the band starting at
            bin 0 comes last. An empty tensor if no band fits.
        label
            The label, passed through unchanged.
        """
        spectrogram = sample['data']
        label = sample['label']

        # Collect every band that fits, lowest start bin first.
        bands = []
        start = 0
        last_start = self.n_bins - self.sub_spectrogram_size
        while start <= last_start:
            stop = start + self.sub_spectrogram_size
            bands.append(spectrogram[:, start:stop, :, :])
            start += self.hop_size

        if len(bands) == 0:
            return torch.from_numpy(np.asarray([])), label
        if len(bands) == 1:
            return bands[0], label

        # Each new band is placed in front of the ones cut before it.
        return torch.cat(tuple(reversed(bands)), 3), label

In [None]:
class InsectDataset(Dataset):
    """Audio fragments served as dB-scaled spectrogram samples with class labels.

    Every item is read from a ``.wav`` file, optionally augmented, converted to
    a spectrogram and passed through ``transform``. The class of a file is the
    part of its file name before the first underscore, looked up in the label
    list stored at ``default_labels_path``.

    Parameters
    ----------
    data_dir : str or list of str
        With ``fragment=True``: a directory, relative to ``root_dir``, whose
        ``*.wav`` files form the dataset. Otherwise: the file paths themselves.
    root_dir : str
        Root directory; only used when ``fragment=True``.
    default_labels_path : str
        Path of a ``.npy`` file holding the class names; the position of a
        name in that array is its integer label.
    n_bins : int
        Number of frequency bins of the returned spectrogram.
    hop_length, n_fft, win_length : int
        STFT settings forwarded to torchaudio.
    top_db : float
        Forwarded to ``torchaudio.transforms.AmplitudeToDB``.
    f_max, f_min : float
        Frequency limits; only used by the 'MEL' representation.
    representation : {'LINEAR', 'MEL'}
        'LINEAR' resizes a linear-frequency spectrogram to ``n_bins`` rows by
        bicubic interpolation, 'MEL' computes a mel spectrogram with
        ``n_bins`` mel bands.
    transform : callable, optional
        Applied to the ``{'data', 'label'}`` dict before it is returned.
    augmentation : bool
        Whether to apply coloured noise and impulse-response augmentation.
    fragment : bool
        Selects how ``data_dir`` is interpreted (see above).
    n_frames : int or None
        Spectrograms with fewer time frames are zero-padded at the end up to
        this length so that items can be batched. ``None`` disables padding.
    ir_paths : str
        Location of the impulse-response files used for augmentation.
    """

    def __init__(self,
                 data_dir=None,
                 root_dir=None,
                 default_labels_path=None,
                 n_bins=None,
                 hop_length=None,
                 top_db=None,
                 f_max=None,
                 f_min=None,
                 n_fft=None,
                 win_length=None,
                 representation=None,
                 transform=None,
                 augmentation=None,
                 fragment=None,
                 n_frames=1501,
                 ir_paths='/content/drive/MyDrive/Thesis/baseline 2/irs'):

        species_list = np.load(default_labels_path).tolist()

        if fragment:
            # Sorted so that the index -> file mapping does not depend on the
            # order in which the file system happens to list the directory.
            wav_paths = sorted(glob(os.path.join(root_dir, data_dir, '*.wav')))
            file_ids = [ntpath.basename(path) for path in wav_paths]
            filenames = [os.path.join(data_dir, file_id) for file_id in file_ids]
        elif data_dir is not None:
            filenames = list(data_dir)
            file_ids = [ntpath.basename(path) for path in filenames]
        else:
            filenames, file_ids = [], []

        # Assigning variables:
        self.representation, self.hop_length, self.top_db, self.f_max, self.f_min = representation, hop_length, top_db, f_max, f_min
        self.n_fft, self.win_length, self.root_dir, self.transform, self.datalist = n_fft, win_length, root_dir, transform, filenames
        self.labels = [file_id.split('_')[0] for file_id in file_ids]  # target classes
        self.default_labels, self.n_bins, self.augmentation, self.fragment = species_list, n_bins, augmentation, fragment
        self.n_frames, self.ir_paths = n_frames, ir_paths

        # The augmentation transforms are built on first use and then reused.
        self._augmenters = None

    def __len__(self):
        """Return the number of audio files in the dataset."""
        return len(self.datalist)

    def _augment(self, audio, sr):
        """Return a random blend of the noisy and the noisy + reverberated signal."""
        if self._augmenters is None:
            add_colored_noise = AddColoredNoise(p=0.9, p_mode='per_example', mode='per_example', sample_rate=sr, min_snr_in_db=25, max_snr_in_db=40, min_f_decay=-2, max_f_decay=1.5)
            add_ir = ApplyImpulseResponse(p=0.7, p_mode='per_example', sample_rate=44100, mode='per_example', compensate_for_propagation_delay=True, ir_paths=self.ir_paths)
            self._augmenters = (add_colored_noise, add_ir)
        add_colored_noise, add_ir = self._augmenters

        # The transforms are called on a (batch, channel, samples) tensor.
        batched = audio.unsqueeze(0).unsqueeze(0)
        audio_noise = add_colored_noise(batched)
        audio_impulse = add_ir(audio_noise)
        mix_ratio = random.uniform(0, 1)
        audio_augmented = (mix_ratio * audio_noise) + ((1 - mix_ratio) * audio_impulse)
        return audio_augmented.squeeze(0).squeeze(0).to(torch.float32)

    def _spectrogram(self, audio, sr):
        """Return the dB-scaled spectrogram of ``audio`` in the configured representation."""
        if self.representation == 'LINEAR':
            spec = torchaudio.transforms.Spectrogram(n_fft=self.n_fft, hop_length=self.hop_length, win_length=self.win_length)(audio)
            spec_db = torchaudio.transforms.AmplitudeToDB(top_db=self.top_db)(spec)
            # interpolate() wants (batch, channel, height, width).
            spec_db = spec_db.unsqueeze(0).unsqueeze(0)
            spec_db = F.interpolate(spec_db, size=(self.n_bins, spec_db.shape[3]), mode='bicubic', align_corners=False, antialias=True)
            return spec_db.squeeze(0).squeeze(0)

        if self.representation == 'MEL':
            spec = torchaudio.transforms.MelSpectrogram(sample_rate=sr, n_fft=self.n_fft, hop_length=self.hop_length, win_length=self.win_length, n_mels=self.n_bins, f_min=self.f_min, f_max=self.f_max)(audio)
            return torchaudio.transforms.AmplitudeToDB(top_db=self.top_db)(spec)

        raise ValueError("representation must be 'LINEAR' or 'MEL', got {!r}".format(self.representation))

    def _pad_frames(self, logmel):
        """Zero-pad the time axis (axis 2) of a 4-D spectrogram up to ``n_frames``."""
        if self.n_frames is None:
            return logmel
        missing = self.n_frames - logmel.shape[2]
        if missing <= 0:
            return logmel
        # F.pad lists the padding from the last axis backwards:
        # (axis 3 before, axis 3 after, axis 2 before, axis 2 after).
        return F.pad(logmel, (0, 0, 0, missing), mode='constant', value=0)

    def __getitem__(self, idx):
        """Load file ``idx`` and return its spectrogram sample.

        Returns the dict ``{'data': ndarray, 'label': ndarray}``, where 'data'
        has four axes ``(1, bins, frames, 1)``, or whatever ``transform``
        makes of that dict.
        """
        if self.fragment:
            wav_name = os.path.join(self.root_dir, self.datalist[idx])
        else:
            wav_name = self.datalist[idx]

        audioContainer = dcase_util.containers.AudioContainer().load(filename=wav_name, fs=44100)
        sr = audioContainer.fs
        audio = torch.tensor(audioContainer.data).to(torch.float32)

        if self.augmentation:
            audio = self._augment(audio, sr)

        logmel = self._spectrogram(audio, sr)
        logmel = logmel.reshape(1, logmel.shape[0], logmel.shape[1], 1)
        logmel = self._pad_frames(logmel).to(torch.float32)

        label = np.asarray(self.default_labels.index(self.labels[idx]))

        sample = {'data': logmel.numpy(), 'label': label}
        if self.transform:
            sample = self.transform(sample)

        return sample

In [None]:
def get_data_loaders(train_batch_size=None,
                     test_batch_size=None,
                     sub_spectrogram_size=None,
                     hop_size=None,
                     n_bins=None,
                     use_cuda=None,
                     root_dir=None,
                     train_dir=None,
                     val_dir=None,
                     default_labels_path=None,
                     hop_length=None,
                     top_db=None,
                     f_max=None,
                     f_min=None,
                     n_fft=None,
                     win_length=None,
                     representation=None):
    """Build the training and validation data loaders.

    Both datasets read fragments from a directory below ``root_dir`` and share
    the same spectrogram settings and the same ToTensor -> ToSubSpectrograms
    transform. Only the training set is augmented and shuffled.

    Returns
    -------
    tuple
        ``(train_loader, val_loader)``.
    """
    # Worker processes and pinned memory are only requested when a GPU is used.
    loader_options = {}
    if use_cuda:
        loader_options = {'num_workers': 8, 'pin_memory': True}

    # Data transformation:
    pipeline = transforms.Compose([
        ToTensor(),
        ToSubSpectrograms(sub_spectrogram_size=sub_spectrogram_size, hop_size=hop_size, n_bins=n_bins),
    ])

    # Settings that are identical for both datasets.
    shared = dict(
        root_dir=root_dir,
        default_labels_path=default_labels_path,
        n_bins=n_bins,
        hop_length=hop_length,
        top_db=top_db,
        f_max=f_max,
        f_min=f_min,
        n_fft=n_fft,
        win_length=win_length,
        representation=representation,
        transform=pipeline,
        fragment=True,
    )

    train_set = InsectDataset(data_dir=train_dir, augmentation=True, **shared)
    val_set = InsectDataset(data_dir=val_dir, augmentation=False, **shared)

    train_loader = DataLoader(train_set, batch_size=train_batch_size, shuffle=True, **loader_options)
    val_loader = DataLoader(val_set, batch_size=test_batch_size, shuffle=False, **loader_options)

    return train_loader, val_loader




In [None]:
def prepare_batch(batch, device=None, non_blocking=False):
    """
    Split a batch into its two parts and pass each through ``convert_tensor``.

    ``batch`` must unpack into exactly two items (inputs and targets). Both
    are converted with the given ``device`` and ``non_blocking`` options and
    returned as a tuple in the same order.
    """
    inputs, targets = batch
    options = {'device': device, 'non_blocking': non_blocking}

    converted_inputs = convert_tensor(inputs, **options)
    converted_targets = convert_tensor(targets, **options)
    return converted_inputs, converted_targets

## 3. Model

In [None]:
class SubSpectralNet(nn.Module):
    """CNN with one classifier per sub-spectrogram plus a global classifier.

    Every sub-spectrogram has its own stack of five strided convolutions
    (ReLU followed by batch normalisation), global average pooling and two
    linear layers. The 32-dimensional outputs of the first linear layer of all
    stacks are concatenated and fed to a shared "global" head.

    Parameters
    ----------
    sub_spectrogram_size : int
        Number of bins per sub-spectrogram.
    hop_size : int
        Step, in bins, between consecutive sub-spectrograms.
    n_bins : int
        Number of bins of the full spectrogram.
    use_cuda : bool
        If true, ``forward`` moves its input to the GPU.
    n_classes : int
        Number of output classes of every classifier.

    ``forward`` takes a 5-D tensor whose second axis has one channel and whose
    last axis indexes the sub-spectrograms, and returns logits of shape
    ``(batch, n_sub_spectrograms + 1, n_classes)`` with the global classifier
    in the last position of axis 1.
    """

    def __init__(self, sub_spectrogram_size=None, hop_size=None, n_bins=None, use_cuda=None, n_classes=66):

        super(SubSpectralNet, self).__init__()
        self.sub_spectrogram_size, self.hop_size, self.n_bins, self.use_cuda = sub_spectrogram_size, hop_size, n_bins, use_cuda
        self.n_classes = n_classes

        self.n_sub_spectrograms = 0
        while(self.hop_size*self.n_sub_spectrograms <= self.n_bins - self.sub_spectrogram_size):
            self.n_sub_spectrograms = self.n_sub_spectrograms + 1

        print('The number of sub-spectrograms (global excluded) =', self.n_sub_spectrograms)

        # Convolutional stages. They are created in this order, and registered
        # under these names, so that random initialisation and state_dict keys
        # do not change.
        self.conv1, self.conv1_bn = self._conv_bank(1, 8, kernel_size=(5, 5), padding=(2, 2))
        self.conv2, self.conv2_bn = self._conv_bank(8, 16, kernel_size=(3, 3), padding=(1, 1))
        self.conv3, self.conv3_bn = self._conv_bank(16, 32, kernel_size=(3, 3), padding=(1, 1))
        self.conv4, self.conv4_bn = self._conv_bank(32, 64, kernel_size=(3, 3), padding=(1, 1))
        self.conv5, self.conv5_bn = self._conv_bank(64, 128, kernel_size=(3, 3), padding=(1, 1))

        # Per-sub-spectrogram classifier heads:
        self.ap = nn.ModuleList([nn.AdaptiveAvgPool2d(output_size=1) for _ in range(self.n_sub_spectrograms)])
        self.fc1 = nn.ModuleList([nn.Linear(128, 32) for _ in range(self.n_sub_spectrograms)])

        self.drop1 = nn.ModuleList([nn.Dropout(0.23) for _ in range(self.n_sub_spectrograms)])
        self.fc2 = nn.ModuleList([nn.Linear(32, self.n_classes) for _ in range(self.n_sub_spectrograms)])

        # Global head: starting at the largest power of two that does not
        # exceed the concatenated feature size, halve the width while it is
        # at least 64. bit_length() gives floor(log2) exactly, without the
        # rounding error of a floating-point logarithm.
        concat_features = int(32*self.n_sub_spectrograms)
        neurons = 1 << (concat_features.bit_length() - 1)

        global_layers = []
        tempNeurons = concat_features
        count = 0

        while(neurons >= 64):
            count += 1
            global_layers.append(nn.Linear(tempNeurons, neurons))
            global_layers.append(nn.Dropout(0.23))
            tempNeurons = neurons
            neurons = neurons // 2

        print('Amount of extra fcglobal iterations added =', count)
        if count == 0:
            global_layers.append(nn.Dropout(0.23))
            print('Extra dropout layer added if no extra layers added (0) to ensure the global and sub-spec has the same layers')

        global_layers.append(nn.Linear(tempNeurons, self.n_classes))
        self.fcGlobal = nn.ModuleList(global_layers)

    def _conv_bank(self, in_channels, out_channels, kernel_size, padding):
        """Create one stride-2 conv + batch-norm pair per sub-spectrogram.

        The convolution weights are re-initialised with Kaiming-normal and the
        biases are zeroed. Returns ``(convs, norms)`` as two ModuleLists.
        """
        convs = nn.ModuleList([nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=(2, 2), padding=padding) for _ in range(self.n_sub_spectrograms)])
        norms = nn.ModuleList([nn.BatchNorm2d(out_channels) for _ in range(self.n_sub_spectrograms)])
        for conv in convs:
            init.kaiming_normal_(conv.weight, a=0.1)
            conv.bias.data.zero_()
        return convs, norms

    def forward(self, x):
        """Return the logits of every sub-classifier followed by the global one."""
        logits = []
        intermediate = []
        x = x.float()
        if self.use_cuda:
            x = x.cuda()
        input_var = x

        conv_stages = (
            (self.conv1, self.conv1_bn),
            (self.conv2, self.conv2_bn),
            (self.conv3, self.conv3_bn),
            (self.conv4, self.conv4_bn),
            (self.conv5, self.conv5_bn),
        )

        # For every sub-spectrogram:
        for i in range(input_var.shape[4]):
            h = input_var[:, :, :, :, i]
            for convs, norms in conv_stages:
                h = norms[i](F.relu(convs[i](h)))

            h = self.ap[i](h)
            h = h.view(h.shape[0], -1)
            h = self.fc1[i](h)
            # The global head sees the fc1 output before dropout and ReLU.
            intermediate.append(h)
            h = self.drop1[i](h)
            h = F.relu(h)
            h = self.fc2[i](h)

            logits.append(h.view(-1, 1, self.n_classes))

        # Extracted intermediate layers:
        x = torch.cat((intermediate), 1)

        # Global classification:
        # NOTE(review): the global head applies its Linear and Dropout layers
        # back to back, with no activation in between - confirm this is intended.
        for layer in self.fcGlobal:
            x = layer(x)
        logits.append(x.view(-1, 1, self.n_classes))

        # All the outputs (sub-classifiers + global classifier):
        return torch.cat((logits), 1)


## 4. Training

In [None]:
def run(train_batch_size=None,
        test_batch_size=None,
        epochs=None,
        lr=None,
        weight_decay=None,
        log_interval=None,
        save_dir=None,
        name_model=None,
        sub_spectrogram_size=None,
        hop_size=None,
        n_bins=None,
        root_dir=None,
        train_dir=None,
        val_dir=None,
        default_labels_path=None,
        hop_length=None,
        top_db=None,
        f_min=None,
        f_max=None,
        n_fft=None,
        win_length=None,
        early_stopping=None,
        patience=None,
        representation=None,
        n_classifiers=None,
        save_model=None):
    """Train a SubSpectralNet and return it.

    The model is trained with Adam and a linear one-cycle learning-rate
    schedule; the loss is the sum of the cross-entropy losses of all
    classifiers. After every epoch the validation set is evaluated and the
    loss, accuracy and F1 score of every classifier are printed.

    Parameters
    ----------
    train_batch_size, test_batch_size : int
        Batch sizes of the training and validation loaders.
    epochs : int
        Maximum number of training epochs.
    lr, weight_decay : float
        Adam settings; ``lr`` is also the peak of the one-cycle schedule.
    log_interval : int
        The training losses are printed every ``log_interval`` iterations.
    save_dir, name_model : str
        Directory and file-name prefix of the checkpoints (``save_model`` only).
    sub_spectrogram_size, hop_size, n_bins, root_dir, train_dir, val_dir,
    default_labels_path, hop_length, top_db, f_min, f_max, n_fft, win_length,
    representation
        Forwarded to ``get_data_loaders`` and ``SubSpectralNet``.
    early_stopping : bool
        Stop training when the validation F1 of the global classifier has not
        improved for ``patience`` epochs.
    patience : int
        See ``early_stopping``.
    n_classifiers : int
        Number of classifiers of the model, global classifier included.
    save_model : bool
        Keep a checkpoint of the model with the best validation F1 of the
        global classifier.

    Returns
    -------
    SubSpectralNet
        The model in its state after the last completed epoch.
    """
    # Enable GPU if possible:
    use_cuda = torch.cuda.is_available()
    print('Using GPU =', use_cuda)
    device = torch.device('cuda' if use_cuda else 'cpu')

    # Load the data loaders:
    train_loader, val_loader = get_data_loaders(train_batch_size=train_batch_size,
                                                test_batch_size=test_batch_size,
                                                sub_spectrogram_size=sub_spectrogram_size,
                                                hop_size=hop_size,
                                                n_bins=n_bins,
                                                use_cuda=use_cuda,
                                                root_dir=root_dir,
                                                train_dir=train_dir,
                                                val_dir=val_dir,
                                                default_labels_path=default_labels_path,
                                                hop_length=hop_length,
                                                top_db=top_db,
                                                f_max=f_max,
                                                f_min=f_min,
                                                n_fft=n_fft,
                                                win_length=win_length,
                                                representation=representation)

    # Get the model:
    model = SubSpectralNet(sub_spectrogram_size=sub_spectrogram_size, hop_size=hop_size, n_bins=n_bins, use_cuda=use_cuda).to(device)

    # Init the optimizer:
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    scheduler = OneCycleLR(optimizer, max_lr=lr, steps_per_epoch=int(len(train_loader)), epochs=epochs, anneal_strategy='linear')

    def normalize(inputs):
        """Standardise a batch with its own mean and standard deviation."""
        return (inputs - inputs.mean()) / inputs.std()

    def update_model(engine, batch):
        """Run one optimisation step on a training batch."""
        model.train()
        inputs, label = prepare_batch(batch, device=device)
        inputs = normalize(inputs)

        optimizer.zero_grad()
        output = model(inputs)
        losses = [F.cross_entropy(output[:, head, :], label) for head in range(output.shape[1])]

        loss = sum(losses)
        loss.backward()
        optimizer.step()
        scheduler.step()

        # Detached so that the engine state does not keep the autograd graph
        # of this step alive until the next one.
        return [head_loss.detach() for head_loss in losses], output.detach(), label

    # Get the trainer module:
    trainer = Engine(update_model)

    def evaluate(engine, batch):
        """Run the model on a validation batch without tracking gradients."""
        model.eval()
        with torch.no_grad():
            inputs, label = prepare_batch(batch, device=device)
            inputs = normalize(inputs)

            output = model(inputs)

            losses = [F.cross_entropy(output[:, head, :], label, reduction='sum').item()
                      for head in range(output.shape[1])]

        return losses, output, label

    # Get the evaluator module:
    evaluator = Engine(evaluate)

    def select_head(head):
        """Build the output transform that picks the logits of one classifier."""
        def output_transform(output):
            _, logits, label = output
            return logits[:, head, :], label
        return output_transform

    # One accuracy / loss / F1 metric per classifier, attached under the
    # names '<metric>_output_transform<head>'.
    for head in range(n_classifiers):
        transform_func = select_head(head)
        head_metrics = {
            'accuracy': Accuracy(output_transform=transform_func),
            'loss': Loss(F.cross_entropy, output_transform=transform_func),
            'f1': F1Score(output_transform=transform_func),
        }
        for name, metric in head_metrics.items():
            metric.attach(evaluator, f'{name}_output_transform{head}')

    # Log the events in Ignite: EVERY ITERATION:
    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(engine):
        step = (engine.state.iteration - 1) % len(train_loader) + 1
        if step % log_interval == 0:
            losses, output, label = engine.state.output
            epoch = engine.state.epoch
            n_heads = output.shape[1]
            message = [f'Train Epoch: {epoch} [{step}/{len(train_loader)}]']
            for head in range(n_heads):
                if head == n_heads - 1:
                    message.append(f'Losses: {round(losses[head].item(), 2)} (Global Classifier)')
                else:
                    message.append(f'Losses: {round(losses[head].item(), 2)} (Sub-classifier {head+1})')
            print(message)

    # Log the events in Ignite: Test the validation data on EVERY EPOCH:
    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        evaluator.run(val_loader)
        metrics_results = evaluator.state.metrics
        epoch = engine.state.epoch

        for header, metric_name in (('Loss:    ', 'loss'), ('Accuracy:', 'accuracy'), ('F1-score:', 'f1')):
            message = [f'Validation Results - Epoch: {epoch}, {header}']
            for head in range(n_classifiers):
                value = round(metrics_results[f'{metric_name}_output_transform{head}'], 2)
                if head == n_classifiers - 1:
                    message.append(f'{value} (Global Classifier)')
                else:
                    message.append(f'{value} (Sub-classifier {head + 1})')
            print(message)

    def score_function(engine):
        """Validation F1 of the global classifier (higher is better)."""
        return engine.state.metrics[f'f1_output_transform{n_classifiers-1}']

    if early_stopping:
        # Initialize EarlyStopping:
        early_stopping_mechanism = EarlyStopping(patience=patience, score_function=score_function, trainer=trainer)

        # Attach the early stopping handler to the evaluation engine:
        evaluator.add_event_handler(Events.COMPLETED, early_stopping_mechanism)

    if save_model:
        os.makedirs(save_dir, exist_ok=True)

        # Model checkpointing:
        checkpoint_handler = ModelCheckpoint(
            dirname=save_dir,
            filename_prefix=name_model,
            create_dir=True,
            require_empty=False,
            score_function=score_function,
            score_name='val_f1',
            global_step_transform=lambda engine, event: engine.state.epoch,
            filename_pattern='{filename_prefix}_{score_name}={score}.{ext}'
        )

        # Attach the handler to the evaluator:
        evaluator.add_event_handler(Events.COMPLETED, checkpoint_handler, {'model_best': model})

    trainer.run(train_loader, max_epochs=epochs)

    # Return the model:
    return model

In [None]:
# Configuration of the run. Every value below is read by the training and/or
# the testing section that follows.

# General hyperparameters:
train_batch_size = 14  # Batch size for training.
test_batch_size = 14  # Batch size for validation.
lr = 0.001  # Learning rate (also the peak of the one-cycle schedule).
weight_decay = 0.001  # Weight decay.
epochs = 50  # Maximum number of epochs.

# Spectrogram hyperparameters:
representation = 'LINEAR'  # Spectrogram representation method (LINEAR/MEL).
sample_rate = 44100  # Sample rate (Hz).
n_bins = 64  # Number of frequency bins.
n_fft = 1000  # FFT size passed to the spectrogram transform.
hop_length = int((sample_rate*5)/1500)  # Hop length in samples (presumably 5-second fragments split into 1500 hops - confirm).
win_length = hop_length*2  # Window length in samples.
f_min = 0  # Minimum frequency (Hz); only used by the MEL representation.
f_max = sample_rate/2  # Maximum frequency (Hz); only used by the MEL representation.
top_db = 80  # Top decibel (dB).

# Sub-spectrogram hyperparameters:
number_sub_spectrograms = 2  # Specify the number of sub-spectrograms.
sub_spectrogram_size = (n_bins//number_sub_spectrograms)  # Sub-spectrogram size (bins).
hop_size = sub_spectrogram_size  # Hop size (bins); equal to the size, so the sub-spectrograms do not overlap.

# Directories:
default_labels_path = '......./sorted_species.npy'  # Path to the attached sorted species file.
root_dir = ''  # Root directory.
train_dir = 'train'  # Directory with the training data, relative to root_dir.
val_dir = 'val'  # Directory with the validation data, relative to root_dir.

# Mode:
name_model = ''  # Name of the model.
train = True  # Train a new model (True/False).
log_interval = 500  # Logging interval of the training loss (iterations).
early_stopping = True  # Enable early stopping mechanism (True/False).
patience = 10  # Patience for early stopping (epochs).
save_model = True  # Save the newly trained model (True/False).
save_dir = ''  # Directory where to save the new model and results.
test = False  # Evaluate a saved model on the test set (True/False).
model_to_test = ''  # File name (inside save_dir) of the model to be evaluated.



# Calculate how many classifiers we are working with:
n_classifiers = classifier_calculator(n_bins=n_bins, sub_spectrogram_size=sub_spectrogram_size, hop_size=hop_size)
print('Data representation method =', representation, 'spectrograms')

# Start training mechanism:
if train:
    # Run the model:
    model = run(
        train_batch_size=train_batch_size,
        test_batch_size=test_batch_size,
        epochs=epochs,
        lr=lr,
        weight_decay=weight_decay,
        log_interval=log_interval,
        save_dir=save_dir,
        name_model=name_model,
        sub_spectrogram_size=sub_spectrogram_size,
        hop_size=hop_size,
        n_bins=n_bins,
        root_dir=root_dir,
        train_dir=train_dir,
        val_dir=val_dir,
        default_labels_path=default_labels_path,
        hop_length=hop_length,
        top_db=top_db,
        f_min=f_min,
        f_max=f_max,
        n_fft=n_fft,
        win_length=win_length,
        early_stopping=early_stopping,
        patience=patience,
        representation=representation,
        n_classifiers=n_classifiers,
        save_model=save_model
    )

    # Save the model of the last epoch. Only the state dict is stored, which
    # is what the testing section below loads with load_state_dict().
    if save_model:
        last_epoch_path = os.path.join(save_dir, f'{name_model}_last_epoch.pt')
        torch.save(model.state_dict(), last_epoch_path)

# Start testing mechanism:
# Every test recording is classified by averaging the global-classifier logits
# of all its fragments; the results are written to save_dir.
if test:
    # Use GPU if available:
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Empty GPU if available:
    if device == 'cuda':
        torch.cuda.empty_cache()

    use_cuda = device == 'cuda'

    # Initialize the model
    model = SubSpectralNet(sub_spectrogram_size=sub_spectrogram_size, hop_size=hop_size, n_bins=n_bins, use_cuda=use_cuda)

    # Load the state dictionary
    state_dict = torch.load(os.path.join(save_dir, model_to_test), map_location=torch.device(device))

    # Apply the state dictionary
    model.load_state_dict(state_dict)

    # Move model to GPU if available:
    model = model.eval().to(device)

    # Specify keyword arguments for dataloader:
    kwargs = {'num_workers': 8, 'pin_memory': True} if device == 'cuda' else {'num_workers': 8}

    # Specify data transform for the InsectData function:
    data_transform = transforms.Compose([ToTensor(), ToSubSpectrograms(sub_spectrogram_size=sub_spectrogram_size, hop_size=hop_size, n_bins=n_bins)])

    # Create a test dataframe that contains the paths to the test recordings.
    # The filtered frame is copied so that the assignment below does not
    # write into a slice of the original frame.
    test_df = pd.read_csv('......./metadata.csv')
    test_df = test_df[test_df['subset'] == 'test'].copy()
    test_df['path'] = test_df['path'].str.replace('data', '/content/drive/MyDrive/Thesis', regex=False)

    # Create a dictionary for saving the full predictions and the true labels
    prediction_output = {'full_predictions': [], 'true_label': []}

    for recording_path in tqdm(test_df['path']):
        name = ntpath.basename(recording_path[:-4])  # Base name of the full recording, without extension
        fragment_paths = glob(f'................../baseline 2/test/{name}_*.wav')  # All fragments of that recording
        if not fragment_paths:
            # No fragment on disk: nothing to predict for this recording.
            continue

        pred_ds = InsectDataset(data_dir=fragment_paths,
                                root_dir='',
                                default_labels_path=default_labels_path,
                                n_bins=n_bins,
                                hop_length=hop_length,
                                top_db=top_db,
                                f_max=f_max,
                                f_min=f_min,
                                n_fft=n_fft,
                                win_length=win_length,
                                representation=representation,
                                transform=data_transform,
                                augmentation=False,
                                fragment=False)

        test_loader = torch.utils.data.DataLoader(pred_ds, batch_size=100, shuffle=False, **kwargs)

        # Logits are collected over ALL batches of the recording, so that a
        # recording with more fragments than one batch still yields exactly
        # one prediction.
        fragment_logits = []
        true_label = None
        with torch.no_grad():
            for batch in test_loader:
                inputs, label = prepare_batch(batch, device=device)

                # Manual normalization:
                inputs_m, inputs_s = inputs.mean(), inputs.std()
                inputs = (inputs - inputs_m) / inputs_s

                output = model(inputs)
                fragment_logits.append(output[:, n_classifiers - 1, :].cpu().numpy())
                if true_label is None:
                    true_label = int(label[0].item())

        if not fragment_logits:
            continue

        preds_mean = np.concatenate(fragment_logits, axis=0).mean(axis=0)
        full_prediction = np.argmax(preds_mean)
        prediction_output['full_predictions'].append(int(full_prediction))
        prediction_output['true_label'].append(true_label)

    predictions = prediction_output['full_predictions']
    true_labels = prediction_output['true_label']

    cm = confusion_matrix(true_labels, predictions)
    np.save(os.path.join(save_dir, f'{name_model}_confusion_matrix_raw.npy'), cm)

    report = classification_report(true_labels, predictions, digits=3, output_dict=True)
    evaluation = pd.DataFrame(report).transpose()
    evaluation.to_csv(os.path.join(save_dir, f'{name_model}_test_evaluation.csv'))

    # Calculate the macro F1 score for direct output:
    macro_f1 = f1_score(true_labels, predictions, average='macro', zero_division=1)

    # Save the prediction_output to a JSON file:
    with open(os.path.join(save_dir, f'{name_model}_prediction_and_true_label_raw.json'), 'w') as f:
        json.dump(prediction_output, f)

    print(f'Macro F1 Score: {macro_f1}')
