In [1]:
import numpy as np
import copy
import re
import os
import gzip
import pandas as pd
import random
import librosa.display
import matplotlib.pyplot as plt
# sklearn
from sklearn.metrics import f1_score, accuracy_score, recall_score, r2_score, classification_report
from sklearn.preprocessing import LabelEncoder
# Pytorch
import torch
from torch import nn
from torch import optim
from torch.utils.data import Dataset
from torch.utils.data import SubsetRandomSampler, DataLoader

Split the data in train and evaluation sets

In [2]:
def torch_train_val_split(
    dataset, batch_train, batch_eval, val_size=0.2, shuffle=True, seed=420
):
    # Creating data indices for training and validation splits:
    dataset_size = len(dataset)
    indices = list(range(dataset_size))
    val_split = int(np.floor(val_size * dataset_size))
    if shuffle:
        np.random.seed(seed)
        np.random.shuffle(indices)
    train_indices = indices[val_split:]
    val_indices = indices[:val_split]

    # Creating PT data samplers and loaders:
    train_sampler = SubsetRandomSampler(train_indices)
    val_sampler = SubsetRandomSampler(val_indices)

    train_loader = DataLoader(dataset, batch_size=batch_train, sampler=train_sampler)
    val_loader = DataLoader(dataset, batch_size=batch_eval, sampler=val_sampler)
    return train_loader, val_loader

Read spectrogram

In [3]:
def read_spectrogram(spectrogram_file, chroma=True, fused = False):

    spectrograms = np.load(spectrogram_file)
    
    if not fused:
        if chroma:
            return spectrograms[128:].T
        
        return spectrograms[:128].T
    
    return spectrograms.T

Transform the labels to a different encoding

In [4]:
class LabelTransformer(LabelEncoder):
    def inverse(self, y):
        try:
            return super(LabelTransformer, self).inverse_transform(y)
        except:
            return super(LabelTransformer, self).inverse_transform([y])

    def transform(self, y):
        try:
            return super(LabelTransformer, self).transform(y)
        except:
            return super(LabelTransformer, self).transform([y])

Transform the frequency range to be the same for all spectrograms.

In [5]:
class PaddingTransform(object):
    def __init__(self, max_length, padding_value=0):
        self.max_length = max_length
        self.padding_value = padding_value

    def __call__(self, s):
        if len(s) == self.max_length:
            return s

        if len(s) > self.max_length:
            return s[: self.max_length]

        if len(s) < self.max_length:
            s1 = copy.deepcopy(s)
            pad = np.zeros((self.max_length - s.shape[0], s.shape[1]), dtype=np.float32)
            s1 = np.vstack((s1, pad))
            return s1

Create a dataset for the different spectrograms

In [6]:
class SpectrogramDataset(Dataset):
    def __init__(
        self, path, class_mapping=None, train=True, max_length=-1, read_spec = read_spectrogram, chroma = True, fused = False,
    label_index = 1):
        t = "train" if train else "test"
        p = os.path.join(path, t)
        
        self.label_index = label_index
        self.index = os.path.join(path, "{}_labels.txt".format(t))
        self.files, labels = self.get_files_labels(self.index, class_mapping)
        self.feats = [read_spec(os.path.join(p, f), chroma= chroma, fused = fused) for f in self.files]
        self.feat_dim = self.feats[0].shape[1]
        self.lengths = [len(i) for i in self.feats]
        self.max_length = max(self.lengths) if max_length <= 0 else max_length
        self.zero_pad_and_stack = PaddingTransform(self.max_length)
        self.label_transformer = LabelTransformer()
        if isinstance(labels, (list, tuple)):
            
            try:
                float(labels[0])
                self.labels = np.array(labels).astype('float64')
                
            except:
                self.labels = np.array(
                    self.label_transformer.fit_transform(labels).astype("int64"))
            

    def get_files_labels(self, txt, class_mapping):
        with open(txt, "r") as fd:
            lines = [l.rstrip().split("\t") if '\t' in l else l.rstrip().split(",") for l in fd.readlines()[1:]]
        files, labels = [], []
        
        for l in lines:
            label = l[self.label_index]
            if class_mapping:
                label = class_mapping[l[self.label_index]]
            if not label:
                continue
            # Kaggle automatically unzips the npy.gz format so this hack is needed
            _id = l[0].split('.')[0]
            npy_file = '{}.fused.full.npy'.format(_id)
            files.append(npy_file)
            labels.append(label)
        return files, labels

    def __getitem__(self, item):
        length = min(self.lengths[item], self.max_length)
        return self.zero_pad_and_stack(self.feats[item]), self.labels[item], length

    def __len__(self):
        return len(self.labels)

Create mel spectrogram datasets (train and set).

In [7]:
class_mapping = {
    "Rock": "Rock",
    "Psych-Rock": "Rock",
    "Indie-Rock": None,
    "Post-Rock": "Rock",
    "Psych-Folk": "Folk",
    "Folk": "Folk",
    "Metal": "Metal",
    "Punk": "Metal",
    "Post-Punk": None,
    "Trip-Hop": "Trip-Hop",
    "Pop": "Pop",
    "Electronic": "Electronic",
    "Hip-Hop": "Hip-Hop",
    "Classical": "Classical",
    "Blues": "Blues",
    "Chiptune": "Electronic",
    "Jazz": "Jazz",
    "Soundtrack": None,
    "International": None,
    "Old-Time": None,
    }
def train_test_val_splitter(directory, class_mapping = class_mapping, chroma=False, fused=False):
    
    train_specs = SpectrogramDataset(directory, train = True, class_mapping = class_mapping, max_length = -1, read_spec=read_spectrogram, chroma=chroma, fused = fused)
    inputs, _, _ = next(iter(train_specs))
    test_specs = SpectrogramDataset(directory, train = False, class_mapping = class_mapping, max_length = inputs.shape[0], read_spec=read_spectrogram, chroma=chroma, fused = fused)
    
    train_loader, val_loader = torch_train_val_split(train_specs, 32 ,32, val_size=.33)
    test_loader = DataLoader(test_specs, batch_size=32)
    
    return train_loader, val_loader, test_loader

In [8]:
import torch.nn as nn
class BasicLSTM(nn.Module):
    def __init__(self, input_dim, rnn_size, output_dim, num_layers, bidirectional=False, dropout=0):
        super(BasicLSTM, self).__init__()
        self.bidirectional = bidirectional
        self.rnn_size = rnn_size
        self.feature_size = rnn_size * 2 if self.bidirectional else rnn_size
        self.num_layers = num_layers
        self.dropout = dropout
        
        # --------------- Insert your code here ---------------- #
        # Initialize the LSTM, Dropout, Output layers
        
        self.lstm = nn.LSTM(input_dim, self.rnn_size, self.num_layers, bidirectional=self.bidirectional, batch_first=True, dropout=self.dropout)
        self.linear = nn.Linear(self.feature_size, output_dim)

    def forward(self, x, lengths):
        """ 
            x : 3D numpy array of dimension N x L x D
                N: batch index
                L: sequence index
                D: feature index

            lengths: N x 1
         """
        
        # --------------- Insert your code here ---------------- #
        # Obtain the model's device ID
        DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        
        # You must have all of the outputs of the LSTM, but you need only the last one (that does not exceed the sequence length)
        # To get it use the last_timestep method
        # Then pass it through the remaining network
        
        if self.bidirectional:
            h0 = torch.zeros(self.num_layers*2, x.size(0), self.rnn_size).double().to(DEVICE)
            c0 = torch.zeros(self.num_layers*2, x.size(0), self.rnn_size).double().to(DEVICE)
        else:
            h0 = torch.zeros(self.num_layers, x.size(0), self.rnn_size).double().to(DEVICE)
            c0 = torch.zeros(self.num_layers, x.size(0), self.rnn_size).double().to(DEVICE)
            
        # Forward propagate LSTM
        lstm_out, _ = self.lstm(x, (h0, c0))

        # Forward propagate Linear
        last_outputs = self.linear(self.last_timestep(lstm_out, lengths, self.bidirectional))
        return last_outputs

    def last_timestep(self, outputs, lengths, bidirectional=False):
        """
            Returns the last output of the LSTM taking into account the zero padding
        """
        if bidirectional:
            forward, backward = self.split_directions(outputs)
            last_forward = self.last_by_index(forward, lengths)
            last_backward = backward[:, 0, :]
            # Concatenate and return - maybe add more functionalities like average
            return torch.cat((last_forward, last_backward), dim=-1)

        else:
            return self.last_by_index(outputs, lengths)

    @staticmethod
    def split_directions(outputs):
        direction_size = int(outputs.size(-1) / 2)
        forward = outputs[:, :, :direction_size]
        backward = outputs[:, :, direction_size:]
        return forward, backward

    @staticmethod
    def last_by_index(outputs, lengths):
        # Obtain the model's device ID
        DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        
        # Index of the last output for each sequence.
        idx = (lengths - 1).view(-1, 1).expand(outputs.size(0),
                                               outputs.size(2)).unsqueeze(1).to(DEVICE)
        return outputs.gather(1, idx).squeeze()
    
    
    def fit(self, epochs, dataloader, loss_function, optimizer, model_name, batch_overfit = False):
    
        # Οbtain the model's device ID
        device = next(self.parameters()).device

        # IMPORTANT: switch to train mode
        # Εnable regularization layers, such as Dropout
        self.train()

        batch_overfit = [next(iter(dataloader))]
        loader = dataloader if not batch_overfit else batch_overfit

        for epoch in range(epochs):
            running_loss = 0.0

            for index, batch in enumerate(loader, 1):
                # Get the inputs (batch)
                inputs, labels, lengths = batch
                inputs = inputs.double()
                    
                # Move the batch tensors to the right device
                inputs = inputs.to(device)
                labels = labels.to(device)
                # Step 1 - zero the gradients
                # Remember that PyTorch accumulates gradients.
                # We need to clear them out before each batch!
                optimizer.zero_grad()

                # Step 2 - forward pass: y' = model(x)
                y_preds = self(inputs, lengths)
                # Step 3 - compute loss: L = loss_function(y, y')
                if (isinstance(loss_function, nn.MSELoss)): 
                    labels = torch.unsqueeze(labels, 1)
                    loss = loss_function(y_preds, labels.double())
                    
                else:
                    loss = loss_function(y_preds, labels)


                # Step 4 - backward pass: compute gradient wrt model parameters
                loss.backward()

                # Step 5 - update weights
                optimizer.step()

                # Accumulate loss in a variable.
                running_loss += loss.data.item()


                indicator = epoch if batch_overfit else index
                if (indicator % 100 == 0):
                    print(f'Epoch : {epoch}, Batch : {index}, Running Loss : {running_loss / index}')

        torch.save(self, model_name)

    def evaluate(self, dataloader, loss_function):
        # IMPORTANT: switch to eval mode
        # Disable regularization layers, such as Dropout
        self.eval()
        acc = 0.0
        samples = 0 

        y_pred = []  # the predicted labels
        y = []  # the gold labels

        # Obtain the model's device ID
        device = next(self.parameters()).device

        # IMPORTANT: in evaluation mode, we don't want to keep the gradients
        # so we do everything under torch.no_grad()

        with torch.no_grad():
            for index, batch in enumerate(dataloader, 1):
                # Get the inputs (batch)
                inputs, labels, lengths = batch
                # Step 1 - move the batch tensors to the right device
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Step 2 - forward pass: y' = model(x)
                y_preds = self(inputs, lengths)  # EX9

                # Step 3 - compute loss: L = loss_function(y, y')
                # We compute the loss only for inspection (compare train/test loss)
                # because we do not actually backpropagate in test time
                # loss = loss_function(y_preds, labels)

                # Step 4 - make predictions (class = argmax of posteriors)
                if isinstance(loss_function, nn.CrossEntropyLoss):
                    y_preds_arg = torch.argmax(y_preds, dim=1)
                
                elif isinstance(loss_function, nn.MSELoss):
                    y_preds_arg = y_preds

                # Step 5 - collect the predictions, gold labels and batch loss
                y_pred.append(y_preds_arg.cpu().numpy())
                y.append(labels.cpu().numpy())

                # Compute accuracy 
                # acc += (labels == y_preds_arg).sum().detach().item()
                # samples += inputs.size(0)



        # print(f'Accuracy: {acc / samples}')

        y_pred = [elem for sublist in y_pred for elem in sublist]
        y = [elem for sublist in y for elem in sublist]

        return y_pred, y

In [9]:
def train_eval_model(EPOCHS, model, model_name, dataset):
    
    train_dataset_str = 'train_' + dataset
    test_dataset_str = 'test_' + dataset
    val_dataset_str = 'val_' + dataset
    
    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # model.double()
    model.to(DEVICE)
    loss_function = nn.CrossEntropyLoss().to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001,  weight_decay=1e-5)

    model.fit(EPOCHS, datasets[train_dataset_str], loss_function, optimizer, model_name = model_name)

    y_train_true, y_train_pred = model.evaluate(datasets[train_dataset_str], loss_function)
    y_val_true, y_val_pred = model.evaluate(datasets[val_dataset_str], loss_function)
    print()
    print("Accuracy for train:" , accuracy_score(y_train_true, y_train_pred))
    print("Accuracy for validation:" , accuracy_score(y_val_true, y_val_pred))
    print()
    y_test_true, y_test_pred = model.evaluate(datasets[test_dataset_str], loss_function)

    print(classification_report(y_test_true, y_test_pred))

In [10]:
import torch.nn as nn
import torch
import torch.nn.functional as F


class Net(nn.Module):
    def __init__(self, num_features, output_size = 10):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 4, 3, device = 'cpu')
        y = num_features - 3 + 1
        self.pool = nn.MaxPool2d(2, 2)
        y = (y - 2)/2 + 1
        self.norm1 = nn.BatchNorm2d(4, device = 'cpu')
        self.conv2 = nn.Conv2d(4, 8, 3, device = 'cpu')
        y = y - 3 + 1
        y = (y - 2) / 2 + 1
        y = y.apply_(int)
        
        self.norm2 = nn.BatchNorm2d(8, device = 'cpu')
        self.fc1 = nn.Linear(int(8 * torch.prod(y).item()), 120, device = 'cpu')
        self.fc2 = nn.Linear(120, 84, device = 'cpu')
        self.fc3 = nn.Linear(84, output_size, device = 'cpu')

    def forward(self, x):
        
        if isinstance(loss_function, nn.MSELoss):
            
            x = torch.unsqueeze(x, 1).double()
        
        else:
            
            x = torch.unsqueeze(x, 1).float()
            
        x = self.norm1(self.pool(F.relu(self.conv1(x))))
        x = self.norm2(self.pool(F.relu(self.conv2(x))))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
    
    def fit(self, epochs, dataloader, loss_function, optimizer, model_name, batch_overfit = False):
    
        # Οbtain the model's device ID
        device = next(self.parameters()).device

        # IMPORTANT: switch to train mode
        # Εnable regularization layers, such as Dropout
        self.train()

        batch_overfit = [next(iter(dataloader))]
        loader = dataloader if not batch_overfit else batch_overfit

        for epoch in range(epochs):
            running_loss = 0.0

            for index, batch in enumerate(loader, 1):

                # Get the inputs (batch)
                inputs, labels, lengths = batch                
                if isinstance(loss_function, nn.MSELoss): 
                    labels = torch.unsqueeze(labels, 1)
                    labels = labels.double()

                # Move the batch tensors to the right device
                inputs = inputs.to(device)
                # inputs = inputs.double()
                labels = labels.to(device)
             

                # Step 1 - zero the gradients
                # Remember that PyTorch accumulates gradients.
                # We need to clear them out before each batch!
                optimizer.zero_grad()

                # Step 2 - forward pass: y' = model(x)
                y_preds = self(inputs)

                # Step 3 - compute loss: L = loss_function(y, y')
                loss = loss_function(y_preds, labels)

                # Step 4 - backward pass: compute gradient wrt model parameters
                loss.backward()

                # Step 5 - update weights
                optimizer.step()

                # Accumulate loss in a variable.
                running_loss += loss.data.item()


                indicator = epoch if batch_overfit else index
                if (indicator % 100 == 0):
                    print(f'Epoch : {epoch}, Batch : {index}, Running Loss : {running_loss / index}')

        torch.save(self, model_name)

    def evaluate(self, dataloader, loss_function):
        # IMPORTANT: switch to eval mode
        # Disable regularization layers, such as Dropout
        self.eval()
        acc = 0.0
        samples = 0 

        y_pred = []  # the predicted labels
        y = []  # the gold labels

        # Obtain the model's device ID
        device = next(self.parameters()).device

        # IMPORTANT: in evaluation mode, we don't want to keep the gradients
        # so we do everything under torch.no_grad()

        with torch.no_grad():
            for index, batch in enumerate(dataloader, 1):
                # Get the inputs (batch)
                inputs, labels, lengths = batch
                # Step 1 - move the batch tensors to the right device
                inputs = inputs.to(device)
                labels = labels.to(device)
                # Step 2 - forward pass: y' = model(x)
                y_preds = self(inputs)  # EX9

                # Step 3 - compute loss: L = loss_function(y, y')
                # We compute the loss only for inspection (compare train/test loss)
                # because we do not actually backpropagate in test time
                # loss = loss_function(y_preds, labels)

                # Step 4 - make predictions (class = argmax of posteriors)
                if isinstance(loss_function, nn.MSELoss):
                    y_preds_args = y_preds
                elif isinstance(loss_function, nn.CrossEntropyLoss):
                    y_preds_arg = torch.argmax(y_preds, dim=1)

                # Step 5 - collect the predictions, gold labels and batch loss
                y_pred.append(y_preds_arg.cpu().numpy())
                y.append(labels.cpu().numpy())

                # Compute accuracy 
                # acc += (labels == y_preds_arg).sum().detach().item()
                # samples += inputs.size(0)



        # print(f'Accuracy: {acc / samples}')

        y_pred = [elem for sublist in y_pred for elem in sublist]
        y = [elem for sublist in y for elem in sublist]

        return y_pred, y

In [11]:
directory_beat = '../input/patreco3-multitask-affective-music/data/fma_genre_spectrograms_beat/'
directory = '../input/patreco3-multitask-affective-music/data/fma_genre_spectrograms/'

DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

train_loader_mel, val_loader_mel, test_loader_mel = train_test_val_splitter(directory)
train_loader_beat_mel, val_loader_beat_mel, test_loader_beat_mel = train_test_val_splitter(directory_beat)
train_loader_beat_chroma, val_loader_beat_chroma, test_loader_beat_chroma = train_test_val_splitter(directory_beat, chroma = True)
train_loader, val_loader, test_loader = train_test_val_splitter(directory_beat, fused = True)

datasets = {
    'train_loader_mel': train_loader_mel,
    'train_loader_beat_mel': train_loader_beat_mel,
    'train_loader_beat_chroma': train_loader_beat_chroma,
    'train_loader': train_loader,
    'test_loader_mel': test_loader_mel,
    'test_loader_beat_mel': test_loader_beat_mel,
    'test_loader_beat_chroma': test_loader_beat_chroma,
    'test_loader': test_loader,
    'val_loader_mel': val_loader_mel,
    'val_loader_beat_mel': val_loader_beat_mel,
    'val_loader_beat_chroma': val_loader_beat_chroma,
    'val_loader': val_loader}

In [12]:
num_features = next(iter(train_loader_mel))[0].size()[1:]
num_features = torch.tensor(num_features)
net = Net(num_features = num_features)

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# net.double()
net.to(DEVICE)
loss_function = nn.CrossEntropyLoss().to(DEVICE)
optimizer = torch.optim.Adam(net.parameters(), lr=0.001,  weight_decay=1e-5)

net.fit(500, train_loader_mel, loss_function, optimizer, './overtrained', batch_overfit=True)

Epoch : 0, Batch : 1, Running Loss : 2.291560411453247
Epoch : 100, Batch : 1, Running Loss : 3.022843156941235e-05
Epoch : 200, Batch : 1, Running Loss : 1.815612085920293e-05
Epoch : 300, Batch : 1, Running Loss : 1.2401030289765913e-05
Epoch : 400, Batch : 1, Running Loss : 4.418142452777829e-06


In [13]:
num_features = next(iter(train_loader_mel))[0].size()[1:]
num_features = torch.tensor(num_features)
net = Net(num_features = num_features)

loss_function = nn.CrossEntropyLoss().to(DEVICE)
optimizer = torch.optim.Adam(net.parameters(), lr = 0.001)

train_eval_model(500, net, './cnn_500_mel', 'loader_mel')

Epoch : 0, Batch : 1, Running Loss : 2.284116268157959
Epoch : 100, Batch : 1, Running Loss : 1.3299228385221795e-06
Epoch : 200, Batch : 1, Running Loss : 5.699683924831334e-07
Epoch : 300, Batch : 1, Running Loss : 3.762539222407213e-07
Epoch : 400, Batch : 1, Running Loss : 2.719459928357537e-07

Accuracy for train: 0.1883495145631068
Accuracy for validation: 0.1736842105263158

              precision    recall  f1-score   support

           0       0.10      0.07      0.08        55
           1       0.55      0.29      0.38        76
           2       0.17      0.20      0.19        71
           3       0.01      0.09      0.02        11
           4       0.07      0.27      0.12        11
           5       0.00      0.00      0.00        17
           6       0.24      0.26      0.25        73
           7       0.42      0.09      0.15       183
           8       0.11      0.44      0.17        25
           9       0.29      0.19      0.23        53

    accuracy       

In [14]:
train_eval_model(1000, net, './cnn_1000_mel', 'loader_mel')

Epoch : 0, Batch : 1, Running Loss : 4.993778705596924
Epoch : 100, Batch : 1, Running Loss : 9.685719533081283e-07
Epoch : 200, Batch : 1, Running Loss : 4.023306701128604e-07
Epoch : 300, Batch : 1, Running Loss : 2.607700650969491e-07
Epoch : 400, Batch : 1, Running Loss : 2.4214369886976783e-07
Epoch : 500, Batch : 1, Running Loss : 2.235172900100224e-07
Epoch : 600, Batch : 1, Running Loss : 2.3841846541472478e-07
Epoch : 700, Batch : 1, Running Loss : 1.415610029198433e-07
Epoch : 800, Batch : 1, Running Loss : 1.415610029198433e-07
Epoch : 900, Batch : 1, Running Loss : 1.4901158351676713e-07

Accuracy for train: 0.2524271844660194
Accuracy for validation: 0.23421052631578948

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.33      0.15      0.20        88
           2       0.55      0.27      0.36       165
           3       0.03      0.67      0.05         3
           4       0.00      0.00   

  _warn_prf(average, modifier, msg_start, len(result))


In [15]:
num_features = next(iter(train_loader_beat_chroma))[0].size()[1:]
num_features = torch.tensor(num_features)
net = Net(num_features = num_features)
train_eval_model(500, net, './cnn_500_mel', 'loader_beat_chroma')

Epoch : 0, Batch : 1, Running Loss : 2.3250396251678467
Epoch : 100, Batch : 1, Running Loss : 0.0012120764004066586
Epoch : 200, Batch : 1, Running Loss : 0.0002377933415118605
Epoch : 300, Batch : 1, Running Loss : 4.872088902629912e-05
Epoch : 400, Batch : 1, Running Loss : 2.9689817893086e-05

Accuracy for train: 0.2058252427184466
Accuracy for validation: 0.1881578947368421

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.05      0.14      0.07        14
           2       0.00      0.00      0.00         0
           3       0.61      0.23      0.33       215
           4       0.20      0.21      0.21        38
           5       0.00      0.00      0.00         0
           6       0.68      0.22      0.33       244
           7       0.10      0.09      0.09        45
           8       0.01      0.07      0.02        14
           9       0.06      0.40      0.10         5

    accuracy         

In [16]:
num_features = next(iter(train_loader_beat_mel))[0].size()[1:]
num_features = torch.tensor(num_features)
net = Net(num_features = num_features)
train_eval_model(500, net, './cnn_500_mel', 'loader_beat_mel')

Epoch : 0, Batch : 1, Running Loss : 2.3116092681884766
Epoch : 100, Batch : 1, Running Loss : 6.623480203415966e-06
Epoch : 200, Batch : 1, Running Loss : 3.6656642805610318e-06
Epoch : 300, Batch : 1, Running Loss : 3.058451284232433e-06
Epoch : 400, Batch : 1, Running Loss : 2.73062846645189e-06

Accuracy for train: 0.20064724919093851
Accuracy for validation: 0.2

              precision    recall  f1-score   support

           0       0.07      0.06      0.07        47
           1       0.07      0.19      0.11        16
           2       0.19      0.12      0.15       123
           3       0.35      0.22      0.27       126
           4       0.30      0.21      0.24        58
           5       0.05      0.07      0.06        29
           6       0.49      0.36      0.41       106
           7       0.05      0.05      0.05        37
           8       0.11      0.42      0.17        26
           9       0.00      0.00      0.00         7

    accuracy                     

In [17]:
num_features = next(iter(train_loader_beat_mel))[0].size()[1:]
num_features = torch.tensor(num_features)
net = Net(num_features = num_features)
train_eval_model(1000, net, './cnn_1000_mel', 'loader_beat_mel')

Epoch : 0, Batch : 1, Running Loss : 2.324416399002075
Epoch : 100, Batch : 1, Running Loss : 4.448131221579388e-05
Epoch : 200, Batch : 1, Running Loss : 6.608616786252242e-06
Epoch : 300, Batch : 1, Running Loss : 4.2803412725334056e-06
Epoch : 400, Batch : 1, Running Loss : 3.360201844770927e-06
Epoch : 500, Batch : 1, Running Loss : 2.3953568870638264e-06
Epoch : 600, Batch : 1, Running Loss : 2.130862640115083e-06
Epoch : 700, Batch : 1, Running Loss : 2.1308624127414078e-06
Epoch : 800, Batch : 1, Running Loss : 2.1271373498166213e-06
Epoch : 900, Batch : 1, Running Loss : 2.1308624127414078e-06

Accuracy for train: 0.23171521035598705
Accuracy for validation: 0.2118421052631579

              precision    recall  f1-score   support

           0       0.12      0.08      0.10        63
           1       0.00      0.00      0.00         3
           2       0.09      0.29      0.13        24
           3       0.26      0.17      0.20       126
           4       0.33      0.22 

In [18]:
num_features = next(iter(train_loader_beat_mel))[0].size()[1:]
num_features = torch.tensor(num_features)
net = Net(num_features = num_features)
train_eval_model(1000, net, './cnn_1000_mel', 'loader_beat_mel')

Epoch : 0, Batch : 1, Running Loss : 2.3134002685546875
Epoch : 100, Batch : 1, Running Loss : 1.7519667380838655e-05
Epoch : 200, Batch : 1, Running Loss : 1.1645114682323765e-05
Epoch : 300, Batch : 1, Running Loss : 8.966693712864071e-06
Epoch : 400, Batch : 1, Running Loss : 4.634238393919077e-06
Epoch : 500, Batch : 1, Running Loss : 3.874287813232513e-06
Epoch : 600, Batch : 1, Running Loss : 3.4980364489456406e-06
Epoch : 700, Batch : 1, Running Loss : 3.252169108236558e-06
Epoch : 800, Batch : 1, Running Loss : 3.1068834687175695e-06
Epoch : 900, Batch : 1, Running Loss : 3.0398284707189305e-06

Accuracy for train: 0.19676375404530744
Accuracy for validation: 0.1763157894736842

              precision    recall  f1-score   support

           0       0.12      0.12      0.12        42
           1       0.00      0.00      0.00         0
           2       0.03      0.06      0.03        36
           3       0.09      0.13      0.11        53
           4       0.15      0.13

In [19]:
num_features = next(iter(train_loader))[0].size()[1:]
num_features = torch.tensor(num_features)
net = Net(num_features = num_features)
train_eval_model(500, net, './cnn_500_mel', 'loader')

Epoch : 0, Batch : 1, Running Loss : 2.290799379348755
Epoch : 100, Batch : 1, Running Loss : 2.602003405627329e-05
Epoch : 200, Batch : 1, Running Loss : 4.991797140974086e-06
Epoch : 300, Batch : 1, Running Loss : 2.6225884539599065e-06
Epoch : 400, Batch : 1, Running Loss : 2.2202636955626076e-06

Accuracy for train: 0.18576051779935276
Accuracy for validation: 0.13289473684210526

              precision    recall  f1-score   support

           0       0.03      0.12      0.04         8
           1       0.03      0.50      0.05         2
           2       0.30      0.16      0.21       146
           3       0.17      0.16      0.17        85
           4       0.10      0.20      0.13        20
           5       0.15      0.11      0.13        55
           6       0.23      0.19      0.21        94
           7       0.17      0.13      0.15        54
           8       0.09      0.24      0.13        37
           9       0.12      0.05      0.07        74

    accuracy    

In [20]:
num_features = next(iter(train_loader))[0].size()[1:]
num_features = torch.tensor(num_features)
net = Net(num_features = num_features)
train_eval_model(1000, net, './cnn_1000_mel', 'loader')

Epoch : 0, Batch : 1, Running Loss : 2.294083595275879
Epoch : 100, Batch : 1, Running Loss : 2.349482019781135e-05
Epoch : 200, Batch : 1, Running Loss : 3.7290026284608757e-06
Epoch : 300, Batch : 1, Running Loss : 2.346928795304848e-06
Epoch : 400, Batch : 1, Running Loss : 2.201642928412184e-06
Epoch : 500, Batch : 1, Running Loss : 2.2314452507998794e-06
Epoch : 600, Batch : 1, Running Loss : 2.2612475731875747e-06
Epoch : 700, Batch : 1, Running Loss : 2.2947749585000565e-06
Epoch : 800, Batch : 1, Running Loss : 2.32085199058929e-06
Epoch : 900, Batch : 1, Running Loss : 2.3432037323800614e-06

Accuracy for train: 0.15210355987055016
Accuracy for validation: 0.1368421052631579

              precision    recall  f1-score   support

           0       0.10      0.06      0.07        69
           1       0.05      0.09      0.06        22
           2       0.12      0.11      0.12        92
           3       0.16      0.19      0.17        70
           4       0.23      0.10  

In [21]:
directory_multitask = '../input/patreco3-multitask-affective-music/data/multitask_dataset'
train_multitask = SpectrogramDataset(directory_multitask, train = True, read_spec = read_spectrogram, chroma = False)
train_multitask, val_multitask = torch_train_val_split(train_multitask, 32, 32, val_size = .33)

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

num_mel = 128
num_chroma = 12

lstm = BasicLSTM(num_mel, 64, 1, 1)
lstm.double()
lstm.to(DEVICE)

first_input = next(iter(train_multitask))[0]
num_features = first_input.size()[1:]
num_features = torch.tensor(num_features)

loss_function = nn.MSELoss().to(DEVICE)
optimizer_lstm = torch.optim.Adam(lstm.parameters(), lr=0.001,  weight_decay=1e-5)

lstm.fit(1000, train_multitask, loss_function, optimizer_lstm, './sentiment_mel_best_lstm', batch_overfit=True)
y_pred, y_true = lstm.evaluate(val_multitask, loss_function)

print(r2_score(y_true, y_pred))

Epoch : 0, Batch : 1, Running Loss : 0.5177009293822996
Epoch : 100, Batch : 1, Running Loss : 0.017175129599321007
Epoch : 200, Batch : 1, Running Loss : 0.012150694145091102
Epoch : 300, Batch : 1, Running Loss : 0.0062951936208391155
Epoch : 400, Batch : 1, Running Loss : 0.002192753682042879
Epoch : 500, Batch : 1, Running Loss : 0.0012385864615462555
Epoch : 600, Batch : 1, Running Loss : 0.0010145326441170057
Epoch : 700, Batch : 1, Running Loss : 0.0009414444356843296
Epoch : 800, Batch : 1, Running Loss : 0.0009016567307501995
Epoch : 900, Batch : 1, Running Loss : 0.001020063254727234
-0.6001243274285459


In [22]:
y_pred_train, y_true_train = lstm.evaluate(train_multitask, loss_function)
r2_score(y_true_train, y_pred_train)

-0.4457510838324761

In [23]:
directory_multitask = '../input/patreco3-multitask-affective-music/data/multitask_dataset'
train_multitask = SpectrogramDataset(directory_multitask, train = True, read_spec = read_spectrogram, chroma = False, label_index=2)
train_multitask, val_multitask = torch_train_val_split(train_multitask, 32, 32, val_size = .33)

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

num_mel = 128
num_chroma = 12

lstm = BasicLSTM(num_mel, 64, 1, 1)
lstm.double()
lstm.to(DEVICE)

first_input = next(iter(train_multitask))[0]
num_features = first_input.size()[1:]
num_features = torch.tensor(num_features)

loss_function = nn.MSELoss().to(DEVICE)
optimizer_lstm = torch.optim.Adam(lstm.parameters(), lr=0.001,  weight_decay=1e-5)

lstm.fit(1000, train_multitask, loss_function, optimizer_lstm, './sentiment_mel_best_lstm', batch_overfit=True)
y_pred, y_true = lstm.evaluate(val_multitask, loss_function)

print(r2_score(y_true, y_pred))

Epoch : 0, Batch : 1, Running Loss : 0.1809661908095553
Epoch : 100, Batch : 1, Running Loss : 0.002960557802679402
Epoch : 200, Batch : 1, Running Loss : 0.004594220829668826
Epoch : 300, Batch : 1, Running Loss : 0.002019956718756071
Epoch : 400, Batch : 1, Running Loss : 0.001257799322731224
Epoch : 500, Batch : 1, Running Loss : 0.0017755437710587246
Epoch : 600, Batch : 1, Running Loss : 0.0013707764797101568
Epoch : 700, Batch : 1, Running Loss : 0.0016532554727344121
Epoch : 800, Batch : 1, Running Loss : 0.0009978675272008194
Epoch : 900, Batch : 1, Running Loss : 0.0009142716316724683
-0.23564865200224494


In [24]:
y_pred_train, y_true_train = lstm.evaluate(train_multitask, loss_function)
r2_score(y_true_train, y_pred_train)

-0.15512642381352615

In [25]:
directory_multitask = '../input/patreco3-multitask-affective-music/data/multitask_dataset'
train_multitask = SpectrogramDataset(directory_multitask, train = True, read_spec = read_spectrogram, chroma = False, label_index=3)
train_multitask, val_multitask = torch_train_val_split(train_multitask, 32, 32, val_size = .33)

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

num_mel = 128
num_chroma = 12

lstm = BasicLSTM(num_mel, 64, 1, 1)
lstm.double()
lstm.to(DEVICE)

first_input = next(iter(train_multitask))[0]
num_features = first_input.size()[1:]
num_features = torch.tensor(num_features)

loss_function = nn.MSELoss().to(DEVICE)
optimizer_lstm = torch.optim.Adam(lstm.parameters(), lr=0.001,  weight_decay=1e-5)

lstm.fit(1000, train_multitask, loss_function, optimizer_lstm, './sentiment_mel_best_lstm', batch_overfit=True)
y_pred, y_true = lstm.evaluate(val_multitask, loss_function)

print(r2_score(y_true, y_pred))

Epoch : 0, Batch : 1, Running Loss : 1.0159484593085393
Epoch : 100, Batch : 1, Running Loss : 0.007556597692716099
Epoch : 200, Batch : 1, Running Loss : 0.008346195324199708
Epoch : 300, Batch : 1, Running Loss : 0.034529539877767815
Epoch : 400, Batch : 1, Running Loss : 0.02229578403335255
Epoch : 500, Batch : 1, Running Loss : 0.00596227892462008
Epoch : 600, Batch : 1, Running Loss : 0.015058994210324535
Epoch : 700, Batch : 1, Running Loss : 0.014067949026860479
Epoch : 800, Batch : 1, Running Loss : 0.013165438658718854
Epoch : 900, Batch : 1, Running Loss : 0.012605598742187009
-0.14811675919150247


In [26]:
y_pred_train, y_true_train = lstm.evaluate(train_multitask, loss_function)
r2_score(y_true_train, y_pred_train)

-0.08359513800924545

In [27]:
'''directory_multitask = '../input/patreco3-multitask-affective-music/data/multitask_dataset'
train_multitask = SpectrogramDataset(directory_multitask, train = True, read_spec = read_spectrogram, chroma = False)
train_multitask, val_multitask = torch_train_val_split(train_multitask, 32, 32, val_size = .33)

cnn = Net(num_features, output_size = 1)
cnn.double()
cnn.to(DEVICE)
optimizer_net = torch.optim.Adam(cnn.parameters(), lr=0.001,  weight_decay=1e-5)

cnn.fit(1000, train_multitask, loss_function, optimizer_net, './sentiment_mel_best_cnn')
y_pred, y_true = cnn.evaluate(val_multitask, loss_function)

print(r2_score(y_true, y_pred))'''

"directory_multitask = '../input/patreco3-multitask-affective-music/data/multitask_dataset'\ntrain_multitask = SpectrogramDataset(directory_multitask, train = True, read_spec = read_spectrogram, chroma = False)\ntrain_multitask, val_multitask = torch_train_val_split(train_multitask, 32, 32, val_size = .33)\n\ncnn = Net(num_features, output_size = 1)\ncnn.double()\ncnn.to(DEVICE)\noptimizer_net = torch.optim.Adam(cnn.parameters(), lr=0.001,  weight_decay=1e-5)\n\ncnn.fit(1000, train_multitask, loss_function, optimizer_net, './sentiment_mel_best_cnn')\ny_pred, y_true = cnn.evaluate(val_multitask, loss_function)\n\nprint(r2_score(y_true, y_pred))"

In [28]:
'''if __name__ == '__main__': 

    # %% [code] {"execution":{"iopub.status.busy":"2022-02-08T06:50:41.886063Z","iopub.execute_input":"2022-02-08T06:50:41.886368Z","iopub.status.idle":"2022-02-08T06:50:42.818285Z","shell.execute_reply.started":"2022-02-08T06:50:41.886332Z","shell.execute_reply":"2022-02-08T06:50:42.817544Z"}}
    directory = '../input/patreco3-multitask-affective-music/data/fma_genre_spectrograms/train_labels.txt'
    train_data = pd.read_csv(directory, sep = '\t')
    number_samples = train_data.shape[0]
    sample_rows = np.random.randint(number_samples, size = 2)

    while train_data.iloc[sample_rows[0]][1] == train_data.iloc[sample_rows[1]][1]:

        sample_rows[1] = np.random.randint(number_samples)

    directory = '../input/patreco3-multitask-affective-music/data/fma_genre_spectrograms/train/'
    samples = [train_data.iloc[elem][0] for elem in sample_rows]
    genre = [train_data.iloc[elem][1] for elem in sample_rows]

    samples = [elem.split('.gz')[0] for elem in samples]

    fig = plt.figure(figsize = (10, 10))
    spectrograms = []
    for i in range(len(samples)):

        filename = os.path.join(directory, samples[i])
        ax = fig.add_subplot(2, 1, i + 1)
        ax.set_xlabel('Time')
        ax.set_ylabel('Frequency')

        spectrogram = np.load(filename)[:128]
        spectrograms.append(spectrogram)
        print(f'Spectrogram time steps: {spectrogram.shape[1]}')

        img = librosa.display.specshow(spectrogram, x_axis = 'time', y_axis = 'mel')

        fig.colorbar(img, ax=ax, format ='% +2.f dB')
        ax.set(title='Spectrogram for {}'.format(genre[i]))

    # %% [markdown]
    # These spectrograms describe the amplitude (dB) as a function of time and frequency.

    # %% [markdown]
    # The time steps for the two spectrograms are 1291, 1293 respectively. These numbers are too large for an LSTM to perform well. Therefore we will use beat synced spectrograms instead.

    # %% [code] {"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2022-02-08T06:50:42.820199Z","iopub.execute_input":"2022-02-08T06:50:42.820973Z","iopub.status.idle":"2022-02-08T06:50:43.366178Z","shell.execute_reply.started":"2022-02-08T06:50:42.820927Z","shell.execute_reply":"2022-02-08T06:50:43.365521Z"}}
    directory = '../input/patreco3-multitask-affective-music/data/fma_genre_spectrograms_beat/train'

    fig = plt.figure(figsize = (10, 10))
    spectrograms = []
    for i in range(len(samples)):

        filename = os.path.join(directory, samples[i])
        ax = fig.add_subplot(2, 1, i + 1)
        ax.set_xlabel('Time')
        ax.set_ylabel('Frequency')

        spectrogram = np.load(filename)[:128]
        spectrograms.append(spectrogram)
        print(f'Spectrogram time steps: {spectrogram.shape[1]}')

        img = librosa.display.specshow(spectrogram, x_axis = 'time', y_axis='mel')

        fig.colorbar(img, ax=ax, format ='% +2.f dB')
        ax.set(title='Spectrogram for {}'.format(genre[i]))

    # %% [code] {"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2022-02-08T06:50:43.367514Z","iopub.execute_input":"2022-02-08T06:50:43.368272Z","iopub.status.idle":"2022-02-08T06:50:43.887186Z","shell.execute_reply.started":"2022-02-08T06:50:43.368232Z","shell.execute_reply":"2022-02-08T06:50:43.886500Z"}}
    directory = '../input/patreco3-multitask-affective-music/data/fma_genre_spectrograms/train/'
    fig = plt.figure(figsize = (10, 10))
    spectrograms = []
    for i in range(len(samples)):

        filename = os.path.join(directory, samples[i])
        ax = fig.add_subplot(2, 1, i + 1)
        ax.set_xlabel('Time')
        ax.set_ylabel('Frequency')

        spectrogram = np.load(filename)[128:]
        spectrograms.append(spectrogram)
        print(f'Spectrogram time steps: {spectrogram.shape[1]}')

        img = librosa.display.specshow(spectrogram, x_axis = 'time', y_axis = 'mel')

        fig.colorbar(img, ax=ax, format ='% +.2f dB')
        ax.set(title='Chromogram for {}'.format(genre[i]))

    # %% [code] {"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2022-02-08T06:50:43.888514Z","iopub.execute_input":"2022-02-08T06:50:43.889314Z","iopub.status.idle":"2022-02-08T06:50:44.374974Z","shell.execute_reply.started":"2022-02-08T06:50:43.889273Z","shell.execute_reply":"2022-02-08T06:50:44.374299Z"}}
    directory = '../input/patreco3-multitask-affective-music/data/fma_genre_spectrograms_beat/train'

    fig = plt.figure(figsize = (10, 10))
    spectrograms = []
    for i in range(len(samples)):

        filename = os.path.join(directory, samples[i])
        ax = fig.add_subplot(2, 1, i + 1)
        ax.set_xlabel('Time')
        ax.set_ylabel('Frequency')

        spectrogram = np.load(filename)[128:]
        spectrograms.append(spectrogram)
        print(f'Spectrogram time steps: {spectrogram.shape[1]}')

        img = librosa.display.specshow(spectrogram, x_axis = 'time', y_axis='mel')

        fig.colorbar(img, ax=ax, format ='% +.2f dB')
        ax.set(title='Chromogram for {}'.format(genre[i]))

    # %% [markdown]
    # Map similar classes together ignore underrepresented classes.

    # %% [code] {"execution":{"iopub.status.busy":"2022-02-08T06:50:44.376147Z","iopub.execute_input":"2022-02-08T06:50:44.376939Z","iopub.status.idle":"2022-02-08T06:50:44.385017Z","shell.execute_reply.started":"2022-02-08T06:50:44.376897Z","shell.execute_reply":"2022-02-08T06:50:44.384237Z"}}
    import copy
    import os

    import numpy as np
    from sklearn.preprocessing import LabelEncoder
    from torch.utils.data import DataLoader, Dataset, SubsetRandomSampler


    # %% [code] {"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2022-02-08T06:50:44.544403Z","iopub.execute_input":"2022-02-08T06:50:44.544836Z","iopub.status.idle":"2022-02-08T06:51:32.858293Z","shell.execute_reply.started":"2022-02-08T06:50:44.544639Z","shell.execute_reply":"2022-02-08T06:51:32.853793Z"}}
    directory = '../input/patreco3-multitask-affective-music/data/fma_genre_spectrograms/'

    train_loader_mel, val_loader_mel, test_loader_mel = train_test_val_splitter(directory)

    # %% [code] {"execution":{"iopub.status.busy":"2022-02-08T06:51:32.862310Z","iopub.execute_input":"2022-02-08T06:51:32.863718Z","iopub.status.idle":"2022-02-08T06:51:44.964878Z","shell.execute_reply.started":"2022-02-08T06:51:32.863661Z","shell.execute_reply":"2022-02-08T06:51:44.964106Z"},"jupyter":{"outputs_hidden":false}}
    directory_beat = '../input/patreco3-multitask-affective-music/data/fma_genre_spectrograms_beat/'

    train_loader_beat_mel, val_loader_beat_mel, test_loader_beat_mel = train_test_val_splitter(directory_beat)

    # %% [code] {"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2022-02-08T06:51:44.966153Z","iopub.execute_input":"2022-02-08T06:51:44.966414Z","iopub.status.idle":"2022-02-08T06:51:47.544861Z","shell.execute_reply.started":"2022-02-08T06:51:44.966381Z","shell.execute_reply":"2022-02-08T06:51:47.544106Z"}}
    directory_beat = '../input/patreco3-multitask-affective-music/data/fma_genre_spectrograms_beat/'

    train_loader_beat_chroma, val_loader_beat_chroma, test_loader_beat_chroma = train_test_val_splitter(directory_beat, chroma = True)

    # %% [code] {"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2022-02-08T06:51:47.546239Z","iopub.execute_input":"2022-02-08T06:51:47.546510Z","iopub.status.idle":"2022-02-08T06:51:49.961608Z","shell.execute_reply.started":"2022-02-08T06:51:47.546473Z","shell.execute_reply":"2022-02-08T06:51:49.960847Z"}}
    train_loader, val_loader, test_loader = train_test_val_splitter(directory_beat, fused = True)

    # %% [code] {"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2022-02-08T06:51:49.962820Z","iopub.execute_input":"2022-02-08T06:51:49.965991Z","iopub.status.idle":"2022-02-08T06:51:57.477460Z","shell.execute_reply.started":"2022-02-08T06:51:49.965950Z","shell.execute_reply":"2022-02-08T06:51:57.476752Z"}}
    beat_mel_specs_nomap = SpectrogramDataset(
         directory_beat,
         train=True,
         max_length=-1,
         read_spec=read_spectrogram,
         chroma = False)

    labels_before = []
    for i in range(len(beat_mel_specs_nomap)):
        _, label, _ = beat_mel_specs_nomap[i]
        labels_before.append(label)

    beat_mel_specs = SpectrogramDataset(
         directory_beat,
         train=True,
         max_length=-1,
         class_mapping = class_mapping,
         read_spec=read_spectrogram,
         chroma = False)

    labels_after = []
    for i in range(len(beat_mel_specs)):
        _, label, _ = beat_mel_specs[i]
        labels_after.append(label)

    # %% [code] {"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2022-02-08T06:51:57.478617Z","iopub.execute_input":"2022-02-08T06:51:57.480071Z","iopub.status.idle":"2022-02-08T06:51:57.494731Z","shell.execute_reply.started":"2022-02-08T06:51:57.480031Z","shell.execute_reply":"2022-02-08T06:51:57.494079Z"}}
    datasets = {
    'train_loader_mel': train_loader_mel,
    'train_loader_beat_mel': train_loader_beat_mel,
    'train_loader_beat_chroma': train_loader_beat_chroma,
    'train_loader': train_loader,
    'test_loader_mel': test_loader_mel,
    'test_loader_beat_mel': test_loader_beat_mel,
    'test_loader_beat_chroma': test_loader_beat_chroma,
    'test_loader': test_loader,
    'val_loader_mel': val_loader_mel,
    'val_loader_beat_mel': val_loader_beat_mel,
    'val_loader_beat_chroma': val_loader_beat_chroma,
    'val_loader': val_loader}

    # %% [code] {"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2022-02-08T06:51:57.496406Z","iopub.execute_input":"2022-02-08T06:51:57.496674Z","iopub.status.idle":"2022-02-08T06:51:57.893288Z","shell.execute_reply.started":"2022-02-08T06:51:57.496639Z","shell.execute_reply":"2022-02-08T06:51:57.892588Z"}}
    fig = plt.figure(figsize = (10, 10))
    ax = fig.add_subplot(2, 1, 1)

    ax.hist(labels_before)
    ax.set_title('Before Mapping')

    ax = fig.add_subplot(2, 1, 2)

    ax.hist(labels_after)
    ax.set_title('After Mapping')

    # %% [code] {"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2022-02-08T06:51:57.894677Z","iopub.execute_input":"2022-02-08T06:51:57.895758Z","iopub.status.idle":"2022-02-08T06:51:57.900074Z","shell.execute_reply.started":"2022-02-08T06:51:57.895433Z","shell.execute_reply":"2022-02-08T06:51:57.899271Z"}}
    # Define useful parameters that are the same for all the models.
    num_mel = 128
    num_chroma = 12
    n_classes = 10

    # %% [code] {"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2022-02-08T06:51:57.901419Z","iopub.execute_input":"2022-02-08T06:51:57.902290Z","iopub.status.idle":"2022-02-08T06:52:17.971310Z","shell.execute_reply.started":"2022-02-08T06:51:57.902251Z","shell.execute_reply":"2022-02-08T06:52:17.970564Z"}}
    RNN_SIZE = 32
    EPOCHS = 2000
    model = BasicLSTM(num_mel, RNN_SIZE, n_classes, 1, bidirectional=True)
    loss_function = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr = 0.001)

    DEVICE = torch.device('cuda')

    model.double()
    model.to(DEVICE)

    train_dataset(EPOCHS, train_loader_beat_mel, model, loss_function, optimizer, batch_overfit = True, model_name = './overfit_model')

    # %% [code] {"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2022-02-08T06:52:17.972685Z","iopub.execute_input":"2022-02-08T06:52:17.973089Z","iopub.status.idle":"2022-02-08T06:53:09.472187Z","shell.execute_reply.started":"2022-02-08T06:52:17.973050Z","shell.execute_reply":"2022-02-08T06:53:09.471012Z"}}
    model = BasicLSTM(num_mel, 32, 10, 1, bidirectional=True)
    train_eval_model(500, model, './mel_32_500', 'loader_mel')

    # %% [code] {"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2022-02-08T06:53:09.473892Z","iopub.execute_input":"2022-02-08T06:53:09.474163Z","iopub.status.idle":"2022-02-08T06:54:51.458511Z","shell.execute_reply.started":"2022-02-08T06:53:09.474126Z","shell.execute_reply":"2022-02-08T06:54:51.457765Z"}}
    model = BasicLSTM(num_mel, 64, 10, 1, bidirectional=True)
    train_eval_model(1000, model, './mel_64_1000', 'loader_mel')

    # %% [code] {"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2022-02-08T06:54:51.460139Z","iopub.execute_input":"2022-02-08T06:54:51.460661Z","iopub.status.idle":"2022-02-08T06:54:56.913416Z","shell.execute_reply.started":"2022-02-08T06:54:51.460621Z","shell.execute_reply":"2022-02-08T06:54:56.912717Z"}}
    model = BasicLSTM(num_mel, 32, 10, 1, bidirectional=True)
    train_eval_model(500,model, './beat_mel_32_500', 'loader_beat_mel')

    # %% [code] {"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2022-02-08T06:54:56.914795Z","iopub.execute_input":"2022-02-08T06:54:56.915622Z","iopub.status.idle":"2022-02-08T06:55:08.681901Z","shell.execute_reply.started":"2022-02-08T06:54:56.915584Z","shell.execute_reply":"2022-02-08T06:55:08.681046Z"}}
    model = BasicLSTM(num_mel, 64, 10, 1, bidirectional=True)
    train_eval_model(1000, model, './beat_mel_64_1000', 'loader_beat_mel')

    # %% [code] {"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2022-02-08T06:55:08.683129Z","iopub.execute_input":"2022-02-08T06:55:08.685875Z","iopub.status.idle":"2022-02-08T06:55:13.779169Z","shell.execute_reply.started":"2022-02-08T06:55:08.685831Z","shell.execute_reply":"2022-02-08T06:55:13.777692Z"}}
    model = BasicLSTM(num_chroma, 32, 10, 1, bidirectional=True)
    train_eval_model(500, model, './beat_chroma_32_500', 'loader_beat_chroma')

    # %% [code] {"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2022-02-08T06:55:13.780615Z","iopub.execute_input":"2022-02-08T06:55:13.780882Z","iopub.status.idle":"2022-02-08T06:55:24.514526Z","shell.execute_reply.started":"2022-02-08T06:55:13.780845Z","shell.execute_reply":"2022-02-08T06:55:24.513786Z"}}
    model = BasicLSTM(num_chroma, 64, 10, 1, bidirectional=True)
    train_eval_model(1000, model, './beat_chroma_64_1000', 'loader_beat_chroma')

    # %% [code] {"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2022-02-08T06:55:24.515658Z","iopub.execute_input":"2022-02-08T06:55:24.516057Z","iopub.status.idle":"2022-02-08T06:55:30.083081Z","shell.execute_reply.started":"2022-02-08T06:55:24.516020Z","shell.execute_reply":"2022-02-08T06:55:30.082310Z"}}
    model = BasicLSTM(num_chroma + num_mel, 32, 10, 1, bidirectional=True)
    train_eval_model(500, model, './fused_32_500', 'loader')

    # %% [code] {"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2022-02-08T06:55:30.100806Z","iopub.execute_input":"2022-02-08T06:55:30.101138Z","iopub.status.idle":"2022-02-08T06:55:30.111431Z","shell.execute_reply.started":"2022-02-08T06:55:30.101096Z","shell.execute_reply":"2022-02-08T06:55:30.110574Z"}}
    model = net
    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.double()
    model.to(DEVICE)
    loss_function = nn.CrossEntropyLoss().to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5).to(DEVICE)
    EPOCHS = np.linspace(500, 1500, 3)

    # %% [markdown]
    # # MNIST ConvNetJS 
    # The model trained has the following layers:
    # * 2 Convolution layers with a ReLU activation function, stride = 1, pad = 2, num_filters = 8, 16 respectively.
    # * 2 Pool Layers with max pooling, stride = 2, stride = 3
    # * FC layer with softmax activation
    # 
    # 
    # ## Convolution Layer: 
    # This layer is used to filter the image by applying learnable weights to a subset of the inout matrix.
    # In this model the first layer uses 6 5 by 5 filters and the second 16 5 by 5 filters. By filtering the image we achieve a form of feature selection from the initial image. The end result for every filter is called feature map, as it maps the initial image to some of its significant features. After that we use ReLU.
    # 
    # ## ReLU activation function:
    # The convolution operator used above is a linear operator. While useful for feature selection, real data exhibit great nonlinearity. Therefore we need to add some form of nonlinearity to our model, which is what ReLU accomplises.
    # 
    # ## Pool Layer:
    # Pooling or specifically max pooling selects the biggest value inside in a spatial area defined by pooling size. In our case we use 8 2 by 2 matrices in the pooling layer. The main utilities of pooling are:
    # 
    # * Decreasing the number of parameters and computations making the model manageable and controlling overfitting.
    # * Makes the model invariant to small variations in the input data.
    # 
    # ## Fully Connected Layer:
    # This layer is basically an MLP with the use of a softmax activation function in the end.
    # The purpose of this layer is to classify the image shown in a specifc digit (0-9).
    # The output from the convolution and pooling layer represents high-level features which are then used by the fully connected layer to predict the digit shown.

    # %% [code] {"jupyter":{"outputs_hidden":false}}'''

'if __name__ == \'__main__\': \n\n    # %% [code] {"execution":{"iopub.status.busy":"2022-02-08T06:50:41.886063Z","iopub.execute_input":"2022-02-08T06:50:41.886368Z","iopub.status.idle":"2022-02-08T06:50:42.818285Z","shell.execute_reply.started":"2022-02-08T06:50:41.886332Z","shell.execute_reply":"2022-02-08T06:50:42.817544Z"}}\n    directory = \'../input/patreco3-multitask-affective-music/data/fma_genre_spectrograms/train_labels.txt\'\n    train_data = pd.read_csv(directory, sep = \'\t\')\n    number_samples = train_data.shape[0]\n    sample_rows = np.random.randint(number_samples, size = 2)\n\n    while train_data.iloc[sample_rows[0]][1] == train_data.iloc[sample_rows[1]][1]:\n\n        sample_rows[1] = np.random.randint(number_samples)\n\n    directory = \'../input/patreco3-multitask-affective-music/data/fma_genre_spectrograms/train/\'\n    samples = [train_data.iloc[elem][0] for elem in sample_rows]\n    genre = [train_data.iloc[elem][1] for elem in sample_rows]\n\n    samples = [e