# Evaluation of Different Models

In [1]:
import json
import scipy
import torch

import numpy as np
import pandas as pd

from src.models import classification_transformer

In [2]:
embedding_transformer = classification_transformer.Main("large")

In [3]:
data = pd.read_csv('../Data/data_styled.csv')
for col in ["chords", "style_s", "style_m", "style_l"]:
    data[col] = data[col].apply(lambda x: torch.tensor(json.loads(x)))
data.head()

Unnamed: 0,url,title,artist,decade,genre,ratings,stars,chords,style_s,style_m,style_l
0,https://tabs.ultimate-guitar.com/tab/jeff-buck...,Hallelujah,Jeff Buckley,1990,Rock|Folk,51639.0,5.0,"[tensor(685), tensor(677), tensor(685), tensor...","[tensor(-0.3856), tensor(-1.9832), tensor(2.09...","[tensor(1.3062), tensor(-0.7475), tensor(0.239...","[tensor(-1.0692), tensor(0.6523), tensor(1.242..."
1,https://tabs.ultimate-guitar.com/tab/ed-sheera...,Perfect,Ed Sheeran,2010,Pop,44194.0,5.0,"[tensor(162), tensor(34), tensor(685), tensor(...","[tensor(-0.4196), tensor(-2.5276), tensor(0.92...","[tensor(1.3084), tensor(-0.2184), tensor(0.411...","[tensor(-1.5094), tensor(1.2023), tensor(2.055..."
2,https://tabs.ultimate-guitar.com/tab/elvis-pre...,Cant Help Falling In Love,Elvis Presley,1960,"Soundtrack|R&B, Funk & Soul",30059.0,5.0,"[tensor(685), tensor(162), tensor(677), tensor...","[tensor(-1.4292), tensor(-2.7096), tensor(3.19...","[tensor(1.6858), tensor(-0.6888), tensor(-0.22...","[tensor(-2.5916), tensor(0.9577), tensor(0.642..."
3,https://tabs.ultimate-guitar.com/tab/eagles/ho...,Hotel California,Eagles,1970,Rock,28670.0,5.0,"[tensor(173), tensor(422), tensor(397), tensor...","[tensor(-1.1563), tensor(-2.2659), tensor(3.00...","[tensor(1.7773), tensor(-0.7144), tensor(-0.03...","[tensor(-2.1881), tensor(0.8039), tensor(-0.08..."
4,https://tabs.ultimate-guitar.com/tab/radiohead...,Creep,Radiohead,1990,Rock,28606.0,5.0,"[tensor(162), tensor(83), tensor(685), tensor(...","[tensor(-0.4531), tensor(-3.7440), tensor(1.14...","[tensor(1.9167), tensor(0.1738), tensor(-0.065...","[tensor(-2.5974), tensor(0.5083), tensor(0.480..."


In [4]:
# Split the data into train and test sets as in the previous notebooks
import torch
from torch.utils.data import random_split

torch.manual_seed(42)

class ChordDataset: # A dummy dataset class
    def __init__(self, data):
        self.data = data
    
    def __len__(self):
        return len(self.data)

dataset = ChordDataset(data)
train_size = int(np.rint(len(dataset) * 0.8))
train_indices, test_indices = random_split(range(len(dataset)), [train_size, len(dataset) - train_size])

# Convert the indices to lists
train_indices = [idx for idx in train_indices.indices]
test_indices = [idx for idx in test_indices.indices]

# Split the dataframe using the indices
train_data = data.iloc[train_indices]
test_data = data.iloc[test_indices]

# Reindex the dataframes
train_data = train_data.reset_index(drop=True)
test_data = test_data.reset_index(drop=True)

In [5]:
print(f"Train size: {len(train_data)}, Test size: {len(test_data)}")

Train size: 17790, Test size: 4448


## Fr√©chet Feature Distance

In [6]:
class Evaluation:
    def __init__(self, ref_column):
        self.ref_column = ref_column        
        self.augmentation_map = torch.tensor(np.load('../Data/augmentation_map.npy', allow_pickle=True))
        self.augmentation_map = self.augmentation_map.to(embedding_transformer.device)

    
    def augment(self, chords):
        """Change the root note of the chords by a random amount"""
        move_by = torch.randint(0, 12, [1]).item()
        return self.augmentation_map[chords, move_by]
        
    def pad(self, chords):
        """Pad the input 2D tensor [n] into shape [256] with zeros"""
        out = torch.zeros((256), dtype=torch.long, device=embedding_transformer.device)
        out[:len(chords)] = chords
        return out
    
    def reduce_dimensionality(self, column, batch_size=64, augment=False):
        """Use a pretrained classifier to reduce the dimensionality of the dataframes."""
        
        reduced_column = []
        n = len(column)
        
        for i in range(0, n, batch_size):
            # Extract the current batch of data
            batch = column[i:i+batch_size]
            
            # Convert batch to torch tensor and process it
            batch_tensor = [torch.tensor(item.tolist(), dtype=torch.long, device=embedding_transformer.device) for item in batch]
            if augment:
                batch_tensor = torch.stack([self.pad(self.augment(item)) for item in batch_tensor])
            else:
                batch_tensor = torch.stack([self.pad(item) for item in batch_tensor])
            
            # Get embeddings for the entire batch and append to reduced_column
            batch_embeddings = embedding_transformer.batch_extract_features(batch_tensor)
            reduced_column.extend(batch_embeddings)
            
        return reduced_column
    
    def frechet_distance(self, mu1, mu2, sigma1, sigma2):
        """
        Compute the Frechet distance between two multivariate Gaussians.
        
        Args:
            mu1, mu2: mean vectors (1D numpy arrays)
            sigma1, sigma2: covariance matrices (2D numpy arrays)
            
        Returns:
            The Frechet distance between the two distributions.
        """
        mu_diff = mu1 - mu2
        # The following line computes (sigma1 * sigma2)^(1/2) using the matrix square root
        sqrt_sigma = scipy.linalg.sqrtm(np.dot(sigma1, sigma2))
        
        # Handling numerical instability (may occur if matrices are nearly singular)
        if not np.isfinite(sqrt_sigma).all():
            offset = np.eye(sigma1.shape[0]) * 1e-10
            sqrt_sigma = scipy.linalg.sqrtm(np.dot(sigma1 + offset, sigma2 + offset))
        
        # Compute the trace term
        tr_term = np.trace(sigma1 + sigma2 - 2 * sqrt_sigma)
        
        # Compute the difference term
        diff_term = np.dot(mu_diff, mu_diff)
        
        return diff_term + tr_term
    
    def calculate_frechet_distance(self, ref_column, gen_column):
        """Calculate the Frechet distance between the reference and generated samples for the reduced columns."""
        ref_column, gen_column = np.array(ref_column), np.array(gen_column)
        mu1 = np.mean(ref_column, axis=0)
        mu2 = np.mean(gen_column, axis=0)

        sigma1 = np.cov(ref_column, rowvar=False)
        sigma2 = np.cov(gen_column, rowvar=False)

        # Compute the Frechet distance
        return self.frechet_distance(mu1, mu2, sigma1, sigma2)
    
    def preprocess_ref_col(self):
        """Reduce the dimensionality of the reference column."""
        self.ref_reduced_column = self.reduce_dimensionality(self.ref_column, augment=True)
    
    def get_column_score(self, gen_column):
        """Get the score for a generated column."""
        gen_reduced_column = self.reduce_dimensionality(gen_column)
        distances = self.calculate_frechet_distance(self.ref_reduced_column, gen_reduced_column)
        return distances

In [7]:
torch.manual_seed(42)
eval = Evaluation(test_data["chords"])
eval.preprocess_ref_col()

In [8]:
def evaluate(file_name):
    gen_df = pd.read_csv(f"../Data/Generated/{file_name}.csv")
    gen_df["chords"] = gen_df["chords"].apply(lambda x: torch.tensor(json.loads(x), dtype=torch.long))
    score = eval.get_column_score(gen_df["chords"])
    return score

In [9]:
scores = []
sizes = ["small", "medium", "large"]
files = ["recurrent_net"] + [f"transformer_{s}" for s in sizes] + [f"conditional_{s}" for s in sizes] + [f"style_{s}" for s in sizes]
for file_name in files:
    scores.append({"model": file_name, "score": evaluate(file_name)})
scores = pd.DataFrame(scores)
scores

Unnamed: 0,model,score
0,recurrent_net,9.290163
1,transformer_small,3.201061
2,transformer_medium,2.814373
3,transformer_large,2.315624
4,conditional_small,2.998407
5,conditional_medium,2.075891
6,conditional_large,1.848373
7,style_small,1.727796
8,style_medium,1.13868
9,style_large,0.823672


## Top-1 accuracy & Perplexity

In [10]:
import torch.nn as nn
from src.models import *

In [11]:
VOCAB_SIZE = 1035

class Trainer():
    def __init__(self, model, device):
        self.model = model
        self.device = device

    def loss_mask(self, y_pred, x):
        """Get the mask for the loss function, so that the loss is not calculated for the padded elements"""
        eos_index = torch.argmax((x == VOCAB_SIZE - 1).float(), dim=1)
        range_tensor = torch.arange(y_pred.shape[1]).unsqueeze(0).expand(y_pred.shape[0], -1).to(self.device)
        mask = range_tensor <= eos_index.unsqueeze(1)
        return mask
    
    def masked_accuracy(self, y_pred, x):
        """Calculate the accuracy of the model only for the elements of the sequence"""
        y_pred, x = y_pred[:, :-1], x[:, 1:]
        eos_index = torch.argmax((x == VOCAB_SIZE - 1).float(), dim=1)
        range_tensor = torch.arange(y_pred.shape[1]).unsqueeze(0).expand(y_pred.shape[0], -1).to(self.device)
        # EOS is not included in the accuracy calculation
        mask = range_tensor < eos_index.unsqueeze(1)
        valid_elements = torch.sum(mask.float())

        same = torch.argmax(y_pred, dim=2) == x
            
        return torch.sum(same.float() * mask.float()) / valid_elements
    
    def masked_cross_entropy(self, logits, target, mask):
        """
        logits: Predictions from the model, of shape (batch_size, sequence_length, vocab_size)
        target: Ground truth labels, of shape (batch_size, sequence_length)
        mask: Binary mask indicating the non-padded parts, of shape (batch_size, sequence_length)
        """

        # Compute the raw CrossEntropyLoss
        loss = nn.CrossEntropyLoss(reduction='none')(logits.transpose(1, 2), target)

        # Apply the mask to the loss
        masked_loss = loss * mask.float()

        # Compute the mean loss over non-padded parts
        final_loss = masked_loss.sum() / mask.float().sum()

        return final_loss
                   
        
    def eval_step(self, data_loader, model_name):
        self.model.eval()
        
        average_loss = 0
        average_accuracy = 0
        for i, x in enumerate(data_loader):
            if "conditional" in model_name:
                x, style, _ = x
                x, style = x.to(self.device), style.to(self.device)
                with torch.inference_mode():
                    y_pred = self.model(x, style)
            elif "style" in model_name:
                x, _, styles = x
                style = styles[["small", "medium", "large"].index(model_name.split("_")[-1])]
                x, style = x.to(self.device), style.to(self.device)
                with torch.inference_mode():
                    y_pred = self.model(x, style)
            else:
                x, _, _ = x
                x = x.to(self.device)
                with torch.inference_mode():
                    y_pred = self.model(x)

            mask = self.loss_mask(y_pred, x)            
            loss = self.masked_cross_entropy(y_pred[:, :-1], x[:, 1:], mask[:, :-1])
            accuracy = self.masked_accuracy(y_pred, x)

            average_loss += loss.item()
            average_accuracy += accuracy.item()
            
        average_loss /= len(data_loader)
        average_accuracy /= len(data_loader)   
        
        return np.exp(average_loss), average_accuracy

In [12]:
# Create data loaders for the train and test sets
from torch.utils.data import DataLoader, Dataset

class ChordDataset(Dataset):
    def __init__(self, data):
        self.data = data
        self.all_genres = data["genre"].apply(lambda x: x.split("|")).explode().unique()
        self.all_decades = data["decade"].unique()
        self.genre_to_idx = {genre: i for i, genre in enumerate(self.all_genres)}
        self.decade_to_idx = {decade: i for i, decade in enumerate(self.all_decades)}

    def pad(self, chords):
        """Pad the input tensor of shape [n] into shape [256] with zeros and special tokens"""
        out = torch.zeros((256))
        out[0] = VOCAB_SIZE - 2 # Start of sequence token
        out[1 : 1 + len(chords)] = chords
        out[1 + len(chords)] = VOCAB_SIZE - 1 # End of sequence token
        return out

    def multi_hot(self, genres, decade):
        """Convert a list of genres and a decade into a multi-hot vector"""
        genre_style = torch.zeros((len(self.all_genres)))
        genre_style[[self.genre_to_idx[genre] for genre in genres]] = 1
        genre_style /= genre_style.sum() # Normalize as there can be multiple genres
        
        decade_style = torch.zeros((len(self.all_decades)))
        decade_style[self.decade_to_idx[decade]] = 1
        
        return torch.cat([genre_style, decade_style])

    def __getitem__(self, index):
        row = self.data.iloc[index]
        x = row["chords"]
        x = self.pad(x).long()
        
        genres = self.multi_hot(row["genre"].split("|"), row["decade"])
        styles = row["style_s"], row["style_m"], row["style_l"]
        
        return x, genres, styles

    def __len__(self):
        return len(self.data)

In [13]:
torch.manual_seed(42)

dataset = ChordDataset(data)

train_size = np.rint(len(dataset) * 0.8).astype(int)
train_data, test_data = random_split(dataset, [train_size, len(dataset) - train_size])

# Define the dataloaders
train_loader = DataLoader(train_data, batch_size=128)
test_loader = DataLoader(test_data, batch_size=128)

In [14]:
# Initialize all models
recurrent_net = recurrent_network.Main().model

# Initialize models with variants
variants = ["small", "medium", "large"]
transformer_nets = [transformer.Main(variant).model for variant in variants]
conditional_nets = [conditional_transformer.Main(variant).model for variant in variants]
style_nets = [style_transformer.Main(variant).model for variant in variants]

# Convert all models to ONNX
models = [recurrent_net] + transformer_nets + conditional_nets + style_nets
model_names = ["recurrent_net"] + [f"transformer_{v}" for v in variants] + [f"conditional_{v}" for v in variants] + [f"style_{v}" for v in variants]


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

scores = []
for model, name in zip(models, model_names):
    trainer = Trainer(model, device)
    
    train_perplexity, train_accuracy = trainer.eval_step(train_loader, name)
    test_perplexity, test_accuracy = trainer.eval_step(test_loader, name) 

    scores.append({"model": name, "train_perplexity": train_perplexity, "train_accuracy": train_accuracy, "test_perplexity": test_perplexity, "test_accuracy": test_accuracy})
scores = pd.DataFrame(scores)
scores

Unnamed: 0,model,train_perplexity,train_accuracy,test_perplexity,test_accuracy
0,recurrent_net,4.927489,0.560048,4.850823,0.565912
1,transformer_small,3.156535,0.687364,3.123427,0.690507
2,transformer_medium,2.682259,0.737712,2.677365,0.739093
3,transformer_large,2.505561,0.759765,2.513159,0.760409
4,conditional_small,3.547666,0.645767,3.520874,0.648063
5,conditional_medium,2.785376,0.727548,2.773981,0.729867
6,conditional_large,2.551218,0.751437,2.561826,0.751403
7,style_small,3.495391,0.62248,3.473176,0.625798
8,style_medium,2.581267,0.726275,2.560228,0.728987
9,style_large,2.25228,0.763176,2.263705,0.763126
