In [1]:
import torch.nn

def generate_evaluation_dataset(model, dataset_name, dataloader, name, chunking=True, averaging=False, chunk_size=256):
    """Generate and store the latents of one dataset via ``get_and_save_latents``.

    ``model.mask_ratio`` is set to 0.0 before extraction and is left at that
    value afterwards (presumably this disables input masking -- confirm against
    the model implementation). A status line is printed once the call returns.

    Args:
        model: Encoder whose latents are extracted; mutated (``mask_ratio``).
        dataset_name: Dataset identifier forwarded to ``get_and_save_latents``.
        dataloader: Source of the samples to encode.
        name: Run/model label forwarded to ``get_and_save_latents``.
        chunking, averaging, chunk_size: Forwarded unchanged as keyword arguments.
    """
    setattr(model, "mask_ratio", 0.0)
    extraction_options = {
        "chunking": chunking,
        "averaging": averaging,
        "chunk_size": chunk_size,
    }
    get_and_save_latents(dataloader, model, dataset_name, name, **extraction_options)
    print("Saving...")

In [2]:
from utils.visualization import visualize_ROC_PR_AUC
import torch.nn

from sklearn.metrics import f1_score, roc_auc_score, average_precision_score
from sklearn.metrics import accuracy_score
from torch import nn, optim
from sklearn.metrics import r2_score

class MLP(nn.Module):
    """Probe head with a single hidden layer.

    Architecture: ``Linear(input, 512) -> ReLU -> Dropout(dropout) -> Linear(512, output)``.
    The layers live in ``self.model`` (an ``nn.Sequential``), so state-dict keys
    are ``model.0.*`` and ``model.3.*``.
    """

    def __init__(self, input=128, output=10, dropout=0.1):
        super().__init__()
        hidden_width = 512
        # Layers are created in forward order so parameter initialisation
        # consumes the RNG in the same sequence as the layers are applied.
        stack = [
            nn.Linear(in_features=input, out_features=hidden_width),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(in_features=hidden_width, out_features=output),
        ]
        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Return the raw (un-normalised) outputs for ``x``."""
        logits = self.model(x)
        return logits


class LinearProbe(nn.Module):
    """Single affine layer mapping ``input`` features to ``output`` values.

    The layer is stored as ``self.model``, so state-dict keys are
    ``model.weight`` and ``model.bias``.
    """

    def __init__(self, input=128, output=10):
        super().__init__()
        self.model = nn.Linear(in_features=input, out_features=output)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        projected = self.model(x)
        return projected


# ----- Training Loop -----
def train_model(model, train_dataloader, test_dataloader, config, device="cuda", use_tqdm=False, criterion=nn.CrossEntropyLoss(), early_stopping=True, patience_limit=16):
    """Train ``model`` in place with Adam and optional early stopping.

    Args:
        model: Module to optimise; returned after training.
        train_dataloader: Iterable yielding ``(labels, data)`` batches.
        test_dataloader: Validation iterable with the same batch layout, or
            ``None`` to train for the full ``config.num_epochs``.
        config: Object exposing ``learning_rate`` and ``num_epochs``; an optional
            ``weight_decay`` attribute is forwarded to Adam (missing/None -> 0).
        device: Device that batches are moved to.
        use_tqdm: Show a progress bar over epochs (and during validation).
        criterion: Loss module. ``nn.BCEWithLogitsLoss`` switches label handling
            to the multi-label layout; anything else squeezes dim 1 of labels
            that have more than one dimension.
        early_stopping: Stop once the validation metric has been below the best
            value seen for ``patience_limit`` consecutive epochs. Ignored when
            ``test_dataloader`` is ``None`` (there is nothing to monitor).
        patience_limit: Number of consecutive non-improving epochs tolerated.

    Returns:
        The trained ``model`` (same object that was passed in).
    """
    # The config carries a weight_decay that the grid search sweeps over; it has
    # to reach the optimiser for that sweep to have any effect.
    weight_decay = getattr(config, "weight_decay", None) or 0.0
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate, weight_decay=weight_decay)

    model.train()
    # -inf rather than 0 so metrics that can be negative (e.g. R^2) are handled.
    best_metric = float("-inf")
    patience = 0
    monitor_validation = early_stopping and test_dataloader is not None
    is_multilabel = isinstance(criterion, nn.BCEWithLogitsLoss)

    itr = range(config.num_epochs)
    if use_tqdm:
        # Imported here so the call works even when the notebook-level name
        # `tqdm` is bound to the module (``import tqdm``) instead of the class.
        from tqdm import tqdm
        itr = tqdm(itr)

    for _ in itr:
        for labels, data in train_dataloader:
            if is_multilabel:
                # Labels arrive as a sequence of tensors; rearrange them into a
                # float (batch, classes) matrix as BCEWithLogitsLoss expects.
                labels = torch.stack(labels).permute(1, 0, 2).squeeze(2).float()
            elif labels.ndim > 1:
                # Drop a singleton dim 1, e.g. (batch, 1) -> (batch,).
                labels = labels.squeeze(1)

            data = data.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(data)

            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        if not monitor_validation:
            continue

        results = evaluate_model(model, test_dataloader, device=device, use_tqdm=use_tqdm, criterion=criterion)
        model.train()  # evaluate_model switches the module to eval mode

        # evaluate_model returns 2 or 3 values depending on the criterion; the
        # second entry is the monitored score in every case (accuracy, second
        # R^2, or mean average precision).
        metric = results[1]
        if metric < best_metric:
            patience += 1
        else:
            patience = 0
            best_metric = metric

        if patience >= patience_limit:
            break

    return model


# Evaluation
def evaluate_model(model, dataloader, device="cuda", use_tqdm=False, criterion=nn.CrossEntropyLoss(), roc_pr_auc=False):
    """Run ``model`` over ``dataloader`` and compute criterion-specific metrics.

    Args:
        model: Module to evaluate; it is put into eval mode and left there.
        dataloader: Iterable yielding ``(labels, data)`` batches.
        device: Device that input batches are moved to.
        use_tqdm: Wrap the dataloader in a progress bar.
        criterion: Selects the task type by its class (see Returns).
        roc_pr_auc: Accepted for call compatibility; not used by this function.

    Returns:
        * ``nn.MSELoss``: ``(r2_first, r2_second)`` -- R^2 of output columns 0
          and 1 (exactly two regression targets are assumed).
        * ``nn.BCEWithLogitsLoss``: ``(mean_roc_auc, mean_average_precision)``
          averaged over the classes that have both positives and negatives.
        * otherwise: ``(macro_f1, accuracy, (all_predictions, all_labels))``
          where the last entry holds per-sample CPU tensors.
    """
    model.eval()
    all_predictions = []
    all_labels = []

    is_multilabel = isinstance(criterion, nn.BCEWithLogitsLoss)
    is_multiclass = isinstance(criterion, nn.CrossEntropyLoss)

    itr = dataloader
    if use_tqdm:
        # Imported here so the call works even when the notebook-level name
        # `tqdm` is bound to the module (``import tqdm``) instead of the class.
        from tqdm import tqdm
        itr = tqdm(dataloader)

    with torch.no_grad():
        # Iterate the (possibly wrapped) iterable so the progress bar advances.
        for labels, data in itr:
            if is_multilabel:
                # Labels arrive as a sequence of tensors; rearrange them into a
                # float (batch, classes) matrix.
                labels = torch.stack(labels).permute(1, 0, 2).squeeze(2).float()
            elif labels.ndim > 1:
                # Drop a singleton dim 1, e.g. (batch, 1) -> (batch,).
                labels = labels.squeeze(1)

            outputs = model(data.to(device))

            if is_multiclass:
                predicted = outputs.argmax(dim=1).long()
            elif is_multilabel:
                # Keep probabilities: ROC-AUC and average precision are ranking
                # metrics and lose information if fed thresholded 0/1 values.
                predicted = torch.sigmoid(outputs)
            else:
                predicted = outputs

            # Iterating a tensor yields its rows, so extend() stores one entry
            # per sample for every batch size, including a final batch of 1.
            all_predictions.extend(predicted.detach().cpu())
            all_labels.extend(labels.detach().cpu())

    if isinstance(criterion, nn.MSELoss):
        # Flatten everything after the sample axis so columns 0/1 are the targets.
        predictions = torch.stack(all_predictions).reshape(len(all_predictions), -1).numpy()
        targets = torch.stack(all_labels).reshape(len(all_labels), -1).numpy()

        first = r2_score(targets[:, 0], predictions[:, 0])
        second = r2_score(targets[:, 1], predictions[:, 1])

        return first, second

    if is_multilabel:
        aucs, aps = [], []

        scores = torch.stack(all_predictions).numpy()
        targets = torch.stack(all_labels).numpy()

        for i in range(targets.shape[1]):
            # Both metrics are undefined unless the class has at least one
            # positive and one negative sample; such classes become NaN and are
            # ignored by nanmean below.
            if len(np.unique(targets[:, i])) == 2:
                aucs.append(roc_auc_score(targets[:, i], scores[:, i]))
                aps.append(average_precision_score(targets[:, i], scores[:, i]))
            else:
                aucs.append(np.nan)
                aps.append(np.nan)
        return np.nanmean(aucs), np.nanmean(aps)

    f1 = f1_score(all_labels, all_predictions, average="macro")
    accuracy = accuracy_score(all_labels, all_predictions)

    return f1, accuracy, (all_predictions, all_labels)

In [3]:
import tqdm
import torch.nn

from matplotlib import pyplot as plt
from training.evaluation import local_coherence
from utils.Config import Config
from data.data_utils import *
from training.inference import get_and_save_latents
from sklearn.metrics import auc

def local_tag_coherence(latent_dataset, max_k=100, granularity=1):
    """Plot ``local_coherence`` of the stored latents as a function of k.

    Args:
        latent_dataset: Object exposing ``latents`` (paths loadable with
            ``torch.load``) and ``labels``.
        max_k: Exclusive upper bound of the neighbourhood sizes evaluated.
        granularity: Step between consecutive k values (starting at 1).

    Side effects:
        Prints the area under the coherence curve divided by the largest k
        evaluated, and shows a matplotlib figure.
    """
    # Imported here so the call works even when the notebook-level name `tqdm`
    # is bound to the module (``import tqdm``) instead of the class.
    from tqdm import tqdm

    # as_tensor + stack accepts tensors, arrays or lists per file, whereas
    # torch.tensor(list_of_tensors) fails for multi-element tensors.
    loaded = [
        torch.as_tensor(torch.load(path, weights_only=False))
        for path in tqdm(latent_dataset.latents)
    ]
    stacked = torch.stack(loaded)

    if stacked.ndim > 1 and stacked.shape[1] == 1:
        stacked = stacked.squeeze(1)

    # Converted once, outside the loop over k.
    latents = stacked.detach().cpu().numpy()
    labels = np.array(latent_dataset.labels)

    tag_coherence = []
    k_values = []
    for k in tqdm(list(range(1, max_k, granularity))):
        k_values.append(k)
        tag_coherence.append(local_coherence(latents, labels, k=k))

    auc_coh = auc(k_values, tag_coherence)

    # Normalised by the largest k evaluated (same value the loop variable held).
    print('computed AUC using sklearn.metrics.auc: {}'.format(auc_coh / k_values[-1]))
    plt.figure(figsize=(8, 6))
    plt.plot(k_values, tag_coherence, label='Coherence')
    plt.xlabel("K-Neighbors")
    plt.ylabel("Coherence")
    plt.title("Coherence v.s. Neighboorhood Size")
    plt.legend()
    plt.grid(True)
    plt.show()


def grid_search(train_latent_dataset, valid_latent_dataset, num_classes=10, dataset="", criterion=nn.CrossEntropyLoss()):
    """Exhaustively search probe hyper-parameters on a train/validation split.

    For every combination a fresh probe is trained with ``train_model``
    (validation-based early stopping) and scored with ``evaluate_model``.
    The combination with the highest second metric returned by
    ``evaluate_model`` wins.

    Args:
        train_latent_dataset: Dataset used for fitting.
        valid_latent_dataset: Dataset used for early stopping and selection.
        num_classes: Output width of the probe.
        dataset: Label inserted into the config's ``save_path``.
        criterion: Loss module passed to training and evaluation.

    Returns:
        ``[model_type, learning_rate, weight_decay, batch, dropout]`` of the best
        combination (the first combination if no score ever compares greater,
        e.g. when all scores are NaN).
    """
    from itertools import product

    # Imported here so the call works even when the notebook-level name `tqdm`
    # is bound to the module (``import tqdm``) instead of the class.
    from tqdm import tqdm

    model_types = ["MLP"]
    weight_decays = [1e-4, 1e-3]
    learning_rates = [1e-5, 1e-4, 1e-3]
    batch_sizes = [64]
    device = "cuda"

    # Flatten the search space up front (same visiting order as nested loops)
    # so the progress bar total matches the real number of runs.
    combinations = []
    for model_type in model_types:
        dropouts = [0.25, 0.5] if model_type == "MLP" else [0]
        combinations.extend(product([model_type], weight_decays, learning_rates, batch_sizes, dropouts))

    best_params = None
    best_accuracy = float("-inf")  # scores such as R^2 may be negative
    best_F1 = 0

    print("Grid Search...")
    with tqdm(total=len(combinations)) as pb:
        for model_type, weight_decay, learning_rate, batch, dropout in combinations:
            params = [model_type, learning_rate, weight_decay, batch, dropout]
            if best_params is None:
                # Guarantees a usable return value even if nothing "wins".
                best_params = params

            model_name = f"LinearClassifier-{dataset}"
            config = Config(
                save_path=f"trained_models\\{model_name}\\",
                num_epochs=1024,
                learning_rate=learning_rate,
                weight_decay=weight_decay,
                num_workers=2,
                batch_size=batch,
                eval_batch_size=batch,
                dtype=torch.float32,
            )

            train_latent_dataloader = DataLoader(train_latent_dataset, batch_size=batch, shuffle=True)
            # Sample order does not affect the validation metrics.
            test_latent_dataloader = DataLoader(valid_latent_dataset, batch_size=batch, shuffle=False)

            if model_type == "MLP":
                model = MLP(128, num_classes, dropout=dropout).to(device)
            else:
                model = LinearProbe(128, num_classes).to(device)

            train_model(model, train_latent_dataloader, test_latent_dataloader, config, device=device, use_tqdm=False, criterion=criterion)
            results = evaluate_model(model, test_latent_dataloader, device, use_tqdm=False, criterion=criterion)
            # evaluate_model returns 2 or 3 values depending on the criterion;
            # only the first two are needed here.
            f1, accuracy = results[0], results[1]

            if accuracy > best_accuracy:
                best_params = params
                best_accuracy = accuracy
                best_F1 = f1

            pb.update(1)

    print(f"Best Accuracy: {best_accuracy}")
    print(f"Best F1: {best_F1}")

    return best_params

def train_valid(train_latent_dataset, test_latent_dataset, params, num_classes=10, dataset="", criterion=nn.CrossEntropyLoss()):
    """Fit a probe with fixed hyper-parameters and score it on a held-out set.

    Args:
        train_latent_dataset: Dataset the probe is trained on.
        test_latent_dataset: Dataset the trained probe is evaluated on.
        params: ``(model_type, learning_rate, weight_decay, batch, dropout)``.
        num_classes: Output width of the probe.
        dataset: Label inserted into the config's ``save_path``.
        criterion: Loss module passed to training and evaluation.

    Returns:
        Whatever ``evaluate_model`` returns for ``criterion``.

    Side effects:
        Prints a status line and pickles the trained probe to ``model.pt`` in
        the current working directory.
    """
    model_type, learning_rate, weight_decay, batch, dropout = params
    print("Evaluating Dataset")

    model_name = f"LinearClassifier-{dataset}"
    config_fields = {
        "save_path": f"trained_models\\{model_name}\\",
        "num_epochs": 1024,
        "learning_rate": learning_rate,
        "weight_decay": weight_decay,
        "num_workers": 2,
        "batch_size": batch,
        "eval_batch_size": batch,
        "dtype": torch.float32,
    }
    config = Config(**config_fields)

    loader_options = {"batch_size": batch, "shuffle": True}
    train_latent_dataloader = DataLoader(train_latent_dataset, **loader_options)
    test_latent_dataloader = DataLoader(test_latent_dataset, **loader_options)

    device = "cuda"

    # Anything other than "MLP" falls back to the linear probe.
    probe = MLP(128, num_classes, dropout=dropout) if model_type == "MLP" else LinearProbe(128, num_classes)
    probe = probe.to(device)

    # No validation loader and no early stopping: train for all epochs.
    train_model(probe, train_latent_dataloader, None, config, device=device, use_tqdm=True, criterion=criterion, early_stopping=False)
    torch.save(probe, "model.pt")
    return evaluate_model(probe, test_latent_dataloader, device, use_tqdm=True, criterion=criterion, roc_pr_auc=True)

In [4]:
import numpy as np
from sklearn.manifold import TSNE

import matplotlib.pyplot as plt

def tsne(dataset, perplexity=30, n_iter=1000, random_state=42, figsize=(8, 6), save_path=None):
    """Project stored latents to 2-D with t-SNE and show a scatter plot.

    Args:
        dataset: Object exposing ``latents`` (paths loadable with ``torch.load``)
            and ``labels`` (used to colour points; may be ``None``).
        perplexity: Forwarded to ``sklearn.manifold.TSNE``.
        n_iter: Accepted for call compatibility; not forwarded to ``TSNE``.
        random_state: Forwarded to ``TSNE``.
        figsize: Matplotlib figure size.
        save_path: If truthy, the figure is also written there at 300 dpi.

    Raises:
        ValueError: If the stacked latents are not 2-dimensional ``[N, dim]``.
    """
    # Imported here so the call works even when the notebook-level name `tqdm`
    # is bound to the module (``import tqdm``) instead of the class.
    from tqdm import tqdm

    labels = dataset.labels

    # as_tensor + stack accepts tensors, arrays or lists per file, whereas
    # torch.tensor(list_of_tensors) fails for multi-element tensors.
    loaded = [
        torch.as_tensor(torch.load(path, weights_only=False))
        for path in tqdm(dataset.latents)
    ]
    stacked = torch.stack(loaded)

    if stacked.ndim > 1 and stacked.shape[1] == 1:
        stacked = stacked.squeeze(1)

    X = stacked.detach().cpu().numpy()

    if X.ndim != 2:
        raise ValueError(f"Expected latents of shape [N, dim], got {X.shape}")

    # Compute t-SNE
    tsne_model = TSNE(n_components=2, perplexity=perplexity, random_state=random_state)
    X_tsne = tsne_model.fit_transform(X)

    # Plot
    plt.figure(figsize=figsize)
    if labels is not None:
        labels = np.array(labels)
        scatter = plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=labels, cmap='tab10', alpha=0.8)
        plt.legend(*scatter.legend_elements(), title="Classes")
    else:
        plt.scatter(X_tsne[:, 0], X_tsne[:, 1], alpha=0.8)

    plt.title("t-SNE of Latent Embeddings")
    plt.xlabel("t-SNE 1")
    plt.ylabel("t-SNE 2")
    plt.grid(False)
    plt.tight_layout()

    if save_path:
        plt.savefig(save_path, dpi=300)
    plt.show()


In [None]:
from data.data_utils import GS
from torch.utils.data import DataLoader
import torch.nn
from torch.utils.data import ConcatDataset, DataLoader

# GiantSteps data: the plain GS set plus the train/valid splits of GS-MTG.
# Every loader yields one shuffled sample per batch.
directory = "D:\\SongsDataset\\GS\\"
GS_dataset = GS(directory)
GS_dataloader = DataLoader(dataset=GS_dataset, batch_size=1, shuffle=True)

# Both GS-MTG splits are read from the same root directory.
directory = "D:\\SongsDataset\\GS-MTG\\"
GS_MTG_train_dataset = GS(directory, split="train")
GS_MTG_train_dataloader = DataLoader(dataset=GS_MTG_train_dataset, batch_size=1, shuffle=True)

GS_MTG_valid_dataset = GS(directory, split="valid")
GS_MTG_valid_dataloader = DataLoader(dataset=GS_MTG_valid_dataset, batch_size=1, shuffle=True)

def gs_eval(model, name, chunking=True, chunk_size=1024, averaging=True, k=100, already_generated=False,
              granularity=5):
    """Probe-evaluate ``model`` on the GS / GS-MTG datasets (36 classes).

    Optionally extracts latents for GS, GS-MTG-Valid and GS-MTG-Train, runs the
    hyper-parameter grid search on train/valid, retrains on train+valid and
    reports macro-F1 and accuracy on GS.

    Args:
        model: Encoder passed to ``generate_evaluation_dataset``.
        name: Run label; selects the latent directory of each dataset.
        chunking, chunk_size, averaging: Forwarded to latent extraction.
        k, granularity: Only used by the (commented-out) coherence analysis.
        already_generated: Skip latent extraction and reuse latents on disk.
    """
    # `os` is not imported explicitly in any visible cell; import it locally.
    import os

    if not already_generated:
        extraction_jobs = (
            ("GS", GS_dataloader),
            ("GS-MTG-Valid", GS_MTG_valid_dataloader),
            ("GS-MTG-Train", GS_MTG_train_dataloader),
        )
        for dataset_name, loader in extraction_jobs:
            generate_evaluation_dataset(model, dataset_name, loader, name, chunking=chunking,
                                        chunk_size=chunk_size, averaging=averaging)

    num_classes = 36

    def latent_dir(dataset_name):
        """Directory holding the latents of `dataset_name` for this run."""
        return f"D:\\SongsDataset\\{dataset_name}\\latent_datasets\\{name}\\" + "full-set\\"

    for dataset_name in ("GS-MTG-Train", "GS-MTG-Valid", "GS"):
        os.makedirs(latent_dir(dataset_name), exist_ok=True)

    train_latent_dataset = LatentDataset(latent_dir("GS-MTG-Train"), num_classes=num_classes)
    valid_latent_dataset = LatentDataset(latent_dir("GS-MTG-Valid"), num_classes=num_classes)
    test_latent_dataset = LatentDataset(latent_dir("GS"), num_classes=num_classes)
    combined_dataset = ConcatDataset([train_latent_dataset, valid_latent_dataset])

    #tsne(combined_dataset)
    #local_tag_coherence(train_latent_dataset, max_k=k, granularity=granularity)

    params = grid_search(train_latent_dataset, valid_latent_dataset, num_classes, criterion=nn.CrossEntropyLoss())

    for param in params:
        print(param)

    results = train_valid(combined_dataset, test_latent_dataset, params, criterion=nn.CrossEntropyLoss(), num_classes=num_classes)

    # For CrossEntropyLoss, evaluate_model returns
    # (macro_f1, accuracy, (predictions, labels)); index instead of unpacking
    # so the extra third element does not raise.
    f1, accuracy = results[0], results[1]

    print(f"F1: {f1}\t accuracy: {accuracy}")

In [None]:
from torch.utils.data import ConcatDataset, DataLoader
from data.data_utils import MTAT
from data.data_utils import EmoMusic
from torch.utils.data import DataLoader
import torch.nn

# EmoMusic train/valid/test splits, all read from the same root directory.
# Every loader yields one shuffled sample per batch using two worker processes.
directory = "D:\\SongsDataset\\EmoMusic\\"

Emo_train_dataset = EmoMusic(directory, split="train")
Emo_train_dataloader = DataLoader(
    dataset=Emo_train_dataset, batch_size=1, shuffle=True, num_workers=2, prefetch_factor=1
)

Emo_valid_dataset = EmoMusic(directory, split="valid")
Emo_valid_dataloader = DataLoader(
    dataset=Emo_valid_dataset, batch_size=1, shuffle=True, num_workers=2, prefetch_factor=1
)

Emo_test_dataset = EmoMusic(directory, split="test")
Emo_test_dataloader = DataLoader(
    dataset=Emo_test_dataset, batch_size=1, shuffle=True, num_workers=2, prefetch_factor=1
)

def emo_eval(model, name, chunking=True, chunk_size=1024, averaging=True, already_generated=False):
    """Probe-evaluate ``model`` on EmoMusic as a two-target regression task.

    Optionally extracts latents for the three splits, runs the hyper-parameter
    grid search on train/valid with ``nn.MSELoss``, retrains on train+valid and
    reports the two R^2 scores on the test split.

    Args:
        model: Encoder passed to ``generate_evaluation_dataset``.
        name: Run label; selects the latent directory of each split.
        chunking, chunk_size, averaging: Forwarded to latent extraction.
        already_generated: Skip latent extraction and reuse latents on disk.
    """
    # `os` is not imported explicitly in any visible cell; import it locally.
    import os

    if not already_generated:
        extraction_jobs = (
            ("EmoMusic-Train", Emo_train_dataloader),
            ("EmoMusic-Valid", Emo_valid_dataloader),
            ("EmoMusic-Test", Emo_test_dataloader),
        )
        for dataset_name, loader in extraction_jobs:
            generate_evaluation_dataset(model, dataset_name, loader, name, chunking=chunking,
                                        chunk_size=chunk_size, averaging=averaging)

    num_classes = 2

    def latent_dir(dataset_name):
        """Directory holding the latents of `dataset_name` for this run."""
        return f"D:\\SongsDataset\\{dataset_name}\\latent_datasets\\{name}\\" + "full-set\\"

    for dataset_name in ("EmoMusic-Train", "EmoMusic-Valid", "EmoMusic-Test"):
        os.makedirs(latent_dir(dataset_name), exist_ok=True)

    train_latent_dataset = LatentDataset(latent_dir("EmoMusic-Train"), num_classes=num_classes)
    valid_latent_dataset = LatentDataset(latent_dir("EmoMusic-Valid"), num_classes=num_classes)
    test_latent_dataset = LatentDataset(latent_dir("EmoMusic-Test"), num_classes=num_classes)
    combined_dataset = ConcatDataset([train_latent_dataset, valid_latent_dataset])

    #tsne(latent_train_dataset)
    #local_tag_coherence(latent_train_dataset, max_k=k, granularity=granularity)

    #tsne(latent_dataset)
    #local_tag_coherence(combined_dataset, max_k=k, granularity=granularity)

    params = grid_search(train_latent_dataset, valid_latent_dataset, num_classes=num_classes, criterion=nn.MSELoss())
    for param in params:
        print(param)

    results = train_valid(combined_dataset, test_latent_dataset, params, criterion=nn.MSELoss(), num_classes=num_classes)

    # For MSELoss, evaluate_model returns the R^2 of output column 0 and of
    # output column 1 -- these are not F1/accuracy values.
    r2_first, r2_second = results

    print(f"R2 (target 0): {r2_first}\t R2 (target 1): {r2_second}")

In [None]:
import torch.nn

from data.data_utils import GTZAN
from torch.utils.data import DataLoader

# GTZAN train/valid/test splits, all read from the same root directory.
# Every loader yields one shuffled sample per batch using two worker processes.
directory = "D:\\SongsDataset\\GTZAN\\"
GTZAN_train_dataset = GTZAN(directory, split="train")
GTZAN_train_dataloader = DataLoader(
    GTZAN_train_dataset,
    batch_size=1,
    shuffle=True,
    num_workers=2,
    prefetch_factor=1
)

GTZAN_valid_dataset = GTZAN(directory, split="valid")
GTZAN_valid_dataloader = DataLoader(
    GTZAN_valid_dataset,  # was GTZAN_train_dataset: the valid loader must serve the valid split
    batch_size=1,
    shuffle=True,
    num_workers=2,
    prefetch_factor=1
)

GTZAN_test_dataset = GTZAN(directory, split="test")
GTZAN_test_dataloader = DataLoader(
    GTZAN_test_dataset,  # was GTZAN_train_dataset: the test loader must serve the test split
    batch_size=1,
    shuffle=True,
    num_workers=2,
    prefetch_factor=1
)

def gtzan_eval(model, name, chunking=True, chunk_size=256, averaging=True, k=100, already_generated=False, granularity=5):
    """Probe-evaluate ``model`` on GTZAN (10 classes).

    Optionally extracts latents for the three splits, runs the hyper-parameter
    grid search on train/valid, retrains on train+valid and reports macro-F1 and
    accuracy on the test split.

    Args:
        model: Encoder passed to ``generate_evaluation_dataset``.
        name: Run label; selects the latent directory of each split.
        chunking, chunk_size, averaging: Forwarded to latent extraction.
        k, granularity: Only used by the (commented-out) coherence analysis.
        already_generated: Skip latent extraction and reuse latents on disk.
    """
    # `os` is not imported explicitly in any visible cell; import it locally.
    import os

    if not already_generated:
        extraction_jobs = (
            # The train split must be passed as a DataLoader like the other two
            # (the Dataset object was passed here before).
            ("GTZAN_train", GTZAN_train_dataloader),
            ("GTZAN_valid", GTZAN_valid_dataloader),
            ("GTZAN_test", GTZAN_test_dataloader),
        )
        for dataset_name, loader in extraction_jobs:
            generate_evaluation_dataset(model, dataset_name, loader, name, chunking=chunking,
                                        chunk_size=chunk_size, averaging=averaging)

    num_classes = 10

    def latent_dir(dataset_name):
        """Directory holding the latents of `dataset_name` for this run."""
        return f"D:\\SongsDataset\\{dataset_name}\\latent_datasets\\{name}\\" + "full-set\\"

    for dataset_name in ("GTZAN_train", "GTZAN_valid", "GTZAN_test"):
        os.makedirs(latent_dir(dataset_name), exist_ok=True)

    train_latent_dataset = LatentDataset(latent_dir("GTZAN_train"), num_classes=num_classes)
    valid_latent_dataset = LatentDataset(latent_dir("GTZAN_valid"), num_classes=num_classes)
    test_latent_dataset = LatentDataset(latent_dir("GTZAN_test"), num_classes=num_classes)
    combined_dataset = ConcatDataset([train_latent_dataset, valid_latent_dataset])

    #tsne(combined_dataset)
    #local_tag_coherence(combined_dataset, max_k=k, granularity=granularity)

    params = grid_search(train_latent_dataset, valid_latent_dataset, num_classes, criterion=nn.CrossEntropyLoss())
    for param in params:
        print(param)

    results = train_valid(combined_dataset, test_latent_dataset, params, criterion=nn.CrossEntropyLoss(), num_classes=num_classes)

    # For CrossEntropyLoss, evaluate_model returns
    # (macro_f1, accuracy, (predictions, labels)); index instead of unpacking
    # so the extra third element does not raise.
    f1, accuracy = results[0], results[1]

    print(f"F1: {f1}\t accuracy: {accuracy}")

In [5]:
from torch.nn import BCEWithLogitsLoss
from data.data_utils import MTAT
from torch.utils.data import DataLoader
import torch.nn

# MagnaTagATune train/valid/test splits, all read from the same root directory.
# Every loader yields one shuffled sample per batch using two worker processes.
directory = "D:\\SongsDataset\\MTAT\\"

MTAT_train_dataset = MTAT(directory, split="train")
MTAT_train_dataloader = DataLoader(
    dataset=MTAT_train_dataset, batch_size=1, shuffle=True, num_workers=2, prefetch_factor=1
)

MTAT_valid_dataset = MTAT(directory, split="valid")
MTAT_valid_dataloader = DataLoader(
    dataset=MTAT_valid_dataset, batch_size=1, shuffle=True, num_workers=2, prefetch_factor=1
)

MTAT_test_dataset = MTAT(directory, split="test")
MTAT_test_dataloader = DataLoader(
    dataset=MTAT_test_dataset, batch_size=1, shuffle=True, num_workers=2, prefetch_factor=1
)

def mtat_eval(model, name, chunking=True, chunk_size=1024, averaging=True, k=100, already_generated=False, granularity=5):
    """Probe-evaluate ``model`` on MTAT as a 188-tag multi-label task.

    Optionally extracts latents for the three splits, trains a probe with fixed
    hyper-parameters on train+valid using ``nn.BCEWithLogitsLoss``, reports the
    mean ROC-AUC / mean average precision on the test split and plots ROC/PR
    curves from the per-sample test predictions.

    Args:
        model: Encoder passed to ``generate_evaluation_dataset``.
        name: Run label; selects the latent directory of each split.
        chunking, chunk_size, averaging: Forwarded to latent extraction.
        k, granularity: Only used by the (commented-out) coherence analysis.
        already_generated: Skip latent extraction and reuse latents on disk.

    Returns:
        ``(all_predictions, all_labels)``: lists with one CPU tensor per test
        sample (sigmoid probabilities and float targets respectively).
    """
    # `os` is not imported explicitly in any visible cell; import it locally.
    import os

    if not already_generated:
        extraction_jobs = (
            ("MTAT-Train", MTAT_train_dataloader),
            ("MTAT-Valid", MTAT_valid_dataloader),
            ("MTAT-Test", MTAT_test_dataloader),
        )
        for dataset_name, loader in extraction_jobs:
            generate_evaluation_dataset(model, dataset_name, loader, name, chunking=chunking,
                                        chunk_size=chunk_size, averaging=averaging)

    num_classes = 188

    def latent_dir(dataset_name):
        """Directory holding the latents of `dataset_name` for this run."""
        return f"D:\\SongsDataset\\{dataset_name}\\latent_datasets\\{name}\\" + "full-set\\"

    for dataset_name in ("MTAT-Train", "MTAT-Valid", "MTAT-Test"):
        os.makedirs(latent_dir(dataset_name), exist_ok=True)

    train_latent_dataset = LatentDataset(latent_dir("MTAT-Train"), num_classes=num_classes)
    valid_latent_dataset = LatentDataset(latent_dir("MTAT-Valid"), num_classes=num_classes)
    test_latent_dataset = LatentDataset(latent_dir("MTAT-Test"), num_classes=num_classes)

    combined_dataset = ConcatDataset([train_latent_dataset, valid_latent_dataset])

    #tsne(combined_dataset)
    #local_tag_coherence(combined_dataset, max_k=k, granularity=granularity)

    #params = grid_search(train_latent_dataset, valid_latent_dataset, num_classes, criterion=nn.BCEWithLogitsLoss())
    # Fixed (model_type, learning_rate, weight_decay, batch, dropout) used in
    # place of the grid search above.
    params = ("MLP", 0.0001, 0.0001, 64, 0.25)

    for param in params:
        print(param)

    results = train_valid(combined_dataset, test_latent_dataset, params, num_classes=num_classes, criterion=nn.BCEWithLogitsLoss())

    # For BCEWithLogitsLoss, evaluate_model returns
    # (mean ROC-AUC, mean average precision).
    mean_roc_auc, mean_ap = results

    # train_valid only returns aggregate scores, so the per-sample predictions
    # needed for plotting are recomputed with the probe it pickled to model.pt.
    probe = torch.load("model.pt", weights_only=False)
    probe.eval()
    probe_device = next(probe.parameters()).device

    all_predictions, all_labels = [], []
    with torch.no_grad():
        for labels, data in DataLoader(test_latent_dataset, batch_size=params[3], shuffle=False):
            # Same multi-label target layout handling as evaluate_model.
            labels = torch.stack(labels).permute(1, 0, 2).squeeze(2).float()
            probabilities = torch.sigmoid(probe(data.to(probe_device)))
            all_predictions.extend(probabilities.cpu())
            all_labels.extend(labels.cpu())

    # NOTE(review): the input format visualize_ROC_PR_AUC expects is not visible
    # here; per-sample tensor lists mirror evaluate_model's convention -- confirm.
    visualize_ROC_PR_AUC(all_predictions, all_labels)

    print(f"ROC-AUC: {mean_roc_auc}\t AP: {mean_ap}")

    return (all_predictions, all_labels)

  0%|          | 0/25863 [00:00<?, ?it/s]

  audio, sr = librosa.load(full_path, sr=44100, mono=True)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


  0%|          | 0/25863 [00:00<?, ?it/s]

  0%|          | 0/25863 [00:00<?, ?it/s]

In [6]:
def add_fields(model, use_sinusoidal=False, use_y_emb=False,
               use_rope_x=False, use_rope_y=False, rope_base=-1,
               use_alibi_x=False, use_alibi_y=False):
    """Attach positional-encoding configuration attributes to ``model``.

    Always sets ``use_cls=True`` and ``predict_tempo=False``, copies every
    keyword argument onto the attribute of the same name, and derives
    ``needs_coordinates`` from the RoPE/ALiBi flags. When neither ALiBi flag is
    set, ``model.transformer.alibi_2d`` is reset to ``None``.

    Returns:
        The same ``model`` object, mutated in place.
    """
    uses_alibi = use_alibi_x or use_alibi_y

    attributes = {
        "use_cls": True,
        "predict_tempo": False,
        "use_sinusoidal": use_sinusoidal,
        "use_y_emb": use_y_emb,
        "use_rope_x": use_rope_x,
        "use_rope_y": use_rope_y,
        "rope_base": rope_base,
        "use_alibi_x": use_alibi_x,
        "use_alibi_y": use_alibi_y,
        "needs_coordinates": use_rope_x or use_rope_y or use_alibi_x or use_alibi_y,
    }
    for attribute, value in attributes.items():
        setattr(model, attribute, value)

    if not uses_alibi:
        model.transformer.alibi_2d = None

    return model

In [None]:
from torch.utils.data import ConcatDataset, DataLoader

# MTAT evaluation of the sinusoidal checkpoint with chunk_size=256.
# already_generated=True skips latent extraction and reuses latents on disk.
# The returned pair is kept in the kernel for the plotting cell that follows.
# weights_only=False unpickles the whole model object: only load trusted files.
model = torch.load("E:\\Coding\\SongAnalyzer\\Analyzer\\src\\final-models\\Myna-sinusoidal-ALIBI-256L-0.9M\\sinusoidal_Epoch-511.pt", weights_only=False)
model.mask_ratio = 0
model = add_fields(model, use_sinusoidal=True)
name = "Sinusoidal-Chunking-256"
(all_predictions, all_labels) = mtat_eval(model, name, chunking=True, chunk_size=256, averaging=True, already_generated=True)

In [None]:
# Re-draw the ROC / PR curves from the predictions and labels left in the kernel
# by the preceding MTAT cell (fails on a fresh kernel if that cell has not run).
visualize_ROC_PR_AUC(all_predictions, all_labels)

In [None]:
# EmoMusic evaluation of the 1D-ALiBi checkpoint without chunking.
# already_generated=False re-extracts the latents before probing.
# weights_only=False unpickles the whole model object: only load trusted files.
model = torch.load("E:\\Coding\\SongAnalyzer\\Analyzer\\src\\final-models\\Myna-1D-ALIBI-256L-0.9M\\1d_alibi_Epoch-511.pt", weights_only=False)
model.mask_ratio = 0
model = add_fields(model, use_alibi_x=True, use_y_emb=True)
name = "1D-ALIBI-No-Chunking"
emo_eval(model, name, chunking=False, chunk_size=256, averaging=True, already_generated=False)

In [None]:
# Same EmoMusic call as the preceding cell; relies on `model` already being in
# the kernel and re-extracts the latents again (already_generated=False).
name = "1D-ALIBI-No-Chunking"
emo_eval(model, name, chunking=False, chunk_size=256, averaging=True, already_generated=False)

In [None]:
# Load and configure the 2D-ALiBi checkpoint; the chunked EmoMusic run itself is
# commented out, so this cell only (re)binds `model` and `name`.
# weights_only=False unpickles the whole model object: only load trusted files.
model = torch.load("E:\\Coding\\SongAnalyzer\\Analyzer\\src\\final-models\\Myna-2D-ALIBI-256L-0.9M\\2d_alibi_Epoch-511.pt", weights_only=False)
model.mask_ratio = 0
model = add_fields(model, use_alibi_y=True, use_alibi_x=True)
name = "2D-ALIBI-Chunking-256"
#emo_eval(model, name, chunking=True, chunk_size=256, averaging=True, already_generated=True)

In [None]:
# EmoMusic evaluation without chunking using whichever `model` is currently in
# the kernel (expected: the 2D-ALiBi checkpoint); latents are re-extracted.
name = "2D-ALIBI-No-Chunking"
emo_eval(model, name, chunking=False, chunk_size=256, averaging=True, already_generated=False)

In [None]:
# ---------------------------------------------------

In [None]:
# GS evaluation of the sinusoidal checkpoint with chunk_size=256; latents are
# extracted first (already_generated=False). k / granularity only matter for
# the coherence analysis inside gs_eval.
# weights_only=False unpickles the whole model object: only load trusted files.
model = torch.load("E:\\Coding\\SongAnalyzer\\Analyzer\\src\\final-models\\Myna-sinusoidal-ALIBI-256L-0.9M\\sinusoidal_Epoch-511.pt", weights_only=False)
model.mask_ratio = 0
model = add_fields(model, use_sinusoidal=True)
name = "Sinusoidal-Chunking-256"
gs_eval(model, name, chunking=True, chunk_size=256, averaging=True, k=200, already_generated=False, granularity=10)

In [None]:
# GS evaluation of the 1D-ALiBi checkpoint without chunking; latents are
# extracted first (already_generated=False).
# weights_only=False unpickles the whole model object: only load trusted files.
model = torch.load("E:\\Coding\\SongAnalyzer\\Analyzer\\src\\final-models\\Myna-1D-ALIBI-256L-0.9M\\1d_alibi_Epoch-511.pt", weights_only=False)
model.mask_ratio = 0
model = add_fields(model, use_alibi_x=True, use_y_emb=True)
name = "1D-ALIBI-No-Chunking"
gs_eval(model, name, chunking=False, chunk_size=256, averaging=True, already_generated=False)

In [None]:
# Repeats the GS run with the `model` and `name` currently bound in the kernel;
# latents are re-extracted (already_generated=False).
gs_eval(model, name, chunking=False, chunk_size=256, averaging=True, already_generated=False)

In [None]:
# GS evaluation of the 2D-ALiBi checkpoint without chunking, reusing latents
# already on disk (already_generated=True).
# weights_only=False unpickles the whole model object: only load trusted files.
model = torch.load("E:\\Coding\\SongAnalyzer\\Analyzer\\src\\final-models\\Myna-2D-ALIBI-256L-0.9M\\2d_alibi_Epoch-511.pt", weights_only=False)
model.mask_ratio = 0
model = add_fields(model, use_alibi_y=True, use_alibi_x=True)
name = "2D-ALIBI-No-Chunking"
gs_eval(model, name, chunking=False, chunk_size=256, averaging=True, already_generated=True)

In [None]:
# Repeats the GS run with the `model` and `name` currently bound in the kernel,
# this time re-extracting the latents (already_generated=False).
gs_eval(model, name, chunking=False, chunk_size=256, averaging=True, already_generated=False)

In [None]:
# ---------------------------------------------------

In [None]:
# GTZAN evaluation of the sinusoidal checkpoint with chunk_size=256; latents
# are extracted first (already_generated=False). k / granularity only matter
# for the coherence analysis inside gtzan_eval.
# weights_only=False unpickles the whole model object: only load trusted files.
model = torch.load("E:\\Coding\\SongAnalyzer\\Analyzer\\src\\final-models\\Myna-sinusoidal-ALIBI-256L-0.9M\\sinusoidal_Epoch-511.pt", weights_only=False)
model.mask_ratio = 0
model = add_fields(model, use_sinusoidal=True)
name = "Sinusoidal-Chunking-256"
gtzan_eval(model, name, chunking=True, chunk_size=256, averaging=True, already_generated=False, k=200, granularity=10)

In [None]:
# Load and configure the 1D-ALiBi checkpoint; the chunked GTZAN run itself is
# commented out, so this cell only (re)binds `model` and `name`.
# weights_only=False unpickles the whole model object: only load trusted files.
model = torch.load("E:\\Coding\\SongAnalyzer\\Analyzer\\src\\final-models\\Myna-1D-ALIBI-256L-0.9M\\1d_alibi_Epoch-511.pt", weights_only=False)
model.mask_ratio = 0
model = add_fields(model, use_alibi_x=True, use_y_emb=True)
name = "1D-ALIBI-Chunking-256"
#gtzan_eval(model, name, chunking=True, chunk_size=256, averaging=True, already_generated=True, k=200, granularity=10)

In [None]:
# GTZAN evaluation without chunking using the `model` currently in the kernel
# (expected: the 1D-ALiBi checkpoint), reusing latents already on disk.
name = "1D-ALIBI-No-Chunking"
gtzan_eval(model, name, chunking=False, chunk_size=256, averaging=True, already_generated=True, k=200, granularity=10)

In [None]:
# Load and configure the 2D-ALiBi checkpoint; the chunked GTZAN run itself is
# commented out, so this cell only (re)binds `model` and `name`.
# weights_only=False unpickles the whole model object: only load trusted files.
model = torch.load("E:\\Coding\\SongAnalyzer\\Analyzer\\src\\final-models\\Myna-2D-ALIBI-256L-0.9M\\2d_alibi_Epoch-511.pt", weights_only=False)
model.mask_ratio = 0
model = add_fields(model, use_alibi_y=True, use_alibi_x=True)
name = "2D-ALIBI-Chunking-256"
#gtzan_eval(model, name, chunking=True, chunk_size=256, averaging=True, already_generated=True, k=200, granularity=10)

In [None]:
# GTZAN evaluation without chunking using the `model` currently in the kernel
# (expected: the 2D-ALiBi checkpoint); latents are re-extracted.
name = "2D-ALIBI-No-Chunking"
gtzan_eval(model, name, chunking=False, chunk_size=256, averaging=True, already_generated=False, k=200, granularity=10)

In [None]:
# ---------------------------------------------------

In [None]:
# MTAT evaluation of the sinusoidal checkpoint with chunk_size=256, reusing
# latents already on disk (already_generated=True). The return value is discarded.
# weights_only=False unpickles the whole model object: only load trusted files.
model = torch.load("E:\\Coding\\SongAnalyzer\\Analyzer\\src\\final-models\\Myna-sinusoidal-ALIBI-256L-0.9M\\sinusoidal_Epoch-511.pt", weights_only=False)
model.mask_ratio = 0
model = add_fields(model, use_sinusoidal=True)
name = "Sinusoidal-Chunking-256"
mtat_eval(model, name, chunking=True, chunk_size=256, averaging=True, already_generated=True)

In [None]:
# Load and configure the 1D-ALiBi checkpoint; the chunked MTAT run itself is
# commented out, so this cell only (re)binds `model` and `name`.
# weights_only=False unpickles the whole model object: only load trusted files.
model = torch.load("E:\\Coding\\SongAnalyzer\\Analyzer\\src\\final-models\\Myna-1D-ALIBI-256L-0.9M\\1d_alibi_Epoch-511.pt", weights_only=False)
model.mask_ratio = 0
model = add_fields(model, use_alibi_x=True, use_y_emb=True)
name = "1D-ALIBI-Chunking-256"
#mtat_eval(model, name, chunking=True, chunk_size=256, averaging=True, already_generated=True)

In [None]:
# MTAT evaluation without chunking using the `model` currently in the kernel
# (expected: the 1D-ALiBi checkpoint), reusing latents already on disk.
name = "1D-ALIBI-No-Chunking"
mtat_eval(model, name, chunking=False, chunk_size=256, averaging=True, already_generated=True)

In [None]:
# Load and configure the 2D-ALiBi checkpoint; the chunked MTAT run itself is
# commented out, so this cell only (re)binds `model` and `name`.
# weights_only=False unpickles the whole model object: only load trusted files.
model = torch.load("E:\\Coding\\SongAnalyzer\\Analyzer\\src\\final-models\\Myna-2D-ALIBI-256L-0.9M\\2d_alibi_Epoch-511.pt", weights_only=False)
model.mask_ratio = 0
model = add_fields(model, use_alibi_y=True, use_alibi_x=True)
name = "2D-ALIBI-Chunking-256"
#mtat_eval(model, name, chunking=True, chunk_size=256, averaging=True, already_generated=True)

In [None]:
# MTAT evaluation without chunking using the `model` currently in the kernel
# (expected: the 2D-ALiBi checkpoint), reusing latents already on disk.
name = "2D-ALIBI-No-Chunking"
mtat_eval(model, name, chunking=False, chunk_size=256, averaging=True, already_generated=True)