# Data & Training

In [None]:
def parse_dataset(length=256):
    """Build per-shard dataset files and merge them into one tensor file.

    ``retrieve_data`` is called in slices of ``num_per`` entries, first for
    the mtg-jamendo folder and then for the latents folder. Each slice is
    saved as ``dataset{k}.pt`` (k counting up from 1 across both sources) in
    the ``length_{length}`` output folder. Every shard written is then
    reloaded, concatenated with a single ``torch.cat`` call and saved as
    ``full_dataset.pt`` in the same folder.

    Args:
        length: Forwarded to ``retrieve_data`` as ``sample_length``; it also
            selects the ``length_{length}`` output folder.
    """
    # Sub-directory handed to retrieve_data. Values tried previously:
    # "latents\\", "reconstruction_test_latents\\", mtg-jamendo.
    directory = ""
    num_per = 1000
    output_root = f"E:\\SongsDataset\\length_{length}\\"

    # (source root, exclusive upper bound of the start offsets) per collection.
    # All backslashes are escaped explicitly; a lone "\S" is an invalid escape.
    sources = (
        ("E:\\SongsDataset\\mtg-jamendo\\", 16150),
        ("E:\\SongsDataset\\latents\\", 3975),
    )

    shard_paths = []
    for source_root, total in sources:
        for start in tqdm(range(0, total, num_per)):
            shard = retrieve_data(source_root, directory, start=start, count=num_per, sample_length=length)
            shard_path = f"{output_root}dataset{len(shard_paths) + 1}.pt"
            torch.save(shard, shard_path)
            shard_paths.append(shard_path)

    # Merge exactly the shards written above. The shard count is derived from
    # the list rather than hard-coded, so the last shard is never left out.
    full_dataset = torch.cat([torch.load(shard_path) for shard_path in tqdm(shard_paths)])

    torch.save(full_dataset, f"{output_root}full_dataset.pt")

In [None]:
# Build the shard files and the merged dataset for a sample length of 256.
parse_dataset(length=256)

In [None]:
import torch.optim as optim
from torch.utils.data import DataLoader
import numpy as np
from tqdm import tqdm

import torch

from Training import train, evaluate
from Data import AudioDataset, retrieve_data

In [None]:
# ==== Model & Optimizer ====
# Transformer architecture settings (passed to AudioTransformer below).
d_model = 256
num_heads = 8
num_layers = 8
dim_feedforward = 1024
latent_space = 512

# Data settings: sample_length selects the dataset folder and the model length.
sample_length = 1024
batch_size = 16

# Device the model is moved to before training.
device = "cuda"

In [None]:
# Only the first shard (dataset1.pt) is loaded here; the commented-out line
# below loads the merged full_dataset.pt instead.
#full_dataset = torch.load(f"E:\\SongsDataset\\length_{sample_length}\\full_dataset.pt")
full_dataset = torch.load(f"E:\\SongsDataset\\length_{sample_length}\\dataset1.pt")

In [None]:
from torch.utils.data import random_split

# The dataset must be 3-D for this unpacking to succeed; num_samples is the
# size of its first axis.
num_samples, seq_length, embed_dim = full_dataset.shape

# 90 % of the samples go to training, the remainder to testing.
train_len = int(num_samples * 0.9)
train_set, test_set = random_split(full_dataset, [train_len, num_samples - train_len])

train_dataset, test_dataset = AudioDataset(train_set), AudioDataset(test_set)

# Both loaders use the same batch size and shuffle on every pass.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_dataloader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=True,
)

In [None]:
from Loss import combined_loss
from AudioTransformer import AudioTransformer

# Build the model from the hyperparameters set earlier in the notebook;
# RoPE is enabled and ALiBi disabled for this run.
model = AudioTransformer(
    d_model=d_model,
    num_heads=num_heads,
    transformer_layers=num_layers,
    dim_feedforward=dim_feedforward,
    latent_space=latent_space,
    length=sample_length,
    dropout=0.1,
    name_extension="-fft-cos-only_RoPE",
    use_rope=True,
    use_alibi=False,
)

In [None]:
# Count the elements of every parameter that has requires_grad set.
model_parameters = (p for p in model.parameters() if p.requires_grad)
params = sum(np.prod(p.size()) for p in model_parameters)
print(params)

In [None]:
# Move the model to the target device, then build AdamW over its parameters
# (no weight decay).
model = model.to(device)
optimizer = optim.AdamW(
    model.parameters(),
    lr=3e-5,
    weight_decay=0,
)

In [None]:
# Train for 10 epochs with the combined loss, then evaluate on the test loader.
train(
    model,
    train_dataloader,
    test_dataloader,
    optimizer,
    num_epochs=10,
    device=device,
    loss_func=combined_loss,
)
evaluate(model, test_dataloader)

# Reconstruction Output & Playback

In [None]:
from torch.utils.data import random_split

# Load the examples used for the reconstruction test.
# Every backslash in the Windows paths is escaped explicitly: a lone "\S" is an
# invalid escape sequence, which Python warns about. The resulting path string
# is unchanged.
reconstruction_examples = retrieve_data(
    "E:\\SongsDataset\\",
    "reconstruction_test_latents\\",
    sample_length=256,
)

In [None]:
# Splitting with sizes [N, 0] puts every example into song_set and leaves an
# empty remainder, which is discarded.
song_set, _ = random_split(
    dataset=reconstruction_examples,
    lengths=[len(reconstruction_examples), 0],
)
song_dataset = AudioDataset(song_set)
song_dataloader = DataLoader(
    song_dataset,
    batch_size=batch_size,
    shuffle=True,
)

In [None]:
# NOTE: weights_only=False unpickles the whole model object, which can run
# arbitrary code. Only load checkpoints from a trusted source.
# The path has no placeholders, so it is a plain string rather than an f-string.
model = torch.load(
    "AudioTransformerDeepCNN-LatentSpace512-Heads8-TrasformerLayers8-DModel256-Dropout0.1-fft-cos\\-Epoch-14.pt",
    weights_only=False,
)
# evaluate() is given the DataLoader, matching how it is called after training
# (there it receives test_dataloader, not the underlying dataset).
evaluate(model, song_dataloader)

In [None]:
from Loss import combined_loss

device = "cuda"

# Inference mode on the target device.
model.eval()
model.to(device)

total_loss = 0.0
num_batches = 0
new_song = []

# Run every batch through the model without tracking gradients, keeping the
# reconstructions on the CPU and accumulating the loss.
with torch.no_grad():
    for batch in tqdm(song_dataloader):
        batch = batch.to(device)
        reconstructed = model(batch)

        loss = combined_loss(reconstructed, batch)
        total_loss += loss.item()
        num_batches += 1

        # Extending with a tensor appends one entry per item along dim 0.
        new_song.extend(reconstructed.cpu())

# NOTE(review): song_dataloader is created with shuffle=True, so the entries
# are stacked in iteration order — confirm that is the intended song order.
# NOTE(review): reshape(64, -1) does not transpose; confirm the model output
# layout makes this the intended 2-D arrangement.
l = np.stack(new_song).reshape(64, -1)
np.save("reconstructed_song-2D-256-Campfire.npy", l)

In [None]:
device = "cuda"

# Inference mode on the target device.
model.eval()
model.to(device)

total_loss = 0.0
num_batches = 0

# NOTE: this rebinds latent_space, which earlier in the notebook holds the
# integer latent size passed to the model constructor.
latent_space = []

# Encode every batch without tracking gradients and keep the results on the CPU.
with torch.no_grad():
    for batch in tqdm(song_dataloader):
        batch = batch.to(device)
        latent = model.to_latent(batch)

        # Extending with a tensor appends one entry per item along dim 0.
        latent_space.extend(latent.cpu())
        num_batches += 1

In [None]:
# Print the length of new_song at each of its first three nesting levels,
# one value per line.
print(len(new_song), len(new_song[0]), len(new_song[0][0]), sep="\n")

In [None]:
import IPython
from music2latent import EncoderDecoder
import numpy as np

# Instantiate music2latent's EncoderDecoder; its decode() is called on the
# saved array in a later cell.
encdec = EncoderDecoder()

In [None]:
# Load a saved reconstruction from disk. Note this is the "Banana" file, not
# the "Campfire" file written by the reconstruction cell above.
l = np.load("reconstructed_song-2D-256-Banana.npy")

In [None]:
# Decode the loaded array with the music2latent EncoderDecoder.
# compressed_song = np.load("reconstructed_song-256-FFT.npy")
wv_rec = encdec.decode(l)

In [None]:
# Render an inline audio player for the decoded result at 44.1 kHz.
IPython.display.display(
    IPython.display.Audio(data=wv_rec, rate=44100)
)

In [None]:
# Same display call as the previous cell: shows an audio player for wv_rec at
# a 44100 Hz rate.
IPython.display.display(IPython.display.Audio(wv_rec, rate=44100))

In [None]:
from tqdm import tqdm
import torch
from Data import AudioDataset, retrieve_data

# Data Output

In [None]:
# Load the saved model object (weights_only=False loads the full pickled
# object, so the checkpoint must come from a trusted source).
model = torch.load(
    "final-models\\AudioTransformerDeepCNN-LatentSpace512-Heads8-TrasformerLayers8-DModel256-Dropout0.1-fft\\-Epoch-10.pt",
    weights_only=False,
)
model.to("cuda")
# Switch to evaluation mode.
model.eval()

In [None]:
# Retrieve the data under the latents folder with keep_song_data_option
# enabled; the sub-directory argument is left empty.
directory = ""
spotify_dataset = retrieve_data(
    "E:\\SongsDataset\\latents\\",
    directory,
    sample_length=256,
    keep_song_data_option=True,
)

In [None]:
def compress_song_average(song, model):
    """Encode ``song`` with ``model.to_latent`` and average over the first axis.

    The latent output is moved to the CPU, summed along dim 0 and divided by
    ``len(song)``. Everything runs with gradient tracking disabled.

    Args:
        song: Input passed unchanged to ``model.to_latent``; its ``len`` is
            the divisor.
        model: Object exposing a ``to_latent`` method.

    Returns:
        The dim-0 sum of the CPU latent tensor divided by ``len(song)``.
    """
    with torch.no_grad():
        latents_on_cpu = model.to_latent(song).to('cpu')
        chunk_count = len(song)
        return latents_on_cpu.sum(dim=0) / chunk_count

In [None]:
from Analyzer.Webscraper.Data import chunk_song
import os

def compute(model, name):
    """Write one averaged latent vector per song to a text file.

    Every entry under the latents folder is chunked with ``chunk_song``,
    reshaped to ``(-1, 256, 64)``, moved to the GPU and reduced with
    ``compress_song_average``. Each output line holds the latent values, each
    followed by a single space, and then the entry name in double quotes.

    Args:
        model: Model passed to ``compress_song_average``; it is moved to CUDA
            and put in eval mode first.
        name: Suffix used in the output file name ``output-{name}.csv``.
    """
    model.to("cuda")
    model.eval()

    path = "E:\\SongsDataset\\latents\\"
    all_folders = os.listdir(path)

    # The context manager flushes and closes the file even if a song fails
    # midway; the file handle was previously never closed.
    with open(f"output_analysis\\output-{name}.csv", "w", encoding='utf-8') as file:
        for each_song in tqdm(all_folders):
            song_path = os.path.join(path, each_song)

            padded_data = chunk_song(song_path, 256)
            input_tensor = torch.Tensor(padded_data).reshape(-1, 256, 64).to("cuda")

            latent = compress_song_average(input_tensor, model)

            # Build the row in one pass instead of repeated string concatenation.
            values = "".join(f"{value.item()} " for value in latent)

            file.write(f"{values}\"{each_song}\"\n")

In [None]:
# Export averaged latents for the final DeepCNN checkpoint.
model = torch.load(
    "final-models\\AudioTransformerDeepCNN-LatentSpace512-Heads8-TrasformerLayers8-DModel256-Dropout0.1-fft\\-Epoch-10.pt",
    weights_only=False,
)
compute(model, name="CNN-FINAL")

In [None]:
# Export averaged latents for the final SingleLinearReconstruction checkpoint.
model = torch.load(
    "final-models\\AudioTransformerSingleLinearReconstruction-LatentSpace512-Heads16-TrasformerLayers16-DModel256-Dropout0.1-fft\\-Epoch-10.pt",
    weights_only=False,
)
compute(model, name="Linear-FINAL")

In [None]:
# Export averaged latents for the CNNReconstruction (fft-cos) checkpoint.
model = torch.load(
    "AudioTransformerCNNReconstruction-LatentSpace64-Heads8-TrasformerLayers8-DModel256-Dropout0.1-fft-cos\\-Epoch-10.pt",
    weights_only=False,
)
compute(model, name="CNN-FFT-COS")

# Data Analysis