In [1]:
import subprocess

from anyio import sleep

def cluster_elki(name, num_clusters):
    """Run ELKI hierarchical clustering on an exported embedding file.

    Launches ELKI's ``KDDCLIApplication`` from ``elki-bundle-0.8.0.jar`` on
    ``output_analysis/output-{name}.csv`` and asks it to write its results to
    ``output_analysis/elki-TEST-{name}-{num_clusters}``. Both paths and the jar
    are resolved relative to the current working directory.

    Args:
        name: Identifier of the embedding export; used to build the input file
            name and the output location.
        num_clusters: Value passed to ELKI's ``-hierarchical.minclusters``.

    Returns:
        The ``subprocess.CompletedProcess`` of the run (with captured text
        ``stdout``/``stderr``) on success, or ``None`` when ELKI exits with a
        non-zero status. In the failure case ELKI's stderr is printed.
    """
    # Define parameters
    elki_jar = "elki-bundle-0.8.0.jar"
    data_file = f"output_analysis/output-{name}.csv"

    cmd = [
        "java", "-jar", elki_jar,
        "KDDCLIApplication",
        "-dbc.in", data_file,
        # The first -algorithm selects the dendrogram cut, the second the
        # hierarchical algorithm that is handed to it.
        "-algorithm", "clustering.hierarchical.extraction.CutDendrogramByNumberOfClusters",
        "-algorithm", "Anderberg",
        "-algorithm.distancefunction", "CosineDistance",
        "-hierarchical.minclusters", str(num_clusters),
        "-resulthandler", "ResultWriter",
        "-out.gzip", "false",
        "-out", f"output_analysis/elki-TEST-{name}-{num_clusters}",
    ]

    try:
        # check=True turns a non-zero exit status into CalledProcessError.
        return subprocess.run(cmd, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print("An error occurred:\n", e.stderr)
        return None

In [3]:
import os
from concurrent.futures import ThreadPoolExecutor, as_completed

import torch
from mutagen.easyid3 import EasyID3
from tqdm import tqdm

from training.inference import load_and_parse_audio

def process_song(song_path, chunking=True, chunk_size=256):
    """Load one song's audio chunks and read its track id from the ID3 tags.

    The id is the last ``/``-separated component of the first ``albumartist``
    tag value (the variable name suggests the tag stores a URL).

    Args:
        song_path: Path of the audio file to load.
        chunking: Forwarded to ``load_and_parse_audio``.
        chunk_size: Forwarded to ``load_and_parse_audio``.

    Returns:
        ``(song_id, chunks)``. When the file has no usable ``albumartist`` tag
        the result is ``(None, None)``; callers already skip entries whose
        ``chunks`` is ``None``, so such songs are ignored instead of aborting
        the whole run with an ``AttributeError``.
    """
    chunks = load_and_parse_audio(song_path, convert=True, chunking=chunking, chunk_size=chunk_size)

    # A missing tag and a present-but-empty tag are treated the same way.
    tag_values = EasyID3(song_path).get("albumartist") or [None]
    url_from_albumartist = tag_values[0]
    if not url_from_albumartist:
        return None, None

    song_id = url_from_albumartist.split("/")[-1]
    return song_id, chunks

def compute(model, name, batch_size=1, num_workers=4, chunking=True, chunk_size=256, start_partition=1):
    """Embed every song in the dataset folder, loading audio on a thread pool.

    The song list is split into 8 partitions. For each partition the audio
    loading jobs are submitted to a ``ThreadPoolExecutor`` and every finished
    song is passed to ``run_batch``, which appends one line to ``name``.

    Args:
        model: Model handed to ``run_batch`` for every song.
        name: Path of the output file.
        batch_size: Currently unused; kept for interface compatibility.
        num_workers: Number of loader threads.
        chunking: Forwarded to ``process_song``.
        chunk_size: Forwarded to ``process_song``.
        start_partition: 1-based partition to start from. ``1`` truncates the
            output file; any other value appends to it, so an interrupted run
            can be resumed.
    """
    path = "E:\\SongsDataset\\songs\\"
    all_songs = os.listdir(path)
    sign = 'w' if start_partition == 1 else 'a'

    partitions = 8
    num_songs = len(all_songs) // partitions

    with open(name, sign, encoding="utf-8") as f, torch.no_grad(), ThreadPoolExecutor(max_workers=num_workers) as executor:
        # Partitions are 1-based, so the upper bound must be partitions + 1;
        # with range(start_partition, partitions) the last partition was
        # never processed.
        for index in range(start_partition, partitions + 1):
            first = (index - 1) * num_songs
            # The last partition also takes the songs left over by the integer
            # division above.
            last = len(all_songs) if index == partitions else index * num_songs

            # submit all preprocessing jobs of this partition
            futures = [
                executor.submit(process_song, os.path.join(path, song), chunking, chunk_size)
                for song in all_songs[first:last]
            ]

            # consume results as soon as each song has been loaded
            for future in tqdm(as_completed(futures), total=len(futures)):
                song_id, chunks = future.result()
                if chunks is None:
                    continue

                run_batch(model, chunks, f, song_id)


def compute_async(model, name, batch_size=1, num_workers=4, chunking=True, chunk_size=256, start_partition=1):
    """Embed every song in the dataset folder, one after another.

    Despite its name this variant is fully sequential: each song is loaded
    with ``process_song`` and immediately passed to ``run_batch``, which
    appends one line to ``name``. The thread pool the previous version created
    was never used and has been removed.

    Args:
        model: Model handed to ``run_batch`` for every song.
        name: Path of the output file.
        batch_size: Currently unused; kept for interface compatibility.
        num_workers: Currently unused; kept for interface compatibility.
        chunking: Forwarded to ``process_song``.
        chunk_size: Forwarded to ``process_song``.
        start_partition: Only selects the file mode: ``1`` truncates the
            output file, any other value appends to it. All songs are
            processed either way.
    """
    path = "E:\\SongsDataset\\songs\\"
    all_songs = os.listdir(path)
    sign = 'w' if start_partition == 1 else 'a'

    with open(name, sign, encoding="utf-8") as f, torch.no_grad():
        for song in tqdm(all_songs):
            song_id, chunks = process_song(os.path.join(path, song), chunking, chunk_size)
            if chunks is None:
                continue

            run_batch(model, chunks, f, song_id)

def run_batch(model, chunks, file_handle, song_path, max_memory=32):
    """Embed all chunks of one song and write their mean as one text line.

    The chunks are moved to the model's device, given a singleton dimension at
    index 1, split into minibatches and passed through ``model``. The
    per-chunk outputs are averaged and written to ``file_handle`` as
    space-separated numbers followed by the quoted basename of ``song_path``.

    Args:
        model: Callable applied to each minibatch; its output is moved to the
            CPU and iterated along the first dimension.
        chunks: Tensor whose first dimension indexes the chunks of the song.
        file_handle: Writable text file object.
        song_path: Label for the song; only its basename is written.
        max_memory: Currently unused; kept for interface compatibility.

    Returns:
        ``None``. Prints ``Empty Song`` and writes nothing when there is no
        data to embed.
    """
    if chunks.numel() == 0:
        print(f"Empty Song")
        return

    # Use the device the model actually lives on; fall back to the previously
    # hard-coded "cuda" when the model exposes no parameters.
    try:
        device = next(model.parameters()).device
    except (AttributeError, StopIteration, TypeError):
        device = torch.device("cuda")

    chunks = chunks.to(device).unsqueeze(1)

    length = chunks.shape[-1]
    # Keep minibatch_size * length**2 within 2**26 elements, but always allow
    # at least one chunk per minibatch (avoids a division by zero below).
    max_batch_size = max(1, 67108864 // (length ** 2))

    # Ceiling division: rounding down would let a minibatch grow to almost
    # twice the intended limit.
    num_minibatches = -(-chunks.shape[0] // max_batch_size)

    if num_minibatches <= 1:
        data_minibatches = [chunks]
    else:
        data_minibatches = torch.chunk(chunks, num_minibatches, dim=0)

    all_preds = []

    # Inference only; no_grad also keeps the .numpy() call below valid when the
    # caller did not disable autograd itself.
    with torch.no_grad():
        for data_minibatch in data_minibatches:
            outputs = model(data_minibatch)
            all_preds.extend(outputs.cpu())

    if len(all_preds) == 0:
        print(f"Empty Song")
        return

    latents = torch.stack(all_preds, dim=0)
    averages = latents.mean(dim=0).numpy()
    torch.cuda.empty_cache()
    line = " ".join([str(x) for x in averages]) + f" \"{os.path.basename(song_path)}\"\n"
    file_handle.write(line)

In [4]:
# Helper for loading older model versions
def add_fields(model, use_sinusoidal=False, use_y_emb=False,
               use_rope_x=False, use_rope_y=False, rope_base=-1,
               use_alibi_x=False, use_alibi_y=False):
    """Attach the configuration attributes that older saved models lack.

    ``use_cls`` is always set to ``True`` and ``predict_tempo`` to ``False``;
    the remaining attributes are copied from the arguments.
    ``needs_coordinates`` is truthy when any RoPE or ALiBi option is enabled.
    When neither ALiBi option is enabled, ``model.transformer.alibi_2d`` is
    reset to ``None``.

    Returns:
        The same ``model`` object, modified in place.
    """
    any_rope = use_rope_x or use_rope_y
    any_alibi = use_alibi_x or use_alibi_y

    patched_attributes = {
        "use_cls": True,
        "predict_tempo": False,
        "use_sinusoidal": use_sinusoidal,
        "use_y_emb": use_y_emb,
        "use_rope_x": use_rope_x,
        "use_rope_y": use_rope_y,
        "rope_base": rope_base,
        "use_alibi_x": use_alibi_x,
        "use_alibi_y": use_alibi_y,
        "needs_coordinates": any_rope or any_alibi,
    }
    for attribute, value in patched_attributes.items():
        setattr(model, attribute, value)

    if not any_alibi:
        model.transformer.alibi_2d = None

    return model

In [70]:
# Load the fully pickled model (weights_only=False executes pickled code, so
# only use checkpoints from a trusted source) and move it to the GPU.
model = torch.load(
    "E:\\Coding\\SongAnalyzer\\Analyzer\\src\\final-models\\Myna-1D-ALIBI-256L-0.9M\\1d_alibi_Epoch-511.pt",
    weights_only=False,
).to("cuda")

# Disable masking for inference.
# NOTE(review): model.eval() is never called in this notebook - confirm
# whether the model has layers that behave differently in training mode.
model.mask_ratio = 0.0

# Add the attributes this older checkpoint does not carry.
model = add_fields(model, use_y_emb=True, use_alibi_x=True)

In [78]:
# Embed every song with the loaded model; one averaged vector per song is
# written to the CSV below (the recorded run took a little over two hours).
compute_async(
    model,
    name="E:/Coding/SongAnalyzer/Analyzer/src/output_analysis/output-Myna-CLS-ALIBI-1D-Chunking-256.csv",
    chunking=True,
    chunk_size=256,
)

100%|██████████| 7056/7056 [2:09:55<00:00,  1.10s/it]  


In [79]:
# Cluster the exported embeddings into 256 clusters with ELKI.
cluster_elki(name="Myna-CLS-ALIBI-1D-Chunking-256", num_clusters=256)

In [None]:
# NOTE(review): this repeats the embedding export from the earlier cell and
# overwrites the same CSV - confirm whether this cell is still needed.
compute_async(
    model=model,
    name="E:/Coding/SongAnalyzer/Analyzer/src/output_analysis/output-Myna-CLS-ALIBI-1D-Chunking-256.csv",
    chunk_size=256,
    chunking=True,
)

In [5]:
def parse_cluster_file_name(file_path):
    """Collect the last space-separated token of every ``ID`` line in a file.

    Only lines whose first two characters are ``ID`` are considered; all other
    lines are ignored.

    Args:
        file_path: Path of the UTF-8 text file to read.

    Returns:
        A list with one string per matching line, in file order.
    """
    ids = []
    with open(file_path, 'r', encoding="utf-8") as f:
        for line in f:
            if line[:2] != "ID":
                continue
            # Strip only the line terminator. Slicing off the last character
            # unconditionally truncated the id on a final line that has no
            # trailing newline.
            ids.append(line.rstrip("\r\n").split(" ")[-1])

    return ids

def parse_cluster_file_data(file_path):
    """Read embedding vectors and their quoted labels from a text file.

    Each non-blank line is expected to hold whitespace-separated numbers
    followed by a label in double quotes, which is the format ``run_batch``
    writes.

    Args:
        file_path: Path of the UTF-8 text file to read.

    Returns:
        ``(vectors, labels)``: a tensor with one row per line and the list of
        label strings in the same order.

    Raises:
        IndexError: If a non-blank line contains no double quote.
        ValueError: If a value before the label is not a number.
    """
    vectors = []
    labels = []
    with open(file_path, 'r', encoding="utf-8") as f:
        for line in f:
            # Blank lines (e.g. a trailing one) carry no data.
            if not line.strip():
                continue

            parts = line.split("\"")
            # split() without arguments tolerates any amount of whitespace and
            # does not depend on exactly one space preceding the quote.
            values = parts[0].split()
            vectors.append(torch.tensor([float(value) for value in values]))
            labels.append(parts[1])

    return torch.stack(vectors, dim=0), labels

In [4]:
from spotipy import SpotifyException
from asyncio import wait
from spotipy.oauth2 import SpotifyOAuth

import spotipy
import os

# Read the Spotify app credentials from a local file (one value per line:
# client id, client secret, redirect URI) so they are not hard-coded here.
with open("spotify-info.txt") as file:
    # The method is readlines() (readLines does not exist). Each line keeps
    # its trailing newline, which must not end up inside the credentials.
    lines = [line.strip() for line in file.readlines()]
    CLIENT_ID = lines[0]
    CLIENT_SECRET = lines[1]
    REDIRECT_URI = lines[2]

# We'll need all of these permissions
scope = "user-read-private playlist-read-private playlist-modify-private user-library-modify"

sp = spotipy.Spotify(
    auth_manager=SpotifyOAuth(
        client_id=CLIENT_ID,
        client_secret=CLIENT_SECRET,
        redirect_uri=REDIRECT_URI,
        scope=scope
    )
)

# Fetch the logged-in user's profile; the id is needed to create playlists.
# NOTE(review): these prints put account details (including the email
# address) into the saved notebook output - clear the output before sharing.
user = sp.current_user()
print("User ID:", user["id"])
print("Display Name:", user["display_name"])
print("Email:", user.get("email"))
print("Profile URL:", user["external_urls"]["spotify"])

def safe_add_items(playlist_id, items, max_retries=5):
    """Add items to a playlist, retrying on rate limits and gateway errors.

    Args:
        playlist_id: Id of the playlist to extend.
        items: Items passed to ``sp.playlist_add_items`` in a single request.
        max_retries: Maximum number of attempts.

    Returns:
        ``True`` once the request succeeds.

    Raises:
        SpotifyException: For any HTTP status other than 429, 502, 503, 504.
        RuntimeError: When every attempt hit a retryable error.
    """
    # Local import so this block brings its own dependency. The previous
    # asyncio.wait(retry_after) call only created a coroutine that was never
    # awaited, so no delay ever happened between attempts.
    import time

    for attempt in range(max_retries):
        try:
            sp.playlist_add_items(playlist_id, items)
            return True
        except SpotifyException as e:
            if e.http_status not in (429, 502, 503, 504):
                raise

            backoff = 2 ** attempt
            # headers may be None, and Retry-After is not guaranteed to be a
            # plain integer; fall back to exponential backoff in both cases.
            headers = e.headers or {}
            try:
                retry_after = int(headers.get("Retry-After", backoff))
            except (TypeError, ValueError):
                retry_after = backoff

            print(f"Rate limited or gateway error, retrying in {retry_after}s...")
            time.sleep(retry_after)
    raise RuntimeError("Max retries reached while adding items to playlist.")

def create_playlist(path_file, name, track_ids=None):
    """Create a Spotify playlist named ``Clustering / {name}`` and fill it.

    Args:
        path_file: File to read the track ids from with
            ``parse_cluster_file_name``; only used when ``track_ids`` is
            ``None``.
        name: Suffix of the playlist name.
        track_ids: Optional explicit list of track ids. When given,
            ``path_file`` is ignored.
    """
    # NOTE(review): the playlist is created with public=True while the
    # requested OAuth scope only lists playlist-modify-private - confirm this
    # combination is intended.
    playlist = sp.user_playlist_create(
        user=user["id"],
        name=f"Clustering / {name}",
        public=True,
        description="Created via the Spotify API!"
    )

    playlist_id = playlist["id"]
    print("Created playlist:", playlist["external_urls"]["spotify"])

    if track_ids is None:
        track_ids = parse_cluster_file_name(path_file)

    # Tracks are added in slices of 100. Going through safe_add_items means a
    # rate limit or gateway error in the middle of a long list is retried
    # instead of aborting with a half-filled playlist.
    for i in range(0, len(track_ids), 100):
        safe_add_items(playlist_id, track_ids[i:i+100])

    print("Tracks added successfully!")

User ID: g47dvcltndgtgav7sgqsia10p
Display Name: Gorp
Email: potatoeunbeatable@gmail.com
Profile URL: https://open.spotify.com/user/g47dvcltndgtgav7sgqsia10p


In [None]:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
import matplotlib as mpl
from scipy.spatial import ConvexHull

# Render labels with matplotlib's built-in mathtext (no external LaTeX) and
# use the regular font for math text.
mpl.rcParams.update({
    'text.usetex': False,
    'mathtext.default': 'regular',
})

def safe_label(s):
    """Return ``s`` with a backslash in front of each of ``$ ^ _ { } # % &``."""
    escape_table = str.maketrans({ch: f'\\{ch}' for ch in '$^_{}#%&'})
    return s.translate(escape_table)

# Needs at least one point for each dimension in song vectors.
# Solution could be to generate similar embeddings by adding *very small* amounts of random noise?
# Worth looking into
def get_similarity_hull(data, labels, interested_ids, k=100):
    """Rank all songs by their depth inside the convex hull of selected songs.

    The rows of ``data`` whose label is in ``interested_ids`` span a convex
    hull; every row of ``data`` is then scored with ``hull_space`` and the
    ``k`` best labels are returned via ``get_k_most_similar_maha``.

    Args:
        data: Embedding matrix, one row per song (indexed with a list of row
            positions and multiplied with torch tensors below).
        labels: One label per row of ``data``.
        interested_ids: Labels of the songs that define the hull.
        k: Number of labels to return.

    Returns:
        The list returned by ``get_k_most_similar_maha``, or ``None`` when
        ``interested_ids`` is empty.
    """
    length = len(interested_ids)
    if length == 0:
        return

    # Row positions of the songs that define the hull.
    index = [i for i, x in enumerate(labels) if x in interested_ids]
    hull_points = data[index]

    hull = ConvexHull(hull_points)

    # Each row of hull.equations is one facet hyperplane: [normal, offset].
    A = hull.equations[:, :-1]  # facet normals
    b = hull.equations[:, -1]   # facet offsets
    A = torch.tensor(A, dtype=torch.float32)
    b = torch.tensor(b, dtype=torch.float32)

    def hull_space(points):
        # Signed distance of every point to every facet plane, normalised by
        # the length of the facet normal.
        normA = torch.linalg.norm(A, dim=1)

        signed = (points @ A.T + b) / normA

        # The score is the negated largest signed distance. With SciPy's
        # convention (interior points satisfy normal . x + offset <= 0) it is
        # positive for points inside the hull - TODO confirm.
        depth = -torch.max(signed, dim=1).values

        return depth

    # NOTE(review): this alternative score is defined but never used below.
    # It also estimates the covariance from `points` (the data being scored)
    # rather than from `hull_points` - confirm which one is intended before
    # wiring it in.
    def mahalanobis(points):
        mean = hull_points.mean(axis=0, dtype=float)
        cov = np.cov(points, rowvar=False, dtype=float)
        inv_cov = np.linalg.inv(cov)

        diffs = points - mean

        d2 = np.einsum('ni,ij,nj->n', diffs, inv_cov, diffs)
        distances = np.sqrt(d2)

        # Maps a distance of 0 to a score of 1, decaying towards 0.
        return np.exp(-distances)

    return get_k_most_similar_maha(data, labels, hull_space, k=k)


def cosine_sim(track_data, interested_point):
    """Cosine similarity between every row of two 2-D tensors.

    Both inputs are L2-normalised along dimension 1; the result holds the dot
    product of each normalised ``track_data`` row with each normalised
    ``interested_point`` row.
    """
    unit_tracks = F.normalize(track_data, p=2, dim=1)
    unit_queries = F.normalize(interested_point, p=2, dim=1)
    return unit_tracks.matmul(unit_queries.T)


def get_k_most_similar_maha(data, labels, maha_dist, k=100):
    """Print and return the labels of the ``k`` highest-scoring rows.

    Args:
        data: Passed unchanged to ``maha_dist``.
        labels: One label per score returned by ``maha_dist``.
        maha_dist: Callable that maps ``data`` to one score per row; higher
            scores rank first.
        k: Maximum number of labels to return.

    Returns:
        Up to ``k`` labels as strings, best score first.
    """
    scores = torch.Tensor(maha_dist(data))
    _, order = torch.sort(scores, dim=0, descending=True)

    ranked_labels = np.array(labels)[order.numpy()]

    # Slicing already copes with fewer than k entries.
    best = [str(entry) for entry in ranked_labels[:k]]

    for entry in best:
        print(entry)

    return best


def get_k_most_similar(data, labels, point, k=100):
    """Print and return the labels of the ``k`` rows most similar to ``point``.

    Similarity is the cosine similarity between each row of ``data`` and each
    row of ``point``, averaged over the rows of ``point``.

    Args:
        data: Embedding matrix, one row per label.
        labels: One label per row of ``data``.
        point: One or more query rows.
        k: Maximum number of labels to return.

    Returns:
        Up to ``k`` labels as strings, most similar first.
    """
    # Average over the query rows so every song gets a single score.
    mean_similarity = cosine_sim(data, point).mean(dim=1)
    _, order = torch.sort(mean_similarity, dim=0, descending=True)

    ranked_labels = np.array(labels)[order.cpu().numpy()]

    # Slicing already copes with fewer than k entries.
    nearest = [str(entry) for entry in ranked_labels[:k]]

    for entry in nearest:
        print(entry)

    return nearest

def parse_data(path_file, songs_to_display=[1]):
    """Plot, for selected songs, a heat strip of their similarity to all songs.

    Reads the embeddings with ``parse_cluster_file_data``, computes the full
    pairwise cosine-similarity matrix and draws one figure per entry of
    ``songs_to_display``: a single-column image of the sorted similarities
    with the song labels on the y axis.

    Args:
        path_file: Embedding file understood by ``parse_cluster_file_data``.
        songs_to_display: Row indices of the reference songs to plot. The
            default list is shared between calls but never modified here.

    Returns:
        ``None``; the figures are shown with ``plt.show()``.
    """
    track_data, ids = parse_cluster_file_data(path_file)

    # Normalize embeddings to unit length
    track_norm = F.normalize(track_data, p=2, dim=1)

    # Compute cosine similarity matrix
    similarity_matrix = track_norm @ track_norm.T  # [N, N]
    ids = np.array(ids)

    # Sort each row so that the most similar songs come first
    sorted_x, indices = torch.sort(similarity_matrix, descending=True)

    # Plot for each selected song
    for song_idx in songs_to_display:
        sim_values = sorted_x[song_idx].cpu().numpy()
        sim_labels = ids[indices[song_idx].cpu().numpy()]

        ref_song_id = ids[song_idx]
        # NOTE(review): ref_pos is computed but not used afterwards.
        ref_pos = np.where(sim_labels == ref_song_id)[0][0]

        # Truncate to top 250 + bottom 250
        top_k, bottom_k = 250, 250
        if len(sim_values) > (top_k + bottom_k):
            sim_values_trunc = np.concatenate((sim_values[:top_k], sim_values[-bottom_k:]))
            sim_labels_trunc = np.concatenate((sim_labels[:top_k], sim_labels[-bottom_k:]))
        else:
            sim_values_trunc = sim_values
            sim_labels_trunc = sim_labels

        # Check if reference is visible
        ref_in_range = np.where(sim_labels_trunc == ref_song_id)[0]
        ref_in_range = ref_in_range[0] if len(ref_in_range) > 0 else None

        # Figure height grows with the number of rows (0.25 inch per song).
        plt.figure(figsize=(2.2, len(sim_values_trunc) * 0.25), dpi=150)
        # [:, None] turns the vector into a single-column image.
        plt.imshow(sim_values_trunc[:, None], cmap='viridis', aspect='auto', interpolation='none')

        # Highlight reference song
        if ref_in_range is not None:
            plt.axhline(ref_in_range, color='red', linewidth=1.2)
            plt.text(0.5, ref_in_range, f"← {ref_song_id}", color='red', fontsize=7,
                     va='center', ha='left', transform=plt.gca().transData)

        # Add text showing similarity values to the right
        ax = plt.gca()
        for i, val in enumerate(sim_values_trunc):
            ax.text(1.05, i, f"{val:.3f}", color='white', fontsize=6, va='center',
                    transform=ax.transData)

        # Format y labels safely
        plt.yticks(np.arange(len(sim_labels_trunc)), [safe_label(lbl) for lbl in sim_labels_trunc], fontsize=6)
        plt.xticks([])
        plt.title(f"Similarity to song {safe_label(ref_song_id)}", fontsize=10, pad=10)
        plt.colorbar(label="Cosine similarity", shrink=0.7)

        plt.tight_layout()
        plt.show()


# Playlist generation example on a specific song

In [6]:
# Load the exported embeddings once; later cells reuse `track_data` and `ids`.
path = "E:/Coding/SongAnalyzer/Analyzer/src/output_analysis/output-Myna-CLS-ALIBI-1D-Chunking-256.csv"
track_data, ids = parse_cluster_file_data(file_path=path)

In [15]:
path = "E:/Coding/SongAnalyzer/Analyzer/src/output_analysis/output-Myna-CLS-ALIBI-1D-Chunking-256.csv"

# Seed track(s) the playlist is built around.
label = ["0XOMAi1bTq6FLANw5WUzKr"]

# Rows of the embedding matrix that belong to the seed track(s).
index = [row for row, song_id in enumerate(ids) if song_id in label]
interested_point = track_data[index]

# Rank all songs by similarity to the seed and publish the top 114 as a playlist.
most_sim = get_k_most_similar(data=track_data, labels=ids, point=interested_point, k=114)
create_playlist(path_file=None, name="Reflective Summer", track_ids=most_sim)

0XOMAi1bTq6FLANw5WUzKr
07piOqPFq7X0BtdFlodEPe
0QbvYVhykr3lx0iOvr1DYd
0XpJIpi9bLnV2UbjZ6nV9g
3v0eRsjxQcCJfHINOyX7sZ
7wxWlTpMjpF2ztYFePpIXp
0WaaPFt4Qy8sVfxKz43bCD
0DMDOM5l1aYm18rR214KbP
6zRxzFCzowTo6TAzI3dHCC
0Qv7xi6uPSqH2k82tOkGSt
6Vjm8aQFfuNmm7jwgCbAoW
16doMTberqpHTGc467rJBw
68AzhSmdueBmCUaVG2mOwv
4mh0rIKPBhMBAln7t3m0Zk
2a1iMaoWQ5MnvLFBDv4qkf
7upG8kgnfF4yfaOGNKHXJg
5pTD57jhRgNbpyVCHsQvkv
0awZwmlC6pxH65KTZpadmX
0dMTCCAYuGPNHGeaN6s9P3
0ytsgfXzI3NlpHVnBGKJNn
26AYR77170U49cMcXB7aRV
596IkZP89HvFYdgOb3lNvP
1YrU8ExqF04ygegVoOOoFU
1PJRDeZSoZk7gtisdTYfLi
2cDI4xiUEkAgp3gI9iIMk4
0y8uWWRKZrJgbsfy1oUOeF
7MdKQKCcCxu5rLeD8H6tsu
3PfOVqpFVBxf39Qxj6Vs6l
5g7FVko9msJrVqpZlmFmUU
3IbX1Ycd2PD7jwb3rWRX4o
21wC5i3a5qEEPpXn3hXt8V
3zBeDk1Ijt11UkVYaDNHFk
1BvYBXJHCXowsLQ9ZYsCPA
0cnOTTEREe2gKR9ZkZXeQS
4PVrz6sCGjxzNxMw729FVn
6WhoGqTFLJGCgMNUyBDzt5
4ptIWfbb32M6vftqhyt9CK
5UWwZ5lm5PKu6eKsHAGxOk
5ISHFvPLUqKz2JfDRtwnb2
2xHu33sRZaQ3T725emVgbA
1YylK4wwCYiUp1Dv8T2lix
76xIJEP0Fu7ae5PuzR0XG6
1bQu3S9Au6lY3RQ9TbxL3R
6p03ssQvEzW

In [48]:
# Publish the most recent `most_sim` ranking as a playlist.
create_playlist(path_file=None, name="Ambient-Trance", track_ids=most_sim)

Created playlist: https://open.spotify.com/playlist/2J6sswkJYt600TjP6X1mC0
Tracks added successfully!
