In [1]:
# %%capture
!pip install datasets transformers sentence-transformers
!pip install implicit

Collecting datasets
  Downloading datasets-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cu

In [3]:
# Import libraries
import json
from sentence_transformers import SentenceTransformer
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import normalize

# Load your artist data (same as before)
def load_json(path="nlp_artists_filtered.json"):
    """Read the UTF-8 encoded JSON file at `path` and return its parsed content.

    :param path: location of the JSON file; defaults to the filtered artist file.
    :return: whatever Python object the JSON document decodes to.
    """
    with open(path, mode='r', encoding='utf-8') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)

artists = load_json()
seeds = load_json("nlp_seeds_anonymized.json")
artists_spotify = load_json("nlp_artists_filtered_spot.json")

# Copy every non-empty Spotify description onto the matching artist record.
# Every artist ID is looked up directly in `artists_spotify`, so a missing ID raises KeyError.
for artist_id, artist_info in artists.items():
    description = artists_spotify[artist_id].get('spotify_desc', '')
    if description != '':
        artist_info['spotify_desc'] = description

# IDs of the artists that ended up with a Spotify description, in `artists` order
spotify_desc_artists = [
    artist_id
    for artist_id, artist_info in artists.items()
    if 'spotify_desc' in artist_info
]

# Introduction

This experiment will explore the merits of using the cosine similarity between sentence embeddings as a metric for recommending artists.

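First, we have to load the model. A single sentence-embedding model (`all-mpnet-base-v2`) is initialized once and then used to embed each artist's Wikipedia description, name, and (where available) Spotify description.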

In [4]:
# Load the pretrained sentence-embedding model. This one `model` instance is also
# reused by the later cell that encodes artist names and Spotify descriptions.
model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')

# Encode every artist's 'wiki' text, in `artists` iteration order
texts = [artist['wiki'] for artist in artists.values()]
wiki_embeddings_list = model.encode(texts, show_progress_bar=True)

# Last expression of the cell: displays the shape of the encoded output
wiki_embeddings_list.shape

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.4k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Batches:   0%|          | 0/107 [00:00<?, ?it/s]

(3404, 768)

In [5]:
# Artist IDs in `artists` iteration order. The wiki and name embeddings are built
# from `artists.values()`, so row i of those arrays belongs to `ids[i]`.
ids = list(artists.keys())

# Embed every artist's name
names = [artist['name'] for artist in artists.values()]
name_embeddings_list = model.encode(names, show_progress_bar=True)

# Embed Spotify descriptions, only for the artists that actually have one;
# row i of this array belongs to `spotify_desc_artists[i]`.
spotifies = [artists[_id]['spotify_desc'] for _id in spotify_desc_artists]
spotify_embeddings_list = model.encode(spotifies, show_progress_bar=True)

# Sanity checks: the ID lists are zipped with these arrays in the next cell, and
# zip() would silently truncate on a length mismatch.
assert len(name_embeddings_list) == len(ids), "name embeddings are misaligned with ids"
assert len(spotify_embeddings_list) == len(spotify_desc_artists), "spotify embeddings are misaligned with ids"

Batches:   0%|          | 0/107 [00:00<?, ?it/s]

Batches:   0%|          | 0/103 [00:00<?, ?it/s]

In [6]:
# Artist ID -> Wiki embedding (rows of `wiki_embeddings_list` follow the order of `ids`)
wiki_embeddings = dict(zip(ids, wiki_embeddings_list))

# Artist ID -> Name embedding
name_embeddings = dict(zip(ids, name_embeddings_list))

# Artist ID -> Spotify desc embedding (only artists listed in `spotify_desc_artists`)
spotify_embeddings = dict(zip(spotify_desc_artists, spotify_embeddings_list))

# Artist ID -> display name
id_to_name = {artist_id: info['name'] for artist_id, info in artists.items()}

In [7]:
from dataclasses import dataclass
from typing import Optional, List


@dataclass
class RecommendedArtist:
    """One entry of a recommendation list: an artist ID together with its score."""
    # ID of the recommended artist (same key space as the embedding dictionaries)
    id: str
    # Score assigned by the recommender; EmbeddingRecommender stores the cosine
    # similarity to the averaged seed embedding here
    score: float


class EmbeddingRecommender:
    """Recommends artists whose embedding is closest (by cosine similarity) to the
    average embedding of a set of seed artists."""

    def __init__(self, embeddings):
        """
        :param embeddings: mapping from artist ID to that artist's embedding vector.
        """
        self.embeddings = embeddings

    def recommend(self, seed_ids, all_ids, top_n=float('inf')) -> "List[RecommendedArtist]":
        """
        Rank the candidates in `all_ids` by similarity to the seeds.

        :param seed_ids: IDs of the artists to base the recommendation on. They are
            never returned, even if they also appear in `all_ids`.
        :param all_ids: IDs of the candidate artists to rank.
        :param top_n: maximum number of recommendations to return (default: no limit).
        :return: recommendations sorted by descending cosine similarity; candidates
            with equal scores keep their relative order from `all_ids`.
        :raises ValueError: if there are candidates but no seed artists.
        :raises KeyError: if an ID has no entry in the embedding mapping.
        """
        all_ids = list(all_ids)
        seed_ids = list(seed_ids)

        # Nothing to rank (np.vstack would raise on an empty list), or nothing requested.
        if not all_ids or top_n <= 0:
            return []
        if not seed_ids:
            raise ValueError("recommend() needs at least one seed artist")

        # Set membership keeps the seed-exclusion test O(1) per candidate.
        seed_lookup = set(seed_ids)

        # Stack candidate embeddings into a (num_candidates, dim) matrix
        all_vecs = np.vstack([self.embeddings[i] for i in all_ids])

        # Calculate average seed embedding; keepdims yields the 2-D row cosine_similarity expects
        seed_vecs = [self.embeddings[seed_id] for seed_id in seed_ids]
        avg_vec = np.mean(seed_vecs, axis=0, keepdims=True)

        # Compute cosine similarity between the average seed vector and every candidate
        sims = cosine_similarity(avg_vec, all_vecs)[0]

        # Sort (stable, descending by score) & pick top_n artists, skipping the seeds themselves
        ranked = sorted(zip(all_ids, sims), key=lambda pair: -pair[1])
        results = []
        for artist_id, score in ranked:
            if artist_id in seed_lookup:
                continue
            results.append(RecommendedArtist(artist_id, float(score)))
            if len(results) >= top_n:
                break
        return results

# One recommender per text source, each wrapping the matching artist-ID -> embedding dict.
# `spotify_embeddings` only has entries for the IDs in `spotify_desc_artists`, so
# `spotify_recommender` can only be queried with those IDs.
wiki_recommender = EmbeddingRecommender(wiki_embeddings)
name_recommender = EmbeddingRecommender(name_embeddings)
spotify_recommender = EmbeddingRecommender(spotify_embeddings)

# Evaluation

For evaluation, we will use an AUC metric that operates over a ranked list of relevance scores (1 if the recommended artist is one of the user's masked artists, 0 if it is a distractor).
The goal is to rank all of the relevant artists at the front of the list, and all of the irrelevant artists at the back.

The data for this evaluation comes from Localify.org; we have taken the heavy rotations of just under 200 users and masked 20% of their familiar artists for anonymity.

The evaluation is done by calculating this AUC metric for recommendations across these ~200 users, where we mask 50% of their (anonymized) familiar artists, augment these masked artists with a list of distractors of the same size, and then perform recommendations using this augmented list as the candidate set and their unmasked seed artists as the set of seed artists. We then see how many of the user's masked artists were placed near the top of the list of recommendations.

## Evaluation code

In [8]:
from random import shuffle
from typing import Tuple

def calc_auc_score(rank_relevance):
    """
    Compute the AUC of a ranked list of binary relevance labels.

    The score is the fraction of (relevant, irrelevant) pairs in which the relevant
    item appears earlier in the list than the irrelevant one.

    usage : result = calc_auc_score([1,0,1,0,0,0,1,0,0,0,0,0])
    :param rank_relevance: list of 1s (relevant) and 0s (not relevant), best-ranked first
    :return: AUC score between 0 and 1. 0.5 is random. 1.0 is perfect (all relevant
             items at the top). Returns -1 when the list lacks either relevant or
             irrelevant items, because the AUC is undefined in that case.
    """
    positives = sum(rank_relevance)
    negatives = len(rank_relevance) - positives

    # Undefined without at least one item of each class
    if not positives or not negatives:
        return -1

    relevant_seen = 0       # relevant items encountered so far
    correctly_ordered = 0   # (relevant, irrelevant) pairs ranked in the right order
    for label in rank_relevance:
        if not label:
            # every relevant item already seen outranks this irrelevant one
            correctly_ordered += relevant_seen
            continue
        relevant_seen += 1

    return correctly_ordered / (positives * negatives)

In [9]:
from math import ceil
from random import sample


class Evaluator:
    """Scores a recommender by how well it ranks a user's held-out (masked) seed
    artists above an equal number of randomly drawn distractor artists."""

    def __init__(self, model, artist_ids, seed_lists):
        """
        :param model: recommender exposing `recommend(seed_ids, all_ids)` that returns
            objects with an `id` attribute, best-ranked first.
        :param artist_ids: pool of artist IDs from which distractors are drawn.
        :param seed_lists: one list of seed artist IDs per user.
        """
        self.model = model
        self.artist_ids = artist_ids
        self.seed_lists = seed_lists

    def do_trial(self, seed_ids):
        """
        Run one randomized trial for a single user.

        The first half (rounded up) of the shuffled seeds is shown to the model; the
        remainder is masked and mixed with the same number of distractors.

        :param seed_ids: the user's seed artist IDs. The sequence is NOT modified.
        :return: AUC of the masked artists within the ranked candidates, or -1 when
            the trial cannot be scored (fewer than two seeds, so nothing is masked).
        :raises ValueError: if the artist pool holds fewer non-seed artists than
            there are masked artists to match with distractors.
        """
        # Shuffle a private copy: shuffling `seed_ids` itself would reorder the
        # caller's list (e.g. the shared `seeds` data) as a side effect.
        shuffled = list(seed_ids)
        shuffle(shuffled)

        split = ceil(len(shuffled) * 0.5)
        unmasked = shuffled[:split]
        masked = shuffled[split:]

        # No masked artists (zero or one seed): AUC is undefined, and with zero
        # seeds the recommender would have nothing to average.
        if not masked:
            return -1

        # Sets make the membership tests O(1) instead of scanning a list each time.
        seed_lookup = set(shuffled)
        potential_distractors = [_id for _id in self.artist_ids if _id not in seed_lookup]
        distractors = sample(potential_distractors, len(masked))

        candidates = masked + distractors
        shuffle(candidates)
        # The unmasked seeds are passed along as candidates too; the original trial
        # design relies on the recommender to leave its own seeds out of the results.
        results = self.model.recommend(unmasked, candidates + unmasked)

        masked_lookup = set(masked)
        relevances = [1 if res.id in masked_lookup else 0 for res in results]
        return calc_auc_score(relevances)

    def eval_model(self):
        """
        Run one trial per seed list and average the scores.

        Trials that return -1 (not scorable) are left out of the average.

        :return: mean AUC over all scorable trials; NaN if no trial was scorable.
        """
        scores = []
        for seed_ids in self.seed_lists:
            score = self.do_trial(seed_ids)
            if score != -1:
                scores.append(score)
        if not scores:
            # np.mean([]) would also give NaN, but with a RuntimeWarning
            return np.float64(np.nan)
        return np.mean(scores)


In [None]:
# Mean AUC of the Wikipedia-description recommender over every user's seed list,
# drawing distractors from all artist IDs. Trials use unseeded shuffle/sample, so the
# displayed value changes from run to run.
wiki_evaluator = Evaluator(wiki_recommender, ids, seeds)
wiki_evaluator.eval_model()

np.float64(0.7108257169103823)

In [None]:
# Mean AUC of the artist-name recommender over every user's seed list, drawing
# distractors from all artist IDs. Trials use unseeded shuffle/sample, so the displayed
# value changes from run to run.
name_evaluator = Evaluator(name_recommender, ids, seeds)
name_evaluator.eval_model()

np.float64(0.6883232149767156)

In [11]:
# The Spotify recommender only has embeddings for artists with a Spotify description,
# so both the distractor pool and every user's seed list are restricted to those IDs.
# A set makes each membership test O(1) instead of scanning the full ID list per seed.
spotify_desc_artist_set = set(spotify_desc_artists)
spotify_seed_lists = [
    [seed_id for seed_id in user_seeds if seed_id in spotify_desc_artist_set]
    for user_seeds in seeds
]
spotify_evaluator = Evaluator(spotify_recommender,
                              spotify_desc_artists,
                              spotify_seed_lists)
spotify_evaluator.eval_model()

np.float64(0.6576950774890852)

Across these ~200 users, recommendations based on Wikipedia descriptions achieved a mean AUC of about 0.71, recommendations based on artist names about 0.69, and recommendations based on Spotify descriptions about 0.66 (an AUC of 0.5 corresponds to random ranking, 1.0 to a perfect ranking).