In [75]:
# %%capture
# Install the third-party packages into the notebook environment.
# NOTE(review): versions are unpinned, and `implicit` does not appear to be imported
# in the cells shown here — confirm it is still needed and pin versions if so.
!pip install datasets transformers sentence-transformers
!pip install implicit

Collecting implicit
  Downloading implicit-0.7.2-cp311-cp311-manylinux2014_x86_64.whl.metadata (6.1 kB)
Downloading implicit-0.7.2-cp311-cp311-manylinux2014_x86_64.whl (8.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.9/8.9 MB[0m [31m117.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: implicit
Successfully installed implicit-0.7.2


In [2]:
# Import libraries
import json
from sentence_transformers import SentenceTransformer
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import normalize

# Load your artist data (same as before)
def load_json(path="nlp_artists_filtered.json"):
    """Read the UTF-8 encoded JSON file at ``path`` and return the parsed content.

    :param path: location of the JSON file; defaults to the filtered artist dump.
    :return: whatever the JSON document decodes to (dict, list, ...).
    """
    with open(path, mode='r', encoding='utf-8') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)

# Artist records keyed by artist ID; later cells read each record's 'name' and 'wiki' fields.
artists = load_json()
# Seed lists handed to the Evaluator below (presumably one list of artist IDs per user — verify against the file).
seeds = load_json("nlp_seeds_anonymized.json")

Artists for Testing: 3404


# Introduction

This experiment will explore the merits of using the cosine similarity between sentence embeddings as a metric for recommending artists.

First, we load the sentence-embedding model. We then use it to compute two sets of embeddings: one from each artist's Wikipedia description and one from each artist's name.

In [5]:
# Load model
model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')

# Convert text to embeddings
# Texts are taken in `artists.values()` order; the ID list built later relies on the
# dict iterating in that same order so rows and IDs stay aligned.
texts = [artist['wiki'] for artist in artists.values()]
wiki_embeddings_list = model.encode(texts, show_progress_bar=True)

# Last expression of the cell, so the (n_artists, embedding_dim) shape is displayed.
wiki_embeddings_list.shape

Batches:   0%|          | 0/107 [00:00<?, ?it/s]

(3404, 768)

In [6]:
import pandas as pd

# Artist names, in the same `artists.values()` order used for the wiki texts.
names = [artist['name'] for artist in artists.values()]

# Encode the names with the same sentence-embedding model.
name_embeddings_list = model.encode(names, show_progress_bar=True)
# NOTE(review): this is not the cell's last expression, so the shape is never displayed.
name_embeddings_list.shape

# Artist IDs in dict order, i.e. aligned row-for-row with both embedding arrays.
ids = [k for k in artists.keys()]

Batches:   0%|          | 0/107 [00:00<?, ?it/s]

In [8]:
# Every embedding row must line up with exactly one artist ID; `zip` would otherwise
# silently drop the unmatched tail and leave some artists without an embedding.
assert len(ids) == len(wiki_embeddings_list) == len(name_embeddings_list)

# Map artist ID to corresponding Wiki embedding
wiki_embeddings = dict(zip(ids, wiki_embeddings_list))

# Map artist ID to corresponding Name embedding
name_embeddings = dict(zip(ids, name_embeddings_list))

# Map artist ID to name
id_to_name = {artist_id: record['name'] for artist_id, record in artists.items()}

In [56]:
from dataclasses import dataclass
from typing import Optional, List


@dataclass
class RecommendedArtist:
    """One entry of a recommendation list: an artist and the score it was ranked by."""

    # Identifier of the recommended artist (same ID space as the embedding dictionaries).
    id: str
    # Similarity score assigned by the recommender; higher means ranked earlier.
    score: float


class EmbeddingRecommender:
    """Rank candidate artists by cosine similarity to the mean embedding of a seed set."""

    def __init__(self, embeddings):
        """
        :param embeddings: mapping from artist ID to that artist's embedding vector.
        """
        self.embeddings = embeddings

    def recommend(self, seed_ids, all_ids, top_n=float('inf')) -> "List[RecommendedArtist]":
        """
        Rank the candidates in ``all_ids`` against the average embedding of ``seed_ids``.

        :param seed_ids: IDs of the artists the user already knows; they are never returned.
        :param all_ids: IDs of the candidate artists to rank (may include the seeds).
        :param top_n: maximum number of recommendations; the default returns every candidate.
        :return: ``RecommendedArtist`` entries sorted by descending similarity.
        :raises ValueError: if ``seed_ids`` is empty while there are candidates to rank.
        :raises KeyError: if any ID has no entry in ``self.embeddings``.
        """
        # Nothing to rank, or nothing requested: no recommendations.
        if len(all_ids) == 0 or top_n <= 0:
            return []
        if len(seed_ids) == 0:
            raise ValueError("seed_ids must contain at least one artist ID")

        # Set membership keeps the seed filter O(1) per candidate.
        seed_lookup = set(seed_ids)

        # Stack candidate embeddings into a (n_candidates, dim) matrix
        all_vecs = np.vstack([self.embeddings[i] for i in all_ids])

        # Calculate average seed embedding, kept 2-D so it can be compared row-wise
        seed_vecs = [self.embeddings[seed_id] for seed_id in seed_ids]
        avg_vec = np.mean(seed_vecs, axis=0, keepdims=True)

        # Compute cosine similarity between the average seed and every candidate
        sims = cosine_similarity(avg_vec, all_vecs)[0]

        # Sort & pick top_n artists (stable sort: ties keep their `all_ids` order)
        ranked = sorted(zip(all_ids, sims), key=lambda pair: -pair[1])
        results = []
        for artist_id, score in ranked:
            if artist_id in seed_lookup:
                continue
            results.append(RecommendedArtist(artist_id, float(score)))
            if len(results) >= top_n:
                break
        return results

# One recommender per embedding source, so the two text fields can be compared head-to-head.
wiki_recommender = EmbeddingRecommender(wiki_embeddings)
name_recommender = EmbeddingRecommender(name_embeddings)

# Evaluation

For evaluation, we will use an AUC metric that operates over a ranked list of relevance scores (1 if the recommended artist is one of the user's masked artists, 0 if it is a distractor).
The goal is to rank all of the relevant artists at the front of the list, and all of the irrelevant artists at the back.

The data for this evaluation comes from Localify.org. We have taken the heavy rotations of just under 200 users and masked 20% of their familiar artists for anonymity.

The evaluation is done by calculating this AUC metric for recommendations across these ~200 users, where we mask 50% of their (anonymized) familiar artists, augment these masked artists with a list of distractors of the same size, and then perform recommendations using this augmented list as the candidate set and their unmasked seed artists as the set of seed artists. We then see how many of the user's masked artists were placed near the top of the list of recommendations.

# Evaluation

In [57]:
from random import shuffle
from typing import Tuple

def calc_auc_score(rank_relevance):
    """
    Area under the ROC curve for a ranked list of binary relevance labels.

    usage : result = calc_auc_score([1,0,1,0,0,0,1,0,0,0,0,0])
    :param rank_relevance: list of 1s (relevant) and 0s (not relevant), best-ranked item first
    :return: AUC score between 0 and 1. 0.5 is random. 1.0 is perfect (all relevant items
             at the top.) Returns -1 when the list has no relevant or no irrelevant items,
             because the score is undefined in that case.
    """
    positives = sum(rank_relevance)
    negatives = len(rank_relevance) - positives

    # Without at least one item of each kind there are no pairs to compare.
    if 0 in (positives, negatives):
        return -1

    positives_seen = 0
    ordered_pairs = 0
    for label in rank_relevance:
        if not label:
            # Each irrelevant item is correctly ordered against every relevant item above it.
            ordered_pairs += positives_seen
            continue
        positives_seen += 1

    return ordered_pairs / (positives * negatives)

In [58]:
from math import ceil
from random import sample


class Evaluator:
    """Masked-seed evaluation of a recommender.

    For each seed list, half of the artists are hidden, mixed with an equal number of
    randomly drawn distractors, and ranked by the model from the remaining half. The
    ranking is scored with ``calc_auc_score``.
    """

    def __init__(self, model, artist_ids, seed_lists, seed=None):
        """
        :param model: object exposing ``recommend(seed_ids, all_ids)`` whose results carry an ``id``.
        :param artist_ids: all artist IDs that distractors may be drawn from.
        :param seed_lists: iterable of per-user lists of artist IDs.
        :param seed: optional integer; when given, masking and distractor sampling use a
                     private seeded generator so results are repeatable. When omitted the
                     module-level ``shuffle``/``sample`` are used, exactly as before.
        """
        self.model = model
        self.artist_ids = artist_ids
        self.seed_lists = seed_lists
        if seed is None:
            self._rng = None
        else:
            # Local import: the notebook only imports `shuffle` and `sample` by name.
            from random import Random
            self._rng = Random(seed)

    def do_trial(self, seed_ids):
        """
        Run one masked-seed trial for a single user.

        :param seed_ids: the user's artist IDs; this list is NOT modified.
        :return: AUC of the ranking, or -1 when the trial cannot be scored
                 (no masked artists, or no distractors available).
        """
        rng = getattr(self, "_rng", None)
        do_shuffle = rng.shuffle if rng is not None else shuffle
        do_sample = rng.sample if rng is not None else sample

        # Shuffle a copy so the caller's seed list keeps its original order.
        shuffled = list(seed_ids)
        do_shuffle(shuffled)

        split = ceil(len(shuffled) * 0.5)
        unmasked = shuffled[:split]
        masked = shuffled[split:]
        if not masked:
            # Zero or one seed: nothing is hidden, so the AUC is undefined.
            return -1

        known = set(shuffled)
        potential_distractors = [_id for _id in self.artist_ids if _id not in known]
        # Never ask for more distractors than exist; `sample` would raise otherwise.
        n_distractors = min(len(masked), len(potential_distractors))
        distractors = do_sample(potential_distractors, n_distractors)

        candidates = masked + distractors
        do_shuffle(candidates)
        results = self.model.recommend(unmasked, candidates + unmasked)

        hidden = set(masked)
        relevances = [1 if res.id in hidden else 0 for res in results]
        return calc_auc_score(relevances)

    def eval_model(self):
        """
        Average the trial AUC over every seed list, skipping trials that cannot be scored.

        :return: mean AUC as a NumPy float, or NaN when no trial produced a score.
        """
        scores = []
        for seed_ids in self.seed_lists:
            score = self.do_trial(seed_ids)
            if score != -1:
                scores.append(score)
        if not scores:
            # Avoid the RuntimeWarning from taking the mean of an empty list.
            return np.float64("nan")
        return np.mean(scores)


In [65]:
# Mean AUC over all seed lists using the Wikipedia-description embeddings.
# Masking and distractors are drawn at random, so the value changes from run to run.
wiki_evaluator = Evaluator(wiki_recommender, ids, seeds)
wiki_evaluator.eval_model()

np.float64(0.7108257169103823)

In [84]:
# Same evaluation using the artist-name embeddings.
# Masking and distractors are drawn at random, so the value changes from run to run.
name_evaluator = Evaluator(name_recommender, ids, seeds)
name_evaluator.eval_model()

np.float64(0.6883232149767156)

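Across these ~200 users, the recommendations using wiki descriptions achieved a mean AUC of about 0.71, and the recommendations using names a mean AUC of about 0.69 (0.5 corresponds to a random ranking, 1.0 to a perfect one). Because the masking and distractor sampling are random, these values vary slightly between runs.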