# Read and Explore Data

In [1]:
from typing import List, Union, Callable
from dataclasses import dataclass

import pandas as pd
import numpy as np

In [2]:
def read(feature: str, h: Union[int, None] = None, data_dir: str = "data") -> pd.DataFrame:
    """
    Load one feature table from a tab-separated file.

    The file is looked up at ``{data_dir}/id_{feature}_mmsr.tsv``.

    Parameters:
    - feature: Name fragment that identifies the file, e.g. "information" or "lyrics_bert".
    - h: Row number to use as the header, forwarded to ``pd.read_csv`` as ``header``.
      ``None`` (the default) means the file has no header row.
    - data_dir: Directory containing the TSV files. Defaults to "data", relative to
      the current working directory.

    Returns:
    - DataFrame with the contents of the file.
    """
    return pd.read_csv(f"{data_dir}/id_{feature}_mmsr.tsv", sep="\t", header=h)


def embed_and_merge(df1: pd.DataFrame, df2: pd.DataFrame, col_name: str) -> pd.DataFrame:
    """
    Pack the feature columns of ``df2`` into one vector column and left-join it onto ``df1``.

    Every column of ``df2`` other than "id" is treated as one dimension of the
    feature vector. Neither input frame is modified, so the function can safely
    be called again with the same frames.

    Parameters:
    - df1: Left frame; must have an "id" column. All of its rows are kept.
    - df2: Feature frame with an "id" column plus numeric feature columns.
    - col_name: Name of the new column that holds one float ``np.ndarray`` per row.

    Returns:
    - ``df1`` with ``col_name`` appended. Rows of ``df1`` whose id does not occur
      in ``df2`` get a missing value in ``col_name``.
    """
    embedding = df2.columns.difference(["id"], sort=False)
    # Convert all feature columns at once; iterating the 2-D float matrix yields
    # one 1-D array per row, which is what each cell of the new column stores.
    vectors = list(df2[embedding].to_numpy(dtype=float))
    # Work on a fresh two-column frame so the caller's df2 keeps its original columns.
    compact = df2[["id"]].copy()
    compact[col_name] = vectors
    return pd.merge(df1, compact, on="id", how="left")

In [3]:
# read the song metadata table; row 0 of the TSV is used as the header
df = read("information", 0)

In [4]:
# read the BERT lyrics embedding table (row 0 of the TSV is the header)
bert = read("lyrics_bert", 0)
# pack all non-"id" columns into one "bert_embedding" vector column and left-join it onto df by "id"
df = embed_and_merge(df, bert, "bert_embedding")

In [5]:
# read the word2vec lyrics embedding table (row 0 of the TSV is the header)
word2vec = read("lyrics_word2vec", 0)
# pack all non-"id" columns into one "word2vec_embedding" vector column and left-join it onto df by "id"
df = embed_and_merge(df, word2vec, "word2vec_embedding")

In [6]:
# read the tf-idf term weighting table (row 0 of the TSV is the header)
tfidf_weighting = read("lyrics_tf-idf", 0)
# pack all non-"id" columns into one "tf-idf" vector column and left-join it onto df by "id"
df = embed_and_merge(df, tfidf_weighting, "tf-idf")

In [7]:
df

Unnamed: 0,id,artist,song,album_name,bert_embedding,word2vec_embedding,tf-idf
0,01Yfj2T3YTwJ1Yfy,We As Human,Take The Bullets Away (feat. Lacey Sturm),We As Human,"[0.0302475523203611, 0.0352500043809413, 0.010...","[0.0193592727054678, 0.0232394714425702, 0.028...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0818293914712727, ..."
1,01gyRHLquwXDlhkO,The Notorious B.I.G.,Somebody's Gotta Die,Life After Death (Remastered Edition),"[0.0084422621876001, 0.0302564185112714, 0.009...","[0.018537292381979, 0.0113115924403394, 0.0107...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
2,01rMxQv6vhyE1oQX,Against the Current,Chasing Ghosts,In Our Bones,"[0.0490818135440349, 0.0148476688191294, 0.001...","[0.0227837218553759, 0.0231641749730655, 0.012...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
3,02RGE9FNH65RtMS7,Barthezz,Infected,Trance - The Early Years (1997-2002),"[0.0445394963026046, 0.0214906893670558, 0.013...","[0.0381116103401342, 0.0278804157207017, 0.016...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
4,02ZnlCGZEbkfCDxo,Laura Pausini,Tra Te E Il Mare,The Best of Laura Pausini - E Ritorno Da Te,"[0.0514551289379596, 0.0297695714980363, -0.01...","[0.0182936789026777, -0.0064870788035669, 0.00...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.2413163920156013, ..."
...,...,...,...,...,...,...,...
10090,zyzILCQvVeUFIINi,Crowded House,When You Come,Temple Of Low Men,"[0.006713552866131, 0.0480893477797508, -0.001...","[0.0195101330379449, 0.0236336907562543, 0.011...","[0.0, 0.0, 0.079623055470056, 0.0, 0.0, 0.0, 0..."
10091,zzgS4ZqyswamEWNj,Britney Spears,My Only Wish (This Year),Platinum Christmas,"[0.0098905526101589, 0.0401467233896255, -0.02...","[0.0268563718791583, 0.0082648759004199, 0.011...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
10092,zzoFYDMlqU1X2zz1,Thundercat,DUI,Drunk,"[0.0101165119558572, 0.0388841480016708, -0.01...","[0.0051499218912795, 0.0028818239457905, 0.017...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
10093,zzpkRCGA5ud8q4mv,Otis Redding,Rock Me Baby,Otis Blue,"[-0.0166116580367088, 0.0266939438879489, -0.0...","[0.0370260450523346, 0.0159991827379498, -0.00...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


In [8]:
df.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 10095 entries, 0 to 10094
Data columns (total 7 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   id                  10095 non-null  object
 1   artist              10095 non-null  object
 2   song                10095 non-null  object
 3   album_name          10095 non-null  object
 4   bert_embedding      10095 non-null  object
 5   word2vec_embedding  10095 non-null  object
 6   tf-idf              10095 non-null  object
dtypes: object(7)
memory usage: 630.9+ KB


In [9]:
df

Unnamed: 0,id,artist,song,album_name,bert_embedding,word2vec_embedding,tf-idf
0,01Yfj2T3YTwJ1Yfy,We As Human,Take The Bullets Away (feat. Lacey Sturm),We As Human,"[0.0302475523203611, 0.0352500043809413, 0.010...","[0.0193592727054678, 0.0232394714425702, 0.028...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0818293914712727, ..."
1,01gyRHLquwXDlhkO,The Notorious B.I.G.,Somebody's Gotta Die,Life After Death (Remastered Edition),"[0.0084422621876001, 0.0302564185112714, 0.009...","[0.018537292381979, 0.0113115924403394, 0.0107...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
2,01rMxQv6vhyE1oQX,Against the Current,Chasing Ghosts,In Our Bones,"[0.0490818135440349, 0.0148476688191294, 0.001...","[0.0227837218553759, 0.0231641749730655, 0.012...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
3,02RGE9FNH65RtMS7,Barthezz,Infected,Trance - The Early Years (1997-2002),"[0.0445394963026046, 0.0214906893670558, 0.013...","[0.0381116103401342, 0.0278804157207017, 0.016...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
4,02ZnlCGZEbkfCDxo,Laura Pausini,Tra Te E Il Mare,The Best of Laura Pausini - E Ritorno Da Te,"[0.0514551289379596, 0.0297695714980363, -0.01...","[0.0182936789026777, -0.0064870788035669, 0.00...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.2413163920156013, ..."
...,...,...,...,...,...,...,...
10090,zyzILCQvVeUFIINi,Crowded House,When You Come,Temple Of Low Men,"[0.006713552866131, 0.0480893477797508, -0.001...","[0.0195101330379449, 0.0236336907562543, 0.011...","[0.0, 0.0, 0.079623055470056, 0.0, 0.0, 0.0, 0..."
10091,zzgS4ZqyswamEWNj,Britney Spears,My Only Wish (This Year),Platinum Christmas,"[0.0098905526101589, 0.0401467233896255, -0.02...","[0.0268563718791583, 0.0082648759004199, 0.011...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
10092,zzoFYDMlqU1X2zz1,Thundercat,DUI,Drunk,"[0.0101165119558572, 0.0388841480016708, -0.01...","[0.0051499218912795, 0.0028818239457905, 0.017...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
10093,zzpkRCGA5ud8q4mv,Otis Redding,Rock Me Baby,Otis Blue,"[-0.0166116580367088, 0.0266939438879489, -0.0...","[0.0370260450523346, 0.0159991827379498, -0.00...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


# Define the retrieval system (RS) and similarity functions

In [10]:
def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    """
    Cosine of the angle between two vectors.

    Parameters:
    - a, b: 1-D vectors of equal length.

    Returns:
    - ``dot(a, b) / (|a| * |b|)``, or 0.0 if either vector has zero norm.
    """
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    if denominator == 0:
        # A zero vector has no direction, so 0/0 would yield NaN. NaN scores end up
        # ranked first once an ascending argsort is reversed; report "no similarity".
        return 0.0
    return np.dot(a, b) / denominator

In [11]:
def dot_product(a: np.ndarray, b: np.ndarray) -> float:
    """Return the dot product of the vectors ``a`` and ``b``."""
    product = np.dot(a, b)
    return product

In [12]:
def manhattan_distance(a: np.ndarray, b: np.ndarray) -> float:
    """Return the L1 distance: the sum of absolute element-wise differences of ``a`` and ``b``."""
    absolute_differences = np.abs(a - b)
    return np.sum(absolute_differences)

In [13]:
def euclidean_distance(a: np.ndarray, b: np.ndarray) -> float:
    """Return the L2 distance: the norm of the difference vector ``a - b``."""
    difference = a - b
    return np.linalg.norm(difference)

In [14]:
@dataclass
class SongInfo:
    """Query key that identifies a song by title and artist rather than by id."""

    # Song title; RetrievalSystem.retrieve matches it exactly against the "song" column.
    title: str
    # Artist name; RetrievalSystem.retrieve matches it exactly against the "artist" column.
    artist: str


class RetrievalSystem:
    """
    Content-based song retrieval over a DataFrame of songs.

    Each song is represented by the vector stored in the ``sim_feature`` column.
    A query song is scored against every song with ``sim_metric`` and the
    best-scoring rows are returned.
    """

    def __init__(
        self,
        df: pd.DataFrame,
        sim_metric: Callable = cosine_similarity,
        sim_feature: str = "bert_embedding",
        higher_is_better: bool = True,
    ):
        """
        Parameters:
        - df: Songs to search. ``retrieve`` looks songs up through the "id", "song"
          and "artist" columns; ``sim_feature`` must hold one vector per row.
        - sim_metric: Callable ``(query_vector, song_vector) -> score``.
        - sim_feature: Name of the column holding the vectors to compare.
        - higher_is_better: True (default) if a larger score means "more similar".
          Pass False for distance metrics so that the closest songs are returned.

        Raises:
        - ValueError: If ``sim_feature`` is not a column of ``df``.
        """
        if sim_feature not in df.columns:
            raise ValueError(f"'{sim_feature}' not found in the dataframe columns.")

        self.df = df
        self.sim_metric = sim_metric
        self.sim_feature = sim_feature
        self.higher_is_better = higher_is_better

        # Precompute the stacked version of the feature
        # array(list(array)) -> 2d array with one row per song
        self.all_songs_stacked = np.vstack(self.df[self.sim_feature].values)

    def _calc_similarity(self, song: pd.Series, n: int = 5) -> pd.DataFrame:
        """
        Score the given song against all songs in the dataset.

        Parameters:
        - song: Row of the dataframe (a Series) representing the query song.
        - n: Number of best-scoring songs to retrieve.

        Returns:
        - Copy of the n best-scoring rows with an added "similarity" column that
          holds the raw score of ``sim_metric``. The query song itself is not excluded.
        """
        song_vector = song[self.sim_feature]
        # Score the query vector against every row of the precomputed feature matrix.
        similarity = np.array(
            [
                self.sim_metric(song_vector, song_vec)
                for song_vec in self.all_songs_stacked
            ]
        )

        if self.higher_is_better:
            sort_keys = similarity
            if np.issubdtype(similarity.dtype, np.floating):
                undefined = np.isnan(similarity)
                if undefined.any():
                    # argsort puts NaN last, so reversing would rank undefined
                    # scores first; push them to the bottom instead.
                    sort_keys = np.where(undefined, -np.inf, similarity)
            ranking = np.argsort(sort_keys)[::-1]
        else:
            # Ascending order: smallest distance first, NaN stays at the end.
            ranking = np.argsort(similarity)

        top_n_indices = ranking[:n]
        # make pandas happy: work on a copy instead of modifying a slice of self.df
        top_n = self.df.iloc[top_n_indices].copy()
        top_n["similarity"] = similarity[top_n_indices]
        return top_n

    def random_baseline(self, song_id: Union[int, str], n: int = 5) -> pd.DataFrame:
        """
        Retrieve random songs from the dataset.

        Parameters:
        - song_id: ID of the song. Not used in this method.
        - n: Number of songs to retrieve.

        Returns:
        - DataFrame of n random songs.
        """
        return self.df.sample(n=n)

    def retrieve(self, query: Union[int, str, SongInfo], n: int = 5) -> pd.DataFrame:
        """
        Retrieve the top n songs for the given query song.

        Parameters:
        - query: If int (or NumPy integer), an index label of df. If str, a value of
          the "id" column. If SongInfo, the exact title and artist of the song.
        - n: Number of songs to retrieve.

        Returns:
        - DataFrame of the top n songs, including a "similarity" column.

        Raises:
        - ValueError: If the query has an unsupported type or matches no song.
        """
        if isinstance(query, str):
            matches = self.df[self.df["id"] == query]
            if matches.empty:
                raise ValueError(f"Song id {query} not in the dataset.")
            song = matches.iloc[0]
        elif isinstance(query, (int, np.integer)):
            # Integer queries are index labels, so validate against the index only.
            if query not in self.df.index:
                raise ValueError(f"Song id {query} not in the dataset.")
            song = self.df.loc[query]
        elif isinstance(query, SongInfo):
            title, artist = query.title, query.artist
            matches = self.df[
                (self.df["song"] == title) & (self.df["artist"] == artist)
            ]
            if matches.empty:
                raise ValueError(
                    f"Song with title '{title}' and artist '{artist}' not found in the dataset."
                )
            # Several rows may share title and artist; the first match is used.
            song = matches.iloc[0]
        else:
            raise ValueError(
                "Invalid query type. Provide either song_id (int/str) or an instance of SongInfo."
            )

        return self._calc_similarity(song, n)

## Random Baseline

In [15]:
rs = RetrievalSystem(df)

In [16]:
rs.random_baseline(1)

Unnamed: 0,id,artist,song,album_name,bert_embedding,word2vec_embedding,tf-idf
5473,Xj7BqM7MQrUzXvFF,Benjamin Francis Leftwich,Shine,Last Smoke Before the Snowstorm,"[0.0459097176790237, 0.0299372430890798, -0.01...","[0.0120196155350034, 0.0212312918971292, 0.015...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
1836,B3tXZmmnw87oUZS3,The Cranberries,Forever Yellow Skies,To The Faithful Departed,"[-0.0059726079925894, 0.0176043324172496, 0.00...","[-0.0062853344176944, 0.009930726586792, 0.012...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
2678,GP7ancyZ5J71quAu,The Mars Volta,Drunkship of Lanterns,Deloused in the Comatorium,"[-0.0005169544601812, 0.0074029215611517, 0.00...","[0.0265781265737202, 0.0330487160453907, 0.004...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
8232,oXpdQJTpASLJfFvM,En Vogue,Beat of Love,Masterpiece Theatre,"[0.0270564071834087, -0.014147656969726, 0.011...","[0.0041077826206353, 0.0225017249721727, 0.030...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
6799,g1Fkyx0T1Ylcw49Z,Teena Marie,Ooo La La La,Naked To The World,"[-0.0086635462939739, 0.01679671369493, 0.0072...","[0.009836863935925, -0.0003681176446646, 0.002...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


In [17]:
rs.random_baseline(1)

Unnamed: 0,id,artist,song,album_name,bert_embedding,word2vec_embedding,tf-idf
6918,ghRyim0wlKC4V2Tb,BONUSbaby,If I Become An Adult,If I become an adult 어른이 된다면,"[0.0104310577735304, 0.0367681607604026, -0.01...","[0.0016102789289134, 0.0093822789490469, 0.007...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.131..."
8320,p5C6RE9bhZAVUUAl,Tegan and Sara,"Like O, Like H",The Con,"[0.0033543638419359, 0.0073735117912292, 0.014...","[0.017634171613359, 0.013064124040483, -0.0007...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
8189,oHMNoh8ipj5GTtCO,Kylie Minogue,Loving Days,Body Language,"[0.0096387723460793, -0.0271708257496356, -0.0...","[0.0076873334829911, 0.0391547668610761, 0.008...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
2291,Dluy8chZBkk4kOih,Linda Ronstadt,Tracks of My Tears,Greatest Hits,"[0.0148730874061584, 0.0081155421212315, -0.01...","[0.0125893291874995, 0.0146143953477601, 0.019...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
2101,Cd07fQdWjDbf0uz9,Troye Sivan,BITE,Blue Neighbourhood (Deluxe),"[0.0215001497417688, 0.030625918880105, -0.003...","[0.0413938150846414, 0.0064974004443445, 0.017...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


As expected for a random baseline, every call returns a different set of songs, regardless of the query.

In [18]:
rs.random_baseline("01gyRHLquwXDlhkO", n=10)

Unnamed: 0,id,artist,song,album_name,bert_embedding,word2vec_embedding,tf-idf
9290,uyMzxxGWUbuZ7CnC,Ugly Kid Joe,Busy Bee,America's Least Wanted,"[0.0309558399021625, 0.0517794005572795, 0.001...","[0.0127271551793923, 0.0179842533164524, 0.012...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
3009,IICZC7cnZUeEKMPX,Porcupine Tree,The Sleep of No Dreaming,Signify (Remaster),"[0.0382155813276767, 0.0395022705197334, 0.024...","[0.028454271707839, 0.0211454630318459, 0.0129...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2491528952759..."
6081,blZ9zSQBqOMxcPhN,Vanessa Carlton,Hear the Bells,Hear The Bells,"[0.024512231349945, 0.0247154235839843, -0.004...","[0.0342380406384888, 0.0040819178324281, 0.015...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
9370,vPNIItkt1ru5ivn2,Sunn O))),Cursed Realms (Of the Winterdemons),Black One,"[0.0264619141817092, 0.0175736267119646, -0.00...","[0.020258006977383, 0.0198967416028608, 0.0009...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
9943,yy2SYZt8VIrBFFNS,Erthlings,Bridges,Bridges,"[0.0310981310904026, -0.0093351379036903, -0.0...","[0.0113095313011269, 0.030110428686298, 0.0018...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
5025,Utn46i9ypxeqb3Yt,Morbid Angel,Garden of Disdain,Kingdoms Disdained,"[0.0227955412119627, 0.0789821371436119, -0.00...","[0.0246172136643057, 0.0498534072345743, 0.005...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
1093,6ZEYJA8Jti0lbrtU,The D.O.C.,The Formula,No One Can Do It Better,"[0.0117557402700185, 0.0185621827840805, 0.009...","[0.0155628929355843, 0.0102422690150209, 0.009...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
8514,qGGKn5a4l0kyz8LP,Sum 41,With Me,Underclass Hero,"[0.0155548034235835, 0.0227421037852764, 0.004...","[0.022677292133625, 0.0207324632351766, 0.0175...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
8342,pDYI5F6cG7ieAp9B,Alexisonfire,The Northern,Old Crows / Young Cardinals,"[0.0254897680133581, 0.01590789668262, -0.0055...","[0.0280405069740178, 0.0102992975056016, 0.010...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
8777,rtDrvlfjlXSUNJS4,Theatre of Tragedy,Angelique,Aégis,"[0.0073118545114994, -0.0036731443833559, -0.0...","[0.0244916200566553, 0.014297137653812, 0.0029...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


In [19]:
rs.random_baseline("01gyRHLquwXDlhkO", n=10)

Unnamed: 0,id,artist,song,album_name,bert_embedding,word2vec_embedding,tf-idf
5812,a06mlbXr4vBYcuIN,A Perfect Circle,TalkTalk,Eat The Elephant,"[0.0346777476370334, 0.0467580892145633, -0.01...","[0.0350178064487408, 0.0303938706695892, 0.004...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
4537,RkqgX0RjKtsfWmkk,Deep Purple,Wasted Sunsets,Perfect Strangers,"[0.0072947815060615, 0.0527826994657516, 0.010...","[0.0315058776456206, 0.0097277849364077, 0.005...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.1080329079762917, ..."
2970,I1wSAAImrUdZPlY8,Blind Guardian,Prophecies,Beyond The Red Mirror,"[0.0210439134389162, 0.0453364364802837, -0.00...","[0.0232803927941074, 0.0194085540243916, 0.010...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0515435043465022, ..."
968,5kEGDiPu9Yt69KWy,Loreena McKennitt,Snow,Celtic Twilight 2,"[-0.0080511029809713, 0.0694233700633049, -0.0...","[0.0239750206477902, 0.0226442270957292, -0.00...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
1696,ABQN80RkNgRdteks,Alicia Keys,Nobody Not Really,The Diary Of Alicia Keys,"[0.0297940969467163, 0.0486738122999668, -0.00...","[0.0289834229261032, 0.005980910063954, 0.0106...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
437,2UozOVTCtNFcy0Ml,Franz Ferdinand,Tell Her Tonight,Franz Ferdinand,"[-0.0178439244627952, 0.0366654470562934, -0.0...","[0.015229359983849, -0.0035109222278974, 0.037...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
1384,8GRxTPVdJcJi5TCw,Disturbed,Another Way to Die,Asylum (Deluxe),"[0.0258089788258075, 0.0784980431199073, 0.016...","[0.0370090511977497, 0.0330236288582809, 0.009...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
9263,uk6uTmkZ36FPozWg,Stella Donnelly,Season's Greetings,Beware of the Dogs,"[0.0113677317276597, 0.0769811272621154, -0.00...","[0.0167365467313731, 0.0155031902600278, 0.016...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
6455,dwo9CZ4cYlYrsiZH,"Emerson, Lake & Palmer",Lucky Man - 2012 Remastered Version,"Emerson, Lake & Palmer (Deluxe Version)","[-0.0008436187636107, -0.009991068392992, 0.00...","[0.0363589826414025, 0.0150405316336949, 0.024...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
5494,Xqnqd69GY5xNU3iH,The Strokes,Between Love & Hate,Room On Fire,"[0.0159800443798303, 0.0301124770194292, 0.007...","[0.0223667489429798, 0.0006515211927905, 0.014...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


## Text-based (cos-sim, tf-idf)

In [20]:
rs_cosine = RetrievalSystem(df, cosine_similarity, "tf-idf")

In [21]:
rs_cosine.retrieve(1)

Unnamed: 0,id,artist,song,album_name,bert_embedding,word2vec_embedding,tf-idf,similarity
1,01gyRHLquwXDlhkO,The Notorious B.I.G.,Somebody's Gotta Die,Life After Death (Remastered Edition),"[0.0084422621876001, 0.0302564185112714, 0.009...","[0.018537292381979, 0.0113115924403394, 0.0107...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",1.0
1173,74k8qdan0o4DFa7L,Lil' Wayne,Megaman,Tha Carter IV (Deluxe),"[0.0338740646839141, 0.0250908937305212, -0.00...","[0.0145877606695465, 0.0140829699431174, 0.005...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.50826
9574,wiw2rM2xb4CJ5sVP,Kanye West,Never Let Me Down,The College Dropout,"[-0.0022038235329091, 0.0649233236908912, 0.00...","[0.0175837381167852, 0.0021414441591594, -0.00...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0343634815054...",0.504759
499,2rQiu54zZBs5Dmmi,2Pac,Heartz Of Men,All Eyez On Me,"[0.02616005577147, 0.0450833514332771, 0.01747...","[0.0190100703822374, 0.0084098643173214, 0.013...","[0.0, 0.0, 0.0, 0.0311529042992567, 0.0, 0.0, ...",0.485196
1481,8ptSfMLrxpbfRaxl,Nas,The Genesis,Illmatic,"[0.0098546016961336, 0.0452835336327552, 0.001...","[0.0236667927170824, 0.0118261590587759, 0.014...","[0.0, 0.0, 0.0, 0.0486305117996291, 0.0, 0.0, ...",0.445888


In [22]:
# check reproducibility
rs_cosine.retrieve(1)

Unnamed: 0,id,artist,song,album_name,bert_embedding,word2vec_embedding,tf-idf,similarity
1,01gyRHLquwXDlhkO,The Notorious B.I.G.,Somebody's Gotta Die,Life After Death (Remastered Edition),"[0.0084422621876001, 0.0302564185112714, 0.009...","[0.018537292381979, 0.0113115924403394, 0.0107...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",1.0
1173,74k8qdan0o4DFa7L,Lil' Wayne,Megaman,Tha Carter IV (Deluxe),"[0.0338740646839141, 0.0250908937305212, -0.00...","[0.0145877606695465, 0.0140829699431174, 0.005...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.50826
9574,wiw2rM2xb4CJ5sVP,Kanye West,Never Let Me Down,The College Dropout,"[-0.0022038235329091, 0.0649233236908912, 0.00...","[0.0175837381167852, 0.0021414441591594, -0.00...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0343634815054...",0.504759
499,2rQiu54zZBs5Dmmi,2Pac,Heartz Of Men,All Eyez On Me,"[0.02616005577147, 0.0450833514332771, 0.01747...","[0.0190100703822374, 0.0084098643173214, 0.013...","[0.0, 0.0, 0.0, 0.0311529042992567, 0.0, 0.0, ...",0.485196
1481,8ptSfMLrxpbfRaxl,Nas,The Genesis,Illmatic,"[0.0098546016961336, 0.0452835336327552, 0.001...","[0.0236667927170824, 0.0118261590587759, 0.014...","[0.0, 0.0, 0.0, 0.0486305117996291, 0.0, 0.0, ...",0.445888


In [23]:
rs_cosine.retrieve("xUjqXrsiCLb8tPkC", n=10)

Unnamed: 0,id,artist,song,album_name,bert_embedding,word2vec_embedding,tf-idf,similarity
9714,xUjqXrsiCLb8tPkC,Lena,Satellite,My Cassette Player,"[0.0248264782130718, -0.0130147626623511, -0.0...","[0.0234581675356102, 0.0106312696958031, 0.008...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.097...",1.0
4776,TEYr2jFKqLiibyI7,Sophie Ellis-Bextor,Love It Is Love,Shoot from the Hip,"[0.0325065068900585, 0.0252496302127838, -0.00...","[0.0350450286220477, -0.0008324891750817, 0.00...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.636165
2602,FoenRGxt8ED5UDj8,Danger,11h30,French Attack!,"[0.0459299124777317, 0.0142702432349324, -0.00...","[0.0390507596944059, -0.0203864892079894, 0.01...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.604088
8670,r9jKmYrPBtJRgg2k,The Weeknd,Can't Feel My Face - Martin Garrix Remix,Can't Feel My Face (Martin Garrix Remix),"[0.0108366692438721, 0.0221357941627502, -0.01...","[0.0217854850999812, -0.0138672701011292, 0.01...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.603747
3038,ISH3Weh5lSybSTNb,Wings,Silly Love Songs,Wings At The Speed Of Sound,"[0.0335490964353084, 0.0035518314689397, -0.02...","[0.0241067869505578, -0.0191004661942965, 0.02...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.207...",0.591854
785,4fNOkOQVsp2lkBen,Alicia Keys,Fallin',Songs In A Minor (Expanded Edition),"[-0.0084553556516766, -0.0231564063578844, 0.0...","[0.0300932081549295, -0.0003555286564819, 0.00...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.590371
8450,ptSQMoK9d8CydnHP,Phoenix,Chloroform,Bankrupt!,"[0.0549735054373741, 0.0111094331368803, -0.01...","[0.0362936046408259, -0.0150780265087146, 0.01...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.585183
4897,U0fgnyiJBI9Ypbsj,Mika,Lollipop,Life in Cartoon Motion,"[0.0250862203538417, 0.0291050467640161, -0.00...","[0.0322641837924095, -0.0039999505085339, 0.00...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.573458
8979,t8M8yJHyRlalRn4E,Hombres G,Te Quiero,Las baladas (Los singles vol II),"[0.0231318846344947, -0.0088586620986461, -0.0...","[0.0286453383397573, -0.026708961118498, 0.015...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.565957
9990,zI9DjkxOs5x1EhJQ,Anna Calvi,No More Words,Anna Calvi,"[-0.0162214189767837, 0.0066514564678072, -0.0...","[0.0189160151618394, -0.0181776074071725, 0.02...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.562936


In [24]:
rs_cosine.retrieve("xUjqXrsiCLb8tPkC", n=3)

Unnamed: 0,id,artist,song,album_name,bert_embedding,word2vec_embedding,tf-idf,similarity
9714,xUjqXrsiCLb8tPkC,Lena,Satellite,My Cassette Player,"[0.0248264782130718, -0.0130147626623511, -0.0...","[0.0234581675356102, 0.0106312696958031, 0.008...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.097...",1.0
4776,TEYr2jFKqLiibyI7,Sophie Ellis-Bextor,Love It Is Love,Shoot from the Hip,"[0.0325065068900585, 0.0252496302127838, -0.00...","[0.0350450286220477, -0.0008324891750817, 0.00...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.636165
2602,FoenRGxt8ED5UDj8,Danger,11h30,French Attack!,"[0.0459299124777317, 0.0142702432349324, -0.00...","[0.0390507596944059, -0.0203864892079894, 0.01...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.604088


In [25]:
sample_song = SongInfo(title="Always", artist="Bon Jovi")

rs_cosine.retrieve(sample_song, n=10)

Unnamed: 0,id,artist,song,album_name,bert_embedding,word2vec_embedding,tf-idf,similarity
1474,8nyPt8gB1K8g5FNv,Bon Jovi,Always,Cross Road,"[0.0144497156143188, 0.0245958436280488, -0.01...","[0.0225783427988251, 0.0156012110966983, 0.017...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",1.0
2774,GuH9cRalvkhXvmaZ,Switchfoot,Always,Hello Hurricane,"[0.0055294819176197, -0.0001915300526889, -0.0...","[0.0189313250011764, 0.0269654621203829, 0.012...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.483889
9858,yP56QH0tXHXsDNF8,Whitney Houston,I Will Always Love You,The Bodyguard - Original Soundtrack Album,"[0.0098089557141065, 0.0142131317406892, -0.03...","[0.0187607038782776, 0.0015507783973589, 0.015...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.483217
6985,h47nxyjyRPFroIaQ,k.d. lang,Constant Craving,Recollection,"[0.0494027473032474, 0.0115364342927932, -0.00...","[0.0274683685055461, 0.0313542720294841, -0.01...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.472977
7945,muLitfTmi9qTxcFV,HammerFall,Always Will Be,Gates Of Dalhalla,"[0.0251877382397651, 0.019176747649908, -0.009...","[0.0213097585162813, 0.0215721395640381, 0.018...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.458007
1254,7av2576enFL1qIFt,Sade,You're Not The Man,Promise,"[0.0672291442751884, -0.0037387742195278, -0.0...","[0.0354864509048356, 0.0098513099172794, 0.032...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.08134298...",0.455112
6275,d0ANC4yI7hpiCzPe,Taylor Dayne,I'll Always Love You,Tell It to My Heart (Expanded Edition),"[0.0119658829644322, 0.0227391123771667, -0.00...","[0.0201313272995189, 0.001961245751345, 0.0161...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.424737
758,4TzvRc17Z1Rn6LWj,Sophie Zelmani,Always You,My Best Friend's Wedding: Music From The Motio...,"[0.0561069324612617, 0.0314044989645481, -0.00...","[0.0214049826480684, 0.0272937887777133, 0.009...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.423746
5858,aIJRXTjxYPzwX2Y7,Ace of Base,"Always Have, Always Will",Flowers (Remastered),"[-0.0179681722074747, 0.0235860869288444, -0.0...","[0.0201336787846009, 0.0044605702930835, 0.008...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.421836
1904,BRT3Xamcitwly4w6,Forfun,Pra Sempre,Alegria Compartilhada,"[0.0343994535505771, 0.0788502767682075, -0.01...","[0.0282556520283429, 0.0323357793487726, 0.011...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.418782


## Text-based (cos-sim, "feature")

Here the feature is the BERT embedding of the lyrics.

In [26]:
rs_bert = RetrievalSystem(df, cosine_similarity, "bert_embedding")

In [27]:
rs_bert.retrieve(1)

Unnamed: 0,id,artist,song,album_name,bert_embedding,word2vec_embedding,tf-idf,similarity
1,01gyRHLquwXDlhkO,The Notorious B.I.G.,Somebody's Gotta Die,Life After Death (Remastered Edition),"[0.0084422621876001, 0.0302564185112714, 0.009...","[0.018537292381979, 0.0113115924403394, 0.0107...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",1.0
7410,jj55fEqkySIv4qy1,YNW Melly,Murder On My Mind,I AM YOU,"[0.0164332240819931, 0.059163823723793, 0.0078...","[0.019819463780142, 0.0183831372960049, 0.0207...","[0.0, 0.0, 0.0, 0.0190404350324946, 0.0, 0.017...",0.75099
4032,OokmnsloeW1Sh3NF,Freeway,What We Do,Philadelphia Freeway,"[-0.0033323119860142, 0.0608314909040927, 0.01...","[0.0174925761162075, 0.0111284196112097, 0.003...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.731522
7349,jJEgEAF2iuW4yk9g,Danny Brown,When It Rain,Atrocity Exhibition,"[0.0014290270628407, 0.0355900675058364, 0.011...","[0.0105782504642751, 0.0090563397756651, -0.00...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.729569
3599,M65mU1UIozrDxcvu,Bad Meets Evil,Fast Lane,Hell: The Sequel (Deluxe),"[0.00348283466883, 0.0361678898334503, 0.00615...","[0.0153334198974876, 0.0139721735374244, 0.006...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.720946


In [28]:
# check reproducibility
rs_bert.retrieve(1)

Unnamed: 0,id,artist,song,album_name,bert_embedding,word2vec_embedding,tf-idf,similarity
1,01gyRHLquwXDlhkO,The Notorious B.I.G.,Somebody's Gotta Die,Life After Death (Remastered Edition),"[0.0084422621876001, 0.0302564185112714, 0.009...","[0.018537292381979, 0.0113115924403394, 0.0107...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",1.0
7410,jj55fEqkySIv4qy1,YNW Melly,Murder On My Mind,I AM YOU,"[0.0164332240819931, 0.059163823723793, 0.0078...","[0.019819463780142, 0.0183831372960049, 0.0207...","[0.0, 0.0, 0.0, 0.0190404350324946, 0.0, 0.017...",0.75099
4032,OokmnsloeW1Sh3NF,Freeway,What We Do,Philadelphia Freeway,"[-0.0033323119860142, 0.0608314909040927, 0.01...","[0.0174925761162075, 0.0111284196112097, 0.003...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.731522
7349,jJEgEAF2iuW4yk9g,Danny Brown,When It Rain,Atrocity Exhibition,"[0.0014290270628407, 0.0355900675058364, 0.011...","[0.0105782504642751, 0.0090563397756651, -0.00...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.729569
3599,M65mU1UIozrDxcvu,Bad Meets Evil,Fast Lane,Hell: The Sequel (Deluxe),"[0.00348283466883, 0.0361678898334503, 0.00615...","[0.0153334198974876, 0.0139721735374244, 0.006...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.720946


In [29]:
rs_bert.retrieve("xUjqXrsiCLb8tPkC", n=10)

Unnamed: 0,id,artist,song,album_name,bert_embedding,word2vec_embedding,tf-idf,similarity
9714,xUjqXrsiCLb8tPkC,Lena,Satellite,My Cassette Player,"[0.0248264782130718, -0.0130147626623511, -0.0...","[0.0234581675356102, 0.0106312696958031, 0.008...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.097...",1.0
3315,KJyVTXfwdjUGtj5m,Boyzone,Love Is A Hurricane,Brother,"[0.0109569383785128, 0.0114181209355592, -0.01...","[0.0257171472981099, 0.0143042436575342, 0.018...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.070...",0.838821
9358,vMluKVsjLFKZEmdg,Rose Royce,Best Love,Essential - Soul Love,"[0.0156424306333065, -0.0005188798531889, -0.0...","[0.0099419479391893, 0.002779156429289, 0.0152...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.816716
9216,uRNge3sN74NdIaYi,Pixie Lott,My Love,Turn It Up,"[0.0216934476047754, 0.0021355729550123, -0.02...","[0.0354702897806832, 0.0013932273866635, 0.009...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.809644
1474,8nyPt8gB1K8g5FNv,Bon Jovi,Always,Cross Road,"[0.0144497156143188, 0.0245958436280488, -0.01...","[0.0225783427988251, 0.0156012110966983, 0.017...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.809535
9477,w64KwSB7npnE8hb2,Feist,Inside and Out,Let It Die,"[0.0126460622996091, 0.0134117128327488, -0.00...","[0.0271040636265603, 0.0090910210852575, 0.014...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.809321
6926,gkaNGhUTCZzICBcE,Frankie Valli,Can't Take My Eyes Off You,Relaxing Classical Playlist: Chilled Music for...,"[0.0064658499322831, 0.0126306535676121, -0.01...","[0.0322652382319944, 0.0124505681016035, 0.008...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.808088
2231,DOlk8ct9zA9uIFX0,Selena Gomez & The Scene,A Year Without Rain,A Year Without Rain,"[-0.0006214904715307, 0.00964539591223, -0.015...","[0.0239702489085589, 0.024451315599493, -0.004...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.807728
1504,8xFD1UO8nr1qwOg6,Ed Sheeran,Thinking Out Loud,x (Deluxe Edition),"[0.0021413159556686, 0.0046218605712056, 0.000...","[0.025883711926304, 0.0162178390764449, 0.0213...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.804023
6891,gX0rgtPnKPOD9qhT,Mariah Carey,To Be Around You,Emotions,"[-0.0074757486581802, -0.003137374529615, -0.0...","[0.0290032677086336, 0.0051829625160566, 0.017...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.802836


In [30]:
rs_bert.retrieve("xUjqXrsiCLb8tPkC", n=3)

Unnamed: 0,id,artist,song,album_name,bert_embedding,word2vec_embedding,tf-idf,similarity
9714,xUjqXrsiCLb8tPkC,Lena,Satellite,My Cassette Player,"[0.0248264782130718, -0.0130147626623511, -0.0...","[0.0234581675356102, 0.0106312696958031, 0.008...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.097...",1.0
3315,KJyVTXfwdjUGtj5m,Boyzone,Love Is A Hurricane,Brother,"[0.0109569383785128, 0.0114181209355592, -0.01...","[0.0257171472981099, 0.0143042436575342, 0.018...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.070...",0.838821
9358,vMluKVsjLFKZEmdg,Rose Royce,Best Love,Essential - Soul Love,"[0.0156424306333065, -0.0005188798531889, -0.0...","[0.0099419479391893, 0.002779156429289, 0.0152...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.816716


In [31]:
sample_song = SongInfo(title="Always", artist="Bon Jovi")

rs_bert.retrieve(sample_song, n=10)

Unnamed: 0,id,artist,song,album_name,bert_embedding,word2vec_embedding,tf-idf,similarity
1474,8nyPt8gB1K8g5FNv,Bon Jovi,Always,Cross Road,"[0.0144497156143188, 0.0245958436280488, -0.01...","[0.0225783427988251, 0.0156012110966983, 0.017...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",1.0
6809,g5Y3fz7C9EnMXdMA,Lana Del Rey,Blue Jeans (Gesaffelstein Remix),Blue Jeans Remixes,"[0.0005073484499007, 0.0516314357519149, -0.00...","[0.0135968459859633, 0.0143863931580339, 0.015...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.861671
2634,G2otV6WAmea6VB1f,Steps,Heartbeat,Step One,"[0.0182322841137647, 0.0263710282742977, -0.02...","[0.0235298634303317, 0.0174621289812968, 0.010...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.855285
2319,DzOzu2jdYJQY7xH9,Mariah Carey,Whenever You Call,Butterfly,"[0.0052341688424348, 0.0173688419163227, -0.02...","[0.0051299023547584, 0.0066302975003017, 0.014...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.850007
7276,ire6sDD2ryFx62Vx,Supertramp,My Kind Of Lady,Famous Last Words (Remastered),"[0.0064854389056563, 0.032216902822256, -0.015...","[0.0125396402083307, 0.016958795785303, 0.0149...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.846296
4564,RvExtv1mIVglWrLc,Amber Pacific,Gone So Young,The Possibility and the Promise,"[0.0076442663557827, 0.0012404836015775, -0.03...","[-0.0006062765857456, 0.0090232125445696, 0.02...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.845325
1504,8xFD1UO8nr1qwOg6,Ed Sheeran,Thinking Out Loud,x (Deluxe Edition),"[0.0021413159556686, 0.0046218605712056, 0.000...","[0.025883711926304, 0.0162178390764449, 0.0213...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.844703
8580,qbNvLKSCnMbHHD26,Atomic Kitten,If You Come to Me,Ladies Night,"[0.0099219242110848, 0.0535404942929744, -0.01...","[0.0104777433101416, 0.0095600008094225, -0.00...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.05246146...",0.839742
2016,C9TfpaaJWG6ZbRmg,Oh Land,Love You Better,Wishbone,"[0.0224775746464729, 0.0490416437387466, -0.01...","[0.0119707079522868, 0.0122023489360193, 0.015...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.832535
5288,WYDhkPUgu71CQnF2,Shania Twain,Forever and for Always,Up!,"[-0.0051636109128594, 0.0454315170645713, -0.0...","[0.0189944793535179, 0.0149165007584482, 0.013...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.832055


## Text-based("similarity", "feature")
Finally, we use the word2vec embeddings as features and the plain dot product as the similarity metric.

In [32]:
# Retrieval system over df that pairs dot_product with the "word2vec_embedding"
# column (presumably passed as similarity function and feature column, in that
# order; confirm against the RetrievalSystem definition).
rs_w2v = RetrievalSystem(df, dot_product, "word2vec_embedding")

In [33]:
# Query with the integer 1 and no explicit n (the recorded output has five rows).
# NOTE(review): how an integer query is interpreted is decided inside
# RetrievalSystem.retrieve, which is not visible here; confirm there.
rs_w2v.retrieve(1)

Unnamed: 0,id,artist,song,album_name,bert_embedding,word2vec_embedding,tf-idf,similarity
7591,kqVWk6G25ortikfF,Tame Impala,Beverly Laurel,Lonerism B-Sides & Remixes,"[0.027529876679182, 0.0361310057342052, 0.0032...","[0.0207204579492099, -0.031965019297786, 0.032...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.20698
5101,VJXAAxF84r7eg4bf,Blur,I Know,Leisure (Special Edition),"[0.0273886676877737, 0.0458299070596694, -0.02...","[-0.002103335357138, -0.011079236417525, 0.007...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.200881
1671,9ziWMjkmNbJKtU8Y,Tame Impala,Nangs,Currents,"[0.0668678805232048, 0.0044250548817217, 0.017...","[0.0545170158147811, 0.0088886441662907, 0.037...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.200703
4200,PqjX8Jm9lA7ogAe4,Solange,Time (Is),When I Get Home,"[0.0141285741701722, -0.0031541504431515, -0.0...","[0.0002488365693528, 0.0001221311291784, 0.004...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.200372
4352,Qh5XjFCInkeOAYzu,Supergrass,Time,I Should Coco,"[-0.0017348146066069, 0.0078667551279068, -0.0...","[0.0187828158411909, -0.0109101621269741, -0.0...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.200032


In [34]:
# Check reproducibility: issue the identical query twice and verify in code that
# both runs agree, rather than relying on a visual comparison of two outputs.
first_run = rs_w2v.retrieve(1)
second_run = rs_w2v.retrieve(1)

# Compare only the scalar result columns (and the row index); the embedding
# columns store one numpy array per cell and are not needed for this check.
pd.testing.assert_frame_equal(
    first_run[["id", "similarity"]],
    second_run[["id", "similarity"]],
)

# Keep the frame as the cell's last expression so it is still displayed.
second_run

Unnamed: 0,id,artist,song,album_name,bert_embedding,word2vec_embedding,tf-idf,similarity
7591,kqVWk6G25ortikfF,Tame Impala,Beverly Laurel,Lonerism B-Sides & Remixes,"[0.027529876679182, 0.0361310057342052, 0.0032...","[0.0207204579492099, -0.031965019297786, 0.032...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.20698
5101,VJXAAxF84r7eg4bf,Blur,I Know,Leisure (Special Edition),"[0.0273886676877737, 0.0458299070596694, -0.02...","[-0.002103335357138, -0.011079236417525, 0.007...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.200881
1671,9ziWMjkmNbJKtU8Y,Tame Impala,Nangs,Currents,"[0.0668678805232048, 0.0044250548817217, 0.017...","[0.0545170158147811, 0.0088886441662907, 0.037...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.200703
4200,PqjX8Jm9lA7ogAe4,Solange,Time (Is),When I Get Home,"[0.0141285741701722, -0.0031541504431515, -0.0...","[0.0002488365693528, 0.0001221311291784, 0.004...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.200372
4352,Qh5XjFCInkeOAYzu,Supergrass,Time,I Should Coco,"[-0.0017348146066069, 0.0078667551279068, -0.0...","[0.0187828158411909, -0.0109101621269741, -0.0...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.200032


In [35]:
# Query rs_w2v by track id and ask for n=10 rows.
# Note: in the recorded output the query track itself is not among the ten rows,
# presumably because a plain dot product does not guarantee that a vector scores
# highest against itself; confirm against the dot_product definition.
rs_w2v.retrieve("xUjqXrsiCLb8tPkC", n=10)

Unnamed: 0,id,artist,song,album_name,bert_embedding,word2vec_embedding,tf-idf,similarity
5942,alViVmyWPgD1TNjV,Amy Winehouse,To Know Him Is to Love Him,Back To Black: B-Sides,"[-0.0303377602249383, 0.033262051641941, -0.01...","[0.008976437833068, -0.0263210574669453, 0.008...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.25823
4352,Qh5XjFCInkeOAYzu,Supergrass,Time,I Should Coco,"[-0.0017348146066069, 0.0078667551279068, -0.0...","[0.0187828158411909, -0.0109101621269741, -0.0...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.257726
2602,FoenRGxt8ED5UDj8,Danger,11h30,French Attack!,"[0.0459299124777317, 0.0142702432349324, -0.00...","[0.0390507596944059, -0.0203864892079894, 0.01...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.256744
6016,bIDVY7FbsuHTgV50,Bent,To Be Loved,Intercept! Deluxe Edition,"[0.0561926960945129, 0.0288669969886541, 0.001...","[0.0477735178777948, 0.0420612953603267, 0.043...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.256257
1671,9ziWMjkmNbJKtU8Y,Tame Impala,Nangs,Currents,"[0.0668678805232048, 0.0044250548817217, 0.017...","[0.0545170158147811, 0.0088886441662907, 0.037...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.251869
5101,VJXAAxF84r7eg4bf,Blur,I Know,Leisure (Special Edition),"[0.0273886676877737, 0.0458299070596694, -0.02...","[-0.002103335357138, -0.011079236417525, 0.007...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.249743
7591,kqVWk6G25ortikfF,Tame Impala,Beverly Laurel,Lonerism B-Sides & Remixes,"[0.027529876679182, 0.0361310057342052, 0.0032...","[0.0207204579492099, -0.031965019297786, 0.032...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.248669
9596,wthZJWSU9CZmQ8XX,September,Because I Love You,September,"[0.0219271760433912, 0.0171462707221508, -0.01...","[0.0338738771263576, -0.0327832125726022, 0.02...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.245535
767,4XJZFt2CYHjejWh7,Avicii,Levels - Radio Edit,Sommer 2019,"[-0.0397753082215786, 0.026319533586502, -0.00...","[0.0311360507655669, 0.0008656910675413, -0.03...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.243641
8239,oaSlacL5oON2ueP8,Orchestral Manoeuvres in the Dark,(Forever) Live and Die,The Pacific Age,"[0.0515315309166908, 0.0264671593904495, -0.00...","[0.0024889501138305, -0.0121915402976608, 0.00...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.241794


In [36]:
# Same track id as in the n=10 query, limited to n=3 rows; the recorded output
# matches the first three rows of that longer result.
rs_w2v.retrieve("xUjqXrsiCLb8tPkC", n=3)

Unnamed: 0,id,artist,song,album_name,bert_embedding,word2vec_embedding,tf-idf,similarity
5942,alViVmyWPgD1TNjV,Amy Winehouse,To Know Him Is to Love Him,Back To Black: B-Sides,"[-0.0303377602249383, 0.033262051641941, -0.01...","[0.008976437833068, -0.0263210574669453, 0.008...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.25823
4352,Qh5XjFCInkeOAYzu,Supergrass,Time,I Should Coco,"[-0.0017348146066069, 0.0078667551279068, -0.0...","[0.0187828158411909, -0.0109101621269741, -0.0...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.257726
2602,FoenRGxt8ED5UDj8,Danger,11h30,French Attack!,"[0.0459299124777317, 0.0142702432349324, -0.00...","[0.0390507596944059, -0.0203864892079894, 0.01...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.256744


In [37]:
# Same title/artist query that was used with rs_bert, rebuilt here so the cell
# is self-contained.
sample_song = SongInfo(title="Always", artist="Bon Jovi")

# Ask rs_w2v for n=10 rows; in the recorded output the "Always" entry itself is
# not among them.
rs_w2v.retrieve(sample_song, n=10)

Unnamed: 0,id,artist,song,album_name,bert_embedding,word2vec_embedding,tf-idf,similarity
1671,9ziWMjkmNbJKtU8Y,Tame Impala,Nangs,Currents,"[0.0668678805232048, 0.0044250548817217, 0.017...","[0.0545170158147811, 0.0088886441662907, 0.037...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.242612
5101,VJXAAxF84r7eg4bf,Blur,I Know,Leisure (Special Edition),"[0.0273886676877737, 0.0458299070596694, -0.02...","[-0.002103335357138, -0.011079236417525, 0.007...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.23092
8239,oaSlacL5oON2ueP8,Orchestral Manoeuvres in the Dark,(Forever) Live and Die,The Pacific Age,"[0.0515315309166908, 0.0264671593904495, -0.00...","[0.0024889501138305, -0.0121915402976608, 0.00...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.229072
7591,kqVWk6G25ortikfF,Tame Impala,Beverly Laurel,Lonerism B-Sides & Remixes,"[0.027529876679182, 0.0361310057342052, 0.0032...","[0.0207204579492099, -0.031965019297786, 0.032...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.228911
4352,Qh5XjFCInkeOAYzu,Supergrass,Time,I Should Coco,"[-0.0017348146066069, 0.0078667551279068, -0.0...","[0.0187828158411909, -0.0109101621269741, -0.0...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.224972
5533,Y6S6YPEyfK5F8TMP,Blur,High Cool,Leisure,"[0.0317266955971717, 0.0232173074036836, -0.01...","[0.0243767883069813, 0.0167615284025669, -0.00...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.222764
5942,alViVmyWPgD1TNjV,Amy Winehouse,To Know Him Is to Love Him,Back To Black: B-Sides,"[-0.0303377602249383, 0.033262051641941, -0.01...","[0.008976437833068, -0.0263210574669453, 0.008...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.22207
4885,TvyK48kBK3S8XOXD,Fatboy Slim,Going Out of My Head,The Greatest Hits: Why Try Harder,"[-0.0218375567346811, -0.0232946276664733, 0.0...","[0.0846379287540912, 0.0920350179076194, 0.030...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.219967
8615,qq3el8xwdckarmlh,The Brian Jonestown Massacre,Wasted,Methodrone,"[0.0179591365158557, 0.004815602209419, -0.000...","[0.0202043564643675, 0.0119870190323281, 0.023...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.217051
3836,Nblp573S43B1TMzx,Gipsy Kings,Tu Quieres Volver,Cantos de Amor / Love Songs,"[0.0294866636395454, -0.0039065848104655, -0.0...","[0.0245165100150033, 0.0106129271125532, 0.009...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.216332


These results already seem a lot less reasonable. However, for a proper comparison we should change only one variable at a time, which is not the case here. Still, the results are interesting to look at.