In [112]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity, linear_kernel, polynomial_kernel, sigmoid_kernel, rbf_kernel, laplacian_kernel, chi2_kernel, euclidean_distances, manhattan_distances, cosine_distances
import plotly.express as px
import plotly.graph_objects as go

Load the checkpointed DataFrames that contain only the features relevant for the recommender system.

In [15]:
# Read both checkpoint files and key each frame by its 'isrc' column.
by_isrc = pd.read_csv('data/checkpoint/by_isrc_oldest.csv').set_index('isrc')
ct_merged = pd.read_csv('data/checkpoint/ct_merged.csv').set_index('isrc')

In [16]:
# Transposed preview: each of the first five tracks becomes a column and the fields read down the rows.
by_isrc.head().T

isrc,AEA040700577,AEA040700578,AEA040700579,AEA040700580,AEA040700581
genres,"['j-pop', 'jazz', 'pop']","['groove', 'j-pop', 'jazz', 'pop']","['groove', 'j-pop', 'jazz', 'pop']","['groove', 'j-pop', 'jazz', 'pop']","['groove', 'j-pop', 'jazz', 'pop', 'pop']"
name,Bala Wala Chi,Houdou Nisbi,Nafs Al Sheghlat,Yalla Kichou Barra,Ma Tfel
artists,Ziad Rahbani,Ziad Rahbani,Ziad Rahbani,Ziad Rahbani,Ziad Rahbani
album,Houdou Nisbi,Houdou Nisbi,Houdou Nisbi,Houdou Nisbi,Houdou Nisbi
release_date,1985-01-01,1985-01-01,1985-01-01,1985-01-01,1985-01-01
release_date_precision,day,day,day,day,day
uri,spotify:track:0fylgLeNObjVvwhd8caHqX,spotify:track:0yMFpBNCYXqwwOAg23bC8a,spotify:track:6G8l1kI8QlTD0UDIak5F8H,spotify:track:21g76Lq5Jg4QvfTDvi4PlH,spotify:track:0pKxrkFh8fxPKpkO29MYmi
spotify_id,0fylgLeNObjVvwhd8caHqX,0yMFpBNCYXqwwOAg23bC8a,6G8l1kI8QlTD0UDIak5F8H,21g76Lq5Jg4QvfTDvi4PlH,0pKxrkFh8fxPKpkO29MYmi
chart_power,,,,,
popularity,41,31,21,17,31


Try different distance measures / similarity functions.

In [87]:
def rec_question(by_isrc, ids, df_help):
    '''
    Ask the user which of several candidate tracks was meant.

    The prompt is repeated until the answer is an integer in the range
    ``0 .. len(ids) - 1``. Non-numeric answers are treated like any other
    invalid answer instead of raising.

    Parameter
    ---------
    by_isrc:
        Not used by this function; kept so existing callers keep working.

    ids: list
        Candidate identifiers. Only its length is used, as the upper bound
        for a valid answer.

    df_help: pd.DataFrame
        Candidate rows, looked up by the entered integer via ``.loc``.
        Must provide the columns 'name', 'artists' and 'isrc'.

    Returns
    -------
    The 'isrc' value of the selected row of ``df_help``.

    Raises
    ------
    ValueError
        If ``ids`` is empty, because no answer could ever be valid.
    '''
    if len(ids) == 0:
        raise ValueError('rec_question needs at least one candidate to choose from')

    # Loop rather than recurse: repeated wrong answers must not grow the call stack.
    while True:
        answer = input('Which song did you mean? (Enter the index of the song)')
        try:
            choice = int(answer)
        except ValueError:
            # Not a number at all -> force the "invalid" branch below.
            choice = -1
        if 0 <= choice < len(ids):
            break
        print('False input, try again!')

    name = df_help.loc[choice, 'name']
    artists = df_help.loc[choice, 'artists']
    print(f'You selected {name} by {artists}.')
    return df_help.loc[choice, 'isrc']

In [107]:
def recommend_tracks_kernel(track: str, recommender_function, distance: bool = False):
    '''
    Rank all tracks of ``ct_merged`` by their score against the provided track.

    Reads the notebook-level DataFrames ``by_isrc`` (searched by its 'name'
    column) and ``ct_merged`` (handed to ``recommender_function``); neither
    is modified.

    Parameter
    ---------
    track: str
        Name of the track to look up in ``by_isrc``. If several rows carry
        this name, the candidates are displayed and the user is asked to
        pick one.

    recommender_function: callable
        Called as ``recommender_function(ct_merged, query_rows)`` where
        ``query_rows`` are the rows of ``ct_merged`` whose index equals the
        selected track. Its result becomes the 'Score' column, one row per
        row of ``ct_merged``.

    distance: bool
        Passed as ``ascending`` to the final sort: use True when smaller
        scores mean "more similar" (distances), False for similarities.

    Returns
    -------
    pd.DataFrame or None
        Scores merged with ``by_isrc`` and sorted by 'Score', or None (after
        printing a message) when no track with this name exists.
    '''
    ids = list(by_isrc[by_isrc['name'] == track].index)

    # Check for "no match" before touching ids[0]; indexing an empty list
    # would raise IndexError.
    if not ids:
        print('No song with this name available!')
        return None

    if len(ids) > 1:
        print('There are multiple songs with this name:')
        df_help = by_isrc.loc[ids].reset_index()
        display(df_help)
        index = rec_question(by_isrc, ids, df_help)
    else:
        index = ids[0]

    query_rows = ct_merged[ct_merged.index == str(index)]
    kernel_array = recommender_function(ct_merged, query_rows)

    # Column 0 of the result holds the score against the query track.
    kernel_df = pd.DataFrame(kernel_array, index=ct_merged.index).rename(columns={0: 'Score'})
    kernel_df = kernel_df.merge(by_isrc, how='left', on='isrc')
    return kernel_df.sort_values(by='Score', ascending=distance)


In [108]:
def try_functions(track):
    '''
    Run the recommender once per configured scoring function.

    Each function's name is displayed before it is used. Similarity
    functions are ranked descending, distance functions ascending.

    Returns
    -------
    dict
        Maps each function's ``__name__`` to the result of
        ``recommend_tracks_kernel`` for that function.
    '''
    # (scoring function, sort ascending?) -- similarity first, then distances.
    candidates = (
        (cosine_similarity, False),
        (euclidean_distances, True),
        (manhattan_distances, True),
    )

    outcome = {}
    for scorer, ascending in candidates:
        label = scorer.__name__
        display(label)
        outcome[label] = recommend_tracks_kernel(track, scorer, ascending)
    return outcome

In [199]:
def create_visualizations(data, scale=False, top_n=4, contrast_rank=50):
    '''
    Plot the audio-feature profiles of selected rows as one radar chart.

    Rows are taken in the order in which ``data`` is given. The first
    ``top_n`` rows and the row at 1-based position ``contrast_rank`` are
    drawn; the rows in between and everything after are skipped.

    Parameter
    ---------
    data: pd.DataFrame
        Must contain a 'name' column and the audio-feature columns listed
        in the function body. The frame passed in is never modified.

    scale: bool
        If True, the audio-feature columns are min-max scaled over all rows
        of ``data`` before plotting.

    top_n: int
        Number of leading rows to draw.

    contrast_rank: int
        1-based position of the additional row to draw.
    '''
    audio_features = ['danceability', 'energy',
                      'key', 'loudness', 'mode', 'speechiness',
                      'acousticness', 'instrumentalness', 'liveness',
                      'valence', 'time_signature', 'tempo']
    # The first feature is repeated at the end, as the original feature list
    # did -- presumably so that the drawn outline closes on itself.
    radar_axes = audio_features + audio_features[:1]

    if scale:
        # Work on a copy so the caller's DataFrame keeps its raw values and columns.
        data = data.copy()
        data[audio_features] = MinMaxScaler().fit_transform(data[audio_features])

    fig = go.Figure()
    for rank, (_, song) in enumerate(data.iterrows(), start=1):
        if rank > contrast_rank:
            break
        if top_n < rank < contrast_rank:
            continue
        profile = song[radar_axes]
        fig.add_trace(go.Scatterpolar(
            r=profile.tolist(),
            theta=radar_axes,
            mode='lines',
            fill='none',
            name=f"{rank}. {song['name']}"
        ))

    fig.update_layout(
        height=1000
    )
    fig.show()
    

    
def recommend(track):
    '''
    Show the closest tracks for ``track`` and plot their feature profiles.

    The ranking comes from ``recommend_tracks_kernel`` with
    ``euclidean_distances``, sorted ascending. The first five rows are
    displayed and the ranking is passed to ``create_visualizations`` with
    scaling enabled.

    Parameter
    ---------
    track: str
        Name of the track to get recommendations for.

    Returns
    -------
    None. Nothing is displayed or plotted when the lookup yields no ranking.
    '''
    data = recommend_tracks_kernel(track, euclidean_distances, True)
    if data is None:
        # No ranking available; slicing None would raise a TypeError.
        return
    display(data.head(5))
    create_visualizations(data, True)

In [200]:
# Example run; if several tracks share this title, an input() prompt asks which one is meant.
recommend('Beat It')

There are multiple songs with this name:


Unnamed: 0,isrc,genres,name,artists,album,release_date,release_date_precision,uri,spotify_id,chart_power,...,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,year
0,DEEF21902041,"['electro', 'jazz']",Beat It,The Revolution,We Made the Night,1988,year,spotify:track:4IRhHVeRJIrZAGSrKKChW8,4IRhHVeRJIrZAGSrKKChW8,,...,1,0.0274,0.498,0.911,0.34,0.823,120.099,206472,4,1988
1,GBBZV8504790,"['dub', 'reggae']",Beat It,Yellowman,Galong Galong Galong,1985,year,spotify:track:0OjIMaOx6Ct8qWwyglHnkB,0OjIMaOx6Ct8qWwyglHnkB,,...,1,0.192,0.178,0.0,0.0578,0.879,89.961,211693,4,1985
2,USSM19902990,['pop'],Beat It,Michael Jackson,Thriller,1982-11-30,day,spotify:track:3BovdzfaX4jb5KFQwoPfAw,3BovdzfaX4jb5KFQwoPfAw,1411.0,...,0,0.0473,0.0242,0.000353,0.234,0.901,138.728,258400,4,1982


Which song did you mean? (Enter the index of the song) 2


You selected Beat It by Michael Jackson.


Unnamed: 0_level_0,Score,genres,name,artists,album,release_date,release_date_precision,uri,spotify_id,chart_power,...,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,year
isrc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
USSM19902990,4.214685e-08,['pop'],Beat It,Michael Jackson,Thriller,1982-11-30,day,spotify:track:3BovdzfaX4jb5KFQwoPfAw,3BovdzfaX4jb5KFQwoPfAw,1411.0,...,0,0.0473,0.0242,0.000353,0.234,0.901,138.728,258400,4,1982
ISA109205206,0.1724493,['pop'],Nú er úti veður vott,"Katla María,Pálmi Gunnarsson",Katla og Pálmi,1982,year,spotify:track:2NC6gG3F0K2TLYlyXgCAFA,2NC6gG3F0K2TLYlyXgCAFA,,...,0,0.0279,0.00674,0.000429,0.172,0.872,138.13,158174,4,1982
FIFPS8300005,0.1766179,['pop'],Näillä eväillä,Matti Esko,Tahdon olla luonas,1983,year,spotify:track:7i7kCtlwgLpByXthzRUrID,7i7kCtlwgLpByXthzRUrID,,...,0,0.0283,0.0789,0.0,0.234,0.935,121.526,188333,4,1983
GBAYK0500077,0.1799912,['pop'],I'm Not a Loser - 2006 Remaster,Judie Tzuke,Shoot the Moon (2006 Remaster),1982-04-01,day,spotify:track:1th9UUf0658GCDXWvL5Fwv,1th9UUf0658GCDXWvL5Fwv,,...,0,0.0426,0.0848,0.0,0.139,0.833,126.573,209533,4,1982
USAT20621282,0.1805117,['pop'],I Can't Get Enough,Modern Romance,Adventures in Clubland (Expanded),1981-01-01,day,spotify:track:77Zg8C3gwqpGu0Go51uPiV,77Zg8C3gwqpGu0Go51uPiV,,...,0,0.0593,0.0783,0.00478,0.27,0.922,121.232,262067,4,1981


In [93]:
#result = try_functions("Purple Rain")