In [2]:
import pandas as pd
import numpy as np
import random
import math

In [3]:
# Location and file name of the track catalogue CSV (joined by plain string concatenation when read).
DATA_DIR = "data/"
SONGS_FILE = "new_tracks.csv"
NFEATURE = 13        # Number of features: the trailing NFEATURE columns of a song row form its feature vector
last_t_init = -50    # Initial value written to the `last_t` column of every song
totReco = 0          # Running count of recommendations made so far (incremented by greedy_choice)

In [4]:
def normalize(Songs):
    """Min-max scale selected feature columns to the range [0, 1].

    Only 'tempo', 'duration_ms', 'key', 'loudness' and 'time_signature' are
    rescaled; every other column is copied through untouched.

    Parameters
    ----------
    Songs : pandas.DataFrame
        Catalogue containing at least the five columns listed above.

    Returns
    -------
    pandas.DataFrame
        A copy of ``Songs`` with the listed columns rescaled. The input frame
        is not modified.
    """
    Songs_norm = Songs.copy()
    norm_required = ['tempo', 'duration_ms', 'key', 'loudness', 'time_signature']

    for feature_name in norm_required:
        column = Songs[feature_name]
        min_value = column.min()
        value_range = column.max() - min_value

        if value_range == 0:
            # Constant column: (x - min) / 0 would turn every value into NaN
            # and poison all later distance computations, so map it to 0.0.
            Songs_norm[feature_name] = 0.0
        else:
            Songs_norm[feature_name] = (column - min_value) / value_range

    return Songs_norm

In [5]:
### Read data
# Load the catalogue, using the first CSV column as the row index.
Songs = pd.read_csv(DATA_DIR + SONGS_FILE, index_col=0)
Songs = Songs.drop('track_id', axis = 1) # track_id is not required by the recommender

# Session state, filled in while the user rates songs:
#   likedArtist / likedAlbum - artists and albums of songs rated above 7
#   ratedSongs               - index labels of songs already offered for rating
likedArtist = set()
likedAlbum = set()
ratedSongs = set()

# Add the last_t column as the first column, initialised to last_t_init for
# every song; it is later overwritten with totReco when a song is recommended.
arr = np.ones(Songs.shape[0])*last_t_init
Songs.insert(0, 'last_t', arr)

# The state values: one slot per song, filled in by best_recommendation().
distances = np.zeros(Songs.shape[0])
penalized_distances = np.zeros(Songs.shape[0])

# Per-feature weights used by compute_distance(); all ones to start with.
weights = np.ones(NFEATURE)

# Rescale the non-unit-range feature columns, then display the resulting frame.
Songs = normalize(Songs)
Songs

Unnamed: 0_level_0,last_t,artist,album,track_name,danceability,acousticness,energy,key,loudness,mode,speechiness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
1,-50.0,HVOB,Rocco,2nd World,0.572,0.25500,0.544,0.636364,0.613055,0,0.0274,0.818000,0.1040,0.0393,0.418416,0.371359,0.75
2,-50.0,Khalid,American Teen,8TEEN,0.830,0.22500,0.585,0.090909,0.859087,1,0.0366,0.002090,0.0576,0.6420,0.343579,0.226743,0.75
3,-50.0,Damien Rice,9,9 Crimes,0.346,0.91300,0.139,0.000000,0.410540,1,0.0321,0.000077,0.0934,0.1160,0.556118,0.212600,0.75
4,-50.0,Radiohead,In Rainbows,15 Step,0.600,0.04890,0.853,0.090909,0.775161,1,0.0423,0.346000,0.0934,0.8470,0.271411,0.238283,0.75
5,-50.0,Coldplay,Viva La Vida (Prospekt's March Edition),42,0.309,0.22100,0.636,0.454545,0.793152,0,0.0344,0.002810,0.1020,0.1770,0.591707,0.238425,0.75
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
563,-50.0,Hozier,Hozier,Take Me To Church,0.566,0.63400,0.664,0.363636,0.882588,0,0.0464,0.000000,0.1160,0.4370,0.506815,0.244117,0.75
564,-50.0,Petit Biscuit,Night Trouble,Night Trouble,0.411,0.95100,0.364,0.090909,0.410116,1,0.0743,0.930000,0.1260,0.0685,0.281717,0.442675,0.50
565,-50.0,The Lumineers,Cleopatra,Sleep On The Floor,0.389,0.24900,0.431,0.636364,0.752696,1,0.0344,0.000000,0.1300,0.2750,0.596881,0.204508,0.75
566,-50.0,Brothers of Metal,Yggdrasil,Yggdrasil,0.469,0.00222,0.743,0.090909,0.870014,0,0.0272,0.000111,0.2760,0.4810,0.220190,0.284745,0.75


In [6]:
def compute_distance(user_features, song_features, artist, album):
    """Weighted Euclidean distance between a user profile and one song.

    Each squared feature difference is multiplied by the matching entry of
    the module-level ``weights`` before summing. The distance is then
    discounted to 3/5 of its value when ``album`` is in ``likedAlbum``, or
    otherwise to 4/5 when ``artist`` is in ``likedArtist``.
    """
    squared_diff = (user_features - song_features)**2
    weighted_total = np.sum(weights * squared_diff)
    base = math.sqrt(weighted_total)

    # A liked album takes precedence over a liked artist.
    if album in likedAlbum:
        return base * (3/5)
    if artist in likedArtist:
        return base * (4/5)
    return base

In [7]:
def get_song_features(song):
    """Return the feature vector of a song.

    The feature vector is the last ``NFEATURE`` entries of the song's row.

    Parameters
    ----------
    song : pandas.Series or pandas.DataFrame
        A single row, or a frame whose first row is the song of interest.

    Returns
    -------
    pandas.Series
        The trailing ``NFEATURE`` entries of the row.

    Raises
    ------
    TypeError
        If ``song`` is neither a Series nor a DataFrame.
    """
    if isinstance(song, pd.Series):
        # .iloc makes the positional intent explicit instead of relying on
        # how plain [] interprets an integer slice.
        return song.iloc[-NFEATURE:]
    if isinstance(song, pd.DataFrame):
        # Take the first row by position: a label lookup would return a frame
        # (not a row) if the first index label happened to be duplicated.
        return song.iloc[0].iloc[-NFEATURE:]
    raise TypeError("{} should be a Series or DataFrame".format(song))

In [8]:
def best_recommendation(user_features):
    """Return the song with the smallest recency-penalised distance.

    For every row of ``Songs`` the raw distance to ``user_features`` is
    stored in ``distances`` and a penalised copy in ``penalized_distances``.
    The penalty factor is ``1 + exp(c1*x + c2)`` with
    ``x = totReco - last_t``; because ``c1`` is negative it shrinks toward 1
    as more recommendations pass since the song was last offered.

    Returns a DataFrame holding the row(s) whose index label matches the
    minimum of ``penalized_distances``.
    """
    # Coefficients of the exponential recency penalty (same for every song).
    c1 = -0.289539905818
    c2 = 4.18136020393

    for pos, (_, row) in enumerate(Songs.iterrows()):
        distances[pos] = compute_distance(
            user_features, get_song_features(row), row['artist'], row['album'])

        elapsed = totReco - row.last_t
        penalized_distances[pos] = distances[pos] * (1 + math.exp(c1*elapsed+c2))

    best_label = Songs.index[penalized_distances.argmin()]
    return Songs[Songs.index == best_label]

In [9]:
def greedy_choice(user_features):
    """Epsilon-greedy pick of the next song.

    The exploration probability is ``eps_inf + exp(c1*totReco + c2)``, so it
    decays toward ``eps_inf`` as ``totReco`` grows. ``totReco`` is
    incremented on every call, before the song is chosen. With probability
    ``epsilon`` a random song is returned (exploration); otherwise the best
    recommendation for ``user_features`` is returned (exploitation).
    """
    global totReco

    eps_inf = 0.1
    c1 = -0.00287682072452
    c2 = -1.60943791243
    epsilon = eps_inf + math.exp(c1*totReco+c2)

    totReco += 1

    explore = random.random() <= epsilon
    if explore:
        return random_choice()
    return best_recommendation(user_features)

In [10]:
def random_choice():
    """Return one randomly chosen song that has not been rated yet (exploration).

    Returns
    -------
    pandas.DataFrame
        A single-row frame sampled uniformly from the songs whose index label
        is not in ``ratedSongs``. If every song has already been rated, a
        song is sampled from the whole catalogue instead of looping forever
        looking for an unrated one.
    """
    unrated = Songs[~Songs.index.isin(list(ratedSongs))]
    if unrated.empty:
        # Nothing left to explore: fall back to any song so the caller's own
        # "already rated" handling can deal with it.
        return Songs.sample()
    return unrated.sample()

In [11]:
def all_recommendation(user_features, n=10):
    """Build a batch of recommendations, mixing exploration and exploitation.

    Parameters
    ----------
    user_features : array-like
        Current user profile passed on to ``greedy_choice``.
    n : int, optional
        Number of songs to recommend; defaults to 10.

    Returns
    -------
    list of pandas.DataFrame
        One single-song frame per recommendation, in the order chosen.

    Each chosen song has its ``last_t`` set to the current ``totReco`` so the
    recency penalty applies to it on the following picks.
    """
    recoSongs = []
    for _ in range(n):
        song = greedy_choice(user_features)
        recoSongs.append(song)
        Songs.loc[Songs.index.isin(song.index), 'last_t'] = totReco
    return recoSongs

In [12]:
def update_features(user_features, song_features, rating):
    """Shift the user profile relative to a rated song.

    The step size is ``0.1 * rating``: a positive rating moves the profile
    toward ``song_features``, a negative one moves it away, and a rating of
    zero leaves it unchanged. Returns the new profile; the inputs are not
    modified.
    """
    step = 0.1*rating        # learning rate scales with the rating (importance)
    return user_features + step * (song_features - user_features)

In [13]:
def get_policy_weights():
    """Return the feature weights used by the distance model.

    Learning on the weights can be done here. Does nothing for now: the
    module-level ``weights`` array is returned unchanged.
    """
    return weights

In [14]:
def _ask_int(prompt, low, high):
    """Prompt until the user enters a whole number between low and high (inclusive)."""
    while True:
        answer = input(prompt)
        try:
            value = int(answer)
        except ValueError:
            value = None
        if value is not None and low <= value <= high:
            return value
        print("Please enter a whole number from {} to {}.".format(low, high))


def get_user_features():
    """Build the initial user profile, optionally by asking the user.

    Every feature starts at the neutral value 0.5. If the user opts in, each
    rateable feature is asked for in feature order: loudness, speechiness and
    duration as a 0/1 choice, the rest on a 0-10 scale that is divided by 10.
    Features that are not intuitive to rate (key, mode, tempo,
    time_signature) stay at 0.5.

    Answers that are blank, non-numeric or outside the allowed range are
    asked again, so every returned value lies in [0, 1].

    Returns
    -------
    numpy.ndarray
        Array of length ``NFEATURE``.
    """
    #Features = ["danceability","acousticness","energy","key","loudness","mode","speechiness",
    #                   0               1        2        3        4        5       6
    #            "instrumentalness","liveness","valence","tempo","duration_ms","time_signature"]
    #                    7              8           9       10        11            12

    RateablesFeatures = ["danceable","acoustic","energetic","_1","loud","_2","speechy",
                         "instrumental","live-performed","sad(0)/happy(10)","_3","long","_4"]
    #unratable (not intuitive) -> _1: key    _2:mode    _3:tempo    _4:time_signature
    unratable_index = [3,5,10,12]

    # Features answered with a plain 0/1 choice, keyed by feature index.
    binary_prompts = {
        4: "Do you prefer loud music [0/1]: ",           #loudness
        6: "Do you prefer music[0] or podcasts[1]: ",    #speechiness
        11: "Do you prefer long tracks? [0/1]: ",        #duration
    }

    user_features = np.full(NFEATURE, 0.5)

    if _ask_int("User def features? [0/1]: ", 0, 1) != 1:
        return user_features

    for i, label in enumerate(RateablesFeatures):
        if i in unratable_index:
            continue    # stays at the neutral 0.5
        if i in binary_prompts:
            user_features[i] = _ask_int(binary_prompts[i], 0, 1)
        else:
            num = _ask_int("How much do you like a "+label+" track [0-10]: ", 0, 10)
            user_features[i] = num/10

    return user_features

In [15]:
def _ask_rating(prompt):
    """Ask for a rating; return None for a blank answer, else an int clamped to [-10, 10].

    Answers that are not whole numbers are asked again instead of raising.
    """
    while True:
        answer = input(prompt).strip()
        if answer == "":
            return None
        try:
            return max(-10, min(10, int(answer)))
        except ValueError:
            print('Please enter a whole number from -10 to 10, or leave it blank')


def reinforcement_learning(N=5):
    """Learn an initial user profile from up to N rated songs.

    The profile starts from ``get_user_features()``. For each of the N rounds
    a song is picked with ``greedy_choice`` and its ``last_t`` is stamped
    with the current ``totReco``. A song that was already rated is skipped.
    Otherwise the user is asked to rate it; a blank answer leaves the profile
    unchanged, a rating above 7 also records the song's album and artist as
    liked, and the profile is updated with 0.1 times the rating.

    Parameters
    ----------
    N : int, optional
        Number of recommendation rounds; defaults to 5.

    Returns
    -------
    The updated user feature vector.
    """
    global weights

    user_features = get_user_features()
    weights = get_policy_weights()

    print ("\n\nRate following", N, "songs. So that we can know your taste.\nLeave it blank if you haven't heard it\n")

    for _ in range(N):
        recommendation = greedy_choice(user_features)
        song_id = recommendation.index[0]

        # Stamp the song before any early exit: if an already-rated song were
        # skipped without this, its recency penalty would never apply and the
        # same song could come back on later rounds.
        Songs.loc[Songs.index.isin(recommendation.index),'last_t'] = totReco

        if song_id in ratedSongs:
            print ('Song already rated')
            continue
        ratedSongs.add(song_id)

        rating = _ask_rating('Rate ' + recommendation.at[song_id,'track_name'] + ' (-10 to 10): ')
        if rating is None:
            continue    # not heard: nothing to learn from

        if rating > 7:
            likedAlbum.add(recommendation.at[song_id,'album'])
            likedArtist.add(recommendation.at[song_id,'artist'])

        recommendation_features = get_song_features(recommendation)
        user_features = update_features(user_features, recommendation_features, 0.1*rating)

    return user_features

In [16]:
# Build the initial user profile interactively (N defaults to 5 rating rounds).
user_features = reinforcement_learning()

User def features? [0/1]: 1
How much do you like a danceable track [0-10]: 10
How much do you like a acoustic track [0-10]: 4
How much do you like a energetic track [0-10]: 8
Do you prefer loud music [0/1]: 1
Do you prefer music[0] or podcasts[1]: 0
How much do you like a instrumental track [0-10]: 5
How much do you like a live-performed track [0-10]: 1
How much do you like a sad(0)/happy(10) track [0-10]: 10
Do you prefer long tracks? [0/1]: 0


Rate following 5 songs. So that we can know your taste.
Leave it blank if you haven't heard it

Rate Smack That (-10 to 10): 6
Rate Another One Bites The Dust - Remastered 2011 (-10 to 10): 3
Rate I Summon You (-10 to 10): 7
Rate Taxi Cab (-10 to 10): 3
Rate UGH! (-10 to 10): 9


In [16]:
# NOTE(review): debugging override. This replaces the profile returned by
# reinforcement_learning() with a hard-coded plain Python list (not a NumPy
# array); everything learned from the ratings entered so far is discarded.
# The values equal the feature columns that print(Songs.loc[12]) shows below.
user_features = [0.209,0.172,0.412,0,0.673951,1,0.0443,0.15,0.0843,0.162,0.21546,0.329158,0.5]
print(Songs.loc[12])
print('\n-----------------------------------')
print(user_features)

last_t                             -50
artist                        Kodaline
album               In A Perfect World
track_name                  All I Want
danceability                     0.209
acousticness                     0.172
energy                           0.412
key                                  0
loudness                      0.673951
mode                                 1
speechiness                     0.0443
instrumentalness                  0.15
liveness                        0.0843
valence                          0.162
tempo                          0.21546
duration_ms                   0.329158
time_signature                     0.5
Name: 12, dtype: object

-----------------------------------
[0.209, 0.172, 0.412, 0, 0.673951, 1, 0.0443, 0.15, 0.0843, 0.162, 0.21546, 0.329158, 0.5]


In [17]:
#UI
# Interactive loop: show a batch of recommendations, collect ratings for the
# ones not rated before, update the user profile, and repeat while the user
# answers 'y'.
choice = 'y'
while choice == 'y':
    print ('\nWait \n\n')
    recommendations = all_recommendation(user_features)

    for i, music in enumerate(recommendations, start=1):
        print (str(i) + ". " + music.at[music.index[0],'track_name'])

    print ('\n\nRate songs one by one or leave it blank')

    for music in recommendations:
        song_id = music.index[0]
        if song_id in ratedSongs:
            print ('Song already rated')
            continue
        ratedSongs.add(song_id)

        # Ask until the answer is blank or a whole number; a typo must not
        # abort the session with a ValueError. Ratings are clamped to the
        # advertised -10..10 range.
        rating = None
        while True:
            answer = input("Rate " + music.at[song_id,'track_name'] + " (-10 to 10): ").strip()
            if answer == "":
                break
            try:
                rating = max(-10, min(10, int(answer)))
                break
            except ValueError:
                print ('Please enter a whole number from -10 to 10, or leave it blank')

        if rating is None:
            continue    # left blank: nothing to learn from this song

        if rating > 7:
            likedAlbum.add(music.at[song_id,'album'])
            likedArtist.add(music.at[song_id,'artist'])

        song_features = get_song_features(music)
        user_features = update_features(user_features, song_features, 0.1*rating)

    # Accept 'Y' as well as 'y'.
    choice = input("\nDo you want more recommendations? (y/n) ").strip().lower()


Wait 


1. Up&Up
2. Don't Wanna Know (feat. Kendrick Lamar)
3. Levitate - From The Original Motion Picture “Passengers”
4. Paris
5. A Change Of Heart
6. She's American
7. The Judge
8. Come and Get Your Love - Single Version
9. Love Me
10. Team


Rate songs one by one or leave it blank
Rate Up&Up (-10 to 10): 8
Rate Don't Wanna Know (feat. Kendrick Lamar) (-10 to 10): 7
Rate Levitate - From The Original Motion Picture “Passengers” (-10 to 10): 6
Rate Paris (-10 to 10): 5
Rate A Change Of Heart (-10 to 10): 9
Rate She's American (-10 to 10): 5
Rate The Judge (-10 to 10): 8
Rate Come and Get Your Love - Single Version (-10 to 10): 5
Rate Love Me (-10 to 10): 5
Rate Team (-10 to 10): 2

Do you want more recommendations? (y/n) n


### Testing

In [25]:
# Dump the artists and albums collected in likedArtist / likedAlbum
# (both sets are filled when a song is rated above 7).
# Note: the loop variables `artist` and `album` stay defined at notebook
# scope after this cell runs.
print('LIKED ARTISTS')
for artist in likedArtist:
    print(artist)
print('\n\nLIKED ALBUMS')
for album in likedAlbum:
    print(album)

LIKED ARTISTS
Twenty One Pilots
Coldplay
The 1975


LIKED ALBUMS
Blurryface
I like it when you sleep, for you are so beautiful yet so unaware of it
A Head Full of Dreams


In [26]:
# Look up the song with the smallest penalised distance from the last
# best_recommendation() pass and show its name and feature vector.
top_recom = Songs[Songs.index == Songs.index[penalized_distances.argmin()]]
#top_recom = Songs[Songs.index == Songs.index[distances.argmin()]]
top_features = get_song_features(top_recom)
name, artist, album = top_recom.iloc[0][['track_name', 'artist', 'album']]

print(name,'\n')
print(top_features)

Love Me 

danceability           0.632
acousticness         0.00689
energy                   0.8
key                        0
loudness            0.975604
mode                       0
speechiness           0.0357
instrumentalness    4.51e-05
liveness               0.611
valence                0.909
tempo               0.288912
duration_ms         0.218033
time_signature          0.75
Name: 289, dtype: object


In [27]:
# Distance between the current user profile and the top recommendation,
# including any liked-album / liked-artist discount.
compute_distance(user_features, top_features, artist, album)

0.5968264085057302

In [28]:
def top50():
    """Return the (up to) 50 songs with the smallest raw distance.

    Uses the module-level ``distances`` array as last filled in by
    ``best_recommendation``; the recency penalty is not applied.

    Returns
    -------
    pandas.DataFrame
        Columns 'Track Name', 'Album' and 'Artist', ordered from nearest to
        farthest. The frame is empty when there are no songs.
    """
    # argsort yields row positions, so select by position rather than
    # assuming that index labels always equal position + 1.
    nearest = distances.argsort()[:50]
    top = Songs.iloc[nearest]

    return pd.DataFrame({
        'Track Name': top['track_name'].tolist(),
        'Album': top['album'].tolist(),
        'Artist': top['artist'].tolist(),
    })

In [29]:
# Display the songs closest to the user profile by raw (unpenalised) distance.
top50()

Unnamed: 0,Track Name,Album,Artist
0,UGH!,"I like it when you sleep, for you are so beaut...",The 1975
1,Paris,"I like it when you sleep, for you are so beaut...",The 1975
2,A Change Of Heart,"I like it when you sleep, for you are so beaut...",The 1975
3,She's American,"I like it when you sleep, for you are so beaut...",The 1975
4,Smack That,Konvicted,Akon
5,Love Me,"I like it when you sleep, for you are so beaut...",The 1975
6,Another One Bites The Dust - Remastered 2011,The Game (2011 Remaster),Queen
7,Somebody Else,"I like it when you sleep, for you are so beaut...",The 1975
8,Somebody Else,"I like it when you sleep, for you are so beaut...",The 1975
9,Somebody Else,"I like it when you sleep, for you are so beaut...",The 1975
