# Neighbourhood Finder demo

# Imports and helper functions

In [591]:
import sys
sys.path.append('../src')
sys.path.append('../src/data/')
sys.path.append('../src/models/')
sys.path.append('../src/features/')
sys.path.append('../src/visualization/')
# will reload any library
%load_ext autoreload
%autoreload 2
%config InlineBackend.figure_format = 'retina'
%matplotlib inline
from data.metrics import recall
from visualization.plot_utils import write_latex_table, summary_pooling_table, pooling_plots, pandas_settings, plot_settings,  save_result
# general
import pandas as pd
import numpy as np
from tqdm import tqdm
from features.tracks_info import TrackInfo
from collections import OrderedDict
from itertools import product
from data_utils import product_size
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [592]:
from features.tracks_info import TrackInfo
from features.popularity_feature import Popularity

tracks = TrackInfo(subset='5k-')
popularity = Popularity()

In [593]:
import json
from random import randint
import os
import json
import lyricwikia
import pickle
import sys
import numpy as np

def artists_in_playlist(playlist):
    all_ = []
    for track in playlist['tracks']:
        all_.append(track['artist_name'])
    return list(set(all_))

def albums_in_playlist(playlist):
    all_ = []
    for track in playlist['tracks']:
        all_.append(track['album_name'])
    return list(set(all_))

def tracks_in_playlist(playlist):
    all_ = []
    for track in playlist['tracks']:
        all_.append(track['track_uri'])
    return list(set(all_))

def track_names_in_playlist(playlist):
    all_ = []
    for track in playlist['tracks']:
        all_.append(track['track_name'])
    return list(set(all_))

def lyrics_in_playlist(playlist):
    all_ = {}
    info = {}
    ct = 0
    failed = []
    for track in playlist['tracks']:
        track_uri = track['track_uri']
        song_name = track['track_name']
        artist_name = track['artist_name']
        try:
            lyrics = lyricwikia.get_lyrics(artist_name, song_name)
            all_[track_uri] = lyrics
            info[track_uri] = (song_name,artist_name)
        except:
            failed.append(ct)
        ct += 1
    return all_,failed,info

f = open('../data/pooling/uri_to_track_info.pckl','rb')
uri_to_name_artist = pickle.load(f)
f.close()

f = open('../data/pooling/artist_to_songs.p','rb')
artist_to_songs = pickle.load(f)
f.close()

clean_artist_to_songs = {}
for artist in artist_to_songs.keys():
    clean_artist_to_songs[artist] = list(set(artist_to_songs[artist]))

name_artist_to_uri = {}
for uri in uri_to_name_artist.keys():
    tup = uri_to_name_artist[uri]
    name_artist_to_uri[tup] = uri

from gensim.models import Word2Vec
model = Word2Vec.load('../data/pooling/model_min5_new.bin')

# Pick song name by looking up for artist

In [4]:
clean_artist_to_songs['Coldplay']

['A Sky Full of Stars',
 'Birds',
 'For You',
 'Death Will Never Conquer',
 'Viva La Vida - Live From Spotify London',
 'A Rush Of Blood To The Head - Live In Sydney',
 'Til Kingdom Come',
 'Square One',
 'Charlie Brown - Live from Glastonbury 2011',
 'The Goldrush',
 'Ink - Live At Le Casino De Paris, Paris',
 'Talk',
 'Paradise - Remix',
 'How You See The World - Live From Earls Court',
 'Major Minus - Live',
 'A Spell A Rebel Yell',
 'No More Keeping My Feet On The Ground',
 "Everything's Not Lost - Live In Sydney",
 'A Hopeful Transmission',
 'Viva La Vida - Live from One Love Manchester',
 'Amsterdam',
 'Lost!',
 'Everglow - Single Version',
 'A Sky Full of Stars - Robin Schulz Edit',
 'Fun (feat. Tove Lo)',
 'Paradise',
 'Careful Where You Stand',
 'Animals',
 'U.F.O.',
 'In My Place (Live 2012)',
 'Moses - Live In Sydney',
 'Talk - Junkie XL Remix',
 'Up With the Birds',
 'Lost@ - Live At United Center, Chicago',
 'Everglow - Single Version, Radio Edit',
 'Fix You - Live from Th

In [5]:
list_already = []
list_s = [('Warriors','Imagine Dragons'),('The Scientist','Coldplay'),('Immigrant Song','Led Zeppelin'),('T.N.T.','AC/DC')]
def make_suggestions(list_s):
    song_names = []
    artist_names = []
    for l in list_s:
        song_names.append(l[0])
        artist_names.append(l[1])
        uri = name_artist_to_uri[l].split(':')[-1]
        list_already.append(uri)
    sample = np.zeros(300)
    ct = 0

    returned_artists = []
    returned_songs = []
    for song in list_already:

        track = 'spotify:track:%s'%song
        try:
            ct += 1
            sample += model[track]
        except:
            print(uri_to_name_artist[track],track)
    representation = sample/ct

    most_sim = model.most_similar([representation],topn=1000)
    artists = []
    songs = []
    for suggestion in most_sim:

        songs.append(uri_to_name_artist[suggestion[0]])
        artists.append(uri_to_name_artist[suggestion[0]][1])
    #     print(uri_to_name_artist[suggestion[0]])

    from collections import Counter
    c = Counter(artists)
    top_artists = c.most_common(10)

    # print('Top songs - ')
    # print(*songs[:25],sep='\n')
    # print('\n\n\nTop Artisis')
    # for t in top_artists:
    #     print(t[0])

    most_sim = model.most_similar([representation],topn=1000)
    artists = []
    songs = []
    for suggestion in most_sim:

        songs.append(uri_to_name_artist[suggestion[0]])
        artists.append(uri_to_name_artist[suggestion[0]][1])
    #     print(uri_to_name_artist[suggestion[0]])

    from collections import Counter
    c = Counter(artists)
    top_artists = c.most_common(14)

#     print('Top songs - ')
    printed = 0
    i = 0
    while(printed<25):
        song,artist = songs[i]
        if song not in song_names:
            if artist not in artist_names:
                returned_songs.append(song)
#                 print(song,artist)
                printed += 1
        i += 1

#     print('\n\n\nTop Artisis')
    for t in top_artists:
        if t not in artist_names:
            returned_artists.append(t[0])
#             print(t[0])
    
    return returned_songs,returned_artists

# Working with actual playlist

In [488]:
def pool(uris,FRACTION,SONG_COUNT,ARTIST_COUNT):
    
    cutoff = int(len(uris)*FRACTION)
    train_uris = uris[:cutoff]
    test_uris = uris[cutoff:]

    sample = np.zeros(300)
    returned_artists = []
    returned_songs = []
    ct = 0
    for song in train_uris:
        track = 'spotify:track:%s'%song
        try:
            sample += model[track]
            ct+=1
        except:
            pass
    representation = sample/ct

    most_sim = model.most_similar([representation],topn=SONG_COUNT)

    song_names = [uri_to_name_artist['spotify:track:'+track][0] for track in train_uris]
    artist_names = [uri_to_name_artist['spotify:track:'+track][1] for track in train_uris]

    pooled_songs = []
    pooled_artists = []
    artists_all = []
    for suggestion in most_sim:
        song,artist = uri_to_name_artist[suggestion[0]]
        if song not in song_names:
            pooled_songs.append(suggestion)
            artists_all.append(artist)

    c = Counter(artists_all)
    top_artists = c.most_common(ARTIST_COUNT)
    for top_artist in top_artists:
        if top_artist[0] not in artist_names:
            pooled_artists.append(top_artist[0])
    return pooled_songs,pooled_artists,train_uris,test_uris

In [489]:
def artist_top_songs(artist_name,K):
    auri = tracks.aname2auri[artist_name][0]
    turis = tracks.auri2turi[auri]
    tnames = [tracks.turi2tname[t] for t in turis]
    pops = popularity.transform(turis)
    top_indices = [x for x,y in sorted(enumerate(pops), key = lambda x: x[1])]
    top_indices.reverse()
    top_songs = [turis[index] for index in top_indices[:K]]
    top_song_names = [tnames[index] for index in top_indices[:K]]
    return top_songs,top_song_names
def recall(testset,suggested_uris):
    total = 0
    positive = 0
    for uri in testset:
        total += 1
        if uri in suggested_uris:
            positive += 1
    return positive/float(total)

In [541]:
def print_playlist(uris):
    full_uris = ['spotify:track:'+uri for uri in uris]
    print(len(list(set(artist_names)))/len(uris))
    playlist_builder = []
    for fl in full_uris:
        print(uri_to_name_artist[fl])

In [490]:
# Note - Pick depending upon genre/diversity how to sample pool

In [606]:
pid = 30
n_pick = 100
# strat='medoids' # there are many sampling strategies
pl_turi,pl_auri = tracks.get_playlist(pid)

# full_uris = ['spotify:track:'+uri for uri in pl_turi]
# print(len(list(set(artist_names)))/len(pl_turi))
# playlist_builder = []
# for fl in full_uris:
#     print(uri_to_name_artist[fl])

songs_pool,artists_pool,train,test = pool(pl_turi,0.75,50000,3000)

  


In [607]:
songs_pool[0]

('spotify:track:4BAAyLMY1lMUivwVeKYkzx', 0.9768328666687012)

In [608]:
f = open('../data/pooling/pool_songs_pid_%s.p'%pid,'wb')
pickle.dump(songs_pool,f)
f.close()

In [569]:
# allowed_top_songs = []
# for artist in artists_pool:
#     allowed_top_songs.extend(artist_top_songs(artist,7)[1])

In [570]:
# final_suggestions = []
# for song_uri,_ in songs_pool:
#     song,artist = uri_to_name_artist[song_uri]
#     if song in allowed_top_songs:
#         final_suggestions.append(song_uri.split(':')[-1])

In [571]:
len(train)

12

In [586]:
simple_suggestions = [i[0].split(':')[-1] for i in songs_pool[:len(train)*100]]

In [590]:
print_playlist(pl_turi)

1.0
('Rescuer (Good News)', 'Rend Collective')
('I Got Saved', 'Selah')
('Backseat Driver', 'tobyMac')
('Forgiven', 'Crowder')
('From the Ground Up', 'Dan + Shay')
('O Come to the Altar', 'Elevation Worship')
('Broken Things', 'Matthew West')
('HARD LOVE (feat. Lauren Daigle)', 'NEEDTOBREATHE')
('Hills and Valleys (The Valleys Version)', 'Tauren Wells')
('The Gospel', 'Ryan Stevenson')
('Different', 'Micah Tyler')
("O'Lord", 'Lauren Daigle')
('Lights Shine Bright', 'tobyMac')
('O God Forgive Us', 'for KING & COUNTRY')
('Come To the Table', 'Sidewalk Prophets')
('Unfinished', 'Mandisa')
('Jesus I Believe', 'Big Daddy Weave')


In [None]:
def r_preci