<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc" style="margin-top: 1em;"><ul class="toc-item"><li><span><a href="#create-pooling-&amp;-ranking-model" data-toc-modified-id="create-pooling-&amp;-ranking-model-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>create pooling &amp; ranking model</a></span></li><li><span><a href="#Get-recommendation" data-toc-modified-id="Get-recommendation-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Get recommendation</a></span><ul class="toc-item"><li><span><a href="#Playlist" data-toc-modified-id="Playlist-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>Playlist</a></span></li></ul></li></ul></div>

In [1]:
import sys
sys.path.append('../src')
sys.path.append('../src/data/')
sys.path.append('../src/models/')
sys.path.append('../src/features/')
sys.path.append('../src/visualization/')
# will reload any library
%load_ext autoreload
%autoreload 2
%config InlineBackend.figure_format = 'retina'
%matplotlib inline

In [2]:
from data.metrics import recall
from visualization.plot_utils import write_latex_table, summary_pooling_table, pooling_plots, pandas_settings, plot_settings,  save_result
# general
import pandas as pd
import numpy as np
from tqdm import tqdm
from features import AudioFeatures, TrackInfo, Popularity, GenreLDA, AlbumFeature, TrackFeature, ArtistFeature, ContextFeatures
from collections import OrderedDict
from itertools import product
from data_utils import product_size
import matplotlib.pyplot as plt
import seaborn as sns

def pool_stats(pid,k,strat,n_pick,true_t,pool_t,true_a,pool_a):
    r_t = recall(true_t, pool_t)
    r_a = recall(true_a, pool_a)
    return OrderedDict([('pid',pid),('k',k),('strategy',strat),('n',n_pick),('recall-tracks',r_t),('recall-artist',r_a)])

plot_settings()
pandas_settings()

# create pooling & ranking model

In [4]:
from models.word2vec_pooler import W2VPooler

pooler =  W2VPooler(verbose=True)
tracks = TrackInfo(subset='5k-')
feats = [AudioFeatures(),Popularity(),GenreLDA(),AlbumFeature(),TrackFeature(),ArtistFeature()]
weights = np.array([7.204, 6.567, 26.770, 37.775, 43.009, 16.044])
strats =  ['all', 'centroid', 'all', 'medoid', 'mean', 'mean']

LOADING AUDIO FEATURE...
FINISHED LOADING AUDIO FEATURE...
../data/interim/track_uri2popularity.pkl.bz2 IS LOADING...
LOADED POPULARITY
../data/w2v/album_128_1cut IS LOADING
LOADED W2V
../data/w2v/track_128_1cut IS LOADING
LOADED W2V
../data/w2v/artist_128_1cut IS LOADING
LOADED W2V


# Get recommendation

In [12]:
list_s = [('The Scientist','Coldplay'),('Immigrant Song','Led Zeppelin'),('T.N.T.','AC/DC')]
seeds_t = [tracks.track_info2uris(tname,aname)[0] for tname,aname in list_s]
# pooling
pool_t, dist= pooler.recommend(seeds_t,n=10000,agg_strat='mean')
# ranking
dist = np.zeros(len(pool_t))
for f,w,s in zip(feats,weights,strats):
    dist= dist + w*f.distance_between_sets(seeds_t,pool_t,strat=s)
# rank based on distance
sorted_indx=np.argsort(dist)
turi_ranked = [pool_t[i] for i in sorted_indx][:100]
#

Found 3 out of 3 vecs
2-means centroids, returning pool of size 10000


## Playlist

In [13]:
pl_df = pd.DataFrame()
pl_df['track_uri']=turi_ranked
pl_df['track_info'] = pl_df['track_uri'].apply(tracks.uri2track_info)
pl_df

Unnamed: 0,track_uri,track_info
0,78lgmZwycJ3nzsdgmPPGNx,Immigrant Song - Led Zeppelin
1,75JFxkI2RXiU7L9VXzMkle,The Scientist - Coldplay
2,0BCPKOYdS2jbQ8iyB56Zns,Clocks - Coldplay
3,0hCB0YR03f6AmQaHbwWDe8,Whole Lotta Love - Led Zeppelin
4,2nVHqZbOGkKWzlcy1aMbE7,Kashmir - Led Zeppelin
5,7LVHVU3tWfcxj5aiPFEW4Q,Fix You - Coldplay
6,5CQ30WqJwcep0pYcV4AMNc,Stairway To Heaven - Led Zeppelin
7,5CQ30WqJwcep0pYcV4AMNc,Stairway To Heaven - Led Zeppelin
8,3qT4bUD1MaWpGrTwcvguhb,Black Dog - Led Zeppelin
9,0R8P9KfGJCDULmlEoBagcO,Trouble - Coldplay
