In [None]:
import numpy as np
import tensorflow as tf

import matplotlib.pyplot as plt
import preprocessing

import collections

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
# Relative path to the data directory.
# NOTE(review): no cell visible in this notebook reads `data_path`; the loaders
# below pass their own hard-coded directories - confirm whether it is still needed.
data_path = "../data"

## Most Common Song Finder
Finds the most common songs in a slice of 1000 playlists. You can change the number of songs loaded, or use `get_data` instead!

In [None]:
# Preprocess the raw playlists; the directory, split ratio and k are the knobs to tune.
train_id, test_id, vocab, relevance, lp = preprocessing.preprocess(
    directory='../data_info/data/',
    train_test_split=0.8,
    k=2,
)

# Count how often each token appears in the training data and keep the top n.
n_most_common = 40
song_counter = collections.Counter(train_id)
top_token_counts = song_counter.most_common(n_most_common)
most_common_tokens, most_common_occurrences = zip(*top_token_counts)

## Invert the vocabulary (word -> token) so that tokens can be shown as readable words
token_to_word_dict = dict(zip(vocab.values(), vocab.keys()))
most_common_songs = []
for token in most_common_tokens:
    most_common_songs.append(token_to_word_dict[token])

# One "(song, occurrences)" pair per entry, comma separated on a single line.
song_occurrence_pairs = zip(most_common_songs, most_common_occurrences)
print(", ".join(str(pair) for pair in song_occurrence_pairs))

In [None]:
# Horizontal bar chart of the most common songs: one bar per song, bar length
# equal to the number of times the song was counted.
fig_most_common, ax_top50_most_common = plt.subplots()
ax_top50_most_common.barh(y = most_common_songs,
                          width = most_common_occurrences, 
                          height = 0.75, 
                          color = "C0", 
                          edgecolor = "black", 
                          zorder = 100)

# Dashed grid with a low zorder so it is drawn behind the bars (zorder 100 above).
ax_top50_most_common.grid(linestyle = "dashed", 
                          color = "#bfbfbf", 
                          zorder = -100)

# Pin the tick positions before relabelling them so labels and ticks stay aligned.
ax_top50_most_common.set_yticks(ticks = ax_top50_most_common.get_yticks())
ax_top50_most_common.set_yticklabels(labels = most_common_songs, 
                                     fontsize = 14)

# Put the most common song at the top of the chart.
ax_top50_most_common.invert_yaxis()

# Label the figure so it can be understood without reading the code.
ax_top50_most_common.set_xlabel("Number of occurrences", fontsize = 14)
ax_top50_most_common.set_title(f"{len(most_common_songs)} most common songs", 
                               fontsize = 14)

fig_most_common.set_size_inches([6, 12])

### Run The Model
We've already preprocessed 5000 playlists' worth of data, which we can load with `get_data`.

Note that the preprocessing accidentally stores the relevance values in reverse order, so we reverse the lists in this code block as a quick fix.

Feel free to change the parameters of the model as you see fit!

In [None]:
%%time

import model

# Load the previously saved preprocessing output instead of recomputing it.
saved_preprocessing_dir = '../data_info/saved_preprocessing_k=5/saved_preprocessing'
train_id, test_id, track_to_id, relevance_w_str_map = preprocessing.get_data(
    directory=saved_preprocessing_dir
)

# The saved relevance lists are in reverse order and keyed by strings:
# flip each list and convert the keys to int.
relevance = {}
for str_key, relevance_list in relevance_w_str_map.items():
    relevance[int(str_key)] = relevance_list[::-1]

# Create reverse dictionary (id -> track name)
id_to_track = {track_id: track_name for track_name, track_id in track_to_id.items()}

train_id = np.array(train_id)
test_id = np.array(test_id)

# Inputs and targets are the same arrays because the input song is required for RPrecision
X0 = Y0 = train_id
X1 = Y1 = test_id

args = model.get_model(id_to_track, relevance)

# Single training pass, validated on the held-out ids.
data = args.model.fit(
    X0,
    Y0,
    epochs=1,
    batch_size=100,
    validation_data=(X1, Y1),
)

### Generate Recommendations

In [None]:
def RPrecision(predictions, labels):
    """Return the fraction of the top ground-truth labels that were predicted.

    The ground truth is limited to the first ``R`` entries of ``labels``,
    where ``R`` is the number of *unique* predictions, and the score is the
    size of the overlap divided by the number of unique labels in that slice.

    Args:
        predictions: Iterable of hashable predicted items.
        labels: Sliceable sequence of hashable ground-truth items; only its
            first ``R`` entries are considered.

    Returns:
        A float in ``[0.0, 1.0]``. ``0.0`` is returned when there is nothing
        to compare against (no predictions or no labels) instead of raising
        ``ZeroDivisionError``.
    """
    predict_set = set(predictions)

    # Only the first R labels count, R being the number of unique predictions.
    ground_truth = set(labels[:len(predict_set)])

    # Empty predictions or empty labels leave no ground truth to score against.
    if not ground_truth:
        return 0.0

    return len(predict_set & ground_truth) / len(ground_truth)

## Change 'song' to see what the model recommends as well as the R-Precision it gets!
song = 'Creep'

# `relevance` and the model were built from the `get_data` output, so the song is
# looked up in `track_to_id` (returned by that same call) rather than in `vocab`
# from the earlier `preprocess` call, whose ids are not guaranteed to match.
if song not in track_to_id:
    print(f"{song} not in vocabulary")
    print()
else:
    # Generate the recommendations once so the printed list and the score agree.
    recommendations = args.model.generate_recommendations(song, 10, track_to_id)
    print(recommendations)
    print()

    # Translate the relevant track ids for this song back into track names.
    ids = relevance[track_to_id[song]]
    id_to_track = {track_id: name for name, track_id in track_to_id.items()}
    tracks = [id_to_track[track_id] for track_id in ids]
    print(tracks[:30])
    print()
    print("R-Precision: " + str(RPrecision(recommendations, tracks)))