In [1]:
import numpy as np
import tensorflow as tf

import matplotlib as mpl
import matplotlib.pyplot as plt
import preprocessing

import collections

In [2]:
# Reload imported modules automatically before each cell executes, so edits to
# local modules (e.g. preprocessing) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
# NOTE(review): data_path is not referenced by any cell visible in this notebook;
# the preprocessing calls pass directory='../data_info/data/' directly.
# Confirm which path is intended.
data_path = "../data"

## Most Common Song Finder

In [4]:
train_id, test_id, vocab, relevance, lp = preprocessing.preprocess(
    directory='../data_info/data/',
    train_test_split=0.8,
    k=2,
)

## Tally how often each token appears in train_id
song_counter = collections.Counter(train_id)

## Keep the n_most_common (token, count) pairs and split them into two parallel tuples
n_most_common = 40
top_entries = song_counter.most_common(n_most_common)
most_common_tokens, most_common_occurrences = zip(*top_entries)

## vocab maps word -> token; invert it so tokens can be shown as readable names
token_to_word_dict = dict(zip(vocab.values(), vocab.keys()))
most_common_songs = list(map(token_to_word_dict.__getitem__, most_common_tokens))

print(*zip(most_common_songs, most_common_occurrences), sep = ", ")

KeyboardInterrupt: 

In [None]:
# Horizontal bar chart of the most common songs and their occurrence counts.
# Uses most_common_songs / most_common_occurrences computed in the
# "Most Common Song Finder" cell (the original referenced an undefined
# name, most_common_words, which raised NameError).
fig_most_common, ax_top50_most_common = plt.subplots()
ax_top50_most_common.barh(y = most_common_songs,
                          width = most_common_occurrences,
                          height = 0.75,
                          color = "C0",
                          edgecolor = "black",
                          zorder = 100)

# Draw the grid behind the bars (bars have the higher zorder)
ax_top50_most_common.grid(linestyle = "dashed",
                          color = "#bfbfbf",
                          zorder = -100)

# Pin the tick positions before relabelling so the labels line up with the bars
ax_top50_most_common.set_yticks(ticks = ax_top50_most_common.get_yticks())
ax_top50_most_common.set_yticklabels(labels = most_common_songs,
                                     fontsize = 14)

# Put the most common song at the top of the chart
ax_top50_most_common.invert_yaxis()

ax_top50_most_common.set_xlabel("Occurrences")
ax_top50_most_common.set_title("Most common songs")

fig_most_common.set_size_inches([6, 12])

### Run The Model

In [5]:
%%time

import rnn

# Preprocess with k=3; this rebinds train_id, test_id, vocab, relevance and lp
train_id, test_id, vocab, relevance, lp = preprocessing.preprocess(
    directory='../data_info/data/',
    train_test_split=0.8,
    k=3,
)

train_id, test_id = np.array(train_id), np.array(test_id)

# Inputs and targets are deliberately the same arrays: the input song itself
# is required for RPrecision
X0 = Y0 = train_id
X1 = Y1 = test_id

args = rnn.get_text_model(vocab, relevance)

# Fit for two epochs, validating on the test arrays; lp is used as the batch size
data = args.model.fit(X0, Y0, epochs=2, batch_size=lp, validation_data=(X1, Y1))

Epoch 1/2
Epoch 2/2

### Generate Playlist

In [None]:
def RPrecision(predictions, labels):
    """Score a set of predictions against a truncated list of ground-truth labels.

    The distinct predictions are compared with the first ``len(set(predictions))``
    entries of ``labels``; the score is the share of those (distinct) labels
    that also appear among the predictions.

    Args:
        predictions: Iterable of hashable predicted items; duplicates are ignored.
        labels: Sliceable sequence of hashable ground-truth items.

    Returns:
        A float in [0, 1]. Returns 0.0 when there is nothing to compare
        (no predictions or no labels) instead of raising ZeroDivisionError.
    """
    predict_set = set(predictions)

    # Only as many labels as there are distinct predictions count as ground truth
    ground_truth = set(labels[:len(predict_set)])

    # Empty predictions or empty labels leave no ground truth to score against
    if not ground_truth:
        return 0.0

    return len(predict_set & ground_truth) / len(ground_truth)

## Feel free to change the seed tracks to see what playlists the model generates
id_to_track = {track_id: name for name, track_id in vocab.items()}

for word1 in ['Creep']:
    if word1 not in vocab:
        # Skip unknown seeds entirely; looking them up in vocab below would raise KeyError
        print(f"{word1} not in vocabulary")
        print()
        continue

    # Generate once and reuse the result, so the playlist that is printed is
    # the same one that gets scored
    recommendations = args.model.generate_recommendations(word1, 10, vocab)
    print(recommendations)
    print()

    # Tracks listed in `relevance` for this seed, converted from ids to names
    ids = relevance[vocab[word1]]
    tracks = [id_to_track[track_id] for track_id in ids]
    print(tracks[:30])
    print()
    print("R-Precision: " + str(RPrecision(recommendations, tracks)))