# Recommendations in Keras using triplet loss
Along the lines of BPR [1]. 

[1] Rendle, Steffen, et al. "BPR: Bayesian personalized ranking from implicit feedback." Proceedings of the Twenty-Fifth Conference on Uncertainty in Artificial Intelligence. AUAI Press, 2009.

This is implemented (more efficiently) in LightFM (https://github.com/lyst/lightfm). See the MovieLens example (https://github.com/lyst/lightfm/blob/master/examples/movielens/example.ipynb) for results comparable to this notebook.

## Set up the architecture
A simple dense layer for both users and items: this is exactly equivalent to a latent factor matrix when multiplied by binary (one-hot) user and item indices. There are three inputs: users, positive items, and negative items. In the triplet objective we try to make the positive item rank higher than the negative item for that user.

Because we want just one single embedding for the items, we use shared weights for the positive and negative item inputs (a siamese architecture).

This is all very simple but could be made arbitrarily complex, with more layers, conv layers and so on. I expect we'll be seeing a lot of papers doing just that.


In [1]:
import pickle
import random

In [2]:
# NOTE(review): presumably the triplet-loss margin (standard_triplet_loss
# hard-codes the same 0.2) — confirm; ALPHA is not referenced in the visible cells.
ALPHA = 0.2

In [98]:
"""
Triplet loss network example for recommenders
"""
from __future__ import print_function

import numpy as np

from keras import backend as K
from keras.layers import Embedding, Flatten, Input, merge
from keras.layers import LSTM, Dense,Dropout
from keras.layers import Lambda
from keras.models import Model
from keras.models import Sequential
from keras.models import load_model
from keras.optimizers import Adam
from keras.utils import plot_model

# import data
# import metrics


def identity_loss(y_true, y_pred):
    """Return the mean of ``y_pred``; ``y_true`` only contributes a zero term.

    The model's output is used directly as the loss value, so the targets are
    multiplied by zero before being subtracted.
    """
    zeroed_targets = 0 * y_true
    return K.mean(y_pred - zeroed_targets)


def bpr_triplet_loss(X):
    """BPR-style loss: ``1 - sigmoid(<first, second> - <first, random>)``.

    Args:
        X: sequence of three tensors ``(first, second, random)``.

    Returns:
        The loss tensor, with the last axis reduced and kept as size 1.
    """
    anchor, positive, negative = X

    # Dot products along the last axis.
    positive_score = K.sum(anchor * positive, axis=-1, keepdims=True)
    negative_score = K.sum(anchor * negative, axis=-1, keepdims=True)

    # BPR loss
    return 1.0 - K.sigmoid(positive_score - negative_score)

def standard_triplet_loss(X):
    """Margin-based triplet loss over (first, second, random) embeddings.

    Args:
        X: sequence of three tensors ``(first, second, random)``, in the same
            order the model passes its three branches: the first song, a song
            from the same playlist, and a song from outside that playlist.

    Returns:
        ``max(d(first, second) - d(first, random) + 0.2, 0)``, where ``d`` is
        the sum of the L2-normalised squared differences.
    """
    # Unpack in the caller's order (first, second, random). The previous
    # (first, random, second) order swapped positive and negative, which
    # inverted the objective.
    first_item_latent, second_item_latent, random_item_latent = X

    # NOTE(review): l2_normalize and sum are called without an axis, so they
    # presumably reduce over the whole batch rather than per sample — confirm.
    term1 = K.pow((first_item_latent - second_item_latent), 2)
    term1_norm = K.l2_normalize(term1)
    sum1 = K.sum(term1_norm)
    term2 = K.pow((first_item_latent - random_item_latent), 2)
    term2_norm = K.l2_normalize(term2)
    sum2 = K.sum(term2_norm)
    # 0.2 is the margin by which the random song must be farther away.
    loss = K.maximum(sum1 - sum2 + 0.2, 0)

    return loss

## Load and transform data
We're going to load the Movielens 100k dataset and create triplets of (user, known positive item, randomly sampled negative item).

The success metric is AUC: in this case, the probability that a randomly chosen known positive item from the test set is ranked higher for a given user than a randomly chosen negative item.

# Reading songs and converting into one hot encoding

In [99]:
pwd

'/Users/spandanmadan/Desktop/Spotify/Spotify/notebooks'

In [None]:
# Load the scraped lyrics of every playlist into one flat list.
# NOTE: pickle.load can execute arbitrary code; only read trusted files.
all_lyrics = []
for pid in range(2355):
    # `with` closes the handle even if unpickling raises.
    with open('../data/pooling/scraped_lyrics_%s.p'%pid,'rb') as f:
        lyrics_list = pickle.load(f)
    all_lyrics += lyrics_list

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# One space-joined document per entry of all_lyrics.
text = list(map(' '.join, all_lyrics))

# Document-frequency cut-offs: max_df=0.9, min_df=0.01.
vectorizer = TfidfVectorizer(min_df=0.01, max_df=0.9)

# Tokenize the corpus and build the vocabulary.
vectorizer.fit(text)

In [None]:
# NOTE(review): unfinished stub — open() and pickle.load() are called with no
# arguments, so running this cell raises TypeError. Presumably meant to reload
# a previously pickled vectorizer; supply the file path and pass `f` to
# pickle.load before using it.
f = open()
vectorizer = pickle.load()

In [None]:
# Load the track URIs of every playlist: a flat list of all URIs plus a
# playlist-id -> URI-list mapping.
# NOTE: pickle.load can execute arbitrary code; only read trusted files.
all_uris = []
pid_to_uris = {}
for pid in range(2355):
    # `with` closes the handle even if unpickling raises.
    with open('../data/pooling/scraped_lyrics_uris_%s.p'%pid,'rb') as f:
        uris_list = pickle.load(f)
    pid_to_uris[pid] = uris_list
    all_uris += uris_list

In [None]:
len(all_uris)

# Debugging to see sizes etc are ok

In [None]:
vector = vectorizer.transform(all_lyrics[0])

In [None]:
vector.toarray().shape

In [21]:
# Map each track URI to its lyrics as a one-element list holding the joined
# text, ready to be handed to vectorizer.transform later on.
uri_to_lyrics = {}
for i, uri in enumerate(all_uris):
    # Progress report every 1000 songs.
    if not i % 1000:
        print(i)
    lyrics = all_lyrics[i]
    # Songs without scraped lyrics get a single blank token.
    if lyrics == []:
        lyrics = [' ']
    uri_to_lyrics[uri] = [' '.join(lyrics)]

0
1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
14000
15000
16000
17000
18000
19000
20000
21000
22000
23000
24000
25000
26000
27000
28000
29000
30000
31000
32000
33000
34000
35000
36000
37000
38000
39000
40000
41000
42000
43000
44000
45000
46000
47000
48000
49000
50000
51000
52000
53000
54000
55000
56000
57000
58000
59000
60000
61000
62000
63000
64000
65000
66000
67000
68000
69000
70000
71000
72000
73000
74000
75000
76000
77000
78000
79000
80000
81000
82000
83000
84000
85000
86000
87000
88000
89000
90000
91000
92000
93000
94000
95000
96000
97000
98000
99000
100000
101000
102000
103000
104000
105000
106000
107000
108000


In [23]:
# f = open('uri_to_lyrics.p','wb')
# pickle.dump(uri_to_lyrics,f)
# f.close()

In [24]:
vector_size = vectorizer.transform(uri_to_lyrics['0UaMYEvWZi0ZqiDOoHU3YI']).shape[1]

In [25]:
unique_uris = list(set(uri_to_lyrics.keys()))

In [26]:
vectorizer.transform(uri_to_lyrics['0UaMYEvWZi0ZqiDOoHU3YI']).toarray().shape

(1, 1419)

In [47]:
# Training hyper-parameters and LSTM input geometry.
# NOTE(review): LATENT_DIM is not referenced in the visible cells; it matches
# the 300-unit final Dense layer — confirm intent.
LATENT_DIM = 300
# Number of training-loop iterations; each iteration fits one sampled batch.
NUM_EPOCHS = 5000
# Triplets per sampled batch.
BATCH_SIZE = 5
# Each TF-IDF vector is reshaped to (timesteps, features): 11 * 129 = 1419,
# the vector width reported by the shape check earlier in the notebook.
timesteps = 11
features = 129

In [35]:
def _lyrics_matrix(uris, vector_size):
    """Return a (len(uris), vector_size) array with one TF-IDF row per URI."""
    matrix = np.zeros((len(uris), vector_size))
    for row, uri in enumerate(uris):
        matrix[row] = vectorizer.transform(uri_to_lyrics[uri]).toarray()
    return matrix


def get_train_batch(BATCH_SIZE,vector_size,unique_uris):
    """Sample one batch of (first, second, random) song triplets.

    A playlist is drawn at random and split 75% / 25% in order. ``first``
    songs come from the leading part, ``second`` songs from the trailing part,
    and ``random`` songs from ``unique_uris`` entries not in the playlist.
    Playlists that cannot supply ``BATCH_SIZE`` songs for every part are
    skipped and another playlist is drawn.

    Args:
        BATCH_SIZE: number of triplets to return.
        vector_size: width of one TF-IDF lyric vector.
        unique_uris: pool of track URIs from which the random songs are drawn.

    Returns:
        ``(first, second, random, uris)``: three arrays, each reshaped to
        ``(BATCH_SIZE, timesteps, -1)``, and a tuple of the three URI lists
        ``(first_uris, second_uris, random_uris)`` they were built from.
    """
    # Draw from the playlists actually loaded instead of a hard-coded id range.
    pids = list(pid_to_uris)
    while True:
        playlist_uris = pid_to_uris[random.choice(pids)]

        split = int(len(playlist_uris) * 0.75)
        first_part = playlist_uris[:split]
        second_part = playlist_uris[split:]
        # Check sizes up front rather than catching every exception, so that
        # real errors and Ctrl-C are no longer swallowed by the retry loop.
        if len(first_part) < BATCH_SIZE or len(second_part) < BATCH_SIZE:
            continue

        # A set makes the membership test O(1) per candidate URI.
        in_playlist = set(playlist_uris)
        random_part = [uri for uri in unique_uris if uri not in in_playlist]
        if len(random_part) >= BATCH_SIZE:
            break

    # Sample songs directly so the element at index 0 is eligible as well
    # (the previous index range started at 1).
    first_uris = random.sample(first_part, BATCH_SIZE)
    second_uris = random.sample(second_part, BATCH_SIZE)
    random_uris = random.sample(random_part, BATCH_SIZE)

    uris = (first_uris,second_uris,random_uris)
    reshaped_1, reshaped_2, reshaped_random = (
        _lyrics_matrix(group, vector_size).reshape((BATCH_SIZE,timesteps,-1))
        for group in uris
    )
    return reshaped_1, reshaped_2, reshaped_random,uris

In [36]:
a,b,c,d = get_train_batch(BATCH_SIZE,vector_size,unique_uris)

# Building the model

In [37]:
# visible = Input(shape=(100,1))
# # feature extraction
# extract = LSTM(10, return_sequences=True)(visible)
# # classification output
# class11 = LSTM(10)(extract)
# class12 = Dense(10, activation='relu')(class11)
# output1 = Dense(1, activation='sigmoid')(class12)

In [38]:
def triplet_merge(X):
    """Unfinished triplet helper; the model cells visible here do not call it.

    Unpacks ``X`` into ``(first, second, random)`` and computes the squared
    differences first-second ("good") and first-random ("bad").

    NOTE(review): there is no return statement, so the function yields None.
    The final call passes ``bad`` as the second positional argument of
    ``K.sum`` (presumably the reduction axis, as in bpr_triplet_loss), so the
    intended combination of good and bad needs confirming. The local name
    ``random`` also shadows the imported ``random`` module inside this
    function.
    """
    first,second,random = X
    good = K.pow((first-second),2)
    bad = K.pow((first-random),2)
    
    K.sum(good,bad)

In [39]:
# positive_item_input = Input((1, ), name='positive_item_input')
# negative_item_input = Input((1, ), name='negative_item_input')

# # Shared embedding layer for positive and negative items
# item_embedding_layer = Embedding(
#     10, 10, name='item_embedding', input_length=1)

# user_input = Input((1, ), name='user_input')

# positive_item_embedding = Flatten()(item_embedding_layer(
#     positive_item_input))
# negative_item_embedding = Flatten()(item_embedding_layer(
#     negative_item_input))
# user_embedding = Flatten()(Embedding(
#     10, 10, name='user_embedding', input_length=1)(
#         user_input))

# loss = merge(
#     [positive_item_embedding, negative_item_embedding, user_embedding],
#     mode=bpr_triplet_loss,
#     name='loss',
#     output_shape=(1, ))

# model__ = Model(
#     input=[positive_item_input, negative_item_input, user_input],
#     output=loss)
# model__.compile(loss=identity_loss, optimizer=Adam())

  name=name)


In [41]:
# Layer objects created once and applied to every input branch below, so the
# branches share one set of weights.
# Two stacked LSTMs, each returning the full sequence and followed by dropout.
shared_LSTM_layer = LSTM(700,return_sequences=True)
shared_dropout_layer = Dropout(0.5)
shared_LSTM_layer2 = LSTM(500,return_sequences=True)
# Dense head applied after flattening; the last layer has 300 units.
shared_dense_layer = Dense(1000,activation='relu')
shared_dense_layer_2 = Dense(300, activation='relu')
shared_dropout_layer_2 = Dropout(0.5)
shared_flatten = Flatten()

In [42]:
# shared_dense_layer_2.get_weights()

In [43]:
# Triplet network: three inputs run through the same shared layers, and the
# triplet loss is computed inside the graph as the model's only output.

# first input
visible_1 = Input(shape=(timesteps,features))
extract_1 = shared_LSTM_layer(visible_1)
dropped_1 = shared_dropout_layer(extract_1)
extract_2 = shared_LSTM_layer2(dropped_1)
dropped_2 = shared_dropout_layer_2(extract_2)
flatten_1 = shared_flatten(dropped_2)
dense_1 = shared_dense_layer(flatten_1)
dense_2 = shared_dense_layer_2(dense_1)

# Second input
visible_1_2 = Input(shape=(timesteps,features))
extract_1_2 = shared_LSTM_layer(visible_1_2)
dropped_1_2 = shared_dropout_layer(extract_1_2)
extract_2_2 = shared_LSTM_layer2(dropped_1_2)
dropped_2_2 = shared_dropout_layer_2(extract_2_2)
flatten_1_2 = shared_flatten(dropped_2_2)
dense_1_2 = shared_dense_layer(flatten_1_2)
dense_2_2 = shared_dense_layer_2(dense_1_2)

# Third input 
visible_1_3 = Input(shape=(timesteps,features))
extract_1_3 = shared_LSTM_layer(visible_1_3)
dropped_1_3 = shared_dropout_layer(extract_1_3)
extract_2_3 = shared_LSTM_layer2(dropped_1_3)
dropped_2_3 = shared_dropout_layer_2(extract_2_3)
flatten_1_3 = shared_flatten(dropped_2_3)
dense_1_3 = shared_dense_layer(flatten_1_3)
dense_2_3 = shared_dense_layer_2(dense_1_3)

# A Lambda layer replaces the deprecated Keras 1 `merge(..., mode=callable)`
# helper; the loss function still receives the three embeddings as a list in
# (first, second, third) branch order.
output = Lambda(standard_triplet_loss,output_shape=(1,))([dense_2,dense_2_2,dense_2_3])
model = Model(inputs=[visible_1,visible_1_2,visible_1_3], outputs=output)
plot_model(model,to_file='model.png')

  name=name)


In [44]:
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 11, 129)      0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, 11, 129)      0                                            
__________________________________________________________________________________________________
input_3 (InputLayer)            (None, 11, 129)      0                                            
__________________________________________________________________________________________________
lstm_3 (LSTM)                   (None, 11, 700)      2324000     input_1[0][0]                    
                                                                 input_2[0][0]                    
          

In [45]:
# shared_LSTM_layer = LSTM(700,return_sequences=True)
# shared_dropout_layer = Dropout(0.5)
# shared_LSTM_layer2 = LSTM(500,return_sequences=True)
# shared_dense_layer = Dense(500,activation='relu')
# shared_dense_layer_2 = Dense(300, activation='relu')

# # first input
# visible_1 = Input(shape=(timesteps,features))
# extract_1 = shared_LSTM_layer(visible_1)
# dropped_1 = shared_dropout_layer(extract_1)
# extract_2 = shared_LSTM_layer2(dropped_1)
# dropped_2 = shared_dropout_layer(extract_2)
# dense_1 = shared_dense_layer(dropped_2)
# dense_2 = shared_dense_layer_2(dense_1)

# # Second input
# visible_1_2 = Input(shape=(timesteps,features))
# extract_1_2 = shared_LSTM_layer(visible_1_2)
# dropped_1_2 = shared_dropout_layer(extract_1_2)
# extract_2_2 = shared_LSTM_layer2(dropped_1_2)
# dropped_2_2 = shared_dropout_layer(extract_2_2)
# dense_1_2 = shared_dense_layer(dropped_2_2)
# dense_2_2 = shared_dense_layer_2(dense_1_2)


# # Third input
# visible_1_3 = Input(shape=(timesteps,features))
# extract_1_3 = shared_LSTM_layer(visible_1_3)
# dropped_1_3 = shared_dropout_layer(extract_1_3)
# extract_2_3 = shared_LSTM_layer2(dropped_1_3)
# dropped_2_3 = shared_dropout_layer(extract_2_3)
# dense_1_3 = shared_dense_layer(dropped_2_3)
# dense_2_3 = shared_dense_layer_2(dense_1_3)

# # merge(
# #         [positive_item_embedding, negative_item_embedding, user_embedding],
# #         mode=bpr_triplet_loss,
# #         name='loss',
# #         output_shape=(1, ))
# output = merge([visible_1,visible_1_2,visible_1_3],mode=standard_triplet_loss,output_shape=(1,))
# model = Model(inputs=[visible_1, visible_1_2,visible_1_3], outputs=output)

In [46]:
model.compile(loss=identity_loss, optimizer=Adam())

## Run the model
Run for a couple of epochs, checking the AUC after every epoch.

In [48]:
# Each iteration samples one fresh batch of triplets and fits the model on it.
for epoch in range(NUM_EPOCHS):
    if epoch % 50 == 0:
        print('Epoch %s' % epoch)
    
    # Sample triplets from the training data
    first_data, second_data, random_data,uris = get_train_batch(BATCH_SIZE,vector_size,unique_uris)
    X = [first_data,second_data,random_data]
    # The targets are placeholders: identity_loss multiplies y_true by zero.
    # `epochs` is the Keras 2 name of the deprecated `nb_epoch` argument.
    model.fit(X,
              np.ones(len(first_data)),
              batch_size=BATCH_SIZE,
              epochs=1,
              verbose=0,
              shuffle=True)

Epoch 0


  del sys.path[0]


Epoch 50
Epoch 100
Epoch 150
Epoch 200
Epoch 250
Epoch 300
Epoch 350
Epoch 400
Epoch 450
Epoch 500
Epoch 550
Epoch 600
Epoch 650
Epoch 700
Epoch 750
Epoch 800
Epoch 850
Epoch 900
Epoch 950
Epoch 1000
Epoch 1050
Epoch 1100
Epoch 1150
Epoch 1200
Epoch 1250
Epoch 1300
Epoch 1350
Epoch 1400
Epoch 1450
Epoch 1500
Epoch 1550
Epoch 1600
Epoch 1650
Epoch 1700
Epoch 1750
Epoch 1800
Epoch 1850
Epoch 1900
Epoch 1950
Epoch 2000
Epoch 2050
Epoch 2100
Epoch 2150
Epoch 2200
Epoch 2250
Epoch 2300
Epoch 2350
Epoch 2400
Epoch 2450
Epoch 2500
Epoch 2550
Epoch 2600
Epoch 2650
Epoch 2700
Epoch 2750
Epoch 2800
Epoch 2850
Epoch 2900
Epoch 2950
Epoch 3000
Epoch 3050
Epoch 3100
Epoch 3150
Epoch 3200
Epoch 3250
Epoch 3300
Epoch 3350
Epoch 3400
Epoch 3450
Epoch 3500
Epoch 3550
Epoch 3600
Epoch 3650
Epoch 3700
Epoch 3750
Epoch 3800
Epoch 3850
Epoch 3900
Epoch 3950
Epoch 4000
Epoch 4050
Epoch 4100
Epoch 4150
Epoch 4200
Epoch 4250
Epoch 4300
Epoch 4350
Epoch 4400
Epoch 4450
Epoch 4500
Epoch 4550
Epoch 4600
Epoch 46

In [49]:
MODEL_SAVE_PATH = '/Users/spandanmadan/Desktop/Spotify/Spotify/data/pooling/saved_lstm_5000.h5'
model.save(MODEL_SAVE_PATH)

# Get embeddings from trained model

In [50]:
def uri_to_input_vector(uris):
    """Build the model input tensor for up to BATCH_SIZE track URIs.

    Args:
        uris: track URIs that are keys of ``uri_to_lyrics``; at most
            ``BATCH_SIZE`` of them.

    Returns:
        Array reshaped to ``(BATCH_SIZE, timesteps, -1)``. Row ``k`` holds the
        TF-IDF vector of ``uris[k]``; rows beyond ``len(uris)`` stay zero.

    Raises:
        IndexError: if more than ``BATCH_SIZE`` URIs are given.
        KeyError: if a URI is missing from ``uri_to_lyrics``.
    """
    data = np.zeros((BATCH_SIZE,vector_size))
    # enumerate supplies the row index; the loop previously indexed with an
    # `i` that was never defined in this function.
    for row, uri in enumerate(uris):
        data[row] = vectorizer.transform(uri_to_lyrics[uri]).toarray()
    reshaped_data = data.reshape((BATCH_SIZE,timesteps,-1))
    return reshaped_data

In [85]:
# Single-input model that reuses the shared layer objects of the triplet
# model, mapping one song tensor to the output of shared_dense_layer_2.
visible = Input(shape=(timesteps,features))
extract = shared_LSTM_layer(visible)
drop = shared_dropout_layer(extract)
extract_ = shared_LSTM_layer2(drop)
drop_ = shared_dropout_layer_2(extract_)
flat = shared_flatten(drop_)
dense = shared_dense_layer(flat)
dense_ = shared_dense_layer_2(dense)

# model = Model(inputs=[visible_1, visible_1_2,visible_1_3], outputs=output)
# Unlike the triplet model, the output here is the embedding, not a loss.
prediction_model = Model(inputs=visible,outputs=dense_)

KeyboardInterrupt: 

In [None]:
MODEL_SAVE_PATH = '/Users/spandanmadan/Desktop/Spotify/Spotify/data/pooling/saved_prediction_model_5000.h5'
prediction_model.save(MODEL_SAVE_PATH)

In [53]:
# prediction_model = Sequential()
# prediction_model.add(LSTM(700,return_sequences=True,
#                input_shape=(timesteps, features),weights=shared_LSTM_layer.get_weights()))  # returns a sequence of vectors of dimension 32
# prediction_model.add(Dropout(0.5))
# prediction_model.add(LSTM(500, return_sequences=True,weights=shared_LSTM_layer2.get_weights()))  # returns a sequence of vectors of dimension 32
# prediction_model.add(Dropout(0.5))
# prediction_model.add(Dense(500, activation='relu',weights=shared_dense_layer.get_weights()))
# prediction_model.add(Dense(300, activation='relu',weights=shared_dense_layer_2.get_weights()))

In [54]:
plot_model(prediction_model,to_file='prediction_model.png')

In [55]:
prediction_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         (None, 11, 129)           0         
_________________________________________________________________
lstm_3 (LSTM)                (None, 11, 700)           2324000   
_________________________________________________________________
dropout_3 (Dropout)          (None, 11, 700)           0         
_________________________________________________________________
lstm_4 (LSTM)                (None, 11, 500)           2402000   
_________________________________________________________________
dropout_4 (Dropout)          (None, 11, 500)           0         
_________________________________________________________________
flatten_5 (Flatten)          (None, 5500)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 1000)              5501000   
__________

In [56]:
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 11, 129)      0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, 11, 129)      0                                            
__________________________________________________________________________________________________
input_3 (InputLayer)            (None, 11, 129)      0                                            
__________________________________________________________________________________________________
lstm_3 (LSTM)                   (None, 11, 700)      2324000     input_1[0][0]                    
                                                                 input_2[0][0]                    
          

In [87]:
prediction_model = load_model('../data/pooling/saved_prediction_model_5000.h5')



In [88]:
uri_to_embeding = {}

In [89]:
len(all_uris)

108093

In [90]:
# Embed songs batch by batch and store one embedding row per track URI.
for i in range(0,len(all_uris),BATCH_SIZE):
    uris = all_uris[i:i+BATCH_SIZE]
    # A short final batch is zero-padded: unused rows stay zero and are
    # simply not stored.
    input_data = np.zeros((BATCH_SIZE,vector_size))

    for k, uri in enumerate(uris):
        input_data[k] = vectorizer.transform(uri_to_lyrics[uri]).toarray()

    # Predict once, after the whole batch has been filled in.
    reshaped_data = input_data.reshape((BATCH_SIZE,timesteps,-1))
    prediction = prediction_model.predict(reshaped_data)

    # Store each URI's own row rather than the whole batch output.
    for k, uri in enumerate(uris):
        uri_to_embeding[uri] = prediction[k]
    # NOTE(review): kept from the original — this stops after the first
    # batch; remove the break to embed every song.
    break

In [91]:
uri_to_embeding.keys()

dict_keys(['0UaMYEvWZi0ZqiDOoHU3YI', '6I9VzXrHxO9rA9A5euc8Ak', '0WqIKmW4BTrj3eJFmnCKMv', '1AWQoqb9bSvzTjaLralEkT', '1lzr43nnXAijIGYnCT8M8H'])

In [96]:
a.shape

(5, 11, 129)

In [97]:
model.predict([a,b,c])

array([0.2, 0.2, 0.2, 0.2, 0.2], dtype=float32)

In [93]:
np.sum(uri_to_embeding['6I9VzXrHxO9rA9A5euc8Ak'])

0.0

In [123]:
def print_songs(uri_list):
    """Print the ``uri_to_name_artist`` entry for each bare track id given.

    Each id is prefixed with ``'spotify:track:'`` before the lookup.
    """
    prefix = 'spotify:track:'
    for track_id in uri_list:
        full_uri = prefix + track_id
        print(uri_to_name_artist[full_uri])

def compare_outputs(activations_first,activations_second,activations_random):
    """Compare batch centroids: first-vs-second against first-vs-random.

    Each batch is averaged over axis 0 to get one centroid.

    Returns:
        ``(A, B)``: the 2-norm of (first centroid - second centroid) and the
        2-norm of (first centroid - random centroid).
    """
    centroid_first, centroid_second, centroid_random = (
        np.mean(batch, axis=0)
        for batch in (activations_first, activations_second, activations_random)
    )

    gap_to_second = np.linalg.norm(centroid_first - centroid_second, 2)
    gap_to_random = np.linalg.norm(centroid_first - centroid_random, 2)

    return gap_to_second, gap_to_random

In [140]:
a,b,c,d = get_train_batch(BATCH_SIZE,vector_size,unique_uris)

In [141]:
activations_first = prediction_model.predict(a)
activations_second = prediction_model.predict(b)
activations_random = prediction_model.predict(c)

In [142]:
# Load the track-URI -> (name, artist) lookup used by print_songs.
# NOTE: pickle.load can execute arbitrary code; only read trusted files.
# `with` closes the handle even if unpickling raises.
with open('/Users/spandanmadan/Desktop/Spotify/Spotify/data/pooling/uri_to_name_artist.p','rb') as f:
    uri_to_name_artist = pickle.load(f)

In [143]:
print('First songs-')
print_songs(d[0])
print('Second songs-')
print_songs(d[1])
print('Third songs-')
print_songs(d[2])

First songs-
('An Anthem of Invitation', 'Judah & the Lion')
('Shepherd of My Soul', 'Rivers & Robots')
('Fall Down', 'Rivers & Robots')
('I Surrender All', 'Ascend The Hill')
('Oh Love That Will Not Let Me Go', 'Ascend The Hill')
Second songs-
('Voice That Stills the Raging Sea', 'Rivers & Robots')
('Light Will Dawn', 'Rivers & Robots')
('See the Way (feat. David Brymer)', 'Misty Edwards')
('Shepherd of My Soul', 'Rivers & Robots')
('Farther Along', 'Josh Garrels')
Third songs-
('Everybody Wants to Go to Heaven', 'Kenny Chesney')
('All I Ask Of You (feat. Pennybirdrabbit) - feat. Penny', 'Skrillex')
('Jorge Regula', 'The Moldy Peaches')
('Dawn of Time', 'Tribal Seeds')
('Never Too Far Gone', 'Jordan Feliz')


In [144]:
compare_outputs(activations_first,activations_second,activations_random)

(0.009273529, 0.016392708)