# Spotify Tracks Recommendation System

In [None]:
%run ../../common/model.requirements.ipynb

In [None]:
%run ../../common/data.requirements.ipynb

In [None]:
%run ../../../data/external/spotify-tracks-dataset.ipynb

In [None]:
%run data.preprocessing.ipynb

In [None]:
%run data.train.ipynb

In [None]:
%run model.build.ipynb

In [None]:
# Preprocess the raw Spotify frame. The result must expose the `track_id` and
# `album_name` columns, which the pair-building cell below reads.
# NOTE(review): `df_spotify` and `data_preprocessing` are presumably defined by
# the notebooks executed via %run above — confirm.
data_train = data_preprocessing(df_spotify)

In [None]:
def generate_batch(pairs, tracks, albums, pairs_set, n_positive=50, negative_ratio=1.0, classification=False):
    """Endlessly yield shuffled batches of positive and negative (track, album) pairs.

    Each batch holds ``n_positive`` pairs sampled without replacement from
    ``pairs`` (label ``1``) followed by randomly drawn (track, album)
    combinations that are absent from ``pairs_set`` (label ``0`` when
    ``classification`` is true, otherwise ``-1``). Rows are shuffled before
    being yielded.

    Args:
        pairs: Sequence of ``(track, album)`` tuples that are true pairs. Values
            must be numeric because they are stored in a float array.
        tracks: Position-indexable sequence of candidate track values.
        albums: Position-indexable sequence of candidate album values.
        pairs_set: Set form of the true pairs, used to reject false negatives.
        n_positive: Number of positive rows per batch.
        negative_ratio: Negatives per positive; the batch size is
            ``int(n_positive * (1 + negative_ratio))``.
        classification: Selects the negative label (``0`` vs ``-1``).

    Yields:
        ``({'track': ndarray, 'album': ndarray}, labels)`` where every array has
        one entry per batch row.

    Raises:
        ValueError: From ``random.sample`` if ``n_positive`` exceeds
            ``len(pairs)``.

    Note:
        Negative sampling is rejection based, so the generator never makes
        progress if every (track, album) combination is already in
        ``pairs_set``.
    """
    import numpy as np
    import random

    batch_size = int(n_positive * (1 + negative_ratio))
    neg_label = 0 if classification else -1

    while True:
        # Allocate a fresh buffer for every batch: the yielded arrays are views
        # into it, so reusing one buffer would silently overwrite batches the
        # consumer is still holding (e.g. queued or prefetched ones).
        batch = np.zeros((batch_size, 3))

        for idx, (track_id, album_name) in enumerate(random.sample(pairs, n_positive)):
            batch[idx, :] = (track_id, album_name, 1)

        # Negatives start right after the positives. Setting this explicitly
        # (instead of incrementing the loop variable) also covers n_positive == 0.
        idx = n_positive

        while idx < batch_size:
            random_track = random.randrange(len(tracks))
            random_album = random.randrange(len(albums))

            # Keep only combinations that are not known positives.
            if (tracks[random_track], albums[random_album]) not in pairs_set:
                batch[idx, :] = (tracks[random_track], albums[random_album], neg_label)
                idx += 1

        np.random.shuffle(batch)
        yield {'track': batch[:, 0], 'album': batch[:, 1]}, batch[:, 2]

In [None]:
# Distinct track and album values, in order of first appearance.
unique_tracks = data_train['track_id'].unique()
unique_albums = data_train['album_name'].unique()

# Lookup tables mapping each distinct value to its position in the arrays above.
track_index = dict(zip(unique_tracks, range(len(unique_tracks))))
album_index = dict(zip(unique_albums, range(len(unique_albums))))

# Every observed (track, album) row is a positive example; the set form is
# what the batch generator uses to reject accidental positives when it
# samples negatives.
pairs = list(zip(data_train['track_id'], data_train['album_name']))
pairs_set = set(pairs)

In [None]:
# Smoke-test the generator on a tiny batch (2 positives + 4 negatives).
# generate_batch indexes `tracks` / `albums` by position, so it must receive
# the value arrays (as the training cell does), not the value -> index dicts.
x, y = next(generate_batch(pairs, unique_tracks, unique_albums, pairs_set, n_positive=2, negative_ratio=2))

In [None]:
# Build the model from the two value -> position lookup tables and print its
# layer summary.
# NOTE(review): `build_model` is presumably defined by model.build.ipynb and is
# expected to create a layer named 'track_embedding', which the test section of
# this notebook looks up — confirm.
model = build_model(track_index=track_index, album_index=album_index)
model.summary()

In [None]:
# Positives per batch; with negative_ratio=2 each batch holds 3 * n_positive rows.
n_positive = 1024

# Infinite generator of shuffled positive/negative batches.
gen = generate_batch(
    pairs,
    unique_tracks,
    unique_albums,
    pairs_set,
    n_positive=n_positive,
    negative_ratio=2,
)

# One epoch draws roughly as many positives as there are known pairs.
train_model = model.fit(
    x=gen,
    epochs=15,
    steps_per_epoch=len(pairs) // n_positive,
    verbose=2,
)

## Test the Recommendation System

In [17]:
%run model.test.ipynb

In [18]:
# Pull the learned track embedding matrix out of the trained model.
track_layer = model.get_layer('track_embedding')
track_weights = track_layer.get_weights()[0]

# Scale every row to unit L2 length; keepdims keeps the norms as a column so
# the division broadcasts row-wise.
# NOTE(review): presumably done so find_similar can compare rows by cosine
# similarity — confirm against model.test.ipynb.
row_norms = np.linalg.norm(track_weights, axis=1, keepdims=True)
track_weights = track_weights / row_norms

In [19]:
# Store the sampled track under its own name. Rebinding `random_track` would
# replace the helper with its return value, so re-running this cell would then
# fail because the name is no longer callable.
sampled_track_name = random_track(data_train)
find_similar(track_name=sampled_track_name, data=data_train, weights=track_weights)

Tracks most similar to "Tu Kahaan - 1 Min Music":
1. Tu Kahaan - 1 Min Music - Similarity: 1.00
2. Blue Train - Similarity: 0.56
3. Tose Naina (From "Mickey Virus) - Similarity: 0.55
4. 何度目の青空か? - Similarity: 0.55
5. De História em História - Similarity: 0.55
6. 愛を知る - Similarity: 0.52
7. Talk It Out - Similarity: 0.51
8. Don't Cry Daddy - Similarity: 0.51
9. Chala Head Chala - Similarity: 0.50
10. Thillana 2.0 - Dhanashree (feat. Sharanya Srinivas) - Similarity: 0.50
