In [13]:
import pandas as pd
import scipy as sc
import numpy as np
import sys
from sklearn.preprocessing import MultiLabelBinarizer

In [2]:
# Locations of the input CSV files, relative to the notebook's working directory.
tracks_file = '../data/tracks.csv'                      # per-track album / artist / duration
target_playlists_file = '../data/target_playlists.csv'  # playlists that need recommendations
train_file = '../data/train.csv'                        # (playlist, track) pairs

In [3]:
# Load every input file and give its columns the names used by the rest of the notebook.
def _load_csv(path, column_names):
    """Read the CSV at ``path`` and overwrite its header with ``column_names``.

    Assigning ``columns`` raises if the number of names does not match the
    number of columns in the file.
    """
    frame = pd.read_csv(path)
    frame.columns = column_names
    return frame


train_data = _load_csv(train_file, ['playlist_id', 'track_id'])
tracks_data = _load_csv(tracks_file, ['track_id', 'album_id', 'artist_id', 'duration_sec'])
target_playlists = _load_csv(target_playlists_file, ['playlist_id'])

In [4]:
# Build the URM: a sparse binary matrix with one row per playlist and one column per
# distinct track, where a 1 means the playlist contains the track.
#
# Selecting 'track_id' before apply() passes each group's track column straight to
# list(), instead of materialising a per-group DataFrame that still carries the
# grouping column (deprecated behaviour of DataFrameGroupBy.apply in recent pandas).
grouped_playlists = train_data.groupby('playlist_id')['track_id'].apply(list)

# NOTE(review): URM rows follow the order of grouped_playlists (sorted playlist_id),
# while later cells index rows with playlist_id itself. That presumably relies on the
# ids being exactly 0..N-1 -- confirm against the data.
URM = MultiLabelBinarizer(sparse_output=True).fit_transform(grouped_playlists)
URM_csr = URM.tocsr()

In [5]:
# Playlist-to-playlist similarity: the URM times its own transpose, so entry (i, j)
# is the dot product of playlist rows i and j of the binary URM.
similarity_matrix = URM_csr @ URM_csr.T

In [7]:
def initialize_output_file():
    """Create ``submission.csv`` in the working directory and write its header.

    The file is opened in write mode so that every run starts from an empty
    file; append mode would leave a second header line and the rows of any
    previous run in the submission.

    Returns:
        The open, writable file object. The caller is responsible for closing
        it (it can be used directly in a ``with`` statement).
    """
    file = open("submission.csv", 'w')
    file.write("playlist_id,track_ids" + '\n')
    return file

In [8]:
def print_to_file(playlist, tracks, file, max_tracks=10):
    """Write one submission row: ``<playlist>,<track> <track> ...`` plus a newline.

    Args:
        playlist: Identifier written before the comma (converted with ``str``).
        tracks: Sliceable sequence of track identifiers; only the first
            ``max_tracks`` entries are written, separated by single spaces.
        file: Writable text file-like object.
        max_tracks: Maximum number of tracks to emit (default 10).

    Unlike a fixed 10-step index loop, a sequence shorter than ``max_tracks``
    is written as-is instead of raising ``IndexError`` halfway through the line.
    """
    row = ' '.join(str(track) for track in tracks[:max_tracks])
    # Emit the whole row in one call so a failure cannot leave a partial line.
    file.write(str(playlist) + ',' + row + '\n')

In [9]:
def recommend(playlist_id, row_data, file, playlists):
    """Pick up to 10 new tracks for ``playlist_id`` and write them to ``file``.

    Playlists are visited from most to least similar; every track that is not
    already in the target playlist and has not been picked yet is recommended
    until 10 tracks have been collected.

    Args:
        playlist_id: Id of the playlist to recommend for. It is looked up in the
            module-level ``grouped_playlists`` and is also treated as this
            playlist's own position in ``row_data``.
        row_data: 1 x N sparse row of similarity scores for this playlist.
        file: Writable text file handed through to ``print_to_file``.
        playlists: Series of track lists, addressed with ``.loc`` by the column
            positions of ``row_data``.

    Fewer than 10 tracks are passed on if the ranking is exhausted first.
    """
    # A set gives O(1) membership tests; picked tracks are added to it as well
    # so that the same track is never recommended twice.
    excluded = set(grouped_playlists.loc[playlist_id])

    scores = np.asarray(row_data.todense()).ravel()
    # argsort is ascending, so reverse it to get the most similar playlists first.
    similarity_ranking = np.argsort(scores)[::-1]

    wanted = 10
    to_be_recommended = []
    for pos in similarity_ranking:
        # Skip the playlist itself explicitly: on similarity ties it is not
        # guaranteed to be the first entry of the ranking.
        if pos == playlist_id:
            continue
        for track in playlists.loc[pos]:
            if track in excluded:
                continue
            excluded.add(track)
            to_be_recommended.append(track)
            if len(to_be_recommended) == wanted:
                break
        if len(to_be_recommended) == wanted:
            break

    print_to_file(playlist_id, to_be_recommended, file)

In [10]:
# Execute the recommendations: one submission row per target playlist.
# The with-block closes submission.csv even if recommend() raises part-way through.
with initialize_output_file() as file:
    for playlist_id in target_playlists['playlist_id']:
        # The similarity matrix is indexed with playlist_id as the row position.
        row_data = similarity_matrix[playlist_id, :]
        recommend(playlist_id, row_data, file, grouped_playlists)