In [1]:
import networkx as nx
import timeit
import json
import random
import pickle

In [2]:
# Load the pickled graph and report how long the load took.
start = timeit.default_timer()
# NOTE: pickle.load can execute arbitrary code embedded in the file; only load
# a pickle that was produced by a trusted source.
# The context manager closes the file handle as soon as the load finishes.
with open("./spotify_million_playlist_dataset/data/graphrep/spotify.pickle", "rb") as graph_file:
    G = pickle.load(graph_file)
stop = timeit.default_timer()
print('Time elapsed (minutes):', (stop - start)/60)

Time elapsed (minutes): 5.261787376665355


In [3]:
def randWalkReccs(G, num_iterations=1000, walk_steps=10, personalization=None):
    """Count node visits over repeated fixed-length random walks on ``G``.

    Each iteration picks a start node (uniformly from ``personalization`` when
    it is given, otherwise uniformly from all nodes of ``G``), counts it, and
    then takes up to ``walk_steps`` steps. Every step moves to a uniformly
    chosen neighbor of the current node and counts the node it lands on.

    Args:
        G: Graph-like object exposing ``nodes()`` and ``neighbors(node)``.
        num_iterations: Number of walks to run; should be around
            ``100 * len(personalization)``.
        walk_steps: Maximum number of steps taken in each walk.
        personalization: Optional sequence of nodes to start ("teleport to")
            from. ``None`` means any node of ``G`` may be a start node.

    Returns:
        dict mapping each visited node to the number of times it was visited
        (start nodes are counted too).

    Raises:
        IndexError: If the pool of start nodes is empty.
    """
    # Build the start pool once instead of re-listing every node of G on each
    # iteration. `is not None` (rather than `!= None`) keeps the check a plain
    # identity test even for array-like inputs that overload comparison.
    if personalization is not None:
        start_pool = personalization
    else:
        start_pool = list(G.nodes())

    reccs = {}
    for _ in range(num_iterations):
        current = random.choice(start_pool)
        reccs[current] = reccs.get(current, 0) + 1

        for _ in range(walk_steps):
            neighbors = list(G.neighbors(current))
            if not neighbors:
                # Dead end (node without neighbors): random.choice would raise
                # IndexError on an empty list, so end this walk early.
                break
            current = random.choice(neighbors)
            reccs[current] = reccs.get(current, 0) + 1

    return reccs


In [4]:
# Seed tracks for the personalized walk (both in this example from playlist 0).
personalization_dict = {
    'track:6I9VzXrHxO9rA9A5euc8Ak': 6,  # toxic britney spears
    'track:0UaMYEvWZi0ZqiDOoHU3YI': 1,  # lose control missy elliott
}
# Only the track ids (the dict keys) go into the start-node list, in insertion order.
personalization_vect = [*personalization_dict]
personalization_vect

['track:6I9VzXrHxO9rA9A5euc8Ak', 'track:0UaMYEvWZi0ZqiDOoHU3YI']

In [5]:
# Fix the state of Python's `random` module so later random.choice calls are repeatable.
random.seed(a=0)

In [6]:
# Time one personalized random-walk run on G.
start = timeit.default_timer()
walk_steps = 12
# 100 walks per seed track, with the seed count floored at 10.
num_iterations = max(len(personalization_vect), 10) * 100
results = randWalkReccs(
    G,
    num_iterations=num_iterations,
    walk_steps=walk_steps,
    personalization=personalization_vect,
)
stop = timeit.default_timer()
print('Time elapsed (minutes):', (stop - start) / 60)

Time elapsed (minutes): 0.14002182500165267


In [7]:
# Keep only visited nodes whose id starts with "track:", then rank them by
# visit count, highest first. The cell displays how many tracks were reached.
rec_songs = (entry for entry in results.items() if str(entry[0]).startswith("track:"))
reccs = sorted(rec_songs, key=lambda entry: entry[1], reverse=True)
len(reccs)

4417

In [8]:
# Print up to 25 of the highest-ranked (track id, visit count) pairs together
# with the node attributes stored on G. Slicing instead of indexing with
# range(25) avoids an IndexError when fewer than 25 tracks were visited.
for rec in reccs[:25]:
    print(rec, G.nodes[rec[0]])

('track:6I9VzXrHxO9rA9A5euc8Ak', 535) {'track_name': 'Toxic', 'artist_name': 'Britney Spears'}
('track:0UaMYEvWZi0ZqiDOoHU3YI', 488) {'track_name': 'Lose Control (feat. Ciara & Fat Man Scoop)', 'artist_name': 'Missy Elliott'}
('track:0XUfyU2QviPAs6bxSpXYG4', 13) {'track_name': 'Yeah!', 'artist_name': 'Usher'}
('track:7uKcScNXuO3MWw6LowBjW1', 12) {'track_name': 'One, Two Step', 'artist_name': 'Ciara'}
('track:6RcQOut9fWL6FSqeIr5M1r', 11) {'track_name': 'Hollaback Girl', 'artist_name': 'Gwen Stefani'}
('track:6C7RJEIUDqKkJRZVWdkfkH', 10) {'track_name': 'Stronger', 'artist_name': 'Kanye West'}
('track:0O45fw2L5vsWpdsOdXwNAR', 9) {'track_name': 'SexyBack', 'artist_name': 'Justin Timberlake'}
('track:5XJJdNPkwmbUwE79gv0NxK', 9) {'track_name': 'Gold Digger', 'artist_name': 'Kanye West'}
('track:04KTF78FFg8sOHC1BADqbY', 9) {'track_name': 'Hot In Herre', 'artist_name': 'Nelly'}
('track:3ZFTkvIE7kyPt6Nu3PEa7V', 8) {'track_name': "Hips Don't Lie", 'artist_name': 'Shakira'}
('track:5dNfHmqgr128gM

In [18]:
# Prune G in place: keep nodes whose type is exactly int and nodes whose id
# starts with "track:"; remove every other node.
# Iterate over a snapshot list because nodes are removed during the loop.
for node in list(G.nodes()):
    if type(node) is int or node.startswith("track:"):
        continue
    G.remove_node(node)

In [19]:

# Displayed value of this cell: the current node and edge counts of G.
f"{G.number_of_nodes()}  nodes, {G.number_of_edges()} links"

'3260225  nodes, 65401617 links'

In [20]:
# Reset the `random` module to the same seed before the next walk so both runs
# start from an identical random state.
random.seed(a=0)

In [21]:
# Time one personalized random-walk run on the current state of G.
start = timeit.default_timer()
walk_steps = 12
# 100 walks per seed track, with the seed count floored at 10.
num_iterations = max(len(personalization_vect), 10) * 100
results = randWalkReccs(
    G,
    num_iterations=num_iterations,
    walk_steps=walk_steps,
    personalization=personalization_vect,
)
stop = timeit.default_timer()
print('Time elapsed (minutes):', (stop - start) / 60)

Time elapsed (minutes): 0.09601535333398109


In [22]:
# Keep only visited nodes whose id starts with "track:", then rank them by
# visit count, highest first. The cell displays how many tracks were reached.
rec_songs = (entry for entry in results.items() if str(entry[0]).startswith("track:"))
reccs = sorted(rec_songs, key=lambda entry: entry[1], reverse=True)
len(reccs)

4477

In [23]:
# Print up to 25 of the highest-ranked (track id, visit count) pairs together
# with the node attributes stored on G. Slicing instead of indexing with
# range(25) avoids an IndexError when fewer than 25 tracks were visited.
for rec in reccs[:25]:
    print(rec, G.nodes[rec[0]])

('track:6I9VzXrHxO9rA9A5euc8Ak', 542) {'track_name': 'Toxic', 'artist_name': 'Britney Spears'}
('track:0UaMYEvWZi0ZqiDOoHU3YI', 483) {'track_name': 'Lose Control (feat. Ciara & Fat Man Scoop)', 'artist_name': 'Missy Elliott'}
('track:5dNfHmqgr128gMY2tc5CeJ', 12) {'track_name': 'Ignition - Remix', 'artist_name': 'R. Kelly'}
('track:2EEeOnHehOozLq4aS0n6SL', 9) {'track_name': 'iSpy (feat. Lil Yachty)', 'artist_name': 'KYLE'}
('track:5KY7zgFeH2GWoL1zP9mME6', 9) {'track_name': 'Get Low - Street', 'artist_name': 'Lil Jon & The East Side Boyz'}
('track:7KXjTSCq5nL1LoYtL7XAwS', 8) {'track_name': 'HUMBLE.', 'artist_name': 'Kendrick Lamar'}
('track:3f7gYMirBEKuc57218BjOY', 8) {'track_name': 'California Gurls - feat. Snoop Dogg', 'artist_name': 'Katy Perry'}
('track:0QsvXIfqM0zZoerQfsI9lm', 8) {'track_name': "Don't Let Me Down", 'artist_name': 'The Chainsmokers'}
('track:0WqIKmW4BTrj3eJFmnCKMv', 8) {'track_name': 'Crazy In Love', 'artist_name': 'Beyoncé'}
('track:70cTMpcgWMcR18t9MRJFjB', 8) {'tra

In [24]:
# Save the current graph G into its own pickle file so it can be read in faster
# next time. The context manager makes sure the data is flushed and the file
# handle is closed once the dump completes.
with open("./spotify_million_playlist_dataset/data/graphrep/spotify-no-artist-album.pickle", 'wb') as pickle_file:
    pickle.dump(G, pickle_file)