In [53]:
import networkx as nx
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import timeit
import pprint
import json

In [54]:
df = pd.DataFrame(columns=['artist_name','track_uri','artist_uri','track_name','album_uri','album_name'])
df = df.set_index("track_uri")

In [55]:
start = timeit.default_timer()

G = nx.Graph()
num_files = 5
for i in range(num_files):
    lb = i*1000
    ub = lb+999
    #print(f"mpd.slice.{lb}-{ub}.json")
    filename = f"spotify_million_playlist_dataset/data/mpd.slice.{lb}-{ub}.json"
    with open(filename) as read_file:
        tmp = json.load(read_file)
        for playlist in tmp["playlists"]:
            #todo: add playlist nodes?
            #G.add_node(playlist)
            for track in playlist["tracks"]:
                G.add_node(track["track_uri"], track_name=track["track_name"], duration=track["duration_ms"])
                G.add_node(track["artist_uri"], artist_name=track["artist_name"])
                G.add_node(track["album_uri"], album_name=track["album_name"])
                G.add_edge(track["track_uri"], track["artist_uri"])
                G.add_edge(track["track_uri"], track["album_uri"])
                G.add_edge(track["album_uri"], track["artist_uri"])
                # del track["pos"]
                # track_df = pd.DataFrame(track, index=["track_uri"])
                # df.loc[track_df["track_uri"].values] = track_df
    print(f"{i+1}/{num_files}; n:{G.number_of_nodes()}, m:{G.number_of_edges()}", end="\r")
print(f"n:{G.number_of_nodes()}, m:{G.number_of_edges()}")
    
stop = timeit.default_timer()
print('Time: ', stop - start)  

n:188865, m:27515275152
Time:  3.8739833999425173


In [62]:
results = nx.pagerank(G, personalization={'spotify:track:6I9VzXrHxO9rA9A5euc8Ak':0.01, 'spotify:track:0UaMYEvWZi0ZqiDOoHU3YI':0.01})
results

{'spotify:track:0UaMYEvWZi0ZqiDOoHU3YI': 0.08891146483109919,
 'spotify:artist:2wIVse2owClT7go1WT98tk': 0.08652707971465429,
 'spotify:album:6vV5UrXcfyQD1wu4Qo2I9K': 0.04439576790337253,
 'spotify:track:6I9VzXrHxO9rA9A5euc8Ak': 0.08376002189817086,
 'spotify:artist:26dSoYclwsYLMAKD3tpOr4': 0.08130941210816897,
 'spotify:album:0z7pVBGOD7HCIB7S8eLkLI': 0.046468490315361335,
 'spotify:track:0WqIKmW4BTrj3eJFmnCKMv': 6.804849920110449e-07,
 'spotify:artist:6vWDO969PvNqNYHIOW5v0m': 7.235135110623024e-05,
 'spotify:album:25hVFAxTlDvXbx2X2QkUkE': 5.029315946853531e-06,
 'spotify:track:1AWQoqb9bSvzTjaLralEkT': 7.825854807799265e-07,
 'spotify:artist:31TPClRtHm23RisEBtV3X7': 3.4806560945271595e-05,
 'spotify:album:6QPkyl04rXwTGlGlcYaRoW': 4.0426089439383626e-06,
 'spotify:track:1lzr43nnXAijIGYnCT8M8H': 9.829146600232445e-07,
 'spotify:artist:5EvFsr3kj42KNv97ZEnqij': 2.5182367505267026e-05,
 'spotify:album:6NmFmPX56pcLBOFMhIiKvF': 4.284768510448289e-06,
 'spotify:track:0XUfyU2QviPAs6bxSpXYG4': 7.

In [66]:
rec_songs = filter(lambda x: str.startswith(x[0], "spotify:track:"), results.items())
reccs = sorted(rec_songs, key=lambda x: x[1], reverse=True)[:25]

In [69]:
for i in range(len(reccs)):
    print(reccs[i], G.nodes[reccs[i][0]])

('spotify:track:0UaMYEvWZi0ZqiDOoHU3YI', 0.08891146483109919) {'track_name': 'Lose Control (feat. Ciara & Fat Man Scoop)', 'duration': 226863}
('spotify:track:6I9VzXrHxO9rA9A5euc8Ak', 0.08376002189817086) {'track_name': 'Toxic', 'duration': 198800}
('spotify:track:4z5fkIflIBvSG9elVNmiOJ', 0.01391146483109918) {'track_name': 'We Run This (Without Manicure Interlude)', 'duration': 204461}
('spotify:track:7mS8RbJji2UZAaguRGsOCH', 0.008760021898170857) {'track_name': 'Me Against the Music - LP Version / Video Mix', 'duration': 223773}
('spotify:track:0dRhSF9LV0HR8Jwd3MMMKJ', 0.008760021898170857) {'track_name': 'Everytime', 'duration': 230306}
('spotify:track:2vPCRs3g0vdLU8sHlAEytO', 0.008760021898170857) {'track_name': 'Outrageous', 'duration': 201906}
('spotify:track:3XplJgPz8VjbDzbGwGgZdq', 0.002284293758926169) {'track_name': 'Get Ur Freak On', 'duration': 236933}
('spotify:track:7IAa7vUJ11STN7le8XaxsH', 0.002284293758926169) {'track_name': 'WTF (Where They From) [feat. Pharrell Willia

In [70]:
G.nodes['spotify:track:4z5fkIflIBvSG9elVNmiOJ']

{'track_name': 'We Run This (Without Manicure Interlude)', 'duration': 204461}

In [71]:
G['spotify:track:4z5fkIflIBvSG9elVNmiOJ']

AtlasView({'spotify:artist:2wIVse2owClT7go1WT98tk': {}, 'spotify:album:6vV5UrXcfyQD1wu4Qo2I9K': {}})

In [72]:
G.nodes['spotify:artist:2wIVse2owClT7go1WT98tk']

{'artist_name': 'Missy Elliott'}

In [73]:
G.nodes['spotify:album:6vV5UrXcfyQD1wu4Qo2I9K']

{'album_name': 'The Cookbook'}