### How does the popularity of authors within a playlist affect the popularity of the playlist itself?

In [1]:
import os
import json
import pickle
from collections import defaultdict
from collections import Counter
import numpy as np

In [2]:
MPD = 'mpd/'
playlist_files = [os.path.join(MPD, x) for x in os.listdir(MPD)]

In [3]:
SONG_DATA = 'mappings/songs'
song_files = [os.path.join(SONG_DATA, x) for x in os.listdir(SONG_DATA)]

In [4]:
ARTIST_DATA = 'mappings/artists/artist_matching.pkl'
with open(ARTIST_DATA, 'rb') as fp:
    artist_dict = pickle.load(fp)

In [5]:
artistID_to_popularity = {k:v['popularity'] for k, v in artist_dict.items()}

In [6]:
songID_to_popularity = {}
for file in song_files:
    with open(file, 'rb') as fp:
        data = pickle.load(fp)
    for songID,song in data.items():
        total_popularity = 0
        num_authors = len(song['artists'])
        for artist in song['artists']:
            artistID = artist['id']
            popularity = artistID_to_popularity[artistID]
            total_popularity += popularity
        songID_to_popularity[songID] = (total_popularity, num_authors)

In [7]:
list(songID_to_popularity.items())[:5]

[('spotify:track:1fyTBapjw8q9MlzLVdU6wg', (67, 1)),
 ('spotify:track:4cC9nQMgClmz8cix6l5CSX', (8, 1)),
 ('spotify:track:3qwh1awyjkXFUER1Pp64qv', (26, 1)),
 ('spotify:track:6NHONOfLaDTtSG8WtCTpex', (23, 1)),
 ('spotify:track:7HxecasMeh6aCAvQPGiFgP', (12, 1))]

In [8]:
playlist_data = {} # list, num_followers
for file in playlist_files:
    with open(file, 'rb') as fp:
        data = json.load(fp)
    for playlist in data['playlists']:
        empty = False
        ID = playlist['pid']
        author_data = []
        for track in playlist['tracks']:
            tID = track['track_uri']
            if tID not in songID_to_popularity:
                continue
            author_data.append(songID_to_popularity[tID])
        followers = playlist['num_followers']
        playlist_data[ID] = (author_data, followers)

In [9]:
list(playlist_data.items())[:5]

[(962000,
  ([(35, 1),
    (47, 1),
    (20, 1),
    (32, 1),
    (23, 1),
    (33, 1),
    (35, 1),
    (23, 1),
    (29, 1),
    (36, 1),
    (27, 1),
    (47, 1),
    (33, 1),
    (39, 1),
    (34, 1),
    (43, 1),
    (17, 1),
    (41, 1),
    (34, 1),
    (23, 1),
    (39, 1),
    (47, 1),
    (26, 1),
    (32, 1),
    (34, 1),
    (36, 1),
    (45, 2),
    (34, 1),
    (25, 1),
    (32, 1),
    (57, 1),
    (36, 1),
    (32, 1),
    (21, 1),
    (41, 1),
    (59, 2),
    (35, 1),
    (36, 1),
    (25, 1),
    (36, 1),
    (20, 1),
    (32, 1),
    (41, 1),
    (36, 1),
    (36, 1),
    (39, 1),
    (34, 1),
    (47, 1),
    (32, 1),
    (41, 1),
    (56, 2),
    (36, 1),
    (27, 1),
    (41, 1),
    (38, 1),
    (16, 1),
    (47, 1),
    (41, 1),
    (47, 1),
    (33, 1),
    (39, 1),
    (36, 1),
    (35, 1),
    (36, 1),
    (4, 1),
    (53, 1),
    (11, 1),
    (41, 1),
    (19, 1),
    (48, 2),
    (35, 1),
    (32, 1),
    (29, 1),
    (39, 1),
    (58, 1),
    (35, 1),
   

In [10]:
# with open('mappings/Q2/playlist_data.pkl', 'wb') as fp:
#     pickle.dump(playlist_data, fp)

In [11]:
for k, v in playlist_data.items():
    data = v[0]
    for x in data:
        if len(x) != 2:
            print(k)

In [12]:
with open('mappings/Q2/playlist_data.pkl', 'wb') as fp:
    pickle.dump(playlist_data, fp)