In [37]:
import spotipy
import pandas as pd
import numpy as np
import spotipy
import hashlib
from spotipy.oauth2 import SpotifyClientCredentials
from spotipy.oauth2 import SpotifyOAuth
from dotenv import load_dotenv
import re
import math
from tqdm import tqdm

# Load environment variables from the local secrets file; override=True lets
# values from the file replace variables that are already set in the process.
# NOTE(review): presumably this file supplies the Spotify client credentials that SpotifyOAuth reads — confirm.
load_dotenv('../secrets/.env', override=True)

# OAuth scopes requested for the session: read/modify private playlists and read/modify the user's library.
scope = ["playlist-modify-private","playlist-read-private","user-library-modify","user-library-read"]

# Spotify Web API client authenticated through the OAuth flow with the scopes above.
spotify = spotipy.Spotify(auth_manager=SpotifyOAuth(scope=scope))

# ID of the user the client is authenticated as.
my_id = spotify.current_user()['id']

# Execute the helper notebooks.
# NOTE(review): get_spotify_*, get_library, get_playlist and edit_playlist_tracks are called below but not defined
# in this notebook; presumably they come from these three notebooks — confirm.
%run ../src/useful_functions.ipynb
%run ../src/spotify_scraping.ipynb
%run ../src/spotify_scraping_playlists.ipynb

In [38]:
# Populate the working tables used by the rest of the notebook from the
# get_spotify_* helpers (defined outside this notebook).
# NOTE(review): presumably each helper returns a pandas DataFrame loaded from the pickles under ../data
# that the last cell writes — confirm in the helper notebooks.
tracks = get_spotify_tracks()
albums = get_spotify_albums()
artists = get_spotify_artists()
playlists = get_spotify_playlists()
playlist_tracks = get_spotify_playlist_tracks()
library = get_spotify_library()

In [39]:
# Call the get_library helper (defined outside this notebook) with max=10000 and
# duplicate removal switched off; its return value is what the cell displays.
# NOTE(review): presumably this re-fetches the user's saved tracks from Spotify (max looks like an
# upper bound on the number of items) and refreshes `library` — confirm in the helper notebook.
get_library(max=10000, without_duplicates=False)
# Disabled duplicate handling, kept for reference: the first line would keep only the last row per
# 'Track ID'; the next two would list the 'Track ID's occurring more than once, joined to `tracks`.
#library = library.drop_duplicates(subset='Track ID', keep='last')
#temp = library.groupby(['Track ID'])['Track ID'].count()
#temp = pd.DataFrame(temp[temp > 1].index).merge(tracks, how = 'left', on = 'Track ID')

Unnamed: 0,Track ID
0,3s9RxyseMXYLTOePGAADSb
0,1RGYjwj7R7CYwNePNZfkgW
0,4sys6H7gj0kgsa2bUx0IeW
0,2bbNjtgjr0ZOaPK7SOcVbV
0,45WduJCqST7pBJPvxZ6ZgU
...,...
0,6oO10y1h2VyxJQfomG4CtH
0,78rIJddV4X0HkNAInEcYde
0,6Slaf2WBzQA86oS7MNMUNS
0,7cBhzkTWSpHBGAiCZzSika


In [40]:
# Fetch every playlist of the current user, 50 per request, and collect them in
# `temp` with one row per playlist and the columns 'Playlist ID',
# 'Playlist Name' and 'Playlist Description'.
#
# Rows are accumulated in a plain list and the DataFrame is built once at the
# end: concatenating inside the loop copies the whole frame on every page and
# leaves a repeated 0..49 index.
offset = 0
total = None  # number of playlists; unknown until the first page has been fetched
playlist_rows = []

while total is None or offset < total:
    temp_playlists = spotify.current_user_playlists(limit=50, offset=offset)
    playlist_rows.extend(
        {'Playlist ID': item['id'],
         'Playlist Name': item['name'],
         'Playlist Description': item['description']}
        for item in temp_playlists['items']
    )
    # Take the total from every response so the loop bound follows the API.
    total = temp_playlists['total']
    offset += 50

# Passing `columns` keeps the three columns present even when there are no playlists.
temp = pd.DataFrame(playlist_rows, columns=['Playlist ID', 'Playlist Name', 'Playlist Description'])

del(offset, total, temp_playlists, playlist_rows)

In [41]:
def _playlist_ids_with_marker(playlist_frame, marker):
    """Return the 'Playlist ID' values whose description is tagged with `marker`.

    A description counts as tagged when it is exactly `marker` or starts with
    `marker` followed by a space. Missing descriptions (None/NaN) never match;
    the Spotify API can return a null description, which would otherwise make
    the slicing test raise a TypeError.

    Parameters:
        playlist_frame: DataFrame with 'Playlist ID' and 'Playlist Description' columns.
        marker: tag to look for, e.g. '**' or '*****'.

    Returns:
        list of playlist IDs, in the row order of `playlist_frame`.
    """
    descriptions = playlist_frame['Playlist Description'].fillna('').astype(str)
    tagged = descriptions.eq(marker) | descriptions.str.startswith(marker + ' ')
    # Mask positionally so a non-unique index on `playlist_frame` cannot matter.
    return playlist_frame.loc[tagged.to_numpy(), 'Playlist ID'].tolist()


# Refresh (with tracks) every playlist tagged '**', then every playlist tagged
# '*****', through the get_playlist helper defined outside this notebook.
for marker in ('**', '*****'):
    playlists_to_run = _playlist_ids_with_marker(temp, marker)
    for playlist_id in tqdm(playlists_to_run):
        get_playlist(playlist_id, with_tracks = True)

del(temp, playlists_to_run, marker)

100%|██████████| 38/38 [01:37<00:00,  2.56s/it]
100%|██████████| 10/10 [01:27<00:00,  8.76s/it]


In [42]:
# Build `mapping_df`: for every non-local track, the 'Track ID Mapped' that
# should stand in for it among all tracks sharing the same "Track External ID's".

# `_merge == 'both'` marks the tracks that are also present in `library`.
mapping_df = tracks.merge(library, on='Track ID', how='left', indicator=True)
mapping_df = mapping_df[mapping_df['Is Track Local'] == False]

# Step 1: map to a track with the same external ID that is already in the library.
in_library = mapping_df.loc[mapping_df['_merge'] == 'both', ["Track External ID's", "Track ID"]]
mapping_df = mapping_df.merge(in_library, on=["Track External ID's"], how='left', suffixes=["", " Mapped"])

# Step 2: for external IDs with no library match, pick the most popular track
# carrying that external ID.
most_popular = (
    mapping_df[pd.isna(mapping_df['Track ID Mapped'])]
    .groupby(["Track External ID's",])[["Track External ID's", 'Track ID', 'Track Popularity']]
    .apply(lambda x: x.sort_values(by=['Track Popularity'], ascending=False).head(1))
    .reset_index(drop=True)[["Track External ID's", "Track ID"]]
)
mapping_df = mapping_df.merge(most_popular, on=["Track External ID's"], how='left', suffixes=["", " Mapped New"])

# Coalesce: keep the library match where there is one, else the step-2 pick.
# The merge above yields a fresh unique index, so the two columns align row by row.
mapping_df['Track ID Mapped'] = mapping_df['Track ID Mapped'].fillna(mapping_df['Track ID Mapped New'])
#mapping_df['Track ID Mapped'] = mapping_df.apply(lambda x: x['Track ID'] if pd.isna(x['Track ID Mapped']) else x['Track ID Mapped'], axis=1)

# Sanity check: a (Track ID, external ID) pair that ended up with more than one
# mapped ID is ambiguous and counts as an error.
test_df = mapping_df.groupby(['Track ID', "Track External ID's"]).agg('count')
test_df = test_df[test_df['Track ID Mapped'] > 1].sort_values(by=['Track Name']).reset_index()
mapping_df = mapping_df[['Track ID', 'Track ID Mapped']]

del(in_library, most_popular)

if test_df.empty:
    del(test_df)
else:
    # Report the number of ambiguous rows; `.size` would multiply it by the column count.
    print('Number of Errors: ', len(test_df))

In [43]:
# Tracks of the playlists tagged '**' (description is exactly '**' or starts
# with '** '), outer-joined with the library and enriched with the mapped track
# ID, track details and album details.
# NOTE(review): the name `sorted` shadows Python's builtin sorted() for the rest
# of the session; it is kept here because code outside this cell may read it —
# consider renaming.
sorted = playlist_tracks.merge(playlists, how = 'left', left_on = 'Playlist ID', right_on = 'id')

# Rows without a matching playlist, or playlists without a description, carry
# NaN/None here; treat them as untagged instead of failing on the slice.
playlist_descriptions = sorted['description'].fillna('').astype(str)
is_two_star = playlist_descriptions.eq("**") | playlist_descriptions.str.startswith("** ")

sorted = (
    sorted[is_two_star]
    .merge(library, how = 'outer', on = 'Track ID', indicator = True)
    .merge(mapping_df, how = 'left', on = 'Track ID')
    .merge(tracks, how = 'left', on = 'Track ID')
    .merge(albums, how = 'left', on = 'Album ID')
)

del(playlist_descriptions, is_two_star)

In [44]:
# Rewrite the track list of every playlist tagged '*****' (description is
# exactly '*****' or starts with '***** ') using the mapped track IDs.

# For these playlists, rows whose track (or mapped track) is already in the
# library are left out of the rewritten track list.
library_filtered_playlists = [#'65jX0kBNpAEO7TcDQrkDai',
                              '0UL0ZXu3J2ivcyYJEXCl5a',
                              '0bIch9WkT5N5pIWnscc0Zx',
                              '35FowWuDxV2wsuuw2goxIJ',
                              '4iPj2uCtn4PSleqBk0IXbo',
                              '5Tf5DugxlW3BbNoK76h7db',
                              '0SmY9kTeNMWRwUJyNWvaFA']

temp = playlist_tracks.merge(playlists, how = 'left', left_on = 'Playlist ID', right_on = 'id')

# Rows without a matching playlist, or playlists without a description, carry
# NaN/None here; treat them as untagged instead of failing on the slice.
playlist_descriptions = temp['description'].fillna('').astype(str)
is_five_star = playlist_descriptions.eq("*****") | playlist_descriptions.str.startswith("***** ")

temp = (
    temp[is_five_star]
    .merge(library, how = 'left', on = 'Track ID', indicator = True)   # `_merge == 'both'`: track is in the library
    .merge(mapping_df, how = 'left', on = 'Track ID')
    .merge(tracks, how = 'left', on = 'Track ID')
)
del(playlist_descriptions, is_five_star)

temp = temp[temp['name'] != 'Albums to Listen To']
# Keep only the first row per mapped track across all remaining playlists.
temp = temp.groupby('Track ID Mapped').head(1)[['Track ID', 'Track ID Mapped', 'Playlist ID', 'Position Number', 'name', 'Track Name', '_merge']]
# `_merge Mapped`: library status of the row whose own 'Track ID' equals this row's mapped ID.
temp = temp.merge(temp[['Track ID', '_merge']].rename(columns = {'Track ID': 'Track ID Mapped', '_merge': '_merge Mapped'}), how = 'left', on = 'Track ID Mapped')

df = None  # defined up front so the clean-up below also works when no playlist qualifies
for playlist_id in tqdm(list(temp['Playlist ID'].drop_duplicates())):

    df = temp[temp['Playlist ID'] == playlist_id]
    if playlist_id in library_filtered_playlists:
        df = df[((df['_merge'] != 'both') & (df['_merge Mapped'] != 'both'))]
    new_tracks = list(df['Track ID Mapped'])
    # edit_playlist_tracks is defined outside this notebook.
    edit_playlist_tracks(playlist_id, new_tracks, order=False)

del(df)

100%|██████████| 9/9 [02:17<00:00, 15.26s/it]


In [553]:
#make_playlist(title, description)
#edit_playlist_tracks(playlists[playlists['name'] == title]['id'][0], new_tracks)

#edit_playlist_details(user=my_id, playlist_id = playlist_id, name = 'name', description = 'description')
#get_playlist(playlist_id, with_tracks = False)
#get_playlist(playlist_id = playlist_id)
#delete_playlist(playlist_id = playlist_id)

In [22]:
# Persist the six working tables as pickles under ../data, in the same order as before.
pd.to_pickle(playlist_tracks, '../data/spotify_playlist_tracks.pkl')
pd.to_pickle(playlists, '../data/spotify_playlists.pkl')
pd.to_pickle(tracks, '../data/spotify_tracks.pkl')
pd.to_pickle(albums, '../data/spotify_albums.pkl')
pd.to_pickle(artists, '../data/spotify_artists.pkl')
pd.to_pickle(library, '../data/spotify_library.pkl')