In [21]:
import json
import numpy as np
from pprint import pprint
import PyLyrics as lyrics
import requests
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy.util as util
from time import time, sleep
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")
# import sys
# sys.version

In [22]:
# Spotify API credentials live in a local JSON file (keys "id" and "secret")
# so they never have to be typed into the notebook itself.
with open('../data/creds.json', 'r') as creds_file:
    creds = json.load(creds_file)

client_id, client_secret = creds["id"], creds["secret"]

In [23]:
class Dataset:
    """Collects Spotify playlist tracks per user, enriched with features and lyrics.

    All state lives in ``self.data_dict``, a JSON-serialisable dict holding:
      * ``'__disclaimer'`` -- a fixed usage notice,
      * ``'tracks'``       -- track id -> feature dict built by ``get_feats``
                              (plus ``'artist_id'``),
      * ``'users'``        -- user id -> list of track ids seen in that user's
                              playlists (one entry per occurrence).
    """

    # Keys removed from the audio-features payload before it is stored.
    _DROPPED_AUDIO_KEYS = ('uri', 'id', 'analysis_url', 'track_href', 'type', 'duration_ms')

    def __init__(self, client_id, client_secret):
        """Create a Spotify client from app credentials and start with an empty dataset."""
        client_cred = SpotifyClientCredentials(client_id=client_id,
                                               client_secret=client_secret)
        self.sp = spotipy.Spotify(client_credentials_manager=client_cred)
        self.data_dict = {'__disclaimer': "I do not own any of the data included here, and intend to use this for academic purposes only.", 'tracks': {}, 'users':{}}

    def add_data(self, user_id, track_obj):
        """Record that ``user_id`` has ``track_obj`` and store the track's features once.

        Args:
            user_id: Spotify user id the track is attributed to.
            track_obj: track dict as found under ``item['track']`` in a playlist
                page; expected to carry ``'id'``, ``'name'`` and ``'artists'``.

        The track id is always appended to the user's list. Feature lookup
        (``get_feats``) only happens the first time a track id is seen, and is
        skipped when the first artist's name or the track name is empty.
        Entries with no track object or no track id are ignored entirely,
        because there is nothing to key them by.
        """
        if not track_obj or not track_obj.get('id'):
            return

        track_name = track_obj['name']
        track_id = track_obj['id']
        self.data_dict['users'].setdefault(user_id, []).append(track_id)

        # Already enriched: avoid repeating the (slow) API and lyrics lookups.
        if track_id in self.data_dict['tracks']:
            return

        artists = track_obj.get('artists') or []
        if not artists:
            return
        artist_id = artists[0]['id']
        artist = artists[0]['name']
        if not artist or not track_name:
            return

        track_data = self.get_feats(artist, track_obj)
        track_data['artist_id'] = artist_id
        self.data_dict['tracks'][track_id] = track_data

    def size(self):
        """Return the number of distinct tracks stored."""
        return len(self.data_dict['tracks'])

    def num_users(self):
        """Return the number of users that have at least one recorded track."""
        return len(self.data_dict['users'])

    def fetch_user_data(self, user_id):
        """Walk every playlist owned by ``user_id`` and add all of its tracks.

        Playlists that the user merely follows (owner id differs) are skipped.
        Both the playlist listing and each playlist's track listing are
        followed through their ``'next'`` links so nothing past the first
        page is lost. Prints ``<playlist name> --  <track total>`` per playlist.
        """
        user_playlists = self.sp.user_playlists(user_id)

        while user_playlists:
            for playlist in user_playlists['items']:
                if playlist['owner']['id'] != user_id:
                    continue
                print(playlist['name'], end='')
                print(' -- ', playlist['tracks']['total'])
                results = self.sp.user_playlist(user_id, playlist['id'], fields='tracks,next')
                tracks = results['tracks']
                while tracks:
                    for item in tracks['items']:
                        self.add_data(user_id, item['track'])
                    tracks = self.sp.next(tracks) if tracks.get('next') else None
            user_playlists = self.sp.next(user_playlists) if user_playlists.get('next') else None

    def load_data(self, data_filepath):
        """Replace the in-memory dataset with the JSON stored at ``data_filepath``."""
        with open(data_filepath, 'r') as f:
            self.data_dict = json.load(f)

    def get_feats(self, artist, track):
        """Build the stored feature dict for one track.

        Args:
            artist: name of the track's primary artist (used for the lyrics lookup).
            track: track dict carrying ``'id'``, ``'name'``, ``'album'``,
                ``'duration_ms'`` and ``'popularity'``.

        Returns:
            dict with keys ``name``, ``artist``, ``album``, ``album_id``,
            ``album_popularity``, ``images`` (first album image URL, or ``None``
            when the album has no images), ``popularity``, ``release_year``
            (first four characters of the album release date), ``genres``,
            ``duration`` (seconds), ``lyrics`` (``'N/A'`` when not found) and
            ``audio_feats`` (``None`` when the API returned no features).

        Note:
            When the first lyrics lookup fails, the track name is shortened to
            the text before the first ``'-'`` for a retry, and that shortened
            name is also what ends up in the returned ``'name'`` field.
        """
        track_id = track['id']
        track_name = track['name'].strip()
        album = track['album']
        album_name = album['name'].strip()
        album_id = album['id']
        track_duration = track['duration_ms']/1000
        track_popularity = track['popularity']

        images = album.get('images') or []
        track_album_art_url = images[0]['url'] if images else None

        track_audio_feats = self.sp.audio_features(track_id)[0]
        if track_audio_feats is not None:
            for key in self._DROPPED_AUDIO_KEYS:
                track_audio_feats.pop(key, None)

        # Use the instance's client rather than relying on a notebook-global `sp`.
        album_info = self.sp.album(album_id)
        release_year = album_info['release_date'][:4]
        genres = album_info['genres']
        album_popularity = album_info['popularity']

        track_lyrics = 'N/A'
        try:
            track_lyrics = lyrics.PyLyrics.getLyrics(artist, track_name)
        except ValueError:
            # Retry without any " - suffix" part of the title.
            track_name = track_name.split('-')[0].strip()
            try:
                track_lyrics = lyrics.PyLyrics.getLyrics(artist, track_name)
            except ValueError:
                print('Lyrics not found for %s - %s'%(artist, track_name))

        track_data = {'name': track_name,
                      'artist': artist,
                      'album': album_name,
                      'album_id': album_id,
                      'album_popularity': album_popularity,
                      'images': track_album_art_url,
                      'popularity': track_popularity,
                      'release_year': release_year,
                      'genres': genres,
                      'duration': track_duration,
                      'lyrics': track_lyrics,
                      'audio_feats': track_audio_feats}
        return track_data

    def save_data(self, data_filepath):
        """Write the whole dataset to ``data_filepath`` as JSON."""
        with open(data_filepath, 'w') as f:
            json.dump(self.data_dict, f)

In [27]:
# Build a fresh dataset, pull the playlists of each listed Spotify user and
# report how long the crawl took plus the resulting track/user counts.
dataset = Dataset(client_id, client_secret)
# dataset.load_data('../data/spotify_data.json')
user_ids = ['rfsdr1ffkv97dj1qz0esavzzu', 'ajinkyaz']
started_at = time()
for user in user_ids:
    dataset.fetch_user_data(user)
elapsed = time() - started_at
print('Took ', elapsed, 'seconds')
print('Total tracks --', dataset.size())
print('Total users --', dataset.num_users())

My beats --  12
My Playlist --  81
Lyrics not found for Zedd - Stay (with Alessia Cara)
Lyrics not found for Machine Gun Kelly - Home (with Machine Gun Kelly, X Ambassadors & Bebe Rexha)
Lyrics not found for Kygo - It Ain’t Me (with Selena Gomez)
Lyrics not found for Hailee Steinfeld - Let Me Go (with Alesso, Florida Georgia Line & watt)
Lyrics not found for Camila Cabello - Crown (with Camila Cabello & Grey)
Lyrics not found for Paul Simpson - When I Met You in the Summer
Lyrics not found for Kendrick Lamar - LOVE. FEAT. ZACARI.
Lyrics not found for Marshmello - Spotlight
Lyrics not found for G-Eazy - Him & I (with Halsey)
Lyrics not found for Tee Grizzley - From The D To The A (feat. Lil Yachty)
Random Shite --  146
Lyrics not found for CAZZETTE - Blind Heart
Lyrics not found for SNBRN - Raindrops
Lyrics not found for David Guetta - Bad (feat. Vassy)
Lyrics not found for Major Lazer - Lean On (feat. MØ & DJ Snake)
Lyrics not found for Ash Grunwald - Walking
Lyrics not found for Moxie

In [None]:
# dataset.save_data('../data/spotify_data.json')