In [1]:
import sys
sys.path.insert(0, '../')

import pdb

import numpy as np
import pandas as pd
import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy.oauth2 as oauth2


from keys.spotify_keys import * 

In [2]:
data = pd.read_csv("../data/raw/raw_dropped_duplicates.zip")

In [3]:
data.shape

(107973, 20)

In [4]:
i = 0
ids = list(data.ID.iloc[i:i + 50])

In [112]:

CLIENT_ID = spotify_keys["CLIENT_ID"]
CLIENT_SECRET = spotify_keys["CLIENT_SECRET"]

credentials = oauth2.SpotifyClientCredentials(
        client_id=CLIENT_ID,
        client_secret=CLIENT_SECRET)

token = credentials.get_access_token()
spotify = spotipy.Spotify(auth=token)

# res = spotify.tracks(ids)
# print(res)

In [128]:
class SpotifyAPIParser:
    
    def __init__(self, save_directory="../data/api_calls/", file_name="track_data.csv"):
        self.save_directory = save_directory
        self.file_name = file_name
        
        # give initial value for save location
        self.save_loc = f"{save_directory}0-{file_name}"
        
        self.current_df = None
    
    def get_track_info(self, data, sp, index_range=[0]):
        # spotify api calls allow for up to 50 queries per call
        # querying tracks requires a list of track IDs
        for i in index_range:
            ids = list(data.ID.iloc[i: i + 50])
            res = sp.tracks(ids)

            parsed = self.parse_api_response(res)
            self.save_call(df=parsed, current_index=i)
        
    
    def save_call(self, df, current_index):
        if(current_index % 10_000 == 0 and current_index != 0):
            print("adjusting location")
            self.adjust_save_loc(int(current_index / 10_000))
            pd.concat([self.current_df, df]).to_csv(self.save_loc, index=False)
        else:
            
            self.current_df = pd.concat([self.current_df, df])
            
            
    def adjust_save_loc(self, i):
        loc = f"{self.save_directory}{i}-{self.file_name}"
        self.save_loc = loc
    
    
    
    
    
    
    
    
    def parse_api_response(self, res):
        track_data = []
        for track in res['tracks']:
            track_data.append(self.parse_track_data(track))
        track_df = pd.DataFrame(track_data, columns=(
            "ID", 
            "artist_names",
            "artist_ids",
            "release_date",
            "release_date_precision",
        ))
        return track_df
        
            
    def parse_track_data(self, track):
        ID = track['id']
        artist_names = self.get_track_artists(track)
        artist_ids = self.get_track_artist_ids(track)
        release_date = self.get_track_release_date(track)
        release_date_precision = self.get_track_release_date_precision(track)
        
        return (
            ID,
            artist_names,
            artist_ids,
            release_date,
            release_date_precision
        )
    
    
    def get_track_artists(self, track):
        try:
            artists = []
            for artist in track['artists']:
                artists.append(artist['name'])
            return ",".join(artists)
        except:
            return np.NaN

    
    def get_track_artist_ids(self, track):
        try:
            artist_ids = []
            for artist in track['artists']:
                artist_ids.append(artist['id'])
            return ",".join(artist_ids)
        except:
            return np.NaN
        
    
    def get_track_release_date(self, track):
        try:
            return track['album']['release_date']
        except:
            return np.NaN
        
    
    def get_track_release_date_precision(self, track):
        try:
            return track['album']['release_date_precision']
        except:
            return np.NaN

In [138]:
np.arange(0, 10_000, 50)

array([   0,   50,  100,  150,  200,  250,  300,  350,  400,  450,  500,
        550,  600,  650,  700,  750,  800,  850,  900,  950, 1000, 1050,
       1100, 1150, 1200, 1250, 1300, 1350, 1400, 1450, 1500, 1550, 1600,
       1650, 1700, 1750, 1800, 1850, 1900, 1950, 2000, 2050, 2100, 2150,
       2200, 2250, 2300, 2350, 2400, 2450, 2500, 2550, 2600, 2650, 2700,
       2750, 2800, 2850, 2900, 2950, 3000, 3050, 3100, 3150, 3200, 3250,
       3300, 3350, 3400, 3450, 3500, 3550, 3600, 3650, 3700, 3750, 3800,
       3850, 3900, 3950, 4000, 4050, 4100, 4150, 4200, 4250, 4300, 4350,
       4400, 4450, 4500, 4550, 4600, 4650, 4700, 4750, 4800, 4850, 4900,
       4950, 5000, 5050, 5100, 5150, 5200, 5250, 5300, 5350, 5400, 5450,
       5500, 5550, 5600, 5650, 5700, 5750, 5800, 5850, 5900, 5950, 6000,
       6050, 6100, 6150, 6200, 6250, 6300, 6350, 6400, 6450, 6500, 6550,
       6600, 6650, 6700, 6750, 6800, 6850, 6900, 6950, 7000, 7050, 7100,
       7150, 7200, 7250, 7300, 7350, 7400, 7450, 75

In [130]:
parser = SpotifyAPIParser("../data/api_calls/", "track_data.csv")
parser.get_track_info(data, spotify, index_range=np.arange(0, 20_000, 50))

adjusting location


In [141]:
data[data.ID == "5fKitjmfucWNCOZzyk8y6j"]

Unnamed: 0,Name,Danceability,Energy,Key,Loudness,Mode,Speechness,Acousticness,Instrumentalness,Liveness,Valence,Tempo,Type,ID,Uri,Ref_Track,URL_features,Duration_ms,time_signature,Genre
10049,VoodooRag,0.287,0.377,0.0,-12.573,0.0,0.0383,0.884,0.951,0.196,0.283,81.864,audio_features,5fKitjmfucWNCOZzyk8y6j,spotify:track:5fKitjmfucWNCOZzyk8y6j,https://api.spotify.com/v1/tracks/5fKitjmfucWN...,https://api.spotify.com/v1/audio-analysis/5fKi...,225293.0,4.0,pianoblues


In [143]:
pd.read_csv('../data/api_calls/0-track_data.csv')

FileNotFoundError: [Errno 2] File b'../data/api_calls/0-track_data.csv' does not exist: b'../data/api_calls/0-track_data.csv'

In [62]:
track = {'album': {'album_type': 'single',
  'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/2egqZI7Cz9JqioUoUBxM9H'},
    'href': 'https://api.spotify.com/v1/artists/2egqZI7Cz9JqioUoUBxM9H',
    'id': '2egqZI7Cz9JqioUoUBxM9H',
    'name': 'Drakum',
    'type': 'artist',
    'uri': 'spotify:artist:2egqZI7Cz9JqioUoUBxM9H'}],
  'available_markets': [],
  'external_urls': {'spotify': 'https://open.spotify.com/album/0OdDdqkPazkYO46ocxqCFx'},
  'href': 'https://api.spotify.com/v1/albums/0OdDdqkPazkYO46ocxqCFx',
  'id': '0OdDdqkPazkYO46ocxqCFx',
  'images': [{'height': 600,
    'url': 'https://i.scdn.co/image/708101f4170c92c558c7028917ab2c569a4c6f60',
    'width': 600},
   {'height': 300,
    'url': 'https://i.scdn.co/image/c8671483b0c0644a9cad1963455b903da12ec4fd',
    'width': 300},
   {'height': 64,
    'url': 'https://i.scdn.co/image/bd2cbd012ce86b983921010ccccb01826dfcf3f7',
    'width': 64}],
  'name': 'Around the Oak',
  'release_date': '2011-04-21',
  'release_date_precision': 'day',
  'total_tracks': 3,
  'type': 'album',
  'uri': 'spotify:album:0OdDdqkPazkYO46ocxqCFx'},
 'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/2egqZI7Cz9JqioUoUBxM9H'},
   'href': 'https://api.spotify.com/v1/artists/2egqZI7Cz9JqioUoUBxM9H',
   'id': '2egqZI7Cz9JqioUoUBxM9H',
   'name': 'Drakum',
   'type': 'artist',
   'uri': 'spotify:artist:2egqZI7Cz9JqioUoUBxM9H'}],
 'available_markets': [],
 'disc_number': 1,
 'duration_ms': 238889,
 'explicit': False,
 'external_ids': {'isrc': 'ESAAI0260838'},
 'external_urls': {'spotify': 'https://open.spotify.com/track/1xZAjz98q8zbtASUvkLCMY'},
 'href': 'https://api.spotify.com/v1/tracks/1xZAjz98q8zbtASUvkLCMY',
 'id': '1xZAjz98q8zbtASUvkLCMY',
 'is_local': False,
 'name': 'Around the Oak',
 'popularity': 0,
 'preview_url': None,
 'track_number': 1,
 'type': 'track',
 'uri': 'spotify:track:1xZAjz98q8zbtASUvkLCMY'}

# print(SpotifyAPIParser.parse_track_data(track))
SpotifyAPIParser.parse_api_response(res).head()


Unnamed: 0,ID,artist_names,artist_ids,release_date,release_date_precision
0,6J2VvzKwWc2f0JP5RQVZjq,The HU,0b2B3PwcYzQAhuJacmcYgc,2018-10-14,day
1,4HA34COgxgVJ6zK88UN4Ik,Gloryhammer,7dkEByOe0oHqc54qU4hwzV,2019-04-12,day
2,3W6Xik6Xxf06JuUoZSATlD,"Orion's Reign,Minniva","3hRL14TJtJqr7i1IH3D1pU,50KIyycosYOGrBHfLaI1Ju",2018-10-05,day
3,2gGveBaLJQMtJ43X4UL5kH,Brothers of Metal,0WPCPYm5IDNtQjuJOGNPcy,2018-11-09,day
4,1lRF81A1C9QoCgBcEop2zg,Powerwolf,5HFkc3t0HYETL4JeEbDB1v,2018-07-13,day


In [63]:
data.merge(SpotifyAPIParser.parse_api_response(res), on="ID", how="inner")

Unnamed: 0,Name,Danceability,Energy,Key,Loudness,Mode,Speechness,Acousticness,Instrumentalness,Liveness,...,Uri,Ref_Track,URL_features,Duration_ms,time_signature,Genre,artist_names,artist_ids,release_date,release_date_precision
0,YuveYuveYu,0.624,0.857,10.0,-6.25,0.0,0.0542,0.0208,0.206,0.11,...,spotify:track:6J2VvzKwWc2f0JP5RQVZjq,https://api.spotify.com/v1/tracks/6J2VvzKwWc2f...,https://api.spotify.com/v1/audio-analysis/6J2V...,282920.0,4.0,celticmetal,The HU,0b2B3PwcYzQAhuJacmcYgc,2018-10-14,day
1,Gloryhammer,0.517,0.916,0.0,-4.933,1.0,0.0559,0.000182,0.00191,0.306,...,spotify:track:4HA34COgxgVJ6zK88UN4Ik,https://api.spotify.com/v1/tracks/4HA34COgxgVJ...,https://api.spotify.com/v1/audio-analysis/4HA3...,300320.0,4.0,celticmetal,Gloryhammer,7dkEByOe0oHqc54qU4hwzV,2019-04-12,day
2,Nostos,0.251,0.894,8.0,-4.103,0.0,0.057,0.0144,0.0,0.123,...,spotify:track:3W6Xik6Xxf06JuUoZSATlD,https://api.spotify.com/v1/tracks/3W6Xik6Xxf06...,https://api.spotify.com/v1/audio-analysis/3W6X...,175353.0,4.0,celticmetal,"Orion's Reign,Minniva","3hRL14TJtJqr7i1IH3D1pU,50KIyycosYOGrBHfLaI1Ju",2018-10-05,day
3,Yggdrasil,0.469,0.743,1.0,-5.57,0.0,0.0272,0.00222,0.000111,0.276,...,spotify:track:2gGveBaLJQMtJ43X4UL5kH,https://api.spotify.com/v1/tracks/2gGveBaLJQMt...,https://api.spotify.com/v1/audio-analysis/2gGv...,272292.0,4.0,celticmetal,Brothers of Metal,0WPCPYm5IDNtQjuJOGNPcy,2018-11-09,day
4,Incense&Iron,0.487,0.952,1.0,-4.429,0.0,0.0613,0.000228,0.0,0.161,...,spotify:track:1lRF81A1C9QoCgBcEop2zg,https://api.spotify.com/v1/tracks/1lRF81A1C9Qo...,https://api.spotify.com/v1/audio-analysis/1lRF...,237933.0,4.0,celticmetal,Powerwolf,5HFkc3t0HYETL4JeEbDB1v,2018-07-13,day
5,WelcometoHell,0.43,0.797,2.0,-5.907,0.0,0.0303,0.000308,0.0566,0.323,...,spotify:track:3lUQ6y8XeeaoK2hPydcX9c,https://api.spotify.com/v1/tracks/3lUQ6y8Xeeao...,https://api.spotify.com/v1/audio-analysis/3lUQ...,262719.0,4.0,celticmetal,Mono Inc.,1189BemPY7h2lgaH1pkzGX,2018-07-27,day
6,Ambiramus,0.434,0.908,6.0,-4.715,1.0,0.0936,0.00791,0.0,0.662,...,spotify:track:6sXsi4DsxtpI4BnfhaG77v,https://api.spotify.com/v1/tracks/6sXsi4DsxtpI...,https://api.spotify.com/v1/audio-analysis/6sXs...,173213.0,4.0,celticmetal,Eluveitie,5X0N2k3qMnI8kSrGJT3kfT,2019-03-15,day
7,Gunman,0.308,0.965,8.0,-3.172,1.0,0.0591,2.3e-05,0.0648,0.117,...,spotify:track:3Mz5lXbMBK2tUmKAwAJwi1,https://api.spotify.com/v1/tracks/3Mz5lXbMBK2t...,https://api.spotify.com/v1/audio-analysis/3Mz5...,317733.0,4.0,celticmetal,Orden Ogan,3t5X2CVDf5mrlIx1SdvWYM,2017-07-07,day
8,NoMoreHollywoodEndings,0.5,0.925,4.0,-3.467,0.0,0.0378,0.00094,0.0,0.294,...,spotify:track:7AstJIsEomp0hJyMasOqLN,https://api.spotify.com/v1/tracks/7AstJIsEomp0...,https://api.spotify.com/v1/audio-analysis/7Ast...,234787.0,4.0,celticmetal,Battle Beast,7k5jeohQCF20a8foBD9ize,2019-01-18,day
9,Promises,0.479,0.977,2.0,-4.515,1.0,0.086,1.7e-05,5.3e-05,0.298,...,spotify:track:1L4UqbKtfM4bk3UdOzrXny,https://api.spotify.com/v1/tracks/1L4UqbKtfM4b...,https://api.spotify.com/v1/audio-analysis/1L4U...,285925.0,4.0,celticmetal,"Timo Tolkki’s Avalon,Todd Michael Hall","6UkYfNV9Y4kp00UT4D2djV,5ZLa4aUwDkoflqCXq81x4A",2019-04-23,day


In [36]:
res['tracks'][0]

{'album': {'album_type': 'single',
  'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/2egqZI7Cz9JqioUoUBxM9H'},
    'href': 'https://api.spotify.com/v1/artists/2egqZI7Cz9JqioUoUBxM9H',
    'id': '2egqZI7Cz9JqioUoUBxM9H',
    'name': 'Drakum',
    'type': 'artist',
    'uri': 'spotify:artist:2egqZI7Cz9JqioUoUBxM9H'}],
  'available_markets': [],
  'external_urls': {'spotify': 'https://open.spotify.com/album/0OdDdqkPazkYO46ocxqCFx'},
  'href': 'https://api.spotify.com/v1/albums/0OdDdqkPazkYO46ocxqCFx',
  'id': '0OdDdqkPazkYO46ocxqCFx',
  'images': [{'height': 600,
    'url': 'https://i.scdn.co/image/708101f4170c92c558c7028917ab2c569a4c6f60',
    'width': 600},
   {'height': 300,
    'url': 'https://i.scdn.co/image/c8671483b0c0644a9cad1963455b903da12ec4fd',
    'width': 300},
   {'height': 64,
    'url': 'https://i.scdn.co/image/bd2cbd012ce86b983921010ccccb01826dfcf3f7',
    'width': 64}],
  'name': 'Around the Oak',
  'release_date': '2011-04-21',
  'release_date

In [22]:
for track in res['tracks']:
    print(track["album"]['artists'][0]["name"])
    

Drakum
Northland
Korpiklaani
Eluveitie
In Extremo
Eluveitie
Eluveitie
Eluveitie
Eluveitie
Eluveitie
Týr
Týr
Týr
Týr
Amon Amarth
Amon Amarth
Amon Amarth
Amon Amarth
Amon Amarth
Amon Amarth
Amon Amarth
Northland
Northland
Crimfall
Skyclad
Ensiferum
Ensiferum
Ensiferum
Korpiklaani
Sabaton
Trewa
Skiltron
Týr
Thyrfing
Pertness
Wolfheart
Equilibrium
Tvivel
Trewa
Brymir
Thyrfing
Firkin
Eastern High
Brave the Sea
Wolfheart
Amon Amarth
Heidevolk
Firkin
Aexylium
Aexylium


In [77]:
spotify.categories(country=["US"], limit=50)

{'categories': {'href': 'https://api.spotify.com/v1/browse/categories?country=US&offset=0&limit=50',
  'items': [{'href': 'https://api.spotify.com/v1/browse/categories/toplists',
    'icons': [{'height': 275,
      'url': 'https://t.scdn.co/media/derived/toplists_11160599e6a04ac5d6f2757f5511778f_0_0_275_275.jpg',
      'width': 275}],
    'id': 'toplists',
    'name': 'Top Lists'},
   {'href': 'https://api.spotify.com/v1/browse/categories/summer',
    'icons': [{'height': None,
      'url': 'https://t.scdn.co/images/8e508d7eb5b843a89c368c9507ecc429.jpeg',
      'width': None}],
    'id': 'summer',
    'name': 'Summer'},
   {'href': 'https://api.spotify.com/v1/browse/categories/pop',
    'icons': [{'height': 274,
      'url': 'https://t.scdn.co/media/derived/pop-274x274_447148649685019f5e2a03a39e78ba52_0_0_274_274.jpg',
      'width': 274}],
    'id': 'pop',
    'name': 'Pop'},
   {'href': 'https://api.spotify.com/v1/browse/categories/hiphop',
    'icons': [{'height': 274,
      'url': 