In [98]:
# import modules
import getpass

import numpy as np
import pandas as pd
import spotipy

# jupyter notebook full-width display
# `display` is imported from the public `IPython.display` module; importing it
# from `IPython.core.display` is a deprecated location.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

# pandas formatting
# floats are rendered with no decimals and '_' as the thousands separator
pd.set_option('display.float_format', '{:_.0f}'.format)
# NOTE: underscore separators ('_') are better than commas (',') because
# numbers with underscores work in Python without any extra effort.
pd.set_option('display.max_columns', None)  # None = never truncate columns
pd.set_option('display.max_rows', 200)

### The Billboard 100
https://www.kaggle.com/datasets/dhruvildave/billboard-the-hot-100-songs

In [2]:
# Billboard Top 100 Historical Data
# via:  https://toolbox.google.com/datasetsearch
url_billboard = r'D:\RYERSON\820\Datasets\Billboard The Hot 100 Songs\charts.csv'

# full chart history, with the 'date' column parsed to datetimes
df_billboard = pd.read_csv(url_billboard)
df_billboard['date'] = pd.to_datetime(df_billboard['date'])

# Unique Songs from The Billboard 100 Dataset:
# one row per (song, artist) pair, ordered by artist then song, plus blank
# 'id' and 'MISSING' columns that later cells fill in
df_billboard_songs = (
    df_billboard[['song', 'artist']]
    .drop_duplicates()
    .sort_values(['artist', 'song'])
    .reset_index(drop=True)
    .assign(id='', MISSING='')
)

# (rows, columns) of the full chart data and of the unique-song table
df_billboard.shape, df_billboard_songs.shape

((330087, 7), (29681, 4))

### 1.2M Songs with Metadata (csv)
https://www.kaggle.com/datasets/rodolfofigueroa/spotify-12m-songs

In [3]:
# Spotify 1.2M+ Songs
# via:  https://www.kaggle.com/datasets/
url_1M_songs = r'D:\RYERSON\820\Datasets\Spotify 1.2M+ Songs\tracks_features.csv'

# dataframe holding the metadata for this large set of songs
df_1M_songs = pd.read_csv(url_1M_songs)

# plain Python list of every song id in the 1M dataset
metadata_ids_csv = df_1M_songs['id'].tolist()

### 8.7M Songs with Metadata (SQL)

In [4]:
# all ids from the SQLite database:
# the csv has no header row, so its single column is named 'id' on read
# and then converted straight to a plain Python list
metadata_ids_SQL = (
    pd.read_csv('all_ids_sql.csv', header=None, names=['id'])['id']
    .to_list()
)

### Combine Ids For Datasets with Metadata

In [5]:
# one set of ids from both sources: fast membership tests, no duplicates
all_metadata_ids = set(metadata_ids_csv).union(metadata_ids_SQL)
len(all_metadata_ids)

9592981

### Get Track IDs using API

### TEMPORARY TOKEN WORKFLOW

##### get a temporary authorization token from: https://developer.spotify.com/console/get-search-item

In [6]:
# input the temporary token
# getpass hides what is typed, so the token is not echoed into the saved
# cell output (plain input() leaves the secret visible in the notebook)
TEMP_TOKEN = getpass.getpass('Enter token: ')

# create a spotify object authenticated with that token
spotify = spotipy.Spotify(auth=TEMP_TOKEN)

Enter token: [REDACTED]


In [62]:
## TODO, add a 'MISSING' tag somewhere

# helper function
def find_id(track_title, artist_name, metadata_ids, client=None):
    """
    Search Spotify for a track and return the best id together with a status tag.

    Parameters
    ----------
    track_title : str
        Song title, inserted into the query after 'track:'.
    artist_name : str
        Artist name, inserted into the query after 'artist:'.
    metadata_ids : container of str
        Ids we already hold metadata for; only needs to support `in`
        (a set keeps the lookup fast).
    client : optional
        Object exposing `search(q=..., type=...)` in the style of
        `spotipy.Spotify`. When omitted, the notebook-level `spotify`
        object is used, so existing three-argument calls keep working.

    Returns
    -------
    tuple (track_id, status)
        (id, 'matched')       the first search result whose id is in `metadata_ids`
        (first id, 'MISSING') results were returned but none is in `metadata_ids`
        ('', 'MISSING')       the search returned no results

    Notes
    -----
    For searches with multiple results, all ids were identical in the test
    cases that were run. Searches with no results were spot-checked in the
    Spotify player and the songs were not on Spotify. Some searches fail with
    an HTTP 404 even though the track seems to exist on Spotify; the cause is
    unknown. Errors raised by the search call are not caught here and
    propagate to the caller.
    """
    if client is None:
        client = spotify  # fall back to the client created in the token cell

    query = 'artist:' + artist_name + ' track:' + track_title
    items = client.search(q=query, type='track')['tracks']['items']

    if not items:  # track doesn't exist on Spotify
        return '', 'MISSING'

    # prefer the first result we already have metadata for
    for item in items:
        if item['id'] in metadata_ids:
            return item['id'], 'matched'

    # nothing matched: fall back to the 0th id and flag it
    return items[0]['id'], 'MISSING'

In [63]:
%%time
# TEST: look up a real song through the live API; the recorded output shows
# an id tagged 'matched', i.e. one of the results is in all_metadata_ids
find_id("You Can't Turn Me Off (In The Middle Of Turning Me On)", 'High Inergy', all_metadata_ids)

Wall time: 304 ms


('7CfCUjAZFndcpAWLogl9J3', 'matched')

In [65]:
# TEST: a made-up title/artist; the recorded output is ('', 'MISSING'),
# the value find_id returns when the search has no results
find_id("You Can't Find this (SONG)", 'Low Unergy', all_metadata_ids)

('', 'MISSING')

### Add Spotify IDs to billboard songs matched in the datasets

In [9]:
# load saved csv if required
# keep_default_na=False: blank cells come back as '' instead of NaN, which
# the `!= ''` checks in later cells rely on
df_billboard_songs = pd.read_csv('df_billboard_songs.csv', keep_default_na=False)

# number of distinct values in the id column
df_billboard_songs['id'].nunique()

14291

In [10]:
# start over at
# rows whose index is below start_over_at-1 are skipped, so an interrupted
# run can be resumed without re-querying rows that were already handled
start_over_at = 30000

# populate df_billboard_songs with ids, where available
for i, row in df_billboard_songs.iterrows():

    # start over at
    if i < start_over_at-1:
        continue

    # show status update
    if i%10 == 0:
        print(i, end='  ')
    if i%100 == 0:
        print()

    # start over where we finished (don't overwrite known ids)
    # NOTE: .at[i, col] writes directly into the DataFrame. The previous
    # chained form (df[col].iloc[i] = ...) can assign into a temporary copy
    # and does nothing under pandas Copy-on-Write.
    if df_billboard_songs.at[i, 'id'] != '':
        continue

    # columns are read by label; integer access such as row[0] on a
    # label-indexed Series is deprecated in pandas
    artist = row['artist']
    song = row['song']
    try:
        track_id, status = find_id(song, artist, all_metadata_ids)
    except Exception:  # not a bare except, so Ctrl-C / kernel interrupt still stops the loop
        # if there is an error, tag the row 'ERROR' and leave id blank
        print('ERROR:  ', artist, song)
        df_billboard_songs.at[i, 'MISSING'] = 'ERROR'
    else:
        # no error: store the id (possibly '') and its 'matched'/'MISSING' tag
        df_billboard_songs.at[i, 'id'] = track_id
        df_billboard_songs.at[i, 'MISSING'] = status

    # save every 1000 rows, if new (only reached for rows that were looked up)
    if i%1000 == 0:
        df_billboard_songs.to_csv('df_billboard_songs_TEMP.csv', index=False)

# save final dataframe
df_billboard_songs.to_csv('df_billboard_songs.csv', index=False)

19060  19070  19080  19090  19100  


HTTP Error for GET to https://api.spotify.com/v1/search with Params: {'q': 'artist:P. Diddy & Ginuwine Featuring Loon, Mario Winans & Tammy Ruggeri track:I Need A Girl (Part Two)', 'limit': 10, 'offset': 0, 'type': 'track', 'market': None} returned 404 due to Not found.


ERROR:   P. Diddy & Ginuwine Featuring Loon, Mario Winans & Tammy Ruggeri I Need A Girl (Part Two)
19110  19120  19130  19140  19150  19160  19170  19180  19190  19200  
19210  19220  19230  19240  19250  19260  19270  19280  19290  19300  
19310  19320  19330  19340  19350  19360  19370  19380  19390  19400  
19410  19420  19430  19440  19450  19460  19470  19480  19490  19500  
19510  19520  19530  19540  19550  19560  19570  19580  19590  19600  
19610  19620  19630  19640  19650  19660  19670  

HTTP Error for GET to https://api.spotify.com/v1/search with Params: {'q': "artist:Perry Como And The Fontane Sisters With Mitchell Ayres And His Orchestra track:It's Beginning To Look A Lot Like Christmas", 'limit': 10, 'offset': 0, 'type': 'track', 'market': None} returned 404 due to Not found.


ERROR:   Perry Como And The Fontane Sisters With Mitchell Ayres And His Orchestra It's Beginning To Look A Lot Like Christmas
19680  19690  19700  
19710  19720  19730  19740  19750  19760  19770  19780  19790  19800  
19810  19820  19830  19840  19850  19860  19870  19880  19890  19900  
19910  

HTTP Error for GET to https://api.spotify.com/v1/search with Params: {'q': 'artist:Pitbull Featuring Jennifer Lopez & Claudia Leitte track:We Are One (Ole Ola) [The 2014 FIFA World Cup Official Song]', 'limit': 10, 'offset': 0, 'type': 'track', 'market': None} returned 404 due to Not found.


ERROR:   Pitbull Featuring Jennifer Lopez & Claudia Leitte We Are One (Ole Ola) [The 2014 FIFA World Cup Official Song]
19920  19930  19940  19950  19960  19970  19980  19990  20000  
20010  20020  20030  20040  20050  20060  20070  20080  20090  20100  
20110  20120  

HTTP Error for GET to https://api.spotify.com/v1/search with Params: {'q': "artist:Pras Michel Feat. Ol' Dirty Bastard & Introducing Mya track:Ghetto Supastar (That Is What You Are)", 'limit': 10, 'offset': 0, 'type': 'track', 'market': None} returned 404 due to Not found.


ERROR:   Pras Michel Feat. Ol' Dirty Bastard & Introducing Mya Ghetto Supastar (That Is What You Are)
20130  20140  20150  20160  20170  20180  20190  20200  
20210  20220  20230  20240  20250  20260  20270  20280  20290  20300  
20310  20320  20330  20340  20350  20360  20370  20380  20390  20400  
20410  20420  20430  20440  

HTTP Error for GET to https://api.spotify.com/v1/search with Params: {'q': 'artist:RZA Feat. Method Man & Cappadonna track:Wu-Wear: The Garment Renaissance (From "High School High")', 'limit': 10, 'offset': 0, 'type': 'track', 'market': None} returned 404 due to Not found.


ERROR:   RZA Feat. Method Man & Cappadonna Wu-Wear: The Garment Renaissance (From "High School High")
20450  20460  20470  20480  20490  20500  
20510  20520  20530  20540  20550  20560  20570  20580  20590  20600  
20610  20620  20630  20640  20650  20660  20670  20680  20690  20700  
20710  20720  20730  20740  20750  20760  20770  20780  20790  20800  
20810  20820  20830  20840  20850  20860  20870  20880  20890  20900  
20910  20920  20930  20940  20950  20960  20970  20980  20990  21000  
21010  21020  21030  21040  21050  21060  21070  21080  21090  21100  
21110  21120  21130  21140  21150  21160  21170  21180  21190  21200  
21210  21220  21230  21240  21250  21260  21270  21280  21290  21300  
21310  21320  21330  21340  21350  21360  21370  21380  21390  21400  
21410  21420  21430  21440  21450  21460  21470  21480  21490  21500  
21510  21520  21530  21540  21550  21560  21570  21580  21590  21600  
21610  21620  21630  21640  21650  21660  21670  21680  21690  21700  
217

HTTP Error for GET to https://api.spotify.com/v1/search with Params: {'q': 'artist:Shabba Ranks Featuring Patra And Terri & Monica track:Family Affair (From "Addams Family Values")', 'limit': 10, 'offset': 0, 'type': 'track', 'market': None} returned 404 due to Not found.


ERROR:   Shabba Ranks Featuring Patra And Terri & Monica Family Affair (From "Addams Family Values")
22240  22250  22260  22270  22280  22290  22300  
22310  22320  22330  22340  22350  22360  22370  22380  22390  22400  
22410  22420  22430  22440  22450  22460  22470  22480  

HTTP Error for GET to https://api.spotify.com/v1/search with Params: {'q': "artist:Silkk The Shocker Featuring Master P, Destiny's Child, O'Dell, Mo track:Just Be Straight With Me", 'limit': 10, 'offset': 0, 'type': 'track', 'market': None} returned 404 due to Not found.


ERROR:   Silkk The Shocker Featuring Master P, Destiny's Child, O'Dell, Mo Just Be Straight With Me
22490  22500  
22510  22520  22530  22540  22550  22560  22570  22580  22590  22600  
22610  22620  22630  22640  22650  22660  22670  22680  22690  22700  
22710  22720  22730  22740  22750  22760  22770  22780  22790  22800  
22810  22820  22830  22840  22850  22860  22870  22880  22890  22900  
22910  22920  22930  22940  22950  22960  22970  22980  22990  23000  
23010  23020  23030  23040  23050  23060  23070  23080  23090  

HTTP Error for GET to https://api.spotify.com/v1/search with Params: {'q': 'artist:Steve Allen and His Orchestra with The Copacabana Trio track:Cuando Calienta El Sol (When The Sun Is Hot)', 'limit': 10, 'offset': 0, 'type': 'track', 'market': None} returned 404 due to Not found.


23100  
ERROR:   Steve Allen and His Orchestra with The Copacabana Trio Cuando Calienta El Sol (When The Sun Is Hot)
23110  23120  23130  23140  23150  23160  23170  23180  23190  23200  
23210  23220  23230  23240  23250  23260  23270  23280  23290  23300  
23310  23320  23330  23340  23350  23360  23370  23380  23390  23400  
23410  23420  23430  23440  23450  23460  23470  23480  23490  23500  
23510  23520  23530  23540  23550  23560  23570  23580  23590  23600  
23610  23620  23630  

HTTP Error for GET to https://api.spotify.com/v1/search with Params: {'q': "artist:Talking Heads track:Life During Wartime (This Ain't No Party...This Ain't No Disco...This Ain't", 'limit': 10, 'offset': 0, 'type': 'track', 'market': None} returned 404 due to Not found.


ERROR:   Talking Heads Life During Wartime (This Ain't No Party...This Ain't No Disco...This Ain't
23640  23650  23660  23670  23680  23690  23700  
23710  23720  23730  23740  23750  23760  23770  23780  23790  23800  
23810  23820  23830  23840  23850  23860  23870  23880  23890  23900  
23910  23920  23930  23940  23950  23960  23970  23980  23990  24000  
24010  24020  24030  24040  24050  24060  24070  24080  24090  24100  
24110  24120  24130  24140  24150  24160  24170  24180  24190  24200  
24210  24220  24230  24240  24250  

HTTP Error for GET to https://api.spotify.com/v1/search with Params: {'q': 'artist:The Barden Bellas, The Treblemakers & The BU Harmonics track:Riff Off: Mickey / Like A Virgin / Hit Me With Your Best Shot...', 'limit': 10, 'offset': 0, 'type': 'track', 'market': None} returned 404 due to Not found.


24260  ERROR:   The Barden Bellas, The Treblemakers & The BU Harmonics Riff Off: Mickey / Like A Virgin / Hit Me With Your Best Shot...
24270  24280  24290  24300  
24310  24320  24330  24340  24350  24360  24370  24380  24390  24400  
24410  24420  24430  24440  24450  24460  24470  24480  24490  24500  
24510  24520  24530  24540  24550  24560  24570  24580  24590  24600  
24610  24620  24630  24640  24650  24660  24670  

HTTP Error for GET to https://api.spotify.com/v1/search with Params: {'q': 'artist:The Chi-lites track:There Will Never Be Any Peace (Until God Is Seated At The Conference Table)', 'limit': 10, 'offset': 0, 'type': 'track', 'market': None} returned 404 due to Not found.


ERROR:   The Chi-lites There Will Never Be Any Peace (Until God Is Seated At The Conference Table)
24680  24690  24700  
24710  24720  24730  24740  24750  24760  24770  24780  24790  24800  
24810  24820  24830  24840  24850  24860  24870  24880  24890  24900  
24910  24920  24930  24940  24950  24960  24970  24980  24990  25000  
25010  25020  25030  25040  25050  25060  25070  25080  25090  25100  
25110  25120  25130  25140  25150  25160  25170  25180  25190  25200  
25210  25220  25230  25240  25250  25260  25270  25280  25290  25300  
25310  25320  25330  25340  25350  25360  25370  25380  25390  25400  
25410  25420  25430  25440  25450  25460  25470  25480  25490  25500  
25510  25520  25530  25540  25550  25560  25570  25580  25590  25600  
25610  25620  25630  25640  25650  25660  25670  25680  25690  25700  
25710  25720  25730  25740  25750  25760  25770  25780  25790  25800  
25810  25820  25830  25840  25850  25860  25870  25880  25890  25900  
25910  25920  25930  25940 

HTTP Error for GET to https://api.spotify.com/v1/search with Params: {'q': 'artist:The Partridge Family Starring Shirley Jones Featuring David Cassidy track:Breaking Up Is Hard To Do', 'limit': 10, 'offset': 0, 'type': 'track', 'market': None} returned 404 due to Not found.
HTTP Error for GET to https://api.spotify.com/v1/search with Params: {'q': "artist:The Partridge Family Starring Shirley Jones Featuring David Cassidy track:Doesn't Somebody Want To Be Wanted", 'limit': 10, 'offset': 0, 'type': 'track', 'market': None} returned 404 due to Not found.


ERROR:   The Partridge Family Starring Shirley Jones Featuring David Cassidy Breaking Up Is Hard To Do
ERROR:   The Partridge Family Starring Shirley Jones Featuring David Cassidy Doesn't Somebody Want To Be Wanted


HTTP Error for GET to https://api.spotify.com/v1/search with Params: {'q': 'artist:The Partridge Family Starring Shirley Jones Featuring David Cassidy track:I Woke Up In Love This Morning', 'limit': 10, 'offset': 0, 'type': 'track', 'market': None} returned 404 due to Not found.
HTTP Error for GET to https://api.spotify.com/v1/search with Params: {'q': "artist:The Partridge Family Starring Shirley Jones Featuring David Cassidy track:I'll Meet You Halfway", 'limit': 10, 'offset': 0, 'type': 'track', 'market': None} returned 404 due to Not found.
HTTP Error for GET to https://api.spotify.com/v1/search with Params: {'q': "artist:The Partridge Family Starring Shirley Jones Featuring David Cassidy track:It's One Of Those Nights (Yes Love)", 'limit': 10, 'offset': 0, 'type': 'track', 'market': None} returned 404 due to Not found.


ERROR:   The Partridge Family Starring Shirley Jones Featuring David Cassidy I Woke Up In Love This Morning
26370  ERROR:   The Partridge Family Starring Shirley Jones Featuring David Cassidy I'll Meet You Halfway
ERROR:   The Partridge Family Starring Shirley Jones Featuring David Cassidy It's One Of Those Nights (Yes Love)


HTTP Error for GET to https://api.spotify.com/v1/search with Params: {'q': 'artist:The Partridge Family Starring Shirley Jones Featuring David Cassidy track:Looking Through The Eyes Of Love', 'limit': 10, 'offset': 0, 'type': 'track', 'market': None} returned 404 due to Not found.


ERROR:   The Partridge Family Starring Shirley Jones Featuring David Cassidy Looking Through The Eyes Of Love
26380  26390  

HTTP Error for GET to https://api.spotify.com/v1/search with Params: {'q': 'artist:The Pipes And Drums And The Military Band Of The Royal Scots Dragoon Guards track:Amazing Grace', 'limit': 10, 'offset': 0, 'type': 'track', 'market': None} returned 404 due to Not found.


26400  
ERROR:   The Pipes And Drums And The Military Band Of The Royal Scots Dragoon Guards Amazing Grace
26410  26420  26430  26440  26450  26460  26470  26480  26490  26500  
26510  26520  26530  26540  26550  26560  26570  26580  26590  26600  
26610  26620  26630  26640  26650  26660  26670  26680  26690  26700  
26710  26720  26730  26740  26750  26760  26770  26780  26790  26800  
26810  26820  26830  26840  26850  26860  26870  26880  26890  26900  
26910  26920  26930  26940  26950  

HTTP Error for GET to https://api.spotify.com/v1/search with Params: {'q': 'artist:The Star Wars Intergalactic Droid Choir & Chorale track:What Can You Get A Wookiee For Christmas (When He Already Owns A Comb?)', 'limit': 10, 'offset': 0, 'type': 'track', 'market': None} returned 404 due to Not found.


ERROR:   The Star Wars Intergalactic Droid Choir & Chorale What Can You Get A Wookiee For Christmas (When He Already Owns A Comb?)
26960  26970  26980  26990  27000  
27010  27020  27030  27040  27050  27060  27070  27080  27090  27100  
27110  27120  27130  27140  27150  27160  27170  27180  27190  27200  
27210  27220  27230  27240  27250  27260  27270  27280  27290  27300  
27310  27320  27330  27340  27350  27360  27370  27380  27390  27400  
27410  27420  27430  27440  27450  27460  27470  27480  27490  27500  
27510  27520  27530  27540  27550  27560  27570  

HTTP Error for GET to https://api.spotify.com/v1/search with Params: {'q': 'artist:Thomas Rhett Featuring Reba McEntire, Hillary Scott, Chris Tomlin & Keith Urban track:Be A Light', 'limit': 10, 'offset': 0, 'type': 'track', 'market': None} returned 404 due to Not found.


ERROR:   Thomas Rhett Featuring Reba McEntire, Hillary Scott, Chris Tomlin & Keith Urban Be A Light
27580  27590  27600  
27610  27620  27630  27640  27650  27660  27670  27680  27690  27700  
27710  27720  27730  27740  27750  27760  27770  27780  27790  27800  
27810  27820  27830  27840  27850  27860  27870  27880  27890  27900  
27910  27920  27930  27940  27950  27960  27970  27980  27990  28000  
28010  28020  28030  28040  28050  28060  28070  28080  28090  28100  
28110  28120  28130  28140  28150  28160  28170  28180  28190  28200  
28210  28220  28230  28240  28250  28260  28270  28280  28290  28300  
28310  28320  28330  28340  28350  28360  28370  28380  28390  28400  
28410  28420  28430  28440  28450  28460  28470  28480  

HTTP Error for GET to https://api.spotify.com/v1/search with Params: {'q': 'artist:Tyler, The Creator Featuring Brent Faiyaz & Fana Hues track:Sweet / I Thought You Wanted To Dance', 'limit': 10, 'offset': 0, 'type': 'track', 'market': None} returned 404 due to Not found.


ERROR:   Tyler, The Creator Featuring Brent Faiyaz & Fana Hues Sweet / I Thought You Wanted To Dance
28490  28500  
28510  28520  28530  28540  28550  28560  28570  28580  28590  28600  
28610  28620  28630  28640  28650  28660  28670  28680  28690  28700  
28710  28720  28730  28740  28750  28760  28770  28780  28790  28800  
28810  28820  28830  28840  28850  28860  28870  28880  28890  28900  
28910  28920  28930  28940  28950  28960  28970  28980  28990  29000  
29010  29020  29030  29040  29050  29060  29070  29080  29090  29100  
29110  29120  29130  29140  29150  29160  29170  29180  29190  29200  
29210  29220  29230  29240  29250  29260  29270  29280  29290  29300  
29310  29320  29330  29340  29350  29360  29370  29380  29390  29400  
29410  29420  29430  29440  29450  29460  29470  29480  29490  29500  
29510  29520  29530  29540  29550  29560  29570  29580  29590  29600  
29610  29620  29630  29640  29650  29660  29670  

HTTP Error for GET to https://api.spotify.com/v1/search with Params: {'q': "artist:will.i.am Featuring Miley Cyrus, French Montana, Wiz Khalifa & DJ Mustard track:Feelin' Myself", 'limit': 10, 'offset': 0, 'type': 'track', 'market': None} returned 404 due to Not found.


29680  ERROR:   will.i.am Featuring Miley Cyrus, French Montana, Wiz Khalifa & DJ Mustard Feelin' Myself


### After Gathering all IDs

In [137]:
# reload the saved song list; keep_default_na=False keeps blank ids as ''
# instead of NaN, so the `!= ""` checks below behave as intended
df_billboard_songs = pd.read_csv('df_billboard_songs.csv', keep_default_na=False)

In [139]:
# how many ids have we added: (distinct id values, rows with a non-blank id)
# NOTE: the distinct count includes the blank id '', so for the recorded
# output the surplus of rows sharing an id is 22357 - (22274 - 1) = 84
df_billboard_songs['id'].nunique(), int((df_billboard_songs['id'] != "").sum())

(22274, 22357)

### Remove Duplicates / Errors

In [140]:
# every row whose non-blank id is shared with at least one other row
# (keep=False marks all members of a duplicated group, not only the repeats)
duplicates = (
    df_billboard_songs
    .loc[lambda frame: frame['id'] != '']
    .loc[lambda frame: frame['id'].duplicated(keep=False)]
)
# written with the index so rows can be traced back to df_billboard_songs
duplicates.to_csv('duplicated_ids.csv', index=True)

In [100]:
# # go through this manually and fix it
#     # in this case NEED TO FIX BOTH BILLBOARD DATAFRAMES

# # OR drop the corrupted rows
# # manually imputing is problematic, error prone, and time consuming

# duplicates.sort_values('id').head(200)

In [141]:
# distinct ids that occur on more than one row
duplicated_ids = set(duplicates['id'].tolist())

In [142]:
# number of rows that currently carry a duplicated id
int(df_billboard_songs['id'].isin(duplicated_ids).sum())

168

In [143]:
# drop from billboard list of known ids
# Rows whose id is shared with another row are tagged 'DUPLICATED' and their
# id is blanked. This uses one boolean mask with .loc instead of a per-row
# chained assignment (df[col].iloc[i] = ...), which may write to a temporary
# copy and is a no-op under pandas Copy-on-Write.
is_duplicated = df_billboard_songs['id'].isin(duplicated_ids)
df_billboard_songs.loc[is_duplicated, 'MISSING'] = 'DUPLICATED'
df_billboard_songs.loc[is_duplicated, 'id'] = ''

In [144]:
# sanity check: rows still carrying a duplicated id (recorded output: 0)
int(df_billboard_songs['id'].isin(duplicated_ids).sum())

0

In [145]:
# checkpoint: save the song list after the duplicated ids were blanked
df_billboard_songs.to_csv('df_billboard_songs - duplicates removed.csv', index=False)

### Which songs are still missing audio feature data?

In [146]:
# reload the de-duplicated song list; keep_default_na=False keeps blank ids
# as '' instead of NaN
df_billboard_songs = pd.read_csv('df_billboard_songs - duplicates removed.csv', keep_default_na=False)

In [147]:
# how many ids do we have now: (distinct id values, rows with a non-blank id)
# the first number is one higher because '' counts as a unique id
df_billboard_songs['id'].nunique(), int((df_billboard_songs['id'] != '').sum())

(22190, 22189)

In [148]:
# number of rows whose id is present in the combined metadata id set
int(df_billboard_songs['id'].isin(all_metadata_ids).sum())

18616

In [152]:
# songs on spotify that we don't have audio features for yet:
# rows with a non-blank id that is absent from the combined metadata id set
need_audio_features = df_billboard_songs[
    (df_billboard_songs['id'] != '')
    & ~df_billboard_songs['id'].isin(all_metadata_ids)
]
# TODO: this cell used to end with an unfinished `need_audio_features.to_`
# expression, which raises AttributeError when run. Add the intended export
# (e.g. a to_csv call) once the target file name is decided.

In [153]:
# number of songs on Spotify that still need audio features
len(need_audio_features)

3573

### Final Datasets

In [None]:
# separate songs into
    # not on spotify (no id)
    # on spotify, but not either dataset (not in either metadata set)
    # matched with dataset (could distinguish using temp_revised)
    
    
 

In [None]:
# how many tracks are in spotify, but don't have data?
    # use API to get audio_features?




In [None]:
# check the percentage of songs accounted for




In [None]:
# now merge with entire billboard list



# re-count the percentage of songs accounted for in the entire set

### 8+ M. Spotify Tracks, Genre, Audio Features (SQLite)
https://www.kaggle.com/datasets/maltegrosse/8-m-spotify-tracks-genre-audio-features

In [None]:
# Location of the 8M+ track SQLite database.
# Raw string, like the other Windows paths in this notebook: without the r
# prefix, '\R', '\8', '\D' and '\s' are invalid escape sequences, which
# Python warns about and plans to reject. The resulting value is unchanged.
url_8M_sql = r'D:\RYERSON\820\Datasets\8+ M. Spotify Tracks, Genre, Audio Features\spotify.sqlite'
url_8M_csv = 'all_audio_features_sql.csv'  # .gitignore (very big)


### Merge all Datasets into Billboard Features, 
export Billboard Features and All Features