In [3]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
import json
import matplotlib.pyplot as plt
import statistics as stats
from credentials import client_id, client_secret
import time

In [4]:
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=client_id,
                                                           client_secret=client_secret))


In [6]:
billboard = 'Clean_Billboard.csv'
billboard_df = pd.read_csv(billboard)
billboard_df

Unnamed: 0,date,rank,song,artist,last-week,peak-rank,weeks-on-board
0,1958-08-04,1,Poor Little Fool,Ricky Nelson,0.0,1,1
1,1958-08-04,2,Patricia,Perez Prado And His Orchestra,0.0,2,1
2,1958-08-04,3,Splish Splash,Bobby Darin,0.0,3,1
3,1958-08-04,4,Hard Headed Woman,Elvis Presley With The Jordanaires,0.0,4,1
4,1958-08-04,5,When,Kalin Twins,0.0,5,1
...,...,...,...,...,...,...,...
326682,2021-03-13,96,Nobody,Dylan Scott,0.0,96,1
326683,2021-03-13,97,Cover Me Up,Morgan Wallen,95.0,52,9
326684,2021-03-13,98,Like I Want You,Giveon,100.0,95,3
326685,2021-03-13,99,Gone,Dierks Bentley,0.0,99,1


# Main Filter!
### This is where we filter the billboard top100 charts to search spotify with

In [7]:
recent_rankings = billboard_df[(billboard_df['date'] > '2015-01-01') &
                              (billboard_df['date'] < '2020-01-01')]

recent_rankings

Unnamed: 0,date,rank,song,artist,last-week,peak-rank,weeks-on-board
294287,2015-01-03,1,Blank Space,Taylor Swift,1.0,1,8
294288,2015-01-03,2,Take Me To Church,Hozier,2.0,2,19
294289,2015-01-03,3,Uptown Funk!,Mark Ronson Featuring Bruno Mars,3.0,3,6
294290,2015-01-03,4,Thinking Out Loud,Ed Sheeran,6.0,4,11
294291,2015-01-03,5,Lips Are Movin,Meghan Trainor,4.0,4,9
...,...,...,...,...,...,...,...
320382,2019-12-28,96,Homesick,Kane Brown,90.0,88,5
320383,2019-12-28,97,Easy,DaniLeigh Featuring Chris Brown,89.0,88,7
320384,2019-12-28,98,Enemies,Post Malone Featuring DaBaby,83.0,16,15
320385,2019-12-28,99,She,Harry Styles,0.0,99,1


# Consolidating the Data Frame
### Making a column of every week a song appears so we can have a distinct dataframe of songs and artists to work with

In [8]:
recent_rankings['weeks_on'] = [[i for i in recent_rankings['date'][recent_rankings['song']==j]] for j in recent_rankings['song']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  recent_rankings['weeks_on'] = [[i for i in recent_rankings['date'][recent_rankings['song']==j]] for j in recent_rankings['song']]


In [9]:
recent_rankings.head()

Unnamed: 0,date,rank,song,artist,last-week,peak-rank,weeks-on-board,weeks_on
294287,2015-01-03,1,Blank Space,Taylor Swift,1.0,1,8,"[2015-01-03, 2015-01-10, 2015-01-17, 2015-01-2..."
294288,2015-01-03,2,Take Me To Church,Hozier,2.0,2,19,"[2015-01-03, 2015-01-10, 2015-01-17, 2015-01-2..."
294289,2015-01-03,3,Uptown Funk!,Mark Ronson Featuring Bruno Mars,3.0,3,6,"[2015-01-03, 2015-01-10, 2015-01-17, 2015-01-2..."
294290,2015-01-03,4,Thinking Out Loud,Ed Sheeran,6.0,4,11,"[2015-01-03, 2015-01-10, 2015-01-17, 2015-01-2..."
294291,2015-01-03,5,Lips Are Movin,Meghan Trainor,4.0,4,9,"[2015-01-03, 2015-01-10, 2015-01-17, 2015-01-2..."


## Dropping Unneccessary Columns
'Date' Will be Readded Later On When We Expand the new 'weeks_on' column

In [10]:
recent_rankings = recent_rankings.drop(columns=['date','rank','last-week'])
recent_rankings

Unnamed: 0,song,artist,peak-rank,weeks-on-board,weeks_on
294287,Blank Space,Taylor Swift,1,8,"[2015-01-03, 2015-01-10, 2015-01-17, 2015-01-2..."
294288,Take Me To Church,Hozier,2,19,"[2015-01-03, 2015-01-10, 2015-01-17, 2015-01-2..."
294289,Uptown Funk!,Mark Ronson Featuring Bruno Mars,3,6,"[2015-01-03, 2015-01-10, 2015-01-17, 2015-01-2..."
294290,Thinking Out Loud,Ed Sheeran,4,11,"[2015-01-03, 2015-01-10, 2015-01-17, 2015-01-2..."
294291,Lips Are Movin,Meghan Trainor,4,9,"[2015-01-03, 2015-01-10, 2015-01-17, 2015-01-2..."
...,...,...,...,...,...
320382,Homesick,Kane Brown,88,5,"[2019-11-30, 2019-12-07, 2019-12-14, 2019-12-2..."
320383,Easy,DaniLeigh Featuring Chris Brown,88,7,"[2019-11-16, 2019-11-23, 2019-11-30, 2019-12-0..."
320384,Enemies,Post Malone Featuring DaBaby,16,15,"[2019-09-21, 2019-09-28, 2019-10-05, 2019-10-1..."
320385,She,Harry Styles,99,1,[2019-12-28]


# Checking Artist Names for any Strange symbols that might hurt the search process. 
### Things like ['featuring','&','x','with','+',',']
Every decade introduces new roadblocks with trying to automatically search spotify. Every failed search will be manually searched in the next step, non-alphanumeric characters do not fly with spotify searching


In [11]:
[i.split() for i in recent_rankings['artist'] if len(i.split()) > 2]

[['Mark', 'Ronson', 'Featuring', 'Bruno', 'Mars'],
 ['Ariana', 'Grande', '&', 'The', 'Weeknd'],
 ['Big', 'Sean', 'Featuring', 'E-40'],
 ['Nicki',
  'Minaj',
  'Featuring',
  'Drake,',
  'Lil',
  'Wayne',
  '&',
  'Chris',
  'Brown'],
 ['I', 'LOVE', 'MAKONNEN', 'Featuring', 'Drake'],
 ['Fall', 'Out', 'Boy'],
 ['James', 'Newton', 'Howard', 'Featuring', 'Jennifer', 'Lawrence'],
 ['Jessie', 'J,', 'Ariana', 'Grande', '&', 'Nicki', 'Minaj'],
 ['Calvin', 'Harris', 'Featuring', 'John', 'Newman'],
 ['Iggy', 'Azalea', 'Featuring', 'M0'],
 ['Jeremih', 'Featuring', 'YG'],
 ['Usher', 'Featuring', 'Juicy', 'J'],
 ['Iggy', 'Azalea', 'Featuring', 'Rita', 'Ora'],
 ['Craig', 'Wayne', 'Boyd'],
 ['Alesso', 'Featuring', 'Tove', 'Lo'],
 ['Nicki', 'Minaj', 'Featuring', 'Beyonce'],
 ['Rich', 'Gang', 'Featuring', 'Young', 'Thug', '&', 'Rich', 'Homie', 'Quan'],
 ['Clean', 'Bandit', 'Featuring', 'Jess', 'Glynne'],
 ['Trey', 'Songz', 'Featuring', 'Nicki', 'Minaj'],
 ['DJ',
  'Khaled',
  'Featuring',
  'Chris',
  

# Searching for Spotify Song ID (Round 1)
Some filtering to increase the success rate of each automatic search.
The further we go back on the Billboard 100 list, the greater the number of songs are not available on the spotify platform.

In [12]:
unique_song_dict = {
    'song':[],
    'artist':[],
    'peak_rank':[],
    'total_weeks':[],
    'weeks':[],
    'spotify_id':[]
}

print('Working', end='')
for ind, row in recent_rankings.iterrows():
    musicians = row['artist']
    art = [i.lower().replace("'","") for i in musicians.split()]
    shit = ['featuring','feat.','(featuring','(duet','introducing','&','x','with','+',',']
    artist = ''
    for s in shit:
        if s in art:
            stop_pt = art.index(s)
            artist = ' '.join(art[:stop_pt])
            break
    if artist == '':
        artist = musicians 
        
    song = row['song']
    peak_rank = row['peak-rank']
    weeks_on_board = row['weeks-on-board']
    all_weeks = row['weeks_on']
    
    
    if song in unique_song_dict['song'] and row['artist'] in unique_song_dict['artist']:
        pass
    else:
        try:
            search = sp.search(q=f"{song} {artist}",type='track')

            if search['tracks']['items'][0]['id'] == '':
                z = float('NaN')
            else:
                z = search['tracks']['items'][0]['id']
            
            unique_song_dict['song'].append(song)
            unique_song_dict['artist'].append(row['artist'])
            unique_song_dict['peak_rank'].append(peak_rank)
            unique_song_dict['total_weeks'].append(weeks_on_board)
            unique_song_dict['weeks'].append(all_weeks)
            unique_song_dict['spotify_id'].append(z)
            print('.', end='')

        except:
            unique_song_dict['song'].append(song)
            unique_song_dict['artist'].append(row['artist'])
            unique_song_dict['peak_rank'].append(peak_rank)
            unique_song_dict['total_weeks'].append(weeks_on_board)
            unique_song_dict['weeks'].append(all_weeks)
            unique_song_dict['spotify_id'].append(float('NaN'))
            print(f'Failed to find Spot ID for {song} by {artist}')    


Working..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................Failed to find Spot ID for My Sh*t by A Boogie Wit da Hoodie
.................................................................Failed to find Spot ID for I Dont

# Confirm API Search and Dict Length

In [13]:
for i in unique_song_dict.keys():
    print(len(unique_song_dict[i]))

2440
2440
2440
2440
2440
2440


# Creating a New DataFrame including Spotify ID's found in Round1

In [14]:
spotify_billboard = pd.DataFrame(unique_song_dict)
spotify_billboard

Unnamed: 0,song,artist,peak_rank,total_weeks,weeks,spotify_id
0,Blank Space,Taylor Swift,1,8,"[2015-01-03, 2015-01-10, 2015-01-17, 2015-01-2...",1p80LdxRV74UKvL8gnD7ky
1,Take Me To Church,Hozier,2,19,"[2015-01-03, 2015-01-10, 2015-01-17, 2015-01-2...",1CS7Sd1u5tWkstBhpssyjP
2,Uptown Funk!,Mark Ronson Featuring Bruno Mars,3,6,"[2015-01-03, 2015-01-10, 2015-01-17, 2015-01-2...",32OlwWuMpZ6b0aN2RZOeMS
3,Thinking Out Loud,Ed Sheeran,4,11,"[2015-01-03, 2015-01-10, 2015-01-17, 2015-01-2...",34gCuhDGsG4bRPIf9bb02f
4,Lips Are Movin,Meghan Trainor,4,9,"[2015-01-03, 2015-01-10, 2015-01-17, 2015-01-2...",6gj08XDlv9Duc2fPOxUmVD
...,...,...,...,...,...,...
2435,Nobody But You,Blake Shelton Duet With Gwen Stefani,79,1,[2019-12-28],5M8goiFYynmNxhueYW6grR
2436,Famous Hoes,NLE Choppa,83,1,[2019-12-28],35Xf2ABjPzUXKz19akd7Jj
2437,Cherry,Harry Styles,84,1,[2019-12-28],2IOFZdYYkFxEHVz1w34PoL
2438,Golden,Harry Styles,86,1,[2019-12-28],45S5WTQEGOB1VHr1Q4FuPl


# Getting Results for all songs that failed the Initial Search
### The cell loaded is for the 2015-2020 Billboard range, Newer music is almost all available on Spotify.
The 1990-2000 range has ~330 songs that failed, 133 of which could not be found manually

In [15]:
spotify_billboard[spotify_billboard['spotify_id'].isna()]

Unnamed: 0,song,artist,peak_rank,total_weeks,weeks,spotify_id
834,My Sh*t,A Boogie Wit da Hoodie,98,1,"[2016-11-05, 2016-11-12, 2016-11-19, 2016-11-2...",
900,I Dont Wanna Live Forever (Fifty Shades Darker),Zayn / Taylor Swift,6,1,"[2016-12-31, 2017-01-07, 2017-01-14, 2017-01-2...",
1240,F**k Love,XXXTENTACION Featuring Trippie Redd,41,1,"[2017-09-16, 2017-09-23, 2017-09-30, 2017-10-0...",
1293,Too Hotty,"Quavo, Takeoff & Offset",97,1,"[2017-10-28, 2017-11-04, 2017-11-11, 2017-11-18]",
1316,My Choppa Hate N****s,21 Savage & Metro Boomin,94,1,"[2017-11-18, 2017-11-25]",
1539,Dame Tu Cosita,Pitbull x El Chombo x Karol G Featuring Cutty ...,81,1,"[2018-05-05, 2018-05-12, 2018-05-19, 2018-05-2...",
1620,Bigger > You,"2 Chainz, Drake & Quavo",53,1,"[2018-06-30, 2018-07-07]",
1670,Kream,Iggy Azalea Feauring Tyga,96,1,[2018-07-21],
1722,Ganja Burns,Nicki Minaj,60,1,[2018-08-25],
1797,Dope N****z,Lil Wayne Featuring Snoop Dogg,39,1,[2018-10-13],


# Manually Searching the Tracks that Failed Round 1 using input boxes
## For anything that continues to fail, use 'pass' in your input prompt to skip it and it will be dropped from the DataFrame later

In [16]:
#try again
replace_these = []
for index, row in spotify_billboard[spotify_billboard['spotify_id'].isna()].iterrows():
    found = False
    while found == False:
        print(f"{row['song']} by {row['artist']}")
        to_search = input('What to search? - ')
        if to_search=='pass':
            found=True
        else:
            try:
                search = sp.search(q=f"{to_search}", type='track')
                print(f"{search['tracks']['items'][0]['id']} -- {row['song']} {row['artist']} -- Index: {index}\n")
                replace_these.append((search['tracks']['items'][0]['id'], index))
                found=True
            except:
                print('Could not Find, try again\n')

My Sh*t by A Boogie Wit da Hoodie
What to search? - my shit boogie wit da hoodie
5uQOauh47VFt3B2kV9kRXw -- My Sh*t A Boogie Wit da Hoodie -- Index: 834

I Dont Wanna Live Forever (Fifty Shades Darker) by Zayn / Taylor Swift
What to search? - i dont wanna live forever zayn taylor swift
55n9yjI6qqXh5F2mYvUc2y -- I Dont Wanna Live Forever (Fifty Shades Darker) Zayn / Taylor Swift -- Index: 900

F**k Love by XXXTENTACION Featuring Trippie Redd
What to search? - fuck love xxxtentacion
7AQim7LbvFVZJE3O8TYgf2 -- F**k Love XXXTENTACION Featuring Trippie Redd -- Index: 1240

Too Hotty by Quavo, Takeoff & Offset
What to search? - too hotty quavo
Could not Find, try again

Too Hotty by Quavo, Takeoff & Offset
What to search? - too hotty migos
3hWUIMNBwLNug1O8o2PPRW -- Too Hotty Quavo, Takeoff & Offset -- Index: 1293

My Choppa Hate N****s by 21 Savage & Metro Boomin
What to search? - my choppa hate 21 savage
2D2w9943rsnJOGCrI4aMQp -- My Choppa Hate N****s 21 Savage & Metro Boomin -- Index: 1316



# Replacing NaN Values with the Searches above, Removing any stragglers.
#### Use the append below for any that are way outside of searching

In [17]:
replace_these

[('5uQOauh47VFt3B2kV9kRXw', 834),
 ('55n9yjI6qqXh5F2mYvUc2y', 900),
 ('7AQim7LbvFVZJE3O8TYgf2', 1240),
 ('3hWUIMNBwLNug1O8o2PPRW', 1293),
 ('2D2w9943rsnJOGCrI4aMQp', 1316),
 ('2T7UAAVTIIuMZPOQLXTq1l', 1539),
 ('5S1IUPueD0xE0vj4zU3nSf', 1620),
 ('5mu1uv8RmzDkF8foePK5qa', 1670),
 ('3LHYmz86DxGInsRp3wiiW5', 1722),
 ('2AwyHlbA6f641SEkizD5JG', 1797),
 ('5JEx7HbmvHQQswJCsoo9rA', 1880),
 ('1q9jq5X5vwmewjOa2mHtQ1', 2040),
 ('0B3FovCVaGKS5w1FTidEUP', 2216),
 ('1GeNui6m825V8jP4uKiIaH', 2253),
 ('7q0VdsXafFQIYfk3eZpwTq', 2429)]

In [18]:
# replacing values for the searches above
for i in replace_these:
    spotify_billboard.iloc[i[1],5] = i[0]

# Dropping and Confirming no NaN Values
2015-2020 all songs are on spotify

In [20]:
spotify_billboard[spotify_billboard['spotify_id'].isna()]

Unnamed: 0,song,artist,peak_rank,total_weeks,weeks,spotify_id


In [21]:
#THESE ARE NOT ON SPOTIFY - MANUALLY CONFIRMED IN APP
spotify_billboard = spotify_billboard.dropna()
spotify_billboard[spotify_billboard['spotify_id'].isna()]

Unnamed: 0,song,artist,peak_rank,total_weeks,weeks,spotify_id


# Fixing Total Weeks Column - Now based on length of 'weeks'

In [22]:
new_total_weeks = []
for ind, row in spotify_billboard.iterrows():
    new_total_weeks.append(len(row['weeks']))
spotify_billboard['total_weeks'] = new_total_weeks
spotify_billboard

Unnamed: 0,song,artist,peak_rank,total_weeks,weeks,spotify_id
0,Blank Space,Taylor Swift,1,30,"[2015-01-03, 2015-01-10, 2015-01-17, 2015-01-2...",1p80LdxRV74UKvL8gnD7ky
1,Take Me To Church,Hozier,2,23,"[2015-01-03, 2015-01-10, 2015-01-17, 2015-01-2...",1CS7Sd1u5tWkstBhpssyjP
2,Uptown Funk!,Mark Ronson Featuring Bruno Mars,3,51,"[2015-01-03, 2015-01-10, 2015-01-17, 2015-01-2...",32OlwWuMpZ6b0aN2RZOeMS
3,Thinking Out Loud,Ed Sheeran,4,48,"[2015-01-03, 2015-01-10, 2015-01-17, 2015-01-2...",34gCuhDGsG4bRPIf9bb02f
4,Lips Are Movin,Meghan Trainor,4,21,"[2015-01-03, 2015-01-10, 2015-01-17, 2015-01-2...",6gj08XDlv9Duc2fPOxUmVD
...,...,...,...,...,...,...
2435,Nobody But You,Blake Shelton Duet With Gwen Stefani,79,1,[2019-12-28],5M8goiFYynmNxhueYW6grR
2436,Famous Hoes,NLE Choppa,83,1,[2019-12-28],35Xf2ABjPzUXKz19akd7Jj
2437,Cherry,Harry Styles,84,1,[2019-12-28],2IOFZdYYkFxEHVz1w34PoL
2438,Golden,Harry Styles,86,1,[2019-12-28],45S5WTQEGOB1VHr1Q4FuPl


# Saving DF to CSV - Just Song/Billboard Info and Spotify ID

In [37]:
spotify_billboard.to_csv('SPOTIFY_ID_BILLBOARD_2015-2020.csv', index=False)

# Searching for Audio Features by Spotify ID for all Rows in our latest DF

In [26]:
features = {
    'danceability':[],
    'energy':[],
    'loudness':[],
    'speechiness':[],
    'acousticness':[],
    'instrumentalness':[],
    'liveness':[],
    'valence':[],
    'tempo':[]
}

for h,i in enumerate(spotify_billboard['spotify_id']):
    tot = len(spotify_billboard['spotify_id'])
    try:
        
        search = sp.audio_features(i)
   
        features['danceability'].append(search[0]['danceability'])
        features['energy'].append(search[0]['energy'])
        features['loudness'].append(search[0]['loudness'])
        features['speechiness'].append(search[0]['speechiness'])
        features['acousticness'].append(search[0]['acousticness'])
        features['instrumentalness'].append(search[0]['instrumentalness'])
        features['liveness'].append(search[0]['liveness'])
        features['valence'].append(search[0]['valence'])
        features['tempo'].append(search[0]['tempo'])
    except:
        for j in features.keys():
            features[j].append(float('NaN'))

    print('.', end='')

........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................

# Confirm length of lists in our output dictionary above

In [27]:
for i in features.keys():
    print(len(features[i]))

2440
2440
2440
2440
2440
2440
2440
2440
2440


In [28]:
billboard_spotify_features = spotify_billboard[['song','artist','weeks','spotify_id']]
for i in features.keys():
    billboard_spotify_features[i] = features[i]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  billboard_spotify_features[i] = features[i]


In [29]:
billboard_spotify_features.tail()

Unnamed: 0,song,artist,weeks,spotify_id,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo
2435,Nobody But You,Blake Shelton Duet With Gwen Stefani,[2019-12-28],5M8goiFYynmNxhueYW6grR,0.463,0.653,-5.515,0.0273,0.111,0.000136,0.172,0.233,143.912
2436,Famous Hoes,NLE Choppa,[2019-12-28],35Xf2ABjPzUXKz19akd7Jj,0.732,0.627,-5.693,0.161,0.346,0.0,0.261,0.332,81.481
2437,Cherry,Harry Styles,[2019-12-28],2IOFZdYYkFxEHVz1w34PoL,0.53,0.512,-9.261,0.0259,0.615,0.00453,0.129,0.411,95.946
2438,Golden,Harry Styles,[2019-12-28],45S5WTQEGOB1VHr1Q4FuPl,0.448,0.838,-5.257,0.0557,0.21,0.000131,0.131,0.254,139.863
2439,She,Harry Styles,[2019-12-28],6SQLk9HSNketfgs2AyIiMs,0.535,0.521,-5.942,0.0272,0.000532,0.371,0.19,0.457,140.026


In [30]:
billboard_spotify_features[billboard_spotify_features['danceability']=='NaN']

Unnamed: 0,song,artist,weeks,spotify_id,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo


# expanding the unique track/artist dataframe above by the list of weeks

In [31]:
billboard_spotify_expanded = billboard_spotify_features.explode('weeks').reset_index(drop=True)

In [32]:
billboard_spotify_expanded

Unnamed: 0,song,artist,weeks,spotify_id,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,Blank Space,Taylor Swift,2015-01-03,1p80LdxRV74UKvL8gnD7ky,0.752,0.678,-5.421,0.0646,0.085000,0.000002,0.130,0.583,96.009
1,Blank Space,Taylor Swift,2015-01-10,1p80LdxRV74UKvL8gnD7ky,0.752,0.678,-5.421,0.0646,0.085000,0.000002,0.130,0.583,96.009
2,Blank Space,Taylor Swift,2015-01-17,1p80LdxRV74UKvL8gnD7ky,0.752,0.678,-5.421,0.0646,0.085000,0.000002,0.130,0.583,96.009
3,Blank Space,Taylor Swift,2015-01-24,1p80LdxRV74UKvL8gnD7ky,0.752,0.678,-5.421,0.0646,0.085000,0.000002,0.130,0.583,96.009
4,Blank Space,Taylor Swift,2015-01-24,1p80LdxRV74UKvL8gnD7ky,0.752,0.678,-5.421,0.0646,0.085000,0.000002,0.130,0.583,96.009
...,...,...,...,...,...,...,...,...,...,...,...,...,...
27525,Nobody But You,Blake Shelton Duet With Gwen Stefani,2019-12-28,5M8goiFYynmNxhueYW6grR,0.463,0.653,-5.515,0.0273,0.111000,0.000136,0.172,0.233,143.912
27526,Famous Hoes,NLE Choppa,2019-12-28,35Xf2ABjPzUXKz19akd7Jj,0.732,0.627,-5.693,0.1610,0.346000,0.000000,0.261,0.332,81.481
27527,Cherry,Harry Styles,2019-12-28,2IOFZdYYkFxEHVz1w34PoL,0.530,0.512,-9.261,0.0259,0.615000,0.004530,0.129,0.411,95.946
27528,Golden,Harry Styles,2019-12-28,45S5WTQEGOB1VHr1Q4FuPl,0.448,0.838,-5.257,0.0557,0.210000,0.000131,0.131,0.254,139.863


In [33]:
all_weeks = sorted(list(set(billboard_spotify_expanded['weeks'])))
all_weeks

['2015-01-03',
 '2015-01-10',
 '2015-01-17',
 '2015-01-24',
 '2015-01-31',
 '2015-02-07',
 '2015-02-14',
 '2015-02-21',
 '2015-02-28',
 '2015-03-07',
 '2015-03-14',
 '2015-03-21',
 '2015-03-28',
 '2015-04-04',
 '2015-04-11',
 '2015-04-18',
 '2015-04-25',
 '2015-05-02',
 '2015-05-09',
 '2015-05-16',
 '2015-05-23',
 '2015-05-30',
 '2015-06-06',
 '2015-06-13',
 '2015-06-20',
 '2015-06-27',
 '2015-07-04',
 '2015-07-11',
 '2015-07-18',
 '2015-07-25',
 '2015-08-01',
 '2015-08-08',
 '2015-08-15',
 '2015-08-22',
 '2015-08-29',
 '2015-09-05',
 '2015-09-12',
 '2015-09-19',
 '2015-09-26',
 '2015-10-03',
 '2015-10-10',
 '2015-10-17',
 '2015-10-24',
 '2015-10-31',
 '2015-11-07',
 '2015-11-14',
 '2015-11-21',
 '2015-11-28',
 '2015-12-05',
 '2015-12-12',
 '2015-12-19',
 '2015-12-26',
 '2016-01-02',
 '2016-01-09',
 '2016-01-16',
 '2016-01-23',
 '2016-01-30',
 '2016-02-06',
 '2016-02-13',
 '2016-02-20',
 '2016-02-27',
 '2016-03-05',
 '2016-03-12',
 '2016-03-19',
 '2016-03-26',
 '2016-04-02',
 '2016-04-

# Creating a DF of average audio feature by week

In [34]:
week_details = {
    'week':[],
    'danceability_avg':[],
    'energy_avg':[],
    'loudness_avg':[],
    'speechiness_avg':[],
    'acousticness_avg':[],
    'instrumentalness_avg':[],
    'liveness_avg':[],
    'valence_avg':[],
    'tempo_avg':[]
}

for i in all_weeks:
    print(i)
    filtered_df = billboard_spotify_expanded[billboard_spotify_expanded['weeks']==i]
    
    week_details['week'].append(i)
    week_details['danceability_avg'].append(filtered_df['danceability'].mean())
    week_details['energy_avg'].append(filtered_df['energy'].mean())
    week_details['loudness_avg'].append(filtered_df['loudness'].mean())
    week_details['speechiness_avg'].append(filtered_df['speechiness'].mean())
    week_details['acousticness_avg'].append(filtered_df['acousticness'].mean())
    week_details['instrumentalness_avg'].append(filtered_df['instrumentalness'].mean())
    week_details['liveness_avg'].append(filtered_df['liveness'].mean())
    week_details['valence_avg'].append(filtered_df['valence'].mean())
    week_details['tempo_avg'].append(filtered_df['tempo'].mean())

2015-01-03
2015-01-10
2015-01-17
2015-01-24
2015-01-31
2015-02-07
2015-02-14
2015-02-21
2015-02-28
2015-03-07
2015-03-14
2015-03-21
2015-03-28
2015-04-04
2015-04-11
2015-04-18
2015-04-25
2015-05-02
2015-05-09
2015-05-16
2015-05-23
2015-05-30
2015-06-06
2015-06-13
2015-06-20
2015-06-27
2015-07-04
2015-07-11
2015-07-18
2015-07-25
2015-08-01
2015-08-08
2015-08-15
2015-08-22
2015-08-29
2015-09-05
2015-09-12
2015-09-19
2015-09-26
2015-10-03
2015-10-10
2015-10-17
2015-10-24
2015-10-31
2015-11-07
2015-11-14
2015-11-21
2015-11-28
2015-12-05
2015-12-12
2015-12-19
2015-12-26
2016-01-02
2016-01-09
2016-01-16
2016-01-23
2016-01-30
2016-02-06
2016-02-13
2016-02-20
2016-02-27
2016-03-05
2016-03-12
2016-03-19
2016-03-26
2016-04-02
2016-04-09
2016-04-16
2016-04-23
2016-04-30
2016-05-07
2016-05-14
2016-05-21
2016-05-28
2016-06-04
2016-06-11
2016-06-18
2016-06-25
2016-07-02
2016-07-09
2016-07-16
2016-07-23
2016-07-30
2016-08-06
2016-08-13
2016-08-20
2016-08-27
2016-09-03
2016-09-10
2016-09-17
2016-09-24

In [35]:
for i in week_details.keys():
    print(len(week_details[i]))

261
261
261
261
261
261
261
261
261
261


# Save Final CSV to Plot

In [36]:
average_features_by_week = pd.DataFrame(week_details)
average_features_by_week.to_csv('Average_Features/avg_audio_features(2015-2020).csv', index=False)
average_features_by_week

Unnamed: 0,week,danceability_avg,energy_avg,loudness_avg,speechiness_avg,acousticness_avg,instrumentalness_avg,liveness_avg,valence_avg,tempo_avg
0,2015-01-03,0.630767,0.675382,-6.020893,0.088619,0.171254,0.001111,0.188829,0.488739,122.893437
1,2015-01-10,0.639398,0.686051,-5.888417,0.087129,0.155584,0.001137,0.195837,0.495719,121.174893
2,2015-01-17,0.636223,0.679022,-5.916544,0.085244,0.173954,0.001134,0.185470,0.482457,122.007786
3,2015-01-24,0.631269,0.685089,-5.848577,0.080666,0.157003,0.001616,0.185337,0.492388,123.387298
4,2015-01-31,0.629144,0.684416,-5.823048,0.078931,0.163119,0.001132,0.187237,0.487552,122.748769
...,...,...,...,...,...,...,...,...,...,...
256,2019-11-30,0.683455,0.615040,-6.208168,0.138462,0.204399,0.009415,0.158438,0.502270,116.301871
257,2019-12-07,0.673324,0.595696,-6.431922,0.126662,0.253905,0.009235,0.160195,0.505601,116.956676
258,2019-12-14,0.665743,0.584634,-6.939248,0.121791,0.286280,0.009328,0.163158,0.525637,118.965317
259,2019-12-21,0.661634,0.592891,-6.766644,0.103347,0.265661,0.007923,0.184449,0.516924,116.072416


# Searching for good audio-feature baselines

In [113]:
tracks= []
for track_to_search_for_baseline in tracks:    
    baseline = sp.search(q=track_to_search_for_baseline, type='track')
    track_id = baseline['tracks']['items'][0]['id']

    track_baseline_search = {

    }

    search = sp.audio_features(track_id)
    print(f"Track ID: {track_id}\n")
    print(f"Danceability: {round(search[0]['danceability'],4)}")
    print(f"Energy: {search[0]['energy']}")
    print(f"Loudness: {search[0]['loudness']}")
    print(f"Speechiness: {search[0]['speechiness']}")
    print(f"Acousticness: {search[0]['acousticness']}")
    print(f"Instrumentalness: {round(search[0]['instrumentalness'],4)}")
    print(f"Liveness: {search[0]['liveness']}")
    print(f"Valence: {search[0]['valence']}")
    print(f"Tempo: {search[0]['tempo']}")
    print('\n\n')
    track_baseline_search

Track ID: 2pqi29h8djSqdSAbdjI1Ce

Danceability: 0.79
Energy: 0.628
Loudness: -5.312
Speechiness: 0.0654
Acousticness: 0.0783
Instrumentalness: 0
Liveness: 0.103
Valence: 0.248
Tempo: 93.001


{}

In [38]:
nineties = pd.read_csv('SPOTIFY_ID_BILLBOARD_1990-2000.csv')
twothousands = pd.read_csv('SPOTIFY_ID_BILLBOARD_2000-2010.csv')
twentyten = pd.read_csv('SPOTIFY_ID_BILLBOARD_2010-2014.csv')
twentyfifteen = pd.read_csv('SPOTIFY_ID_BILLBOARD_2014-2018.csv')
most_recent = pd.read_csv('SPOTIFY_ID_BILLBOARD_2018ON.csv')

In [39]:
ALL_BILLBOARD_SPOTIFY = pd.concat([nineties, twothousands, twentyten, twentyfifteen, most_recent])
ALL_BILLBOARD_SPOTIFY.reset_index(drop=True, inplace=True)
ALL_BILLBOARD_SPOTIFY

Unnamed: 0,song,artist,peak_rank,total_weeks,weeks,spotify_id
0,Another Day In Paradise,Phil Collins,1,9,"['1990-01-06', '1990-01-13', '1990-01-20', '19...",1NCuYqMc8hKMb4cpNTcJbD
1,Rhythm Nation,Janet Jackson,2,9,"['1990-01-06', '1990-01-13', '1990-01-20', '19...",4nTYxxF8iWBq54LO3dBUie
2,Dont Know Much,Linda Ronstadt (Featuring Aaron Neville),2,12,"['1990-01-06', '1990-01-13', '1990-01-20', '19...",5z3ZDMP02xF33yCvPFnct3
3,Pump Up The Jam,Technotronic Featuring Felly,4,12,"['1990-01-06', '1990-01-13', '1990-01-20', '19...",21qnJAMtzC6S5SESuqQLEK
4,With Every Beat Of My Heart,Taylor Dayne,5,7,"['1990-01-06', '1990-01-13', '1990-01-20', '19...",3hK26G5cYhJ9KjcUy1aBRk
...,...,...,...,...,...,...
12120,Only Wanna Be With You,Post Malone,74,1,['2021-03-13'],3SawmGBjjq8EOYZJV11cJm
12121,Drunk (And I Dont Wanna Go Home),Elle King & Miranda Lambert,90,1,['2021-03-13'],0QULNNd9z5s35entfiiXoa
12122,Breaking Up Was Easy In The 90s,Sam Hunt,92,1,['2021-03-13'],4sf2L157iEgAR7yrCNLgSq
12123,Nobody,Dylan Scott,96,1,['2021-03-13'],5TWAIHYaOnYg4txfmCgon5


In [None]:
ALL_BILLBOARD_SPOTIFY.to_csv('All_SpotifyIDs_1990_01_to_2021_03.csv', index=False)

In [40]:
features = {
    'danceability':[],
    'energy':[],
    'loudness':[],
    'speechiness':[],
    'acousticness':[],
    'instrumentalness':[],
    'liveness':[],
    'valence':[],
    'tempo':[]
}

for h,i in enumerate(ALL_BILLBOARD_SPOTIFY['spotify_id']):
    tot = len(ALL_BILLBOARD_SPOTIFY['spotify_id'])
    try:
        
        search = sp.audio_features(i)
   
        features['danceability'].append(search[0]['danceability'])
        features['energy'].append(search[0]['energy'])
        features['loudness'].append(search[0]['loudness'])
        features['speechiness'].append(search[0]['speechiness'])
        features['acousticness'].append(search[0]['acousticness'])
        features['instrumentalness'].append(search[0]['instrumentalness'])
        features['liveness'].append(search[0]['liveness'])
        features['valence'].append(search[0]['valence'])
        features['tempo'].append(search[0]['tempo'])
    except:
        for j in features.keys():
            features[j].append(float('NaN'))

    print('.', end='')

........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................

........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................

In [41]:
features

{'danceability': [0.78,
  0.633,
  0.401,
  0.885,
  0.773,
  0.541,
  0.594,
  0.4,
  0.712,
  0.57,
  0.817,
  0.865,
  0.773,
  0.623,
  0.354,
  0.527,
  0.535,
  0.772,
  0.267,
  0.576,
  0.715,
  0.345,
  0.598,
  0.633,
  0.743,
  0.743,
  0.617,
  0.701,
  0.537,
  0.537,
  0.648,
  0.783,
  0.812,
  0.487,
  0.835,
  0.946,
  0.359,
  0.495,
  0.745,
  0.712,
  0.741,
  0.607,
  0.511,
  0.694,
  0.587,
  0.503,
  0.731,
  0.834,
  0.54,
  0.941,
  0.877,
  0.348,
  0.434,
  0.557,
  0.604,
  0.28,
  0.568,
  0.769,
  0.682,
  0.67,
  0.595,
  0.604,
  0.853,
  0.672,
  0.595,
  0.637,
  0.66,
  0.571,
  0.636,
  0.543,
  0.602,
  0.745,
  0.587,
  0.539,
  0.77,
  0.657,
  0.641,
  0.685,
  0.724,
  0.446,
  0.718,
  0.572,
  0.506,
  0.633,
  0.754,
  0.689,
  0.593,
  0.668,
  0.702,
  0.601,
  0.642,
  0.757,
  0.693,
  0.721,
  0.534,
  0.406,
  0.758,
  0.592,
  0.756,
  0.798,
  0.723,
  0.806,
  0.53,
  0.862,
  0.537,
  0.738,
  0.734,
  0.479,
  0.674,
  0.547,
  0.

In [42]:
ALL_BILLBOARD_SPOTIFY['danceability'] = features['danceability']
ALL_BILLBOARD_SPOTIFY['energy'] = features['energy']
ALL_BILLBOARD_SPOTIFY['loudness'] = features['loudness']
ALL_BILLBOARD_SPOTIFY['speechiness'] = features['speechiness']
ALL_BILLBOARD_SPOTIFY['acousticness'] = features['acousticness']
ALL_BILLBOARD_SPOTIFY['instrumentalness'] = features['instrumentalness']
ALL_BILLBOARD_SPOTIFY['liveness'] = features['liveness']
ALL_BILLBOARD_SPOTIFY['valence'] = features['valence']
ALL_BILLBOARD_SPOTIFY['tempo'] = features['tempo']


In [43]:
ALL_BILLBOARD_SPOTIFY

Unnamed: 0,song,artist,peak_rank,total_weeks,weeks,spotify_id,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,Another Day In Paradise,Phil Collins,1,9,"['1990-01-06', '1990-01-13', '1990-01-20', '19...",1NCuYqMc8hKMb4cpNTcJbD,0.780,0.565,-7.320,0.0306,0.73600,0.002690,0.0630,0.365,101.967
1,Rhythm Nation,Janet Jackson,2,9,"['1990-01-06', '1990-01-13', '1990-01-20', '19...",4nTYxxF8iWBq54LO3dBUie,0.633,0.931,-7.752,0.0686,0.05040,0.109000,0.3750,0.676,109.116
2,Dont Know Much,Linda Ronstadt (Featuring Aaron Neville),2,12,"['1990-01-06', '1990-01-13', '1990-01-20', '19...",5z3ZDMP02xF33yCvPFnct3,0.401,0.285,-11.005,0.0327,0.65700,0.000011,0.1580,0.267,130.465
3,Pump Up The Jam,Technotronic Featuring Felly,4,12,"['1990-01-06', '1990-01-13', '1990-01-20', '19...",21qnJAMtzC6S5SESuqQLEK,0.885,0.844,-9.225,0.0733,0.01470,0.000004,0.0494,0.715,124.602
4,With Every Beat Of My Heart,Taylor Dayne,5,7,"['1990-01-06', '1990-01-13', '1990-01-20', '19...",3hK26G5cYhJ9KjcUy1aBRk,0.773,0.893,-4.768,0.0340,0.08270,0.000002,0.0984,0.971,121.370
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12120,Only Wanna Be With You,Post Malone,74,1,['2021-03-13'],3SawmGBjjq8EOYZJV11cJm,0.470,0.709,-4.563,0.0299,0.00124,0.001210,0.3020,0.247,98.036
12121,Drunk (And I Dont Wanna Go Home),Elle King & Miranda Lambert,90,1,['2021-03-13'],0QULNNd9z5s35entfiiXoa,0.612,0.884,-4.400,0.0459,0.00516,0.000000,0.0997,0.626,119.991
12122,Breaking Up Was Easy In The 90s,Sam Hunt,92,1,['2021-03-13'],4sf2L157iEgAR7yrCNLgSq,0.562,0.649,-5.400,0.0494,0.23100,0.000000,0.3410,0.376,145.913
12123,Nobody,Dylan Scott,96,1,['2021-03-13'],5TWAIHYaOnYg4txfmCgon5,0.573,0.747,-6.460,0.0319,0.51900,0.000001,0.0867,0.645,79.952


In [44]:
ALL_BILLBOARD_SPOTIFY.to_csv('SpotifyFeatures_All_1990_to_2021.csv')