In [39]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
import json
import matplotlib.pyplot as plt
import statistics as stats
from credentials import client_id, client_secret
import time

In [40]:
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=client_id,
                                                           client_secret=client_secret))


In [41]:
billboard = 'Clean_Billboard.csv'
billboard_df = pd.read_csv(billboard)
billboard_df

Unnamed: 0,date,rank,song,artist,last-week,peak-rank,weeks-on-board
0,1958-08-04,1,Poor Little Fool,Ricky Nelson,0.0,1,1
1,1958-08-04,2,Patricia,Perez Prado And His Orchestra,0.0,2,1
2,1958-08-04,3,Splish Splash,Bobby Darin,0.0,3,1
3,1958-08-04,4,Hard Headed Woman,Elvis Presley With The Jordanaires,0.0,4,1
4,1958-08-04,5,When,Kalin Twins,0.0,5,1
...,...,...,...,...,...,...,...
326682,2021-03-13,96,Nobody,Dylan Scott,0.0,96,1
326683,2021-03-13,97,Cover Me Up,Morgan Wallen,95.0,52,9
326684,2021-03-13,98,Like I Want You,Giveon,100.0,95,3
326685,2021-03-13,99,Gone,Dierks Bentley,0.0,99,1


# Main Filter!
### This is where we filter the billboard top100 charts to search spotify with

In [None]:
#SET THIS VARIABLE FOR OUTPUT CSV FILE NAMES LATER ON IN THIS NOTEBOOK
filter_range = '2015-2020'

In [264]:
filtered_df = billboard_df[(billboard_df['date'] > '2015-01-01') &
                              (billboard_df['date'] < '2020-01-01')]

filtered_df.reset_index(drop=True)

Unnamed: 0,date,rank,song,artist,last-week,peak-rank,weeks-on-board
0,2015-01-03,1,Blank Space,Taylor Swift,1.0,1,8
1,2015-01-03,2,Take Me To Church,Hozier,2.0,2,19
2,2015-01-03,3,Uptown Funk!,Mark Ronson Featuring Bruno Mars,3.0,3,6
3,2015-01-03,4,Thinking Out Loud,Ed Sheeran,6.0,4,11
4,2015-01-03,5,Lips Are Movin,Meghan Trainor,4.0,4,9
...,...,...,...,...,...,...,...
26095,2019-12-28,96,Homesick,Kane Brown,90.0,88,5
26096,2019-12-28,97,Easy,DaniLeigh Featuring Chris Brown,89.0,88,7
26097,2019-12-28,98,Enemies,Post Malone Featuring DaBaby,83.0,16,15
26098,2019-12-28,99,She,Harry Styles,0.0,99,1


# Consolidating the Data Frame

In [265]:
consolidate = filtered_df[['date','song','artist','peak-rank']].copy()
consolidate['track_art'] = consolidate['song'] + ' ' + consolidate['artist']
consolidate

Unnamed: 0,date,song,artist,peak-rank,track_art
294287,2015-01-03,Blank Space,Taylor Swift,1,Blank Space Taylor Swift
294288,2015-01-03,Take Me To Church,Hozier,2,Take Me To Church Hozier
294289,2015-01-03,Uptown Funk!,Mark Ronson Featuring Bruno Mars,3,Uptown Funk! Mark Ronson Featuring Bruno Mars
294290,2015-01-03,Thinking Out Loud,Ed Sheeran,4,Thinking Out Loud Ed Sheeran
294291,2015-01-03,Lips Are Movin,Meghan Trainor,4,Lips Are Movin Meghan Trainor
...,...,...,...,...,...
320382,2019-12-28,Homesick,Kane Brown,88,Homesick Kane Brown
320383,2019-12-28,Easy,DaniLeigh Featuring Chris Brown,88,Easy DaniLeigh Featuring Chris Brown
320384,2019-12-28,Enemies,Post Malone Featuring DaBaby,16,Enemies Post Malone Featuring DaBaby
320385,2019-12-28,She,Harry Styles,99,She Harry Styles


# Iterate Through the DF to Prepare for Searching
Removing Duplicates, Creating a column for list of all weeks a song is present, ensuring the "peak-rank" is the highest value given to a track during any position in its chart history.

In [266]:
check = {}
peak_ranks = {}
for ind,row in consolidate.iterrows():
    week = row['date']
    peak_rank = max(consolidate['peak-rank'].loc[(consolidate['track_art']==row['track_art'])])
    if row['track_art'] in check.keys():
        check[row['track_art']].append(week)

    else:
        check[row['track_art']] = []
        check[row['track_art']].append(week)
        
        peak_ranks[row['track_art']] = peak_rank


## Create New Columns, Drop Duplicate Rows, Update 'weeks_on_board' 

In [267]:
consolidate['all_weeks'] = consolidate['track_art'].map(check)
consolidate['peak-rank'] = consolidate['track_art'].map(peak_ranks)

consolidated_tosearch = consolidate.drop(columns=['date','track_art']).reset_index(drop=True)

consolidated_tosearch = consolidated_tosearch.drop_duplicates(subset=['song','artist']).reset_index(drop=True)
consolidated_tosearch['weeks_on_board'] = [len(i) for i in consolidated_tosearch['all_weeks']]
consolidated_tosearch

Unnamed: 0,song,artist,peak-rank,all_weeks,weeks_on_board
0,Blank Space,Taylor Swift,1,"[2015-01-03, 2015-01-10, 2015-01-17, 2015-01-2...",29
1,Take Me To Church,Hozier,2,"[2015-01-03, 2015-01-10, 2015-01-17, 2015-01-2...",23
2,Uptown Funk!,Mark Ronson Featuring Bruno Mars,3,"[2015-01-03, 2015-01-10, 2015-01-17, 2015-01-2...",51
3,Thinking Out Loud,Ed Sheeran,4,"[2015-01-03, 2015-01-10, 2015-01-17, 2015-01-2...",48
4,Lips Are Movin,Meghan Trainor,4,"[2015-01-03, 2015-01-10, 2015-01-17, 2015-01-2...",21
...,...,...,...,...,...
2495,Nobody But You,Blake Shelton Duet With Gwen Stefani,79,[2019-12-28],1
2496,Famous Hoes,NLE Choppa,83,[2019-12-28],1
2497,Cherry,Harry Styles,84,[2019-12-28],1
2498,Golden,Harry Styles,86,[2019-12-28],1


# Checking Artist Names for any Strange symbols that might hurt the search process. 
### Things like ['featuring','&','x','with','+',',']
Every decade introduces new roadblocks with trying to automatically search spotify. Every failed search will be manually searched in the next step, non-alphanumeric characters do not fly with spotify searching


In [269]:
[i.split() for i in consolidated_tosearch['artist'] if len(i.split()) > 2]

[['Mark', 'Ronson', 'Featuring', 'Bruno', 'Mars'],
 ['Ariana', 'Grande', '&', 'The', 'Weeknd'],
 ['Big', 'Sean', 'Featuring', 'E-40'],
 ['Nicki',
  'Minaj',
  'Featuring',
  'Drake,',
  'Lil',
  'Wayne',
  '&',
  'Chris',
  'Brown'],
 ['I', 'LOVE', 'MAKONNEN', 'Featuring', 'Drake'],
 ['Fall', 'Out', 'Boy'],
 ['James', 'Newton', 'Howard', 'Featuring', 'Jennifer', 'Lawrence'],
 ['Jessie', 'J,', 'Ariana', 'Grande', '&', 'Nicki', 'Minaj'],
 ['Calvin', 'Harris', 'Featuring', 'John', 'Newman'],
 ['Iggy', 'Azalea', 'Featuring', 'M0'],
 ['Jeremih', 'Featuring', 'YG'],
 ['Usher', 'Featuring', 'Juicy', 'J'],
 ['Iggy', 'Azalea', 'Featuring', 'Rita', 'Ora'],
 ['Craig', 'Wayne', 'Boyd'],
 ['Alesso', 'Featuring', 'Tove', 'Lo'],
 ['Nicki', 'Minaj', 'Featuring', 'Beyonce'],
 ['Rich', 'Gang', 'Featuring', 'Young', 'Thug', '&', 'Rich', 'Homie', 'Quan'],
 ['Clean', 'Bandit', 'Featuring', 'Jess', 'Glynne'],
 ['Trey', 'Songz', 'Featuring', 'Nicki', 'Minaj'],
 ['DJ',
  'Khaled',
  'Featuring',
  'Chris',
  

# Searching for Spotify Song ID (Round 1)
Some filtering to increase the success rate of each automatic search.
The further we go back on the Billboard 100 list, the greater the number of songs are not available on the spotify platform.

In [270]:
unique_song_dict = {
    'song':[],
    'artist':[],
    'peak_rank':[],
    'total_weeks':[],
    'weeks':[],
    'spotify_id':[]
}

print('Working.', end='')
for ind, row in consolidated_tosearch.iterrows():
    musicians = row['artist']
    art = [i.lower().replace("'","") for i in musicians.split()]
    shit = ['featuring','feat.','(featuring','(duet','introducing','&','x','with','+',',']
    artist = ''
    for s in shit:
        if s in art:
            stop_pt = art.index(s)
            artist = ' '.join(art[:stop_pt])
            break
    if artist == '':
        artist = musicians 
        
    song = row['song']
    peak_rank = row['peak-rank']
    weeks_on_board = row['weeks_on_board']
    all_weeks = row['all_weeks']
    
    

    try:
        search = sp.search(q=f"{song} {artist}",type='track')

        if search['tracks']['items'][0]['id'] == '':
            z = float('NaN')
        else:
            z = search['tracks']['items'][0]['id']

        unique_song_dict['song'].append(song)
        unique_song_dict['artist'].append(row['artist'])
        unique_song_dict['peak_rank'].append(peak_rank)
        unique_song_dict['total_weeks'].append(weeks_on_board)
        unique_song_dict['weeks'].append(all_weeks)
        unique_song_dict['spotify_id'].append(z)
        print('.', end='')

    except:
        unique_song_dict['song'].append(song)
        unique_song_dict['artist'].append(row['artist'])
        unique_song_dict['peak_rank'].append(peak_rank)
        unique_song_dict['total_weeks'].append(weeks_on_board)
        unique_song_dict['weeks'].append(all_weeks)
        unique_song_dict['spotify_id'].append(float('NaN'))
        print(f'\nFailed to find Spot ID for {song} by {artist}')    


Working.........................................................
Failed to find Spot ID for Sun Daze by Florida Georgia Line
..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
Failed to find Spot ID for My Sh*t by A Boogie Wit da Hoodie
...............................

# Confirm API Search and Dict Length

In [271]:
for i in unique_song_dict.keys():
    print(len(unique_song_dict[i]))

2500
2500
2500
2500
2500
2500


# Creating a New DataFrame including Spotify ID's found in Round1

In [272]:
spotify_billboard = pd.DataFrame(unique_song_dict)
spotify_billboard

Unnamed: 0,song,artist,peak_rank,total_weeks,weeks,spotify_id
0,Blank Space,Taylor Swift,1,29,"[2015-01-03, 2015-01-10, 2015-01-17, 2015-01-2...",1p80LdxRV74UKvL8gnD7ky
1,Take Me To Church,Hozier,2,23,"[2015-01-03, 2015-01-10, 2015-01-17, 2015-01-2...",1CS7Sd1u5tWkstBhpssyjP
2,Uptown Funk!,Mark Ronson Featuring Bruno Mars,3,51,"[2015-01-03, 2015-01-10, 2015-01-17, 2015-01-2...",32OlwWuMpZ6b0aN2RZOeMS
3,Thinking Out Loud,Ed Sheeran,4,48,"[2015-01-03, 2015-01-10, 2015-01-17, 2015-01-2...",34gCuhDGsG4bRPIf9bb02f
4,Lips Are Movin,Meghan Trainor,4,21,"[2015-01-03, 2015-01-10, 2015-01-17, 2015-01-2...",6gj08XDlv9Duc2fPOxUmVD
...,...,...,...,...,...,...
2495,Nobody But You,Blake Shelton Duet With Gwen Stefani,79,1,[2019-12-28],5M8goiFYynmNxhueYW6grR
2496,Famous Hoes,NLE Choppa,83,1,[2019-12-28],35Xf2ABjPzUXKz19akd7Jj
2497,Cherry,Harry Styles,84,1,[2019-12-28],2IOFZdYYkFxEHVz1w34PoL
2498,Golden,Harry Styles,86,1,[2019-12-28],45S5WTQEGOB1VHr1Q4FuPl


# Getting Results for all songs that failed the Initial Search
### The cell loaded is for the 2015-2020 Billboard range, Newer music is almost all available on Spotify.
The 1990-2000 range has ~330 songs that failed, 133 of which could not be found manually

In [273]:
spotify_billboard[spotify_billboard['spotify_id'].isna()]

Unnamed: 0,song,artist,peak_rank,total_weeks,weeks,spotify_id
56,Sun Daze,Florida Georgia Line,57,12,"[2015-01-03, 2015-01-10, 2015-01-17, 2015-01-2...",
839,My Sh*t,A Boogie Wit da Hoodie,98,12,"[2016-11-05, 2016-11-12, 2016-11-19, 2016-11-2...",
908,I Dont Wanna Live Forever (Fifty Shades Darker),Zayn / Taylor Swift,6,23,"[2016-12-31, 2017-01-07, 2017-01-14, 2017-01-2...",
1258,F**k Love,XXXTENTACION Featuring Trippie Redd,41,20,"[2017-09-16, 2017-09-23, 2017-09-30, 2017-10-0...",
1314,Too Hotty,"Quavo, Takeoff & Offset",97,4,"[2017-10-28, 2017-11-04, 2017-11-11, 2017-11-18]",
1337,My Choppa Hate N****s,21 Savage & Metro Boomin,94,2,"[2017-11-18, 2017-11-25]",
1566,Dame Tu Cosita,Pitbull x El Chombo x Karol G Featuring Cutty ...,81,9,"[2018-05-05, 2018-05-12, 2018-05-19, 2018-05-2...",
1652,Bigger > You,"2 Chainz, Drake & Quavo",53,2,"[2018-06-30, 2018-07-07]",
1705,Kream,Iggy Azalea Feauring Tyga,96,1,[2018-07-21],
1758,Ganja Burns,Nicki Minaj,60,1,[2018-08-25],


# Manually Searching the Tracks that Failed Round 1 using input boxes
## For anything that continues to fail, use 'pass' in your input prompt to skip it and it will be dropped from the DataFrame later

In [274]:
#try again
replace_these = []
for index, row in spotify_billboard[spotify_billboard['spotify_id'].isna()].iterrows():
    found = False
    while found == False:
        print(f"{row['song']} by {row['artist']}")
        to_search = input('What to search? - ')
        if to_search=='pass':
            found=True
        else:
            try:
                search = sp.search(q=f"{to_search}", type='track')
                print(f"{search['tracks']['items'][0]['id']} -- {row['song']} {row['artist']} -- Index: {index}\n")
                replace_these.append((search['tracks']['items'][0]['id'], index))
                found=True
            except:
                print('Could not Find, try again\n')

Sun Daze by Florida Georgia Line
What to search? - sun daze florida georgia line
0El2Zyt68nYySFDG87hZgM -- Sun Daze Florida Georgia Line -- Index: 56

My Sh*t by A Boogie Wit da Hoodie
What to search? - my shit boogie wit da hoodie
5uQOauh47VFt3B2kV9kRXw -- My Sh*t A Boogie Wit da Hoodie -- Index: 839

I Dont Wanna Live Forever (Fifty Shades Darker) by Zayn / Taylor Swift
What to search? - i dont wanna live forever zayn taylor swift
55n9yjI6qqXh5F2mYvUc2y -- I Dont Wanna Live Forever (Fifty Shades Darker) Zayn / Taylor Swift -- Index: 908

F**k Love by XXXTENTACION Featuring Trippie Redd
What to search? - fuck love xxxtentacion
7AQim7LbvFVZJE3O8TYgf2 -- F**k Love XXXTENTACION Featuring Trippie Redd -- Index: 1258

Too Hotty by Quavo, Takeoff & Offset
What to search? - too hotty migos
3hWUIMNBwLNug1O8o2PPRW -- Too Hotty Quavo, Takeoff & Offset -- Index: 1314

My Choppa Hate N****s by 21 Savage & Metro Boomin
What to search? - my choppa hate niggas 21 savage
2D2w9943rsnJOGCrI4aMQp -- My 

# Replacing NaN Values with the Searches above, Removing any stragglers.
#### Use the append below for any that are way outside of searching

In [275]:
replace_these

[('0El2Zyt68nYySFDG87hZgM', 56),
 ('5uQOauh47VFt3B2kV9kRXw', 839),
 ('55n9yjI6qqXh5F2mYvUc2y', 908),
 ('7AQim7LbvFVZJE3O8TYgf2', 1258),
 ('3hWUIMNBwLNug1O8o2PPRW', 1314),
 ('2D2w9943rsnJOGCrI4aMQp', 1337),
 ('2T7UAAVTIIuMZPOQLXTq1l', 1566),
 ('5S1IUPueD0xE0vj4zU3nSf', 1652),
 ('5mu1uv8RmzDkF8foePK5qa', 1705),
 ('3LHYmz86DxGInsRp3wiiW5', 1758),
 ('2AwyHlbA6f641SEkizD5JG', 1833),
 ('5JEx7HbmvHQQswJCsoo9rA', 1918),
 ('1q9jq5X5vwmewjOa2mHtQ1', 2081),
 ('0B3FovCVaGKS5w1FTidEUP', 2265),
 ('7q0VdsXafFQIYfk3eZpwTq', 2488)]

In [276]:
# replacing values for the searches above
for i in replace_these:
    spotify_billboard.iloc[i[1],5] = i[0]

# Dropping and Confirming no NaN Values
2015-2020 all songs are on spotify

In [277]:
#this cell should output an empty dataframe
spotify_billboard[spotify_billboard['spotify_id'].isna()]

Unnamed: 0,song,artist,peak_rank,total_weeks,weeks,spotify_id


In [278]:
spotify_billboard

Unnamed: 0,song,artist,peak_rank,total_weeks,weeks,spotify_id
0,Blank Space,Taylor Swift,1,29,"[2015-01-03, 2015-01-10, 2015-01-17, 2015-01-2...",1p80LdxRV74UKvL8gnD7ky
1,Take Me To Church,Hozier,2,23,"[2015-01-03, 2015-01-10, 2015-01-17, 2015-01-2...",1CS7Sd1u5tWkstBhpssyjP
2,Uptown Funk!,Mark Ronson Featuring Bruno Mars,3,51,"[2015-01-03, 2015-01-10, 2015-01-17, 2015-01-2...",32OlwWuMpZ6b0aN2RZOeMS
3,Thinking Out Loud,Ed Sheeran,4,48,"[2015-01-03, 2015-01-10, 2015-01-17, 2015-01-2...",34gCuhDGsG4bRPIf9bb02f
4,Lips Are Movin,Meghan Trainor,4,21,"[2015-01-03, 2015-01-10, 2015-01-17, 2015-01-2...",6gj08XDlv9Duc2fPOxUmVD
...,...,...,...,...,...,...
2495,Nobody But You,Blake Shelton Duet With Gwen Stefani,79,1,[2019-12-28],5M8goiFYynmNxhueYW6grR
2496,Famous Hoes,NLE Choppa,83,1,[2019-12-28],35Xf2ABjPzUXKz19akd7Jj
2497,Cherry,Harry Styles,84,1,[2019-12-28],2IOFZdYYkFxEHVz1w34PoL
2498,Golden,Harry Styles,86,1,[2019-12-28],45S5WTQEGOB1VHr1Q4FuPl


# Saving DF to CSV - Just Song/Billboard Info and Spotify ID
Make a folder called "Spotify_IDs" if you want to save at this step in the process

In [290]:
spotify_billboard.to_csv(f'Spotify_IDs/SPOTIFY_ID_BILLBOARD_{filter_range}.csv', index=False)

# Searching for Audio Features by Spotify ID for all Rows in our latest DF

In [280]:
features = {
    'danceability':[],
    'energy':[],
    'loudness':[],
    'speechiness':[],
    'acousticness':[],
    'instrumentalness':[],
    'liveness':[],
    'valence':[],
    'tempo':[]
}

for h,i in enumerate(spotify_billboard['spotify_id']):
    tot = len(spotify_billboard['spotify_id'])
    try:
        
        search = sp.audio_features(i)
   
        features['danceability'].append(search[0]['danceability'])
        features['energy'].append(search[0]['energy'])
        features['loudness'].append(search[0]['loudness'])
        features['speechiness'].append(search[0]['speechiness'])
        features['acousticness'].append(search[0]['acousticness'])
        features['instrumentalness'].append(search[0]['instrumentalness'])
        features['liveness'].append(search[0]['liveness'])
        features['valence'].append(search[0]['valence'])
        features['tempo'].append(search[0]['tempo'])
    except:
        for j in features.keys():
            features[j].append(float('NaN'))

    print('.', end='')

........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................

# Confirm length of lists in our output dictionary above

In [281]:
for i in features.keys():
    print(len(features[i]))

2500
2500
2500
2500
2500
2500
2500
2500
2500


In [282]:
billboard_spotify_features = spotify_billboard.copy()

In [283]:
billboard_spotify_features['danceability'] = features['danceability']
billboard_spotify_features['energy'] = features['energy']
billboard_spotify_features['loudness'] = features['loudness']
billboard_spotify_features['speechiness'] = features['speechiness']
billboard_spotify_features['acousticness'] = features['acousticness']
billboard_spotify_features['instrumentalness'] = features['instrumentalness']
billboard_spotify_features['liveness'] = features['liveness']
billboard_spotify_features['valence'] = features['valence']
billboard_spotify_features['tempo'] = features['tempo']


In [284]:
billboard_spotify_features.tail()

Unnamed: 0,song,artist,peak_rank,total_weeks,weeks,spotify_id,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo
2495,Nobody But You,Blake Shelton Duet With Gwen Stefani,79,1,[2019-12-28],5M8goiFYynmNxhueYW6grR,0.463,0.653,-5.515,0.0273,0.111,0.000136,0.172,0.233,143.912
2496,Famous Hoes,NLE Choppa,83,1,[2019-12-28],35Xf2ABjPzUXKz19akd7Jj,0.732,0.627,-5.693,0.161,0.346,0.0,0.261,0.332,81.481
2497,Cherry,Harry Styles,84,1,[2019-12-28],2IOFZdYYkFxEHVz1w34PoL,0.53,0.512,-9.261,0.0259,0.615,0.00453,0.129,0.411,95.946
2498,Golden,Harry Styles,86,1,[2019-12-28],45S5WTQEGOB1VHr1Q4FuPl,0.448,0.838,-5.257,0.0557,0.21,0.000131,0.131,0.254,139.863
2499,She,Harry Styles,99,1,[2019-12-28],6SQLk9HSNketfgs2AyIiMs,0.535,0.521,-5.942,0.0272,0.000532,0.371,0.19,0.457,140.026


In [285]:
billboard_spotify_features[billboard_spotify_features['danceability']=='NaN']

Unnamed: 0,song,artist,peak_rank,total_weeks,weeks,spotify_id,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo


# Save DF with Audio Features - Condensed (optional)
Saves to a folder named 'Features'

In [289]:
billboard_spotify_features.to_csv(f'Features/Features_Condensed_{filter_range}.csv', index=False)

# expanding the unique track/artist dataframe above by the list of weeks

In [313]:
billboard_spotify_expanded = billboard_spotify_features.explode('weeks').reset_index(drop=True)

In [314]:
billboard_spotify_expanded

Unnamed: 0,song,artist,peak_rank,total_weeks,weeks,spotify_id,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,Blank Space,Taylor Swift,1,29,2015-01-03,1p80LdxRV74UKvL8gnD7ky,0.752,0.678,-5.421,0.0646,0.085000,0.000002,0.130,0.583,96.009
1,Blank Space,Taylor Swift,1,29,2015-01-10,1p80LdxRV74UKvL8gnD7ky,0.752,0.678,-5.421,0.0646,0.085000,0.000002,0.130,0.583,96.009
2,Blank Space,Taylor Swift,1,29,2015-01-17,1p80LdxRV74UKvL8gnD7ky,0.752,0.678,-5.421,0.0646,0.085000,0.000002,0.130,0.583,96.009
3,Blank Space,Taylor Swift,1,29,2015-01-24,1p80LdxRV74UKvL8gnD7ky,0.752,0.678,-5.421,0.0646,0.085000,0.000002,0.130,0.583,96.009
4,Blank Space,Taylor Swift,1,29,2015-01-31,1p80LdxRV74UKvL8gnD7ky,0.752,0.678,-5.421,0.0646,0.085000,0.000002,0.130,0.583,96.009
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26095,Nobody But You,Blake Shelton Duet With Gwen Stefani,79,1,2019-12-28,5M8goiFYynmNxhueYW6grR,0.463,0.653,-5.515,0.0273,0.111000,0.000136,0.172,0.233,143.912
26096,Famous Hoes,NLE Choppa,83,1,2019-12-28,35Xf2ABjPzUXKz19akd7Jj,0.732,0.627,-5.693,0.1610,0.346000,0.000000,0.261,0.332,81.481
26097,Cherry,Harry Styles,84,1,2019-12-28,2IOFZdYYkFxEHVz1w34PoL,0.530,0.512,-9.261,0.0259,0.615000,0.004530,0.129,0.411,95.946
26098,Golden,Harry Styles,86,1,2019-12-28,45S5WTQEGOB1VHr1Q4FuPl,0.448,0.838,-5.257,0.0557,0.210000,0.000131,0.131,0.254,139.863


In [315]:
all_weeks = sorted(list(set(billboard_spotify_expanded['weeks'])))
all_weeks

['2015-01-03',
 '2015-01-10',
 '2015-01-17',
 '2015-01-24',
 '2015-01-31',
 '2015-02-07',
 '2015-02-14',
 '2015-02-21',
 '2015-02-28',
 '2015-03-07',
 '2015-03-14',
 '2015-03-21',
 '2015-03-28',
 '2015-04-04',
 '2015-04-11',
 '2015-04-18',
 '2015-04-25',
 '2015-05-02',
 '2015-05-09',
 '2015-05-16',
 '2015-05-23',
 '2015-05-30',
 '2015-06-06',
 '2015-06-13',
 '2015-06-20',
 '2015-06-27',
 '2015-07-04',
 '2015-07-11',
 '2015-07-18',
 '2015-07-25',
 '2015-08-01',
 '2015-08-08',
 '2015-08-15',
 '2015-08-22',
 '2015-08-29',
 '2015-09-05',
 '2015-09-12',
 '2015-09-19',
 '2015-09-26',
 '2015-10-03',
 '2015-10-10',
 '2015-10-17',
 '2015-10-24',
 '2015-10-31',
 '2015-11-07',
 '2015-11-14',
 '2015-11-21',
 '2015-11-28',
 '2015-12-05',
 '2015-12-12',
 '2015-12-19',
 '2015-12-26',
 '2016-01-02',
 '2016-01-09',
 '2016-01-16',
 '2016-01-23',
 '2016-01-30',
 '2016-02-06',
 '2016-02-13',
 '2016-02-20',
 '2016-02-27',
 '2016-03-05',
 '2016-03-12',
 '2016-03-19',
 '2016-03-26',
 '2016-04-02',
 '2016-04-

# Creating a DF of average audio feature by week

In [316]:
average_audio_features = billboard_spotify_expanded.groupby('weeks').mean()[['danceability','energy','loudness','speechiness',
                                                   'acousticness','instrumentalness','liveness','valence','tempo']]
average_audio_features = average_audio_features.rename(columns={'danceability':'danceability_avg',
                                                                        'energy':'energy_avg',
                                                                        'loudness':'loudness_avg',
                                                                        'speechiness':'speechiness_avg',
                                                                        'acousticness':'acousticness_avg',
                                                                        'instrumentalness':'instrumentalness_avg',
                                                                        'liveness':'liveness_avg',
                                                                        'valence':'valence_avg',
                                                                        'tempo':'tempo_avg'})

In [318]:
average_audio_features

Unnamed: 0_level_0,danceability_avg,energy_avg,loudness_avg,speechiness_avg,acousticness_avg,instrumentalness_avg,liveness_avg,valence_avg,tempo_avg
weeks,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2015-01-03,0.63033,0.678523,-6.01788,0.085307,0.173665,0.001144,0.191614,0.493561,122.80917
2015-01-10,0.63922,0.689513,-5.88143,0.083772,0.157524,0.001171,0.198832,0.500751,121.03907
2015-01-17,0.63595,0.682273,-5.91040,0.081830,0.176445,0.001168,0.188154,0.487091,121.89695
2015-01-24,0.62964,0.688593,-5.84467,0.077276,0.159706,0.001681,0.188570,0.496414,123.59163
2015-01-31,0.62859,0.686523,-5.82135,0.075536,0.166731,0.001177,0.190216,0.490854,122.65702
...,...,...,...,...,...,...,...,...,...
2019-11-30,0.68050,0.622630,-6.13101,0.134666,0.201180,0.009509,0.157982,0.505003,115.81626
2019-12-07,0.66947,0.603250,-6.36134,0.124137,0.244060,0.009414,0.161925,0.500593,117.66386
2019-12-14,0.66165,0.593800,-6.85570,0.118458,0.275300,0.009416,0.164756,0.523363,119.27327
2019-12-21,0.65738,0.596610,-6.74606,0.105254,0.262279,0.008002,0.186568,0.517043,116.91367


# Save Final CSV to Plot
Make a folder named Average_Features

In [320]:
average_audio_features.to_csv(f'Average_Features/avg_audio_features({filter_range})_4.27.csv', index=False)