In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import matplotlib.ticker as ticker

from IPython.display import Audio
from sklearn import datasets, linear_model
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
from sklearn.manifold import TSNE
from wordcloud import WordCloud

pd.set_option('display.max_columns', 50)
pd.set_option('display.max_colwidth', 200)

In [2]:
#Convert "Spotify Genres" from Desired Dataframe to a Dict with Frequency Counter & Sorted List

def genre_df_to_dict_counter(df_column):
    
    clean_series = df_column.replace('[]', np.nan).dropna()
    
    new = []
    for item in clean_series:
        for ch in ['\\','`','*','_','{','}','[',']','(',')'
                   ,'>','#','+','.','!','$','\'']:

            if ch in item:
                item = item.replace(ch,'')
        new.append(item)
    
    messy_string = ', '.join(new)
    
    clean_string = [item.strip() for item in messy_string.split(',')]
    dict_counter = {k:clean_string.count(k) for k in set(clean_string)}
    sorted_list = sorted(dict_counter.items(), key=lambda x: x[1], reverse=True)
    
    return dict_counter, sorted_list;

In [3]:
#Billboard Top 100 By Week from 1958 to 2021
bb100 = pd.read_csv('Hot Stuff.csv')

In [4]:
#Convert 'WeekID' to DateTime Object & Check First & Last 2 entries by Week/Year
bb100['WeekID'] = pd.DatetimeIndex(bb100['WeekID'])
bb100.sort_values(by='WeekID').iloc[np.r_[0:2, -2:0]]

Unnamed: 0,url,WeekID,Week Position,Song,Performer,SongID,Instance,Previous Week Position,Peak Position,Weeks on Chart
18553,http://www.billboard.com/charts/hot-100/1958-08-02,1958-08-02,63,High School Confidential,Jerry Lee Lewis And His Pumping Piano,High School ConfidentialJerry Lee Lewis And His Pumping Piano,1,,63,1
103337,http://www.billboard.com/charts/hot-100/1958-08-02,1958-08-02,98,Little Serenade,The Ames Brothers,Little SerenadeThe Ames Brothers,1,,98,1
300806,https://www.billboard.com/charts/hot-100/2021-05-29,2021-05-29,61,Almost Maybes,Jordan Davis,Almost MaybesJordan Davis,2,64.0,61,17
152154,https://www.billboard.com/charts/hot-100/2021-05-29,2021-05-29,78,White Teeth,YoungBoy Never Broke Again,White TeethYoungBoy Never Broke Again,1,,78,1


In [5]:
#Features
features = pd.read_excel('Hot 100 Audio Features.xlsx')
features.iloc[np.r_[0:2, -2:0]]

Unnamed: 0,SongID,Performer,Song,spotify_genre,spotify_track_id,spotify_track_preview_url,spotify_track_duration_ms,spotify_track_explicit,spotify_track_album,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,spotify_track_popularity
0,-twistin'-White Silver SandsBill Black's Combo,Bill Black's Combo,-twistin'-White Silver Sands,[],,,,,,,,,,,,,,,,,,
1,¿Dònde Està Santa Claus? (Where Is Santa Claus?)Augie Rios,Augie Rios,¿Dònde Està Santa Claus? (Where Is Santa Claus?),['novelty'],,,,,,,,,,,,,,,,,,
29501,Zorba The GreekHerb Alpert & The Tijuana Brass,Herb Alpert & The Tijuana Brass,Zorba The Greek,"['adult standards', 'easy listening', 'lounge']",3WLEVNohakzZmMpN5W7mHK,https://p.scdn.co/mp3-preview/1841a4034ba42fc07df76af815ba62f4b206466b?cid=b8d3901151d34489a160e3cf0ab1fa94,264853.0,0.0,!!!Going Places!!!,0.531,0.642,5.0,-12.702,1.0,0.323,0.154,0.279,0.0584,0.192,82.107,4.0,35.0
29502,Zunga ZengK7,K7,Zunga Zeng,['freestyle'],0XevPPcCBPovknaBw3lFvh,https://p.scdn.co/mp3-preview/8d5174aeb7d6b7409b7e36dfc42082d9d8a32e6a?cid=b8d3901151d34489a160e3cf0ab1fa94,273000.0,0.0,Swing Batta Swing!,0.846,0.657,1.0,-9.642,1.0,0.14,0.0478,0.000363,0.0392,0.619,103.743,4.0,28.0


In [6]:
#Merge two dataframes and groupby "Year"
bb100_features = pd.merge(bb100, features, on='SongID', how='left')
bb100_features.groupby(bb100['WeekID'].dt.year).sum()

Unnamed: 0_level_0,Week Position,Instance,Previous Week Position,Peak Position,Weeks on Chart,spotify_track_duration_ms,spotify_track_explicit,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,spotify_track_popularity
WeekID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
1958.0,114898,2271,93310.0,98492,15751,4.400233e+08,220.0,1176.284,1229.010900,10419.0,-16394.389,1358.0,139.9752,526.779778,49.612151,365.33560,1159.7662,232486.315,7677.0,88945.0
1959.0,269581,5504,228037.0,230563,41857,1.007030e+09,488.0,2720.471,2816.161800,23293.0,-37849.587,3305.0,314.2018,1167.817035,115.655747,805.89450,2702.1550,536896.740,17652.0,208454.0
1960.0,276645,5582,231872.0,237683,41549,1.042515e+09,500.0,2781.454,2837.884100,23863.0,-39248.075,3299.0,322.1398,1244.358376,157.147703,851.92120,2740.3374,544915.978,17965.0,208729.0
1961.0,271699,5476,226237.0,236093,39233,1.009777e+09,490.0,2682.034,2771.030800,23174.0,-38130.988,3199.0,308.8943,1223.942136,114.340794,858.45987,2637.1148,538170.804,17553.0,204585.0
1962.0,267824,5554,220852.0,234546,39785,1.034380e+09,460.0,2776.080,2844.494900,23995.0,-38599.833,3289.0,307.4025,1235.691196,123.377032,878.63730,2763.0744,545843.508,17893.0,210622.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017.0,261822,6016,215544.0,201976,58627,1.043447e+09,577.0,2817.753,2912.620981,24305.0,-36705.340,3308.0,339.0161,1123.849268,125.714796,858.76870,2696.7655,543713.713,17986.0,234403.0
2018.0,268670,5999,215435.0,213592,55942,1.059102e+09,654.0,2823.306,2955.572000,24054.0,-36529.776,3257.0,349.2476,1080.234162,127.614565,871.04430,2653.6516,555805.479,18212.0,237402.0
2019.0,264577,5990,216123.0,208262,57426,1.027236e+09,580.0,2770.249,2861.443100,23034.0,-35513.502,3160.0,324.3064,1109.281449,113.542895,788.56570,2635.9596,536638.114,17679.0,225255.0
2020.0,264312,5956,211240.0,209580,57408,1.028711e+09,616.0,2765.416,2870.139800,23189.0,-35249.899,3228.0,340.4606,1083.504425,112.076856,824.65650,2604.0602,541578.671,17709.0,232473.0


In [7]:
#Just pick out WeekID and spotify_genre columns from merged dataframe:
bb100_Year_Genre = bb100_features[['WeekID', 'spotify_genre']]
bb100_Year_Genre

Unnamed: 0,WeekID,spotify_genre
0,1965-07-17,['deep adult standards']
1,1965-07-24,['deep adult standards']
2,1965-07-31,['deep adult standards']
3,1965-08-07,['deep adult standards']
4,1965-08-14,['deep adult standards']
...,...,...
330456,2018-10-20,"['dance pop', 'pop', 'post-teen pop']"
330457,1977-05-21,"['classic soul', 'disco', 'funk', 'motown', 'post-disco', 'quiet storm', 'soul']"
330458,1981-05-23,
330459,1973-09-29,"['classic soul', 'funk', 'memphis soul', 'quiet storm', 'soul', 'soul blues', 'southern soul']"


In [8]:
#Add Year column to dataframe
bb100_Year_Genre['Year'] = bb100_Year_Genre['WeekID'].dt.year
bb100_Year_Genre = bb100_Year_Genre[bb100_Year_Genre['Year'].notnull()].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bb100_Year_Genre['Year'] = bb100_Year_Genre['WeekID'].dt.year


In [9]:
#Check Dataframe
bb100_Year_Genre

Unnamed: 0,WeekID,spotify_genre,Year
0,1965-07-17,['deep adult standards'],1965
1,1965-07-24,['deep adult standards'],1965
2,1965-07-31,['deep adult standards'],1965
3,1965-08-07,['deep adult standards'],1965
4,1965-08-14,['deep adult standards'],1965
...,...,...,...
330456,2018-10-20,"['dance pop', 'pop', 'post-teen pop']",2018
330457,1977-05-21,"['classic soul', 'disco', 'funk', 'motown', 'post-disco', 'quiet storm', 'soul']",1977
330458,1981-05-23,,1981
330459,1973-09-29,"['classic soul', 'funk', 'memphis soul', 'quiet storm', 'soul', 'soul blues', 'southern soul']",1973


In [10]:
#Group the dataframe by Year and Convert the dataframes into a list
grouped = bb100_Year_Genre.groupby(['Year'])
l_grouped = list(grouped)
len(l_grouped)

64

In [11]:
#Check 1958 dataframe:
l_grouped[0][1]

Unnamed: 0,WeekID,spotify_genre,Year
332,1958-08-02,['adult standards'],1958
390,1958-11-01,['novelty'],1958
524,1958-11-15,"['adult standards', 'deep adult standards', 'easy listening', 'vocal harmony group']",1958
563,1958-12-20,,1958
947,1958-08-02,"['adult standards', 'brill building pop', 'bubblegum pop', 'folk rock', 'lounge', 'rock-and-roll', 'rockabilly']",1958
...,...,...,...
328812,1958-11-15,"['rock-and-roll', 'rockabilly']",1958
329062,1958-11-08,"['deep adult standards', 'doo-wop']",1958
329087,1958-12-06,"['arkansas country', 'country', 'country rock']",1958
329898,1958-10-25,"['doo-wop', 'rhythm and blues']",1958


In [12]:
#Check 1960 dataframe:
l_grouped[2][1]

Unnamed: 0,WeekID,spotify_genre,Year
210,1960-01-02,,1960
211,1960-02-20,"['latin christian', 'mariachi cristiano']",1960
212,1960-08-20,"['adult standards', 'brill building pop', 'easy listening', 'lounge', 'rock-and-roll', 'rockabilly']",1960
334,1960-02-27,"['bass trap', 'danish electronic', 'edm', 'electronic trap', 'traprun']",1960
335,1960-07-23,,1960
...,...,...,...
330158,1960-08-13,[],1960
330221,1960-08-27,,1960
330363,1960-04-16,"['adult standards', 'brill building pop', 'doo-wop', 'lounge', 'rock-and-roll', 'rockabilly']",1960
330440,1960-05-21,[],1960


In [13]:
#For Each Year, count the number of genre categories used and print them: 
for i in range(len(l_grouped)):
    dc, sl = genre_df_to_dict_counter(l_grouped[i][1]['spotify_genre'])
    print(len(sl))

109
132
141
157
161
187
187
190
223
187
220
237
235
247
228
192
205
211
190
227
215
230
220
199
215
222
181
165
174
166
202
215
248
238
232
237
278
226
224
213
230
176
166
192
178
184
178
198
208
189
179
156
150
175
154
181
194
183
196
157
160
162
142
133


In [14]:
#Conclusion: Doesn't demonstrate exponential growth of new genres. 
#Might need to apply some sort of cumulative summation to get measure intended observation. 