In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import matplotlib.ticker as ticker
from IPython.display import Audio
from sklearn import datasets, linear_model
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
from sklearn.manifold import TSNE

sns.set()
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_colwidth', 200)

In [2]:
#Features
features = pd.read_excel('Hot 100 Audio Features.xlsx')

In [3]:
#Billboard Top 100 By Week from 1958 to 2021
bb100 = pd.read_csv('Hot Stuff.csv')

In [4]:
#Convert 'WeekID' to DateTime Object & Check First & Last 2 entries by Week/Year
bb100['WeekID'] = pd.DatetimeIndex(bb100['WeekID'])
bb100.sort_values(by='WeekID').iloc[np.r_[0:2, -2:0]]

Unnamed: 0,url,WeekID,Week Position,Song,Performer,SongID,Instance,Previous Week Position,Peak Position,Weeks on Chart
18553,http://www.billboard.com/charts/hot-100/1958-08-02,1958-08-02,63,High School Confidential,Jerry Lee Lewis And His Pumping Piano,High School ConfidentialJerry Lee Lewis And His Pumping Piano,1,,63,1
103337,http://www.billboard.com/charts/hot-100/1958-08-02,1958-08-02,98,Little Serenade,The Ames Brothers,Little SerenadeThe Ames Brothers,1,,98,1
300806,https://www.billboard.com/charts/hot-100/2021-05-29,2021-05-29,61,Almost Maybes,Jordan Davis,Almost MaybesJordan Davis,2,64.0,61,17
152154,https://www.billboard.com/charts/hot-100/2021-05-29,2021-05-29,78,White Teeth,YoungBoy Never Broke Again,White TeethYoungBoy Never Broke Again,1,,78,1


In [5]:
#Merge BB100 with Features
bb100_features = pd.merge(bb100, features, on='SongID', how='left')
print(bb100_features.shape)
bb100_features.head(2)

(330461, 31)


Unnamed: 0,url,WeekID,Week Position,Song_x,Performer_x,SongID,Instance,Previous Week Position,Peak Position,Weeks on Chart,Performer_y,Song_y,spotify_genre,spotify_track_id,spotify_track_preview_url,spotify_track_duration_ms,spotify_track_explicit,spotify_track_album,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,spotify_track_popularity
0,http://www.billboard.com/charts/hot-100/1965-07-17,1965-07-17,34,Don't Just Stand There,Patty Duke,Don't Just Stand TherePatty Duke,1,45.0,34,4,Patty Duke,Don't Just Stand There,['deep adult standards'],1YhNCQ3XOdTCZgubfX8PgB,,163160.0,0.0,Lost Hits Of The 60's (All Original Artists & Versions),0.574,0.256,7.0,-15.044,1.0,0.0298,0.61,7.7e-05,0.1,0.568,82.331,3.0,21.0
1,http://www.billboard.com/charts/hot-100/1965-07-24,1965-07-24,22,Don't Just Stand There,Patty Duke,Don't Just Stand TherePatty Duke,1,34.0,22,5,Patty Duke,Don't Just Stand There,['deep adult standards'],1YhNCQ3XOdTCZgubfX8PgB,,163160.0,0.0,Lost Hits Of The 60's (All Original Artists & Versions),0.574,0.256,7.0,-15.044,1.0,0.0298,0.61,7.7e-05,0.1,0.568,82.331,3.0,21.0


In [6]:
#Taking some important columns to introduce Spotify Metrics
subset_1 = bb100_features[['WeekID','Song_x', 'spotify_track_preview_url', 
                           'instrumentalness', 'speechiness',
                           'spotify_track_popularity']]
print(subset_1.shape)
subset_1.head(2)

(330461, 6)


Unnamed: 0,WeekID,Song_x,spotify_track_preview_url,instrumentalness,speechiness,spotify_track_popularity
0,1965-07-17,Don't Just Stand There,,7.7e-05,0.0298,21.0
1,1965-07-24,Don't Just Stand There,,7.7e-05,0.0298,21.0


In [7]:
#Remove all Nan's from subset_1
subset_1 = subset_1.dropna()
print(subset_1.shape)
subset_1.head(2)

(169763, 6)


Unnamed: 0,WeekID,Song_x,spotify_track_preview_url,instrumentalness,speechiness,spotify_track_popularity
14,1971-04-24,Don't Knock My Love - Pt. 1,https://p.scdn.co/mp3-preview/5d3332b4ae616cd2157f948f7329b20470714a95?cid=b8d3901151d34489a160e3cf0ab1fa94,7e-06,0.0287,26.0
15,1971-05-01,Don't Knock My Love - Pt. 1,https://p.scdn.co/mp3-preview/5d3332b4ae616cd2157f948f7329b20470714a95?cid=b8d3901151d34489a160e3cf0ab1fa94,7e-06,0.0287,26.0


In [8]:
#Remove all duplicate songs from subset_1
subset_1 = subset_1.drop_duplicates(subset='Song_x')
print(subset_1.shape)
subset_1.head(2)

(12689, 6)


Unnamed: 0,WeekID,Song_x,spotify_track_preview_url,instrumentalness,speechiness,spotify_track_popularity
14,1971-04-24,Don't Knock My Love - Pt. 1,https://p.scdn.co/mp3-preview/5d3332b4ae616cd2157f948f7329b20470714a95?cid=b8d3901151d34489a160e3cf0ab1fa94,7e-06,0.0287,26.0
39,1989-09-30,Don't Know Much,https://p.scdn.co/mp3-preview/49fff23849dc6ec86909dd299908100857831882?cid=b8d3901151d34489a160e3cf0ab1fa94,1.1e-05,0.0327,60.0


In [9]:
#Sort by Instrumentalness Score
Ins = subset_1.sort_values(by='instrumentalness', ascending=False)
Ins.head(3)

Unnamed: 0,WeekID,Song_x,spotify_track_preview_url,instrumentalness,speechiness,spotify_track_popularity
11237,1964-01-25,Charade,https://p.scdn.co/mp3-preview/b9701ae85e779702866b6201bc1fa9f05a8e306f?cid=b8d3901151d34489a160e3cf0ab1fa94,0.982,0.0335,2.0
6316,2001-07-21,Sandstorm,https://p.scdn.co/mp3-preview/5e8021f2ceaf535ca90d65a51e71b919d0367094?cid=b8d3901151d34489a160e3cf0ab1fa94,0.982,0.0469,62.0
20081,2010-07-31,La La La,https://p.scdn.co/mp3-preview/9a4da05831b80ee3713f72c4e86dbb07bdc1d904?cid=b8d3901151d34489a160e3cf0ab1fa94,0.978,0.273,9.0


In [10]:
Ins.describe()

Unnamed: 0,instrumentalness,speechiness,spotify_track_popularity
count,12689.0,12689.0,12689.0
mean,0.037527,0.071868,39.671369
std,0.149234,0.080615,22.605779
min,0.0,0.0,0.0
25%,0.0,0.0321,21.0
50%,5e-06,0.0412,41.0
75%,0.000573,0.0677,58.0
max,0.982,0.951,100.0


In [11]:
Ins = Ins.loc[(Ins['instrumentalness']>0.85) &
              (Ins['spotify_track_popularity'] > 65) &
              (Ins['speechiness'] < 0.05)]
Ins.shape

(4, 6)

In [12]:
Ins = Ins.sort_values(by='instrumentalness', ascending=False)
Ins

Unnamed: 0,WeekID,Song_x,spotify_track_preview_url,instrumentalness,speechiness,spotify_track_popularity
54009,1962-08-11,Green Onions,https://p.scdn.co/mp3-preview/6afcf8c2efdb3c33264e8aa5836927df8b65b78c?cid=b8d3901151d34489a160e3cf0ab1fa94,0.949,0.0339,66.0
154134,2018-12-22,whoa (mind in awe),https://p.scdn.co/mp3-preview/f69b47e8941ef8f49fc89296f9e4099e7a990c8c?cid=b8d3901151d34489a160e3cf0ab1fa94,0.918,0.0441,77.0
53148,2006-11-04,Irreplaceable,https://p.scdn.co/mp3-preview/b1cd9ecf3b5108363846f497897b5bcbec44343a?cid=b8d3901151d34489a160e3cf0ab1fa94,0.869,0.041,74.0
129884,1999-02-13,We Like To Party!,https://p.scdn.co/mp3-preview/977aa847ebb0df8ed2f1eb78b7427d2881ab1d30?cid=b8d3901151d34489a160e3cf0ab1fa94,0.859,0.0449,67.0


In [13]:
#Get 'Green Onions' preview URL:
Green_Onions = Ins['spotify_track_preview_url'].iloc[0]
Green_Onions

'https://p.scdn.co/mp3-preview/6afcf8c2efdb3c33264e8aa5836927df8b65b78c?cid=b8d3901151d34489a160e3cf0ab1fa94'

In [14]:
#Download URL and save as MP3 file: 
import urllib.request
url = Green_Onions
filename = 'Instrumental_Score_0.949.mp3'
urllib.request.urlretrieve(url, filename)

('Instrumental_Score_0.949.mp3', <http.client.HTTPMessage at 0x7f88716f90a0>)

In [15]:
#Play MP3 file: 
from IPython.display import Audio
Audio('Instrumental_Score_0.949.mp3')