DEPENDENCIES: Python 3, pandas (which itself requires numpy), and billboard.py (an unofficial Billboard charts API).

In [1]:
import billboard as bb
import pandas as pd
from datetime import datetime as dt
from datetime import timedelta as td

ModuleNotFoundError: No module named 'billboard'

This notebook uses the open-source billboard.py scraper to pull Billboard Hot 100 charts going back a desired number of years, tracking a desired number of top songs per week. It then converts the results to JSON and saves them as 'hot_100_charts.json' in the current directory. After use, move 'hot_100_charts.json' out of the folder to avoid pushing it to git every time.

In [None]:
def BillBoard_Hot100_Parser(numSongsToTrack=3, numYears=1, isVerbose=False):
    """
    FUNCTIONALITY:
        Pulls the billboard 'hot-100' chart week by week, starting from the current chart and
        walking backwards through each chart's previousDate, for numYears * 52 weeks.
        Adds the numSongsToTrack top songs of every week to a pandas dataframe w/ 1 Row = 1 Week.
        Rows are indexed by endDate (the chart's date).
        Dataframe columns = endDate, beginDate (day after the previous chart's date), then
        title_<i>, artist_<i>, peakPosition_<i>, positionLastWeek_<i>, weeksOnChart_<i>,
        where <i> is the ranking of the song that week.
        For example the #1 ranked song of the week is title_1, artist_1, etc.
        There is one set of these columns for each song tracked as passed in by numSongsToTrack.
    PARAMETERS:
        numSongsToTrack: how many top-ranked songs to record per week (1 to 100 inclusive).
        numYears: how many years (counted as 52 weeks each) to go back from the current chart.
        isVerbose: if True, prints progress during execution and the first 5 rows upon completion.
    RETURN VALUE:
        Pandas DataFrame
    ERRORS:
        If numSongsToTrack <= 0 or > 100 Prints Error Message and returns None.
        If numYears <= 0 Prints Error Message and returns None.
        If a chart reports no previous date before numYears is reached, prints warning and
        returns the dataframe collected so far (beginDate of that last row is None).
    NOTES:
        Takes a while; be patient and use the verbosity flag if you want to make sure it is working.
        How long it takes is internet-speed dependent."""

    # 100 is a valid request (the whole chart); only values above it are rejected.
    if numSongsToTrack <= 0 or numSongsToTrack > 100:
        print("Error: Num Songs cannont less than or equal to 0 or greater than 100.")
        return None
    if numYears <= 0:
        print("Error: Num Years cannont be less than or equal to 0.")
        return None

    numWeeks = numYears * 52
    if isVerbose:
        print("Total Number of Weeks To Process: {}\n".format(numWeeks))

    # One dict per week; the dataframe is built once at the end because
    # DataFrame.append no longer exists in pandas >= 2.0 and re-copies the
    # whole frame on every call in older versions.
    rows = []

    # Gets current chart
    chart = bb.ChartData('hot-100')

    # while there is another chart to get and number of weeks doesn't exceed that passed in.
    while chart.date and len(rows) < numWeeks:
        endDate = chart.date
        previousDate = chart.previousDate

        # The week begins the day after the previous chart's date.
        # NOTE(review): previousDate is presumably None/empty once the chart
        # history runs out -- confirm against the installed billboard.py.
        if previousDate:
            dayAfterPrevious = dt.strptime(previousDate, "%Y-%m-%d") + td(days=1)
            strBeginDate = dayAfterPrevious.strftime("%Y-%m-%d")
        else:
            strBeginDate = None

        row = {'endDate': endDate, 'beginDate': strBeginDate}

        # Records the top numSongsToTrack entries; column suffix is the 1-based rank.
        for rank in range(1, numSongsToTrack + 1):
            entry = chart[rank - 1]
            suffix = str(rank)
            row['title_' + suffix] = entry.title
            row['artist_' + suffix] = entry.artist
            row['peakPosition_' + suffix] = entry.peakPos
            row['positionLastWeek_' + suffix] = entry.lastPos
            row['weeksOnChart_' + suffix] = entry.weeks

        rows.append(row)

        if isVerbose:
            print("Number of Weeks Processed: {}\n".format(len(rows)))

        # Stop before fetching when there is nothing further back to request,
        # or when enough weeks were collected (avoids one wasted download).
        if not previousDate or len(rows) >= numWeeks:
            break

        # gets previous chart
        chart = bb.ChartData('hot-100', previousDate)

    if len(rows) < numWeeks:
        print("Warning: numYears exceeds total history of top-100 chart.")

    if rows:
        df = pd.DataFrame(rows, index=[row['endDate'] for row in rows])
    else:
        df = pd.DataFrame()

    if isVerbose:
        print(df.head())

    return df

In [None]:
# Build one year of weekly Hot 100 data for the top 3 songs, printing progress.
# The parser defined in this notebook is BillBoard_Hot100_Parser.
df = BillBoard_Hot100_Parser(numSongsToTrack=3, numYears=1, isVerbose=True)

In [None]:
# Write the collected charts to disk as JSON, one top-level key per dataframe index value.
outputPath = "hot_100_charts.json"
df.to_json(path_or_buf=outputPath, orient='index')