In [22]:
import pandas as pd
import numpy as np
from datetime import datetime as dt
from datetime import timedelta as td
import billboard as bb
import pymongo as pm
from pymongo import MongoClient

In [23]:
def BillBoard_Hot100_Parser(numSongsToTrack=3, numYears=1, isVerbose=False):
    """
    FUNCTIONALITY:
        Walks backwards through the weekly Billboard Hot 100 charts, starting at the most recent
        chart, for up to numYears * 52 weeks.
        For every chart week one document is upserted into the module-level `songs` collection
        (any object exposing `update_one`, e.g. a pymongo collection). `songs` must be bound
        before this function is called.
        Each document is keyed on 'endDate' and holds 'beginDate' plus, for n = 1..numSongsToTrack,
        the fields title_n, artist_n, peakPosition_n, positionLastWeek_n and weeksOnChart_n,
        where n is the song's position on that week's chart.
        Because the write is an upsert on 'endDate', re-running does not create duplicate documents.
    ARGUMENTS:
        numSongsToTrack: how many of the top chart positions to store per week (1..100).
        numYears: how many years (52 chart weeks each) to go back from the current chart.
        isVerbose: when True, prints a running count of the weeks stored so far.
    RETURN VALUE:
        None, updates mongo store.
    ERRORS:
        If numSongsToTrack <= 0 or > 100, prints an error message and returns None.
        If numYears <= 0, prints an error message and returns None.
        If the chart history runs out before numYears * 52 weeks were stored, prints a warning
        and returns after storing what was available.
    NOTES:
        Takes a while; be patient and use the verbosity flag if you want to make sure it is working.
        How long it takes is internet-speed dependent.
        'beginDate' is the previous chart's date plus one day; it is None for a chart that
        reports no previous chart."""

    # 100 is a valid request (the whole chart); only values above it are rejected.
    if numSongsToTrack <= 0 or numSongsToTrack > 100:
        print("Error: Num Songs cannont less than or equal to 0 or greater than 100.")
        return None
    if numYears <= 0:
        print("Error: Num Years cannont be less than or equal to 0.")
        return None

    dateFormat = "%Y-%m-%d"

    #Gets most recent chart
    chart = bb.ChartData('hot-100')

    numWeeksGotten = 0
    numWeeks = numYears * 52

    # Date of the last chart that was stored; stays None until the first week is processed.
    endDate = None

    #while there is another chart to get and number of weeks doesn't exceed that passed in.
    while (chart.date and numWeeksGotten < numWeeks):

        # The billboard interface sometimes hands back a chart without entries. In that case
        # ask again for the chart one day earlier than the last known good date (or, on the
        # very first iteration, one day earlier than the empty chart's own date).
        try:
            chart[0]
        except IndexError:
            retryFrom = dt.strptime(endDate or chart.date, dateFormat) + td(days=-1)
            chart = bb.ChartData('hot-100', retryFrom.strftime(dateFormat))
            # Still nothing usable: stop here instead of indexing into an empty chart.
            if not chart.date or len(chart) == 0:
                break

        # beginDate = day after the previous chart's date.
        endDate = chart.date
        previousDate = chart.previousDate
        if previousDate:
            dateTimePreviousPlusOne = dt.strptime(previousDate, dateFormat) + td(days=1)
            strBeginDate = dateTimePreviousPlusOne.strftime(dateFormat)
        else:
            # No previous chart reported, so there is nothing to derive a begin date from.
            strBeginDate = None

        # One flat document per chart week.
        dictToAppend = {'endDate': endDate, 'beginDate': strBeginDate}

        # Field suffix is the 1-based chart position.
        for ii in range(numSongsToTrack):
            entry = chart[ii]
            strII = str(ii + 1)

            dictToAppend['title_' + strII] = entry.title
            dictToAppend['artist_' + strII] = entry.artist
            dictToAppend['peakPosition_' + strII] = entry.peakPos
            dictToAppend['positionLastWeek_' + strII] = entry.lastPos
            dictToAppend['weeksOnChart_' + strII] = entry.weeks

        songs.update_one({'endDate': endDate}, {'$set': dictToAppend}, upsert=True)
        numWeeksGotten += 1

        if isVerbose:
            print(numWeeksGotten, " Weeks Added")

        # End of the chart history: requesting a chart for "no date" would not go further back.
        if not previousDate:
            break

        #gets previous chart
        chart = bb.ChartData('hot-100', previousDate)

    if numWeeksGotten < numWeeks:
        print("Warning: numYears exceeds total history of top-100 chart.")

    return None

In [24]:
def get_All_Time_200(numToGet=200):
    """
    FUNCTIONALITY:
        Fetches the 'greatest-billboard-200-albums' chart and upserts its top <numToGet> entries
        into the module-level `songs` collection (any object exposing `update_one`, e.g. a
        pymongo collection). `songs` must be bound before this function is called.
        One document is written per chart entry, keyed on 'rank' and holding 'title',
        'artist' and 'rank'. Entries already present are updated rather than duplicated.
    RETURN VALUE:
        None, but updates Mongo Store
    ERRORS:
        Prints error message and returns None if numToGet <= 0.
        Prints warning and stores the full chart if numToGet > 200."""

    #Error Messages
    if numToGet <= 0:
        print("Error: Cannot Get 0 or Less Songs")
        return None
    if numToGet > 200:
        print("Warning: Cannont get more than 200 Songs. Full Chart Returned")

    chart = bb.ChartData('greatest-billboard-200-albums')

    # Never index past the end of the chart: an over-sized request (already warned about
    # above) is cut down to however many entries the chart actually has.
    numAvailable = min(numToGet, len(chart))

    for ii in range(numAvailable):
        entry = chart[ii]

        dictToAppend = {
            'title': entry.title,
            'artist': entry.artist,
            'rank': entry.rank,
        }

        songs.update_one({'rank': entry.rank}, {'$set': dictToAppend}, upsert=True)

    return None

    

In [25]:
# Store the top 15 songs of every weekly chart for the last 7 years (7 * 52 chart weeks),
# printing a running count of the weeks stored.
BillBoard_Hot100_Parser(
    numSongsToTrack=15,
    numYears=7,
    isVerbose=True,
)

1  Weeks Added
2  Weeks Added
3  Weeks Added
4  Weeks Added
5  Weeks Added
6  Weeks Added
7  Weeks Added
8  Weeks Added
9  Weeks Added
10  Weeks Added
11  Weeks Added
12  Weeks Added
13  Weeks Added
14  Weeks Added
15  Weeks Added
16  Weeks Added
17  Weeks Added
18  Weeks Added
19  Weeks Added
20  Weeks Added
21  Weeks Added
22  Weeks Added
23  Weeks Added
24  Weeks Added
25  Weeks Added
26  Weeks Added
27  Weeks Added
28  Weeks Added
29  Weeks Added
30  Weeks Added
31  Weeks Added
32  Weeks Added
33  Weeks Added
34  Weeks Added
35  Weeks Added
36  Weeks Added
37  Weeks Added
38  Weeks Added
39  Weeks Added
40  Weeks Added
41  Weeks Added
42  Weeks Added
43  Weeks Added
44  Weeks Added
45  Weeks Added
46  Weeks Added
47  Weeks Added
48  Weeks Added
49  Weeks Added
50  Weeks Added
51  Weeks Added
52  Weeks Added
53  Weeks Added
54  Weeks Added
55  Weeks Added
56  Weeks Added
57  Weeks Added
58  Weeks Added
59  Weeks Added
60  Weeks Added
61  Weeks Added
62  Weeks Added
63  Weeks Added
6

ReadTimeout: HTTPSConnectionPool(host='www.billboard.com', port=443): Read timed out. (read timeout=25)

In [None]:
def main():
    """
    FUNCTIONALITY:
        Connects to MongoDB with the default MongoClient settings, points the module-level
        `songs` name at the `songs` collection of the `top_100_weekly` database and fills it
        with the top 15 songs per week for 55 years via BillBoard_Hot100_Parser.
        Afterwards re-points `songs` at the `songs` collection of the `top_200_All_Time` database.
    RETURN VALUE:
        None."""
    # The parser functions look `songs` up as a module-level name, so it must be bound
    # globally; a plain local assignment here would be invisible to them.
    global songs

    client = MongoClient()

    db = client.top_100_weekly
    songs = db.songs
    BillBoard_Hot100_Parser(numSongsToTrack=15, numYears=55, isVerbose=False)

    db = client.top_200_All_Time
    songs = db.songs
    # NOTE(review): nothing is written to this collection from here; presumably
    # get_All_Time_200() was meant to be called next — confirm.
    
    