In [1]:
import pandas as pd
import numpy as np
from datetime import datetime as dt
from datetime import timedelta as td
import billboard as bb
import pymongo as pm
from pymongo import MongoClient

In [29]:
# Request the Hot 100 chart for 2016-07-29 and print it.
# (The printed header reports the chart date as 2016-07-30.)
chart_name, chart_date = 'hot-100', '2016-07-29'
chart = bb.ChartData(chart_name, chart_date)
print(chart)

hot-100 chart from 2016-07-30
-----------------------------
1. 'One Dance' by Drake Featuring WizKid & Kyla
2. 'Can't Stop The Feeling!' by Justin Timberlake
3. 'Cheap Thrills' by Sia Featuring Sean Paul
4. 'This Is What You Came For' by Calvin Harris Featuring Rihanna
5. 'Don't Let Me Down' by The Chainsmokers Featuring Daya
6. 'Ride' by twenty one pilots
7. 'Needed Me' by Rihanna
8. 'Panda' by Desiigner
9. 'Don't Mind' by Kent Jones
10. 'Send My Love (To Your New Lover)' by Adele
11. 'Just Like Fire' by P!nk
12. 'Work From Home' by Fifth Harmony Featuring Ty Dolla $ign
13. 'I Took A Pill In Ibiza' by Mike Posner
14. 'H.O.L.Y.' by Florida Georgia Line
15. 'Me Too' by Meghan Trainor
16. 'Controlla' by Drake
17. 'For Free' by DJ Khaled Featuring Drake
18. 'Let It Go' by James Bay
19. 'Too Good' by Drake Featuring Rihanna
20. 'Treat You Better' by Shawn Mendes
21. 'Work' by Rihanna Featuring Drake
22. '7 Years' by Lukas Graham
23. 'Heathens' by twenty one pilots
24. 'Dangerous Woman' by 

In [2]:
# Connect with PyMongo's defaults (no host/port given).
client = pm.MongoClient()

In [3]:
# Print the client to show the host and options it was created with.
print(client)

MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True)


In [None]:
# Handles to the "top_songs" database and its "songs" collection.
# BillBoard_Hot100_Parser writes through the module-level `songs` name,
# so this cell has to run before the parser is called.
db = client['top_songs']
songs = db['songs']

In [41]:
def _shift_chart_date(dateStr, numDays):
    """Returns the "YYYY-MM-DD" string dateStr moved by numDays days (negative = earlier)."""
    shifted = dt.strptime(dateStr, "%Y-%m-%d") + td(days=numDays)
    return shifted.strftime("%Y-%m-%d")


def _chart_has_entries(chart):
    """Returns True when chart[0] exists, i.e. the chart holds at least one entry."""
    try:
        chart[0]
    except IndexError:
        return False
    return True


def _build_week_document(chart, numSongsToTrack, beginDate, endDate):
    """Builds the dict stored for one chart week: the two dates plus the fields of each tracked song."""
    weekDoc = {'endDate': endDate, 'beginDate': beginDate}
    for ii in range(numSongsToTrack):
        try:
            entry = chart[ii]
        except IndexError:
            # The chart has fewer entries than requested; keep what exists.
            break
        rank = str(ii + 1)
        weekDoc['title_' + rank] = entry.title
        weekDoc['artist_' + rank] = entry.artist
        weekDoc['peakPosition_' + rank] = entry.peakPos
        weekDoc['positionLastWeek_' + rank] = entry.lastPos
        weekDoc['weeksOnChart_' + rank] = entry.weeks
    return weekDoc


def BillBoard_Hot100_Parser(numSongsToTrack=3, numYears=1, isVerbose=False):
    """
    FUNCTIONALITY:
        Walks the billboard 'hot-100' chart backwards from the current week, one chart per
        week, for up to numYears * 52 weeks, and upserts one document per week into the
        module-level `songs` collection (uses the module-level names `bb` and `songs`).
        Each document holds:
            endDate   - the chart's date,
            beginDate - the day after the previous chart's date (None when the chart
                        reports no previous date),
            title_<i>, artist_<i>, peakPosition_<i>, positionLastWeek_<i>, weeksOnChart_<i>
                      - for <i> = 1..numSongsToTrack, where <i> is the song's rank that week
                        (the #1 song is title_1, artist_1, etc.).
        Documents are matched on endDate, so re-running updates existing weeks instead of
        inserting duplicates.
    PARAMETERS:
        numSongsToTrack - how many of the top songs to store per week (1 to 100 inclusive).
        numYears        - how many years (52 weeks each) to go back from the current chart.
        isVerbose       - when True, prints a running count after each stored week.
    RETURN VALUE:
        None, updates mongo store.
    ERRORS:
        If numSongsToTrack <= 0 or > 100 prints Error Message and returns None.
        If numYears <= 0 prints Error Message and returns None.
        If the charts run out before numYears is covered, prints a warning after storing
        the weeks that were available.
    NOTES:
        Takes a while; be patient and use the verbosity flag if you want to make sure it is working.
        How long it takes is internet-speed dependent."""

    if numSongsToTrack <= 0 or numSongsToTrack > 100:
        print("Error: Num Songs cannont less than or equal to 0 or greater than 100.")
        return None
    if numYears <= 0:
        print("Error: Num Years cannont be less than or equal to 0.")
        return None

    numWeeks = numYears * 52
    numWeeksGotten = 0
    # Date of the most recently stored chart; None until the first week is stored.
    endDate = None

    # Gets current chart
    chart = bb.ChartData('hot-100')

    # while there is another chart to get and number of weeks doesn't exceed that passed in.
    while chart.date and numWeeksGotten < numWeeks:

        if not _chart_has_entries(chart):
            if endDate is None:
                # Even the current chart came back empty: there is no stored week to retry from.
                break
            # The chart fetched via previousDate was empty; retry from the day
            # before the last stored chart instead.
            chart = bb.ChartData('hot-100', _shift_chart_date(endDate, -1))
            # Stop if the retry is empty too, or lands on the week already
            # stored (continuing would re-store that week over and over).
            if not chart.date or chart.date == endDate or not _chart_has_entries(chart):
                break

        endDate = chart.date
        previousDate = chart.previousDate
        # The week covered by a chart starts the day after the previous chart's date.
        # NOTE(review): presumably previousDate is empty at the start of chart
        # history -- confirm against billboard.py.
        strBeginDate = _shift_chart_date(previousDate, 1) if previousDate else None

        weekDoc = _build_week_document(chart, numSongsToTrack, strBeginDate, endDate)
        # NOTE(review): endDate is assumed to be the intended upsert key (one
        # document per chart week); the original filter expression was incomplete.
        songs.update_one({'endDate': endDate}, {'$set': weekDoc}, upsert=True)
        numWeeksGotten += 1
        if isVerbose:
            print(numWeeksGotten, " Weeks Added")

        if not previousDate:
            # No earlier chart to walk back to.
            break
        # gets previous chart
        chart = bb.ChartData('hot-100', previousDate)

    if numWeeksGotten < numWeeks:
        print("Warning: numYears exceeds total history of top-100 chart.")

In [42]:
# Store the #1 song of each weekly chart, going back 4 years (4 * 52 weeks).
# The parser returns None (results are written to MongoDB), so `df` holds no data.
df = BillBoard_Hot100_Parser(
    numSongsToTrack=1,
    numYears=4,
    isVerbose=True,
)

1  Weeks Added
2  Weeks Added
3  Weeks Added
4  Weeks Added
5  Weeks Added
6  Weeks Added
7  Weeks Added
8  Weeks Added
9  Weeks Added
10  Weeks Added
11  Weeks Added
12  Weeks Added
13  Weeks Added
14  Weeks Added
15  Weeks Added
16  Weeks Added
17  Weeks Added
18  Weeks Added
19  Weeks Added
20  Weeks Added
21  Weeks Added
22  Weeks Added
23  Weeks Added
24  Weeks Added
25  Weeks Added
26  Weeks Added
27  Weeks Added
28  Weeks Added
29  Weeks Added
30  Weeks Added
31  Weeks Added
32  Weeks Added
33  Weeks Added
34  Weeks Added
35  Weeks Added
36  Weeks Added
37  Weeks Added
38  Weeks Added
39  Weeks Added
40  Weeks Added
41  Weeks Added
42  Weeks Added
43  Weeks Added
44  Weeks Added
45  Weeks Added
46  Weeks Added
47  Weeks Added
48  Weeks Added
49  Weeks Added
50  Weeks Added
51  Weeks Added
52  Weeks Added
53  Weeks Added
54  Weeks Added
55  Weeks Added
56  Weeks Added
57  Weeks Added
58  Weeks Added
59  Weeks Added
60  Weeks Added
61  Weeks Added
62  Weeks Added
63  Weeks Added
6