In [1]:
import requests
from pprint import pprint
import numpy as np

In [2]:
#Default Strings
master_url = "http://legislation.nysenate.gov"
key = "KEY_REMOVED"


#### Grabbing All Bills/Resolutions From 2008 to now  
Terms are two years - so data from   
* 2009 = 2010
* 2011 = 2012
* 2013 = 2014
* 2015 = 2016
* 2017 = 2018

So we only have to grab 5 "terms" of data (2009, 2011, 2013, 2015, 2017)  

###### Structure
Structure of a received JSON  
{'limit': 10,  
     'message': '',  
     'offsetEnd': 26533,  
     'offsetStart': 26555,  
     'responseType': 'empty list',  
     'result': {'items': [], 'size': 0},  
     'success': True,  
     'total': 26533}  
     
Result is what is going to be stored, but total/offsetEnd/offsetStart are telling us how many items we have to grab. Max Limit we can request is 1000, so we'll have to loop

In [19]:
def bill_year(year,lim=100,offset=0):
    '''
    Uses NYS Senate API to return json bills in year.
    Input: Year, Lim=100, Offset (starts at 0)
    Returns: json of requested bills as dictionary
    
    Use loop to increment offset upwards after each return
    '''
    master_url = "http://legislation.nysenate.gov"
    key = "ujK4wDTlpIwURsPqYvys4ZQswx3NDZ0z"
    
    url = master_url+f"/api/3/bills/{year}?key="+key+f"&limit={lim}&offset={offset}&full=true"
    r = requests.get(url)
    return r.json()


In [4]:
#random bill/resolution from 2009
pprint(bill_year(2009,lim=1,offset=np.random.randint(0,25000)))

{'limit': 1,
 'message': '',
 'offsetEnd': 679,
 'offsetStart': 679,
 'responseType': 'bill list',
 'result': {'items': [{'actions': {'items': [{'billId': {'basePrintNo': 'A8001',
                                                         'basePrintNoStr': 'A8001-2009',
                                                         'printNo': 'A8001',
                                                         'session': 2009,
                                                         'version': ''},
                                              'chamber': 'ASSEMBLY',
                                              'date': '2009-05-01',
                                              'sequenceNo': 1,
                                              'text': 'REFERRED TO '
                                                      'GOVERNMENTAL '
                                                      'OPERATIONS'},
                                             {'billId': {'basePrintNo': 'A8001',
                  

                                                                                                     'LaValle',
                                                                                         'imgName': '419_kenneth_p._lavalle.jpg',
                                                                                         'memberId': 419,
                                                                                         'sessionMemberId': 49,
                                                                                         'sessionYear': 2009,
                                                                                         'shortName': 'LAVALLE'},
                                                                                        {'alternate': False,
                                                                                         'chamber': 'SENATE',
                                                                                         'distric

In [18]:
#Offset Start has to be incremented by limit (i.e. when limit is 1000, we have to start with 1000 next time around)
#I know how many to use from "total" at the bottom. May be worth just hardcoding these per year
#Use this dictionary for lookup vs. having to grab from API

In [5]:
total_year_dict = {}
def get_year_total(year):
    '''Gets total bills proposed/signed for a given year'''
    url = master_url+f"/api/3/bills/{year}?key="+key+f"&limit=1&offset=0&full=true"
    r = requests.get(url)
    r = r.json()
    return r['total']
for year in range(2007,2019,2):
    total_year_dict[year] = get_year_total(year)
    print(f"Year {year}: {get_year_total(year)}")

Year 2007: 1
Year 2009: 28495
Year 2011: 25745
Year 2013: 25571
Year 2015: 26612
Year 2017: 26674


In [6]:
total_year_dict

{2007: 1, 2009: 28495, 2011: 25745, 2013: 25571, 2015: 26612, 2017: 26674}

In [61]:
#Lets See if We can get all cases from 2009 term using above "bill_year" function
#Loop will have to go up to total_year_dict[2009] = 28,495

limit = 10 #1000 cases at a time
# for case_num in range(0,28496,limit):
for case_num in range(0,15,limit): #testing with 10
#     print(f"Case_Num: {case_num}")
    j = bill_year(2009,lim=limit,offset=case_num)
#     print(f"Result Length: {len(j['result']['items'])}")
    pprint(j)
    for item in (j['result']['items']):
        print("New Item \n")
        bill_entry = {}
        for key in item.keys():
            bill_entry[key] = item[key]
#             print(f"Key|Value: {key} | {item[key]}")
#             print("-"*25)
#             print(f"Value: {item[key]}")
#             pprint(item[key])
#         pprint(item)
#         pprint(bill_entry)
        print(bill_entry['amendments'])
        print("*"*50)
#         print(f"Pieces in 'Actions': {len(item['actions'])}")
#         pprint(item.keys())
#         print("*"*25)
#         pprint(item)
        


{'limit': 10,
 'message': '',
 'offsetEnd': 10,
 'offsetStart': 1,
 'responseType': 'bill list',
 'result': {'items': [{'actions': {'items': [{'billId': {'basePrintNo': 'S208',
                                                         'basePrintNoStr': 'S208-2009',
                                                         'printNo': 'S208',
                                                         'session': 2009,
                                                         'version': ''},
                                              'chamber': 'SENATE',
                                              'date': '2009-01-07',
                                              'sequenceNo': 1,
                                              'text': 'REFERRED TO CODES'},
                                             {'billId': {'basePrintNo': 'S208',
                                                         'basePrintNoStr': 'S208-2009',
                                                         'printNo': 'S

                                                                 'INTERROGATION '
                                                                 'AT A POLICE '
                                                                 'STATION OR '
                                                                 'OTHER PLACE '
                                                                 'OF '
                                                                 'DETENTION\n'
                                                                 'SHALL BE '
                                                                 'PRESUMED TO '
                                                                 'BE '
                                                                 'INADMISSIBLE '
                                                                 'AS EVIDENCE '
                                                                 'AGAINST THE '
                                                            

                                                                 'OR\n'
                                                                 '  (I)  A  '
                                                                 'STUDENT  '
                                                                 'ENROLLED IN '
                                                                 'AN '
                                                                 'ELEMENTARY '
                                                                 'OR SECONDARY '
                                                                 'SCHOOL IN '
                                                                 'THIS\n'
                                                                 'STATE, '
                                                                 'REGARDLESS '
                                                                 'OF AGE, WHO '
                                                                 'IS ENGAGED

                                              'sequenceNo': 1,
                                              'text': 'REFERRED TO '
                                                      'TRANSPORTATION'},
                                             {'billId': {'basePrintNo': 'S117',
                                                         'basePrintNoStr': 'S117-2009',
                                                         'printNo': 'S117',
                                                         'session': 2009,
                                                         'version': ''},
                                              'chamber': 'SENATE',
                                              'date': '2010-01-06',
                                              'sequenceNo': 2,
                                              'text': 'REFERRED TO '
                                                      'TRANSPORTATION'}],
                                   'size': 2},
                

                                                     'printNo': 'S123',
                                                     'publishDate': '2009-01-01',
                                                     'sameAs': {'items': [],
                                                                'size': 0},
                                                     'session': 2009,
                                                     'stricken': False,
                                                     'uniBill': False,
                                                     'version': ''}},
                                      'size': 1},
                       'approvalMessage': None,
                       'basePrintNo': 'S123',
                       'basePrintNoStr': 'S123-2009',
                       'billType': {'chamber': 'SENATE',
                                    'desc': 'Senate',
                                    'resolution': False},
                       'calendars': {'items

                                                                 'community '
                                                                 'district;\n'
                                                                 '  [(b)] (II) '
                                                                 'initially '
                                                                 'utilizing a '
                                                                 'community '
                                                                 'district '
                                                                 'school or '
                                                                 'facility\n'
                                                                 'for such a '
                                                                 'school or '
                                                                 'program;\n'
                                                          

 'result': {'items': [{'actions': {'items': [{'billId': {'basePrintNo': 'S129',
                                                         'basePrintNoStr': 'S129-2009',
                                                         'printNo': 'S129',
                                                         'session': 2009,
                                                         'version': ''},
                                              'chamber': 'SENATE',
                                              'date': '2009-01-07',
                                              'sequenceNo': 1,
                                              'text': 'REFERRED TO EDUCATION'},
                                             {'billId': {'basePrintNo': 'S129',
                                                         'basePrintNoStr': 'S129-2009',
                                                         'printNo': 'S129',
                                                         'session': 2009,
             

                                                             'PROVISIONS :\n'
                                                             'The real '
                                                             'property law is '
                                                             'amended by '
                                                             'adding a new '
                                                             'section 242-a '
                                                             'to\n'
                                                             'require a loan '
                                                             'counseling '
                                                             'disclosure rider '
                                                             'for all real '
                                                             'estate\n'
                                                             'contracts.\n'
           

                                                                                         'imgName': 'no_image.jpg',
                                                                                         'memberId': 441,
                                                                                         'sessionMemberId': 2,
                                                                                         'sessionYear': 2009,
                                                                                         'shortName': 'MORAHAN'},
                                                                                        {'alternate': False,
                                                                                         'chamber': 'SENATE',
                                                                                         'districtCode': 12,
                                                                                         'fullName': 'George '
    

                                                                 '                       '
                                                                 '2009-2010 '
                                                                 'Regular '
                                                                 'Sessions\n'
                                                                 '\n'
                                                                 '                            '
                                                                 'I N  S E N A '
                                                                 'T E\n'
                                                                 '\n'
                                                                 '                               '
                                                                 '(PREFILED)\n'
                                                                 '\n'
                                      

                                                                                         'shortName': 'SQUADRON'},
                                                                                        {'alternate': False,
                                                                                         'chamber': 'SENATE',
                                                                                         'districtCode': 58,
                                                                                         'fullName': 'William '
                                                                                                     'T. '
                                                                                                     'Stachowski',
                                                                                         'imgName': 'no_image.jpg',
                                                                                         'memberId': 453,
 

                                                                 'COURT, TO '
                                                                 'ENSURE THAT '
                                                                 'ANY CHILD\n'
                                                                 'SUBJECT TO '
                                                                 'SUCH ORDER '
                                                                 'IS RETURNED '
                                                                 'TO THE '
                                                                 'CUSTODIAL '
                                                                 'PARENT AT '
                                                                 'THE END  OF\n'
                                                                 'ANY  '
                                                                 'VISITATION  '
                                                            

                      {'actions': {'items': [{'billId': {'basePrintNo': 'S178',
                                                         'basePrintNoStr': 'S178-2009',
                                                         'printNo': 'S178',
                                                         'session': 2009,
                                                         'version': ''},
                                              'chamber': 'SENATE',
                                              'date': '2009-01-07',
                                              'sequenceNo': 1,
                                              'text': 'REFERRED TO JUDICIARY'},
                                             {'billId': {'basePrintNo': 'S178',
                                                         'basePrintNoStr': 'S178-2009',
                                                         'printNo': 'S178',
                                                         'session': 2009,
             

                                                                                         'fullName': 'Joseph '
                                                                                                     'E. '
                                                                                                     'Robach',
                                                                                         'imgName': '417_joseph_e._robach.jpg',
                                                                                         'memberId': 417,
                                                                                         'sessionMemberId': 47,
                                                                                         'sessionYear': 2009,
                                                                                         'shortName': 'ROBACH'},
                                                                                        {'alternate': 

In [59]:
#Set Up PyMongo
from pymongo import MongoClient

client = MongoClient()
db = client.ny_senate_db

db.collection_names()
col = db.senate

In [62]:
#Lets See if We can get all cases from 2009 term using above "bill_year" function
#Loop will have to go up to total_year_dict[2009] = 28,495

limit = 1000 #1000 cases at a time
for case_num in range(0,28496,limit):
# for case_num in range(0,28,10): #test entry
    
    j = bill_year(2009,lim=limit,offset=case_num)
    print(f"Case_Num {case_num}, Offset {j['offsetStart']}")
    for item in (j['result']['items']):
        
        #New Bill - Empty out Bill_Entry
        bill_entry = {}

        for key in item.keys():
            bill_entry[key] = item[key]
        
        #Insert All Elements
        col.insert_one(bill_entry)




Case_Num 0, Offset 1
Case_Num 1000, Offset 1000
Case_Num 2000, Offset 2000
Case_Num 3000, Offset 3000
Case_Num 4000, Offset 4000
Case_Num 5000, Offset 5000
Case_Num 6000, Offset 6000
Case_Num 7000, Offset 7000
Case_Num 8000, Offset 8000
Case_Num 9000, Offset 9000
Case_Num 10000, Offset 10000
Case_Num 11000, Offset 11000
Case_Num 12000, Offset 12000
Case_Num 13000, Offset 13000
Case_Num 14000, Offset 14000
Case_Num 15000, Offset 15000
Case_Num 16000, Offset 16000
Case_Num 17000, Offset 17000
Case_Num 18000, Offset 18000
Case_Num 19000, Offset 19000
Case_Num 20000, Offset 20000
Case_Num 21000, Offset 21000
Case_Num 22000, Offset 22000
Case_Num 23000, Offset 23000
Case_Num 24000, Offset 24000
Case_Num 25000, Offset 25000
Case_Num 26000, Offset 26000
Case_Num 27000, Offset 27000
Case_Num 28000, Offset 28000


In [58]:
for entry in col.find({"basePrintNoStr": "S208-2009"}):
    pprint(entry)

In [63]:
print(col.count())

29496


In [96]:
for entry in col.find({"basePrintNoStr": "S2923-2009"}):
    pprint(entry['actions']['items'])

[{'billId': {'basePrintNo': 'S2923',
             'basePrintNoStr': 'S2923-2009',
             'printNo': 'S2923',
             'session': 2009,
             'version': ''},
  'chamber': 'SENATE',
  'date': '2009-03-06',
  'sequenceNo': 1,
  'text': 'REFERRED TO CRIME VICTIMS, CRIME AND CORRECTION'},
 {'billId': {'basePrintNo': 'S2923',
             'basePrintNoStr': 'S2923-2009',
             'printNo': 'S2923',
             'session': 2009,
             'version': ''},
  'chamber': 'SENATE',
  'date': '2010-01-06',
  'sequenceNo': 2,
  'text': 'REFERRED TO CRIME VICTIMS, CRIME AND CORRECTION'},
 {'billId': {'basePrintNo': 'S2923',
             'basePrintNoStr': 'S2923-2009',
             'printNo': 'S2923',
             'session': 2009,
             'version': ''},
  'chamber': 'SENATE',
  'date': '2010-02-22',
  'sequenceNo': 3,
  'text': '1ST REPORT CAL.140'},
 {'billId': {'basePrintNo': 'S2923',
             'basePrintNoStr': 'S2923-2009',
             'printNo': 'S2923',
        

In [98]:
pipeline = [
    {"$unwind":"$actions"},
    {"$unwind":"$items"},
    {"$group": { "_id": { "bill": "0"}, 
                "uniqueIds": { "$addToSet": "$_id" },
                "count": { "$sum": 1 } }
    }, 
    {"$match": { "count": { "$gt": 1 } } }
]
pprint(list(db.col.aggregate(pipeline)))

[]


In [84]:
col.count()

29496

In [99]:
total_year_dict.keys()

dict_keys([2007, 2009, 2011, 2013, 2015, 2017])

In [103]:
#Set Up PyMongo
db = client.ny_senate_db

db.collection_names()
col = db.senate_total

limit = 1000 #1000 cases at a time
    
#Each Year in [2007, 2009, 2011, 2013, 2015, 2017]
for year in total_year_dict.keys():
    print(f"Year: {year}")
    
    #In a Given Year, iterate upwards by limit until we hit total number of bills/resolutions
    for case_num in range(0,total_year_dict[year]+1,limit):
        #Get JSON via bill_year function
        j = bill_year(year,lim=limit,offset=case_num)
        #Print case_num/offset to make sure its iterating upwards
        print(f"Case_Num {case_num}, Offset {j['offsetStart']}")
        
        #For item in our j - 1000 bills/resolutions at a time
        for item in (j['result']['items']):
            
            #New Bill - Empty out Bill_Entry
            bill_entry = {}
            
            #Add Each Entry within to bill_entry
            for key in item.keys():
                bill_entry[key] = item[key]

            #Insert All Elements into bill_entry
            col.insert_one(bill_entry)




Year: 2007
Case_Num 0, Offset 1
Year: 2009
Case_Num 0, Offset 1
Case_Num 1000, Offset 1000
Case_Num 2000, Offset 2000
Case_Num 3000, Offset 3000
Case_Num 4000, Offset 4000
Case_Num 5000, Offset 5000
Case_Num 6000, Offset 6000
Case_Num 7000, Offset 7000
Case_Num 8000, Offset 8000
Case_Num 9000, Offset 9000
Case_Num 10000, Offset 10000
Case_Num 11000, Offset 11000
Case_Num 12000, Offset 12000
Case_Num 13000, Offset 13000
Case_Num 14000, Offset 14000
Case_Num 15000, Offset 15000
Case_Num 16000, Offset 16000
Case_Num 17000, Offset 17000
Case_Num 18000, Offset 18000
Case_Num 19000, Offset 19000
Case_Num 20000, Offset 20000
Case_Num 21000, Offset 21000
Case_Num 22000, Offset 22000
Case_Num 23000, Offset 23000
Case_Num 24000, Offset 24000
Case_Num 25000, Offset 25000
Case_Num 26000, Offset 26000
Case_Num 27000, Offset 27000
Case_Num 28000, Offset 28000
Year: 2011
Case_Num 0, Offset 1
Case_Num 1000, Offset 1000
Case_Num 2000, Offset 2000
Case_Num 3000, Offset 3000
Case_Num 4000, Offset 4000
Ca

In [104]:
col.count()

133103

We now have all 133K bills in mongo. The below steps are data exploration. I wanted to just get a first glance at what the data looked like, as well as practice using pyMongo

### Data Exploration

In [125]:
for entry in col.find({"sponsor.member.fullName": "Martin J. Golden"}):
# for entry in col.find({"text":"REFERRED TO WAYS AND MEANS"}):
#     print(type(entry))
    pprint(entry)


{'_id': ObjectId('5b1c11ca5a67a40cd347d262'),
 'actions': {'items': [{'billId': {'basePrintNo': 'J2347',
                                   'basePrintNoStr': 'J2347-2009',
                                   'printNo': 'J2347',
                                   'session': 2009,
                                   'version': ''},
                        'chamber': 'SENATE',
                        'date': '2009-06-02',
                        'sequenceNo': 1,
                        'text': 'REFERRED TO FINANCE'},
                       {'billId': {'basePrintNo': 'J2347',
                                   'basePrintNoStr': 'J2347-2009',
                                   'printNo': 'J2347',
                                   'session': 2009,
                                   'version': ''},
                        'chamber': 'SENATE',
                        'date': '2009-06-02',
                        'sequenceNo': 2,
                        'text': 'REPORTED TO CALENDAR FOR CONSIDER

            'the drugs commonly used by people over 55 years of age; functions '
            'and drug interactions.',
 'title': 'Provides for the creation of a drug guide for seniors regarding the '
          'drugs commonly used by people over 62 years of age',
 'vetoMessages': {'items': [], 'size': 0},
 'vetoed': False,
 'votes': {'items': [{'billId': {'basePrintNo': 'S2031',
                                 'basePrintNoStr': 'S2031-2009',
                                 'printNo': 'S2031',
                                 'session': 2009,
                                 'version': ''},
                      'committee': {'chamber': 'SENATE', 'name': 'Aging'},
                      'memberVotes': {'items': {'AYE': {'items': [{'alternate': False,
                                                                   'chamber': 'SENATE',
                                                                   'districtCode': 15,
                                                                

                                           '                                '
                                           'ARTICLE 4\n'
                                           '                         '
                                           'DISTINGUISHED CHAIRS IN\n'
                                           '                         '
                                           'GERONTOLOGICAL SCIENCES\n'
                                           'SECTION 401. DISTINGUISHED CHAIRS '
                                           'IN GERONTOLOGICAL SCIENCES.\n'
                                           '  S 401. DISTINGUISHED CHAIRS IN '
                                           'GERONTOLOGICAL SCIENCES.  1. '
                                           'THERE  ARE\n'
                                           'HEREBY  ESTABLISHED, WITHIN '
                                           'AMOUNTS AVAILABLE BY APPROPRIATION '
                                           'THEREFOR,\n'
  

                       {'billId': {'basePrintNo': 'S2013',
                                   'basePrintNoStr': 'S2013-2009',
                                   'printNo': 'S2013',
                                   'session': 2009,
                                   'version': ''},
                        'chamber': 'SENATE',
                        'date': '2009-06-03',
                        'sequenceNo': 3,
                        'text': '2ND REPORT CAL.'},
                       {'billId': {'basePrintNo': 'S2013',
                                   'basePrintNoStr': 'S2013-2009',
                                   'printNo': 'S2013',
                                   'session': 2009,
                                   'version': ''},
                        'chamber': 'SENATE',
                        'date': '2009-06-04',
                        'sequenceNo': 4,
                        'text': 'ADVANCED TO THIRD READING'},
                       {'billId': {'basePrintNo': 'S20

                                           'York upon\n'
                                           'the occasion of celebrating its '
                                           '20th Anniversary, and to wish the '
                                           'entire\n'
                                           'organization continued success in '
                                           'all its future endeavors;  and  '
                                           'be  it\n'
                                           'further\n'
                                           '  RESOLVED, That a copy of this '
                                           'Resolution, suitably engrossed, be '
                                           'tran-\n'
                                           'smitted  to Mr. James P. Chou, '
                                           'President, The Asian American Bar '
                                           'Associ-\n'
                                        

                                           'PROSTITUTION  IN  THE  SECOND  '
                                           'DEGREE  AS  DEFINED IN SUBDIVISION '
                                           'TWO OF\n'
                                           'SECTION 230.30, assault on a peace '
                                           'officer, police officer,  fireman  '
                                           'or\n'
                                           'emergency  medical  services  '
                                           'professional as defined in section '
                                           '120.08,\n'
                                           'gang assault in the second degree '
                                           'as defined in section 120.06, '
                                           'burglary\n'
                                           'in the second degree as defined in '
                                           'section 140.25, robbery in 

                        'text': 'REFERRED TO TRANSPORTATION'},
                       {'billId': {'basePrintNo': 'S1579',
                                   'basePrintNoStr': 'S1579-2009',
                                   'printNo': 'S1579',
                                   'session': 2009,
                                   'version': ''},
                        'chamber': 'SENATE',
                        'date': '2010-01-06',
                        'sequenceNo': 2,
                        'text': 'REFERRED TO TRANSPORTATION'}],
             'size': 2},
 'activeVersion': '',
 'additionalSponsors': {'items': [], 'size': 0},
 'adopted': False,
 'amendmentVersions': {'items': [''], 'size': 1},
 'amendments': {'items': {'': {'actClause': '',
                               'basePrintNo': 'S1579',
                               'basePrintNoStr': 'S1579-2009',
                               'coSponsors': {'items': [], 'size': 0},
                               'fullText': '\n'
       

                                                         'districtCode': 24,
                                                         'fullName': 'Andrew J '
                                                                     'Lanza',
                                                         'imgName': '409_andrew_j_lanza.jpg',
                                                         'memberId': 409,
                                                         'sessionMemberId': 39,
                                                         'sessionYear': 2009,
                                                         'shortName': 'LANZA'},
                                                        {'alternate': False,
                                                         'chamber': 'SENATE',
                                                         'districtCode': 1,
                                                         'fullName': 'Kenneth '
                                               

                                           'SECTION 401. DEFINITIONS.\n'
                                           '        402.  LOCAL  '
                                           'INTERGENERATIONAL EDUCATIONAL AND '
                                           'MENTORING SERVICE\n'
                                           '               PROGRAMS.\n'
                                           '        403. DUTIES OF THE '
                                           'DIRECTOR.\n'
                                           '        404. FUNDING.\n'
                                           '        405. REPORT.\n'
                                           '  S 401. DEFINITIONS. AS USED IN '
                                           'THIS SECTION:\n'
                                           '  1. "ADVISORY COMMITTEE" SHALL '
                                           'MEAN  THE  ADVISORY  COMMITTEE  '
                                           'FOR  THE\n'
                        

                                           'BLY, DO ENACT AS FOLLOWS:\n'
                                           '\n'
                                           '  Section  1.  Legislative  '
                                           'findings and purpose. The '
                                           'legislature hereby\n'
                                           'finds and declares that access to '
                                           'healthy foods and exercise can '
                                           'make  a\n'
                                           'significant difference in the '
                                           'quality and length of the life of '
                                           'seniors,\n'
                                           'and  that  the voluntary program '
                                           'created by this act, with the '
                                           'education\n'
                               

                                       'factors, including the aging of the\n'
                                       "Baby Boom generation,' lower birth "
                                       'rates for generations immediately\n'
                                       'following the baby boom longer life '
                                       'expectancies and younger\n'
                                       'generations leaving the state to work '
                                       'elsewhere. Businesses across the\n'
                                       'state will feel the impact of the '
                                       'retirement of the baby boomers and\n'
                                       'will no doubt be looking at employing, '
                                       'or retaining older workers as\n'
                                       'the demand for workers will increase. '
                                       'This legislation establishes a\n'
   

KeyboardInterrupt: 

In [126]:
pipeline = [
    {"$group": { "_id": "sponsor.member.fullName", 
                "bills_sponsored": { "$sum": 1 } }
    } 
#    , {"$match": { "count": { "$gt": 1 } } }
]
pprint(list(db.col.aggregate(pipeline)))

[]


In [133]:
cursor = col.aggregate([
    {"$group": { "_id": "$session", 
                "count": { "$sum": 1 } 
    }} 
])
year_counts ={}
for doc in cursor:
    year_counts[doc["_id"]]=doc['count']

In [136]:
pprint(year_counts)
pprint(total_year_dict)

{2007: 1, 2009: 28496, 2011: 25746, 2013: 25572, 2015: 26613, 2017: 26675}
{2007: 1, 2009: 28495, 2011: 25745, 2013: 25571, 2015: 26612, 2017: 26674}


In [150]:
from bson.son import SON
cursor = col.aggregate(
[
    {"$group": { "_id": "$sponsor.member.fullName", 
                "bills_sponsored": { "$sum": 1 } }
    } 
    ,{"$sort": SON([("bills_sponsored", -1)])}
]
                    )
for doc in cursor:
    print(doc)

{'_id': 'Kevin S. Parker', 'bills_sponsored': 3983}
{'_id': 'John J. Bonacic', 'bills_sponsored': 3344}
{'_id': 'Neil D. Breslin', 'bills_sponsored': 2772}
{'_id': 'Martin J. Golden', 'bills_sponsored': 2404}
{'_id': 'Kenneth P. LaValle', 'bills_sponsored': 2387}
{'_id': 'Steven Englebright', 'bills_sponsored': 1913}
{'_id': 'William J. Larkin Jr.', 'bills_sponsored': 1875}
{'_id': 'Jack M. Martins', 'bills_sponsored': 1792}
{'_id': 'Patty Ritchie', 'bills_sponsored': 1531}
{'_id': 'Joseph A. Griffo', 'bills_sponsored': 1462}
{'_id': 'Fred Thiele', 'bills_sponsored': 1434}
{'_id': 'Carl L Marcellino', 'bills_sponsored': 1418}
{'_id': 'Linda Rosenthal', 'bills_sponsored': 1407}
{'_id': 'Charles J. Fuschillo Jr.', 'bills_sponsored': 1384}
{'_id': 'Catharine Young', 'bills_sponsored': 1367}
{'_id': 'Michael H. Ranzenhofer', 'bills_sponsored': 1356}
{'_id': 'J. Gary Pretlow', 'bills_sponsored': 1329}
{'_id': 'Velmanette Montgomery', 'bills_sponsored': 1288}
{'_id': 'Michael F. Nozzolio', '

In [158]:
for entry in col.find({"amendments.size": {"$gt" : 2}}).limit(10):
    pprint(entry)
    print("*"*25)


{'_id': ObjectId('5b1c11ca5a67a40cd347d10c'),
 'actions': {'items': [{'billId': {'basePrintNo': 'S134',
                                   'basePrintNoStr': 'S134-2009',
                                   'printNo': 'S134',
                                   'session': 2009,
                                   'version': ''},
                        'chamber': 'SENATE',
                        'date': '2009-01-07',
                        'sequenceNo': 1,
                        'text': 'REFERRED TO JUDICIARY'},
                       {'billId': {'basePrintNo': 'S134',
                                   'basePrintNoStr': 'S134-2009',
                                   'printNo': 'S134',
                                   'session': 2009,
                                   'version': ''},
                        'chamber': 'SENATE',
                        'date': '2009-01-27',
                        'sequenceNo': 2,
                        'text': '1ST REPORT CAL.4'},
                 

                                                                   'imgName': '412_daniel_l._squadron.jpg',
                                                                   'memberId': 412,
                                                                   'sessionMemberId': 42,
                                                                   'sessionYear': 2009,
                                                                   'shortName': 'SQUADRON'},
                                                                  {'alternate': False,
                                                                   'chamber': 'SENATE',
                                                                   'districtCode': 58,
                                                                   'fullName': 'William '
                                                                               'T. '
                                                                               'Stachowski',
       

                                            'PROGRAMS OF STUDY PURSUANT TO '
                                            'SUBDIVISION\n'
                                            'FOUR OF THIS SECTION.\n'
                                            '  3. THE DEPARTMENT, IN ITS '
                                            'DISCRETION, MAY ISSUE A  '
                                            'CONDITIONAL  REGIS-\n'
                                            'TRATION  TO  A  LICENSEE  WHO  '
                                            'FAILS  TO  MEET  THE CONTINUING '
                                            'EDUCATION\n'
                                            'REQUIREMENTS ESTABLISHED IN '
                                            'SUBDIVISION TWO  OF  THIS  '
                                            'SECTION  BUT  WHO\n'
                                            'AGREES  TO  MAKE  UP  ANY '
                                            'DEFICIENCIES AND TAKE ANY '
  

                              {'chamber': 'SENATE',
                               'name': 'Rules',
                               'referenceDate': '2009-07-16T00:00',
                               'sessionYear': 2009}],
                    'size': 7},
 'previousVersions': {'items': [{'basePrintNo': 'S2793',
                                 'basePrintNoStr': 'S2793-2007',
                                 'printNo': 'S2793B',
                                 'session': 2007,
                                 'version': 'B'}],
                      'size': 1},
 'printNo': 'S285C',
 'programInfo': None,
 'publishStatusMap': {'items': {'': {'effectDateTime': '2009-03-27T00:00',
                                     'published': True,
                                     'version': ''},
                                'A': {'effectDateTime': '2009-03-27T00:00',
                                      'published': True,
                                      'version': 'A'},
                    

             'rules': False},
 'status': {'actionDate': '2010-04-13',
            'billCalNo': 340,
            'committeeName': None,
            'statusDesc': 'Senate Floor Calendar',
            'statusType': 'SENATE_FLOOR'},
 'substitutedBy': None,
 'summary': 'Makes the conviction of a husband or wife of a class A felony, '
            'class B or C violent felony or a felony sex offense a ground for '
            'divorce.',
 'title': 'Makes conviction of certain criminal offenses grounds for divorce',
 'vetoMessages': {'items': [], 'size': 0},
 'vetoed': False,
 'votes': {'items': [{'billId': {'basePrintNo': 'S169',
                                 'basePrintNoStr': 'S169-2009',
                                 'printNo': 'S169A',
                                 'session': 2009,
                                 'version': 'A'},
                      'committee': {'chamber': 'SENATE', 'name': 'Judiciary'},
                      'memberVotes': {'items': {'AYE': {'items': [{'alter

                                            '  S 2. The tax law is amended by '
                                            'adding a new section 628-a to  '
                                            'read  as\n'
                                            'follows:\n'
                                            '  S  628-A.  GIFT FOR THE CHILD '
                                            'PSYCHIATRY ACCESS PROJECT. '
                                            'EFFECTIVE FOR\n'
                                            'ANY TAX YEAR COMMENCING ON OR '
                                            'AFTER JANUARY FIRST, TWO THOUSAND '
                                            'TEN,  AN\n'
                                            'INDIVIDUAL  IN  ANY  TAXABLE  '
                                            'YEAR  MAY ELECT TO CONTRIBUTE TO '
                                            'THE CHILD\n'
                                            'PSYCHIATRY ACCESS FUND. SUCH '
       

                           'committeeName': 'Transportation',
                           'statusDesc': 'In Senate Committee',
                           'statusType': 'IN_SENATE_COMM'}],
                'size': 1},
 'pastCommittees': {'items': [{'chamber': 'SENATE',
                               'name': 'Transportation',
                               'referenceDate': '2009-01-07T00:00',
                               'sessionYear': 2009},
                              {'chamber': 'SENATE',
                               'name': 'Transportation',
                               'referenceDate': '2009-02-04T00:00',
                               'sessionYear': 2009},
                              {'chamber': 'SENATE',
                               'name': 'Transportation',
                               'referenceDate': '2009-02-25T00:00',
                               'sessionYear': 2009},
                              {'chamber': 'SENATE',
                               'name': 'Tra

                                            '  ordered reprinted as amended '
                                            'and recommitted to said '
                                            'committee\n'
                                            '\n'
                                            'AN ACT to amend the insurance '
                                            'law, in relation to health '
                                            'insurance bene-\n'
                                            '  fits for domestic partners\n'
                                            '\n'
                                            '  THE  PEOPLE OF THE STATE OF NEW '
                                            'YORK, REPRESENTED IN SENATE AND '
                                            'ASSEM-\n'
                                            'BLY, DO ENACT AS FOLLOWS:\n'
                                            '\n'
                                            '  Section 1. Paragra

                                            'NOTIFICATION OF THE\n'
                                            'INSURER BY THE INSURED OR '
                                            'CERTIFICATE HOLDER OF THE END OF '
                                            'THE  DOMESTIC\n'
                                            'PARTNER RELATIONSHIP;\n'
                                            '  (6)  MAY,  FOR  PERSONS  NOT '
                                            'DEEMED AS QUALIFIED PURSUANT TO '
                                            'PARAGRAPH\n'
                                            'THREE OR FOUR OF THIS '
                                            'SUBSECTION,  INCLUDE  PROVISIONS  '
                                            'REQUIRING  PRIOR\n'
                                            'COHABITATION OF THE INSURED OR '
                                            'CERTIFICATE HOLDER AND THE '
                                            'DOMESTIC PART-\n'
  

{'_id': ObjectId('5b1c11ca5a67a40cd347d1cd'),
 'actions': {'items': [{'billId': {'basePrintNo': 'A1558',
                                   'basePrintNoStr': 'A1558-2009',
                                   'printNo': 'A1558',
                                   'session': 2009,
                                   'version': ''},
                        'chamber': 'ASSEMBLY',
                        'date': '2009-01-07',
                        'sequenceNo': 1,
                        'text': 'REFERRED TO CONSUMER AFFAIRS AND PROTECTION'},
                       {'billId': {'basePrintNo': 'A1558',
                                   'basePrintNoStr': 'A1558-2009',
                                   'printNo': 'A1558',
                                   'session': 2009,
                                   'version': ''},
                        'chamber': 'ASSEMBLY',
                        'date': '2009-01-27',
                        'sequenceNo': 2,
                        'text': 'REPOR

                                            'SERVICES OF\n'
                                            'THE PROVISIONS OF THIS SECTION '
                                            'SHALL BE DEEMED VOID AND '
                                            'UNENFORCEABLE BY\n'
                                            'THE ESCO AS CONTRARY TO PUBLIC '
                                            'POLICY.\n'
                                            '  9. THE ATTORNEY GENERAL, UPON '
                                            'HIS OR HER OWN MOTION OR  UPON  '
                                            'REFERRAL\n'
                                            'FROM  THE  PUBLIC SERVICE '
                                            'COMMISSION, THE LONG ISLAND POWER '
                                            'AUTHORITY OR\n'
                                            'THE STATE CONSUMER PROTECTION '
                                            'BOARD, MAY BRING A  CIVIL  '
          

                               'referenceDate': '2009-06-02T00:00',
                               'sessionYear': 2009},
                              {'chamber': 'SENATE',
                               'name': 'Consumer Protection',
                               'referenceDate': '2010-02-02T00:00',
                               'sessionYear': 2009},
                              {'chamber': 'SENATE',
                               'name': 'Rules',
                               'referenceDate': '2010-06-29T00:00',
                               'sessionYear': 2009}],
                    'size': 7},
 'previousVersions': {'items': [{'basePrintNo': 'A10180',
                                 'basePrintNoStr': 'A10180-2007',
                                 'printNo': 'A10180A',
                                 'session': 2007,
                                 'version': 'A'}],
                      'size': 1},
 'printNo': 'A1558C',
 'programInfo': None,
 'publishStatusMap': {'items': {

                                   'basePrintNoStr': 'A1627-2009',
                                   'printNo': 'A1627C',
                                   'session': 2009,
                                   'version': 'C'},
                        'chamber': 'SENATE',
                        'date': '2009-07-16',
                        'sequenceNo': 18,
                        'text': 'PASSED SENATE'},
                       {'billId': {'basePrintNo': 'A1627',
                                   'basePrintNoStr': 'A1627-2009',
                                   'printNo': 'A1627C',
                                   'session': 2009,
                                   'version': 'C'},
                        'chamber': 'SENATE',
                        'date': '2009-07-16',
                        'sequenceNo': 19,
                        'text': 'RETURNED TO ASSEMBLY'},
                       {'billId': {'basePrintNo': 'A1627',
                                   'basePrintNoStr': 'A

                                                                   'districtCode': 12,
                                                                   'fullName': 'George '
                                                                               'Onorato',
                                                                   'imgName': 'no_image.jpg',
                                                                   'memberId': 443,
                                                                   'sessionMemberId': 15,
                                                                   'sessionYear': 2009,
                                                                   'shortName': 'ONORATO'},
                                                                  {'alternate': False,
                                                                   'chamber': 'SENATE',
                                                                   'districtCode': 37,
                        

In [160]:
for entry in col.find({"basePrintNoStr": "S134-2009"}).limit(10):
    pprint(entry['amendments'])
    print("*"*25)


{'items': {'': {'actClause': '',
                'basePrintNo': 'S134',
                'basePrintNoStr': 'S134-2009',
                'coSponsors': {'items': [], 'size': 0},
                'fullText': '\x0c'
                            '\n'
                            '                    S T A T E   O F   N E W   Y O '
                            'R K\n'
                            '________________________________________________________________________\n'
                            '\n'
                            '                                   134\n'
                            '\n'
                            '                       2009-2010 Regular '
                            'Sessions\n'
                            '\n'
                            '                            I N  S E N A T E\n'
                            '\n'
                            '                               (PREFILED)\n'
                            '\n'
                            '      

https://www.nytimes.com/search?endDate=20190101&query=albany%20state%20senate&sort=best&startDate=20090101

And that is it! We now have all Senate bills from 2009 to now in our MongoDB