In [2]:
import requests
import datetime

import pandas as pd
import pymongo
from tqdm import tqdm

from credentials import CONGRESS_API_KEY, PROPUBLICA_API_KEY

get a specific roll call vote:

GET https://api.propublica.org/congress/v1/{congress}/{chamber}/sessions/{session-number}/votes/{roll-call-number}.json

get votes by date:

GET https://api.propublica.org/congress/v1/{chamber}/votes/{year}/{month}.json

get member's vote positions by ID:

GET https://api.propublica.org/congress/v1/members/{member-id}/votes.json

get all members (filter by in-office to get current):

GET https://api.propublica.org/congress/v1/{congress}/{chamber}/members.json


In [None]:
'''
pull all congress members to get propublica IDs
match with existing entries in congresspeople mongodb using first, last names and chamber = type {'sen', 'rep'}
'''

In [10]:
client = pymongo.MongoClient("mongodb://localhost:27017/") # must be run on olin312-04
db = client["comps"]
votes_collection = db["votes"]
bills_collection = db['bills']
congresspeople_collection = db['congresspeople']
committees_collection = db["committees"]

In [7]:
def get_members(congress, chamber):
    url = f"https://api.propublica.org/congress/v1/{congress}/{chamber}/members.json"
    headers = {"X-API-Key": PROPUBLICA_API_KEY}
    response = requests.get(url=url, headers=headers)
    if response.ok:
        return response.json()['results'][0]['members']
    else:
        response.raise_for_status()

In [8]:
'''
unsuitable: only returns 20 most recent votes
'''
def get_vote_positions(member_id):
    url = f"https://api.propublica.org/congress/v1/members/{member_id}/votes.json"
    headers = {"X-API-Key": PROPUBLICA_API_KEY}
    response = requests.get(url=url, headers=headers)
    if response.ok:
        return response.json()#['results'][0]['votes']
    else:
        response.raise_for_status()

In [47]:
all_members = get_members(117, "house") + get_members(117, "senate")
for member in all_members:
    try:
        with_pct = member["votes_with_party_pct"]
    except KeyError:
        with_pct = None
    try:
        against_pct = member["votes_against_party_pct"]
    except KeyError:
        against_pct = None
    congresspeople_collection.update_one(
        {
            #search on these keys: first name is not consistent bc of nicknames
            "url": member["url"]
        },
        {"$set": {
            "propublica_id": member['id'],
            'dw_nominate': member['dw_nominate'],
            "votes_with_party_pct": with_pct,
            "votes_against_party_pct": against_pct}})

In [11]:
def get_congress_years(congress_num):
    url = f"https://api.congress.gov/v3/congress/{congress_num}/"
    param_dict = {
        "api_key": CONGRESS_API_KEY,
    }
    response = requests.get(url=url, params=param_dict)
    if response.ok:
        this_congress = response.json()['congress']
        return int(this_congress['startYear']), int(this_congress['endYear'])
    else:
        response.raise_for_status()

In [4]:
def get_vote_uris(year):
    vote_uris = []
    for month in ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12"]:
        url = f"https://api.propublica.org/congress/v1/both/votes/{year}/{month}.json"
        headers = {"X-API-Key": PROPUBLICA_API_KEY}
        response = requests.get(url=url, headers=headers)
        if response.ok:
            try:
                vote_uris.extend([vote['vote_uri'] for vote in response.json()['results']['votes']])
            except KeyError:
                print(f"Missing fields in query for {year}-{month}")
        else:
            response.raise_for_status()
    return vote_uris

In [5]:
vote_uris = get_vote_uris(2021) + get_vote_uris(2022)

In [36]:
def store_votes_by_bill(vote_uri, votes_collection):
    response = requests.get(url=vote_uri, headers={"X-API-Key": PROPUBLICA_API_KEY})
    if response.ok:
        vote_data = response.json()['results']['votes']['vote']
        try:
            bill_id = vote_data['bill']['bill_id']
            store_bill_data(bill_id)
            
        except KeyError:
            bill_id = None
            print(vote_data)    
        
        for position in vote_data['positions']:
            vote_doc = {
                'bill_id': bill_id,
                'member_propublica_id': position['member_id'],
                'vote_position': position['vote_position'],
                'description': vote_data['description'],
                'vote_type': vote_data['vote_type'],
                'date': vote_data['date']
            }
            votes_collection.insert_one(vote_doc)
        
    else:
        response.raise_for_status()

1949

In [8]:
response = requests.get(url=vote_uris[1], headers={"X-API-Key": PROPUBLICA_API_KEY})
vote_data = response.json()['results']['votes']['vote']
bill_uri = vote_data['bill']['api_uri']
bill_response = requests.get(url=bill_uri, headers={"X-API-Key": PROPUBLICA_API_KEY})
bill_response.json()

{'status': 'OK',
 'copyright': 'Copyright (c) 2023 Pro Publica Inc. All Rights Reserved.',
 'results': [{'bill_id': 'sres16-117',
   'bill_slug': 'sres16',
   'congress': '117',
   'bill': 'S.RES.16',
   'bill_type': 'sres',
   'number': 'S.RES.16',
   'bill_uri': 'https://api.propublica.org/congress/v1/117/bills/sres16.json',
   'title': 'A resolution to provide for related procedures concerning the article of impeachment against Donald John Trump, President of the United States.',
   'short_title': 'A resolution to provide for related procedures concerning the article of impeachment against Donald John Trump, President of the United States.',
   'sponsor_title': '',
   'sponsor': 'Charles E. Schumer',
   'sponsor_id': 'S000148',
   'sponsor_uri': 'https://api.propublica.org/congress/v1/members/S000148.json',
   'sponsor_party': 'D',
   'sponsor_state': 'NY',
   'gpo_pdf_uri': None,
   'congressdotgov_url': 'https://www.congress.gov/bill/117th-congress/senate-resolution/16',
   'govtr

Loading bills

In [52]:
def store_bills(congress, insert_in_db=True):
    url = f"https://api.propublica.org/congress/v1/{congress}/both/bills/introduced.json"
    offset = 0
    bill_list = []
    while True:
        response = requests.get(url=url, headers={"X-API-Key": PROPUBLICA_API_KEY}, params={"offset": offset})
        if response.ok:
            results = response.json()['results']
            bills = results[0]['bills']
            if bills == []:
                break
            offset += 20
            if insert_in_db:
                bills_collection.insert_many(bills)
            else:
                bill_list.extend(bills)
        else:
            print(f"Query response code: {response.status_code}")
            return bill_list, offset
    return bill_list, offset

In [53]:
bill_list, offset = store_bills(117)

Loading committees

In [63]:
def store_committee_memberships(congress):
    for doc in tqdm(congresspeople_collection.find()):
        member_id = doc['propublica_id']
        url = f"https://api.propublica.org/congress/v1/members/{member_id}.json"
        response = requests.get(url, headers={"X-API-Key": PROPUBLICA_API_KEY})
        if response.ok:
            results = response.json()['results'][0]
            roles = [role for role in results['roles'] if role['congress'] == str(congress)]
            if len(roles) == 0:
                continue
            else:
                for role in roles:
                    committees = [committee['name'] for committee in role['committees']]
                    subcommittees = [subcommittee['name'] for subcommittee in role['subcommittees']]
                    congresspeople_collection.update_one(
                        {"_id": doc["_id"]}, 
                        {"$addToSet": {"committees": {"$each": committees}, "subcommittees": {"$each": subcommittees}}}
                    )
        else:
            print(f"Query response code: {response.status_code}")

In [64]:
store_committee_memberships(117)

538it [02:06,  4.25it/s]


In [61]:
member_id = "M000355"
committees, subcommittees = [], []
url = f"https://api.propublica.org/congress/v1/members/{member_id}.json"
response = requests.get(url, headers={"X-API-Key": PROPUBLICA_API_KEY})
if response.ok:
    results = response.json()['results'][0]
    roles = [role for role in results['roles'] if role['congress'] == "117"]
    if len(roles) != 0:
        for role in roles:
            committees.extend([committee['name'] for committee in role['committees']])
            subcommittees.extend([subcommittee['name'] for subcommittee in role['subcommittees']])

In [62]:
committees

['Committee on Rules and Administration',
 'Committee on Agriculture, Nutrition, and Forestry',
 'Committee on Appropriations',
 'Select Committee on Intelligence']

inserting committees and subcommittees into db

In [13]:
committees = []
subcommittees = []
no_committees = 0
for doc in congresspeople_collection.find():
    try:
        new_comittees = [committee for committee in doc["committees"] if committee not in committees]
        new_subcomittees = [subcommittee for subcommittee in doc["subcommittees"] if subcommittee not in subcommittees]
        committees.extend(new_comittees)
        subcommittees.extend(new_subcomittees)
    except KeyError:
        no_committees += 1

In [16]:
committees_collection.insert_many([{"type": "committee", "name": committee} for committee in committees])
committees_collection.insert_many([{"type": "subcommittee", "name": subcommittee} for subcommittee in subcommittees])

<pymongo.results.InsertManyResult at 0x7fede3b710c0>

In [66]:
primary_subjects = {}
for doc in bills_collection.find():
    subject = doc["primary_subject"]
    if subject in primary_subjects:
        primary_subjects[subject] += 1
    else:
        primary_subjects[subject] = 1
primary_subjects

{'': 17823}

In [75]:
def get_bill_subjects(bill_id, congress):
    url = f"https://api.propublica.org/congress/v1/{congress}/bills/{bill_id}/subjects.json"
    subjects = []
    initial_response = requests.get(url=url, headers={"X-API-Key": PROPUBLICA_API_KEY})
    if initial_response.ok:
        num_results = initial_response.json()['num_results']
        for i in range((num_results // 20) + 1):
            offset = 20 * i
            response = requests.get(
                url=url, 
                headers={"X-API-Key": PROPUBLICA_API_KEY}, 
                params={"offset": offset}
            ).json()
            subjects.extend([subject['name'] for subject in response['results'][0]['subjects']])
    return subjects

    

In [71]:
114 // 20

5

In [73]:
url = "https://api.propublica.org/congress/v1/115/bills/hr2810/subjects.json"
offset = 100
response = requests.get(url=url, headers={"X-API-Key": PROPUBLICA_API_KEY}, params={"offset": offset}).json()
response

{'status': 'OK',
 'copyright': 'Copyright (c) 2023 Pro Publica Inc. All Rights Reserved.',
 'num_results': 114,
 'offset': 100,
 'results': [{'congress': '115',
   'bill_id': 'hr2810-115',
   'bill_slug': 'hr2810',
   'bill_type': 'hr',
   'number': 'H.R.2810',
   'bill_uri': 'https://api.propublica.org/congress/v1/115/bills/hr2810.json',
   'url_number': 'hr2810',
   'title': 'To authorize appropriations for fiscal year 2018 for military activities of the Department of Defense and for military construction, to prescribe military personnel strengths for such fiscal year, and for other purposes.',
   'sponsor_title': 'Rep.',
   'sponsor_id': 'T000238',
   'sponsor_name': 'William M. Thornberry',
   'sponsor_state': 'TX',
   'sponsor_party': 'R',
   'sponsor_uri': 'https://api.propublica.org/congress/v1/members/T000238.json',
   'introduced_date': '2017-06-07',
   'number_of_cosponsors': 1,
   'committees': 'House Transportation and Infrastructure Committee',
   'latest_major_action_date

In [76]:
congress = 117
bills = bills_collection.find()
for bill in tqdm(bills):
    subjects = get_bill_subjects(bill['bill_slug'], congress)
    bills_collection.find_one_and_update(
        {"_id": bill['_id']},
        {"$set": {"subjects": subjects}})

984it [10:13,  1.77it/s]

In [8]:
bills = bills_collection.find()
with_subjects = 0
nonempty_subjects = 0
total = 0
for bill in bills:
    total += 1
    if "subjects" in bill:
        with_subjects += 1
print(with_subjects)
print(total)

8774
17823
