In [None]:
import csv
import functools
import json
import time

import pandas as pd
import requests

## Get list of bill summaries

In [None]:
# Root URL of the Congress.gov v3 summaries endpoint.
base_url = "https://api.congress.gov/v3/summaries/"

# The API key lives in a local JSON file under the "apikey" field, so it never
# appears in the notebook. The context manager closes the file handle once the
# key has been read.
with open("LibraryOfCongress.json", 'r') as key_file:
    congress_apikey = json.load(key_file)["apikey"]

# Congress numbers as strings, newest first: "118" down to "107".
congress_nums = [str(i) for i in range(118, 106, -1)]

# Bill-type path segments that are queried for every congress.
bill_types = ["hr", "s", "hjres", "sjres"]

In [None]:
# Download every summary for each (congress, bill type) pair and append the
# short ones to bill_summaries.csv as rows of: congress, type, number, text.
# The file is opened in append mode, so re-running this cell adds rows again.
limit = 250                 # page size requested from the API
MAX_SUMMARY_CHARS = 3000    # summaries at or above this length are not saved

with open("bill_summaries.csv", mode='a', newline='') as f:
    writer = csv.writer(f, delimiter=",")
    for congress_num in congress_nums:
        for bill_type in bill_types:
            url = (base_url + str(congress_num) + "/" + bill_type
                   + "?fromDateTime=2001-01-01T00:00:00Z&toDateTime=2024-05-01T00:00:00Z"
                   + "&sort=updateDate+asc&limit=" + str(limit)
                   + "&api_key=" + congress_apikey)
            offset = 0

            while True:
                response_data = requests.get(url + "&offset=" + str(offset)).json()

                try:
                    summaries = response_data["summaries"]
                    for summary in summaries:
                        if len(summary["text"]) < MAX_SUMMARY_CHARS:
                            bill = summary["bill"]
                            writer.writerow([bill["congress"],
                                             bill["type"],
                                             bill["number"],
                                             summary["text"]])
                except Exception:
                    # Show the payload that could not be processed, then re-raise
                    # so the real error and its traceback stay visible.
                    print(response_data)
                    raise

                # A page shorter than the page size (including an empty one) is the last.
                if len(summaries) < limit:
                    break

                offset += limit
                if offset % 10000 == 0:
                    print("Processed", str(offset), "Bills from Congress", str(congress_num), bill_type)

            print("Processed Congress", str(congress_num), bill_type)

### Deduplicate the summaries file in place (this overwrites `bill_summaries.csv`, so run it only once per scrape)

In [None]:
# Load the raw scrape. It is read with header=None, so the column names are
# supplied here.
summaries = pd.read_csv(
    "bill_summaries.csv",
    header=None,
    names=["congress_num", "bill_type", "bill_num", "summary"],
)

# Rows that share congress, type and number are duplicates; keep the first one.
bill_key = ["congress_num", "bill_type", "bill_num"]
summaries_deduped = summaries.drop_duplicates(subset=bill_key, keep="first")

# Write the result back over the input file. to_csv's defaults add a header row
# and put the DataFrame index in a leading unnamed column.
summaries_deduped.to_csv("bill_summaries.csv")

## Get the number of D and R Sponsors of each bill

In [None]:
_BILL_API_ROOT = "https://api.congress.gov/v3/bill/"
_PAGE_SIZE = 250               # value sent as the API's `limit` parameter
_REQUEST_TIMEOUT_SECONDS = 30  # fail instead of hanging forever on a stalled connection


def _get_api_json(url, offset=0):
    """Fetch one page from the API and return the decoded JSON body.

    Raises:
        Exception: with the message "Rate limit exceeded" on HTTP 429.
        requests.HTTPError: on any other 4xx/5xx status.
    """
    response = requests.get(
        url,
        params={"limit": _PAGE_SIZE, "offset": offset, "api_key": congress_apikey},
        timeout=_REQUEST_TIMEOUT_SECONDS,
    )
    if response.status_code == 429:  # the API signals rate limiting with 429
        raise Exception("Rate limit exceeded")
    # Surface every other HTTP error here, not as a KeyError on the payload.
    response.raise_for_status()
    return response.json()


# The Democratic and Republican counters are called one after the other for the
# same bill, so a small cache lets the second call reuse the first call's
# responses and halves the number of API requests. Calls that raise are not cached.
@functools.lru_cache(maxsize=128)
def _sponsor_parties(congress_num, bill_type, bill_num):
    """Return a tuple with the party of every sponsor and cosponsor of a bill.

    Members whose record has no "party" field contribute None.
    """
    bill_url = _BILL_API_ROOT + congress_num + "/" + bill_type + "/" + bill_num

    # Sponsors are read from the bill detail response itself.
    # NOTE(review): the earlier code re-requested this URL with offset=250 when
    # exactly 250 sponsors came back; that branch is dropped on the assumption
    # that the bill detail endpoint is not paginated. Confirm against the API docs.
    sponsors = _get_api_json(bill_url)["bill"]["sponsors"]
    parties = [sponsor.get("party") for sponsor in sponsors]

    # Cosponsors are paged; keep fetching until a page comes back short.
    offset = 0
    while True:
        page = _get_api_json(bill_url + "/cosponsors", offset)["cosponsors"]
        parties.extend(cosponsor.get("party") for cosponsor in page)
        if len(page) < _PAGE_SIZE:
            break
        offset += _PAGE_SIZE

    return tuple(parties)


def _count_party_sponsors(congress_num, bill_type, bill_num, party):
    """Count the sponsors and cosponsors of a bill whose party equals `party`."""
    # Normalise the arguments so equivalent calls share one cache entry.
    parties = _sponsor_parties(str(congress_num), bill_type.lower(), str(bill_num))
    return parties.count(party)


def get_num_dem_sponsors(congress_num, bill_type, bill_num):
    """Return the number of sponsors plus cosponsors of a bill with party "D".

    Args:
        congress_num: Congress number (int or str).
        bill_type: bill type such as "hr" or "S"; it is lower-cased for the URL.
        bill_num: bill number (int or str).

    Raises:
        Exception: "Rate limit exceeded" when the API answers HTTP 429.
        requests.HTTPError: on any other HTTP error status.
    """
    return _count_party_sponsors(congress_num, bill_type, bill_num, "D")


def get_num_rep_sponsors(congress_num, bill_type, bill_num):
    """Return the number of sponsors plus cosponsors of a bill with party "R".

    Args:
        congress_num: Congress number (int or str).
        bill_type: bill type such as "hr" or "S"; it is lower-cased for the URL.
        bill_num: bill number (int or str).

    Raises:
        Exception: "Rate limit exceeded" when the API answers HTTP 429.
        requests.HTTPError: on any other HTTP error status.
    """
    return _count_party_sponsors(congress_num, bill_type, bill_num, "R")

In [None]:
# Resume point: rows whose first column (the index saved with the summaries
# file) is below this value are skipped. With 0 the header row is written too.
starting_position = 145749
RATE_LIMIT_SLEEP_SECONDS = 60 * 60

# Both files are opened with newline="" as the csv module requires, so that
# newlines inside quoted fields and row terminators are handled by csv itself.
with open("bill_summaries.csv", mode="r", newline="") as infile, \
        open("bill_summaries_and_sponsors.csv", mode="a", newline="") as outfile:
    reader = csv.reader(infile)
    writer = csv.writer(outfile)

    header = next(reader)
    if starting_position == 0:
        writer.writerow(header + ["D Sponsors", "R Sponsors"])

    # Resolve the column positions once, outside the try block, so a missing
    # column fails immediately and is not mistaken for a rate-limit error.
    congress_col = header.index("congress_num")
    type_col = header.index("bill_type")
    number_col = header.index("bill_num")

    def sponsor_counts(row):
        """Return [Democratic count, Republican count] for the bill in `row`."""
        bill = (row[congress_col], row[type_col], row[number_col])
        return [get_num_dem_sponsors(*bill), get_num_rep_sponsors(*bill)]

    for row in reader:
        position = int(row[0])
        if position < starting_position:
            continue
        try:
            counts = sponsor_counts(row)
        except Exception as e:
            # Any failure is treated as the rate limit: wait, then retry once.
            # A second failure propagates and stops the run.
            print(e, row[0:4])
            print(time.strftime("%H:%M:%S", time.localtime()), "Sleeping for an hour for rate limit to refresh")
            time.sleep(RATE_LIMIT_SLEEP_SECONDS)
            counts = sponsor_counts(row)
        writer.writerow(row + counts)
        if position % 1000 == 0:
            print("Completed Bill " + str(row[0]))