In [1]:
from utils import *
from datetime import datetime

In [None]:
def get_json(path : str):
    """Return the parsed JSON content of the file at `path`."""
    with open(path, "r") as json_file:
        parsed = json.load(json_file)
    return parsed

In [None]:
def save_json(obj : object, path : str):
    """Serialize `obj` as JSON and write it to `path`.

    Any existing file at `path` is replaced.

    Args:
        obj: Any value that ``json.dump`` can serialize.
        path: Destination file path.
    """
    # Plain "w" is sufficient: the handle is only written to, never read back.
    with open(path, "w") as f:
        json.dump(obj, f)

In [None]:
# Load the legislator records from data/legislators_all.json.
legislators = get_json("data/legislators_all.json")

In [None]:
# Collapse each legislator's per-term party into a single "party" field:
# the one party seen across all terms, "multiple" if it ever differs, or
# None when no term records a party.
for legislator in legislators:
    resolved_party = None
    for term in legislator["terms"]:
        if "party" not in term:
            continue
        if resolved_party is None:
            resolved_party = term["party"]
        elif term["party"] != resolved_party:
            resolved_party = "multiple"
    legislator["party"] = resolved_party

In [None]:
# Keep only legislators whose last listed term ended after 1970-01-01.
# The "end" values are compared as strings, which orders correctly only for
# zero-padded ISO dates (YYYY-MM-DD) -- assumed from the sample records; confirm.
recent_legislators = [leg for leg in legislators if leg["terms"][-1]["end"] > "1970-01-01"]

In [None]:
# Give every recent legislator a single "state" field: the state of the first
# term when all terms agree, otherwise the marker "multiple".
for leg in recent_legislators:
    terms = leg["terms"]
    first_state = terms[0]["state"]
    distinct_states = {term["state"] for term in terms}
    leg["state"] = first_state if len(distinct_states) == 1 else "multiple"

In [None]:
# Persist the filtered list, including the derived "party" and "state" fields.
save_json(recent_legislators, "data/legislators_recent.json")

In [None]:
# Index legislators by id so bills can be joined to them later. Every
# legislator is reachable by bioguide id and, when present, by THOMAS id too.
recent_legislators_dict = {}
for leg in recent_legislators:
    ids = leg["id"]
    if "thomas" in ids:
        recent_legislators_dict[ids["thomas"]] = leg
    recent_legislators_dict[ids["bioguide"]] = leg

In [None]:
# Persist the id lookup. A legislator with a THOMAS id is serialized twice,
# once under each of its two keys.
save_json(recent_legislators_dict, "data/recent_legislators_map.json")

In [None]:
# Number of recent legislators whose party differed between terms.
sum(1 for leg in recent_legislators if leg["party"] == "multiple")

In [None]:
# Number of recent legislators who held terms in more than one state.
sum(1 for leg in recent_legislators if leg["state"] == "multiple")

In [None]:
# Reload the id lookup from disk. After the JSON round-trip the THOMAS-keyed
# and bioguide-keyed entries for one legislator are separate dict copies,
# not one shared object.
recent_legislators_map = get_json("data/recent_legislators_map.json")

In [None]:
# Load the Senate bill records.
senate_bills = get_json("data/senate_bills.json")

In [None]:
# Load the House bill records.
house_bills = get_json("data/house_bills.json")

In [None]:
# THOMAS id -> bioguide id pairs that are hard-coded instead of being looked up
# in recent_legislators_map (presumably because the id is missing there --
# TODO confirm).
THOMAS_TO_BIOGUIDE_OVERRIDES = {"01594": "T000461"}


def _ensure_bioguide_id(person):
    """Add a "bioguide_id" to a sponsor/cosponsor record that lacks one.

    The id is resolved from the record's "thomas_id", first through the
    hard-coded overrides and otherwise through ``recent_legislators_map``.
    Records that already have a "bioguide_id" are left untouched.

    Raises:
        KeyError: if the record has no "thomas_id" or the id cannot be resolved.
    """
    if "bioguide_id" in person:
        return
    thomas_id = person["thomas_id"]
    if thomas_id in THOMAS_TO_BIOGUIDE_OVERRIDES:
        person["bioguide_id"] = THOMAS_TO_BIOGUIDE_OVERRIDES[thomas_id]
    else:
        person["bioguide_id"] = recent_legislators_map[thomas_id]["id"]["bioguide"]


# Make sure every sponsor and cosponsor of every bill carries a bioguide id.
total = len(senate_bills) + len(house_bills)
for i, bill in enumerate(senate_bills + house_bills):
    try:
        _ensure_bioguide_id(bill["sponsor"])
        for cosponsor in bill["cosponsors"]:
            _ensure_bioguide_id(cosponsor)
    except Exception:
        # Show the offending bill, then re-raise with the original traceback.
        print(bill)
        raise
    if i % 10000 == 0:
        print("Finished {} of {}".format(i, total))

In [None]:
# Reload the unfiltered legislator list.
# NOTE(review): no later cell visible in this notebook reads `all_legislators`.
all_legislators = get_json("data/legislators_all.json")

In [None]:
# Overwrite the Senate bills file it was loaded from, now with bioguide ids filled in.
save_json(senate_bills, "data/senate_bills.json")

In [None]:
# Overwrite the House bills file it was loaded from, now with bioguide ids filled in.
save_json(house_bills, "data/house_bills.json")

In [None]:
# Display the first Senate bill record as a spot check.
senate_bills[0]

In [None]:
# Lookup of bill records keyed by "bill_id"; filled by the loop over all bills.
bills_map = {}

In [None]:
# Register every Senate and House bill under its "bill_id"; a repeated id
# keeps the record encountered last.
bills_map.update((bill["bill_id"], bill) for bill in senate_bills + house_bills)

In [None]:
# Persist the bill_id -> bill lookup.
save_json(bills_map, "data/bills_map.json")

In [None]:
# Start every legislator entry with its own empty sponsored/cosponsored lists.
for legislator in recent_legislators_map.values():
    legislator.update(sponsored_bills=[], cosponsored_bills=[])

In [None]:
# Display the first entry of the map to confirm the bill lists were added.
list(recent_legislators_map.values())[0]

In [None]:
# Attach a compact summary of every bill to its sponsor and to each cosponsor.
# One summary dict per bill is shared by all of the lists it is appended to.
total = len(senate_bills) + len(house_bills)
for i, bill in enumerate(senate_bills + house_bills, start=1):
    sponsor = bill["sponsor"]
    bill_info = dict(
        id=bill["bill_id"],
        sponsor_id=sponsor["bioguide_id"],
        type=bill["bill_type"],
        congress=int(bill["congress"]),
    )
    sponsor_entry = recent_legislators_map[sponsor["bioguide_id"]]
    sponsor_entry["sponsored_bills"].append(bill_info)
    for cosponsor in bill["cosponsors"]:
        cosponsor_entry = recent_legislators_map[cosponsor["bioguide_id"]]
        cosponsor_entry["cosponsored_bills"].append(bill_info)
    if i % 10000 == 0:
        print("Finished {} out of {}".format(i, total))
    

In [None]:
# Display the first Senate bill record.
senate_bills[0]

In [None]:
# Drop every entry whose key is not the legislator's own bioguide id (i.e. the
# THOMAS-keyed duplicates). Keys are collected first so the dict is not
# modified while it is being iterated.
thomas_ids = [
    key
    for key, value in recent_legislators_map.items()
    if key != value["id"]["bioguide"]
]

for key in thomas_ids:
    del recent_legislators_map[key]

In [None]:
# Persist the bioguide-keyed map, now carrying each legislator's bill lists.
save_json(recent_legislators_map, "data/recent_legislators_map_bills.json")

In [None]:
# Mean number of cosponsored bills per legislator in the map.
sum(len(entry["cosponsored_bills"]) for entry in recent_legislators_map.values()) / len(recent_legislators_map)

In [None]:
# One empty tally per legislator id; each is filled as
# cosponsor id -> sponsor id -> counts.
cosponsor_relations = {key: {} for key in recent_legislators_map.keys()}

In [None]:
# Count, for every (cosponsor, sponsor) pair, how many bills the cosponsor
# backed: overall under "total" and per congress under that congress's key.
# `total` (used only in the progress message) is the bill count computed in an
# earlier cell.
i = 0
for i, bill in enumerate(senate_bills + house_bills, start=1):
    sponsor = bill["sponsor"]
    sponsor_id = sponsor["bioguide_id"]
    for cosponsor in bill["cosponsors"]:
        cosponsor_id = cosponsor["bioguide_id"]
        tally = cosponsor_relations[cosponsor_id].setdefault(sponsor_id, {})
        congress = bill["congress"]
        previous_congress = tally.get(congress, 0)
        tally["total"] = tally.get("total", 0) + 1
        tally[congress] = previous_congress + 1

    if i % 10000 == 0:
        print("Finished {} of {}".format(i, total))

In [None]:
# Average cosponsorships per congress for every (cosponsor, sponsor) pair.
# Only the per-congress keys are counted in the divisor; the bookkeeping keys
# "total" and "average" are skipped, so re-running this cell gives the same
# result instead of counting the previously written "average" as a congress.
for cosponsor in cosponsor_relations.values():
    for sponsor in cosponsor.values():
        congress_count = sum(1 for key in sponsor if key not in ("total", "average"))
        sponsor["average"] = sponsor["total"] / congress_count

In [None]:
# Persist the cosponsor -> sponsor tallies (totals, per-congress counts, averages).
save_json(cosponsor_relations, "data/cosponsor_relations.json")

In [None]:
# Pick one cosponsor's tallies for a manual sanity check
# (B000944 is Sherrod Brown's bioguide id in the legislator records).
test_cosponsor = cosponsor_relations["B000944"]

In [None]:
# Rank the sponsors this legislator cosponsored, highest per-congress average first.
sorted(test_cosponsor.items(), key = lambda pair: pair[1]["average"], reverse = True)

In [None]:
# Look up a single legislator record by bioguide id.
recent_legislators_map['R000146']

In [None]:
# Show the legislator record for the first key in cosponsor_relations.
recent_legislators_map[list(cosponsor_relations.keys())[0]]

In [32]:
# Reload the bioguide-keyed legislator map (with bill lists) from disk.
leg_map = get_json("data/recent_legislators_map_bills.json")

In [33]:
# Load the member table with NOMINATE scores. get_csv comes from utils (not
# shown); later code treats each row as a mapping keyed by column name.
nominate_csv = get_csv("data/HSall_members.csv")

In [5]:
# Number of rows loaded from the member table.
len(nominate_csv)

49358

In [39]:
# Copy the NOMINATE dim1/dim2 scores onto each legislator found in leg_map.
# Rows without a usable bioguide id, for legislators outside leg_map, or whose
# scores cannot be parsed as floats are skipped. If a bioguide id appears in
# several rows, the last parsable row wins.
for row in nominate_csv:
    if "bioguide_id" not in row or len(row["bioguide_id"]) == 0:
        continue
    bioguide = row["bioguide_id"]
    if bioguide not in leg_map:
        continue
    try:
        scores = {
            "dim1" : float(row["nominate_dim1"]),
            "dim2" : float(row["nominate_dim2"])
        }
    except (KeyError, TypeError, ValueError):
        # Missing column, missing value, or blank/non-numeric score: the merge
        # stays best-effort, but unrelated errors are no longer hidden.
        continue
    leg_map[bioguide]["nominate"] = scores

In [43]:
# Persist the legislator map with the "nominate" scores attached where available.
save_json(leg_map, "data/recent_legislators_map_nominate.json")

In [40]:
# Legislators that have no "nominate" entry.
missing = [leg for leg in leg_map.values() if "nominate" not in leg]

In [42]:
# Display the first legislator record in the map.
list(leg_map.values())[0]

{'id': {'bioguide': 'B000944',
  'thomas': '00136',
  'lis': 'S307',
  'govtrack': 400050,
  'opensecrets': 'N00003535',
  'votesmart': 27018,
  'fec': ['H2OH13033', 'S6OH00163'],
  'cspan': 5051,
  'wikipedia': 'Sherrod Brown',
  'house_history': 9996,
  'ballotpedia': 'Sherrod Brown',
  'maplight': 168,
  'icpsr': 29389,
  'wikidata': 'Q381880',
  'google_entity_id': 'kg:/m/034s80'},
 'name': {'first': 'Sherrod',
  'last': 'Brown',
  'official_full': 'Sherrod Brown'},
 'bio': {'birthday': '1952-11-09', 'gender': 'M'},
 'terms': [{'type': 'rep',
   'start': '1993-01-05',
   'end': '1995-01-03',
   'state': 'OH',
   'district': 13,
   'party': 'Democrat'},
  {'type': 'rep',
   'start': '1995-01-04',
   'end': '1997-01-03',
   'state': 'OH',
   'district': 13,
   'party': 'Democrat'},
  {'type': 'rep',
   'start': '1997-01-07',
   'end': '1999-01-03',
   'state': 'OH',
   'district': 13,
   'party': 'Democrat'},
  {'type': 'rep',
   'start': '1999-01-06',
   'end': '2001-01-03',
   'sta

In [30]:
# Names of the legislators that have no "nominate" entry.
[x["name"] for x in missing]

[{'first': 'Eleanor',
  'middle': 'Holmes',
  'last': 'Norton',
  'official_full': 'Eleanor Holmes Norton'},
 {'first': 'Gregorio',
  'last': 'Sablan',
  'official_full': 'Gregorio Kilili Camacho Sablan',
  'middle': 'Kilili Camacho'},
 {'first': 'Stacey',
  'last': 'Plaskett',
  'official_full': 'Stacey E. Plaskett'},
 {'first': 'Aumua',
  'last': 'Amata',
  'official_full': 'Aumua Amata Coleman Radewagen'},
 {'first': 'Jenniffer',
  'last': 'González-Colón',
  'official_full': 'Jenniffer González-Colón'},
 {'first': 'Michael',
  'middle': 'F. Q.',
  'last': 'San Nicolas',
  'official_full': 'Michael F. Q. San Nicolas'},
 {'first': 'Jorge', 'middle': 'Luis', 'last': 'Córdova Díaz'},
 {'first': 'Jaime', 'last': 'Benítez'},
 {'first': 'Melvin', 'middle': 'Herbert', 'last': 'Evans'},
 {'first': 'Baltasar', 'last': 'Corrada-del Río'},
 {'first': 'Antonio', 'middle': 'Borja', 'last': 'Won Pat'},
 {'first': 'Fofó', 'middle': 'Iosefa Fiti', 'last': 'Sunia'},
 {'first': 'Walter', 'middle': 'E

In [49]:
# Map ICPSR id -> bioguide id, and set aside the legislators that have no
# ICPSR id at all.
icpsr_map = {}
no_icpsr = []
for leg in leg_map.values():
    ids = leg["id"]
    if "icpsr" not in ids:
        no_icpsr.append(leg)
        continue
    icpsr_map[ids["icpsr"]] = ids["bioguide"]

In [47]:
# Number of distinct ICPSR ids in the map.
len(icpsr_map)

2394

In [50]:
# Display the full records of legislators lacking an ICPSR id (large output).
no_icpsr

[{'id': {'bioguide': 'N000147',
   'thomas': '00868',
   'govtrack': 400295,
   'opensecrets': 'N00001692',
   'votesmart': 775,
   'fec': ['H0DC00058'],
   'cspan': 882,
   'wikipedia': 'Eleanor Holmes Norton',
   'house_history': 19016,
   'maplight': 390,
   'wikidata': 'Q461649',
   'google_entity_id': 'kg:/m/01s_vp'},
  'name': {'first': 'Eleanor',
   'middle': 'Holmes',
   'last': 'Norton',
   'official_full': 'Eleanor Holmes Norton'},
  'bio': {'birthday': '1937-06-13', 'gender': 'F'},
  'terms': [{'type': 'rep',
    'start': '1991-01-03',
    'end': '1993-01-03',
    'state': 'DC',
    'district': 0,
    'party': 'Democrat'},
   {'type': 'rep',
    'start': '1993-01-05',
    'end': '1995-01-03',
    'state': 'DC',
    'district': 0,
    'party': 'Democrat'},
   {'type': 'rep',
    'start': '1995-01-04',
    'end': '1997-01-03',
    'state': 'DC',
    'district': 0,
    'party': 'Democrat'},
   {'type': 'rep',
    'start': '1997-01-07',
    'end': '1999-01-03',
    'state': 'DC'

In [52]:
# Load the House assignment rows (get_csv comes from utils, not shown here).
house_assignments = get_csv("data/house_assignments_103-115.csv")

In [53]:
# Load the Senate assignment rows (get_csv comes from utils, not shown here).
senate_assignments = get_csv("data/senate_assignments_103-115.csv")

In [65]:
# Tag every assignment row with the bioguide id that matches its ICPSR id
# (column "ID #"). Rows whose id is missing, non-numeric, or unknown to
# icpsr_map are collected in `no_ids` and left without a "bioguide" key.
no_ids = []
for assignment in house_assignments + senate_assignments:
    try:
        icpsr_id = int(assignment["ID #"])
        assignment["bioguide"] = icpsr_map[icpsr_id]
    except (KeyError, TypeError, ValueError):
        # The original appended to the undefined name `no_id`, which raised a
        # NameError from inside the handler on a fresh kernel.
        no_ids.append(assignment)

In [70]:
# Overwrite both assignment files they were loaded from, now with the
# "bioguide" column. Rows that could not be matched have no "bioguide" key;
# how save_csv (from utils, not shown) handles such rows is not visible here
# -- TODO confirm.
save_csv(house_assignments, "data/house_assignments_103-115.csv")
save_csv(senate_assignments, "data/senate_assignments_103-115.csv")

dict_keys([29389, 39310, 15408, 15015, 40703, 49300, 40700, 29373, 29147, 29732, 40702, 40704, 40707, 29534, 40304, 49703, 40305, 15021, 49706, 29566, 15424, 14921, 40908, 29142, 40902, 14852, 40906, 29924, 40909, 20735, 40916, 40915, 29701, 21143, 29940, 21110, 40910, 20758, 20357, 29339, 20351, 29588, 41101, 29735, 20101, 29760, 21193, 20709, 21132, 20355, 29548, 20340, 29323, 20146, 20757, 20356, 20919, 20708, 29550, 20955, 21172, 20733, 20147, 20517, 39301, 20748, 20344, 20531, 20952, 15019, 20501, 20706, 29345, 21106, 20533, 29717, 20108, 15410, 29710, 29109, 21179, 20959, 20316, 29571, 29561, 21174, 15603, 29312, 21178, 21182, 20518, 20521, 20941, 20958, 21112, 21165, 20527, 21103, 29762, 14226, 20124, 20962, 20529, 21191, 20305, 20916, 21139, 21149, 29337, 20930, 21187, 20519, 20907, 20713, 41107, 14873, 21142, 29573, 21162, 39305, 20712, 41111, 20738, 15029, 21140, 21167, 29769, 29375, 20325, 21128, 20704, 20136, 21166, 20145, 29908, 20755, 14307, 29778, 41110, 15431, 20508, 20