In [1]:
from bs4 import BeautifulSoup as soup
import requests
from utils import *
import unidecode
import textdistance

In [None]:
def get_html(url):
    """Fetch `url` with requests and return the response body decoded as UTF-8 text."""
    response = requests.get(url)
    raw_bytes = response.content
    return raw_bytes.decode("utf-8")

In [None]:
def get_soup(url):
    """Download `url` and parse the raw response bytes with the "html.parser" backend."""
    page_bytes = requests.get(url).content
    parsed_page = soup(page_bytes, "html.parser")
    return parsed_page

In [None]:
# Download and parse the OpenSecrets "revolving" top.php page (display=Z);
# every scraping cell below reads from this parsed document.
page_soup = get_soup("https://www.opensecrets.org/revolving/top.php?display=Z")

In [None]:
# Select the element with id="rightColumn"; BeautifulSoup's find() yields None
# when no such element exists, in which case the next lookup on it will fail.
rightColumn = page_soup.find(id = "rightColumn")

In [None]:
# Every <ul> nested inside the right column, in document order.
uls = rightColumn.find_all("ul")

In [None]:
# The first two <ul> elements are taken as the Senate and House lists
# respectively — NOTE(review): this depends on the page's list order; confirm.
senate_list = uls[0]
house_list = uls[1]

In [None]:
# Preview every <li> from both lists: its text minus the last three characters,
# and whether the item contains at least one <img> tag.
for item in senate_list.find_all("li") + house_list.find_all("li"):
    print(item.text[:-3], bool(item.find_all("img")))

In [None]:
# One record per <li> across both lists: the stripped item text (minus its
# last three characters) and a flag that is True when the item holds an <img>.
former_legs = [
    {
        "name": item.text[:-3].strip(),
        "lobbyist": bool(item.find_all("img")),
    }
    for item in senate_list.find_all("li") + house_list.find_all("li")
]

In [None]:
def find_by_name(leg_map, last_name, first_name):
    """Look up records in `leg_map` by last name, narrowing by first name.

    Names are compared after normalisation: surrounding whitespace stripped,
    lower-cased, spaces and hyphens removed, then passed through unidecode.

    Matching proceeds in stages:
      1. exact match on the normalised last name;
      2. if nothing matched, substring match in either direction;
      3. if several remain, keep only those with an identical normalised
         first name — unless that would leave nothing.

    Returns the single matching record when exactly one remains; otherwise
    returns the list of remaining records (possibly empty).
    """
    def clean_name(s):
        compact = s.strip().lower().replace(" ", "").replace("-", "")
        return unidecode.unidecode(compact)

    wanted_last = clean_name(last_name)
    wanted_first = clean_name(first_name)

    # Stage 1: exact last-name match.
    matches = []
    for leg in leg_map.values():
        if clean_name(leg["name"]["last"]) == wanted_last:
            matches.append(leg)
    if len(matches) == 1:
        return matches[0]

    # Stage 2: fall back to containment in either direction.
    if not matches:
        for leg in leg_map.values():
            candidate_last = clean_name(leg["name"]["last"])
            if candidate_last in wanted_last or wanted_last in candidate_last:
                matches.append(leg)

    # Stage 3: disambiguate on first name, but never filter down to nothing.
    if len(matches) > 1:
        same_first = [
            leg for leg in matches
            if wanted_first == clean_name(leg["name"]["first"])
        ]
        if same_first:
            matches = same_first

    return matches[0] if len(matches) == 1 else matches

In [None]:
def add_bioguide(former_legs, name, bioguide):
    """Set the "bioguide" field, in place, on every record whose "name" equals `name`.

    Records with a different name are left untouched; nothing is returned.
    """
    matching = (entry for entry in former_legs if entry["name"] == name)
    for entry in matching:
        entry["bioguide"] = bioguide

In [2]:

# Open the database handles used below. db_connect is not defined in this
# notebook — presumably it arrives via `from utils import *`; confirm there.
db, engine, meta = db_connect()

In [3]:
# Derive table handles from the metadata object (db_tables is defined outside
# this notebook — presumably in utils).
tables = db_tables(meta)

In [4]:
# Load the legislator lookup. Later cells index it by bioguide id and read
# "name", "id", "party" and "congress_map" from its values.
leg_map = get_leg_map(engine)

In [5]:
# Reload the previously saved records from disk; this rebinds former_legs,
# replacing whatever the scraping cells above produced.
former_legs = get_csv("data/former_legs.csv")

In [None]:
# For every former legislator that still lacks a bioguide id, collect the
# candidate records from leg_map, ordered by edit distance between
# "First Last" and each candidate's wikipedia id.
unfindable = []
for leg in former_legs:
    if leg.get("bioguide") is not None:
        continue

    # Names are stored as "Last, First"; tolerate an entry with no first name
    # instead of raising IndexError.
    names = leg["name"].split(", ")
    last_name = names[0]
    first_name = names[1] if len(names) > 1 else ""

    match = find_by_name(leg_map, last_name, first_name)
    # find_by_name returns a bare record on a unique match and a list
    # otherwise. Normalise to a list so sorted() never iterates over the keys
    # of a single record (which made the key function raise TypeError).
    candidates = match if isinstance(match, list) else [match]

    target = first_name + " " + last_name
    # Alternative sort key: x["name"]["first"] + " " + x["name"]["last"]
    found = sorted(
        candidates,
        key=lambda x: textdistance.levenshtein(target, x["id"]["wikipedia"]),
    )
    unfindable.append((leg["name"], found))

In [None]:
# Spot-check the second unresolved entry (index 1): total number of entries,
# how many candidates it has, its name, and its second-ranked candidate.
# Raises IndexError if there are fewer than two entries or candidates.
print(len(unfindable))
print(len(unfindable[1][1]))
print(unfindable[1][0])
unfindable[1][1][1]

In [None]:
# Manual resolution: assign bioguide id L000342 to the "Lipinski, Bill" entry.
add_bioguide(former_legs, "Lipinski, Bill", 'L000342')

In [None]:
# Write the records back to the CSV they were loaded from (save_csv is defined
# outside this notebook — presumably in utils).
save_csv(former_legs, "data/former_legs.csv")

In [None]:
# Collapse each record's "lobbyist" value to 1 (truthy) or 0 (falsy).
# NOTE(review): if the values were read back from CSV as non-empty strings,
# every one of them is truthy — confirm what get_csv returns.
for leg in former_legs:
    leg["lobbyist"] = int(bool(leg["lobbyist"]))

In [None]:
# Display the full list of records (can be long; slice it for a quick look).
former_legs

In [None]:
# Inspect the leg_map record for bioguide id L000342 (the id assigned to
# "Lipinski, Bill" earlier in this notebook).
leg_map["L000342"]

In [13]:
# Reload the saved records again before enriching them; any unsaved in-memory
# changes to former_legs are discarded by this rebinding.
former_legs = get_csv("data/former_legs.csv")

In [None]:
# Peek at the first loaded record.
former_legs[0]

In [14]:
# Copy statistics from each legislator's most recent congress onto their
# former_legs record. Records whose bioguide id is not in leg_map, or whose
# congress_map is empty, are left untouched.
for leg in former_legs:
    if leg["bioguide"] not in leg_map:
        continue
    leg_val = leg_map[leg["bioguide"]]
    congress_map = leg_val["congress_map"]
    if len(congress_map) == 0:
        continue

    # The entry with the highest "congress" number is the most recent one.
    last_congress = sorted(congress_map.values(), key = lambda x: x["congress"])[-1]

    # Committee rank and coefficient are only copied when a minimum committee
    # rank was recorded for that congress.
    if "committee_min_rank" in last_congress:
        leg["last_min_committee_rank"] = last_congress["committee_min_rank"]
        leg["last_max_coeff"] = last_congress["max_coeff"]
    leg["last_committee_rank_recips"] = last_congress["committee_rank_recips"]
    leg["last_committee_count"] = last_congress["committee_count"]
    # Optional field: copied once, only when present for that congress.
    if "cosponsors_per_bill" in last_congress:
        leg["last_cosponsors_per_bill"] = last_congress["cosponsors_per_bill"]
    leg["last_congress"] = last_congress["congress"]
    leg["last_leadership"] = last_congress["leadership"]
    leg["last_cosponsored"] = last_congress["bills_cosponsored"]
    leg["chamber"] = last_congress["chamber"]

In [None]:
# Copy each matched legislator's party from leg_map onto their record.
# NOTE(review): an unfinished `leg["last_leadership"] =` assignment with no
# right-hand side was removed here; it made this whole cell a SyntaxError.
for leg in former_legs:
    if leg["bioguide"] in leg_map:
        leg_val = leg_map[leg["bioguide"]]
        leg["party"] = leg_val["party"]

In [15]:
# Persist the enriched records, overwriting data/former_legs.csv.
save_csv(former_legs, "data/former_legs.csv")

In [7]:
# Inspect the "cosponsored_bills" list of the first record in leg_map.
list(leg_map.values())[0]["cosponsored_bills"]

[{'id': 's34-111', 'sponsor_id': 'D000595', 'type': 's', 'congress': 111},
 {'id': 's544-111', 'sponsor_id': 'E000285', 'type': 's', 'congress': 111},
 {'id': 's370-111', 'sponsor_id': 'I000024', 'type': 's', 'congress': 111},
 {'id': 's941-111', 'sponsor_id': 'C000880', 'type': 's', 'congress': 111},
 {'id': 's348-111', 'sponsor_id': 'R000361', 'type': 's', 'congress': 111},
 {'id': 's144-111', 'sponsor_id': 'K000148', 'type': 's', 'congress': 111},
 {'id': 's371-111', 'sponsor_id': 'T000250', 'type': 's', 'congress': 111},
 {'id': 's527-111', 'sponsor_id': 'T000250', 'type': 's', 'congress': 111},
 {'id': 's1401-111', 'sponsor_id': 'M001162', 'type': 's', 'congress': 111},
 {'id': 's1065-111', 'sponsor_id': 'B000953', 'type': 's', 'congress': 111},
 {'id': 's3735-111', 'sponsor_id': 'L000035', 'type': 's', 'congress': 111},
 {'id': 's1259-111', 'sponsor_id': 'K000352', 'type': 's', 'congress': 111},
 {'id': 's455-111', 'sponsor_id': 'R000307', 'type': 's', 'congress': 111},
 {'id': '