In [175]:
from bs4 import BeautifulSoup as soup
import requests
from utils import *
import unidecode
import textdistance

In [2]:
def get_html(url, timeout=30):
    """Download `url` and return the response body decoded as UTF-8 text.

    Args:
        url: Address to fetch with an HTTP GET.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout `requests` can block forever on an unresponsive host.

    Returns:
        The response body as a `str`.

    Raises:
        requests.HTTPError: the server answered with a 4xx/5xx status.
        requests.RequestException: connection failure or timeout.
        UnicodeDecodeError: the body is not valid UTF-8.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on error pages instead of returning their HTML as if it were data.
    response.raise_for_status()
    return response.content.decode("utf-8")

In [5]:
def get_soup(url, timeout=30):
    """Download `url` and parse the response body with BeautifulSoup.

    Args:
        url: Address to fetch with an HTTP GET.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout `requests` can block forever on an unresponsive host.

    Returns:
        A BeautifulSoup tree built with the "html.parser" backend from the raw
        response bytes.

    Raises:
        requests.HTTPError: the server answered with a 4xx/5xx status.
        requests.RequestException: connection failure or timeout.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on error pages instead of parsing them as if they were the listing.
    response.raise_for_status()
    return soup(response.content, "html.parser")

In [8]:
# Fetch and parse the OpenSecrets "revolving" top-list page (display=Z view).
# NOTE(review): presumably this view lists former members of Congress — confirm.
page_soup = get_soup("https://www.opensecrets.org/revolving/top.php?display=Z")

In [17]:
# Narrow the search to the element whose id is "rightColumn".
# find() yields None when no such element exists, so later cells assume it is present.
rightColumn = page_soup.find(id = "rightColumn")

In [18]:
# Every <ul> found anywhere inside the right column, in document order.
uls = rightColumn.find_all("ul")

In [21]:
# Position-based selection: assumes the first <ul> holds former senators and the
# second holds former representatives — TODO confirm if the page layout changes.
senate_list = uls[0]
house_list = uls[1]

In [30]:
# Preview every scraped <li>: the entry text minus its last three characters,
# followed by whether the entry contains at least one <img>.
# NOTE(review): the <img> presumably marks a lobbyist — confirm against the page legend.
for item in senate_list.find_all("li") + house_list.find_all("li"):
    print(item.text[:-3], bool(item.find_all("img")))

Abraham, Spencer  True
Allard, Wayne  True
Allen, George  False
Ashcroft, John  True
Bayh, Evan  False
Begich, Mark  False
Bond, Christopher Kit  True
Boschwitz, Rudy E  True
Breaux, John  True
Breaux, John Jr  True
Brown, Scott  True
Bryan, Richard H  True
Burris, Roland  False
Campbell, Ben Nighthorse  True
Chambliss, Saxby  False
Cleland, Max  True
Coburn, Tom  False
Cohen, William S  False
Coleman, Norm  True
Conrad, Kent  True
Corzine, Jon S  False
Craig, Larry  True
D'Amato, Alfonse M  True
Daschle, Tom  True
Deconcini, Dennis  True
DeMint, Jim  False
Dodd, Christopher  True
Dole, Bob  True
Dorgan, Byron  True
Durenberger, Dave  True
Faircloth, Lauch  True
Gorton, Slade  True
Gramm, Phil  True
Gregg, Judd  False
Hagan, Kay R  False
Hagel, Chuck  False
Hutchinson, Tim  True
Hutchison, Kay Bailey  False
Johanns, Mike  False
Johnston, J Bennett  True
Kasten, Robert W Jr  True
Kaufman, Ted  False
Kempthorne, Dirk  True
Kerrey, Bob  False
Kirk, Paul  True
Kyl, Jon  True
Landrieu, Mary

In [33]:
# One record per scraped <li> (senate entries first, then house entries):
#   "name"     - entry text with its last three characters dropped, then stripped
#   "lobbyist" - True when the entry contains at least one <img>
former_legs = [
    {
        "name": item.text[:-3].strip(),
        "lobbyist": bool(item.find_all("img")),
    }
    for item in senate_list.find_all("li") + house_list.find_all("li")
]

In [59]:
def find_by_name(leg_map, last_name, first_name):
    """Look up legislator records in `leg_map` by (fuzzy) name.

    Names are compared after normalisation: surrounding whitespace stripped,
    lower-cased, spaces and hyphens removed, and non-ASCII characters
    transliterated with `unidecode`.

    Matching proceeds in stages:
      1. Exact match on the normalised last name.
      2. If nothing matched, fall back to substring containment between the
         two last names (either direction). Empty names are excluded here,
         because the empty string is a substring of every name and would
         otherwise match every legislator.
      3. If several candidates remain, keep only those whose normalised first
         name equals `first_name` — unless that would leave no candidates.

    Args:
        leg_map: Mapping whose values are legislator records exposing
            record["name"]["last"] and record["name"]["first"].
        last_name: Last name to search for.
        first_name: First name used to break ties between candidates.

    Returns:
        The single matching record (a dict) when exactly one candidate
        remains; otherwise a list of the remaining candidates (possibly
        empty). Callers must handle both shapes.
    """
    def clean_name(s):
        return unidecode.unidecode(s.strip().lower().replace(" ", "").replace("-", ""))

    last_name = clean_name(last_name)
    first_name = clean_name(first_name)

    # Normalise every candidate's last name once; both stages below reuse it.
    candidates = [(clean_name(leg["name"]["last"]), leg) for leg in leg_map.values()]

    matches = [leg for cand_last, leg in candidates if cand_last == last_name]
    if len(matches) == 1:
        return matches[0]

    if not matches and last_name:
        # Substring fallback, e.g. "Balart" vs "Diaz-Balart". Skip empty
        # candidate names for the same reason the empty query is skipped.
        matches = [leg for cand_last, leg in candidates
                   if cand_last and (cand_last in last_name or last_name in cand_last)]

    if len(matches) > 1:
        same_first = [leg for leg in matches
                      if first_name == clean_name(leg["name"]["first"])]
        # Only narrow down when the first name actually matches someone.
        if same_first:
            matches = same_first

    if len(matches) == 1:
        return matches[0]
    return matches

In [69]:
def add_bioguide(former_legs, name, bioguide):
    """Set the "bioguide" key on every record whose "name" equals `name`.

    Records are modified in place; nothing is returned. If no record carries
    that exact name the call is a silent no-op.
    """
    matching = (entry for entry in former_legs if entry["name"] == name)
    for entry in matching:
        entry["bioguide"] = bioguide

In [36]:
# Open the project database. db_connect is not defined in this notebook
# (presumably provided by `from utils import *`); it yields three objects.
db, engine, meta = db_connect()

In [37]:
# Table handles derived from the metadata object returned by db_connect.
# NOTE(review): `tables` is not referenced again in the visible cells.
tables = db_tables(meta)

In [38]:
# Legislator records loaded through the database engine. Later cells index this
# mapping by bioguide id (e.g. leg_map["L000342"]) and iterate leg_map.values().
leg_map = get_leg_map(engine)

In [390]:
# Collect every former legislator that still has no bioguide id, together with
# its candidate records from leg_map ranked best-first, for manual resolution.
unfindable = []
for leg in former_legs:
    # Already linked to a bioguide id: nothing to resolve.
    if leg.get("bioguide") is not None:
        continue

    # Scraped names look like "Last, First ..."; tolerate an entry with no comma
    # instead of raising IndexError.
    names = leg["name"].split(", ")
    last_name = names[0]
    first_name = names[1] if len(names) > 1 else ""

    found = find_by_name(leg_map, last_name, first_name)
    # find_by_name returns a bare record (dict) for a unique match and a list
    # otherwise. Wrap the dict so sorted() ranks records instead of iterating
    # over the dict's keys (which made the key function raise TypeError).
    if isinstance(found, dict):
        found = [found]

    # Rank candidates by edit distance between "First Last" and the record's
    # Wikipedia title, falling back to the record's own first + last name when
    # no Wikipedia title is stored.
    found = sorted(found,
                   key = lambda x: textdistance.levenshtein(
                       first_name + " " + last_name,
                       x["id"].get("wikipedia")
                       or (x["name"]["first"] + " " + x["name"]["last"])))
    unfindable.append((leg["name"], found))

In [392]:
# Spot-check the second unresolved entry. The index 1 is hard-coded, so this
# cell assumes at least two unresolved entries and at least two candidates.
print(len(unfindable))        # how many names are still unresolved
print(len(unfindable[1][1]))  # number of candidate records for that entry
print(unfindable[1][0])       # the scraped name being resolved
unfindable[1][1][1]           # second-ranked candidate record for that name

3
2
Weldon, Curt


{'id': {'bioguide': 'W000267',
  'thomas': '01215',
  'govtrack': 400427,
  'opensecrets': 'N00002807',
  'votesmart': 22159,
  'fec': ['H4FL15114'],
  'wikipedia': 'Dave Weldon',
  'icpsr': 29509,
  'house_history': 23538,
  'wikidata': 'Q672269',
  'google_entity_id': 'kg:/m/0252yz'},
 'name': {'first': 'David',
  'middle': 'J.',
  'last': 'Weldon',
  'suffix': 'Jr.',
  'nickname': 'Dave'},
 'bio': {'birthday': '1953-08-31', 'gender': 'M'},
 'terms': [{'type': 'rep',
   'start': '1995-01-04',
   'end': '1997-01-03',
   'state': 'FL',
   'district': 15,
   'party': 'Republican'},
  {'type': 'rep',
   'start': '1997-01-07',
   'end': '1999-01-03',
   'state': 'FL',
   'district': 15,
   'party': 'Republican'},
  {'type': 'rep',
   'start': '1999-01-06',
   'end': '2001-01-03',
   'state': 'FL',
   'district': 15,
   'party': 'Republican'},
  {'type': 'rep',
   'start': '2001-01-03',
   'end': '2003-01-03',
   'state': 'FL',
   'district': 15,
   'party': 'Republican'},
  {'type': 'rep'

In [403]:
# Manual resolution: the scraped entry says "Bill" while the leg_map record
# L000342 stores first name "William", so the strict first-name match cannot find it.
add_bioguide(former_legs, "Lipinski, Bill", 'L000342')

In [412]:
save_csv(former_legs, "data/former_legs.csv")

In [409]:
for leg in former_legs:
    leg["lobbyist"] = 1 if leg["lobbyist"] else 0

In [410]:
# Display the whole list for inspection.
# NOTE(review): this dumps every record; a slice such as former_legs[:5] keeps the output short.
former_legs

[{'name': 'Abraham, Spencer',
  'lobbyist': 1,
  'bioguide': 'A000355',
  'experience': 3},
 {'name': 'Allard, Wayne',
  'lobbyist': 1,
  'bioguide': 'A000109',
  'experience': 9},
 {'name': 'Allen, George',
  'lobbyist': 0,
  'bioguide': 'A000121',
  'experience': 4},
 {'name': 'Ashcroft, John',
  'lobbyist': 1,
  'bioguide': 'A000356',
  'experience': 3},
 {'name': 'Bayh, Evan', 'lobbyist': 0, 'bioguide': 'B001233', 'experience': 6},
 {'name': 'Begich, Mark',
  'lobbyist': 0,
  'bioguide': 'B001265',
  'experience': 3},
 {'name': 'Bond, Christopher Kit',
  'lobbyist': 1,
  'bioguide': 'B000611',
  'experience': 12},
 {'name': 'Boschwitz, Rudy E',
  'lobbyist': 1,
  'bioguide': 'B000647',
  'experience': 6},
 {'name': 'Breaux, John',
  'lobbyist': 1,
  'bioguide': 'B000780',
  'experience': 16},
 {'name': 'Breaux, John Jr',
  'lobbyist': 1,
  'bioguide': 'B000780',
  'experience': 16},
 {'name': 'Brown, Scott',
  'lobbyist': 1,
  'bioguide': 'B001268',
  'experience': 2},
 {'name': 'B

In [402]:
# Inspect the leg_map record for bioguide id L000342 (the id used for "Lipinski, Bill").
leg_map["L000342"]

{'id': {'bioguide': 'L000342',
  'thomas': '00694',
  'govtrack': 400243,
  'opensecrets': 'N00004856',
  'icpsr': 15036,
  'wikipedia': 'Bill Lipinski',
  'house_history': 17020,
  'wikidata': 'Q2903280',
  'google_entity_id': 'kg:/m/0256c2'},
 'name': {'first': 'William', 'middle': 'O.', 'last': 'Lipinski'},
 'bio': {'birthday': '1937-12-22', 'gender': 'M'},
 'terms': [{'type': 'rep',
   'start': '1983-01-03',
   'end': '1985-01-03',
   'state': 'IL',
   'district': 5,
   'party': 'Democrat'},
  {'type': 'rep',
   'start': '1985-01-03',
   'end': '1987-01-03',
   'state': 'IL',
   'district': 5,
   'party': 'Democrat'},
  {'type': 'rep',
   'start': '1987-01-06',
   'end': '1989-01-03',
   'state': 'IL',
   'district': 5,
   'party': 'Democrat'},
  {'type': 'rep',
   'start': '1989-01-03',
   'end': '1991-01-03',
   'state': 'IL',
   'district': 5,
   'party': 'Democrat'},
  {'type': 'rep',
   'start': '1991-01-03',
   'end': '1993-01-03',
   'state': 'IL',
   'district': 5,
   'part

In [413]:
# Reload the saved records. Note the different name (former_leg, singular): rows
# read back from CSV hold every value as a string, unlike the in-memory former_legs.
# NOTE(review): the file shows columns this notebook does not create
# (e.g. cur_relations_score) — presumably written by another step; confirm.
former_leg = get_csv("data/former_legs.csv")

In [414]:
# Peek at the first reloaded row to check the column names and value types.
former_leg[0]

OrderedDict([('name', 'Abraham, Spencer'),
             ('lobbyist', '1'),
             ('bioguide', 'A000355'),
             ('experience', '3'),
             ('cur_relations_score', '40.76778133972673'),
             ('remaining_friends', '11')])

In [440]:
# Enrich each reloaded row with statistics from the legislator's most recent
# congress (the congress_map entry with the highest "congress" value).
for leg in former_leg:
    # Rows whose bioguide id is unknown to leg_map are left untouched.
    if leg["bioguide"] not in leg_map:
        continue
    leg_val = leg_map[leg["bioguide"]]
    congress_map = leg_val["congress_map"]
    # Nothing to copy when the legislator has no per-congress data.
    if not congress_map:
        continue

    last_congress = sorted(congress_map.values(), key = lambda x: x["congress"])[-1]

    # Rank/coefficient fields are copied together, keyed on the presence of
    # "committee_min_rank" alone.
    if "committee_min_rank" in last_congress:
        leg["last_min_committee_rank"] = last_congress["committee_min_rank"]
        leg["last_max_coeff"] = last_congress["max_coeff"]

    # These two are read unconditionally, so they must exist on every entry.
    leg["last_committee_rank_recips"] = last_congress["committee_rank_recips"]
    leg["last_committee_count"] = last_congress["committee_count"]

    if "cosponsors_per_bill" in last_congress:
        leg["last_cosponsors_per_bill"] = last_congress["cosponsors_per_bill"]

    leg["last_congress"] = last_congress["congress"]
    leg["chamber"] = last_congress["chamber"]

In [441]:
# Write the enriched rows back to the same file they were loaded from (overwrites it).
save_csv(former_leg, "data/former_legs.csv")

In [None]:
# Materialise every legislator record for inspection.
# NOTE(review): this cell was never executed (In [None]) and would dump the whole mapping.
list(leg_map.values())