In [1]:
from utils import *
import math
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
import sqlalchemy
from sqlalchemy import Column, Integer, Text  
from sqlalchemy.dialects.postgresql import JSON, JSONB

In [2]:
# Open the project database. db_connect is provided by the star import from
# utils; presumably it returns the connection, the SQLAlchemy engine and the
# table metadata, in that order — confirm in utils.
db, engine, meta = db_connect()

In [3]:
# Table handles derived from the metadata object (db_tables comes from utils).
tables = db_tables(meta)

In [4]:
# Legislator records, looked up by legislator id in the scoring functions in
# this notebook (they index leg_map[<id>]).
leg_map = get_leg_map(engine)

In [5]:
# Nested mapping used by relations_score: cosponsor_relations[a_id][b_id] is a
# dict that carries at least "total" and "average" entries.
cosponsor_relations = get_json("data/cosponsor_relations.json")

In [None]:
# NOTE(review): bills_map is not referenced by any later cell in this notebook
# and this cell has no execution count — confirm whether it is still needed.
bills_map = get_json("data/bills_map.json")

In [6]:
def relations_score(a_id, b_id):
    """Return a symmetric cosponsorship score for a pair of legislators.

    The score is sqrt(shared_years * average(a->b) * average(b->a)), where the
    two averages are read from the module-level ``cosponsor_relations``
    mapping. A missing direction is treated as a relation with zero total and
    zero average.

    Args:
        a_id: key of the first legislator in ``cosponsor_relations``.
        b_id: key of the second legislator in ``cosponsor_relations``.

    Returns:
        The score as a float.

    Raises:
        KeyError: if either id is not a top-level key of
            ``cosponsor_relations``.
    """
    no_relation = {"total" : 0, "average" : 0}
    a_to_b = cosponsor_relations[a_id].get(b_id, no_relation)
    b_to_a = cosponsor_relations[b_id].get(a_id, no_relation)
    # Every entry besides "total" and "average" is counted towards the number
    # of years (presumably one entry per year — confirm against the JSON).
    year_counts = (len(a_to_b) - 2, len(b_to_a) - 2)
    shared_years = min(year_counts)
    return math.sqrt(shared_years * a_to_b["average"] * b_to_a["average"])

In [7]:
def nominate_distance(a_id, b_id):
    """Euclidean distance between two legislators' NOMINATE coordinates.

    Coordinates are read from ``leg_map[<id>]["nominate"]`` using the
    ``"dim1"`` and ``"dim2"`` entries.

    Args:
        a_id: key of the first legislator in ``leg_map``.
        b_id: key of the second legislator in ``leg_map``.

    Returns:
        The distance as a float, or -1 when either legislator has no
        ``"nominate"`` entry.
    """
    # Look the records up lazily so the second id is only touched when the
    # first legislator actually has coordinates.
    leg_a = leg_map[a_id]
    if "nominate" not in leg_a:
        return -1
    leg_b = leg_map[b_id]
    if "nominate" not in leg_b:
        return -1

    point_a = leg_a["nominate"]
    point_b = leg_b["nominate"]
    delta_dim1 = point_a["dim1"] - point_b["dim1"]
    delta_dim2 = point_a["dim2"] - point_b["dim2"]
    return math.sqrt(delta_dim1**2 + delta_dim2**2)

In [8]:
def same_state(a_id, b_id):
    """Return 1 when both legislators have equal ``"state"`` values, else 0.

    The ``"state"`` entry is read from the top level of each ``leg_map``
    record.
    """
    if leg_map[a_id]["state"] == leg_map[b_id]["state"]:
        return 1
    return 0

In [9]:
# Ids of every legislator that has an entry in cosponsor_relations; this is the
# population that score_all pairs up.
keys = list(cosponsor_relations.keys())

In [10]:
# Peek at one legislator record to see which fields are available.
list(leg_map.values())[0]

{'id': {'bioguide': 'W000802',
  'thomas': '01823',
  'lis': 'S316',
  'govtrack': 412247,
  'opensecrets': 'N00027533',
  'votesmart': 2572,
  'fec': ['S6RI00221'],
  'cspan': 92235,
  'wikipedia': 'Sheldon Whitehouse',
  'ballotpedia': 'Sheldon Whitehouse',
  'maplight': 728,
  'icpsr': 40704,
  'wikidata': 'Q652066',
  'google_entity_id': 'kg:/m/07qw94'},
 'name': {'first': 'Sheldon',
  'last': 'Whitehouse',
  'official_full': 'Sheldon Whitehouse'},
 'bio': {'gender': 'M', 'birthday': '1955-10-20'},
 'terms': [{'type': 'sen',
   'start': '2007-01-04',
   'end': '2013-01-03',
   'state': 'RI',
   'class': 1,
   'party': 'Democrat',
   'url': 'http://whitehouse.senate.gov/',
   'address': '717 HART SENATE OFFICE BUILDING WASHINGTON DC 20510',
   'phone': '202-224-2921',
   'fax': '202-228-6362',
   'contact_form': 'http://www.whitehouse.senate.gov/contact/',
   'office': '717 Hart Senate Office Building'},
  {'type': 'sen',
   'start': '2013-01-03',
   'end': '2019-01-03',
   'state':

In [11]:
def score_all(keys, verbose = False):
    """Score every ordered pair of distinct legislators.

    For each (a, b) with a != b the pair record holds ``relations_score``,
    ``nominate_distance`` and ``same_state``, and is stored with
    ``PairMap.put(a, b, record)``.

    Args:
        keys: sized collection of legislator ids to pair up.
        verbose: when true, print a progress line after every 100000 visited
            (a, b) combinations; combinations with a == b are counted too.

    Returns:
        The populated PairMap.
    """
    scores_map = PairMap()
    n_keys = len(keys)
    total = n_keys ** 2
    for row, a in enumerate(keys):
        for col, b in enumerate(keys):
            if a != b:
                scores_map.put(a, b, {
                    "relations_score" : relations_score(a, b),
                    "nominate_distance" : nominate_distance(a, b),
                    "same_state" : same_state(a, b)
                })
            # Number of combinations visited so far, including this one.
            visited = row * n_keys + col + 1
            if verbose and visited % 100000 == 0:
                print("Finished {} of {}".format(visited, total))
    return scores_map

In [12]:
# Long-running cell: scores all len(keys)**2 combinations (about 5.9 million in
# the recorded run), printing progress every 100000.
scores_map = score_all(keys, verbose = True)

Finished 100000 of 5943844
Finished 200000 of 5943844
Finished 300000 of 5943844
Finished 400000 of 5943844
Finished 500000 of 5943844
Finished 600000 of 5943844
Finished 700000 of 5943844
Finished 800000 of 5943844
Finished 900000 of 5943844
Finished 1000000 of 5943844
Finished 1100000 of 5943844
Finished 1200000 of 5943844
Finished 1300000 of 5943844
Finished 1400000 of 5943844
Finished 1500000 of 5943844
Finished 1600000 of 5943844
Finished 1700000 of 5943844
Finished 1800000 of 5943844
Finished 1900000 of 5943844
Finished 2000000 of 5943844
Finished 2100000 of 5943844
Finished 2200000 of 5943844
Finished 2300000 of 5943844
Finished 2400000 of 5943844
Finished 2500000 of 5943844
Finished 2600000 of 5943844
Finished 2700000 of 5943844
Finished 2800000 of 5943844
Finished 2900000 of 5943844
Finished 3000000 of 5943844
Finished 3100000 of 5943844
Finished 3200000 of 5943844
Finished 3300000 of 5943844
Finished 3400000 of 5943844
Finished 3500000 of 5943844
Finished 3600000 of 5943844
F

In [None]:
# Pair entries ordered from highest to lowest relations_score; each entry's
# second element is the pair record produced by score_all.
sorted_scores = sorted(scores_map.items(), key = lambda x : x[1]["relations_score"], reverse = True)

In [None]:
# Keys of the PairMap's underlying `map` attribute.
scores_keys = list(scores_map.map.keys())

In [None]:
# Inspect the pair with the highest relations_score.
sorted_scores[0]

In [15]:
def score_str(key):
    """Format a pair of legislators and their score record as one line.

    Args:
        key: indexable whose first two elements are legislator ids present in
            ``leg_map``.

    Returns:
        A string of the form "<first last>-<first last>: <scores>", where the
        scores come from ``scores_map.get(key[0], key[1])``.
    """
    full_names = []
    for leg_id in (key[0], key[1]):
        name = leg_map[leg_id]["name"]
        full_names.append(name["first"] + " " + name["last"])
    pair_scores = scores_map.get(key[0], key[1])
    return "{}-{}: {}".format(full_names[0], full_names[1], pair_scores)

In [None]:
# Number of legislators that have an entry in cosponsor_relations.
len(list(cosponsor_relations.keys()))

In [None]:
# Keep only pair records with a relations score above 0.01 and a defined
# NOMINATE distance (nominate_distance returns -1 when coordinates are missing).
pair_data = [
    entry[1]
    for entry in sorted_scores
    if entry[1]["relations_score"] > 0.01 and entry[1]["nominate_distance"] >= 0
]

In [None]:
# Regressor: same-state indicator (0/1); response: relations score.
xs = [scores["same_state"] for scores in pair_data]
ys = [scores["relations_score"] for scores in pair_data]

In [None]:
# Least-squares fit of relations_score (ys) on the same_state indicator (xs);
# the bare expression displays the fit result.
reg = stats.linregress(xs, ys)
reg

In [None]:
def plot_data(pair_data, xkey, ykey, plot_reg= False):
    """Scatter-plot one field of the pair records against another.

    Args:
        pair_data: iterable of dict-like records that contain ``xkey`` and
            ``ykey``.
        xkey: record key plotted on the x axis.
        ykey: record key plotted on the y axis.
        plot_reg: when truthy, fit a least-squares line with
            ``stats.linregress``, print the fit and overlay the line between
            the smallest and largest x value.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    xs = [x[xkey] for x in pair_data]
    ys = [x[ykey] for x in pair_data]
    plt.plot(xs, ys, "o", alpha = 0.2)
    if plot_reg:
        reg = stats.linregress(xs, ys)
        print(reg)
        # Two end points are enough to draw the fitted straight line.
        x_lo, x_hi = min(xs), max(xs)
        plt.plot(
            [x_lo, x_hi],
            [x_lo * reg.slope + reg.intercept, x_hi * reg.slope + reg.intercept],
            "-",
            linewidth = 4,
        )
    # Label both axes so the figure can be read on its own.
    plt.xlabel(xkey)
    plt.ylabel(ykey)
    plt.show()
    

In [None]:
# Ask plot_data for its own regression line. plot_reg is a flag, so pass True
# rather than the earlier same_state fit object, which only worked by being
# truthy and made this cell depend on that fit having been run.
plot_data(pair_data, "nominate_distance", "relations_score", plot_reg = True)

In [None]:
# Persist the filtered pair records (save_csv comes from utils).
save_csv(pair_data, "data/pair_scores.csv")

In [None]:
# Number of pair records that survived the pair_data filter.
len(xs)

In [None]:
# Spot check of same_state on one specific pair of legislator ids.
same_state('K000302', 'P000218')

In [16]:
# Spot check of the stored score record for the same pair of legislator ids.
scores_map.get('K000302', 'P000218')

{'relations_score': 63.48228099241551,
 'nominate_distance': 1.014869449732329,
 'same_state': 0}

In [13]:
# Records of current legislators; each record is indexed as
# leg["id"]["bioguide"] when cur_leg_map is built.
cur_leg = get_json("data/legislators-current.json")

In [20]:
# Rows of former legislators; each row is read and written by string key
# (e.g. row["bioguide"]) in the aggregation loop further down.
former_leg = get_csv("data/former_legs.csv")

In [14]:
# Index the current legislators by bioguide id. A comprehension keeps the loop
# variable out of the notebook namespace.
cur_leg_map = {leg["id"]["bioguide"]: leg for leg in cur_leg}

In [30]:
# For every former legislator, aggregate the relations scores they share with
# legislators who are currently in office.
for leg in former_leg:
    leg_id = leg["bioguide"]
    if len(leg_id) == 0:
        # Rows without a bioguide id cannot be looked up in scores_map; they
        # are left without the two derived fields.
        continue
    total_cur_relations = 0
    remaining_friends = 0
    # Iterate over ids only: the record itself is not needed, and not binding
    # it keeps the notebook-level `cur_leg` list from being overwritten.
    for cur_id in cur_leg_map:
        if leg_id == cur_id:
            continue
        pair_score = scores_map.get(leg_id, cur_id)["relations_score"]
        total_cur_relations += pair_score
        # A strictly positive score counts as a remaining "friend".
        if pair_score > 0:
            remaining_friends += 1

    leg["cur_relations_score"] = total_cur_relations
    leg["remaining_friends"] = remaining_friends

In [32]:
# NOTE: this writes to the same path the rows were loaded from
# ("data/former_legs.csv"), so the input file is overwritten with the
# augmented rows.
save_csv(former_leg, "data/former_legs.csv")