In [3]:
from utils import *
import math
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
import sqlalchemy
from sqlalchemy import Column, Integer, Text  
from sqlalchemy.dialects.postgresql import JSON, JSONB

In [None]:
# Load the cosponsor relations data. `relations_score` below indexes it as
# cosponsor_relations[a_id][b_id] and reads an "average" entry from the result.
# NOTE(review): `get_json` is not defined in this notebook; presumably it comes
# from `from utils import *` — confirm.
cosponsor_relations = get_json("data/cosponsor_relations.json")

In [None]:
# Load the legislator lookup table. Later cells index it by legislator id and
# read the "state", "name" and (optional) "nominate" entries of each record.
leg_map = get_json("data/recent_legislators_map_nominate.json")

In [None]:
# Load the bills lookup table.
# NOTE(review): `bills_map` is not referenced by any other cell visible in this
# notebook — confirm it is still needed.
bills_map = get_json("data/bills_map.json")

In [None]:
def relations_score(a_id, b_id):
    """Score the mutual cosponsorship relation between two legislators.

    Looks up the a->b and b->a entries of ``cosponsor_relations``; a missing
    entry is treated as ``{"total": 0, "average": 0}``. The result is
    ``sqrt(shared_years * average_ab * average_ba)``, where ``shared_years`` is
    the smaller of the two entries' key counts minus two.

    NOTE(review): the "- 2" presumably discounts the "total" and "average"
    keys so only per-year keys are counted — confirm against the data file.

    Args:
        a_id: Key of the first legislator in ``cosponsor_relations``.
        b_id: Key of the second legislator in ``cosponsor_relations``.

    Returns:
        The score as a float; 0.0 when either direction has no entry.
    """
    no_relation = {"total" : 0, "average" : 0}
    forward = cosponsor_relations[a_id].get(b_id, no_relation)
    backward = cosponsor_relations[b_id].get(a_id, no_relation)

    # Every key other than the two summary keys counts towards the year total.
    year_counts = [len(rel.keys()) - 2 for rel in (forward, backward)]
    shared_years = min(year_counts)

    product = shared_years * forward["average"] * backward["average"]
    return math.sqrt(product)

In [None]:
def nominate_distance(a_id, b_id):
    """Euclidean distance between two legislators' NOMINATE coordinates.

    Args:
        a_id: Key of the first legislator in ``leg_map``.
        b_id: Key of the second legislator in ``leg_map``.

    Returns:
        The distance over the "dim1"/"dim2" coordinates, or the sentinel -1
        when either legislator record has no "nominate" entry.
    """
    # Check a before b so a missing "nominate" on a returns before b is looked up.
    for leg_id in (a_id, b_id):
        if "nominate" not in leg_map[leg_id]:
            return -1

    a_point = leg_map[a_id]["nominate"]
    b_point = leg_map[b_id]["nominate"]
    delta1 = a_point["dim1"] - b_point["dim1"]
    delta2 = a_point["dim2"] - b_point["dim2"]
    return math.sqrt(delta1**2 + delta2**2)

In [None]:
def same_state(a_id, b_id):
    """Return 1 when both legislators' ``leg_map`` records share a "state", else 0."""
    a_state = leg_map[a_id]["state"]
    b_state = leg_map[b_id]["state"]
    if a_state == b_state:
        return 1
    return 0

In [None]:
# Legislator ids to score: every top-level key of `cosponsor_relations`.
keys = list(cosponsor_relations.keys())

In [None]:
# Peek at one legislator record to see which fields are available.
list(leg_map.values())[0]

In [None]:
def score_all(keys, verbose = False):
    """Compute pair metrics for every ordered pair of distinct ids in ``keys``.

    For each pair ``(a, b)`` with ``a != b`` a dict holding "relations_score",
    "nominate_distance" and "same_state" is stored in a ``PairMap`` via
    ``put(a, b, ...)``.

    Args:
        keys: Iterable of legislator ids; it is iterated once per outer element
            and must support ``len``.
        verbose: When truthy, print progress every 100000 visited pairs
            (self-pairs are counted as visited but not scored).

    Returns:
        The populated ``PairMap``.
    """
    pair_scores = PairMap()
    total = len(keys) ** 2
    visited = 0
    for first in keys:
        for second in keys:
            visited += 1
            if first != second:
                pair_scores.put(first, second, {
                    "relations_score" : relations_score(first, second),
                    "nominate_distance" : nominate_distance(first, second),
                    "same_state" : same_state(first, second)
                })
            if verbose and visited % 100000 == 0:
                print("Finished {} of {}".format(visited, total))
    return pair_scores

In [None]:
# Score every ordered pair of legislators. This visits len(keys) ** 2 pairs, so
# progress reporting is switched on.
scores_map = score_all(keys, verbose = True)

In [None]:
# Rank the (pair, data) items from highest to lowest relations_score.
sorted_scores = sorted(
    scores_map.items(),
    key = lambda item : item[1]["relations_score"],
    reverse = True,
)

In [None]:
# Collect the keys of the PairMap's underlying `map` attribute.
# NOTE(review): `scores_keys` is not used by any other cell visible here.
scores_keys = list(scores_map.map.keys())

In [None]:
# Inspect the pair with the highest relations_score.
sorted_scores[0]

In [None]:
def score_str(key):
    """Format a pair key as ``"<a name>-<b name>: <pair data>"``.

    Args:
        key: Indexable pair of legislator ids; ``key[0]`` and ``key[1]`` are
            looked up in ``leg_map`` and passed to ``scores_map.get``.

    Returns:
        The first and last names of both legislators followed by whatever
        ``scores_map.get`` returns for the pair.
    """
    def full_name(leg_id):
        # Display name is "<first> <last>" from the legislator's "name" record.
        name = leg_map[leg_id]["name"]
        return name["first"] + " " + name["last"]

    first_id, second_id = key[0], key[1]
    return "{}-{}: {}".format(
        full_name(first_id),
        full_name(second_id),
        scores_map.get(first_id, second_id),
    )

In [None]:
# Number of legislators that have an entry in `cosponsor_relations`.
len(list(cosponsor_relations.keys()))

In [None]:
# Keep only pairs with a non-trivial relations_score (> 0.01) and a valid
# nominate_distance (nominate_distance() returns -1 when data is missing).
pair_data = [
    item[1]
    for item in sorted_scores
    if item[1]["relations_score"] > 0.01 and item[1]["nominate_distance"] >= 0
]

In [None]:
# Regression inputs: same_state flag as predictor, relations_score as response.
xs = [entry["same_state"] for entry in pair_data]
ys = [entry["relations_score"] for entry in pair_data]

In [None]:
# Least-squares fit of relations_score (ys) against the same_state flag (xs);
# the bare `reg` displays the fit result as the cell output.
reg = stats.linregress(xs, ys)
reg

In [None]:
def plot_data(pair_data, xkey, ykey, plot_reg= False):
    """Scatter-plot one field of ``pair_data`` against another.

    Args:
        pair_data: Sequence of mappings, each containing ``xkey`` and ``ykey``.
            It is iterated twice, so it must not be a one-shot iterator.
        xkey: Key whose values go on the x axis; also used as the x-axis label.
        ykey: Key whose values go on the y axis; also used as the y-axis label.
        plot_reg: When truthy, fit ``stats.linregress`` to the points, print
            the fit result and overlay the fitted line across the x range.

    Returns:
        None. The figure is drawn through ``plt`` and shown with ``plt.show()``.
    """
    xs = [x[xkey] for x in pair_data]
    ys = [x[ykey] for x in pair_data]
    plt.plot(xs, ys, "o", alpha = 0.2)
    if plot_reg:
        reg = stats.linregress(xs, ys)
        print(reg)
        # The fitted line only needs its two endpoints over the observed x range.
        x_lo, x_hi = min(xs), max(xs)
        plt.plot(
            [x_lo, x_hi],
            [x_lo * reg.slope + reg.intercept, x_hi * reg.slope + reg.intercept],
            "-",
            linewidth = 4,
        )
    # Label both axes so the figure can be read without the calling cell.
    plt.xlabel(xkey)
    plt.ylabel(ykey)
    plt.show()
    

In [None]:
# Plot relations_score against nominate_distance with a regression overlay.
# `plot_reg` is a boolean switch: plot_data fits its own regression on the
# plotted columns, so the flag is passed explicitly rather than the unrelated
# same_state fit result `reg`, which only worked because it is truthy.
plot_data(pair_data, "nominate_distance", "relations_score", plot_reg = True)

In [None]:
# Persist the filtered pair records to CSV.
# NOTE(review): `save_csv` is not defined in this notebook; presumably it comes
# from `from utils import *` — confirm.
save_csv(pair_data, "data/pair_scores.csv")

In [None]:
# Number of pairs that survived the pair_data filter (xs has one entry per pair).
len(xs)

In [None]:
# Spot-check same_state on one specific pair of legislator ids.
same_state('K000302', 'P000218')

In [None]:
# Show the top-scoring pair again (same expression as an earlier cell).
sorted_scores[0]