In [9]:
import pandas as pd
import numpy as np
from tokenize import generate_tokens
xrange = range
from io import StringIO

In [10]:
def parts(a):
    """Split a python-tokenizable expression on comma operators"""
    compos = [-1] # compos stores the positions of the relevant commas in the argument string
    compos.extend(t[2][1] for t in generate_tokens(StringIO(a).readline) if t[1] == ',')
    compos.append(len(a))
    return [ a[compos[i]+1:compos[i+1]] for i in xrange(len(compos)-1)]

#coverts to list of strings
def convertType(toConv, makeUpper):
    
    tcs = []
    for tc in toConv:
        chain = parts(tc)
        
        chain2 = []
        for c in chain:
            c = c.strip('[')
            c = c.strip(']')
            c = c.strip(' \'')
            c = c.strip('\'')
            c = c.strip('@')
            chain2.append(c) 
        
        #make uppercase if specified
        chain3 = []
        if makeUpper:
            for c in chain2:
                chain3.append(c.upper())
            tcs.append(chain3)
        else:
            tcs.append(chain2)
        
    return np.array(tcs)

In [11]:
df = pd.read_csv("bills_2018.csv")
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]

for column in df:
    if (type(df[column][0]) == str):
        df[column] = convertType(df[column], False)


In [12]:
print(list(df))

['bill_id', 'bill_number', 'change_hash', 'url', 'status_date', 'status', 'last_action_date', 'last_action', 'title', 'description', 'session_id', 'session_name', 'state_link', 'completed', 'state', 'state_id', 'bill_type', 'bill_type_id', 'bodie', 'body_id', 'current_body', 'current_body_id', 'pending_committee_id', 'progress_event', 'progress_date', 'history_date', 'history_action', 'history_chamber', 'history_chamber_id', 'history_importance', 'sponsor_people_id', 'sponsor_party_id', 'sponsor_party', 'sponsor_role_id', 'sponsor_role', 'sponsor_name', 'sponsor_district', 'sponsor_type_id', 'sponsor_order', 'committee_sponsor', 'votes_roll_call_id', 'votes_date', 'votes_desc', 'votes_yea', 'votes_nay', 'votes_nv', 'votes_absent', 'votes_total', 'votes_passed', 'votes_chamber', 'votes_chamber_id', 'votes_url', 'votes_state_link']


In [13]:
## Here we find every legislator in our data, we create a dataset with some basic info about them

df = df.dropna()

leg_parties = []
leg_names = []
leg_districts = []
leg_roles = []
leg_people_ids = []
leg_committee_sponsors = []
for index, row in df.iterrows():
    names = row["sponsor_name"]
    parties = row["sponsor_party"]
    districts = row["sponsor_district"]
    roles = row["sponsor_role"]
    people_ids = row["sponsor_people_id"]
    committee_sponsors = row["committee_sponsor"]
    for name, party, district, role, people_id, committee_sponsor in zip(names,parties,districts,roles,people_ids,committee_sponsors):
        if (name.replace("\"","") not in leg_names and name != ""):
            leg_names.append(name.replace("\"",""))
            leg_roles.append(role)
            leg_people_ids.append(people_id)
            leg_committee_sponsors.append(committee_sponsor)
            if (party == ""):
                leg_parties.append("No Party")
            else:
                leg_parties.append(party)
            if (district == ""):
                leg_districts.append("No District")
            else:
                leg_districts.append(district)
            
legislators = pd.DataFrame({
    "name": leg_names,
    "party": leg_parties,
    "district" : leg_districts,
    "role" : leg_roles,
    "people_id" : leg_people_ids,
    "committee_sponsor" : leg_committee_sponsors
})

In [14]:
## Here we count how many bills each legislator has sponsored, we also count each type of sponsorship
# and make a list of all of the bills and their status

# counts of bills
all_sponsors_count = []
primary_sponsors_count = []
co_sponsors_count = []
joint_sponsors_count = []
unspecified_sponsors_count = []

# lists of bills
all_bills = []
primary_bills = []
co_bills = []
joint_bills = []
unspecified_bills = []

# lists of bill status
all_bills_status = []
primary_bills_status = []
co_bills_status = []
joint_bills_status = []
unspecified_bills_status = []

for leg_id in legislators["people_id"]:
    
    l_all_bills = []
    l_primary_bills = []
    l_co_bills = []
    l_joint_bills = []
    l_unspecified_bills = []
    l_all_bills_status = []
    l_primary_bills_status = []
    l_co_bills_status = []
    l_joint_bills_status = []
    l_unspecified_bills_status = []
    
    
    for index, row in df.iterrows():
        sponsor_people_ids = row["sponsor_people_id"]
        sponsor_type_ids = row["sponsor_type_id"]
        for people_id, type_id in zip(sponsor_people_ids,sponsor_type_ids):
            if (people_id == leg_id):
                l_all_bills.append(row["bill_id"])
                l_all_bills_status.append(row["status"])
                if (type_id == "0"):
                    l_unspecified_bills.append(row["bill_id"])
                    l_unspecified_bills_status.append(row["status"])
                elif (type_id == "1"):
                    l_primary_bills.append(row["bill_id"])
                    l_primary_bills_status.append(row["status"])
                elif (type_id == "2"):
                    l_co_bills.append(row["bill_id"])
                    l_co_bills_status.append(row["status"])
                elif (type_id == "3"):
                    l_joint_bills.append(row["bill_id"])
                    l_joint_bills_status.append(row["status"])
    
    all_bills.append(l_all_bills)
    all_bills_status.append(l_all_bills_status)
    unspecified_bills.append(l_unspecified_bills)
    unspecified_bills_status.append(l_unspecified_bills_status)
    primary_bills.append(l_primary_bills)
    primary_bills_status.append(l_primary_bills_status)
    co_bills.append(l_co_bills)
    co_bills_status.append(l_co_bills_status)
    joint_bills.append(l_joint_bills)
    joint_bills_status.append(l_joint_bills_status)


    all_sponsors_count.append(len(l_all_bills))
    unspecified_sponsors_count.append(len(l_unspecified_bills))
    primary_sponsors_count.append(len(l_primary_bills))
    co_sponsors_count.append(len(l_co_bills))
    joint_sponsors_count.append(len(l_joint_bills))

In [15]:
legislators["all_sponsored_bills_count"] = all_sponsors_count
legislators["unspecified_sponsored_bills_count"] = unspecified_sponsors_count
legislators["primary_sponsored_bills_count"] = primary_sponsors_count
legislators["co_sponsored_bills_count"] = co_sponsors_count
legislators["joint_sponsored_bills_count"] = joint_sponsors_count
legislators["all_sponsored_bills"] = all_bills
legislators["unspecified_sponsored_bills"] = unspecified_bills
legislators["primary_sponsored_bills"] = primary_bills
legislators["co_sponsored_bills"] = co_bills
legislators["joint_sponsored_bills"] = joint_bills
legislators["all_sponsored_bills_status"] = all_bills_status
legislators["unspecified_sponsored_bills_status"] = unspecified_bills_status
legislators["primary_sponsored_bills_status"] = primary_bills_status
legislators["co_sponsored_bills_status"] = co_bills_status
legislators["joint_sponsored_bills_status"] = joint_bills_status

all_sponsored_bills_passed = []
unspecified_sponsored_bills_passed = []
primary_sponsored_bills_passed = []
co_sponsored_bills_passed = []
joint_sponsored_bills_passed = []
for index, row in legislators.iterrows():
    all_sponsored_bills_passed.append(row["all_sponsored_bills_status"].count(4))
    unspecified_sponsored_bills_passed.append(row["unspecified_sponsored_bills_status"].count(4))
    primary_sponsored_bills_passed.append(row["primary_sponsored_bills_status"].count(4))
    co_sponsored_bills_passed.append(row["co_sponsored_bills_status"].count(4))
    joint_sponsored_bills_passed.append(row["joint_sponsored_bills_status"].count(4))
 
legislators["all_sponsored_bills_passed"] = all_sponsored_bills_passed
legislators["unspecified_sponsored_bills_passed"] = unspecified_sponsored_bills_passed
legislators["primary_sponsored_bills_passed"] = primary_sponsored_bills_passed
legislators["co_sponsored_bills_passed"] = co_sponsored_bills_passed
legislators["joint_sponsored_bills_passed"] = joint_sponsored_bills_passed

In [16]:
# here we calculate avgs for each type of sponsorship. (what percent of bills were passed)

all_sponsored_bills_avg = []
unspecified_sponsored_bills_avg = []
primary_sponsored_bills_avg = []
co_sponsored_bills_avg = []
joint_sponsored_bills_avg = []
for index, row in legislators.iterrows():
    if (row["all_sponsored_bills_count"] == 0):
        all_sponsored_bills_avg.append(0.0)
    else:
        all_sponsored_bills_avg.append(row["all_sponsored_bills_passed"]/row["all_sponsored_bills_count"])
    if (row["unspecified_sponsored_bills_count"] == 0):
        unspecified_sponsored_bills_avg.append(0.0)
    else:
        unspecified_sponsored_bills_avg.append(row["unspecified_sponsored_bills_passed"]/row["unspecified_sponsored_bills_count"])
    if (row["primary_sponsored_bills_count"] == 0):
        primary_sponsored_bills_avg.append(0.0)
    else:
        primary_sponsored_bills_avg.append(row["primary_sponsored_bills_passed"]/row["primary_sponsored_bills_count"])
    if (row["co_sponsored_bills_count"] == 0):
        co_sponsored_bills_avg.append(0.0)
    else:
        co_sponsored_bills_avg.append(row["co_sponsored_bills_passed"]/row["co_sponsored_bills_count"])
    if (row["joint_sponsored_bills_count"] == 0):
        joint_sponsored_bills_avg.append(0.0)
    else:
        joint_sponsored_bills_avg.append(row["joint_sponsored_bills_passed"]/row["joint_sponsored_bills_count"])
 
legislators["all_sponsored_bills_avg"] = all_sponsored_bills_avg
legislators["unspecified_sponsored_bills_avg"] = unspecified_sponsored_bills_avg
legislators["primary_sponsored_bills_avg"] = primary_sponsored_bills_avg
legislators["co_sponsored_bills_avg"] = co_sponsored_bills_avg
legislators["joint_sponsored_bills_avg"] = joint_sponsored_bills_avg

In [17]:
# pd.set_option('display.max_column',None)
# pd.set_option('display.max_rows',None)
# pd.set_option('display.max_seq_items',None)
# pd.set_option('display.max_colwidth', 500)
# pd.set_option('expand_frame_repr', True)
legislators = legislators.sort_values(by=['primary_sponsored_bills_avg'], ascending=False).reset_index(drop=True)
legislators.head(20)

Unnamed: 0,name,party,district,role,people_id,committee_sponsor,all_sponsored_bills_count,unspecified_sponsored_bills_count,primary_sponsored_bills_count,co_sponsored_bills_count,...,all_sponsored_bills_passed,unspecified_sponsored_bills_passed,primary_sponsored_bills_passed,co_sponsored_bills_passed,joint_sponsored_bills_passed,all_sponsored_bills_avg,unspecified_sponsored_bills_avg,primary_sponsored_bills_avg,co_sponsored_bills_avg,joint_sponsored_bills_avg
0,Southern Maryland Delegation,No Party,No District,Rep,16216,1,1,0,1,0,...,1,0,1,0,0,1.0,0.0,1.0,0.0,0.0
1,Jeff Ghrist,R,HD-036,Rep,17399,0,84,0,2,82,...,21,0,2,19,0,0.25,0.0,1.0,0.231707,0.0
2,Dorchester County Delegation,No Party,No District,Rep,15485,1,3,0,3,0,...,3,0,3,0,0,1.0,0.0,1.0,0.0,0.0
3,Caroline County Delegation,No Party,No District,Rep,15482,1,2,0,2,0,...,2,0,2,0,0,1.0,0.0,1.0,0.0,0.0
4,Ways and Means,No Party,No District,Rep,15490,1,3,0,2,1,...,3,0,2,1,0,1.0,0.0,1.0,1.0,0.0
5,"Education, Health, and Environmental Affairs",No Party,No District,Sen,15414,1,15,0,15,0,...,15,0,15,0,0,1.0,0.0,1.0,0.0,0.0
6,Calvert County Senators,No Party,No District,Sen,15737,1,1,0,1,0,...,1,0,1,0,0,1.0,0.0,1.0,0.0,0.0
7,Caroline County Senators,No Party,No District,Sen,17891,1,3,0,3,0,...,3,0,3,0,0,1.0,0.0,1.0,0.0,0.0
8,Meagan Simonaire,R,HD-031B,Rep,17404,0,52,0,1,51,...,23,0,1,22,0,0.442308,0.0,1.0,0.431373,0.0
9,Baltimore County Delegation,No Party,No District,Rep,15728,1,11,0,11,0,...,10,0,10,0,0,0.909091,0.0,0.909091,0.0,0.0


In [18]:
sm = legislators[["name","party","primary_sponsored_bills_passed","primary_sponsored_bills_count","primary_sponsored_bills_avg","co_sponsored_bills_avg","co_sponsored_bills_count", "committee_sponsor","people_id"]]
sm = sm[sm['co_sponsored_bills_count'] > 1]
sm = sm[sm['committee_sponsor'] != 1]
sm = sm.sort_values(by=['primary_sponsored_bills_avg'], ascending=False).reset_index(drop=True)
sm.head(20)

Unnamed: 0,name,party,primary_sponsored_bills_passed,primary_sponsored_bills_count,primary_sponsored_bills_avg,co_sponsored_bills_avg,co_sponsored_bills_count,committee_sponsor,people_id
0,Jeff Ghrist,R,2,2,1.0,0.231707,82,0,17399
1,Meagan Simonaire,R,1,1,1.0,0.431373,51,0,17404
2,Sally Jameson,D,4,5,0.8,0.595238,42,0,4658
3,Edward Kasemeyer,D,10,14,0.714286,0.568182,44,0,4697
4,Talmadge Branch,D,4,6,0.666667,0.55,40,0,4602
5,Joseline Pena-Melnyk,D,10,15,0.666667,0.437158,183,0,4561
6,Jay Jacobs,R,6,10,0.6,0.222222,81,0,11449
7,Anne Kaiser,D,6,10,0.6,0.5,70,0,4588
8,Thomas Middleton,D,24,41,0.585366,0.609091,110,0,4687
9,Montgomery County Delegation,No Party,11,19,0.578947,0.416667,12,1,15731


In [19]:
legislators.to_csv("legislators.csv")

In [22]:
# here we count how many times legislators have co sponsered each other

relationships = []
first_names = []
second_names = []
co_bills = []
for index,row in legislators.iterrows():
    name1 = row["name"]
    for index2,row2 in legislators.iterrows():
        name2 = row2["name"]
        names = [name1,name2]
        names = sorted(names)
        combo_name = names[0] + names[1]
        if (combo_name not in relationships):
            relationships.append(combo_name)
            first_names.append(names[0])
            second_names.append(names[1])
            co_bills = []
        
matrix = pd.DataFrame({
    "relationship": relationships,
    "first_name": first_names,
    "second_name" : second_names
})

In [23]:
print(list(df))

['bill_id', 'bill_number', 'change_hash', 'url', 'status_date', 'status', 'last_action_date', 'last_action', 'title', 'description', 'session_id', 'session_name', 'state_link', 'completed', 'state', 'state_id', 'bill_type', 'bill_type_id', 'bodie', 'body_id', 'current_body', 'current_body_id', 'pending_committee_id', 'progress_event', 'progress_date', 'history_date', 'history_action', 'history_chamber', 'history_chamber_id', 'history_importance', 'sponsor_people_id', 'sponsor_party_id', 'sponsor_party', 'sponsor_role_id', 'sponsor_role', 'sponsor_name', 'sponsor_district', 'sponsor_type_id', 'sponsor_order', 'committee_sponsor', 'votes_roll_call_id', 'votes_date', 'votes_desc', 'votes_yea', 'votes_nay', 'votes_nv', 'votes_absent', 'votes_total', 'votes_passed', 'votes_chamber', 'votes_chamber_id', 'votes_url', 'votes_state_link']


In [None]:
co_bills = []
co_bills_status = []
co_bills_total = []
print(len(matrix))
for index, row in matrix.iterrows():
    print(index)
    r_co_bills = []
    r_co_bills_status = []
    name1 = row["first_name"]
    name2 = row["second_name"]
    for index2, row2 in df.iterrows():
        name_one_found = False
        name_two_found = False
        primary_found = False
        secondary_found = False
        for sponsor_name, sponsor_type_id in zip(row2["sponsor_name"],row2["sponsor_type_id"]):
            if (sponsor_name == name1):
                name_one_found = True
                if (sponsor_type_id == "1"):
                    primary_found = True 
                elif (sponsor_type_id == "2"):
                    secondary_found = True
            elif (sponsor_name == name2):
                name_two_found = True
                if (sponsor_type_id == "1"):
                    primary_found = True 
                elif (sponsor_type_id == "2"):
                    secondary_found = True
        if (name_one_found and name_two_found and primary_found and secondary_found):
            print("_______FOUND__________")
            r_co_bills.append(row2["bill_id"])
            r_co_bills_status.append(row2["status"])
        
    co_bills.append(r_co_bills)
    co_bills_status.append(r_co_bills_status)
    co_bills_total.append(len(r_co_bills))        

matrix["co_bills"] = co_bills
matrix["co_bills_status"] = co_bills_status
matrix["co_bills_total"] = co_bills_total

26335
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54


In [None]:
matrix = matrix.sort_values(by=['co_bills_total'], ascending=False)

matrix.to_csv("matrix.csv")

In [None]:
matrix.head(20)

In [4]:
matrix = pd.read_csv("matrix.csv")

In [5]:
matrix = matrix.sort_values(by=['co_bills_total'], ascending=False)

In [7]:
matrix.head(20)

Unnamed: 0.1,Unnamed: 0,relationship,first_name,second_name,co_bills,co_bills_status,co_bills_total
0,42768,Bilal AliNick Mosby,Bilal Ali,Nick Mosby,"[1071267, 1071381, 1071341, 1071282, 1071368, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, ...",30
1,40032,Bilal AliNick Mosby,Bilal Ali,Nick Mosby,"[1071267, 1071381, 1071341, 1071282, 1071368, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, ...",30
2,30998,Richard MadalenoSusan Lee,Richard Madaleno,Susan Lee,"[1053142, 1059464, 1066901, 1068450, 1071300, ...","[1, 4, 1, 4, 1, 1, 1, 4, 1, 1, 4, 4, 4, 4, 4, ...",27
3,19142,Richard MadalenoSusan Lee,Richard Madaleno,Susan Lee,"[1053142, 1059464, 1066901, 1068450, 1071300, ...","[1, 4, 1, 4, 1, 1, 1, 4, 1, 1, 4, 4, 4, 4, 4, ...",27
4,14064,Delores KelleyShirley Nathan-Pulliam,Delores Kelley,Shirley Nathan-Pulliam,"[1058992, 1059008, 1059009, 1058986, 1062483, ...","[4, 1, 4, 1, 1, 1, 1, 1, 4, 1, 4, 1, 1, 4, 1, ...",26
5,21816,Delores KelleyShirley Nathan-Pulliam,Delores Kelley,Shirley Nathan-Pulliam,"[1058992, 1059008, 1059009, 1058986, 1062483, ...","[4, 1, 4, 1, 1, 1, 1, 1, 4, 1, 4, 1, 1, 4, 1, ...",26
6,35165,Nicholaus KipkeThe Speaker,Nicholaus Kipke,The Speaker,"[1058110, 1058102, 1058079, 1068443, 1068455, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",25
7,29465,Nicholaus KipkeThe Speaker,Nicholaus Kipke,The Speaker,"[1058110, 1058102, 1058079, 1068443, 1068455, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",25
8,36460,Barbara RobinsonJoan Conway,Barbara Robinson,Joan Conway,"[1058099, 1065100, 1065032, 1066911, 1069793, ...","[5, 1, 4, 4, 4, 1, 2, 1, 4, 4, 4, 4, 1, 4, 4, ...",25
9,11380,Barbara RobinsonJoan Conway,Barbara Robinson,Joan Conway,"[1058099, 1065100, 1065032, 1066911, 1069793, ...","[5, 1, 4, 4, 4, 1, 2, 1, 4, 4, 4, 4, 1, 4, 4, ...",25
