# Congressional Roll Calls NLP Analysis

This notebook takes the `S117_rollcalls.csv` data set, which contains roll-call votes on bills and nominations that have gone through the Senate, and performs some Natural Language Processing on the vote descriptions, namely Named Entity Recognition (NER) with a transformer-based spaCy pipeline.

In [1]:
import pandas as pd
import numpy as np
import spacy
from spacy import displacy
from spacy import tokenizer
import re
import warnings
warnings.filterwarnings("ignore")

2022-11-30 16:05:24.435366: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Load in and process the S117_rollcalls data

In [3]:
# Read the roll-call CSV (path is relative to the working directory) and
# preview the first rows.
rollcalls = pd.read_csv("S117_rollcalls.csv")
rollcalls.head()

Unnamed: 0,congress,chamber,rollnumber,date,session,clerk_rollnumber,yea_count,nay_count,nominate_mid_1,nominate_mid_2,nominate_spread_1,nominate_spread_2,nominate_log_likelihood,bill_number,vote_result,vote_desc,vote_question,dtl_desc
0,117,Senate,1,2021-01-06,1,1,6,93,-0.695,0.62,0.115,-1.472,-18.659,,Objection Not Sustained,,On the Objection,
1,117,Senate,2,2021-01-07,1,2,7,92,-0.767,0.609,0.119,-1.265,-19.668,,Objection Not Sustained,,On the Objection,
2,117,Senate,3,2021-01-20,1,3,84,10,0.541,0.841,0.45,0.34,-14.842,PN7810,Nomination Confirmed,"Avril Danica Haines, of New York, to be Direct...",On the Nomination,
3,117,Senate,4,2021-01-21,1,4,69,27,-0.787,0.617,-0.132,-0.259,-51.575,HR335,Bill Passed,A bill to provide for an exception to a limita...,On Passage of the Bill,
4,117,Senate,5,2021-01-22,1,5,93,2,0.0,0.0,0.0,0.0,0.0,PN781,Nomination Confirmed,"Lloyd James Austin, of Georgia, to be Secretar...",On the Nomination,


In [4]:
# Keep only the roll calls that have a vote description; rows where
# `vote_desc` is missing carry no text to analyse.
rollcalls = rollcalls.dropna(subset=['vote_desc'])

In [5]:
# Preview the frame again after the rows without a vote description were dropped.
rollcalls.head()

Unnamed: 0,congress,chamber,rollnumber,date,session,clerk_rollnumber,yea_count,nay_count,nominate_mid_1,nominate_mid_2,nominate_spread_1,nominate_spread_2,nominate_log_likelihood,bill_number,vote_result,vote_desc,vote_question,dtl_desc
2,117,Senate,3,2021-01-20,1,3,84,10,0.541,0.841,0.45,0.34,-14.842,PN7810,Nomination Confirmed,"Avril Danica Haines, of New York, to be Direct...",On the Nomination,
3,117,Senate,4,2021-01-21,1,4,69,27,-0.787,0.617,-0.132,-0.259,-51.575,HR335,Bill Passed,A bill to provide for an exception to a limita...,On Passage of the Bill,
4,117,Senate,5,2021-01-22,1,5,93,2,0.0,0.0,0.0,0.0,0.0,PN781,Nomination Confirmed,"Lloyd James Austin, of Georgia, to be Secretar...",On the Nomination,
5,117,Senate,6,2021-01-25,1,6,84,15,-0.045,0.999,0.129,0.67,-24.108,PN7824,Nomination Confirmed,"Janet Louise Yellen, of California, to be Secr...",On the Nomination,
6,117,Senate,7,2021-01-26,1,7,78,22,0.444,0.426,0.222,0.34,-22.975,PN783,Nomination Confirmed,"Antony John Blinken, of New York, to be Secret...",On the Nomination,


In [6]:
# Map each roll number (as int) to its cleaned vote description: newline
# characters are removed and leading/trailing whitespace is stripped.
rolls_dict = {
    int(number): str(description).replace('\n', '').strip()
    for number, description in zip(rollcalls['rollnumber'], rollcalls['vote_desc'])
}

In [8]:
# Spot-check the cleaned description stored for roll number 3.
rolls_dict[3]

'Avril Danica Haines, of New York, to be Director of National Intelligence'

## Conduct NER on roll calls

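We use spaCy's transformer-based English pipeline (`en_core_web_trf`) to extract the named entities mentioned in each vote description.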

In [7]:
# Load the spaCy pipeline named 'en_core_web_trf'; the resulting `nlp` object
# is what build_ner_table calls on each description.
# NOTE(review): presumably the en_core_web_trf model package must already be
# installed in this environment - confirm before a fresh run.
nlp = spacy.load('en_core_web_trf')

In [9]:
def build_ner_table(roll_data, nlp_model=None):
    """Run named-entity recognition over roll-call descriptions.

    Parameters
    ----------
    roll_data : dict
        Mapping of roll number -> vote description text.
    nlp_model : callable, optional
        Callable that takes a description string and returns an object whose
        ``ents`` attribute is an iterable of entities exposing ``label_`` and
        ``text``. Defaults to the notebook-level ``nlp`` pipeline.

    Returns
    -------
    pandas.DataFrame
        One row per detected entity, with columns ``rollnumber``,
        ``vote_desc``, ``NER_Lable`` and ``Text``. The frame is empty (but
        still has these columns) when ``roll_data`` is empty or no entity is
        found.
    """
    # NOTE: 'NER_Lable' is misspelled, but the spelling is kept on purpose:
    # later cells and the exported CSV refer to the column by this exact name.
    columns = ['rollnumber', 'vote_desc', 'NER_Lable', 'Text']

    if nlp_model is None:
        # Fall back to the pipeline loaded at notebook level.
        nlp_model = nlp

    # Collect all entity rows in one flat list and build the DataFrame once.
    # This avoids creating (and concatenating) one small frame per description
    # and, unlike pd.concat on an empty list, does not raise on empty input.
    records = []
    for num, desc in roll_data.items():
        doc = nlp_model(desc)
        records.extend((num, desc, ent.label_, ent.text) for ent in doc.ents)

    return pd.DataFrame(records, columns=columns)

In [10]:
# Run NER over every cleaned vote description; the pipeline is invoked once
# per roll call, so this is the expensive cell of the notebook.
rollcall_ners = build_ner_table(rolls_dict)

In [15]:
# Inspect up to the first 25 entity rows.
rollcall_ners.head(25)

Unnamed: 0,rollnumber,vote_desc,NER_Lable,Text
0,3,"Avril Danica Haines, of New York, to be Direct...",PERSON,Avril Danica Haines
1,3,"Avril Danica Haines, of New York, to be Direct...",GPE,New York
2,4,A bill to provide for an exception to a limita...,ORG,Defense
3,4,A bill to provide for an exception to a limita...,DATE,seven years
4,4,A bill to provide for an exception to a limita...,ORG,the Armed Forces
5,5,"Lloyd James Austin, of Georgia, to be Secretar...",PERSON,Lloyd James Austin
6,5,"Lloyd James Austin, of Georgia, to be Secretar...",GPE,Georgia
7,5,"Lloyd James Austin, of Georgia, to be Secretar...",ORG,Defense
8,6,"Janet Louise Yellen, of California, to be Secr...",PERSON,Janet Louise Yellen
9,6,"Janet Louise Yellen, of California, to be Secr...",GPE,California


In [13]:
# Total number of entity rows extracted across all roll calls.
len(rollcall_ners)

2319

In [14]:
# Count entity rows per NER label. The column name 'NER_Lable' (sic) must
# match the spelling used when the table is built in build_ner_table.
rollcall_ners.groupby('NER_Lable').size()

NER_Lable
CARDINAL         4
DATE           173
EVENT           11
FAC             12
GPE            931
LAW             90
LOC             12
MONEY           10
NORP            24
ORDINAL          4
ORG            527
PERCENT          4
PERSON         513
PRODUCT          1
WORK_OF_ART      3
dtype: int64

In [16]:
# Save the entity table to CSV in the working directory, without writing the
# DataFrame index as a column.
rollcall_ners.to_csv("rollcall_ners.csv", index = False)