In [1]:
import pandas as pd

vote_details_df = pd.read_csv("../../data/bills/congress_roll_call_bill_details.csv")
all_votes_df = pd.read_csv("../../data/bills/congress_roll_call_votes.csv")


In [2]:
print(vote_details_df.columns)
print(vote_details_df["vote_question"].unique().tolist())


Index(['roll_number', 'majority', 'congress', 'session', 'chamber',
       'rollcall_num', 'legis_num', 'vote_question', 'amendment_num',
       'amendment_author', 'vote_type', 'vote_result', 'action_date',
       'vote_desc', 'action_time_etz', 'action_time', 'republican_yeas',
       'republican_nays', 'republican_present', 'republican_not_voting',
       'democratic_yeas', 'democratic_nays', 'democratic_present',
       'democratic_not_voting', 'independent_yeas', 'independent_nays',
       'independent_present', 'independent_not_voting'],
      dtype='object')
['On Passage', 'On Ordering the Previous Question', 'On Agreeing to the Resolution', 'On Motion to Suspend the Rules and Pass, as Amended', 'On Agreeing to the Amendment', 'On Motion to Recommit', 'On Motion to Suspend the Rules and Agree', 'On Motion to Suspend the Rules and Pass', 'On Agreeing to the Resolution, as Amended', 'Passage, Objections of the President To The Contrary Notwithstanding', 'On Motion to Suspend the R

In [3]:
all_votes_df.head()

Unnamed: 0,roll_number,name_id,party,state,legislator_name,vote
0,100,A000370,D,NC,Adams,No
1,100,A000055,R,AL,Aderholt,Yes
2,100,A000371,D,CA,Aguilar,No
3,100,A000379,R,MO,Alford,Yes
4,100,A000372,R,GA,Allen,Yes


First we need to get rid of unecessary bills


In [4]:

relevant_vote_questions = [
    "On Passage",
    "Passage, Objections of the President To The Contrary Notwithstanding",
    "On Agreeing to the Resolution",
    "On Agreeing to the Resolution, as Amended",
    "On Agreeing to the Amendment",
]

vote_details_df = vote_details_df[vote_details_df["vote_question"].isin(relevant_vote_questions)]

print("Total number of relevant votes: ", len(all_votes_df["roll_number"].unique().tolist()))
print(vote_details_df["vote_question"].unique().tolist())

Total number of relevant votes:  625
['On Passage', 'On Agreeing to the Resolution', 'On Agreeing to the Amendment', 'On Agreeing to the Resolution, as Amended', 'Passage, Objections of the President To The Contrary Notwithstanding']


Lets narrow it down to 

In [5]:
threshold = 0.95

vote_details_df = vote_details_df.copy()
vote_details_df['D_yea_pct'] = (
    vote_details_df['democratic_yeas'] /
    (vote_details_df['democratic_yeas'] + vote_details_df['democratic_nays'])
)
vote_details_df['R_yea_pct'] = (
    vote_details_df['republican_yeas'] /
    (vote_details_df['republican_yeas'] + vote_details_df['republican_nays'])
)

pure_partisans = vote_details_df[
    (
        (vote_details_df['D_yea_pct'] >= threshold) &
        (vote_details_df['R_yea_pct'] <= (1 - threshold))
    ) | (
        (vote_details_df['R_yea_pct'] >= threshold) &
        (vote_details_df['D_yea_pct'] <= (1 - threshold))
    )
]['roll_number'].tolist()

filtered_vote_details = vote_details_df[
    ~vote_details_df['roll_number'].isin(pure_partisans)
]

filtered_all_votes = all_votes_df[
    ~all_votes_df['roll_number'].isin(pure_partisans)
]

print(f"Dropped {len(pure_partisans)} pure party-line votes.")
print("Remaining substantive roll_numbers:", filtered_vote_details['roll_number'].nunique())


Dropped 168 pure party-line votes.
Remaining substantive roll_numbers: 289


In [6]:
import os 

os.makedirs("../../data/bills/refined", exist_ok=True)

vote_details_df.to_csv("../../data/bills/refined/vote_details_df.csv", index=False)