In [291]:
import pandas as pd
import requests
import numpy as np
from collections import defaultdict

In [None]:
import requests

congress = 118
chamber = "house"
rollnumber = 45

# query the raw data source
url = f"https://voteview.com/static/data/out/rollcalls/HSall_rollcalls.json"
response = requests.get(url)

if response.status_code == 200:
    data = response.json()
else:
    print("Error:", response.status_code)

In [309]:
# create initial dataframe
df = pd.DataFrame.from_records(data).drop(columns=["clerk_rollnumber", "source_documents"]).drop(columns=["congress_url"])
print(df.columns)
print(df.shape)

Index(['nominate_log_likelihood', 'vote_desc', 'clausen_codes', 'yea_count',
       'session', 'bill_number', 'dtl_desc', 'nominate_mid_1', 'issue_codes',
       'nay_count', 'congress', 'nominate_mid_2', 'rollnumber', 'date',
       'crs_subjects', 'chamber', 'peltzman_codes', 'nominate_spread_1',
       'crs_policy_area', 'nominate_spread_2', 'vote_result', 'vote_question'],
      dtype='object')
(112461, 22)


In [310]:
# baseline cleaning -- require an indentifier to work with
df = df.dropna(subset=["congress", "bill_number", "rollnumber", "vote_question"])
df["congress"] = df["congress"].astype(int)
df["bill_number"] = df["bill_number"].astype(str).str.lower().str.strip()
df["vote_question"] = df["vote_question"].astype(str).str.lower().str.strip()
df["rollnumber"] = df["rollnumber"].astype(int)
print(df.shape)

(33040, 22)


In [311]:
# find the different types of bills
df["bill_type"] = df["bill_number"].str.replace(r'\d+', '', regex=True)
df["bill_type"].value_counts().head()

bill_type
hr         18045
hres        4544
s           4537
pn          3069
sconres      899
Name: count, dtype: int64

In [312]:
# these are canonical bills, the other bill types are resolutions, treaties, procedural and nomations
bill_types = ["hr", "s", "hj", "hjr", "hjres", "hjre", "sj", "sjr", "sjres", "sjre"]
df = df.loc[df["bill_type"].isin(bill_types)]
df.shape

(23496, 23)

In [313]:
# Remove any rows that are unusable -- you need to have a bill, a vote, and a location of the vote
df = df.dropna(subset=["yea_count", "nay_count", "chamber"], how="any")
print(df.shape)

(23496, 23)


In [314]:
# we now need to determine which type of votes to include
# clean vote questions
vote_questions = df["vote_question"].dropna().unique()
print(f"{len(vote_questions)} number of different vote questions")
top_100 = df["vote_question"].value_counts().head(100)
print(f"Filtering to the top 100 vote types keeps {top_100.sum() / df.shape[0]:.2%} of data")
top_100.head(20)

414 number of different vote questions
Filtering to the top 100 vote types keeps 98.49% of data


vote_question
on agreeing to the amendment                           7116
on the amendment                                       2882
on passage                                             2370
on the motion to table                                 1555
on motion to suspend the rules and pass                1523
on motion to suspend the rules and pass, as amended    1433
on the motion                                           975
on motion to recommit with instructions                 952
on the cloture motion                                   800
on passage of the bill                                  778
on agreeing to the conference report                    365
on the conference report                                266
on motion to instruct conferees                         242
on the joint resolution                                 176
on cloture on the motion to proceed                     175
on motion to recommit                                   164
suspend the rules and pass

In [315]:
vote_type_groups = {
  # this is a vote in favor of the amendment
  "amend": [
    "on agreeing to the amendment",
    "on the amendment",
    "on agreeing to the amendments en bloc",
    "on agreeing to the substitute amendment",
    "on agreeing to the amendments",
    "on agreeing to the amendment, as amended",
    "on agreeing to the senate amendment",
    "on agreeing to the amendment, as modified",
    "whether the amendment is germane"
  ],
  # this is a passage of the bill
  "pass": [
    "on passage",
    "on passage of the bill",
    "passage, objections of the president notwithstanding",
    "passage, objections of the president not withstanding",
    "passage, objections of the president to the contrary notwithstanding",
    "passage, objections ofthe president notwithstanding",
    "passage, objection of the president notwithstanding"
  ],
  # tabling is way to kill a bill -- a vote yes here is equivalent to voting no on the bill
  "table": [
    "on the motion to table",
    "table motion to reconsider",
  ],
  # suspending rules means that the bill is fast tracked to a vote -- it is essentially a vote in favor of the bill
  "suspend": [
    "on motion to suspend the rules and pass",
    "on motion to suspend the rules and pass, as amended",
    "suspend the rules and pass, as amended",
    "suspend the rules and pass",
    "suspend the rules and pass as amended",
    "suspend the rules and agree to senate amendment",
    "motion to suspend the rules and pass, as amended",
    "suspend the rules and agree to senate amendments",
    "suspend rules and pass, as amended",
    "suspend rules and passas amended",
    "motion to suspend the rules and pass",
    "suspend the rules and concur in the senate amendment",
    "suspend the rules and agree to the senate amendment",
    "suspend the rules and agree to conference report",
    "on motion to suspend rules and pass",
    "on motion to suspend rules and pass, as amended"
  ],
  # requesting changes to the bill -- tatic used to stall or kill the bill
  "recommit": [
    "on motion to recommit with instructions",
    "on motion to recommit",
    "on the motion to recommit",
    "on motion to commit with instructions",
    "on motion to recommit the conference report",
    "recommit conference report with instructions",
    "recommit the conference report with instructions",
    "on motion to recommit conference report with instructions",
    "motion to recommit conference report with instructions",
    "on motion to commit"
  ],
  # end debate a proceed to the vote -- often indicates a willingness to vote yes
  "cloture": [
    "on the cloture motion",
    "on cloture on the motion to proceed",
  ],
  # agreeing to the bill as passed by the other house -- voting yes
  "conference": [
    "on agreeing to the conference report",
    "on the conference report",
    "on motion to suspend the rules and agree to the conference report",
  ],
  # enact the bill regardless of the presidential veto -- strongly voting yes
  "veto": [
    "on overriding the veto",
    "on presidential veto"
  ],
  # accept changes made by the senate -- voting yes
  "concur": [
    "on motion to suspend the rules and concur in the senate amendment",
    "on motion to concur in the senate amendment",
    "on motion to concur in the senate amendment with an amendment",
    "on motion to suspend the rules and concur in the senate amendments",
    "on motion to concur in the senate amendments",
    "on motion to concur in senate amendments",
    "on motion to agree to the senate amendment",
    "on motion to concur in the senate adt to the house adt to the senate adt",
    "on motion to concur in the senate amdt to the house amdt to the senate amdt",
    "agree to senate amendments"
  ]
}

In [316]:
valid_vote_questions = []
for vote_type, vote_questions in vote_type_groups.items():
    valid_vote_questions.extend(vote_questions)

df = df.loc[df["vote_question"].isin(valid_vote_questions)]
df.shape

(21127, 23)

In [317]:
vote_question_to_vote_type = defaultdict(None)
for vote_type, vote_questions in vote_type_groups.items():
    vote_question_to_vote_type.update({vote_question: vote_type for vote_question in vote_questions})

In [318]:
df["vote_type"] = df["vote_question"].replace(vote_question_to_vote_type)
df = df.drop(columns="vote_question")
df["vote_type"].value_counts()

vote_type
amend         10103
suspend        3273
pass           3185
table          1582
recommit       1199
cloture         975
conference      636
concur          131
veto             43
Name: count, dtype: int64

In [319]:
df["vote_result"].value_counts()

vote_result
Failed                                        6040
Passed                                        5958
Agreed to                                     2611
Amendment Rejected                            1494
Amendment Agreed to                           1388
Motion to Table Agreed to                     1273
Bill Passed                                    765
Cloture Motion Agreed to                       422
Cloture Motion Rejected                        378
Motion to Table Failed                         282
Conference Report Agreed to                    265
Cloture on the Motion to Proceed Rejected       90
Cloture on the Motion to Proceed Agreed to      85
Veto Sustained                                  25
Bill Defeated                                   13
Veto Overridden                                 11
Motion to Recommit Rejected                     10
Amendment Not Germane                           10
Amendment Germane                                6
Conference Report R

In [320]:
vote_results = df["vote_result"].unique()
passed_results = [res for res in vote_results if any(res_type in res.lower() for res_type in ["passed", "agreed", "overridden", "germane"])]
failed_results = [res for res in vote_results if res not in passed_results]
passed_results, failed_results

(['Passed',
  'Agreed to',
  'Amendment Agreed to',
  'Bill Passed',
  'Motion to Table Agreed to',
  'Conference Report Agreed to',
  'Cloture Motion Agreed to',
  'Amendment Germane',
  'Amendment Not Germane',
  'Veto Overridden',
  'Cloture on the Motion to Proceed Agreed to'],
 ['Failed',
  'Amendment Rejected',
  'Motion to Recommit Rejected',
  'Motion to Table Failed',
  'Veto Sustained',
  'Cloture Motion Rejected',
  'Cloture on the Motion to Proceed Rejected',
  'Bill Defeated',
  'Conference Report Rejected'])

In [321]:
df["vote_passed"] = df["vote_result"].isin(passed_results).fillna(False)
df["vote_passed"].value_counts()

vote_passed
True     12794
False     8333
Name: count, dtype: int64

In [322]:
df.shape

(21127, 24)