In [205]:
import numpy as np
import pandas as pd
import json
import re

In [206]:
with open('data/daterangegovinfo02.json', 'r') as fl:
    govinfo_dct = json.loads(fl.read())

In [207]:
rolldf = pd.read_csv('data/H117_rollcalls.csv')
bills = list(set(rolldf['bill_number']))
bills[:10]

['S3905',
 'HR4489',
 'HR2250',
 'HR4350',
 'HRES1170',
 'HR5585',
 'HR7025',
 'HR7337',
 'HR207',
 'HCONRES70']

In [208]:
rolldf.head(100)

Unnamed: 0,congress,chamber,rollnumber,date,session,clerk_rollnumber,yea_count,nay_count,nominate_mid_1,nominate_mid_2,nominate_spread_1,nominate_spread_2,nominate_log_likelihood,bill_number,vote_result,vote_desc,vote_question,dtl_desc
0,117,House,1,2021-01-03,1,2,216,211,-0.057,0.038,0.707,0.274,-3.951,,Pelosi,,Election of the Speaker,
1,117,House,2,2021-01-03,1,3,371,2,0.000,0.000,0.000,0.000,0.000,HRES1,Passed,Authorizing and directing the Speaker to admin...,On Agreeing to the Resolution,
2,117,House,3,2021-01-04,1,4,214,204,0.019,-0.125,0.711,-0.235,-0.256,HRES8,Passed,Adopting the Rules of the House of Representat...,On Motion to Table the Motion to Postpone to a...,
3,117,House,4,2021-01-04,1,5,214,196,0.020,-0.133,0.710,-0.240,-0.256,HRES8,Passed,Adopting the Rules of the House of Representat...,Table Motion to Refer,
4,117,House,5,2021-01-04,1,6,217,204,0.019,-0.124,0.711,-0.234,-0.256,HRES8,Passed,Adopting the Rules of the House of Representat...,On Ordering the Previous Question,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,117,House,96,2021-03-19,1,97,398,14,0.531,-0.647,0.394,-1.499,-25.673,HRES134,Passed,Condemning the military coup that took place o...,On Motion to Suspend the Rules and Agree,
96,117,House,97,2021-04-13,1,98,384,38,0.794,0.608,1.180,-1.039,-57.773,HR1868,Passed,To prevent across-the-board direct spending cu...,On Motion to Suspend the Rules and Concur in t...,
97,117,House,98,2021-04-14,1,99,381,37,0.477,-0.879,0.922,-1.420,-45.918,HR172,Passed,United States Anti-Doping Agency Reauthorizati...,On Motion to Suspend the Rules and Pass,
98,117,House,99,2021-04-14,1,100,415,11,0.783,-0.622,1.220,-1.511,-30.763,S578,Passed,"Food Allergy Safety, Treatment, Education, and...",On Motion to Suspend the Rules and Pass,


In [209]:
# show all the different values for the vote_question column that contain the word "pass" (case insensitive)

rolldf[rolldf['vote_question'].str.contains('pass', case=False)]['vote_question'].unique()

array(['On Motion to Suspend the Rules and Pass', 'On Passage',
       'On Motion to Suspend the Rules and Pass, as Amended',
       'On Motion to Suspend the Rules and Pass Certain Bills and Agree to a Resolution',
       'On Motion to Suspend the Rules and Pass Certain Bills',
       'On Motion to Suspend the Rules and Pass Certain Bills and Agree to Certain Resolutions',
       'On Motion to Suspend the Rules and Pass Certain Bills and Concur in the Senate Amendments',
       'On Motion to Suspend the Rules and Pass Certain Bills and Concur in a Senate Amendment'],
      dtype=object)

In [210]:
# show all the different values for the vote_question column that don't contain the word "pass" (case insensitive)

rolldf[~rolldf['vote_question'].str.contains('pass', case=False)]['vote_question'].unique()

array(['Election of the Speaker', 'On Agreeing to the Resolution',
       'On Motion to Table the Motion to Postpone to a Day Certain',
       'Table Motion to Refer', 'On Ordering the Previous Question',
       'On Motion to Commit with Instructions',
       'On Agreeing to the Objection', 'On Motion to Adjourn',
       'On Agreeing to the Amendment', 'Table Motion to Reconsider',
       'On Motion to Recommit',
       'On Motion to Concur in the Senate Amendment',
       'On Motion to Table', 'On Motion to Suspend the Rules and Agree',
       'On Motion to Suspend the Rules and Concur in the Senate Amendment',
       'On Motion to Commit', 'On Consideration of the Resolution',
       'On Motion to Suspend the Rules and Agree, as Amended',
       'On Motion to Concur in the Senate Amendment With An Amendment',
       'On Concurring in Senate Amdt with Amdt (Divisions B,C,F,X,Z, titles 2&3 of Division N)',
       'On Concurring in Senate Amdt with Amdt (Remaining Divisions)',
       'T

In [211]:
only_final_votes = rolldf[rolldf['vote_question'].str.contains('pass', case=False)]

# show average number of votes on each bill. Eg how many rows are there for each bill?
only_final_votes.groupby('bill_number').count()['rollnumber'].mean()

# show bills w at least 2 rows
has_many = only_final_votes.groupby('bill_number').count()['rollnumber'] > 1
has_many[has_many].index

# show those rows in the original dataframe
rolldf[rolldf['bill_number'].isin(has_many[has_many].index)]

Unnamed: 0,congress,chamber,rollnumber,date,session,clerk_rollnumber,yea_count,nay_count,nominate_mid_1,nominate_mid_2,nominate_spread_1,nominate_spread_2,nominate_log_likelihood,bill_number,vote_result,vote_desc,vote_question,dtl_desc
132,117,House,133,2021-05-11,1,134,250,168,0.163,0.987,0.315,0.299,-70.336,HR1629,Failed,Fairness in Orphan Drug Exclusivity Act,On Motion to Suspend the Rules and Pass,
148,117,House,149,2021-05-19,1,150,402,23,0.622,-0.783,0.237,-0.312,-56.575,HR1629,Passed,Fairness in Orphan Drug Exclusivity Act,On Passage,
158,117,House,159,2021-06-15,1,160,240,188,0.239,-0.087,0.425,-0.212,-51.419,HR239,Failed,Equal Access to Contraception for Veterans Act,On Motion to Suspend the Rules and Pass,
160,117,House,161,2021-06-15,1,162,248,177,0.287,0.958,0.458,0.056,-47.733,HR1443,Failed,LGBTQ Business Equal Credit Enforcement and In...,"On Motion to Suspend the Rules and Pass, as Am...",
180,117,House,181,2021-06-24,1,182,252,176,0.266,0.964,0.545,0.188,-48.119,HR1443,Passed,LGBTQ Business Equal Credit Enforcement and In...,On Passage,
182,117,House,183,2021-06-24,1,184,245,181,0.266,0.964,0.377,0.024,-51.592,HR239,Passed,Equal Access to Contraception for Veterans Act,On Passage,
494,117,House,495,2022-02-28,2,48,235,188,0.209,0.978,0.517,0.116,-29.188,HR2116,Failed,Creating a Respectful and Open World for Natur...,"On Motion to Suspend the Rules and Pass, as Am...",
528,117,House,529,2022-03-18,2,82,235,189,0.231,0.08,0.351,0.016,-29.476,HR2116,Passed,Creating a Respectful and Open World for Natur...,On Passage,
541,117,House,542,2022-03-30,2,95,238,187,0.213,0.237,0.224,-0.013,-55.395,S2938,Failed,To designate the United States Courthouse and ...,On Motion to Suspend the Rules and Pass,
609,117,House,610,2022-05-11,2,163,262,156,0.319,0.433,0.211,0.076,-92.357,HR6531,Failed,Targeting Resources to Communities in Need Act,"On Motion to Suspend the Rules and Pass, as Am...",


In [212]:
# create a new dataframe only_final_votes_limited that only has one row per bill. If a bill has multiple rows, keep the last one.

only_final_votes_limited = only_final_votes.groupby('bill_number').last().reset_index()
only_final_votes_limited.head()

Unnamed: 0,bill_number,congress,chamber,rollnumber,date,session,clerk_rollnumber,yea_count,nay_count,nominate_mid_1,nominate_mid_2,nominate_spread_1,nominate_spread_2,nominate_log_likelihood,vote_result,vote_desc,vote_question,dtl_desc
0,HJRES100,117,House,937,2022-11-30,2,490,290,137,0.431,0.759,0.169,-0.014,-151.316,Passed,To provide for a resolution with respect to th...,On Passage,
1,HJRES17,117,House,81,2021-03-17,1,82,222,204,-0.053,0.999,0.419,0.422,-15.544,Passed,Removing the deadline for the ratification of ...,On Passage,
2,HR1,117,House,61,2021-03-03,1,62,220,210,0.213,-0.977,0.55,0.498,-8.63,Passed,For the People Act,On Passage,
3,HR1002,117,House,111,2021-04-15,1,112,411,5,0.0,0.0,0.0,0.0,0.0,Passed,Debarment Enforcement of Bad Actor Registrants...,"On Motion to Suspend the Rules and Pass, as Am...",
4,HR1029,117,House,315,2021-10-19,1,316,421,3,0.0,0.0,0.0,0.0,0.0,Passed,Free Veterans from Fees Act,On Motion to Suspend the Rules and Pass,


In [213]:
# show only_final_votes_limited for a specific bill
only_final_votes_limited[only_final_votes_limited['bill_number'] == 'HR7900']

Unnamed: 0,bill_number,congress,chamber,rollnumber,date,session,clerk_rollnumber,yea_count,nay_count,nominate_mid_1,nominate_mid_2,nominate_spread_1,nominate_spread_2,nominate_log_likelihood,vote_result,vote_desc,vote_question,dtl_desc
338,HR7900,117,House,797,2022-07-14,2,350,329,101,0.181,-0.236,0.148,-2.426,-137.008,Passed,National Defense Authorization Act for Fiscal ...,On Passage,


In [214]:
import string

with open("congress/mentions_of_bills.json") as f:
	transcript = json.load(f)

# fix whitespace in transcripts
# replace \n with newlines
for t in transcript:
	t['transcript'] = t['transcript'].replace("\\n", "\n")

# filter out transcripts that are less than 800 characters long (likely not a debate)

transcript = [t for t in transcript if len(t['transcript']) > 800]

# filter out transcripts that are less than 1500 characters long and contain the phrase
# "Congress has the power to enact this legislation pursuant to the following:"
# (ignore whitespace)

no_whitespace = {ord(c): None for c in string.whitespace}

transcript = [t for t in transcript if len(t['transcript']) > 1500 or
		"Congress has the power to enact this legislation pursuant to the following:".translate(no_whitespace)
			  not in t['transcript'].translate(no_whitespace)]

bills = set()

r = re.compile(r"(H\.R\. \d+|H\. Res\. \d+|S\. \d+|S\. Res\. \d+)")

for item in transcript:
	b = (re.search(r, item['transcript']).group(1))
	bills.add(b)
	# print(item['transcript'])
	# input("Press enter for another transcript, or ctrl+c to exit")

print("Number of bills:", len(bills))
print("Number of transcripts:", len(transcript))
print(f"Number of characters: {sum([len(item['transcript']) for item in transcript]):_}")
print(f"Number of words: {sum([len(item['transcript'].split()) for item in transcript]):_}")

Number of bills: 1852
Number of transcripts: 5622
Number of characters: 161_378_245
Number of words: 21_039_602


In [215]:
# how many transcripts does the bill with the most transcripts have?
from collections import Counter
c = Counter()
for item in transcript:
    b = (re.search(r, item['transcript']).group(1))
    c[b] += 1
c.most_common(10)


[('H.R. 7900', 953),
 ('H.R. 5376', 251),
 ('H.R. 2617', 79),
 ('H.R. 3967', 76),
 ('H.R. 3076', 75),
 ('H.R. 4346', 60),
 ('S. 2938', 53),
 ('H.R. 4521', 50),
 ('H.R. 8404', 48),
 ('S. 27', 39)]

In [216]:
# because most bills have < 64 transcripts, we will have the input to the model be the vectorization of up to 64 transcripts.
# the output will be the number of yae votes for that bill

# to start with, we need to create a dataframe with the following columns:
# bill_number, yae_votes, transcript_1, transcript_2, ..., transcript_64 (if available, otherwise none)

for item in transcript:
    b = (re.search(r, item['transcript']).group(1))
    item['bill_number'] = b


In [217]:
# reformat bill number to match the format in the roll call data
# eg from H.R. 1 to HR1
# or H. Res. 1 to HRES1
# I think if we just strip non-alphanumeric characters, it should work
for item in transcript:
    item['bill_number'] = re.sub(r'\W+', '', item['bill_number'])

In [218]:
for item in transcript:
    if item.keys() != transcript[0].keys():
        print(item.keys())
        break

In [219]:
transcript[0].keys()

dict_keys(['meta', 'transcript', 'date', 'bill_number'])

In [220]:
# Assuming you have two datasets: transcripts_list and bills_df
# transcript is a list of dicts with 'bill_number' and 'transcript'
# bills_df is a DataFrame with 'bill_number', 'yae_count', and 'nay_count'

# Convert the list of transcripts to a DataFrame
transcripts_df = pd.DataFrame(transcript)
transcripts_df.head(10)

Unnamed: 0,meta,transcript,date,bill_number
0,"{'body': 'H', 'depth': 2, 'download_timeout': ...","<html>\n<head>\n<title>Congressional Record, V...",2022-01-23,HR6371
1,"{'body': 'H', 'depth': 2, 'download_timeout': ...","<html>\n<head>\n<title>Congressional Record, V...",2022-01-23,HR4673
2,"{'body': 'H', 'depth': 2, 'download_timeout': ...","<html>\n<head>\n<title>Congressional Record, V...",2022-01-23,HR2543
3,"{'body': 'H', 'depth': 2, 'download_timeout': ...","<html>\n<head>\n<title>Congressional Record, V...",2022-01-23,HR18
4,"{'body': 'H', 'depth': 2, 'download_timeout': ...","<html>\n<head>\n<title>Congressional Record, V...",2022-01-23,HR6434
5,"{'body': 'H', 'depth': 2, 'download_timeout': ...","<html>\n<head>\n<title>Congressional Record, V...",2022-01-23,S452
6,"{'body': 'H', 'depth': 2, 'download_timeout': ...","<html>\n<head>\n<title>Congressional Record, V...",2022-01-23,HR38
7,"{'body': 'S', 'depth': 2, 'download_timeout': ...","<html>\n<head>\n<title>Congressional Record, V...",2022-01-23,S401
8,"{'body': 'S', 'depth': 2, 'download_timeout': ...","<html>\n<head>\n<title>Congressional Record, V...",2022-01-23,S137
9,"{'body': 'S', 'depth': 2, 'download_timeout': ...","<html>\n<head>\n<title>Congressional Record, V...",2022-01-23,S27


In [221]:
# show transcripts for a specific bill
transcripts_df[transcripts_df['bill_number'] == 'HR7900']

Unnamed: 0,meta,transcript,date,bill_number
2325,"{'body': 'H', 'depth': 2, 'download_timeout': ...","<html>\n<head>\n<title>Congressional Record, V...",2022-07-17,HR7900
2327,"{'body': 'H', 'depth': 2, 'download_timeout': ...","<html>\n<head>\n<title>Congressional Record, V...",2022-07-17,HR7900
2330,"{'body': 'H', 'depth': 2, 'download_timeout': ...","<html>\n<head>\n<title>Congressional Record, V...",2022-07-17,HR7900
2334,"{'body': 'H', 'depth': 2, 'download_timeout': ...","<html>\n<head>\n<title>Congressional Record, V...",2022-07-17,HR7900
2359,"{'body': 'H', 'depth': 2, 'download_timeout': ...","<html>\n<head>\n<title>Congressional Record, V...",2022-07-17,HR7900
...,...,...,...,...
4781,"{'body': 'S', 'depth': 2, 'download_timeout': ...","<html>\n<head>\n<title>Congressional Record, V...",2022-11-16,HR7900
4782,"{'body': 'S', 'depth': 2, 'download_timeout': ...","<html>\n<head>\n<title>Congressional Record, V...",2022-11-16,HR7900
4783,"{'body': 'S', 'depth': 2, 'download_timeout': ...","<html>\n<head>\n<title>Congressional Record, V...",2022-11-16,HR7900
4784,"{'body': 'S', 'depth': 2, 'download_timeout': ...","<html>\n<head>\n<title>Congressional Record, V...",2022-11-16,HR7900


In [222]:
# show roll call for the first transcript
bill_no = "HR7900"
rolldf[rolldf['bill_number'] == bill_no]

Unnamed: 0,congress,chamber,rollnumber,date,session,clerk_rollnumber,yea_count,nay_count,nominate_mid_1,nominate_mid_2,nominate_spread_1,nominate_spread_2,nominate_log_likelihood,bill_number,vote_result,vote_desc,vote_question,dtl_desc
756,117,House,757,2022-07-13,2,310,218,207,-0.14,0.99,0.624,0.697,-5.457,HR7900,Agreed to,,On Agreeing to the Amendment,
757,117,House,758,2022-07-13,2,311,221,207,0.26,-0.194,0.373,0.902,-24.031,HR7900,Agreed to,,On Agreeing to the Amendment,
758,117,House,759,2022-07-13,2,312,220,209,0.417,-0.673,0.555,1.216,-25.531,HR7900,Agreed to,,On Agreeing to the Amendment,
759,117,House,760,2022-07-13,2,313,220,207,0.426,-0.905,0.33,0.529,-43.664,HR7900,Agreed to,,On Agreeing to the Amendment,
760,117,House,761,2022-07-13,2,314,219,209,0.1,0.276,0.723,0.093,-2.832,HR7900,Agreed to,,On Agreeing to the Amendment,
761,117,House,762,2022-07-13,2,315,215,212,0.053,-0.449,0.707,0.718,-9.994,HR7900,Agreed to,,On Agreeing to the Amendment,
762,117,House,763,2022-07-13,2,316,78,350,-0.497,-0.013,0.519,1.8,-76.784,HR7900,Failed,,On Agreeing to the Amendment,
763,117,House,764,2022-07-13,2,317,151,277,0.181,-0.27,0.122,1.024,-130.873,HR7900,Failed,,On Agreeing to the Amendment,
764,117,House,765,2022-07-13,2,318,155,272,-0.287,0.081,0.266,0.645,-80.909,HR7900,Failed,,On Agreeing to the Amendment,
765,117,House,766,2022-07-13,2,319,208,221,-0.273,0.962,0.233,0.268,-84.892,HR7900,Failed,,On Agreeing to the Amendment,


In [223]:
# show role call by number
rolldf[rolldf['rollnumber'] == 350]

Unnamed: 0,congress,chamber,rollnumber,date,session,clerk_rollnumber,yea_count,nay_count,nominate_mid_1,nominate_mid_2,nominate_spread_1,nominate_spread_2,nominate_log_likelihood,bill_number,vote_result,vote_desc,vote_question,dtl_desc
349,117,House,350,2021-11-02,1,351,407,9,0.0,0.0,0.0,0.0,0.0,HR4481,Passed,Small Business 7(a) Loan Agent Transparency Act,On Motion to Suspend the Rules and Pass,


In [224]:
# merge the two datasets on bill_number
merged_df = pd.merge(transcripts_df, only_final_votes_limited, on='bill_number')

In [225]:
merged_df.head(10)

Unnamed: 0,meta,transcript,date_x,bill_number,congress,chamber,rollnumber,date_y,session,clerk_rollnumber,...,nay_count,nominate_mid_1,nominate_mid_2,nominate_spread_1,nominate_spread_2,nominate_log_likelihood,vote_result,vote_desc,vote_question,dtl_desc
0,"{'body': 'H', 'depth': 2, 'download_timeout': ...","<html>\n<head>\n<title>Congressional Record, V...",2022-01-23,HR4673,117,House,461,2022-01-20,2,14,...,163,0.247,0.969,0.317,0.194,-75.88,Passed,EVEST Act,On Passage,
1,"{'body': 'S', 'depth': 2, 'download_timeout': ...","<html>\n<head>\n<title>Congressional Record, V...",2022-01-23,HR4673,117,House,461,2022-01-20,2,14,...,163,0.247,0.969,0.317,0.194,-75.88,Passed,EVEST Act,On Passage,
2,"{'body': 'S', 'depth': 2, 'download_timeout': ...","<html>\n<head>\n<title>Congressional Record, V...",2022-01-23,HR4673,117,House,461,2022-01-20,2,14,...,163,0.247,0.969,0.317,0.194,-75.88,Passed,EVEST Act,On Passage,
3,"{'body': 'H', 'depth': 2, 'download_timeout': ...","<html>\n<head>\n<title>Congressional Record, V...",2022-01-23,HR4673,117,House,461,2022-01-20,2,14,...,163,0.247,0.969,0.317,0.194,-75.88,Passed,EVEST Act,On Passage,
4,"{'body': 'H', 'depth': 2, 'download_timeout': ...","<html>\n<head>\n<title>Congressional Record, V...",2022-01-23,HR2543,117,House,722,2022-06-15,2,275,...,207,0.002,-0.275,0.672,0.61,-9.068,Passed,Federal Reserve Racial and Economic Equity Act,On Passage,
5,"{'body': 'H', 'depth': 2, 'download_timeout': ...","<html>\n<head>\n<title>Congressional Record, V...",2022-06-22,HR2543,117,House,722,2022-06-15,2,275,...,207,0.002,-0.275,0.672,0.61,-9.068,Passed,Federal Reserve Racial and Economic Equity Act,On Passage,
6,"{'body': 'H', 'depth': 2, 'download_timeout': ...","<html>\n<head>\n<title>Congressional Record, V...",2022-06-22,HR2543,117,House,722,2022-06-15,2,275,...,207,0.002,-0.275,0.672,0.61,-9.068,Passed,Federal Reserve Racial and Economic Equity Act,On Passage,
7,"{'body': 'H', 'depth': 2, 'download_timeout': ...","<html>\n<head>\n<title>Congressional Record, V...",2022-06-22,HR2543,117,House,722,2022-06-15,2,275,...,207,0.002,-0.275,0.672,0.61,-9.068,Passed,Federal Reserve Racial and Economic Equity Act,On Passage,
8,"{'body': 'H', 'depth': 2, 'download_timeout': ...","<html>\n<head>\n<title>Congressional Record, V...",2022-06-22,HR2543,117,House,722,2022-06-15,2,275,...,207,0.002,-0.275,0.672,0.61,-9.068,Passed,Federal Reserve Racial and Economic Equity Act,On Passage,
9,"{'body': 'S', 'depth': 2, 'download_timeout': ...","<html>\n<head>\n<title>Congressional Record, V...",2022-06-22,HR2543,117,House,722,2022-06-15,2,275,...,207,0.002,-0.275,0.672,0.61,-9.068,Passed,Federal Reserve Racial and Economic Equity Act,On Passage,


In [226]:
# reshape the dataframe so that each row is a bill, and each column is a transcript. Limit to 64 transcripts per bill, if there are more than 64

# first, sort by bill_number and rollnumber
merged_df = merged_df.sort_values(['bill_number', 'rollnumber'])
merged_df.head(10)

Unnamed: 0,meta,transcript,date_x,bill_number,congress,chamber,rollnumber,date_y,session,clerk_rollnumber,...,nay_count,nominate_mid_1,nominate_mid_2,nominate_spread_1,nominate_spread_2,nominate_log_likelihood,vote_result,vote_desc,vote_question,dtl_desc
99,"{'body': 'H', 'depth': 2, 'download_timeout': ...","<html>\n<head>\n<title>Congressional Record, V...",2022-01-23,HR1,117,House,61,2021-03-03,1,62,...,210,0.213,-0.977,0.55,0.498,-8.63,Passed,For the People Act,On Passage,
100,"{'body': 'S', 'depth': 2, 'download_timeout': ...","<html>\n<head>\n<title>Congressional Record, V...",2022-01-23,HR1,117,House,61,2021-03-03,1,62,...,210,0.213,-0.977,0.55,0.498,-8.63,Passed,For the People Act,On Passage,
101,"{'body': 'S', 'depth': 2, 'download_timeout': ...","<html>\n<head>\n<title>Congressional Record, V...",2022-01-23,HR1,117,House,61,2021-03-03,1,62,...,210,0.213,-0.977,0.55,0.498,-8.63,Passed,For the People Act,On Passage,
102,"{'body': 'S', 'depth': 2, 'download_timeout': ...","<html>\n<head>\n<title>Congressional Record, V...",2022-01-23,HR1,117,House,61,2021-03-03,1,62,...,210,0.213,-0.977,0.55,0.498,-8.63,Passed,For the People Act,On Passage,
103,"{'body': 'H', 'depth': 2, 'download_timeout': ...","<html>\n<head>\n<title>Congressional Record, V...",2022-03-24,HR1,117,House,61,2021-03-03,1,62,...,210,0.213,-0.977,0.55,0.498,-8.63,Passed,For the People Act,On Passage,
104,"{'body': 'H', 'depth': 2, 'download_timeout': ...","<html>\n<head>\n<title>Congressional Record, V...",2022-04-10,HR1,117,House,61,2021-03-03,1,62,...,210,0.213,-0.977,0.55,0.498,-8.63,Passed,For the People Act,On Passage,
2579,"{'body': 'H', 'depth': 2, 'download_timeout': ...","<html>\n<head>\n<title>Congressional Record, V...",2022-09-16,HR1066,117,House,870,2022-09-13,2,423,...,88,0.165,-0.986,0.36,-0.745,-80.849,Passed,Wildfire Recovery Act,"On Motion to Suspend the Rules and Pass, as Am...",
2580,"{'body': 'H', 'depth': 2, 'download_timeout': ...","<html>\n<head>\n<title>Congressional Record, V...",2022-09-16,HR1066,117,House,870,2022-09-13,2,423,...,88,0.165,-0.986,0.36,-0.745,-80.849,Passed,Wildfire Recovery Act,"On Motion to Suspend the Rules and Pass, as Am...",
2857,"{'body': 'S', 'depth': 2, 'download_timeout': ...","<html>\n<head>\n<title>Congressional Record, V...",2022-12-24,HR1154,117,House,302,2021-09-28,1,303,...,36,0.354,-0.935,0.293,-0.724,-47.042,Passed,Great Dismal Swamp National Heritage Area Act,On Motion to Suspend the Rules and Pass,
105,"{'body': 'H', 'depth': 2, 'download_timeout': ...","<html>\n<head>\n<title>Congressional Record, V...",2022-01-23,HR1192,117,House,36,2021-02-24,1,37,...,0,0.0,0.0,0.0,0.0,0.0,Passed,Puerto Rico Recovery Accuracy in Disclosures Act,On Motion to Suspend the Rules and Pass,


In [231]:
# next, group by bill_number and take the first 64 transcripts
merged_df.to_csv('annotated_transcirpts_117C_v2.csv', escapechar='\\')

In [232]:
c = Counter(merged_df['vote_result'])

In [233]:
c['Failed']

2

In [234]:
c['Passed']

2866