In [1]:
import numpy as np
import pandas as pd
from pymongo import MongoClient
import pprint

In [2]:
client = MongoClient() # defaults to localhost
db = client.bills
bill_details = db.bill_details


In [3]:
bill_details.find().count()

  """Entry point for launching an IPython kernel.


253000

In [4]:
# print out record counts with text
print('--> Number of records in database: {}'.format(bill_details.find().count()))

records_with_text = bill_details.find({'body': {'$regex': 'e'}})
record_count = records_with_text.count()
print('--> Current number of records with text: {}'.format(record_count))


  
  """


--> Number of records in database: 253000
--> Current number of records with text: 10948


In [5]:
# need to execute query everytime i refer to it?
records_with_text = bill_details.find({'body': {'$regex': 'e'}})
data = pd.DataFrame(list(records_with_text))

In [6]:
data.bill_status.unique()

array(['Introduced', 'Became Law', 'Passed House', 'To President',
       'Resolving Differences', 'Failed House', 'Became Private Law',
       'Passed Senate', 'Agreed to in Senate', 'Agreed to in House',
       'Failed to pass over veto', 'Vetoed by President',
       'Passed over veto', 'Pocket vetoed by President', 'Failed Senate'],
      dtype=object)

In [7]:

# LABELS

# Every record that doesn't have status Became Law will have label 0 if before current (115th) congress.
# Try this out with 3 labels.

#  

#                             Whole     House     Senate
# Introduced:                 None      None      None
# Became Law:                 1         1         1
# Passed House:               None      1         None
# To President:               1         1         1
# Resolving Differences:      1         1         1
# Failed House:               0         0         1 if S
# Became Private Law:         1         1         1
# Passed Senate:              None      None      1
# Failed to pass over veto:   1         1         1
# Vetoed by President:        1         1         1
# Passed over veto:           1         1         1     #stronger support for this one???
# Pocket vetoed by President: 1         1         1
# Failed Senate:              0         1 if H    0


In [8]:
# check numbers for each status
for i in data.bill_status.unique():
    num = len(data[data['bill_status'] == i])
    print('{}: \t\t{}'.format(i, num))

Introduced: 		6909
Became Law: 		1439
Passed House: 		2140
To President: 		21
Resolving Differences: 		74
Failed House: 		114
Became Private Law: 		2
Passed Senate: 		42
Agreed to in Senate: 		19
Agreed to in House: 		120
Failed to pass over veto: 		34
Vetoed by President: 		27
Passed over veto: 		1
Pocket vetoed by President: 		5
Failed Senate: 		1


In [9]:
# create columns for labels
# data['house_label'] = None
# data['senate_label'] = None
# data['president_label'] = None
data['passed'] = None

In [10]:
# # start breaking up the labels after first iteration
# introduced = data[data['bill_status'] == 'Introduced']
# became_law = data[data['bill_status'] == 'Became Law']
# passed_house = data[data['bill_status'] == 'Passed House']
# to_pres = data[data['bill_status'] == 'To President']
# res_diff = data[data['bill_status'] == 'Resolving Differences']
# failed_house = data[data['bill_status'] == 'Failed House']
# became_priv = data[data['bill_status'] == 'Became Private Law']
# passed_senate = data[data['bill_status'] == 'Passed Senate']
# failed_over_veto = data[data['bill_status'] == 'Failed to pass over veto']
# veto_pres = data[data['bill_status'] == 'Vetoed by President']
# passed_over_veto = data[data['bill_status'] == 'Passed over veto']   #more weight to this one?
# pocket_veto = data[data['bill_status'] == 'Pocket vetoed by President']  #this is a pass
# failed_senate = data[data['bill_status'] == 'Failed Senate']


# # all labels on became_law will be 1
# became_law.loc[:, 'house_label'] = 1
# became_law.loc[:, 'senate_label'] = 1
# became_law.loc[:, 'president_label'] = 1
# became_law.loc[:, 'passed'] = 1

# # labels on veto_pres will be 1, 1, 0
# veto_pres.loc[:, 'house_label'] = 1
# veto_pres.loc[:, 'senate_label'] = 1
# veto_pres.loc[:, 'president_label'] = 0



In [18]:
print('Shape of entire data: {}'.format(data.shape))

Shape of entire data: (10948, 18)


In [23]:
# break up dataframe into those that became law and others (did not or still pending)
became_law = data[(data['bill_status'] == 'Became Law') | (data['bill_status'] == 'Became Private Law')]
others = data[(data['bill_status'] != 'Became Law') & (data['bill_status'] != 'Became Private Law')]

became_law.loc[:, 'passed'] = 1


# break up others into current congress and previous ones. Anything that hasn't been signed into law
# before current session is dead.
current_cong = others[others['congress_id'] == '115th']
prev_cong = others[others['congress_id'] != '115th']

prev_cong.loc[:, 'passed'] = 0



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item_labels[indexer[info_axis]]] = value


In [24]:
current_cong.bill_status.unique()

array(['Introduced', 'Passed House', 'To President',
       'Resolving Differences', 'Failed House', 'Passed Senate',
       'Agreed to in Senate', 'Agreed to in House'], dtype=object)

In [20]:

current_cong[current_cong['bill_status'] == 'Agreed to in Senate']

Unnamed: 0,_id,bill_status,body,committee,congress_id,cosponsors,cosponsors_url,desc,intro_date,leg_id,leg_type,leg_url,num_of_cosponsors,sponsor,sponsor_district,sponsor_party,sponsor_state,passed
7368,5c11d658cd68d16918e5ab78,Agreed to in Senate,[Congressional Bills 115th Congress] [From the...,,115th,,,Directing the Secretary of the Senate to make ...,05/16/2018,H Con Res 121,CONCURRENT RESOLUTION,https://www.congress.gov/bill/115th-congress/h...,0,"Rep. Roe, David P.",1,TN,R,
7373,5c11d658cd68d16918e5ab7d,Agreed to in Senate,[Congressional Bills 115th Congress] [From the...,,115th,,,Providing for a correction in the enrollment o...,03/22/2018,H Con Res 116,CONCURRENT RESOLUTION,https://www.congress.gov/bill/115th-congress/h...,0,"Rep. Frelinghuysen, Rodney P.",11,NJ,R,
7374,5c11d658cd68d16918e5ab7e,Agreed to in Senate,[Congressional Bills 115th Congress] [From the...,House - Transportation and Infrastructure,115th,,https://www.congress.gov/bill/115th-congress/h...,Authorizing the use of the Capitol Grounds for...,03/15/2018,H Con Res 115,CONCURRENT RESOLUTION,https://www.congress.gov/bill/115th-congress/h...,1,"Rep. Barletta, Lou",11,PA,R,
7376,5c11d658cd68d16918e5ab80,Agreed to in Senate,[Congressional Bills 115th Congress] [From the...,House - Transportation and Infrastructure,115th,,https://www.congress.gov/bill/115th-congress/h...,Authorizing the use of the Capitol Grounds for...,03/08/2018,H Con Res 113,CONCURRENT RESOLUTION,https://www.congress.gov/bill/115th-congress/h...,8,"Rep. Hoyer, Steny H.",5,MD,D,
7377,5c11d658cd68d16918e5ab81,Agreed to in Senate,[Congressional Bills 115th Congress] [From the...,House - House Administration,115th,,https://www.congress.gov/bill/115th-congress/h...,Authorizing the use of Emancipation Hall in th...,03/08/2018,H Con Res 112,CONCURRENT RESOLUTION,https://www.congress.gov/bill/115th-congress/h...,1,"Rep. Hanabusa, Colleen",1,HI,D,
7378,5c11d658cd68d16918e5ab82,Agreed to in Senate,[Congressional Bills 115th Congress] [From the...,"House - Foreign Affairs | Senate - Commerce, S...",115th,,https://www.congress.gov/bill/115th-congress/h...,Recognizing and supporting the efforts of the ...,03/05/2018,H Con Res 111,CONCURRENT RESOLUTION,https://www.congress.gov/bill/115th-congress/h...,47,"Rep. LaHood, Darin",18,IL,R,
7382,5c11d658cd68d16918e5ab86,Agreed to in Senate,[Congressional Bills 115th Congress] [From the...,House - House Administration,115th,,https://www.congress.gov/bill/115th-congress/h...,Authorizing Reverend Billy Graham to lie in ho...,02/23/2018,H Con Res 107,CONCURRENT RESOLUTION,https://www.congress.gov/bill/115th-congress/h...,12,"Rep. McHenry, Patrick T.",10,NC,R,
7383,5c11d658cd68d16918e5ab87,Agreed to in Senate,[Congressional Bills 115th Congress] [From the...,House - House Administration,115th,,https://www.congress.gov/bill/115th-congress/h...,Authorizing the use of Emancipation Hall for a...,02/16/2018,H Con Res 106,CONCURRENT RESOLUTION,https://www.congress.gov/bill/115th-congress/h...,1,"Rep. Latta, Robert E.",5,OH,R,
7386,5c11d658cd68d16918e5ab8a,Agreed to in Senate,[Congressional Bills 115th Congress] [From the...,House - House Administration,115th,,https://www.congress.gov/bill/115th-congress/h...,Authorizing the use of Emancipation Hall for a...,02/05/2018,H Con Res 103,CONCURRENT RESOLUTION,https://www.congress.gov/bill/115th-congress/h...,10,"Rep. Ros-Lehtinen, Ileana",27,FL,R,
7387,5c11d658cd68d16918e5ab8b,Agreed to in Senate,[Congressional Bills 115th Congress] [From the...,House - House Administration,115th,,,Authorizing the use of Emancipation Hall in th...,02/05/2018,H Con Res 102,CONCURRENT RESOLUTION,https://www.congress.gov/bill/115th-congress/h...,0,"Rep. McCarthy, Kevin",23,CA,R,
