In [168]:
import pandas as pd
import os
import requests
import json
from datetime import datetime

In [169]:
# Raise pandas' display limits so printed DataFrames are not truncated:
# up to 50 columns and 100 rows are shown before pandas elides the rest.
for option_name, limit in (('display.max_columns', 50), ('display.max_rows', 100)):
    pd.set_option(option_name, limit)

In [170]:
# Download the legislator records (current + historical) and combine them
# into one list, `json_response`, with the current legislators first.

# current
json_url_c = "https://theunitedstates.io/congress-legislators/legislators-current.json"
r_c = requests.get(json_url_c, timeout=60)   # without a timeout a stalled connection hangs the kernel
r_c.raise_for_status()                       # fail on an HTTP error instead of trying to parse an error page
json_response_c = r_c.json()

# historical
json_url_h = "https://theunitedstates.io/congress-legislators/legislators-historical.json"
r_h = requests.get(json_url_h, timeout=60)
r_h.raise_for_status()
json_response_h = r_h.json()

# combined
json_response = json_response_c + json_response_h

# check: the combined list must contain every record from both downloads
len(json_response) == len(json_response_c) + len(json_response_h)


True

In [171]:
# example item in the JSON: the last record of the combined list
example = json_response[-1]
example

{'bio': {'birthday': '1950-05-20', 'gender': 'M'},
 'id': {'ballotpedia': 'Chris Collins',
  'bioguide': 'C001092',
  'cspan': 94144,
  'fec': ['H8NY29032'],
  'google_entity_id': 'kg:/m/03cv81x',
  'govtrack': 412563,
  'icpsr': 21345,
  'maplight': 1787,
  'opensecrets': 'N00001285',
  'thomas': '02151',
  'votesmart': 139770,
  'wikidata': 'Q5106218',
  'wikipedia': 'Chris Collins (American politician)'},
 'name': {'first': 'Chris',
  'last': 'Collins',
  'official_full': 'Chris Collins'},
 'terms': [{'address': '1117 Longworth HOB; Washington DC 20515-3227',
   'contact_form': 'https://chriscollins.house.gov/contact/email-me',
   'district': 27,
   'end': '2015-01-03',
   'office': '1117 Longworth House Office Building',
   'party': 'Republican',
   'phone': '202-225-5265',
   'rss_url': 'http://chriscollins.house.gov/rss.xml',
   'start': '2013-01-03',
   'state': 'NY',
   'type': 'rep',
   'url': 'http://chriscollins.house.gov'},
  {'address': '1117 Longworth HOB; Washington DC 2

In [172]:
# Create a list of lists: one row per legislator holding the full name, the
# birthday, and a 0/1 indicator for every other year from 1935 through 2019.
# An indicator is 1 when the legislator has a term of type 'rep' whose start
# date falls in January of that year.

indicator_years = range(1935, 2021, 2)   # 1935, 1937, ..., 2019


def _indicator_row(legislator, years):
    """Return [name, birthday, indicator, indicator, ...] for one legislator.

    Parameters
    ----------
    legislator : dict
        Record with a "name" dict ("first", "last", optional "middle"),
        a "bio" dict (optional "birthday") and a "terms" list whose items
        carry "type" and a "start" date formatted as YYYY-MM-DD.
    years : iterable of int
        Years to emit an indicator for, in output order.

    Returns
    -------
    list
        Full name, birthday ("Missing" when absent), then one 0/1 per year.

    Raises
    ------
    KeyError
        If a required key ("name", "first", "last", "bio", "terms",
        "start", "type") is absent.
    ValueError
        If a term's start date does not match YYYY-MM-DD.
    """
    name = legislator["name"]
    # Join only the name parts that are present, so a legislator without a
    # middle name is "First Last" rather than "First  Last" (double space).
    full_name = ' '.join(
        part for part in (name["first"], name.get("middle", ""), name["last"]) if part
    )

    birthday = legislator["bio"].get("birthday", "Missing")

    start_years = set()                      # set: constant-time lookup per year below
    for term in legislator["terms"]:
        start_date = datetime.strptime(term['start'], '%Y-%m-%d')
        # three conditions: a 'rep' term, start year after 1933, start month is January
        if term['type'] == 'rep' and start_date.year > 1933 and start_date.month == 1:
            start_years.add(start_date.year)

    return [full_name, birthday] + [1 if year in start_years else 0 for year in years]


indicators = [_indicator_row(legislator, indicator_years) for legislator in json_response]
   


In [173]:
# create column names: 'Name', 'Birthday', then one column per indicator year
year_columns = list(range(1935, 2021, 2))
columns = ['Name', 'Birthday'] + year_columns

# create the df of indicators
df = pd.DataFrame(indicators, columns=columns)

# remove empty rows, i.e. legislators with no indicator set in any year.
# Only the year columns are summed: summing whole rows would mix the
# Name/Birthday strings with the integer indicators, which pandas >= 2.0
# rejects with a TypeError (older versions silently skipped those columns).
df_indicators = df[df[year_columns].sum(axis=1) != 0]

# write it to a csv file; set REP_INDICATORS_DIR to write somewhere other
# than the original author's local folder
output_dir = os.environ.get("REP_INDICATORS_DIR", r"/Users/ishitagopal/Box")
df_indicators.to_csv(os.path.join(output_dir, "rep_indicators.csv"))

In [174]:
# Sanity check: number of indicator flags set per year.
# Expectation: there should be 435 representatives each year.
# NOTE(review): the totals printed below are above 435 for every year, so the
# term filter presumably also counts rows beyond the 435 voting members
# (e.g. non-voting delegates) -- confirm against the data before relying on it.

df_indicators[df_indicators.columns[2:]].sum()


1935    448
1937    453
1939    463
1941    456
1943    456
1945    451
1947    453
1949    447
1951    452
1953    444
1955    442
1957    446
1959    449
1961    450
1963    446
1965    445
1967    440
1969    449
1971    446
1973    444
1975    447
1977    445
1979    444
1981    448
1983    445
1985    444
1987    447
1989    447
1991    445
1993    440
1995    439
1997    442
1999    439
2001    439
2003    440
2005    439
2007    440
2009    440
2011    440
2013    439
2015    439
2017    441
2019    439
dtype: int64