In [61]:
import pandas as pd
import os
import requests
import json
from datetime import datetime

In [62]:
# Raise pandas' display limits so printed DataFrames show up to
# 50 columns and 100 rows before being truncated.
pd.options.display.max_columns = 50
pd.options.display.max_rows = 100

In [63]:
# Download the current and historical legislator lists and combine them.

# Without a timeout, requests.get can block forever on an unresponsive server.
REQUEST_TIMEOUT = 30  # seconds

# current
json_url_c = "https://theunitedstates.io/congress-legislators/legislators-current.json"
r_c = requests.get(json_url_c, timeout=REQUEST_TIMEOUT)
r_c.raise_for_status()  # surface HTTP errors here, not as a JSON decode error below
json_response_c = r_c.json()

# historical
json_url_h = "https://theunitedstates.io/congress-legislators/legislators-historical.json"
r_h = requests.get(json_url_h, timeout=REQUEST_TIMEOUT)
r_h.raise_for_status()
json_response_h = r_h.json()

# combined: current legislators first, then historical ones
json_response = json_response_c + json_response_h

# check: the combined list holds every record from both sources
len(json_response) == len(json_response_c) + len(json_response_h)


True

In [64]:
# Example item from the JSON: the record at index 1 of the combined list
example = json_response[1]
example

{'bio': {'birthday': '1958-10-13', 'gender': 'F'},
 'id': {'ballotpedia': 'Maria Cantwell',
  'bioguide': 'C000127',
  'cspan': 26137,
  'fec': ['S8WA00194', 'H2WA01054'],
  'google_entity_id': 'kg:/m/01x68t',
  'govtrack': 300018,
  'house_history': 10608,
  'icpsr': 39310,
  'lis': 'S275',
  'maplight': 544,
  'opensecrets': 'N00007836',
  'thomas': '00172',
  'votesmart': 27122,
  'wikidata': 'Q22250',
  'wikipedia': 'Maria Cantwell'},
 'name': {'first': 'Maria',
  'last': 'Cantwell',
  'official_full': 'Maria Cantwell'},
 'terms': [{'district': 1,
   'end': '1995-01-03',
   'party': 'Democrat',
   'start': '1993-01-05',
   'state': 'WA',
   'type': 'rep'},
  {'class': 1,
   'end': '2007-01-03',
   'party': 'Democrat',
   'start': '2001-01-03',
   'state': 'WA',
   'type': 'sen',
   'url': 'http://cantwell.senate.gov'},
  {'address': '311 HART SENATE OFFICE BUILDING WASHINGTON DC 20510',
   'class': 1,
   'contact_form': 'http://www.cantwell.senate.gov/contact/',
   'end': '2013-01-

In [65]:
def _senate_indicator_row(legislator, first_year=1935, last_year=2019, congresses_per_term=3):
    """Build one output row for a legislator: [full name, birthday, flag, flag, ...].

    There is one 0/1 flag per odd year from ``first_year`` to ``last_year``
    (inclusive). A Senate term ('type' == 'sen') whose start date falls in
    January of one of those years sets the flag for that year and for the
    following ``congresses_per_term - 1`` columns, clipped at the last column.

    NOTE(review): term['end'] is never consulted, so every qualifying term is
    treated as running for the full number of columns, and Senate terms that
    start outside January are not flagged at all -- confirm this is intended.

    Parameters
    ----------
    legislator : dict
        Record with 'name' ('first', 'last', optional 'middle'), optional
        'bio' (with optional 'birthday') and 'terms' (each term having
        'start' as 'YYYY-MM-DD' and 'type').

    Returns
    -------
    list
        Name, birthday (or "Missing"), followed by the 0/1 flags.
    """
    name = legislator["name"]
    # Join only the non-empty parts so that a missing middle name does not
    # leave a double space between first and last name.
    name_parts = (name["first"], name.get("middle", ""), name["last"])
    full_name = " ".join(part for part in name_parts if part)

    # Tolerate a record without a 'bio' section or without a birthday.
    birthday = legislator.get("bio", {}).get("birthday", "Missing")

    # Years in which a Senate term began in January. Years before first_year
    # need no explicit filter: they never match a column below.
    start_years = set()
    for term in legislator["terms"]:
        start_date = datetime.strptime(term["start"], "%Y-%m-%d")
        if term["type"] == "sen" and start_date.month == 1:
            start_years.add(start_date.year)

    years = range(first_year, last_year + 1, 2)
    flags = [0] * len(years)
    for i, year in enumerate(years):
        if year in start_years:
            # Flag the starting column and the following ones covered by the term.
            for j in range(i, min(i + congresses_per_term, len(flags))):
                flags[j] = 1

    return [full_name, birthday] + flags


# One row per legislator in the combined JSON response
indicators = [_senate_indicator_row(legislator) for legislator in json_response]
    
    
   


In [66]:
# Column labels: two identifying columns followed by one column per odd year 1935-2019
columns = ['Name', 'Birthday'] + list(range(1935, 2021, 2))

# create the df of indicators
df = pd.DataFrame(indicators, columns=columns)

# Remove rows whose indicator columns are all zero.
# The sum is restricted to the indicator columns (everything after Name and
# Birthday): summing whole rows would mix strings with integers, which raises
# TypeError on pandas >= 2.0 where numeric_only defaults to False.
df_indicators = df[df.iloc[:, 2:].sum(axis=1) != 0]

# write it to a csv file
# NOTE(review): absolute, user-specific path -- this only works on the original
# author's machine; consider a path relative to a configurable data directory.
df_indicators.to_csv(r"/Users/ishitagopal/Box/sen_indicators.csv")
df_indicators.head()

Unnamed: 0,Name,Birthday,1935,1937,1939,1941,1943,1945,1947,1949,1951,1953,1955,1957,1959,1961,1963,1965,1967,1969,1971,1973,1975,1977,1979,1981,1983,1985,1987,1989,1991,1993,1995,1997,1999,2001,2003,2005,2007,2009,2011,2013,2015,2017,2019
0,Sherrod Brown,1952-11-09,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1
1,Maria Cantwell,1958-10-13,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1
2,Benjamin L. Cardin,1943-10-05,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1
3,Thomas Richard Carper,1947-01-23,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1
4,Robert P. Casey,1960-04-13,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1


In [67]:
# Sanity check: total the indicator columns (everything after Name and Birthday).
# There should be 100 senators in each year.

df_indicators.iloc[:, 2:].sum()


1935     32
1937     63
1939     96
1941    100
1943     98
1945    100
1947     97
1949     99
1951     95
1953     97
1955     95
1957     98
1959    100
1961    101
1963    103
1965    102
1967    101
1969     98
1971     97
1973     97
1975     98
1977     99
1979     99
1981    100
1983    100
1985    100
1987    100
1989    101
1991    102
1993    104
1995    103
1997    102
1999    100
2001    100
2003    101
2005    100
2007    100
2009    103
2011    102
2013    102
2015    103
2017    100
2019    101
dtype: int64