## Import Data

In [1]:
#list the raw data files currently stored in the Data directory
!ls Data

averages.csv
fivethirtyeight_partisan_lean_DISTRICTS.csv
fivethirtyeight_partisan_lean_STATES.csv
vote_predictions.csv


In [2]:
#list the working directory; the API key file read further down is opened relative to it
!ls

Capstone Project Proposal - 2.pdf  Data Wrangling.ipynb    README.md
Data				   PROPUBLICA_API_KEY.TXT


### Get API working

In [3]:
#Load ProPublica API key without exposing it to github. Request one yourself. They're free.
#The with-block closes the file handle as soon as the key has been read.
with open('./PROPUBLICA_API_KEY.TXT') as APIfile:
    #drop newlines plus any surrounding whitespace (e.g. a trailing '\r' from a
    #Windows-edited file) so the key can be sent verbatim in a request header
    APIkey = APIfile.read().replace('\n', '').strip()

#confirm the key loaded as a string; never print the key itself, the output is saved with the notebook
print(type(APIkey))

<class 'str'>


In [4]:
#API request function
import pandas as pd
import requests

#base url shared by every request, plus a small endpoint used to test the connection
url = 'https://api.propublica.org/congress/v1'
test_endpoint = '/members/new.json'

#the API key is sent with every request in the X-API-Key header
headers = {'X-API-Key': APIkey}

#meaning of each status code, copied from the API documentation
#(https://projects.propublica.org/api-docs/congress-api/#errors)
status_codes = dict([
    (200, 'Successful Connection!'),
    (400, 'Bad Request – Your request is improperly formed'),
    (403, 'Forbidden – Your request did not include an authorization header'),
    (404, 'Not Found – The specified record(s) could not be found'),
    (406, 'Not Acceptable – You requested a format that isn’t json or xml'),
    (500, 'Internal Server Error – We had a problem with our server. Try again later'),
    (503, 'Service Unavailable – The service is currently not working. Please try again later'),
])

def API_Request(end_point, verbose=True, timeout=60):
    '''Send a GET request to the RESTful API and return the decoded reply.

    end_point: path appended to the module-level base url
    verbose:   when truthy, print the description of the returned status code
    timeout:   seconds to wait for the server before requests raises a
               Timeout, so a stalled connection cannot hang a long import

    returns the JSON body decoded by response.json() for every status code
    listed in status_codes (error codes included), or a string describing the
    failure when the status code is not in status_codes.
    '''
    response = requests.get(url + end_point, headers=headers, timeout=timeout)

    #look up the documented meaning of the status code; an unlisted code is
    #reported to the caller as a plain string instead of a decoded body
    status = status_codes.get(response.status_code)
    if status is None:
        return 'API Failure, unknown status code: ' + str(response.status_code)

    if verbose:
        print(status)

    return response.json()

#smoke test: request the test endpoint and show the top-level structure of the reply
new_members = API_Request(end_point=test_endpoint)

#a working API answers with a dictionary; show its keys, its status field
#and the number of results reported by the first entry of 'results'
print(new_members.keys())
print('status:', new_members['status'])
print('no. results:', new_members['results'][0]['num_results'])

Successful Connection!
dict_keys(['status', 'copyright', 'results'])
status: OK
no. results: 20


### The API is working.  Let's import voting data

In [18]:
def API_attempts(endpoint, attempts, verbose=False):
    '''Request an endpoint from the API, retrying up to a given number of times.

    endpoint: path handed to API_Request
    attempts: maximum number of requests to send
    verbose:  passed to API_Request for the first try; retries are always loud

    returns (response, status): the last value returned by API_Request and the
    'status' entry extracted from it, or 'failed' when no status could be
    extracted. With attempts < 1 nothing is requested and (None, 'failed')
    is returned.

    raises: a connection error or an undecodable reply is retried like any
    other failure; if it happens on the final attempt it is re-raised.
    '''
    #defaults so the return statement is valid even when the loop never runs
    response = None
    status = 'failed'

    #loop through the number of attempts
    for attempt in range(attempts):

        #if we are already trying again we'll do it loud
        if attempt > 0:
            verbose = True
            print('Retry:', attempt)

        #if we can't extract the status we'll keep trying, and if it's OK we'll stop
        status = 'failed'
        try:
            #send endpoint to API function for request
            response = API_Request(endpoint, verbose)
        except (requests.exceptions.RequestException, ValueError) as error:
            #network failure or a body that is not valid JSON: retry unless
            #this was the last attempt, in which case the caller sees the error
            if attempt == attempts - 1:
                raise
            print('Request error:', error)
        else:
            try:
                status = response['status']
            except (KeyError, TypeError):
                #the reply is not a dictionary with a 'status' entry
                #(API_Request returns a plain string for unknown status codes)
                pass

        if status == 'OK':
            break

        #if the status is not OK, say so
        print('Status:', status)

    return response, status

#let's create a function that will import vote positions for a given rollcall vote.
def import_vote(congress, chamber, session, roll_call, verbose=False):
    '''Import the details and member positions of one roll-call vote.

    congress:  congress number, 102-116 for House, 80-116 for Senate
    chamber:   'house' or 'senate' (lower-cased before use)
    session:   1 for odd-numbered years, 2 for even-numbered years
    roll_call: number of the roll-call vote
    verbose:   forwarded to API_attempts for the first request

    returns (vote, status): the vote dictionary found under
    results -> votes -> vote of the reply and the API status. When the reply
    does not have that structure an empty dictionary is returned in its place.
    '''
    #construct endpoint for API request
    call_endpoint = '/{}/{}/sessions/{}/votes/{}.json'.format(
        congress, chamber.lower(), session, roll_call)

    #let's attempt a few times to account for internet burps
    call_response, status = API_attempts(call_endpoint, 3, verbose)

    #try to extract the vote; a failed request yields a string, None or a
    #dictionary without these keys, all of which end up here
    try:
        vote = call_response['results']['votes']['vote']
    except (KeyError, TypeError):
        return {}, status

    return vote, status
    

def positions_df(metalist):
    '''Return a dataframe of member positions for a list of vote metadata.

    metalist: sized sequence (or index) of (congress, chamber, session,
              roll_call) tuples, one per roll-call vote

    returns a dataframe with one row per entry of metalist (metalist is used
    as the index) and one column per distinct member, labelled by the member
    attributes delivered with the positions (party and state first). Votes
    for which no positions could be imported are kept as rows without values,
    so the result always lines up with metalist. When no vote at all delivers
    positions the result has the metalist index and no columns.
    '''
    #name of the field that holds the actual position in each position record
    value_col = 'vote_position'
    n_votes = len(metalist)

    #initialize dataframe and the list of member attributes used for merging
    dfp = pd.DataFrame()
    mergelst = None

    #for every vote in the list
    for ordinal, (congress, chamber, session, roll_call) in enumerate(metalist):

        #request the roll_call vote from the API
        vote, status = import_vote(congress, chamber, session, roll_call)

        #extract the positions; a failed request or a vote without recorded
        #positions is reported and skipped instead of aborting the whole import
        positions = vote.get('positions') if isinstance(vote, dict) else None
        if not positions:
            print('\nno positions for', (congress, chamber, session, roll_call),
                  '- status:', status)
            continue

        #turn the positions into a dataframe
        call_positions = pd.DataFrame(positions)

        #the first usable vote decides which member attributes identify a column
        if mergelst is None:
            #every field except the position itself; sorted so the result does
            #not depend on the column order pandas happens to produce
            mergelst = sorted(col for col in call_positions.columns if col != value_col)

            #move party and state to front of mergelst
            for x in ['state', 'party']:
                if x in mergelst:
                    mergelst.insert(0, mergelst.pop(mergelst.index(x)))

        #keep exactly the merge keys plus the position, and name the position
        #column after the vote's place in metalist so that repeated merges can
        #never produce clashing '_x'/'_y' column names
        call_positions = (call_positions
                          .reindex(columns=mergelst + [value_col])
                          .drop_duplicates()
                          .rename(columns={value_col: ordinal}))

        if dfp.empty:
            #first usable vote: it becomes the output dataframe
            dfp = call_positions

            #alert that we've started
            print('df created', end=' ')
        else:
            #merge the positions for that call into it
            dfp = pd.merge(dfp, call_positions, on=mergelst, how='outer')

            #let us know it's working
            print('.', end='')

    #nothing could be imported (or metalist is empty)
    if mergelst is None:
        return pd.DataFrame(index=metalist)

    #create a multiindex of party members
    dfp = dfp.set_index(mergelst)

    #restore skipped votes as empty columns so every entry of metalist has a row
    dfp = dfp.reindex(columns=list(range(n_votes)))

    #after transposing use the metalist as the index
    dfp = dfp.transpose()
    dfp.index = metalist

    return dfp

def import_month(year, chamber, month):
    '''Import the vote metadata of one chamber for one month.

    year, month: the month to import
    chamber:     'house' or 'senate' (lower-cased before use)

    returns a dataframe of the votes listed under results -> votes of the
    reply, indexed by (congress, chamber, session, roll_call) and sorted by
    that index. The dataframe is empty when the list of votes is empty or
    when the request did not deliver such a list; the latter case is reported
    on screen together with the API status.
    '''
    #construct endpoint
    call_endpoint = '/{}/votes/{}/{}.json'.format(chamber.lower(), year, month)

    #make 3 attempts at calling the API with the endpoint
    response, status = API_attempts(call_endpoint, 3)

    #extract the votes from the response; a failed request yields a string,
    #None or a dictionary without these keys
    try:
        votes = response['results']['votes']
    except (KeyError, TypeError):
        print('\nno vote list returned for', chamber, year, month, '- status:', status)
        return pd.DataFrame()

    #create a dataframe out of the votes
    votesdf = pd.DataFrame(votes)

    #an empty month has no columns to build the index from
    if not votesdf.empty:
        votesdf = votesdf.set_index(['congress', 'chamber', 'session', 'roll_call']).sort_index()

    #return the month's dataframe
    return votesdf

def import_year(year, chamber):
    '''Import the vote metadata of one chamber for a whole year.

    Calls import_month for the months 1 through 12 and stacks the non-empty
    results in month order.

    returns the combined dataframe, or an empty dataframe when no month
    delivered any votes.
    '''
    #so we know we started:
    print('importing', chamber, year, end=' ')

    #collect the monthly frames and combine them once at the end;
    #DataFrame.append is no longer available in current pandas
    month_frames = []
    for month in range(1, 13):
        df = import_month(year, chamber, month)

        #empty months are left out so they cannot disturb the index of the result
        if df.empty:
            print('skip', end=' ')
        else:
            month_frames.append(df)

        #print something so we know it's working
        print(month, end=' ')

    #pd.concat refuses an empty list
    if not month_frames:
        return pd.DataFrame()

    return pd.concat(month_frames)

def save_years(start_y, end_y):
    '''Import and save the vote data of both chambers for a range of years.

    For every year from start_y through end_y, first for the senate and then
    for the house, the vote metadata and the member positions are imported
    and written as two csv files into the Data directory.
    '''
    #every (year, chamber) combination, senate before house within a year
    jobs = [(year, chamber)
            for year in range(start_y, end_y+1)
            for chamber in ['senate','house']]

    for year, chamber in jobs:
        #vote metadata first; its index identifies the votes to fetch positions for
        votemetadf = import_year(year, chamber)
        voteposdf = positions_df(votemetadf.index)

        #both files share the same head and differ only in their ending
        pathhead = 'Data/'+str(year)+'_'+chamber+'vote_'
        metapath, pospath = pathhead + 'meta.csv', pathhead + 'pos.csv'

        #announce what we're doing
        print(''.join(['\nsaving ', str(year), ' ', chamber, ' as ', metapath, ' and ', pospath]))

        #write both dataframes as csv
        votemetadf.to_csv(metapath)
        voteposdf.to_csv(pospath)
            
            #save data frames as pickle in data/pickle directory
            
            
#trial run: import and save a single year before attempting a longer range
print('testing import with latest session:')
save_years(start_y=2019, end_y=2019)

testing import with latest session:
importing senate 2019 1 2 3 4 5 skip 6 skip 7 skip 8 skip 9 skip 10 skip 11 skip 12 df created ...................................................................................................................
saving 2019 senate as Data/2019_senatevote_meta.csv and Data/2019_senatevote_pos.csv
importing house 2019 1 2 3 4 5 skip 6 skip 7 skip 8 skip 9 skip 10 skip 11 skip 12 df created ........................................................................................................................................................................................................................
saving 2019 house as Data/2019_housevote_meta.csv and Data/2019_housevote_pos.csv


In [33]:
#full import: every year from 2000 through 2018, both chambers
#NOTE: the recorded run of this cell stopped with a ValueError while importing senate 2007
save_years(start_y=2000, end_y=2018)

importing senate 2000 1 2 3 4 5 6 7 skip 8 9 10 11 12 df created .........................................................................................................................................................................................................................................................................................................
saving 2000 senate as Data/2000_senatevote_meta.csv and Data/2000_senatevote_pos.csv
importing house 2000 1 2 3 4 5 6 7 skip 8 9 10 11 12 df created ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................

importing house 2006 1 2 3 4 5 6 7 skip 8 9 skip 10 11 12 df created ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
saving 2006 house as Data/2006_housevote_meta.csv and Data/2006_housevote_pos.csv
importing senate 2007 1 2 3 4 5 6 7 8 9 10 11 12 df created ......................................................................................................................................................................................................................................................

ValueError: not enough values to unpack (expected 2, got 0)

In [32]:
#read a saved positions file back in: the first five rows of the file are the
#column header levels and the first four columns form the row index
test_df = pd.read_csv(
    'Data/2019_senatevote_pos.csv',
    header=list(range(5)),
    index_col=list(range(4)),
)
test_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,party,R,D,R,D,R,D,R,D,R,R,R,R,R,D,D,D,D,D,R,D,R
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,state,TN,WI,WY,CO,TN,CT,MO,NJ,AR,IN,...,NC,PA,NM,MD,VA,MA,RI,MS,OR,IN
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,dw_nominate,0.323,-0.521,0.534,-0.208,0.614,-0.411,0.429,-0.618,0.401,nan,...,0.419,0.642,-0.454,-0.392,-0.195,-0.762,-0.394,0.377,-0.321,0.48
Unnamed: 0_level_3,Unnamed: 1_level_3,Unnamed: 2_level_3,member_id,A000360,B001230,B001261,B001267,B001243,B001277,B000575,B001288,B001236,B001310,...,T000476,T000461,U000039,V000128,W000805,W000817,W000802,W000437,W000779,Y000064
Unnamed: 0_level_4,Unnamed: 1_level_4,Unnamed: 2_level_4,name,Lamar Alexander,Tammy Baldwin,John Barrasso,Michael Bennet,Marsha Blackburn,Richard Blumenthal,Roy Blunt,Cory Booker,John Boozman,Mike Braun,...,Thom Tillis,Patrick J. Toomey,Tom Udall,Chris Van Hollen,Mark Warner,Elizabeth Warren,Sheldon Whitehouse,Roger Wicker,Ron Wyden,Todd Young
congress,chamber,session,roll_call,Unnamed: 4_level_5,Unnamed: 5_level_5,Unnamed: 6_level_5,Unnamed: 7_level_5,Unnamed: 8_level_5,Unnamed: 9_level_5,Unnamed: 10_level_5,Unnamed: 11_level_5,Unnamed: 12_level_5,Unnamed: 13_level_5,Unnamed: 14_level_5,Unnamed: 15_level_5,Unnamed: 16_level_5,Unnamed: 17_level_5,Unnamed: 18_level_5,Unnamed: 19_level_5,Unnamed: 20_level_5,Unnamed: 21_level_5,Unnamed: 22_level_5,Unnamed: 23_level_5,Unnamed: 24_level_5
116,Senate,1,1,Yes,No,Yes,No,Yes,No,Yes,No,Yes,Yes,...,Yes,Yes,No,No,No,No,No,Yes,No,Yes
116,Senate,1,2,Yes,No,Yes,No,Yes,No,Yes,No,Yes,Yes,...,Yes,Yes,No,No,No,No,No,Yes,No,Yes
116,Senate,1,3,Yes,No,Yes,No,Yes,No,Yes,No,Yes,Yes,...,Yes,Yes,No,No,No,No,No,Yes,No,Yes
116,Senate,1,4,Yes,No,Yes,No,Yes,No,Yes,No,No,Yes,...,Yes,Yes,No,No,No,No,No,Yes,No,Yes
116,Senate,1,5,No,Yes,No,Yes,No,Yes,No,Yes,Yes,No,...,No,No,Yes,Yes,Yes,Yes,Yes,No,Yes,No
116,Senate,1,6,No,Yes,No,Yes,No,Yes,No,Yes,Yes,No,...,No,No,Yes,Yes,Yes,Yes,Yes,No,Yes,No
116,Senate,1,7,Not Voting,No,Yes,No,Yes,No,Yes,No,Yes,Yes,...,Yes,Yes,No,No,No,No,No,Yes,No,Yes
116,Senate,1,8,No,Yes,Yes,Yes,Yes,Yes,Yes,No,Yes,Yes,...,Yes,No,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes
116,Senate,1,9,Yes,No,Yes,No,Yes,No,Yes,No,Yes,Yes,...,Yes,Yes,No,No,No,No,No,Yes,No,Yes
116,Senate,1,10,Yes,Yes,No,Yes,No,Yes,No,Yes,No,No,...,No,No,Yes,Yes,Yes,Yes,Yes,No,Yes,No
