## Import Data

In [1]:
#list the data
!ls Data

averages.csv
fivethirtyeight_partisan_lean_DISTRICTS.csv
fivethirtyeight_partisan_lean_STATES.csv
vote_predictions.csv


In [2]:
!ls

Capstone Project Proposal - 2.pdf  Data Wrangling.ipynb    README.md
Data				   PROPUBLICA_API_KEY.TXT


### Get API working

In [3]:
#Load ProPublica API key without exposing it to github. Request one yourself. They're free.
APIfile = open('./PROPUBLICA_API_KEY.TXT',)
APIkey = APIfile.read().replace('\n', '')
print(type(APIkey))
#print(APIkey)

<class 'str'>


In [22]:
#API request function
import requests

#create API key
headers = {'X-API-Key': APIkey}

#load status codes from API documentation(https://projects.propublica.org/api-docs/congress-api/#errors) into dictionary
status_codes = {
    200:'Successful Connection!',
    400:'Bad Request – Your request is improperly formed',
    403:'Forbidden – Your request did not include an authorization header',
    404:'Not Found – The specified record(s) could not be found',
    406:'Not Acceptable – You requested a format that isn’t json or xml',
    500:'Internal Server Error – We had a problem with our server. Try again later',
    503:'Service Unavailable – The service is currently not working. Please try again later'
    }

#load base url and test endpoint
url = 'https://api.propublica.org/congress/v1'
test_endpoint = '/members/new.json'

def API_Request(end_point, verbose=True):
#send a get request to restful API, print status and return response as a dictionary
    response = requests.get(url+end_point, headers=headers)
    #print the response status
    
    try:
        status = status_codes[response.status_code]
    except: 
        status = ('API Failure, unknown status code: '+str(response.status_code))
        return status
    
    if verbose == True:
        print(status)
        
    return response.json()

#test that the API is working. 
new_members = API_Request(test_endpoint)
print(new_members.keys()) # When the API works this should give the list of attributes per member
print('status:', new_members['status'])
print('no. results:', new_members['results'][0]['num_results'])

Successful Connection!
dict_keys(['status', 'copyright', 'results'])
status: OK
no. results: 20


In [23]:
#print what type of data we can get for each new member
new_members_data = new_members['results'][0]['members']
print(new_members_data[0].keys())


dict_keys(['id', 'api_uri', 'first_name', 'middle_name', 'last_name', 'suffix', 'party', 'chamber', 'state', 'start_date'])


In [24]:
#list some data for 20 members:
for member in new_members_data:
    print(member['first_name'], member['last_name']+':', member['party'])

Rick Scott: R
Michael San Nicolas: D
Jared Golden: D
Martha McSally: R
Mitt Romney: R
Joshua Hawley: R
Mike Braun: R
Carol Miller: R
Bryan Steil: R
Kim Schrier: D
Jennifer Wexton: D
Abigail Spanberger: D
Ben Cline: R
Denver Riggleman: R
Elaine Luria: D
Ben McAdams: D
Colin Allred: D
Sylvia Garcia: D
Chip Roy: R
Veronica Escobar: D


### The API is working.  Let's import voting data

In [25]:
#import test rollcal vote for the senate
recent_endpoint = '/senate/votes/recent.json'
senate_vote_recent = API_Request(recent_endpoint)
print('no. results', senate_vote_recent['results']['num_results'])

Successful Connection!
no. results 20


In [26]:
last_20_votes = senate_vote_recent['results']['votes']
print(last_20_votes[0].keys())

dict_keys(['congress', 'chamber', 'session', 'roll_call', 'source', 'url', 'vote_uri', 'bill', 'amendment', 'nomination', 'question', 'question_text', 'description', 'vote_type', 'date', 'time', 'result', 'tie_breaker', 'tie_breaker_vote', 'document_number', 'document_title', 'democratic', 'republican', 'independent', 'total'])


In [27]:
for vote in last_20_votes:
    print(vote['chamber'], vote['congress'], str(vote['roll_call'])+':', vote['description']+'\n')
    

Senate 116 106: Michael H. Park, of New York, to be United States Circuit Judge for the Second Circuit

Senate 116 105: Michael H. Park, of New York, to be United States Circuit Judge for the Second Circuit

Senate 116 104: Janet Dhillon, of Pennsylvania, to be a Member of the Equal Employment Opportunity Commission for a term expiring July 1, 2022

Senate 116 103: Janet Dhillon, of Pennsylvania, to be a Member of the Equal Employment Opportunity Commission for a term expiring July 1, 2022

Senate 116 102: Judith DelZoppo Pryor, of Ohio, to be a Member of the Board of Directors of the Export-Import Bank of the United States for a term expiring January 20, 2021

Senate 116 101: Spencer Bachus III, of Alabama, to be a Member of the Board of Directors of the Export-Import Bank of the United States for a term expiring January 20, 2023

Senate 116 100: Kimberly A. Reed, of West Virginia, to be President of the Export-Import Bank of the United States for a term expiring January 20, 2021

Sen

In [28]:
import pandas as pd
last_20df = pd.DataFrame(last_20_votes)
last_20df.loc[:4,:]#'bill':'description']

Unnamed: 0,amendment,bill,chamber,congress,date,democratic,description,document_number,document_title,independent,...,roll_call,session,source,tie_breaker,tie_breaker_vote,time,total,url,vote_type,vote_uri
0,{},{},Senate,116,2019-05-09,"{'yes': 0, 'no': 39, 'present': 0, 'not_voting...","Michael H. Park, of New York, to be United Sta...",245,"Michael H. Park, of New York, to be United Sta...","{'yes': 0, 'no': 2, 'present': 0, 'not_voting'...",...,106,1,https://www.senate.gov/legislative/LIS/roll_ca...,,,13:45:00,"{'yes': 52, 'no': 41, 'present': 0, 'not_votin...",https://www.senate.gov/legislative/LIS/roll_ca...,1/2,https://api.propublica.org/congress/v1/116/sen...
1,{},{},Senate,116,2019-05-08,"{'yes': 0, 'no': 41, 'present': 0, 'not_voting...","Michael H. Park, of New York, to be United Sta...",245,"Michael H. Park, of New York, to be United Sta...","{'yes': 0, 'no': 2, 'present': 0, 'not_voting'...",...,105,1,https://www.senate.gov/legislative/LIS/roll_ca...,,,14:48:00,"{'yes': 51, 'no': 43, 'present': 0, 'not_votin...",https://www.senate.gov/legislative/LIS/roll_ca...,1/2,https://api.propublica.org/congress/v1/116/sen...
2,{},{},Senate,116,2019-05-08,"{'yes': 0, 'no': 41, 'present': 0, 'not_voting...","Janet Dhillon, of Pennsylvania, to be a Member...",173,"Janet Dhillon, of Pennsylvania, to be a Member...","{'yes': 0, 'no': 2, 'present': 0, 'not_voting'...",...,104,1,https://www.senate.gov/legislative/LIS/roll_ca...,,,14:19:00,"{'yes': 50, 'no': 43, 'present': 0, 'not_votin...",https://www.senate.gov/legislative/LIS/roll_ca...,1/2,https://api.propublica.org/congress/v1/116/sen...
3,{},{},Senate,116,2019-05-08,"{'yes': 0, 'no': 42, 'present': 0, 'not_voting...","Janet Dhillon, of Pennsylvania, to be a Member...",173,"Janet Dhillon, of Pennsylvania, to be a Member...","{'yes': 0, 'no': 2, 'present': 0, 'not_voting'...",...,103,1,https://www.senate.gov/legislative/LIS/roll_ca...,,,11:53:00,"{'yes': 52, 'no': 44, 'present': 0, 'not_votin...",https://www.senate.gov/legislative/LIS/roll_ca...,1/2,https://api.propublica.org/congress/v1/116/sen...
4,{},{},Senate,116,2019-05-08,"{'yes': 42, 'no': 0, 'present': 0, 'not_voting...","Judith DelZoppo Pryor, of Ohio, to be a Member...",54,"Judith DelZoppo Pryor, of Ohio, to be a Member...","{'yes': 1, 'no': 1, 'present': 0, 'not_voting'...",...,102,1,https://www.senate.gov/legislative/LIS/roll_ca...,,,11:33:00,"{'yes': 77, 'no': 19, 'present': 0, 'not_votin...",https://www.senate.gov/legislative/LIS/roll_ca...,1/2,https://api.propublica.org/congress/v1/116/sen...


In [29]:
#use above to test get voting record for the latest bill
congress = last_20df.loc[0,'congress']
chamber = last_20df.loc[0,'chamber']
session = last_20df.loc[0,'session']
roll_call = last_20df.loc[0,'roll_call']

lv_endpoint = '/'+str(congress)+'/'+chamber.lower()+'/sessions/'+str(session)+'/votes/'+str(roll_call)+'.json'
print('using endpoint:', lv_endpoint)

latest_vote = API_Request(lv_endpoint)

using endpoint: /116/senate/sessions/1/votes/106.json
Successful Connection!


In [30]:
vote = latest_vote['results']['votes']['vote']
vote.keys()

dict_keys(['congress', 'session', 'chamber', 'roll_call', 'source', 'url', 'bill', 'amendment', 'nomination', 'question', 'question_text', 'description', 'vote_type', 'date', 'time', 'result', 'tie_breaker', 'tie_breaker_vote', 'document_number', 'document_title', 'democratic', 'republican', 'independent', 'total', 'positions'])

In [31]:
#load a dataframe of everyone's position on the latest vote:
latest_vote_positions_df = pd.DataFrame(vote['positions'])
latest_vote_positions_df

Unnamed: 0,dw_nominate,member_id,name,party,state,vote_position
0,0.323,A000360,Lamar Alexander,R,TN,Yes
1,-0.521,B001230,Tammy Baldwin,D,WI,No
2,0.534,B001261,John Barrasso,R,WY,Yes
3,-0.208,B001267,Michael Bennet,D,CO,Not Voting
4,0.614,B001243,Marsha Blackburn,R,TN,Yes
5,-0.411,B001277,Richard Blumenthal,D,CT,No
6,0.429,B000575,Roy Blunt,R,MO,Yes
7,-0.618,B001288,Cory Booker,D,NJ,Not Voting
8,0.401,B001236,John Boozman,R,AR,Yes
9,,B001310,Mike Braun,R,IN,Yes


In [39]:
#Yeah, that's right! That's vote data!

#let's create a function that will import vote positions for a given rollcall vote.
def import_vote(congress, chamber, session, roll_call, verbose=False):
    '''imports vote details and member positions of a given roll_call vote
        takes congress number 102-116 for House, 80-116 for Senate
        chamber(house|senate) session 1 for odd 2 for even-numbered years
        returns a dicitonary of vote positions and API status'''
    #construct endpoint for API request
    call_endpoint = '/'+str(congress)+'/'+chamber.lower()+'/sessions/'+str(session)+'/votes/'+str(roll_call)+'.json'
    
    #let's attempt a few times to account for internet burps
    for attempt in range(3):
    #send endpoint to API function for request
        if attempt > 0: 
            verbose = True
            print('Retry:', attempt)
        call_response = API_Request(call_endpoint, verbose)
    
        status = 'failed'
        #extract the status of the API request
        try:
            status = call_response['status']
        except:
            pass
        if status == 'OK':
            break
        else:
            print('Status:', status)
        
    #try to extract and return the vote positions
    try:
        vote = call_response['results']['votes']['vote']
        return vote, status
    except:
        return {}, status 
    
    
#let's create a function that will import a given congressional session
def import_session(congress, chamber, session):
    '''import a congressional session worth of votes using the pro-publica API
        takes the congress number chamber and session as input and returns a data frame'''
    #initialize roll_call and Status
    roll_call = 0 
    status = 'OK'
    
    #announce which congress chamber and session we're importing
    print('Importing ',congress,chamber,session)
    
    #Keep using the function above to import votes as long as the API returns an ok status
    while status == 'OK':
        roll_call += 1 #itererate which roll call we are on
        
        #use the API to import votes
        vote, status = import_vote(congress,chamber,session,roll_call)
                
        #get the positions of a given vote while removing them from the vote dictionary
        try:
            positions = vote.pop('positions')
            votedicts = {}
            for key in list(vote.keys()):
                if type(vote[key]) == dict:
                    votedicts[key] = vote.pop(key)
            #print(votedicts)
            
        except:
            positions = {}
            vote = {}
        #Make a dataframe out of the votes
        vote_df = pd.Series(vote)
        #print(vote_df) #debug
        
        #Make a dataframe out of the positions
        call_df = pd.DataFrame(positions)
        
        #replace the column called 'vote_position' with the roll_call number we are on.
        call_df = call_df.rename(columns={'vote_position':roll_call})
        
        #find the length of the positions dataframe
        dflng = len(call_df)
        
        #make sure the lenth of the data frame fits the critera for a congressional house
        if dflng > 0 and dflng < 440:
            try: #merge this dataframe with the previous so the votes stack up 
                dfp = pd.merge(dfp, call_df, on=mergelst, how='outer')
                print('.', end='') #print a little dot to show we are making progress
                
                #stack the vote dataframes
                dfv = pd.concat([dfv, vote_df], axis=1)
            except:
                #if the above fails, it's probably because there wasn't a data set to merge into.
                #we'll initialize it here
                print('Initalizing df', roll_call)
                print('API status:', status)
                dfp = call_df
                dfv = vote_df
                #these are the columns we'll merge on (everthing except the vote position)
                mergelst = dfp.columns.tolist()[:-1]
                print('merging on:',mergelst)
        else: #if the vote postions don't fit the size critera we'll skip them to avoid screwing up the df
            print('(Size) skip vote', roll_call)
                
    return dfv, dfp

#test (remove when working)
print('testing import with latest session:')
latest_session_test_1_df, latest_session_test_positions = import_session(116, 'Senate', 1)
latest_session_test_1_df

testing import with latest session:
Importing  116 Senate 1
Initalizing df 1
API status: OK
merging on: ['dw_nominate', 'member_id', 'name', 'party', 'state']
.........................................................................................................Status: ERROR
Retry: 1
Successful Connection!
Status: ERROR
Retry: 2
Successful Connection!
Status: ERROR
(Size) skip vote 107


Unnamed: 0,0,1,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,...,0.9,0.10,0.11,0.12,0.13,0.14,0.15,0.16,0.17,0.18
chamber,Senate,Senate,Senate,Senate,Senate,Senate,Senate,Senate,Senate,Senate,...,Senate,Senate,Senate,Senate,Senate,Senate,Senate,Senate,Senate,Senate
congress,116,116,116,116,116,116,116,116,116,116,...,116,116,116,116,116,116,116,116,116,116
date,2019-01-08,2019-01-10,2019-01-14,2019-01-15,2019-01-15,2019-01-16,2019-01-17,2019-01-24,2019-01-24,2019-01-24,...,2019-05-07,2019-05-07,2019-05-08,2019-05-08,2019-05-08,2019-05-08,2019-05-08,2019-05-08,2019-05-08,2019-05-09
description,A bill to make improvements to certain defense...,A bill to make improvements to certain defense...,A bill to make improvements to certain defense...,A joint resolution disapproving the President'...,A joint resolution disapproving the President'...,A joint resolution disapproving the President'...,A bill to prohibit taxpayer funded abortions.,A bill making supplemental appropriations for ...,Of a perfecting nature.,Of a perfecting nature.,...,"Spencer Bachus III, of Alabama, to be a Member...","Judith DelZoppo Pryor, of Ohio, to be a Member...","Joseph F. Bianco, of New York, to be United St...","Kimberly A. Reed, of West Virginia, to be Pres...","Spencer Bachus III, of Alabama, to be a Member...","Judith DelZoppo Pryor, of Ohio, to be a Member...","Janet Dhillon, of Pennsylvania, to be a Member...","Janet Dhillon, of Pennsylvania, to be a Member...","Michael H. Park, of New York, to be United Sta...","Michael H. Park, of New York, to be United Sta..."
document_number,1,1,1,2,2,2,109,268,,,...,47,54,220,55,47,54,173,173,245,245
document_title,A bill to make improvements to certain defense...,A bill to make improvements to certain defense...,A bill to make improvements to certain defense...,A joint resolution disapproving the President&...,A joint resolution disapproving the President&...,A joint resolution disapproving the President&...,A bill to prohibit taxpayer funded abortions.,A bill making supplemental appropriations for ...,,,...,"Spencer Bachus III, of Alabama, to be a Member...","Judith DelZoppo Pryor, of Ohio, to be a Member...","Joseph F. Bianco, of New York, to be United St...","Kimberly A. Reed, of West Virginia, to be Pres...","Spencer Bachus III, of Alabama, to be a Member...","Judith DelZoppo Pryor, of Ohio, to be a Member...","Janet Dhillon, of Pennsylvania, to be a Member...","Janet Dhillon, of Pennsylvania, to be a Member...","Michael H. Park, of New York, to be United Sta...","Michael H. Park, of New York, to be United Sta..."
question,On Cloture on the Motion to Proceed,On Cloture on the Motion to Proceed,On Cloture on the Motion to Proceed,On the Motion to Table,On the Motion to Proceed,On the Cloture Motion,On Cloture on the Motion to Proceed,On the Motion for Attendance,On the Cloture Motion,On the Cloture Motion,...,On the Cloture Motion,On the Cloture Motion,On the Nomination,On the Nomination,On the Nomination,On the Nomination,On the Cloture Motion,On the Nomination,On the Cloture Motion,On the Nomination
question_text,On Cloture on the Motion to Proceed S. 1,On Cloture on the Motion to Proceed S. 1,On Cloture on the Motion to Proceed S. 1,On the Motion to Table S.J.Res. 2,On the Motion to Proceed S.J.Res. 2,On the Cloture Motion S.J.Res. 2,On Cloture on the Motion to Proceed S. 109,On the Motion for Attendance H.R. 268,On the Cloture Motion S.Amdt. 5 to H.R. 268 (S...,On the Cloture Motion S.Amdt. 6 to H.R. 268 (S...,...,On the Cloture Motion PN47,On the Cloture Motion PN54,On the Nomination PN220,On the Nomination PN55,On the Nomination PN47,On the Nomination PN54,On the Cloture Motion PN173,On the Nomination PN173,On the Cloture Motion PN245,On the Nomination PN245
result,Cloture on the Motion to Proceed Rejected,Cloture on the Motion to Proceed Rejected,Cloture Motion Rejected,Motion to Table Failed,Motion to Proceed Agreed to,Cloture Motion Rejected,Cloture on the Motion to Proceed Rejected,Motion for Attendance Agreed to,Cloture Motion Rejected,Cloture Motion Rejected,...,Cloture Motion Agreed to,Cloture Motion Agreed to,Nomination Confirmed,Nomination Confirmed,Nomination Confirmed,Nomination Confirmed,Cloture Motion Agreed to,Nomination Confirmed,Cloture Motion Agreed to,Nomination Confirmed
roll_call,1,2,3,4,5,6,7,8,9,10,...,97,98,99,100,101,102,103,104,105,106


In [None]:
latest_session_test_positions

In [None]:
#import vote postitions for 115th senate, session 1
senate_115_1_df = import_session(115, 'senate', 1)
senate_115_1_df

In [21]:
_

Unnamed: 0,amendment,bill,chamber,congress,date,democratic,description,document_number,document_title,independent,...,result,roll_call,session,source,tie_breaker,tie_breaker_vote,time,total,url,vote_type
api_uri,,https://api.propublica.org/congress/v1/116/bil...,Senate,116,2019-01-08,,A bill to make improvements to certain defense...,1,A bill to make improvements to certain defense...,,...,Cloture on the Motion to Proceed Rejected,1,1,https://www.senate.gov/legislative/LIS/roll_ca...,,,17:39:00,,https://www.senate.gov/legislative/LIS/roll_ca...,3/5
bill_id,,s1-116,Senate,116,2019-01-08,,A bill to make improvements to certain defense...,1,A bill to make improvements to certain defense...,,...,Cloture on the Motion to Proceed Rejected,1,1,https://www.senate.gov/legislative/LIS/roll_ca...,,,17:39:00,,https://www.senate.gov/legislative/LIS/roll_ca...,3/5
latest_action,,Held at the desk.,Senate,116,2019-01-08,,A bill to make improvements to certain defense...,1,A bill to make improvements to certain defense...,,...,Cloture on the Motion to Proceed Rejected,1,1,https://www.senate.gov/legislative/LIS/roll_ca...,,,17:39:00,,https://www.senate.gov/legislative/LIS/roll_ca...,3/5
majority_position,,,Senate,116,2019-01-08,No,A bill to make improvements to certain defense...,1,A bill to make improvements to certain defense...,,...,Cloture on the Motion to Proceed Rejected,1,1,https://www.senate.gov/legislative/LIS/roll_ca...,,,17:39:00,,https://www.senate.gov/legislative/LIS/roll_ca...,3/5
no,,,Senate,116,2019-01-08,41,A bill to make improvements to certain defense...,1,A bill to make improvements to certain defense...,2.0,...,Cloture on the Motion to Proceed Rejected,1,1,https://www.senate.gov/legislative/LIS/roll_ca...,,,17:39:00,44.0,https://www.senate.gov/legislative/LIS/roll_ca...,3/5
not_voting,,,Senate,116,2019-01-08,0,A bill to make improvements to certain defense...,1,A bill to make improvements to certain defense...,0.0,...,Cloture on the Motion to Proceed Rejected,1,1,https://www.senate.gov/legislative/LIS/roll_ca...,,,17:39:00,0.0,https://www.senate.gov/legislative/LIS/roll_ca...,3/5
number,,S.1,Senate,116,2019-01-08,,A bill to make improvements to certain defense...,1,A bill to make improvements to certain defense...,,...,Cloture on the Motion to Proceed Rejected,1,1,https://www.senate.gov/legislative/LIS/roll_ca...,,,17:39:00,,https://www.senate.gov/legislative/LIS/roll_ca...,3/5
present,,,Senate,116,2019-01-08,0,A bill to make improvements to certain defense...,1,A bill to make improvements to certain defense...,0.0,...,Cloture on the Motion to Proceed Rejected,1,1,https://www.senate.gov/legislative/LIS/roll_ca...,,,17:39:00,0.0,https://www.senate.gov/legislative/LIS/roll_ca...,3/5
short_title,,Strengthening America's Security in the Middle...,Senate,116,2019-01-08,,A bill to make improvements to certain defense...,1,A bill to make improvements to certain defense...,,...,Cloture on the Motion to Proceed Rejected,1,1,https://www.senate.gov/legislative/LIS/roll_ca...,,,17:39:00,,https://www.senate.gov/legislative/LIS/roll_ca...,3/5
title,,A bill to make improvements to certain defense...,Senate,116,2019-01-08,,A bill to make improvements to certain defense...,1,A bill to make improvements to certain defense...,,...,Cloture on the Motion to Proceed Rejected,1,1,https://www.senate.gov/legislative/LIS/roll_ca...,,,17:39:00,,https://www.senate.gov/legislative/LIS/roll_ca...,3/5


In [23]:
#import vote poisiotns for 115th house, session 1
house_115_1_df = import_session(115, 'house', 1)
house_115_1_df

Importing  115 house 1
(Size) skip vote 1
Initalizing df 2
API status: OK
....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................(Size) skip vote 711


Unnamed: 0,member_id,name,party,state,vote_position,vote_position_3,vote_position_4,vote_position_5,vote_position_6,vote_position_7,...,vote_position_701,vote_position_702,vote_position_703,vote_position_704,vote_position_705,vote_position_706,vote_position_707,vote_position_708,vote_position_709,vote_position_710
0,A000374,Ralph Abraham,R,LA,Ryan (WI),Yes,Yes,No,Yes,No,...,No,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes
1,A000370,Alma Adams,D,NC,Pelosi,Not Voting,No,Yes,No,Yes,...,Yes,No,No,No,No,Yes,No,No,Yes,Yes
2,A000055,Robert B. Aderholt,R,AL,Ryan (WI),Yes,Yes,No,Yes,No,...,No,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes
3,A000371,Pete Aguilar,D,CA,Pelosi,No,No,Yes,No,Yes,...,Yes,No,No,No,No,Yes,No,No,Yes,Yes
4,A000372,Rick Allen,R,GA,Ryan (WI),Yes,Yes,No,Yes,No,...,No,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes
5,A000367,Justin Amash,R,MI,Ryan (WI),Yes,Yes,No,No,No,...,No,No,No,Yes,No,Yes,No,No,No,Yes
6,A000369,Mark Amodei,R,NV,Ryan (WI),Yes,Yes,No,Yes,No,...,No,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes
7,A000375,Jodey Arrington,R,TX,Ryan (WI),Yes,Yes,No,Yes,No,...,No,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Not Voting
8,B001291,Brian Babin,R,TX,Ryan (WI),Yes,Yes,No,Yes,No,...,No,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes
9,B001298,Don Bacon,R,NE,Ryan (WI),Yes,Yes,No,Yes,No,...,No,Yes,Yes,Yes,Yes,Yes,Not Voting,Yes,Yes,Yes


In [41]:
positions, status = import_positions(115,'house',1,3)
pd.DataFrame(positions)

Unnamed: 0,district,dw_nominate,member_id,name,party,state,vote_position
0,5,0.508,A000374,Ralph Abraham,R,LA,Yes
1,12,-0.469,A000370,Alma Adams,D,NC,Not Voting
2,4,0.361,A000055,Robert B. Aderholt,R,AL,Yes
3,31,-0.285,A000371,Pete Aguilar,D,CA,No
4,12,0.611,A000372,Rick Allen,R,GA,Yes
5,3,0.648,A000367,Justin Amash,R,MI,Yes
6,2,0.375,A000369,Mark Amodei,R,NV,Yes
7,19,0.549,A000375,Jodey Arrington,R,TX,Yes
8,36,0.693,B001291,Brian Babin,R,TX,Yes
9,2,0.432,B001298,Don Bacon,R,NE,Yes


In [36]:
test = pd.DataFrame(positions)
test.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1292 entries, 0 to 1291
Data columns (total 7 columns):
district         1292 non-null object
dw_nominate      1292 non-null float64
member_id        1292 non-null object
name             1292 non-null object
party            1292 non-null object
state            1292 non-null object
vote_position    1292 non-null object
dtypes: float64(1), object(6)
memory usage: 70.7+ KB


In [37]:
names = test['name'].tolist()
nameset = set(names)
len(nameset)

436

In [39]:
ids = test['member_id'].tolist()
idset = set(ids)
len(idset)

436