In [1]:
import numpy as np
import pandas as pd


In [2]:
url = 'https://voteview.com/static/data/out/members/HS119_members.csv'
ideology = pd.read_csv(url)
ideology.head(3).T

cols_to_keep = ['bioname','chamber', 'nominate_dim1', 'party_code']
crosswalk_cols = ['bioname','state_abbrev' ,'district_code','icpsr', 'bioguide_id']

crosswalk = ideology[crosswalk_cols]
ideology = ideology[cols_to_keep]

In [3]:
ideology['party_code'].value_counts()

party_code
200    279
100    264
328      2
Name: count, dtype: int64

In [4]:
replace_map = {200: 'Republican',
               100: 'Democrat',
               328: 'Independent'}
ideology['party'] = ideology['party_code'].replace(replace_map)
ideology['party'].value_counts

<bound method IndexOpsMixin.value_counts of 0      Republican
1        Democrat
2      Republican
3      Republican
4      Republican
          ...    
540    Republican
541      Democrat
542    Republican
543    Republican
544    Republican
Name: party, Length: 545, dtype: object>

In [5]:
ideology.drop(['party_code'], axis=1)

Unnamed: 0,bioname,chamber,nominate_dim1,party
0,"ROGERS, Mike Dennis",House,0.378,Republican
1,"SEWELL, Terri",House,-0.401,Democrat
2,"PALMER, Gary James",House,0.674,Republican
3,"MOORE, Barry",House,0.645,Republican
4,"STRONG, Dale",House,0.606,Republican
...,...,...,...,...
540,"JUSTICE, James Conley, II",Senate,0.564,Republican
541,"BALDWIN, Tammy",Senate,-0.487,Democrat
542,"JOHNSON, Ron",Senate,0.641,Republican
543,"LUMMIS, Cynthia M.",Senate,0.686,Republican


In [6]:
ideology = ideology.rename({'nominate_dim1': 'left_right_ideology'}, axis=1)
ideology 

Unnamed: 0,bioname,chamber,left_right_ideology,party_code,party
0,"ROGERS, Mike Dennis",House,0.378,200,Republican
1,"SEWELL, Terri",House,-0.401,100,Democrat
2,"PALMER, Gary James",House,0.674,200,Republican
3,"MOORE, Barry",House,0.645,200,Republican
4,"STRONG, Dale",House,0.606,200,Republican
...,...,...,...,...,...
540,"JUSTICE, James Conley, II",Senate,0.564,200,Republican
541,"BALDWIN, Tammy",Senate,-0.487,100,Democrat
542,"JOHNSON, Ron",Senate,0.641,200,Republican
543,"LUMMIS, Cynthia M.",Senate,0.686,200,Republican


In [7]:
ideology.sort_values('left_right_ideology', ascending=False)

Unnamed: 0,bioname,chamber,left_right_ideology,party_code,party
398,"GILL, Brandon",House,0.981,200,Republican
296,"HARRIGAN, Pat",House,0.981,200,Republican
443,"TUBERVILLE, Thomas Hawley (Tommy)",Senate,0.936,200,Republican
493,"SCHMITT, Eric Stephen",Senate,0.917,200,Republican
129,"MCCORMICK, Rich",House,0.891,200,Republican
...,...,...,...,...,...
429,"RANDALL, Emily",House,-0.685,100,Democrat
485,"WARREN, Elizabeth",Senate,-0.744,100,Democrat
401,"TURNER, Sylvester",House,-0.746,100,Democrat
381,"GARCIA, Sylvia",House,-0.781,100,Democrat


In [8]:
crosswalk

Unnamed: 0,bioname,state_abbrev,district_code,icpsr,bioguide_id
0,"ROGERS, Mike Dennis",AL,3,20301,R000575
1,"SEWELL, Terri",AL,7,21102,S001185
2,"PALMER, Gary James",AL,6,21500,P000609
3,"MOORE, Barry",AL,1,22140,M001212
4,"STRONG, Dale",AL,5,22366,S001220
...,...,...,...,...,...
540,"JUSTICE, James Conley, II",WV,0,42504,J000312
541,"BALDWIN, Tammy",WI,0,29940,B001230
542,"JOHNSON, Ron",WI,0,41111,J000293
543,"LUMMIS, Cynthia M.",WY,0,20953,L000571


In [9]:
ideology_crosswalk = pd.merge(
    ideology,
    crosswalk,
    on='bioname',
    how='outer',           # choose 'inner', 'left', 'right', or 'outer'
    validate='one_to_one', # this checks that each key appears only once in both DF
    indicator='matched'    # make sure 'matched' doesn't already exist
)

ideology_crosswalk


Unnamed: 0,bioname,chamber,left_right_ideology,party_code,party,state_abbrev,district_code,icpsr,bioguide_id,matched
0,"ADAMS, Alma",House,-0.462,100,Democrat,NC,12,21545,A000370,both
1,"ADERHOLT, Robert",House,0.405,200,Republican,AL,4,29701,A000055,both
2,"AGUILAR, Peter Rey",House,-0.324,100,Democrat,CA,33,21506,A000371,both
3,"ALFORD, Mark",House,0.569,200,Republican,MO,4,22300,A000379,both
4,"ALLEN, Rick W.",House,0.690,200,Republican,GA,12,21516,A000372,both
...,...,...,...,...,...,...,...,...,...,...
540,"WOMACK, Steve",House,0.348,200,Republican,AR,3,21108,W000809,both
541,"WYDEN, Ronald Lee",Senate,-0.333,100,Democrat,OR,0,14871,W000779,both
542,"YAKYM, Rudy, III",House,0.513,200,Republican,IN,2,22171,Y000067,both
543,"YOUNG, Todd",Senate,0.438,200,Republican,IN,0,21133,Y000064,both


In [10]:
ideology_crosswalk.query("state_abbrev == 'VA'")

Unnamed: 0,bioname,chamber,left_right_ideology,party_code,party,state_abbrev,district_code,icpsr,bioguide_id,matched
31,"BEYER, Donald Sternoff Jr.",House,-0.395,100,Democrat,VA,8,21554,B001292,both
80,"CLINE, Benjamin",House,0.716,200,Republican,VA,6,21908,C001118,both
90,"CONNOLLY, Gerald E. (Gerry)",House,-0.309,100,Democrat,VA,11,20952,C001078,both
195,"GRIFFITH, H. Morgan",House,0.51,200,Republican,VA,9,21191,G000568,both
252,"KAINE, Timothy Michael (Tim)",Senate,-0.243,100,Democrat,VA,0,41305,K000384,both
265,"KIGGANS, Jennifer",House,0.26,200,Republican,VA,2,22335,K000399,both
318,"MCCLELLAN, Jennifer",House,-0.55,100,Democrat,VA,4,22374,M001227,both
324,"MCGUIRE, John J., III",House,0.673,200,Republican,VA,5,22539,M001239,both
445,"SCOTT, Robert C.",House,-0.45,100,Democrat,VA,3,39307,S000185,both
477,"SUBRAMANYAM, Suhas",House,-0.301,100,Democrat,VA,10,22554,S001230,both


## Vote similarity matrix

In [11]:
url = 'https://voteview.com/static/data/out/votes/HS119_votes.csv'
votes = pd.read_csv(url)
votes.head(3).T

Unnamed: 0,0,1,2
congress,119,119,119
chamber,House,House,House
rollnumber,1,1,1
icpsr,14854,14863,14873
cast_code,1,1,6
prob,99.6,78.2,100.0


### pd.merge(data1, data2, on, how, validate, indicator)

Six arguments:

* data1, data2: the two dataframes we want to join/merge

* on: the column(s) whose values the dataframes shares. If these columns don't have the same name, use left_ion and right_on instead.

* how: what to do with rows that do not have a match in the other dataset. 
  
  * inner: drop any unmatched row 
  
  * outer (full): keep all rows, if they don't match put missing values in the unmatched part the data.
 
  * left: keep everything from data1, drop unmatched from data2.

  * right: keep everything from data2, drop unmatched from data1.

  Note: it's a good idea to join outer, at first, to see if there are any problems with matching. 

* validate: "one_to_one", "one_to_many", "many_to_one", or "many_to_many". Sets an expectation for how many rows in data2 one row in data1 will match to. If the expection is not met, returns an error.

* indicator: a new column that will tell you whether an ID (from the on feature) was found in both datasets, the left only, or the right only.

In [12]:
votes = votes.drop(['congress', 'prob',], axis=1)
votes 

Unnamed: 0,chamber,rollnumber,icpsr,cast_code
0,House,1,14854,1
1,House,1,14863,1
2,House,1,14873,6
3,House,1,15029,6
4,House,1,15433,6
...,...,...,...,...
175905,Senate,543,42504,1
175906,Senate,543,42505,1
175907,Senate,543,42506,1
175908,Senate,543,49308,6


In [13]:
vote_compare = pd.merge(votes, votes,
                        on = ['chamber', 'rollnumber'],
                        how = 'outer',
                        indicator = 'matched',
                        validate = 'many_to_many')

In [14]:
vote_compare = vote_compare.drop(['matched'], axis=1)
vote_compare = vote_compare.query("icpsr_x != icpsr_y")
vote_compare

Unnamed: 0,chamber,rollnumber,icpsr_x,cast_code_x,icpsr_y,cast_code_y
1,House,1,14854,1,14863,1
2,House,1,14854,1,14873,6
3,House,1,14854,1,15029,6
4,House,1,14854,1,15433,6
5,House,1,14854,1,15448,6
...,...,...,...,...,...,...
58066808,Senate,543,49703,1,42503,1
58066809,Senate,543,49703,1,42504,1
58066810,Senate,543,49703,1,42505,1
58066811,Senate,543,49703,1,42506,1


In [15]:
vote_compare['agree'] = vote_compare['cast_code_x'] == vote_compare['cast_code_y']
vote_compare

Unnamed: 0,chamber,rollnumber,icpsr_x,cast_code_x,icpsr_y,cast_code_y,agree
1,House,1,14854,1,14863,1,True
2,House,1,14854,1,14873,6,False
3,House,1,14854,1,15029,6,False
4,House,1,14854,1,15433,6,False
5,House,1,14854,1,15448,6,False
...,...,...,...,...,...,...,...
58066808,Senate,543,49703,1,42503,1,True
58066809,Senate,543,49703,1,42504,1,True
58066810,Senate,543,49703,1,42505,1,True
58066811,Senate,543,49703,1,42506,1,True


In [16]:
vote_compare = vote_compare.groupby(['icpsr_x', 'icpsr_y']).agg({'agree': 'mean'}).reset_index()
vote_compare

Unnamed: 0,icpsr_x,icpsr_y,agree
0,14226,14435,0.020258
1,14226,14858,0.066298
2,14226,14871,0.051565
3,14226,14921,0.883978
4,14226,15021,0.097606
...,...,...,...
206035,91980,29911,0.320285
206036,91980,31101,0.341637
206037,91980,31102,0.704626
206038,91980,39301,0.341637


In [None]:
vote_compare = vote_compare.drop(columns=['matched'], errors='ignore')
vote_compare = pd.merge(
    vote_compare,
    crosswalk,
    left_on='icpsr_x',
    right_on='icpsr',
    how='outer',
    indicator='matched',
    validate='many_to_one')



In [18]:
vote_compare = vote_compare[['bioname', 'icpsr_y', 'agree']]
vote_compare

Unnamed: 0,bioname,icpsr_y,agree
0,"GRASSLEY, Charles Ernest",14435,0.020258
1,"GRASSLEY, Charles Ernest",14858,0.066298
2,"GRASSLEY, Charles Ernest",14871,0.051565
3,"GRASSLEY, Charles Ernest",14921,0.883978
4,"GRASSLEY, Charles Ernest",15021,0.097606
...,...,...,...
206035,"VAN DREW, Jefferson",29911,0.320285
206036,"VAN DREW, Jefferson",31101,0.341637
206037,"VAN DREW, Jefferson",31102,0.704626
206038,"VAN DREW, Jefferson",39301,0.341637


In [19]:
vote_compare =pd.merge(vote_compare,crosswalk,
         left_on='icpsr_y',
         right_on='icpsr',
         how='outer',
         indicator='matched',
         validate='many_to_one')

In [20]:
vote_compare = vote_compare[['bioname_x', 'bioname_y', 'agree']]
vote_compare = vote_compare.rename({'bioname_x': 'bioname', 'bioname_y': 'comparison_member'}, axis=1)
vote_compare 

Unnamed: 0,bioname,comparison_member,agree
0,"MARKEY, Edward John","GRASSLEY, Charles Ernest",0.020258
1,"SCHUMER, Charles Ellis (Chuck)","GRASSLEY, Charles Ernest",0.066298
2,"WYDEN, Ronald Lee","GRASSLEY, Charles Ernest",0.051565
3,"McCONNELL, Addison Mitchell (Mitch)","GRASSLEY, Charles Ernest",0.883978
4,"DURBIN, Richard Joseph","GRASSLEY, Charles Ernest",0.097606
...,...,...,...
206035,"SCHAKOWSKY, Janice D.","VAN DREW, Jefferson",0.320285
206036,"DelBENE, Suzan K.","VAN DREW, Jefferson",0.341637
206037,"MASSIE, Thomas","VAN DREW, Jefferson",0.704626
206038,"CLYBURN, James Enos","VAN DREW, Jefferson",0.341637


In [21]:
vote_compare[vote_compare['bioname'].str.contains('MCGUIRE')].sort_values('agree', ascending=False)

Unnamed: 0,bioname,comparison_member,agree
69910,"MCGUIRE, John J., III","JOHNSON, Mike",0.973684
127414,"MCGUIRE, John J., III","BEAN, Aaron",0.953737
80821,"MCGUIRE, John J., III","CLINE, Benjamin",0.950178
104872,"MCGUIRE, John J., III","CAMMACK, Kat",0.950178
162771,"MCGUIRE, John J., III","CRANK, Jeff",0.950178
...,...,...,...
102224,"MCGUIRE, John J., III","PLASKETT, Stacey E.",0.121951
101785,"MCGUIRE, John J., III","NORTON, Eleanor Holmes",0.121951
95949,"MCGUIRE, John J., III","SHERRILL, Mikie",0.106762
187057,"MCGUIRE, John J., III","WALKINSHAW, James R.",0.105263
