In [2]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import datetime

## **REPLICATION DATA**

### **Barbera 2014: 'Birds of the Same Feather Tweet Together...' (Bayesian)**

In [3]:
# Barbera (2014) replication files: per-account elite metadata and the estimation results.
elites_data = pd.read_csv('data/misc/Replication_data_Barbera_2014/elites-data.csv')
elites_data = elites_data.loc[:, ['screen_name', 'name', 'dw.nom.1', 'nameid', 'party', 'state', 'gender', 'idealPoint']]
print(f'{len(elites_data)} elites loaded')

results_elites_US = pd.read_csv('data/misc/Replication_data_Barbera_2014/results-elites-US.csv')
results_elites_US = results_elites_US.drop(columns=['Unnamed: 0', 'phi.sd', 'alpha'])
print(f'{len(results_elites_US)} results loaded')

# Left join: every elite row is kept; accounts without a result row get NaN in the result columns.
barbera_2014_data = pd.merge(elites_data, results_elites_US, on='screen_name', how='left')

# Filter just for congressmembers: keep rows that have both an estimated phi and a
# DW-NOMINATE score, then reduce to the join key plus the two scores.
barbera_2014_data_congressmembers = (
    barbera_2014_data
    .dropna(subset=['phi', 'dw.nom.1'])
    .drop(columns=['screen_name', 'name', 'idealPoint', 'state', 'party', 'gender'])
    .rename(columns={'phi': 'Barbera1_score'})
)
print(f'{len(barbera_2014_data_congressmembers)} congressmembers loaded')

# dw.nom.1 is the first-dimension DW-NOMINATE score of congressmembers
# phi (renamed to Barbera1_score) is the ideology score estimated by the logit/Bayesian method
barbera_2014_data_congressmembers.sort_values(by='nameid', ascending=True).head(15)

666 elites loaded
318 results loaded
231 congressmembers loaded


Unnamed: 0,dw.nom.1,nameid,Barbera1_score
255,-0.462,A000014,-0.478761
302,-0.317,A000210,-1.685715
637,0.623,A000358,0.479282
552,0.323,A000360,0.523803
623,0.4,A000365,0.424721
413,0.837,A000367,0.598396
395,0.531,B000213,0.817583
306,-0.554,B000287,-1.333579
20,0.393,B000461,0.564326
619,0.516,B000589,0.869296


### **Barbera 2015: 'Tweeting from Left to Right...' (Correspondence)**

In [4]:
# Barbera (2015) elite metadata; `id` is renamed to `nameid` so it can act as the join key below.
elites_data_2 = (
    pd.read_csv('data/misc/Replication_data_Barbera_2015/elites-data.csv')
    .rename(columns={'id': 'nameid'})
)
print(f'{len(elites_data_2)} elites loaded')

house_jackman_results = pd.read_csv('data/misc/Replication_data_Barbera_2015/house.csv')
print(f'{len(house_jackman_results)} house results loaded')

senate_jackman_results = pd.read_csv('data/misc/Replication_data_Barbera_2015/senate.csv')
print(f'{len(senate_jackman_results)} senate results loaded')

# Stack both chambers and keep only the join key, the ideal point and gender.
congresss_jackman_results = pd.concat([house_jackman_results, senate_jackman_results]).loc[:, ['nameid', 'idealPoint', 'gender']]
print(f'{len(congresss_jackman_results)} congress results loaded')

# Elites without a nameid cannot be matched, so they are dropped before the left join.
barbera_2015_data = (
    elites_data_2
    .dropna(subset=['nameid'])
    .merge(congresss_jackman_results, on='nameid', how='left')
    .drop(columns=['thomas_id', 'twitter_id', 'type'])
    .rename(columns={'idealPoint': 'Jackman_score'})
)
barbera_2015_data.head(15)

587 elites loaded
439 house results loaded
105 senate results loaded
544 congress results loaded


Unnamed: 0,nameid,chamber,stdis,first_name,last_name,party,twitter_name,Jackman_score,gender
0,M001153,Senate,AK,Lisa,Murkowski,R,lisamurkowski,0.314888,F
1,Y000033,House,AK,Don,Young,R,repdonyoung,0.488085,M
2,B001265,Senate,AK,Mark,Begich,D,senatorbegich,-0.367857,M
3,S001141,Senate,AL,Jeff,Sessions,R,senatorsessions,1.275841,M
4,S000320,Senate,AL,Richard,Shelby,R,senshelby,1.231818,M
5,B001244,House,AL1,Jo,Bonner,R,,0.726885,M
6,R000591,House,AL2,Martha,Roby,R,repmartharoby,0.820202,F
7,R000575,House,AL3,Mike,Rogers,R,repmikerogersal,0.811111,M
8,A000055,House,AL4,Robert,Aderholt,R,robert_aderholt,0.846485,M
9,B001274,House,AL5,Mo,Brooks,R,repmobrooks,0.930296,M


## **ZHIXIANG DATA**

### **House_list_in_318-accounts.csv + Senate_list_in_318-accounts.csv (congressmembers among the 318 elites)**

In [5]:
# Lookup table linking each member's `nameid` to the `twitter_name` / `congressperson`
# columns used by the merges in later cells.
# The column selection alone discards every other column, so no separate drop of
# 'DW-Nominate' is needed (and the cell no longer fails if that unused column is absent).
all_congressmembers = pd.read_csv('data/raw/all_congress_members.csv')[['nameid', 'twitter_name', 'congressperson']]

In [6]:
# Zhixiang opinion results, one file per chamber, stacked into a single frame.
zhixiang_house = pd.read_csv('data/misc/Data_about_Opinion_Measurement_Zhixiang/2.Opinion results/opinion_house.csv')
zhixiang_senate = pd.read_csv('data/misc/Data_about_Opinion_Measurement_Zhixiang/2.Opinion results/opinion_senate.csv')
zhixiang_results = (
    pd.concat([zhixiang_house, zhixiang_senate])
    .rename(columns={'opinion measured': 'Zhixiang_score'})
)

# Attach `nameid` via the Twitter name; rows that end up without a nameid are dropped,
# and `twitter_name` is removed once it has served as the join key.
zhixiang_data = (
    zhixiang_results
    .merge(all_congressmembers, on='twitter_name', how='left')
    .dropna(subset=['nameid'])
    .drop(columns=['twitter_name'])
)

In [7]:
# Combine the three score sources on `nameid`, keep the first row per member,
# and fix the column order of the final table.
full_characteristics_table = (
    zhixiang_data
    .merge(barbera_2014_data_congressmembers, on='nameid', how='left')
    .merge(barbera_2015_data, on='nameid', how='left')
    .drop_duplicates(subset=['nameid'])
    .loc[:, [
        'nameid', 'congressperson', 'first_name', 'last_name', 'gender', 'party',
        'chamber', 'stdis', 'twitter_name',
        'dw.nom.1', 'Barbera1_score', 'Jackman_score', 'Zhixiang_score',
    ]]
)
                                          

In [8]:
# Persist the combined table. The DataFrame index is written as the first (unnamed)
# column, so readers of this file will see it as 'Unnamed: 0' unless they pass index_col=0.
full_characteristics_table.to_csv(
    path_or_buf='data/raw/congressmembers_characteristics.csv',
    index=True,
)

In [9]:
# Preview the first ten rows of the combined table.
full_characteristics_table.head(n=10)

Unnamed: 0,nameid,congressperson,first_name,last_name,gender,party,chamber,stdis,twitter_name,dw.nom.1,Barbera1_score,Jackman_score,Zhixiang_score
0,B000589,speakerboehner,John,Boehner,M,R,House,OH8,speakerboehner,0.516,0.869296,0.653032,0.597428
1,R000570,speakerryan,Paul,Ryan,M,R,House,WI1,reppaulryan,0.567,1.445708,0.949106,0.5
2,P000197,speakerpelosi,Nancy,Pelosi,F,D,House,CA12,nancypelosi,-0.533,-1.454262,-1.363589,0.393234
3,B001256,michelebachmann,Michele,Bachmann,F,R,House,MN6,michelebachmann,0.579,1.364745,0.890506,0.623674
4,W000797,dwstweets,Debbie,Wasserman Schultz,F,D,House,FL23,dwstweets,,,-0.942929,0.365214
5,C001046,ericcantor,Eric,Cantor,M,R,House,VA7,gopleader,0.544,1.197462,0.953945,0.616731
6,I000056,darrellissa,Darrell,Issa,M,R,House,CA49,darrellissa,0.515,1.488262,0.749277,0.576349
7,E000288,keithellison,Keith,Ellison,M,D,House,MN5,keithellison,-0.634,-1.648543,-1.445884,0.353148
8,G000556,alangrayson,Alan,Grayson,M,D,House,FL9,alangrayson,-0.384,-2.027048,-1.156199,0.39936
9,C001076,jasoninthehouse,Jason,Chaffetz,M,R,House,UT3,jasoninthehouse,0.706,1.1153,0.908414,0.607527


In [16]:
# Spot check: show the row(s) whose `congressperson` value is 'senatorreid'.
full_characteristics_table.loc[full_characteristics_table['congressperson'].eq('senatorreid')]

Unnamed: 0,nameid,congressperson,first_name,last_name,gender,party,chamber,stdis,twitter_name,dw.nom.1,Barbera1_score,Jackman_score,Zhixiang_score
123,R000146,senatorreid,Harry,Reid,M,D,Senate,NV,senatorreid,-0.304,-1.162128,-0.347716,0.637092
