# Get Concise List of Politicians and Candidates

This notebook processes the S117_members and 2022-senate-races files and matches them with the candidate_twitters files so that we can have a concise list of politicians/candidates whose recent tweets we can pull based on their twitter IDs

In [2]:
import pandas as pd
import numpy as np

### Load in datasets

In [7]:
senate_races = pd.read_csv("2022-senate-races.csv")
senate_races.head()

Unnamed: 0,STATE,First name,Last name,Party,Incumbent
0,Alabama,Will,Boyd,Dem,N
1,Alabama,Katie,Britt,Rep,N
2,Alabama,John,Sophocleus,Lib,N
3,Alaska,Patricia,Chesbro,Dem,N
4,Alaska,Buzz,Kelley,Rep,N


In [8]:
len(senate_races)

136

In [5]:
senate_members = pd.read_csv("S117_members.csv")
senate_members.head()

Unnamed: 0,congress,chamber,icpsr,state_icpsr,district_code,state_abbrev,party_code,occupancy,last_means,bioname,...,died,nominate_dim1,nominate_dim2,nominate_log_likelihood,nominate_geo_mean_probability,nominate_number_of_votes,nominate_number_of_errors,conditional,nokken_poole_dim1,nokken_poole_dim2
0,117,President,99912,99,0,USA,200,0.0,0.0,"TRUMP, Donald John",...,,0.403,0.162,,,,,,,
1,117,President,99913,99,0,USA,100,0.0,0.0,"BIDEN, Joseph Robinette, Jr.",...,,-0.32,0.283,,,,,,,
2,117,Senate,42102,41,0,AL,200,,,"TUBERVILLE, Thomas Hawley (Tommy)",...,,0.936,-0.352,-75.54402,0.90957,797.0,28.0,,0.874,-0.213
3,117,Senate,94659,41,0,AL,200,,,"SHELBY, Richard C.",...,,0.448,0.576,-140.62321,0.82903,750.0,74.0,,0.54,0.506
4,117,Senate,40300,81,0,AK,200,,,"MURKOWSKI, Lisa",...,,0.21,-0.302,-125.63009,0.84745,759.0,57.0,,0.209,-0.365


In [6]:
len(senate_members)

104

In [31]:
twitter_accounts = pd.read_csv("candidate_twitters.csv")
twitter_accounts.head()

Unnamed: 0,Name,Twitter_username,Account_start_time,Account_ID,Sex,Birthplace,Birthday,Age,Instagram_username,Political_party
0,A. Donald McEachin,RepMcEachin,2017-01-03T00:00:00Z,816181091673448448,male,Germany,1961-10-10T00:00:00Z,59.0,repmceachin,Democratic Party
1,Aaron Michlewitz,RepMichlewitz,2010-06-27T00:00:00Z,160246973,male,United States of America,1978-01-01T00:00:00Z,42.0,,Democratic Party
2,Aaron Peskin,AaronPeskin,2010-11-13T00:00:00Z,215369273,male,United States of America,1964-06-17T00:00:00Z,56.0,apeskin52,Democratic Party
3,Aaron Peña,AaronPena,2007-10-31T00:00:00Z,9843332,male,United States of America,1959-06-08T00:00:00Z,61.0,,Republican Party
4,Aaron Schock,aaronschock,2009-03-12T00:00:00Z,23951197,male,United States of America,1981-05-28T00:00:00Z,39.0,aaronschock,Republican Party


In [32]:
twitter_names = twitter_accounts[['Name', 'Twitter_username']]

In [33]:
twitter_names.head()

Unnamed: 0,Name,Twitter_username
0,A. Donald McEachin,RepMcEachin
1,Aaron Michlewitz,RepMichlewitz
2,Aaron Peskin,AaronPeskin
3,Aaron Peña,AaronPena
4,Aaron Schock,aaronschock


In [34]:
senate_members = senate_members.iloc[:, :10]

In [35]:
senate_members.head()

Unnamed: 0,congress,chamber,icpsr,state_icpsr,district_code,state_abbrev,party_code,occupancy,last_means,bioname
0,117,President,99912,99,0,USA,200,0.0,0.0,"TRUMP, Donald John"
1,117,President,99913,99,0,USA,100,0.0,0.0,"BIDEN, Joseph Robinette, Jr."
2,117,Senate,42102,41,0,AL,200,,,"TUBERVILLE, Thomas Hawley (Tommy)"
3,117,Senate,94659,41,0,AL,200,,,"SHELBY, Richard C."
4,117,Senate,40300,81,0,AK,200,,,"MURKOWSKI, Lisa"


### Preprocess the senator name data so that the name are more consistent with the twitter names

In [36]:
senate_races['Full name'] = senate_races['First name']+" "+senate_races['Last name']

In [37]:
senate_races.head()

Unnamed: 0,STATE,First name,Last name,Party,Incumbent,Full name
0,Alabama,Will,Boyd,Dem,N,Will Boyd
1,Alabama,Katie,Britt,Rep,N,Katie Britt
2,Alabama,John,Sophocleus,Lib,N,John Sophocleus
3,Alaska,Patricia,Chesbro,Dem,N,Patricia Chesbro
4,Alaska,Buzz,Kelley,Rep,N,Buzz Kelley


In [38]:
senate_cands = list(senate_races['Full name'])
twitter_cands = list(twitter_names['Name'])

In [39]:
len(list(set(senate_cands) & set(twitter_cands)))

32

### Add last name column to senate members dataset and twitter name dataset and use last name as primary matching criteria.  Since the final list is rather small, I can manually evaluate the matching.

In [40]:
# Add 'Last name' column to twitter accounts dataset

last_name = []
for ind in twitter_names.index:
    name = twitter_names['Name'][ind]
    last = name.split(" ")
    last = last[len(last)-1]
    last_name.append(last)
    
twitter_names['Last name'] = last_name
twitter_names.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  twitter_names['Last name'] = last_name


Unnamed: 0,Name,Twitter_username,Last name
0,A. Donald McEachin,RepMcEachin,McEachin
1,Aaron Michlewitz,RepMichlewitz,Michlewitz
2,Aaron Peskin,AaronPeskin,Peskin
3,Aaron Peña,AaronPena,Peña
4,Aaron Schock,aaronschock,Schock


In [41]:
# Add 'Last name' column to senate members dataset

last_name = []
for ind in senate_members.index:
    name = senate_members['bioname'][ind]
    last = name.split(" ")
    last = last[0].title()
    last = last.split(",")[0]
    last_name.append(last)
    
senate_members['Last name'] = last_name
senate_members.head()

Unnamed: 0,congress,chamber,icpsr,state_icpsr,district_code,state_abbrev,party_code,occupancy,last_means,bioname,Last name
0,117,President,99912,99,0,USA,200,0.0,0.0,"TRUMP, Donald John",Trump
1,117,President,99913,99,0,USA,100,0.0,0.0,"BIDEN, Joseph Robinette, Jr.",Biden
2,117,Senate,42102,41,0,AL,200,,,"TUBERVILLE, Thomas Hawley (Tommy)",Tuberville
3,117,Senate,94659,41,0,AL,200,,,"SHELBY, Richard C.",Shelby
4,117,Senate,40300,81,0,AK,200,,,"MURKOWSKI, Lisa",Murkowski


In [45]:
senate_races_twitter = senate_races.merge(twitter_names, on = ['Last name'] )
senate_races_twitter = senate_races_twitter.drop_duplicates()
senate_races_twitter.head(10)

Unnamed: 0,STATE,First name,Last name,Party,Incumbent,Full name,Name,Twitter_username
0,Alaska,Lisa,Murkowski,Rep,Y,Lisa Murkowski,Lisa Murkowski,lisamurkowski
1,Alaska,Sid,Hill,Ind,N,Sid Hill,Bradford Hill,repbradhill
2,Alaska,Sid,Hill,Ind,N,Sid Hill,Curtis Hill,CurtisHill_IN
3,Alaska,Sid,Hill,Ind,N,Sid Hill,French Hill,RepFrenchHill
4,Alaska,Sid,Hill,Ind,N,Sid Hill,Katie Hill,RepKatieHill
5,Arizona,Mark,Kelly,Dem,Y,Mark Kelly,Laura Kelly,GovLauraKelly
6,Arizona,Mark,Kelly,Dem,Y,Mark Kelly,Mark Kelly,CaptMarkKelly
8,Arizona,Mark,Kelly,Dem,Y,Mark Kelly,Mike Kelly,MikeKellyPA
9,Arizona,Mark,Kelly,Dem,Y,Mark Kelly,Robin Kelly,RepRobinKelly
10,Arizona,Mark,Kelly,Dem,Y,Mark Kelly,Tim Kelly,timothykelly


In [46]:
senate_members_twitter = senate_members.merge(twitter_names, on = ['Last name'] )
senate_members_twitter = senate_members_twitter.drop_duplicates()
senate_members_twitter.head(10)

Unnamed: 0,congress,chamber,icpsr,state_icpsr,district_code,state_abbrev,party_code,occupancy,last_means,bioname,Last name,Name,Twitter_username
0,117,President,99912,99,0,USA,200,0.0,0.0,"TRUMP, Donald John",Trump,Donald Trump,realDonaldTrump
1,117,President,99912,99,0,USA,200,0.0,0.0,"TRUMP, Donald John",Trump,Donald Trump,POTUS
4,117,President,99912,99,0,USA,200,0.0,0.0,"TRUMP, Donald John",Trump,Ivanka Trump,IvankaTrump
6,117,President,99913,99,0,USA,100,0.0,0.0,"BIDEN, Joseph Robinette, Jr.",Biden,Jill Biden,drbiden
7,117,President,99913,99,0,USA,100,0.0,0.0,"BIDEN, Joseph Robinette, Jr.",Biden,Joe Biden,JoeBiden
11,117,Senate,42102,41,0,AL,200,,,"TUBERVILLE, Thomas Hawley (Tommy)",Tuberville,Tommy Tuberville,TTuberville
12,117,Senate,94659,41,0,AL,200,,,"SHELBY, Richard C.",Shelby,Richard Shelby,SenShelby
14,117,Senate,40300,81,0,AK,200,,,"MURKOWSKI, Lisa",Murkowski,Lisa Murkowski,lisamurkowski
15,117,Senate,41500,81,0,AK,200,,,"SULLIVAN, Daniel Scott",Sullivan,Daniel S. Sullivan,SenDanSullivan
17,117,Senate,41500,81,0,AK,200,,,"SULLIVAN, Daniel Scott",Sullivan,Rip Sullivan,RipSullivan48


In [47]:
len(senate_races_twitter)

137

In [48]:
len(senate_members_twitter)

215

### SAve the twitter matching results and manually inspect to fix errors

In [50]:
senate_races_twitter.to_csv("senator_races_twitters.csv", index = False)
senate_members_twitter.to_csv("senator_members_twitters.csv", index = False)