## 1_create_twitter_df_handle.py

In [1]:
# Import packages
from urllib.request import urlopen
from io import open
import re
import pandas as pd

# Read the text file that contains the twitter handles for each congress member.
file = open('116th-Congress-Twitter-Handles.txt', encoding="utf-8")
text = file.read()

# Remove commas from the text.
text = re.sub(r"\,", "", text)

# Use regular expression to extract the name, state, and twitter handle for each congress person.
congress_data = re.findall(r'([A-Z]{2}\s?\-?\s?(?:[0-9]+)?)\s?(?:At Large )?(?:Delegate\s)?((?:[A-z\.\-\"\'\,úíéáÃº­©¡]+\s){1,5}|(?:N\/A))((?:@[A-z\.\-0-9]+)|N\/A)', text)

# Use this information to create a dataframe.
df_congress = pd.DataFrame(congress_data, columns = ["state","name","twitter_handle"])  

# Save the dataframe as a csv file.
df_congress.to_csv("congress_twitter_handles.csv")

## 2_join_handles_party.py

In [2]:
# Import packages
import pandas as pd
import re

# Import csv file directly from fivethirtyeight.
df_ = pd.read_csv("https://projects.fivethirtyeight.com/congress-tracker-data/csv/averages.csv")

# Only select members of the 116th congress.
trump_approval_116 = df_[df_['congress'] == 116]

# Import the twitter handles for 116th congress.
congress_twitter = pd.read_csv("congress_twitter_handles.csv")

# Clean up the names and create a variable that contains the last name.
congress_twitter['name'] = congress_twitter['name'].str.strip()
congress_twitter['name'] = [re.sub("\s?j|Jr\.?", "", name) for name in congress_twitter['name']]
congress_twitter['name'] = [re.sub("\s[A-Z]\.\s", " ", name) for name in congress_twitter['name']]
congress_twitter['last_name'] = [name.split()[-1] for name in congress_twitter['name']]

# Add the twitter handles to the trump approval data set.
joined = pd.merge(trump_approval_116, congress_twitter, on = "last_name")

# Create the csv file
joined.to_csv("congress_twitter_trump_approval.csv")

## 3_collect_data_from_twitter_api

In [3]:
# Import packages
import json
import csv
from datetime import date
from datetime import datetime
import time
from tweepy.streaming import StreamListener
import tweepy
from urllib.request import urlopen
from io import open
import re
import pandas as pd

# Create path for where the tweets will be stored.
path = "tweets_02_02_2021"

# Read congress data, and create list of twitter handles
df_handles = pd.read_csv("congress_twitter_trump_approval.csv")
handles = df_handles['twitter_handle'].to_list()



['@Robert_Aderholt', '@justinamash', '@MarkAmodeiNV2', '@RepAdams', '@RepPeteAguilar', '@RepRickAllen', '@RepAbraham', '@RepArrington', '@RepColinAllred', '@RepArmstrongND', '@RepCindyAxne', '@SanfordBishop', '@RepRobBishop', '@SanfordBishop', '@RepRobBishop', '@SanfordBishop', '@RepRobBishop', '@repblumenauer', '@RepKevinBrady', '@michaelcburgess', '@GKButterfield', '@RepGusBilirakis', '@VernBuchanan', '@RepKarenBass', '@RepMoBrooks', '@SusanWBrooks', '@RepMoBrooks', '@SusanWBrooks', '@RepLarryBucshon', '@RepBonamici', '@RepBeatty', '@RepAndyBarr', '@RepBrownley', '@RepCheri', '@RepBera', '@RepByrne', '@RepBrianBabin', '@RepDonBeyer', '@RepBost', '@CongBoyle', '@RepKenBuck', '@RepDonBacon', '@RepJimBanks', '@RepBarragan', '@RepJackBergman', '@RepAndyBiggsAZ', '@SenSherrodBrown', '@RepAnthonyBrown', '@SenSherrodBrown', '@RepAnthonyBrown', '@RepTedBudd', '@RepBalderson', '@RepJimBaird', '@RepBrindisi', '@RepTimBurchett', '@KenCalvert', '@RepSteveChabot', '@WhipClyburn', '@repjimcooper',

In [8]:
api_key = input("What is the api key? ")
api_secret_key = input("What is the api secret key? ")
access_token = input("What is the access token? ")
access_token_secret = input("What is the access token secret? ")
 
# Connect to Twitter API using the secrets
auth = tweepy.OAuthHandler(api_key, api_secret_key)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)


What is the api key? fv5Ad7Is97reXHYpCzCMohrcN
What is the api secret key? gVFJFNKzlXyGSrH8EATXk5aGGPhPxMKZxtgAAApxvOeG29W62f
What is the access token? 3137748470-j35r09whPzI6k8ud9sj2Hk6ZplOAoWGwksWiMed
What is the access token secret? sC2UAmB1ArlgMBK6ovPOmYC6Tv9gcBr91q8hL3Ueuhh38


In [9]:
# Helper function to save data into a JSON file
def save_json(file_name, file_content):
  with open(path + file_name, 'w', encoding='utf-8') as f:
    json.dump(file_content, f, ensure_ascii=False, indent=4)

# Helper function to get all tweets of a specified user
# NOTE:This method only allows access to the most recent 3200 tweets
# Source: https://gist.github.com/yanofsky/5436496
def get_all_tweets(screen_name):
  # initialize a list to hold all the Tweets
  alltweets = []
  # make initial request for most recent tweets 
  # (200 is the maximum allowed count)
  new_tweets = api.user_timeline(screen_name = screen_name,count=200, tweet_mode = "extended")
  # save most recent tweets
  alltweets.extend(new_tweets)
  # save the id of the oldest tweet less one to avoid duplication
  oldest = alltweets[-1].id - 1
  # keep grabbing tweets until there are no tweets left
  while len(new_tweets) > 0:
    print("getting tweets before %s" % (oldest))
    # all subsequent requests use the max_id param to prevent
    # duplicates
    new_tweets = api.user_timeline(screen_name = screen_name,count=200,max_id=oldest, tweet_mode = "extended")
    # save most recent tweets
    alltweets.extend(new_tweets)
    # update the id of the oldest tweet less one
    oldest = alltweets[-1].id - 1
    print("...%s tweets downloaded so far" % (len(alltweets)))
    ### END OF WHILE LOOP ###
  # transform the tweepy tweets into a 2D array that will 
  # populate the csv
  print(alltweets)  
#outtweets = [[tweet.id_str, tweet.created_at, tweet.full_text, tweet.favorite_count,tweet.in_reply_to_screen_name, tweet.retweeted, tweet.screen_name] for tweet in alltweets]
  # write the csv
  #with open(path + '%s_tweets.csv' % screen_name, 'w') as f:
  #  writer = csv.writer(f)
  #  writer.writerow(["id","created_at","text","likes","in reply to","retweeted", "screen_name"])
  #  writer.writerows(outtweets)
  #pass

In [10]:
for politician in handles[0:5]:
  get_all_tweets(politician)

getting tweets before 1266813872859680768
...395 tweets downloaded so far
getting tweets before 1237349641903050751


TweepError: Failed to parse JSON payload: Unterminated string starting at: line 1 column 635485 (char 635484)