Twitter API
=========================================



You don't have to understand this code. 

In [1]:
# !pip install tweepy
# install twitter module for python

In [2]:
import os 
from pprint import pprint
import json
import tweepy
import numpy as np
import pandas as pd

# Authorization

In [3]:
# API credentials are read from environment variables so that secrets are never
# saved inside the notebook. Export the four variables below before starting the
# kernel; an unset variable falls back to "" (the previous placeholder value).
CONSUMER_KEY = os.environ.get("TWITTER_CONSUMER_KEY", "")
CONSUMER_SECRET = os.environ.get("TWITTER_CONSUMER_SECRET", "")
ACCESS_TOKEN = os.environ.get("TWITTER_ACCESS_TOKEN", "")
ACCESS_TOKEN_SECRET = os.environ.get("TWITTER_ACCESS_TOKEN_SECRET", "")

# Authenticate with the consumer key pair plus the user's access token.
auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)

# Shared client used by every search cell below.
api = tweepy.API(auth, wait_on_rate_limit=True)

# Choosing Samples

Given the limitations of the free Twitter API, we sampled only ten counties: the five with the highest share of votes for the Democratic party and the five with the highest share for the Republican party. Interestingly, the counties within each group are in close proximity to one another.

In [4]:
# Load the per-county vote table. Later cells filter it on "majority", sort it
# on "dem_pct" / "rep_pct", and read each county's coordinates from it.
county_data = pd.read_csv("county_politics.csv")

## Top 5 Democratic Majority Counties in CA

In [5]:
# Keep only the counties whose majority vote went to the Democratic party.
dems = county_data[county_data["majority"] == "Democrat"]

# dem_pct is displayed as text such as "85.26%". Sort on its numeric value so the
# ranking does not rely on lexicographic string order (as text, "9.50%" would
# rank above "85.26%").
dem_short = dems.sort_values(
    "dem_pct",
    ascending=False,
    key=lambda pct: pd.to_numeric(pct.astype(str).str.rstrip("%")),
).head(5)  # the 5 counties with the highest Democratic vote share
dem_short

Unnamed: 0,county,dem_pct,rep_pct,majority,lat,long
37,San Francisco,85.26%,12.72%,Democrat,37.779026,-122.419906
20,Marin,82.33%,15.79%,Democrat,38.040914,-122.619964
0,Alameda,79.83%,17.62%,Democrat,37.609029,-121.899142
43,Santa Cruz,78.44%,18.49%,Democrat,37.050096,-121.990591
40,San Mateo,77.89%,20.20%,Democrat,37.496904,-122.333057


## Top 5 Republican Majority Counties in CA

In [9]:
# Keep only the counties whose majority vote went to the Republican party.
reps = county_data[county_data["majority"] == "Republican"]

# rep_pct is displayed as text such as "74.47%". Sort on its numeric value so the
# ranking does not rely on lexicographic string order (as text, "9.50%" would
# rank above "74.47%").
rep_short = reps.sort_values(
    "rep_pct",
    ascending=False,
    key=lambda pct: pd.to_numeric(pct.astype(str).str.rstrip("%")),
).head(5)  # the 5 counties with the highest Republican vote share
rep_short

Unnamed: 0,county,dem_pct,rep_pct,majority,lat,long
17,Lassen,23.24%,74.47%,Republican,40.768558,-120.730998
24,Modoc,26.33%,71.19%,Republican,41.545049,-120.7436
51,Tehama,31.02%,66.62%,Republican,40.125133,-122.201553
44,Shasta,32.28%,65.41%,Republican,40.796512,-121.997919
10,Glenn,35.36%,62.52%,Republican,39.591277,-122.377866


# Retrieving Tweets

We wrote a function that makes it easier to retrieve Tweets about a topic near a given location. We believe our version is a bit more readable than calling the API directly, especially when we want to carry out these queries for multiple locations. Currently, it is not possible to retrieve tweets for multiple coordinates in a single query using the Twitter API natively.

In [27]:
def get_topic_tweets(topic, lat, long, radius_km=100, count=50):
    '''Return the text of recent English tweets about a topic near a location.

    Uses the module-level ``api`` client created in the authorization cell.

    Parameters
    ----------
    topic : search query; converted with str() before being sent.
    lat, long : latitude and longitude of the centre of the search area.
    radius_km : search radius around (lat, long) in kilometres (default 100).
    count : number of tweets requested from the API (default 50).

    Returns
    -------
    list of str
        One entry per tweet in the response, cut at the first occurrence of
        "https" so that trailing links are dropped.
    '''
    response = api.search_tweets(
        str(topic),
        geocode=f"{lat},{long},{radius_km}km",
        count=count,
        result_type="recent",
        lang="en",
    )
    # Everything from the first "https" onwards is discarded.
    return [status.text.split("https")[0] for status in response]

# Republican County Tweets

In [29]:
rep_twt = []

# Search around each selected Republican-majority county for "covid" tweets.
# Coordinates are read by column name and the loop follows the frame's own
# rows, so nothing depends on column positions or on a hard-coded row count.
for lat, long in zip(rep_short["lat"], rep_short["long"]):
    rep_twt.extend(get_topic_tweets("covid", lat, long))

In [None]:
# Wrap the collected tweet texts in a single-column frame and drop exact duplicate rows.
rep_twt_df = pd.DataFrame(rep_twt, columns = ["tweet"]).drop_duplicates() #create dataframe for tweets, drops duplicates
# Uncomment to persist the sample to "republican_tweets.csv" (written without the index).
#rep_twt_df.to_csv("republican_tweets.csv",index = False)

In [16]:
rep_twt_df = pd.read_csv("republican_tweets.csv")

In [17]:
rep_twt_df

Unnamed: 0,tweet
0,Dems set up clash with GOP by pairing Covid re...
1,"@covid_priest @christogrozev @mod_russia Oh, i..."
2,"Trucker convoy laps Washington, DC, beltway to..."
3,Biden’s ‘test to treat’ covid plan draws prais...
4,@Kenny_Wallace My kids dad for covid pneumonia...
...,...
108,19 New Cases of COVID-19 Identified in Mendoci...
109,19 New Cases of COVID-19 Identified in Mendoci...
110,19 New Cases of COVID-19 Identified in Mendoci...
111,All your Covid stimmies used for co-pays for m...


In [18]:
import text2emotion as te

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/farhat/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /Users/farhat/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/farhat/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [19]:
# Tweet texts from the "tweet" column as a plain Python list.
rep_no_dupes = rep_twt_df["tweet"].tolist()

In [20]:
len(rep_no_dupes)

113

In [21]:
# Merge every tweet into one space-separated string and score that single
# document, so the result is one emotion dict for the whole sample rather
# than one per tweet.
rep_twt_master = " ".join(rep_no_dupes)
rep_emotion = te.get_emotion(rep_twt_master)
rep_emotion

{'Happy': 0.16, 'Angry': 0.04, 'Surprise': 0.22, 'Sad': 0.21, 'Fear': 0.36}

# Democrat County Tweets

In [10]:
# The top 5 Democratic-majority counties are in close proximity, so a single
# 100 km search centred on San Francisco (the first row of dem_short) is used
# instead of one query per county.
SF_LAT, SF_LONG = 37.779026, -122.419906

response = api.search_tweets(
    "covid",
    geocode=f"{SF_LAT},{SF_LONG},100km",
    count=100,  # a single search request returns at most 100 tweets
    result_type="recent",
    lang="en",
)

# Keep only the text before the first "https", which drops trailing links.
dem_twt = [status.text.split("https")[0] for status in response]


In [11]:
len(dem_twt)

100

In [12]:
dem_twt

['Hawaii becomes the final state to drop a mask mandate. Meanwhile in other news 1500 people/day are dying from covid right now. Right NOW!!!',
 '“She was told by one representative that a ‘young thing’ like her could not be disabled by the virus.”\n\n“There were… ',
 'Is anyone else just getting decimated by (not covid) sicknesses since starting to do more things recently after a 2… ',
 'Fascinating conversation happening now between Alexander Zaitchik and @lmlauramarsh for @newrepublic about missed o… ',
 'Four thousand more cases, four deaths related to COVID-19 since Friday. ',
 'An incredible and inspiring story indeed! #CorsiRosenthalBox @CorsIAQ @JimRosenthal4 #covid #innovation #impact ',
 'California’s COVID positive test rate plunges to the lowest point since July -- but the state is still reporting 17… ',
 'How to cope with the anxiety of no longer wearing a mask... ',
 'Influence of exercise and vitamin D on the immune system against Covid-19: an integrative review of curre

In [14]:
#dem_twt_df = pd.DataFrame(dem_twt, columns = ["tweet"]).drop_duplicates() #create dataframe for tweets, drops duplicates
#dem_twt_df.to_csv("dem_tweets.csv",index = False)

In [22]:
# Load the saved Democratic-county tweets. The frame has to be named dem_twt_df:
# the next cell reads dem_twt_df.tweet, and assigning to rep_twt_df here would
# overwrite the Republican tweets instead.
dem_twt_df = pd.read_csv("dem_tweets.csv")

In [23]:
# Tweet texts as a plain list, then merged into one space-separated string.
dem_no_dupes = dem_twt_df["tweet"].tolist()
dem_twt_master = " ".join(dem_no_dupes)

In [24]:
len(dem_no_dupes)

96

In [25]:
dem_emotion = te.get_emotion(dem_twt_master)
dem_emotion

{'Happy': 0.13, 'Angry': 0.07, 'Surprise': 0.2, 'Sad': 0.28, 'Fear': 0.33}

In [None]:
# One row per emotion for the Democratic sample. Building the frame from named
# columns keeps "Value" numeric; transposing a [keys, values] list would leave
# every column as dtype object.
dem_emo_df = pd.DataFrame({
    "Emotion": list(dem_emotion.keys()),
    "Value": list(dem_emotion.values()),
    "Majority": "Democrat",
})

dem_emo_df

In [None]:
# One row per emotion for the Republican sample. Building the frame from named
# columns keeps "Value" numeric; transposing a [keys, values] list would leave
# every column as dtype object.
rep_emo_df = pd.DataFrame({
    "Emotion": list(rep_emotion.keys()),
    "Value": list(rep_emotion.values()),
    "Majority": "Republican",
})

rep_emo_df

In [None]:
# Stack both groups into one long table. ignore_index=True renumbers the rows
# so the combined frame does not repeat each input's index labels.
all_emotes = pd.concat([dem_emo_df, rep_emo_df], ignore_index=True)
all_emotes

In [None]:
#all_emotes.to_csv("tweet_emotions.csv",index=False)

In [15]:
import plotly.express as px

# Grouped bars: one group per emotion, one bar per county majority.
# The sample sizes in the title are taken from the tweet lists that were
# actually scored, so the caption cannot drift away from the data.
fig = px.bar(
    all_emotes,
    x="Emotion",
    y="Value",
    color="Majority",
    barmode="group",
    title=(
        "Emotions of Tweets Regarding COVID, "
        f"Democratic Counties (n={len(dem_no_dupes)}) vs. "
        f"Republican Counties (n={len(rep_no_dupes)})"
    ),
)

fig.show()

NameError: name 'all_emotes' is not defined