In [None]:
## imports
import pandas as pd
import numpy as np
import re
import requests
import yaml
import tweepy
import pickle
import plotnine
from plotnine import *
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

## notebook display configuration
## "all" asks IPython to display the value of every expression statement in a cell,
## not only the last one (hence "repeated printouts")
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
## None lifts the column-width limit so long text values are not truncated when displayed
pd.set_option('display.max_colwidth', None)

## function to load credentials file
def load_creds(path: str):
    """Load credentials from a YAML file.

    Parameters
    ----------
    path : str
        Path to the YAML credentials file.

    Returns
    -------
    The object produced by ``yaml.safe_load`` for the file's contents.

    Raises
    ------
    OSError
        If the file cannot be opened.
    yaml.YAMLError
        If the file cannot be parsed as YAML. The error is printed and then
        re-raised.
    """
    with open(path, 'r') as stream:
        try:
            creds = yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            # Previously execution fell through to `return creds` with `creds`
            # unbound, so a parse failure surfaced as an UnboundLocalError that
            # hid the real cause. Keep the printout, then propagate the error.
            print(exc)
            raise
    return creds

# 0. Setup: authenticate to Twitter API

In [None]:
## read the credentials file - replace the placeholder string below with the path to your own creds file
## (load_creds opens the file and parses it with yaml.safe_load)
creds = load_creds("YOUR PATH TO CREDS")


In [None]:
## use bearer token to initialize a tweepy client (connection with twitter API)


# 1. Use one: pulling recent tweets associated with a hashtag

Here, we're going to pull recent tweets associated with the metoo hashtag. Note that if we want tweets older than a week, we need to either get an academic license (rather than a student one) or use another method (discussion here: https://stackoverflow.com/questions/24214189/how-can-i-get-tweets-older-than-a-week-using-tweepy-or-other-python-libraries) 

The `Cursor` class within tweepy lets us pull results without having to handle annoying things like pagination ourselves

**Example use**: Gallagher et al. look at the use of hashtags #BlackLivesMatter, #AllLivesMatter, and others in the days following instances of officer-involved shootings or indictments/non-indictments. They examine things like the different words that are used in tweets containing those hashtags and whether the conversations occur separately or whether those using the #AllLivesMatter hashtag are inserting themselves into BLM conversations: https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0195644

## 1.1 Printing tweets and their attributes

In [None]:
## construct the search query string built around the #metoo hashtag
## we're filtering out retweets for simplicity (the -is:retweet term)
## NOTE(review): the "-@jenniferagerson" term presumably excludes tweets mentioning that
## account - confirm against the query docs below
## discussion here of query construction:
## https://developer.twitter.com/en/docs/twitter-api/tweets/search/integrate/build-a-query
query = "#metoo econ -@jenniferagerson -is:retweet"



## 1.2 Printing attributes of users 

## 1.3 Storing that tweet and user information in a dataframe

If we want to work with this as data, instead of iterating and printing, 
we want to store it in a dataframe

### 1.3.1 Pull attributes/metadata about tweets

### 1.3.2 Pull attributes/metadata about users

## 1.4 Exploring the results

In [None]:
### 1. what percent are diff languages?


In [None]:
### 2. what's the relationship b/t followers and likes? (logged and removing 0)



In [None]:
### 3. search for who's tagged in a tweet - restricting to english language
## pattern: a single capture group holding "@" followed by one or more word characters,
## with ".*" on either side of the group
## NOTE(review): the leading ".*" is greedy, so on a line with several mentions the group
## captures the last one; "." also does not match newlines unless re.DOTALL is passed -
## confirm this is the intended behavior where the pattern is applied
tag_pat = r".*(@\w+).*"




In [None]:
### 4. search for hashtags in a tweet
## pattern: a single capture group holding "#" followed by one or more word characters;
## the group must be followed by at least one whitespace character, with ".*" on either side
## NOTE(review): because of the required "\s+", a hashtag that is the very last thing in the
## text (no trailing whitespace) is not matched; the greedy leading ".*" also means the group
## captures the last qualifying hashtag on a line - confirm this is intended
hash_pat = r".*(\#\w+)\s+.*"


# 2. Use two: examining relationships between those on the site

- Previous example showed both who tweeted with a lot of likes/high follower counts, and also who they tended to tag in the tweets
- We can also use twitter to form the start of a social network where nodes are different accounts and edges are drawn between followers who follow the same focal account; here, we'll look at a simplified form

**Example use**: Mosleh et al. do a field experiment where they create Twitter bots that either self-identify as Republican or Democrat and then the bots follow people. They look at how the partisanship of the user the bot follows predicts whether the user follows a bot back: https://www.pnas.org/content/118/7/e2022761118.short

In [3]:
## read in standardized tweets
## READ_SAVED toggles loading the saved tweets dataframe from the pickle file below
## NOTE(review): when READ_SAVED is False nothing in this cell defines tweets_df, so it must
## already exist in the kernel (presumably built in section 1.3 - confirm), otherwise the
## last line raises a NameError
## NOTE(review): unpickling can execute arbitrary code - only read pickle files you trust
READ_SAVED = True
if READ_SAVED:
    tweets_df = pd.read_pickle("../../public_data/tweets_f22.pkl")
## display the top rows after sorting by public_metrics_tweet, largest values first
tweets_df.sort_values(by = 'public_metrics_tweet', ascending = False).head()




Unnamed: 0,id,created_at,author_id,text,lang,geo,public_metrics_tweet,username,description,location,verified,public_metrics_user
1,1585557668776779778,2022-10-27 09:03:01+00:00,725686324662046724,"There is a large #MeToo moment taking place in Economics.\nTo all men who react to this with ""I can't believe it, I am so shocked"": where have you been the past 5 years?\nTo all men in political science who say ""that's econ, not us"": where have you been the past 5 years? https://t.co/tvIYfkjtzg",en,,237,tabouchadi,"Associate Professor in European Politics at Nuffield College, University of Oxford. Elections, parties, social democracy & the radical right. he/him. 🏳️‍🌈",,True,19276
5,1585239900961374208,2022-10-26 12:00:19+00:00,482431084,"It's hard to overstate how important econ's latest #MeToo events are.\n\nTo be its best, the field needs everyone, which means welcoming and protecting every economist or potential economist.\n\nI've mostly been listening but have two thoughts to emphasize:",en,,149,JoshuaSGoodman,Education/labor economist - @WhiteHouseCEA (2022-23) - @BUWheelock / @BU_Economics - @Wheelock_Policy - @JPAM_DC,"Cambridge, MA",False,17499
13,1584721962823479297,2022-10-25 01:42:13+00:00,64092459,the #metoo (second season) moment in Econ has been SUPER useful in building my block list,en,,121,dynarski,economist | inequality & education | 1st-gen | prof @Harvard | @americanacad @NAEduc NBER | columns @nytimes https://t.co/5RZ12LOAPB | @dynarski@mastodon.social,Cambridge via Somerville,True,44231
18,1584590533489414144,2022-10-24 16:59:58+00:00,242422673,"If anyone has anything to say regarding the ongoing #metoo in econ, I can put u in touch with very professional investigative journalist that reached out to me. They can protect your identity and are open to chat on signal/telegram etc. They have long experience with this stuff",en,{'place_id': '2260fcb4a77f2bad'},65,dorotheafrenkel,"Assistant Prof in Behavioral Econ @TheChoiceLab examining the role of gender in group work, reproducibility & other topics.atm: TEACHING. 🇸🇪✡️🇭🇺","Bergen, Norway",False,4154
14,1584654525717413889,2022-10-24 21:14:15+00:00,245987858,"Plus, it highlights how important these statements are…signaling what the culture in Econ is currently like, moving towards, etc. The ripple effects can be massive! #EconTwitter #MeToo \n(3/4)",en,,47,estebanjq3,"Development economist, demographer & social scientist @MathematicaNow prior @UW_AAE @UW_CDE @IFPRI @FAO @AmeriCorpsNCCC Anti-fascist Anti-racist he/el 🇺🇸🇪🇨","Chicagoland, IL",False,2470


# 3. Use three: extract recent tweets from a specific user

If we have a dataset or panel of users, we can get their tweets (the most recent ~3200) and extract properties like likes, retweets, etc.

**Example use**: Bisbee et al. construct a panel of political science professors who use Twitter; they then follow those users and look at who retweets each other's research, finding that male professors are more likely to pass on research by other male professors than by female professors - https://www.cambridge.org/core/journals/perspectives-on-politics/article/polisci-twitter-a-descriptive-analysis-of-how-political-scientists-use-twitter-in-2019/C8A193C3E939C1ABCD4600DFE8AEF79A

In [None]:
## step 1: choose a focal account and get their numeric id
## focal_account holds the chosen account as a string; the numeric-id lookup itself
## is not written in this cell yet
focal_account = "McCourtSchool"


In [None]:
## do sentiment scoring on the tweet


# Activity for your practice

1. Choose a public user (e.g., a politician; celebrity) and pull 100 tweets from their timeline and metadata about those tweets. When pulling metadata, make sure to get the conversation_id and count of replies (latter is in public_metrics)
2. Choose one of their tweets to focus on that got a lot of replies and get the conversation_id of that tweet
3. Paste the conversation id of that tweet into a query using this documentation for query building: https://developer.twitter.com/en/docs/twitter-api/tweets/search/integrate/build-a-query#examples
4. Similar to example 1.1 in the example code, use the `search_recent_tweets` method to pull tweets that are in response to the focal tweet from step 2--- note, if there are no public replies, this may return None; therefore, make sure to choose a tweet with 20+ replies
5. Place them in a dataframe and view the results