In [1]:
# This Jupyter notebook is used for BADM590SMA. It contains many scripts taken
# directly from the Bonzanini textbook, whose GitHub repository can be found at:
# https://github.com/bonzanini/Book-SocialMediaMiningPython. His scripts are cited here, but we (or you)
# may modify parts of them as needed.

In [2]:
import os
import sys
import tweepy
from tweepy import API
from tweepy import OAuthHandler

In [3]:
def get_twitter_auth():
    """Build the OAuth handler used to authenticate against Twitter.

    The credentials are read from the TWITTER_CONSUMER_KEY,
    TWITTER_CONSUMER_SECRET, TWITTER_ACCESS_TOKEN and TWITTER_ACCESS_SECRET
    environment variables. If any of them is missing, a message is written
    to stderr and sys.exit(1) is called.

    Return: tweepy.OAuthHandler object
    """
    credential_vars = (
        'TWITTER_CONSUMER_KEY',
        'TWITTER_CONSUMER_SECRET',
        'TWITTER_ACCESS_TOKEN',
        'TWITTER_ACCESS_SECRET',
    )
    try:
        # Looked up in order; the first missing variable raises KeyError.
        key, secret, token, token_secret = [
            os.environ[name] for name in credential_vars
        ]
    except KeyError:
        sys.stderr.write("TWITTER_* environment variables not set\n")
        sys.exit(1)
    handler = OAuthHandler(key, secret)
    handler.set_access_token(token, token_secret)
    return handler

In [4]:
def get_twitter_client():
    """Create a Twitter API client authenticated via get_twitter_auth().

    Return: tweepy.API object
    """
    return API(get_twitter_auth())

In [5]:
#######################################################################
# Getting recent tweets from a user's timeline (REST API)
#######################################################################

In [10]:
# Chap02-03/twitter_get_user_timeline.py
import sys
import json
from tweepy import Cursor

# Screen name of the account whose timeline is downloaded.
user = 'realDonaldTrump' # Change this user name to whatever you want
client = get_twitter_client()

# Output is JSON Lines: one tweet's JSON per line, in a file named after the account.
fname = "user_timeline_{}.jsonl".format(user)
with open(fname, 'w') as f:
    # Ask for 200 statuses per request and stop after at most 16 pages.
    timeline_pages = Cursor(client.user_timeline, screen_name=user, count=200).pages(16)
    for page in timeline_pages:
        for status in page:
            # print() supplies the newline that terminates each record.
            print(json.dumps(status._json), file=f)

In [7]:
#######################################################################
# Analyzing hashtag frequencies for the user timeline pulled above
#######################################################################

In [11]:
# Chap02-03/twitter_hashtag_frequency.py 
import sys 
from collections import Counter 
import json 

user = 'realDonaldTrump' # Change this user name to whatever you want
fname = "user_timeline_{}.jsonl".format(user)


def get_hashtags(tweet):
    """Return the lower-cased hashtag texts of a single tweet.

    Args:
        tweet: dict decoded from a tweet's JSON. Hashtags are read from
            tweet['entities']['hashtags'], where each entry is a dict
            with a 'text' key.

    Return: list of str, one per hashtag entry, in the order they appear.
        The list is empty when 'entities' or 'hashtags' is missing,
        null (None) or empty.
    """
    # `or` rather than a .get() default: a key that is present but None
    # (JSON null) must fall back to an empty container as well.
    entities = tweet.get('entities') or {}
    hashtags = entities.get('hashtags') or []
    return [tag['text'].lower() for tag in hashtags]

with open(fname, 'r') as f:
    hashtags = Counter()
    # Each line of the file is one tweet serialized as JSON.
    for line in f:
        hashtags.update(get_hashtags(json.loads(line)))

# Print the 20 most frequent hashtags, most common first.
for tag, count in hashtags.most_common(20):
    print("{}: {}".format(tag, count))

trump2016: 191
makeamericagreatagain: 174
maga: 112
americafirst: 81
draintheswamp: 78
imwithyou: 62
bigleaguetruth: 58
debate: 47
votetrump: 42
crookedhillary: 38
trumppence16: 34
trumptrain: 30
debates2016: 24
icymi: 22
vpdebate: 19
debates: 16
rncincle: 15
wiprimary: 13
thankyoutour2016: 12
nyprimary: 11
