In [1]:
# This is a Jupyter notebook used for BADM590SMA. It contains many scripts taken
# directly from the Bonzanini textbook. His GitHub repository can be found at:
# https://github.com/bonzanini/Book-SocialMediaMiningPython. His scripts are cited here, but we (or you)
# may modify parts of them as needed.

In [1]:
import os
import sys
import tweepy
from tweepy import API
from tweepy import OAuthHandler

In [8]:
# Twitter handle (screen name) analyzed by this notebook.
# Later cells read this variable both to request the user's timeline and to
# build the name of the JSONL file the tweets are saved to, so change it here only.
twitterhandle = 'realDonaldTrump'

In [2]:
def get_twitter_auth():
    """Build the OAuth handler used to authenticate Twitter API calls.

    The credentials are read from the TWITTER_CONSUMER_KEY,
    TWITTER_CONSUMER_SECRET, TWITTER_ACCESS_TOKEN and TWITTER_ACCESS_SECRET
    environment variables. If any of them is missing, a message is written
    to stderr and sys.exit(1) is called.

    Return: tweepy.OAuthHandler object
    """
    required = (
        'TWITTER_CONSUMER_KEY',
        'TWITTER_CONSUMER_SECRET',
        'TWITTER_ACCESS_TOKEN',
        'TWITTER_ACCESS_SECRET',
    )
    try:
        # Looked up in order; the first missing variable triggers the exit.
        key, secret, token, token_secret = [os.environ[name] for name in required]
    except KeyError:
        sys.stderr.write("TWITTER_* environment variables not set\n")
        sys.exit(1)
    handler = OAuthHandler(key, secret)
    handler.set_access_token(token, token_secret)
    return handler

In [3]:
def get_twitter_client():
    """Create a Twitter API client authenticated via get_twitter_auth().

    Return: tweepy.API object
    """
    return API(get_twitter_auth())

In [4]:
#######################################################################
# Getting recent tweets from a user's timeline (Rest API)
#######################################################################

In [5]:
# Chap02-03/twitter_get_user_timeline.py
import sys
import json
from tweepy import Cursor

user = twitterhandle  # Handle is configured once, in the `twitterhandle` cell
client = get_twitter_client()

# The user_timeline endpoint serves at most 200 tweets per request and only
# reaches back through roughly the 3,200 most recent tweets of an account,
# so 16 pages of 200 is the most that can be retrieved.
TWEETS_PER_PAGE = 200
MAX_PAGES = 16

# Save the raw API payloads as JSON Lines: one tweet (JSON document) per line.
# Note: the file is opened in 'w' mode, so any previous download is overwritten.
fname = "user_timeline_{}.jsonl".format(user)
with open(fname, 'w') as f:
    timeline_pages = Cursor(client.user_timeline, screen_name=user, count=TWEETS_PER_PAGE).pages(MAX_PAGES)
    for page in timeline_pages:
        for status in page:
            # status._json is the dict tweepy keeps of the original API response.
            f.write(json.dumps(status._json)+"\n")

In [6]:
#######################################################################
# Analyzing hashtag frequencies for the user timeline pulled above
#######################################################################

In [9]:
# Chap02-03/twitter_hashtag_frequency.py 
import sys 
from collections import Counter 
import json 

# Re-derive the handle and the JSONL file name here so this analysis cell does
# not depend on the download cell having been run in the current session
# (the file itself must already exist on disk).
user = twitterhandle # Taken from the `twitterhandle` configuration variable
fname = "user_timeline_{}.jsonl".format(user)


def get_hashtags(tweet):
    """Return the lower-cased hashtag texts found in a tweet.

    tweet: dict; hashtags are looked up under tweet['entities']['hashtags'],
           each entry being a dict with a 'text' key. A missing 'entities'
           or 'hashtags' key is treated as "no hashtags".

    Return: list of str (empty if the tweet has no hashtags)
    """
    tag_entries = tweet.get('entities', {}).get('hashtags', [])
    lowered = []
    for entry in tag_entries:
        lowered.append(entry['text'].lower())
    return lowered

# Tally hashtags over every tweet stored in the timeline file (one JSON tweet per line).
hashtags = Counter()
with open(fname, 'r') as f:
    for line in f:
        hashtags.update(get_hashtags(json.loads(line)))

# Report the 20 most frequent hashtags, most used first.
for tag, count in hashtags.most_common(20):
    print("{}: {}".format(tag, count))

trump2016: 172
makeamericagreatagain: 158
maga: 112
americafirst: 81
draintheswamp: 78
imwithyou: 62
bigleaguetruth: 58
debate: 47
crookedhillary: 38
trumppence16: 34
votetrump: 33
trumptrain: 28
icymi: 25
debates2016: 24
vpdebate: 19
debates: 16
rncincle: 15
wiprimary: 13
thankyoutour2016: 12
debatenight: 11


In [None]:
#######################################################################
# Analyzing hashtag stats for the user timeline pulled above
#######################################################################

In [10]:
# Chap02-03/twitter_hashtag_stats.py
import sys
from collections import defaultdict
import json

# Build a histogram: number of hashtags in a tweet -> number of such tweets.
# Uses `fname` and `get_hashtags` defined in the hashtag-frequency cell.
hashtag_count = defaultdict(int)
with open(fname, 'r') as f:
    for line in f:
        tweet = json.loads(line)
        hashtags_in_tweet = get_hashtags(tweet)
        n_of_hashtags = len(hashtags_in_tweet)
        hashtag_count[n_of_hashtags] += 1

# The file is only needed for the counting pass; everything below is reporting.
tweets_with_hashtags = sum(count for n_of_tags, count in hashtag_count.items() if n_of_tags > 0)
tweets_no_hashtags = hashtag_count[0]
tweets_total = tweets_no_hashtags + tweets_with_hashtags

if tweets_total == 0:
    # An empty timeline file would otherwise raise ZeroDivisionError in the percentages.
    print("No tweets found in {}".format(fname))
else:
    tweets_with_hashtags_percent = "%.2f" % (tweets_with_hashtags / tweets_total * 100)
    tweets_no_hashtags_percent = "%.2f" % (tweets_no_hashtags / tweets_total * 100)
    print("{} tweets without hashtags ({}%)".format(tweets_no_hashtags, tweets_no_hashtags_percent))
    print("{} tweets with at least one hashtag ({}%)".format(tweets_with_hashtags, tweets_with_hashtags_percent))

    # Sort by hashtag count so the rows do not depend on the order in which
    # tweets happen to appear in the file (dicts iterate in insertion order).
    for tag_count, tweet_count in sorted(hashtag_count.items()):
        if tag_count > 0:
            percent_total = "%.2f" % (tweet_count / tweets_total * 100)
            # "elite" = share among the tweets that have at least one hashtag.
            # tweets_with_hashtags is non-zero whenever this branch is reached.
            percent_elite = "%.2f" % (tweet_count / tweets_with_hashtags * 100)
            print("{} tweets with {} hashtags ({}% total, {}% elite)".format(tweet_count, tag_count, percent_total, percent_elite))

2277 tweets without hashtags (71.16%)
923 tweets with at least one hashtag (28.84%)
544 tweets with 1 hashtags (17.00% total, 58.94% elite)
317 tweets with 2 hashtags (9.91% total, 34.34% elite)
49 tweets with 3 hashtags (1.53% total, 5.31% elite)
9 tweets with 4 hashtags (0.28% total, 0.98% elite)
1 tweets with 5 hashtags (0.03% total, 0.11% elite)
2 tweets with 6 hashtags (0.06% total, 0.22% elite)
1 tweets with 8 hashtags (0.03% total, 0.11% elite)
