In [1]:
# Dependencies
import json
from pprint import pprint
from datetime import datetime as dt
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tweepy
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from config import (consumer_key, consumer_secret,
                    access_token, access_token_secret)

In [9]:
# Authenticate with Twitter using the credentials imported from config
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

# Build the API client. JSONParser is used so responses can be indexed like
# dicts; both rate-limit waiting and rate-limit notification are switched off.
api = tweepy.API(
    auth,
    parser=tweepy.parsers.JSONParser(),
    wait_on_rate_limit=False,
    wait_on_rate_limit_notify=False,
)

# VADER analyzer instance shared by every tweet scored below
analyzer = SentimentIntensityAnalyzer()

In [10]:
# Relative path to the box-office dataset
csv_path = 'box_office_data/moviesFinal.csv'

# Load the dataset into a DataFrame and display it as the cell output
movie_df = pd.read_csv(filepath_or_buffer=csv_path)
movie_df

Unnamed: 0,title,genre,release,rated,value,budget,opening,domestic,worldwide,ratio,zrating,zbudget,zopening,ztotal
0,#Jìyìdàshi,Thriller/Suspense,20170428,R,0.00,20000000,224942,587470,43326383,0.011247,-2.898292,-0.523493,-0.581893,-4.585571
1,#TylerPerrysBoo2AMadeaHalloween,Comedy,20171020,PG-13,0.00,20000000,21226953,47319572,47879572,1.061348,-2.898292,-0.523493,0.033620,-3.354543
2,#Gongfuyujia,Action,20170127,PG,0.00,65000000,111979,362657,254569616,0.001723,-2.898292,0.237558,-0.585204,-3.831142
3,#TheEmojiMovie,Adventure,20170728,PG,17.00,50000000,24531923,86089513,216807482,0.490638,-2.055509,-0.016126,0.130480,-1.810674
4,#JustGettingStarted,Comedy,20171208,PG-13,22.00,22000000,3201459,6069605,6709821,0.145521,-1.807632,-0.489668,-0.494660,-3.286619
5,#Rings,Horror,20170203,PG-13,25.67,25000000,13002632,27793018,82991460,0.520105,-1.625690,-0.438931,-0.207413,-2.479447
6,#TheSnowman,Thriller/Suspense,20171020,R,27.00,35000000,3372565,6670765,43232849,0.096359,-1.559755,-0.269809,-0.489645,-2.808854
7,#Geostorm,Action,20171020,PG-13,29.33,100000000,13707376,33700160,220800160,0.137074,-1.444244,0.829486,-0.186759,-0.988276
8,#FiftyShadesFreed,Drama,20180209,R,29.33,55000000,38560195,98382615,359382615,0.701094,-1.444244,0.068435,0.541612,-0.292584
9,#FiftyShadesDarker,Drama,20170210,R,29.67,55000000,46607250,114434010,381081967,0.847405,-1.427389,0.068435,0.777450,0.195948


In [11]:
# "Real Person" filters (currently disabled)
# min_tweets = 5
# max_tweets = 10000
# max_followers = 2500
# max_following = 2500
# lang = "en"

# One summary dict per movie: title, mean compound score, number of tweets scored
sentiment = []

for title in movie_df['title']:

    # The movie title from the csv is used verbatim as the search query
    search_term = title

    # Passed as max_id to the search below. It is reset to None for every
    # title and only one request is made per title, so although it is updated
    # inside the tweet loop it never feeds a follow-up (paginated) request.
    oldest_tweet = None

    # VADER compound score of each tweet returned for this movie
    compound_list = []

    try:

        # Single search request: up to 100 recent English tweets matching the title
        public_tweets = api.search(search_term,
                                   count=100,
                                   lang='en',
                                   result_type="recent",
                                   max_id=oldest_tweet)

        # Score every returned tweet
        for tweet in public_tweets['statuses']:

            # Disabled user filters:
            # if (tweet["user"]["followers_count"] < max_followers and
                # tweet["user"]["statuses_count"] > min_tweets and
                # tweet["user"]["statuses_count"] < max_tweets and
                # tweet["user"]["friends_count"] < max_following and
                # tweet["user"]["lang"] == lang):

            # Run VADER on the tweet text and keep only the compound score
            results = analyzer.polarity_scores(tweet["text"])
            compound = results["compound"]
            compound_list.append(compound)

            # Track the id just below this tweet's id (the would-be max_id of a
            # next page, so this tweet would not be returned again)
            oldest_tweet = int(tweet["id_str"]) - 1

        # np.mean([]) returns NaN but also emits a RuntimeWarning; when no
        # tweets were found, store NaN directly so the result is unchanged
        # and the warning is avoided.
        mean_compound = np.mean(compound_list) if compound_list else np.nan

        # Summary for this movie
        tweet_dict = {"title": search_term,
                      "compound": mean_compound,
                      "tweet_count": len(compound_list)}

        # Append tweet data to sentiment list
        sentiment.append(tweet_dict)

    except Exception as e:
        # Best effort: a failed search skips this movie (it gets no entry in
        # `sentiment`). Report which title failed so the gap is traceable.
        print(f"{search_term}: {e}")

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


In [18]:
# Build the per-movie sentiment table. Passing `columns` fixes the column
# order and still produces an (empty) frame with these columns when
# `sentiment` is empty, where selecting the columns afterwards would raise
# a KeyError.
movie_sent_df = pd.DataFrame(sentiment, columns=["title", "compound", "tweet_count"])

# Today's date as YYYY-MM-DD (used in the output file name below)
today = dt.now().strftime("%Y-%m-%d")

# Save to csv
# movie_sent_df.to_csv(f'all_movie_data/{today}_all_movie_sent.csv', encoding='utf-8', index=False)
movie_sent_df

Unnamed: 0,title,compound,tweet_count
0,#Jìyìdàshi,,0
1,#TylerPerrysBoo2AMadeaHalloween,,0
2,#Gongfuyujia,,0
3,#TheEmojiMovie,0.037363,81
4,#JustGettingStarted,0.305133,100
5,#Rings,0.310344,78
6,#TheSnowman,0.206885,100
7,#Geostorm,0.001510,100
8,#FiftyShadesFreed,0.230577,100
9,#FiftyShadesDarker,0.283805,100


In [20]:
# Left-join the sentiment summary onto the box-office data so every movie is
# kept, including those with no sentiment row. The join key is stated
# explicitly rather than inferred from whichever column names the two frames
# happen to share.
merged_df = pd.merge(movie_df, movie_sent_df, how="left", on="title")

# Save to csv
# merged_df.to_csv(f'all_movie_data/{today}_all_movie_data.csv', encoding='utf-8', index=False)
merged_df

Unnamed: 0,title,genre,release,rated,value,budget,opening,domestic,worldwide,ratio,zrating,zbudget,zopening,ztotal,compound,tweet_count
0,#Jìyìdàshi,Thriller/Suspense,20170428,R,0.00,20000000,224942,587470,43326383,0.011247,-2.898292,-0.523493,-0.581893,-4.585571,,0
1,#TylerPerrysBoo2AMadeaHalloween,Comedy,20171020,PG-13,0.00,20000000,21226953,47319572,47879572,1.061348,-2.898292,-0.523493,0.033620,-3.354543,,0
2,#Gongfuyujia,Action,20170127,PG,0.00,65000000,111979,362657,254569616,0.001723,-2.898292,0.237558,-0.585204,-3.831142,,0
3,#TheEmojiMovie,Adventure,20170728,PG,17.00,50000000,24531923,86089513,216807482,0.490638,-2.055509,-0.016126,0.130480,-1.810674,0.037363,81
4,#JustGettingStarted,Comedy,20171208,PG-13,22.00,22000000,3201459,6069605,6709821,0.145521,-1.807632,-0.489668,-0.494660,-3.286619,0.305133,100
5,#Rings,Horror,20170203,PG-13,25.67,25000000,13002632,27793018,82991460,0.520105,-1.625690,-0.438931,-0.207413,-2.479447,0.310344,78
6,#TheSnowman,Thriller/Suspense,20171020,R,27.00,35000000,3372565,6670765,43232849,0.096359,-1.559755,-0.269809,-0.489645,-2.808854,0.206885,100
7,#Geostorm,Action,20171020,PG-13,29.33,100000000,13707376,33700160,220800160,0.137074,-1.444244,0.829486,-0.186759,-0.988276,0.001510,100
8,#FiftyShadesFreed,Drama,20180209,R,29.33,55000000,38560195,98382615,359382615,0.701094,-1.444244,0.068435,0.541612,-0.292584,0.230577,100
9,#FiftyShadesDarker,Drama,20170210,R,29.67,55000000,46607250,114434010,381081967,0.847405,-1.427389,0.068435,0.777450,0.195948,0.283805,100
