In [1]:
# Dependencies
import json
from pprint import pprint
from datetime import datetime as dt
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tweepy
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import sys

# Grab config file
sys.path.insert(0, '..')
from config import (consumer_key, consumer_secret,
                    access_token, access_token_secret)

In [2]:
# Setup Tweepy API Authentication
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, parser=tweepy.parsers.JSONParser(), 
                 wait_on_rate_limit=False, wait_on_rate_limit_notify=False)

# Initialize Sentiment Analyzer
analyzer = SentimentIntensityAnalyzer()

In [3]:
# Movie list to predict success
movie_list = ["#Gringo", "#TheHurricaneHeist", "#PreyAtNight", "#WrinkleInTime", "#LoveSimon", "#TombRaider", 
              "#PacificRimUprising", "#SherlockGnomes", "#Acrimony", "#ReadyPlayerOne"]

In [4]:
# User input to specify search until date
search_date = input("What date do you want to query the movies? Must be in this format (%Y-%m-%d): ")

# "Real Person" Filters
min_tweets = 5
max_tweets = 10000
max_followers = 2500
max_following = 2500
lang = "en"

# Create list of dictionaries
sentiment = []

# Analyze each movie in list
for movie in movie_list:
    
    # Assign title as search term
    search_term = movie
    
    # Create variable for holding the oldest tweet
    oldest_tweet = None

    # List to hold average compound values for each movie
    compound_list = []
    
    try:
        
        # Loop through 18 times (total of 1800 tweets)
        for x in range(18):
    
            # Get all tweets from home feed (for each page specified)
            public_tweets = api.search(search_term, 
                                        count=100,
                                        lang='en', 
                                        until=search_date, 
                                        max_id=oldest_tweet)

            # Loop through all tweets
            for tweet in public_tweets['statuses']:
        
                # Use filters to check if user meets conditions
                if (tweet["user"]["followers_count"] < max_followers and
                    tweet["user"]["statuses_count"] > min_tweets and
                    tweet["user"]["statuses_count"] < max_tweets and
                    tweet["user"]["friends_count"] < max_following and
                    tweet["user"]["lang"] == lang):
        
                    # Grab tweet data
                    tweet_text = tweet['text']
            
                    # Run Vader Analysis on each tweet
                    results = analyzer.polarity_scores(tweet["text"])
                    compound = results["compound"]
                
                    # Append compound value to list
                    compound_list.append(compound)
            
                    # Reassign the the oldest tweet (i.e. the max_id)
                    oldest_tweet = int(tweet["id_str"])
        
                    # Subtract 1 so the previous oldest isn't included
                    # in the new search
                    oldest_tweet -= 1
                   
        # Store average 
        tweet_dict = {"Movie Title": search_term,
                      "Search Date": search_date,
                      "Compound": np.mean(compound_list), 
                      "Tweet Count": len(compound_list)}
    
        # Append tweet data to sentiment list
        sentiment.append(tweet_dict)
    
    except RateLimitError:
        print("You have exceeded Twitter's rate limit. Come back in 15 minutes and try again.")
    except Exception as e:
        print(e)

What date do you want to query the movies? Must be in this format (%Y-%m-%d): 2018-03-17


In [5]:
# Create Dataframe
movie_sent_df = pd.DataFrame(sentiment)

# Reorder columns
movie_sent_df = movie_sent_df.iloc[:,[1,0,2,3]]

# Save to csv
# movie_sent_df.to_csv(f'daily_movie_sentiment/{search_date}_movie_sentiment.csv', encoding='utf-8', index=False)
movie_sent_df

Unnamed: 0,Movie Title,Compound,Search Date,Tweet Count
0,#Gringo,0.21346,2018-03-17,648
1,#TheHurricaneHeist,0.249374,2018-03-17,223
2,#PreyAtNight,0.33335,2018-03-17,613
3,#WrinkleInTime,0.219977,2018-03-17,554
4,#LoveSimon,0.447294,2018-03-17,725
5,#TombRaider,0.261814,2018-03-17,629
6,#PacificRimUprising,0.014855,2018-03-17,610
7,#SherlockGnomes,0.293238,2018-03-17,450
8,#Acrimony,-0.004519,2018-03-17,597
9,#ReadyPlayerOne,0.339646,2018-03-17,856
