# Analysis of Movies Before/After Release on Twitter

The data was collected over the 3 days before and the 3 days after each movie's release. The movies selected are Captain Marvel, Gloria Bell, I'm Not Here, and Triple Frontier.

## Import Data

In [13]:
import pickle
import nltk
import numpy as np
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import matplotlib.pyplot as plt
import re
from PIL import Image
from wordcloud import WordCloud, STOPWORDS
from IPython.display import Image as im

def _load_pickle(path):
    """Return the object unpickled from the binary file at *path*."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Data sets used as the "before release" tweets in the cells below.
gloria = _load_pickle('pkl/gloria_bell.pkl')
captain = _load_pickle('pkl/captain_marvel_before.pkl')
frontier = _load_pickle('pkl/triple_frontier.pkl')
here = _load_pickle('pkl/im_not_here.pkl')

# Data sets used as the "after release" tweets in the cells below.
gloria_after = _load_pickle('pkl/gloria_bell_after.pkl')
captain_after = _load_pickle('pkl/captain_marvel_after.pkl')
frontier_after = _load_pickle('pkl/triple_frontier_after.pkl')
here_after = _load_pickle('pkl/im_not_here_after.pkl')

## Comparison of the Number of Tweets Before/After Release

In [15]:
# Number of tweets in each "before release" data set.
len_captainBefore, len_gloriaBefore, len_tripleBefore, len_hereBefore = (
    len(captain), len(gloria), len(frontier), len(here)
)

# Bar heights, ordered Captain Marvel, Gloria Bell, Triple Frontier, I'm Not Here.
bar1 = [len_captainBefore, len_gloriaBefore, len_tripleBefore, len_hereBefore]
bar2 = [len(tweets) for tweets in (captain_after, gloria_after, frontier_after, here_after)]
barWidth = .35

# One x position per movie for the "before" bars.
r1 = np.arange(len(bar1))
fig, ax = plt.subplots()

# The "after" bars are shifted right by one bar width so each movie gets a pair.
bar_style = dict(width=barWidth, edgecolor="#f1c40f")
before = ax.bar(r1, bar1, color="#00a8ff", label='Movies Before', **bar_style)
after = ax.bar(r1 + barWidth, bar2, color="#e84118", label='Movies After', **bar_style)

def label(rects):
    """Write each bar's height, truncated to an integer, above the bar.

    rects: iterable of bar patches, e.g. the container returned by ``ax.bar``.
    """
    for bar in rects:
        bar_height = bar.get_height()
        x_centre = bar.get_x() + bar.get_width() / 2.
        # The bottom edge of the text is anchored at 1.05 times the bar height.
        plt.text(x_centre, 1.05 * bar_height, '%d' % int(bar_height),
                 ha='center', va='bottom')

movie_names = ('Captain Marvel', 'Gloria Bell', 'Triple Frontier', 'Im Not Here')

ax.set(xlabel="Movies", ylabel="Number of Tweets")
# Put each tick midway between a movie's "before" and "after" bars.
ax.set_xticks(r1 + barWidth / 2)
ax.set_xticklabels(movie_names)
ax.margins(y=0.1)
ax.legend()

# Annotate both bar groups with their tweet counts, then render.
for bars in (before, after):
    label(bars)
plt.show()

### Notes: 
Captain Marvel had the most tweets, with Triple Frontier in second place, then Gloria Bell, and lastly I'm Not Here.

## Cleaning Data for Word Cloud

In [19]:
def clean(tweets):
    """Return the lower-cased, filtered words found in a collection of tweets.

    tweets: iterable of objects exposing their content as a ``text`` attribute.

    Links, escaped unicode sequences and all non-letter characters are
    removed. Words of three characters or fewer and words listed in
    ``STOPWORDS`` are dropped.

    Returns a list of words in order of appearance (duplicates are kept).
    """
    # Join with a space so the last word of one tweet cannot fuse with the
    # first word of the next, and so a link ending one tweet cannot swallow
    # the word that starts the following tweet.
    raw = ' '.join(tweet.text for tweet in tweets)

    # Getting rid of unicode, links, special characters
    no_links = re.sub(r'http\S+', '', raw)
    no_unicode = re.sub(r"\\[a-z][a-z]?[0-9]+", '', no_links)
    # Turn newlines/tabs into spaces first; the letters-only filter below
    # would otherwise delete them and glue neighbouring words together.
    single_spaced = re.sub(r'\s+', ' ', no_unicode)
    letters_only = re.sub('[^A-Za-z ]+', '', single_spaced)

    # Getting rid of small filler words
    return [
        word
        for word in (w.lower() for w in letters_only.split(' '))
        if len(word) > 3 and word not in STOPWORDS
    ]

# Cleaned word lists per movie: the before-release data set first, then the
# after-release one.
gloria_words, gloria_words_after = clean(gloria), clean(gloria_after)
captain_words, captains_words = clean(captain), clean(captain_after)
triple_words, triple_words_after = clean(frontier), clean(frontier_after)
here_words, here_words_after = clean(here), clean(here_after)

## Generate Word Cloud

In [47]:
def word_cloud(tweets_before, tweets_after, message1, message2):
    """Show two word clouds side by side, one per word list.

    tweets_before, tweets_after: iterables of word strings; each is joined
        with commas and passed to ``WordCloud.generate``.
    message1, message2: titles of the left and right cloud respectively.
    """
    # The same image array is handed to both clouds as their ``mask``.
    cloud_shape = np.array(Image.open('images/cloud.png'))

    clouds = []
    for words in (tweets_before, tweets_after):
        cloud = WordCloud(background_color="white", max_words=125, mask=cloud_shape)
        cloud.generate(','.join(words))
        clouds.append(cloud)

    figure = plt.figure(figsize=(50, 50))
    titled_clouds = zip(clouds, (message1, message2))
    for position, (cloud, title) in enumerate(titled_clouds, start=1):
        figure.add_subplot(1, 2, position)
        plt.imshow(cloud, interpolation='bilinear')
        plt.title(title, size=25)
        plt.axis("off")
    plt.show()

# (before words, after words, left title, right title) for each movie.
cloud_inputs = (
    (gloria_words, gloria_words_after, "Gloria Bell Before", "Gloria Bell After"),
    (captain_words, captains_words, "Captain Marvel Before", "Captain Marvel After"),
    (triple_words, triple_words_after, "Triple Frontier Before", "Triple Frontier After"),
    (here_words, here_words_after, "I'm Not Here Before", "I'm Not Here After"),
)
for cloud_args in cloud_inputs:
    word_cloud(*cloud_args)

### Notes: 
The most common words in Captain Marvel in the before picture are marvel studio, captainmarvel, last night, and theater. The most common words in the after picture are follow original funko, higher faster, captainmarvel, and brielarson.

The most common words in Gloria Bell in the before picture are gloriabell, opens, movie, nyla march, and juliannemoore. The most common words in the after picture are juliannemoore, academy awardwinning, great femaleled films, films century, and transcendently perfect.

The most common words in Triple Frontier in the before picture are triplefrontier, charlie hunnam, carpet, reveals heist, and talk working. The most common words in the after picture are charlie hunnam, oscar isaac, triplefrontier, select theaters, and netflix select. 

I'm Not Here was not tweeted about as much as the other movies, so it has fewer common words. The most common words in the before picture are imnotheremovie, preorder tickets, tickets, and special screening. The most common words in the after picture are imnotheremovie, playing platforms, netowrk, fandangonwrt, and dish network.

## Sentiment Analysis

In [48]:
def sentimentAnalyzer(tweets):
    """Count positive, neutral and negative tweets using VADER compound scores.

    tweets: iterable of objects exposing their content as a ``text`` attribute.

    A tweet is counted as positive when its compound score is >= 0.05,
    negative when it is <= -0.05, and neutral otherwise.

    Returns a dict with the keys "positive", "neutral" and "negative", each
    mapped to the number of tweets in that class.
    """
    analyzer = SentimentIntensityAnalyzer()
    summary = {"positive": 0, "neutral": 0, "negative": 0}
    for tweet in tweets:
        compound = analyzer.polarity_scores(tweet.text)["compound"]
        # Both boundaries are inclusive. With strict comparisons a score of
        # exactly 0.05 matched neither the neutral nor the positive test and
        # fell through to the negative bucket.
        if compound >= 0.05:
            summary["positive"] += 1
        elif compound <= -0.05:
            summary["negative"] += 1
        else:
            summary["neutral"] += 1
    return summary

# Sentiment summaries per movie: the before-release data set first, then the
# after-release one.
sentimentCaptainBefore, sentimentCaptainAfter = sentimentAnalyzer(captain), sentimentAnalyzer(captain_after)
sentimentGloriaBefore, sentimentGloriaAfter = sentimentAnalyzer(gloria), sentimentAnalyzer(gloria_after)
sentimentFrontierBefore, sentimentFrontierAfter = sentimentAnalyzer(frontier), sentimentAnalyzer(frontier_after)
sentimentHereBefore, sentimentHereAfter = sentimentAnalyzer(here), sentimentAnalyzer(here_after)

def pie_chart(tweetsBefore, tweets, message1, message2, color1, color2, color3):
    """Draw two sentiment donut charts side by side.

    tweetsBefore, tweets: dicts with the keys "positive", "negative" and
        "neutral" holding the wedge sizes for the left and right chart.
    message1, message2: titles of the left and right chart respectively.
    color1, color2, color3: wedge colours for the positive, negative and
        neutral wedges, in that order.
    """
    names = ['Positive', 'Negative', 'Neutral']
    keys = ("positive", "negative", "neutral")
    colors = [color1, color2, color3]
    explode = (0.05, 0.05, 0.05)

    fig, axes = plt.subplots(1, 2)
    for ax, summary, title in zip(axes, (tweetsBefore, tweets), (message1, message2)):
        sizes = [summary[key] for key in keys]
        ax.pie(sizes, colors=colors, labels=names, autopct='%1.1f%%',
               startangle=90, pctdistance=0.85, explode=explode)
        ax.set_title(title, size=20)
        # Each axes gets its own white centre circle. A single circle added
        # to the current axes only reached the right-hand chart, leaving the
        # left one a solid pie.
        ax.add_artist(plt.Circle((0, 0), 0.70, fc='white'))
        ax.axis('equal')
    plt.tight_layout()
    plt.show()
    
# (before summary, after summary, left title, right title, three wedge colours)
# for each movie.
pie_inputs = (
    (sentimentCaptainBefore, sentimentCaptainAfter, "Captain Marvel Before", "Captain Marvel After", '#E74C3C', '#3498DB', '#F1C40F'),
    (sentimentGloriaBefore, sentimentGloriaAfter, "Gloria Bell Before", "Gloria Bell After", '#8E44AD', '#3498DB', '#E67E22'),
    (sentimentFrontierBefore, sentimentFrontierAfter, "Triple Frontier Before", "Triple Frontier After", '#27AE60', '#2C3E50', '#1F618D'),
    (sentimentHereBefore, sentimentHereAfter, "Im Not Here Before", "Im Not Here After", '#CB4335', '#DC7633', '#1F618D'),
)
for pie_args in pie_inputs:
    pie_chart(*pie_args)

### Notes: 
Captain Marvel received more positive tweets after it was released. It also had a lower amount of negative tweets after release. 

Gloria Bell had fewer positive tweets after it was released and had a 12% increase in negative tweets. 

Triple Frontier had a lot of neutral tweets especially in the before release chart. The amount of negative tweets increased more than the amount of positive tweets after the movie was released. 

I'm Not Here also had a large number of neutral tweets in the before-release chart. After release, its number of positive tweets grew substantially, by around 50%, while the number of negative tweets increased by around 19%.