# Tweet Collection from the Twitter API

The categories that we are predicting for this project are **Best Picture**, **Best Actor**, **Best Actress**, **Best Supporting Actor**, and **Best Supporting Actress**.

The technical goal is to store tweets under jsons for each Oscar Nominee. There will be one json for each category.

tweets -> keyword -> nominee -> json

### Getting authentication to collect

In [1]:
import json
import tweepy
import pandas as pd

In [2]:
#keys/secrets
credentials = {}
credentials['CONSUMER_KEY'] = ""
credentials['CONSUMER_SECRET'] = ""
credentials['ACCESS_TOKEN'] = ""
credentials['ACCESS_SECRET'] = ""

with open("twitter_credentials.json", "w") as f:
    json.dump(credentials, f)

In [3]:
with open("twitter_credentials.json", "r") as f:
    creds = json.load(f)
    
auth = tweepy.OAuthHandler(creds['CONSUMER_KEY'], creds['CONSUMER_SECRET'])
auth.set_access_token(creds['ACCESS_TOKEN'], creds['ACCESS_SECRET'])

api = tweepy.API(auth, wait_on_rate_limit = True,
                wait_on_rate_limit_notify = True)
try:
    api.verify_credentials()
    print("Authentication OK")
except:
    print("Error during Authentication")

Authentication OK


In [4]:
# Code for collecting tweets for a list of keywords
def collectTweets(key, query, df, dateList):
    for date in dateList:
        for tweet in api.search(q=query, lang = "en", count = 100, toDate = date):
            actress_name = key
            text = tweet.text
            favorite_count = tweet.favorite_count
            retweet_count = tweet.retweet_count
            date_posted = tweet.created_at 
            df.loc[len(df.index)] = [key, text, favorite_count, retweet_count, date]       

In [2]:
n = ["Anthony Hopkins", "Riz Ahmed", "Chadwick Boseman", "Gary Oldman", "Steven Yeun"]

nominees_keyword_dict = {n[0]:"%23oscars%20%23anthonyhopkins%20-filter%3Aretweets", 
                         n[1]:"%23oscars%20%23rizahmed%20-filter%3Aretweets", 
                         n[2]:"%23oscars%20%23chadwickboseman%20-filter%3Aretweets", 
                         n[3]:"%23oscars%20%23garyoldman%20-filter%3Aretweets", 
                         n[4]:"%23oscars%20%23stevenyeun%20-filter%3Aretweets"}

tweets_df = pd.DataFrame(columns = ["actor_name", "text", "favorite_count", "retweet_count", "date"])

dates = ["Sat Apr 23 12:00:00 +0000 2021",
         "Sat Apr 22 12:00:00 +0000 2021",
         "Sat Apr 21 12:00:00 +0000 2021",
         "Sat Apr 20 12:00:00 +0000 2021",
         "Sat Apr 19 12:00:00 +0000 2021",
         "Sat Apr 18 12:00:00 +0000 2021",
         "Sat Apr 17 12:00:00 +0000 2021",
         "Sat Apr 16 12:00:00 +0000 2021"]

In [6]:
# Collecting tweets for best actress
for key in nominees_keyword_dict:
    collectTweets(key, nominees_keyword_dict[key], tweets_df, dates)

In [7]:
tweets_df

Unnamed: 0,actor_name,text,favorite_count,retweet_count,date
0,Anthony Hopkins,RT @La_Cle_ANGLAISE: #actualités 🇺🇸\n#Oscars20...,0,2,Sat Apr 23 12:00:00 +0000 2021
1,Anthony Hopkins,Anthony Hopkins won the Best Actor award for T...,0,0,Sat Apr 23 12:00:00 +0000 2021
2,Anthony Hopkins,RT @opendoorpeople: Let’s make sure we note -\...,0,49,Sat Apr 23 12:00:00 +0000 2021
3,Anthony Hopkins,RT @JoshSabarra: OSCAR THROWBACK: Anthony Hopk...,0,31,Sat Apr 23 12:00:00 +0000 2021
4,Anthony Hopkins,RT @JoshSabarra: OSCAR THROWBACK: Anthony Hopk...,0,31,Sat Apr 23 12:00:00 +0000 2021
...,...,...,...,...,...
3947,Steven Yeun,"Taylor, Michigan shout-out on the Oscars: Who ...",5,0,Sat Apr 16 12:00:00 +0000 2021
3948,Steven Yeun,The newest #BestActor #Oscar winner just walke...,0,0,Sat Apr 16 12:00:00 +0000 2021
3949,Steven Yeun,RT @pinkvilla: The cast of Minari set the Osca...,0,4,Sat Apr 16 12:00:00 +0000 2021
3950,Steven Yeun,Steven Yeun @the Oscars... still waiting for B...,0,0,Sat Apr 16 12:00:00 +0000 2021


In [8]:
# Number of tweets for Carey Mulligan for the Oscars prior to them taking place
tweets_df[tweets_df["actor_name"] == n[0]]

Unnamed: 0,actor_name,text,favorite_count,retweet_count,date
0,Anthony Hopkins,RT @La_Cle_ANGLAISE: #actualités 🇺🇸\n#Oscars20...,0,2,Sat Apr 23 12:00:00 +0000 2021
1,Anthony Hopkins,Anthony Hopkins won the Best Actor award for T...,0,0,Sat Apr 23 12:00:00 +0000 2021
2,Anthony Hopkins,RT @opendoorpeople: Let’s make sure we note -\...,0,49,Sat Apr 23 12:00:00 +0000 2021
3,Anthony Hopkins,RT @JoshSabarra: OSCAR THROWBACK: Anthony Hopk...,0,31,Sat Apr 23 12:00:00 +0000 2021
4,Anthony Hopkins,RT @JoshSabarra: OSCAR THROWBACK: Anthony Hopk...,0,31,Sat Apr 23 12:00:00 +0000 2021
...,...,...,...,...,...
795,Anthony Hopkins,RT @JoshSabarra: OSCAR THROWBACK: Anthony Hopk...,0,31,Sat Apr 16 12:00:00 +0000 2021
796,Anthony Hopkins,Folks aren’t feeling how the #Oscars seemingly...,4,0,Sat Apr 16 12:00:00 +0000 2021
797,Anthony Hopkins,RT @opendoorpeople: Let’s make sure we note -\...,0,49,Sat Apr 16 12:00:00 +0000 2021
798,Anthony Hopkins,RT @cyrilicioushawk: #AnthonyHopkins thanks an...,0,31,Sat Apr 16 12:00:00 +0000 2021


In [9]:
tweets_df[tweets_df["actor_name"] == n[1]]

Unnamed: 0,actor_name,text,favorite_count,retweet_count,date
800,Riz Ahmed,RT @ogseran: Riz Ahmed may have been nominated...,0,0,Sat Apr 23 12:00:00 +0000 2021
801,Riz Ahmed,Recently a video went viral ft Riz Ahmed fixin...,0,0,Sat Apr 23 12:00:00 +0000 2021
802,Riz Ahmed,RT @filmfare: The moment that’s got everyone t...,0,26,Sat Apr 23 12:00:00 +0000 2021
803,Riz Ahmed,"@rizwanahmed Aw, shut up! #academyawards #osca...",0,0,Sat Apr 23 12:00:00 +0000 2021
804,Riz Ahmed,Idk why everyone’s is calling him “controlling...,1,0,Sat Apr 23 12:00:00 +0000 2021
...,...,...,...,...,...
1595,Riz Ahmed,RT @NetsNews21: 🎖️Oscars 2021: Check out the v...,0,68,Sat Apr 16 12:00:00 +0000 2021
1596,Riz Ahmed,RT @NetsNews21: 🎖️Oscars 2021: Check out the v...,0,68,Sat Apr 16 12:00:00 +0000 2021
1597,Riz Ahmed,RT @NetsNews21: 🎖️Oscars 2021: Check out the v...,0,68,Sat Apr 16 12:00:00 +0000 2021
1598,Riz Ahmed,RT @khaleejtimes: #Oscars2021: #RizAhmed and h...,0,4,Sat Apr 16 12:00:00 +0000 2021


In [10]:
tweets_df[tweets_df["actor_name"] == n[2]]

Unnamed: 0,actor_name,text,favorite_count,retweet_count,date
1600,Chadwick Boseman,RT @tonetalks: “Did Daniel Kaluuya Win Chadwic...,0,3,Sat Apr 23 12:00:00 +0000 2021
1601,Chadwick Boseman,RT @tonetalks: “Did Daniel Kaluuya Win Chadwic...,0,3,Sat Apr 23 12:00:00 +0000 2021
1602,Chadwick Boseman,RT @ABC7: #ChadwickBoseman had been heavily fa...,0,11,Sat Apr 23 12:00:00 +0000 2021
1603,Chadwick Boseman,RT @tonetalks: “Did Daniel Kaluuya Win Chadwic...,0,3,Sat Apr 23 12:00:00 +0000 2021
1604,Chadwick Boseman,“Did Daniel Kaluuya Win Chadwick Boseman's Osc...,13,3,Sat Apr 23 12:00:00 +0000 2021
...,...,...,...,...,...
2395,Chadwick Boseman,RT @ComicBook: #ChadwickBoseman gets an NFT tr...,0,512,Sat Apr 16 12:00:00 +0000 2021
2396,Chadwick Boseman,They...made.... an....#NFT .....of...#Chadwick...,0,0,Sat Apr 16 12:00:00 +0000 2021
2397,Chadwick Boseman,RT @dilani_r: #IrrfanKhan &amp; #ChadwickBosem...,0,91,Sat Apr 16 12:00:00 +0000 2021
2398,Chadwick Boseman,Folks aren’t feeling how the #Oscars seemingly...,4,0,Sat Apr 16 12:00:00 +0000 2021


In [11]:
tweets_df[tweets_df["actor_name"] == n[3]]

Unnamed: 0,actor_name,text,favorite_count,retweet_count,date
2400,Gary Oldman,"RT @PaulSmithDesign: In 2012, #GaryOldman chos...",0,9,Sat Apr 23 12:00:00 +0000 2021
2401,Gary Oldman,RT @GettyVIP: Gary Oldman of @MankFilm attends...,0,15,Sat Apr 23 12:00:00 +0000 2021
2402,Gary Oldman,"RT @PaulSmithDesign: In 2012, #GaryOldman chos...",0,9,Sat Apr 23 12:00:00 +0000 2021
2403,Gary Oldman,"RT @PaulSmithDesign: In 2012, #GaryOldman chos...",0,9,Sat Apr 23 12:00:00 +0000 2021
2404,Gary Oldman,"RT @PaulSmithDesign: In 2012, #GaryOldman chos...",0,9,Sat Apr 23 12:00:00 +0000 2021
...,...,...,...,...,...
3147,Gary Oldman,🎬 Mank \n#garyoldman #Oscars https://t.co/EM74...,0,0,Sat Apr 16 12:00:00 +0000 2021
3148,Gary Oldman,Victorias en MEJOR ACTOR 🎭\n\n#ChadwickBoseman...,0,0,Sat Apr 16 12:00:00 +0000 2021
3149,Gary Oldman,With the @TheAcademy awards #Oscars coming up ...,0,0,Sat Apr 16 12:00:00 +0000 2021
3150,Gary Oldman,#CountdowntoOscars\nNominated film @MankFilm i...,0,0,Sat Apr 16 12:00:00 +0000 2021


In [12]:
tweets_df[tweets_df["actor_name"] == n[4]]

Unnamed: 0,actor_name,text,favorite_count,retweet_count,date
3152,Steven Yeun,RT @gucci: Nominee for Best Actor in a Leading...,0,75,Sat Apr 23 12:00:00 +0000 2021
3153,Steven Yeun,These are the top 10 people being mentioned wi...,1,0,Sat Apr 23 12:00:00 +0000 2021
3154,Steven Yeun,#LIFESTYLE\n\n#StevenYeun’s #Oscars Hair Pays ...,0,0,Sat Apr 23 12:00:00 +0000 2021
3155,Steven Yeun,can it do it? It’s “Tenet”! \n(@fionadourif @H...,0,0,Sat Apr 23 12:00:00 +0000 2021
3156,Steven Yeun,"As far as I'm concerned, the biggest winner of...",1,0,Sat Apr 23 12:00:00 +0000 2021
...,...,...,...,...,...
3947,Steven Yeun,"Taylor, Michigan shout-out on the Oscars: Who ...",5,0,Sat Apr 16 12:00:00 +0000 2021
3948,Steven Yeun,The newest #BestActor #Oscar winner just walke...,0,0,Sat Apr 16 12:00:00 +0000 2021
3949,Steven Yeun,RT @pinkvilla: The cast of Minari set the Osca...,0,4,Sat Apr 16 12:00:00 +0000 2021
3950,Steven Yeun,Steven Yeun @the Oscars... still waiting for B...,0,0,Sat Apr 16 12:00:00 +0000 2021


In [13]:
tweets_df[tweets_df["actor_name"] == n[4]].iloc[0]

actor_name                                              Steven Yeun
text              RT @gucci: Nominee for Best Actor in a Leading...
favorite_count                                                    0
retweet_count                                                    75
date                                 Sat Apr 23 12:00:00 +0000 2021
Name: 3152, dtype: object

In [14]:
tweets_df.to_pickle("best_actor.pkl")

In [3]:
train = pd.read_pickle("best_actor.pkl")

# Text Processing

In [4]:
train

Unnamed: 0,actor_name,text,favorite_count,retweet_count,date
0,Anthony Hopkins,RT @La_Cle_ANGLAISE: #actualités 🇺🇸\n#Oscars20...,0,2,Sat Apr 23 12:00:00 +0000 2021
1,Anthony Hopkins,Anthony Hopkins won the Best Actor award for T...,0,0,Sat Apr 23 12:00:00 +0000 2021
2,Anthony Hopkins,RT @opendoorpeople: Let’s make sure we note -\...,0,49,Sat Apr 23 12:00:00 +0000 2021
3,Anthony Hopkins,RT @JoshSabarra: OSCAR THROWBACK: Anthony Hopk...,0,31,Sat Apr 23 12:00:00 +0000 2021
4,Anthony Hopkins,RT @JoshSabarra: OSCAR THROWBACK: Anthony Hopk...,0,31,Sat Apr 23 12:00:00 +0000 2021
...,...,...,...,...,...
3947,Steven Yeun,"Taylor, Michigan shout-out on the Oscars: Who ...",5,0,Sat Apr 16 12:00:00 +0000 2021
3948,Steven Yeun,The newest #BestActor #Oscar winner just walke...,0,0,Sat Apr 16 12:00:00 +0000 2021
3949,Steven Yeun,RT @pinkvilla: The cast of Minari set the Osca...,0,4,Sat Apr 16 12:00:00 +0000 2021
3950,Steven Yeun,Steven Yeun @the Oscars... still waiting for B...,0,0,Sat Apr 16 12:00:00 +0000 2021


In [5]:
import nltk
from nltk.corpus import stopwords
additional  = ['rt','rts','retweet']
swords = set().union(stopwords.words('english'),additional)

In [6]:
train['processed_text'] = train['text'].str.lower()\
          .str.replace('(@[a-z0-9]+)\w+',' ')\
          .str.replace('(http\S+)', ' ')\
          .str.replace('([^0-9a-z \t])',' ')\
          .str.replace(' +',' ')\
          .str.replace('(@[0-9]+)\w+',' ')\
          .apply(lambda x: [i for i in x.split() if not i in swords])

In [7]:
from nltk.stem import PorterStemmer
ps = PorterStemmer()
train['stemmed'] = train['processed_text'].apply(lambda x: [ps.stem(i) for i in x if i != ''])

In [8]:
train["text2"] = train["processed_text"].str.join(" ")

In [9]:
train["text3"] = train["stemmed"].str.join(" ")

In [10]:
train

Unnamed: 0,actor_name,text,favorite_count,retweet_count,date,processed_text,stemmed,text2,text3
0,Anthony Hopkins,RT @La_Cle_ANGLAISE: #actualités 🇺🇸\n#Oscars20...,0,2,Sat Apr 23 12:00:00 +0000 2021,"[actualit, oscars2021, chlo, zhao, made, histo...","[actualit, oscars2021, chlo, zhao, made, histo...",actualit oscars2021 chlo zhao made history fir...,actualit oscars2021 chlo zhao made histori fir...
1,Anthony Hopkins,Anthony Hopkins won the Best Actor award for T...,0,0,Sat Apr 23 12:00:00 +0000 2021,"[anthony, hopkins, best, actor, award, father,...","[anthoni, hopkin, best, actor, award, father, ...",anthony hopkins best actor award father becomi...,anthoni hopkin best actor award father becom o...
2,Anthony Hopkins,RT @opendoorpeople: Let’s make sure we note -\...,0,49,Sat Apr 23 12:00:00 +0000 2021,"[let, make, sure, note, british, working, clas...","[let, make, sure, note, british, work, class, ...",let make sure note british working class talen...,let make sure note british work class talent e...
3,Anthony Hopkins,RT @JoshSabarra: OSCAR THROWBACK: Anthony Hopk...,0,31,Sat Apr 23 12:00:00 +0000 2021,"[oscar, throwback, anthony, hopkins, best, thi...","[oscar, throwback, anthoni, hopkin, best, thin...",oscar throwback anthony hopkins best think cam...,oscar throwback anthoni hopkin best think camp...
4,Anthony Hopkins,RT @JoshSabarra: OSCAR THROWBACK: Anthony Hopk...,0,31,Sat Apr 23 12:00:00 +0000 2021,"[oscar, throwback, anthony, hopkins, best, thi...","[oscar, throwback, anthoni, hopkin, best, thin...",oscar throwback anthony hopkins best think cam...,oscar throwback anthoni hopkin best think camp...
...,...,...,...,...,...,...,...,...,...
3947,Steven Yeun,"Taylor, Michigan shout-out on the Oscars: Who ...",5,0,Sat Apr 16 12:00:00 +0000 2021,"[taylor, michigan, shout, oscars, bingo, card,...","[taylor, michigan, shout, oscar, bingo, card, ...",taylor michigan shout oscars bingo card steven...,taylor michigan shout oscar bingo card steveny...
3948,Steven Yeun,The newest #BestActor #Oscar winner just walke...,0,0,Sat Apr 16 12:00:00 +0000 2021,"[newest, bestactor, oscar, winner, walked, ste...","[newest, bestactor, oscar, winner, walk, step,...",newest bestactor oscar winner walked steps ste...,newest bestactor oscar winner walk step steven...
3949,Steven Yeun,RT @pinkvilla: The cast of Minari set the Osca...,0,4,Sat Apr 16 12:00:00 +0000 2021,"[cast, minari, set, oscars, red, carpet, aligh...","[cast, minari, set, oscar, red, carpet, alight...",cast minari set oscars red carpet alight alan ...,cast minari set oscar red carpet alight alan k...
3950,Steven Yeun,Steven Yeun @the Oscars... still waiting for B...,0,0,Sat Apr 16 12:00:00 +0000 2021,"[steven, yeun, oscars, still, waiting, best, a...","[steven, yeun, oscar, still, wait, best, actor...",steven yeun oscars still waiting best actor ca...,steven yeun oscar still wait best actor catego...


# Calculating Average Sentiment Scores

In [11]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [12]:
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Hojin\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [13]:
sia = SentimentIntensityAnalyzer()

In [14]:
def sentiment_calcnltk(text):
    try:
        return sia.polarity_scores(text)
    except:
        return None

In [15]:
train['NLTKsentiment_raw'] = train["text3"].apply(sentiment_calcnltk)

In [16]:
iterate = list(train["NLTKsentiment_raw"])
compoundList = []
for t in iterate:
    compound = t["compound"]
    compoundList.append(compound)
train["NLTKsentiment"] = compoundList

In [17]:
train

Unnamed: 0,actor_name,text,favorite_count,retweet_count,date,processed_text,stemmed,text2,text3,NLTKsentiment_raw,NLTKsentiment
0,Anthony Hopkins,RT @La_Cle_ANGLAISE: #actualités 🇺🇸\n#Oscars20...,0,2,Sat Apr 23 12:00:00 +0000 2021,"[actualit, oscars2021, chlo, zhao, made, histo...","[actualit, oscars2021, chlo, zhao, made, histo...",actualit oscars2021 chlo zhao made history fir...,actualit oscars2021 chlo zhao made histori fir...,"{'neg': 0.0, 'neu': 0.529, 'pos': 0.471, 'comp...",0.8402
1,Anthony Hopkins,Anthony Hopkins won the Best Actor award for T...,0,0,Sat Apr 23 12:00:00 +0000 2021,"[anthony, hopkins, best, actor, award, father,...","[anthoni, hopkin, best, actor, award, father, ...",anthony hopkins best actor award father becomi...,anthoni hopkin best actor award father becom o...,"{'neg': 0.0, 'neu': 0.318, 'pos': 0.682, 'comp...",0.9432
2,Anthony Hopkins,RT @opendoorpeople: Let’s make sure we note -\...,0,49,Sat Apr 23 12:00:00 +0000 2021,"[let, make, sure, note, british, working, clas...","[let, make, sure, note, british, work, class, ...",let make sure note british working class talen...,let make sure note british work class talent e...,"{'neg': 0.0, 'neu': 0.597, 'pos': 0.403, 'comp...",0.7964
3,Anthony Hopkins,RT @JoshSabarra: OSCAR THROWBACK: Anthony Hopk...,0,31,Sat Apr 23 12:00:00 +0000 2021,"[oscar, throwback, anthony, hopkins, best, thi...","[oscar, throwback, anthoni, hopkin, best, thin...",oscar throwback anthony hopkins best think cam...,oscar throwback anthoni hopkin best think camp...,"{'neg': 0.0, 'neu': 0.656, 'pos': 0.344, 'comp...",0.6369
4,Anthony Hopkins,RT @JoshSabarra: OSCAR THROWBACK: Anthony Hopk...,0,31,Sat Apr 23 12:00:00 +0000 2021,"[oscar, throwback, anthony, hopkins, best, thi...","[oscar, throwback, anthoni, hopkin, best, thin...",oscar throwback anthony hopkins best think cam...,oscar throwback anthoni hopkin best think camp...,"{'neg': 0.0, 'neu': 0.656, 'pos': 0.344, 'comp...",0.6369
...,...,...,...,...,...,...,...,...,...,...,...
3947,Steven Yeun,"Taylor, Michigan shout-out on the Oscars: Who ...",5,0,Sat Apr 16 12:00:00 +0000 2021,"[taylor, michigan, shout, oscars, bingo, card,...","[taylor, michigan, shout, oscar, bingo, card, ...",taylor michigan shout oscars bingo card steven...,taylor michigan shout oscar bingo card steveny...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0000
3948,Steven Yeun,The newest #BestActor #Oscar winner just walke...,0,0,Sat Apr 16 12:00:00 +0000 2021,"[newest, bestactor, oscar, winner, walked, ste...","[newest, bestactor, oscar, winner, walk, step,...",newest bestactor oscar winner walked steps ste...,newest bestactor oscar winner walk step steven...,"{'neg': 0.0, 'neu': 0.703, 'pos': 0.297, 'comp...",0.5859
3949,Steven Yeun,RT @pinkvilla: The cast of Minari set the Osca...,0,4,Sat Apr 16 12:00:00 +0000 2021,"[cast, minari, set, oscars, red, carpet, aligh...","[cast, minari, set, oscar, red, carpet, alight...",cast minari set oscars red carpet alight alan ...,cast minari set oscar red carpet alight alan k...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0000
3950,Steven Yeun,Steven Yeun @the Oscars... still waiting for B...,0,0,Sat Apr 16 12:00:00 +0000 2021,"[steven, yeun, oscars, still, waiting, best, a...","[steven, yeun, oscar, still, wait, best, actor...",steven yeun oscars still waiting best actor ca...,steven yeun oscar still wait best actor catego...,"{'neg': 0.0, 'neu': 0.741, 'pos': 0.259, 'comp...",0.6369


In [18]:
ob1 = train[train["actor_name"] == n[0]]
ob2 = train[train["actor_name"] == n[1]]
ob3 = train[train["actor_name"] == n[2]]
ob4 = train[train["actor_name"] == n[3]]
ob5 = train[train["actor_name"] == n[4]]                     

In [19]:
score1 = ob1["NLTKsentiment"].mean()
score2 = ob2["NLTKsentiment"].mean()
score3 = ob3["NLTKsentiment"].mean()
score4 = ob4["NLTKsentiment"].mean()
score5 = ob5["NLTKsentiment"].mean()

print(n[0], "Average Sentiment Score", score1)
print(n[1], "Average Sentiment Score", score2)
print(n[2], "Average Sentiment Score", score3)
print(n[3], "Average Sentiment Score", score4)
print(n[4], "Average Sentiment Score", score5)

Anthony Hopkins Average Sentiment Score 0.4451680000000015
Riz Ahmed Average Sentiment Score 0.4173220000000014
Chadwick Boseman Average Sentiment Score 0.24187700000000073
Gary Oldman Average Sentiment Score 0.43240106382979104
Steven Yeun Average Sentiment Score 0.34156499999999995


In [20]:
sentiment = {n[0]: score1,
             n[1]: score2,
             n[2]: score3,
             n[3]: score4,
             n[4]: score5}

# IMDB Movie Ratings, Rotten Tomatoes Critic Scores

In [21]:
# Scores were collected on April 24th

# Out of 10
imdb = {n[0]: 8.3,
        n[1]: 7.8,
        n[2]: 7.0,
        n[3]: 6.9,
        n[4]: 7.6}

# Percentage
rt_critics = {n[0]: 0.98,
              n[1]: 0.97,
              n[2]: 0.98,
              n[3]: 0.83,
              n[4]: 0.98}

# Percentage
rt_audience = {n[0]: 0.91,
               n[1]: 0.91,
               n[2]: 0.74,
               n[3]: 0.59,
               n[4]: 0.88}

# Scoring Model

In [22]:
# We will calculate the percentage chance that an Actress will win the Oscars.
# Our paper is a commentary on public opinion 
# Therefore we need to scale our metrics  to 0.25 each

def scaling(score, old_range):
    new_range = (0, 0.25)
    mini = old_range[0]
    maxi = old_range[1]
    percent = (score - (mini)) / (maxi - (mini))
    # Scaling formula
    weighted = new_range[1] * percent + new_range[0]
    return weighted

In [23]:
actor_win = {}
actors_df = pd.DataFrame(columns = ["name", "category", "imdb_audience_score", "rt_critic_score", "rt_audience_score", "sentiment_score", "oscar_win"])

In [24]:
# Calculating Total Probability of Oscar win and saving results to a csv
for actor in n:
    sentiment_score = scaling(sentiment[actor], (-1, 1))
    imdb_score = scaling(imdb[actor], (0, 10))
    rt_critics_score = scaling(rt_critics[actor], (0, 1))
    rt_audience_score = scaling(rt_audience[actor], (0, 1))
    percentage_win = sentiment_score + imdb_score + rt_critics_score + rt_audience_score
    actor_win[actor] = percentage_win
    actors_df.loc[len(actors_df.index)] = [actor, "actor", imdb_score, rt_critics_score, rt_audience_score, 
                                           sentiment_score, percentage_win]
actors_df.to_csv("actor_results.csv", index = False)

In [25]:
for key in actor_win:
    print(key, "score is", actor_win[key])

Anthony Hopkins score is 0.8606460000000002
Riz Ahmed score is 0.8421652500000002
Chadwick Boseman score is 0.760234625
Gary Oldman score is 0.7065501329787238
Steven Yeun score is 0.8226956249999999
