# Tweet Collection from the Twitter API

The categories that we are predicting for this project are **Best Picture**, **Best Actor**, **Best Actress**, **Best Supporting Actor**, and **Best Supporting Actress**.

The technical goal is to store tweets under jsons for each Oscar Nominee. There will be one json for each category.

tweets -> keyword -> nominee -> json

### Getting authentication to collect

In [1]:
import json
import tweepy
import pandas as pd

In [2]:
#keys/secrets
credentials = {}
credentials['CONSUMER_KEY'] = ""
credentials['CONSUMER_SECRET'] = ""
credentials['ACCESS_TOKEN'] = ""
credentials['ACCESS_SECRET'] = ""

with open("twitter_credentials.json", "w") as f:
    json.dump(credentials, f)

In [3]:
with open("twitter_credentials.json", "r") as f:
    creds = json.load(f)
    
auth = tweepy.OAuthHandler(creds['CONSUMER_KEY'], creds['CONSUMER_SECRET'])
auth.set_access_token(creds['ACCESS_TOKEN'], creds['ACCESS_SECRET'])

api = tweepy.API(auth, wait_on_rate_limit = True,
                wait_on_rate_limit_notify = True)
try:
    api.verify_credentials()
    print("Authentication OK")
except:
    print("Error during Authentication")

Authentication OK


In [4]:
# Code for collecting tweets for a list of keywords
def collectTweets(key, query, df, dateList):
    for date in dateList:
        for tweet in api.search(q=query, lang = "en", count = 100, toDate = date):
            actress_name = key
            text = tweet.text
            favorite_count = tweet.favorite_count
            retweet_count = tweet.retweet_count
            date_posted = tweet.created_at 
            df.loc[len(df.index)] = [key, text, favorite_count, retweet_count, date]       

In [5]:
nominees_keyword_dict = {"Viola Davis":"%23oscars%20%23violadavis%20-filter%3Aretweets", 
                         "Andra Day":"%23oscars%20%23andraday%20-filter%3Aretweets", 
                         "Vanessa Kirby":"%23oscars%20%23vanessakirby%20-filter%3Aretweets", 
                         "Frances McDormand":"%23oscars%20%23francesmcdormand%20-filter%3Aretweets", 
                         "Carey Mulligan":"%23oscars%20%23careymulligan%20-filter%3Aretweets"}

tweets_df = pd.DataFrame(columns = ["actress_name", "text", "favorite_count", "retweet_count", "date"])

dates = ["Sat Apr 23 12:00:00 +0000 2021",
         "Sat Apr 22 12:00:00 +0000 2021",
         "Sat Apr 21 12:00:00 +0000 2021",
         "Sat Apr 20 12:00:00 +0000 2021",
         "Sat Apr 19 12:00:00 +0000 2021",
         "Sat Apr 18 12:00:00 +0000 2021",
         "Sat Apr 17 12:00:00 +0000 2021",
         "Sat Apr 16 12:00:00 +0000 2021"]

In [22]:
actresses = ["Viola Davis", "Andra Day", "Vanessa Kirby", "Frances McDormand", "Carey Mulligan"]

In [6]:
# Collecting tweets for best actress
for key in nominees_keyword_dict:
    collectTweets(key, nominees_keyword_dict[key], tweets_df, dates)

In [7]:
tweets_df

Unnamed: 0,actress_name,text,favorite_count,retweet_count,date
0,Viola Davis,Anddddddd the Best Actress goesssss to 🏆@viola...,1,0,Sat Apr 23 12:00:00 +0000 2021
1,Viola Davis,RT @McQueen: See the stages of design of the c...,0,82,Sat Apr 23 12:00:00 +0000 2021
2,Viola Davis,RT @McQueen: See the stages of design of the c...,0,82,Sat Apr 23 12:00:00 +0000 2021
3,Viola Davis,"Divest from the white gaze, divorce from white...",0,0,Sat Apr 23 12:00:00 +0000 2021
4,Viola Davis,RT @McQueen: See the stages of design of the c...,0,82,Sat Apr 23 12:00:00 +0000 2021
...,...,...,...,...,...
3995,Carey Mulligan,RT @johnruns45: Let’s just take a moment to re...,0,71,Sat Apr 16 12:00:00 +0000 2021
3996,Carey Mulligan,#FrancesMcDormand triumphed over the following...,2,0,Sat Apr 16 12:00:00 +0000 2021
3997,Carey Mulligan,RT @johnruns45: Let’s just take a moment to re...,0,71,Sat Apr 16 12:00:00 +0000 2021
3998,Carey Mulligan,Although #FrancisMcDormand was gud in the film...,0,0,Sat Apr 16 12:00:00 +0000 2021


In [8]:
# Number of tweets for Carey Mulligan for the Oscars prior to them taking place
tweets_df[tweets_df["actress_name"] == "Carey Mulligan"]

Unnamed: 0,actress_name,text,favorite_count,retweet_count,date
3200,Carey Mulligan,Glass picks the best dressed of the Oscars 202...,1,0,Sat Apr 23 12:00:00 +0000 2021
3201,Carey Mulligan,RT @johnruns45: Let’s just take a moment to re...,0,71,Sat Apr 23 12:00:00 +0000 2021
3202,Carey Mulligan,RT @MaisonValentino: Nominee for Best Actress ...,0,27,Sat Apr 23 12:00:00 +0000 2021
3203,Carey Mulligan,Did you see me at the Oscars last night? #phot...,0,0,Sat Apr 23 12:00:00 +0000 2021
3204,Carey Mulligan,Very disappointed that #CareyMulligan has not ...,0,0,Sat Apr 23 12:00:00 +0000 2021
...,...,...,...,...,...
3995,Carey Mulligan,RT @johnruns45: Let’s just take a moment to re...,0,71,Sat Apr 16 12:00:00 +0000 2021
3996,Carey Mulligan,#FrancesMcDormand triumphed over the following...,2,0,Sat Apr 16 12:00:00 +0000 2021
3997,Carey Mulligan,RT @johnruns45: Let’s just take a moment to re...,0,71,Sat Apr 16 12:00:00 +0000 2021
3998,Carey Mulligan,Although #FrancisMcDormand was gud in the film...,0,0,Sat Apr 16 12:00:00 +0000 2021


In [9]:
tweets_df[tweets_df["actress_name"] == "Viola Davis"]

Unnamed: 0,actress_name,text,favorite_count,retweet_count,date
0,Viola Davis,Anddddddd the Best Actress goesssss to 🏆@viola...,1,0,Sat Apr 23 12:00:00 +0000 2021
1,Viola Davis,RT @McQueen: See the stages of design of the c...,0,82,Sat Apr 23 12:00:00 +0000 2021
2,Viola Davis,RT @McQueen: See the stages of design of the c...,0,82,Sat Apr 23 12:00:00 +0000 2021
3,Viola Davis,"Divest from the white gaze, divorce from white...",0,0,Sat Apr 23 12:00:00 +0000 2021
4,Viola Davis,RT @McQueen: See the stages of design of the c...,0,82,Sat Apr 23 12:00:00 +0000 2021
...,...,...,...,...,...
795,Viola Davis,@dc_heights I'm so fucking angry. I've had eno...,0,0,Sat Apr 16 12:00:00 +0000 2021
796,Viola Davis,@Desmonddotcom I'm so fucking angry. I've had ...,0,0,Sat Apr 16 12:00:00 +0000 2021
797,Viola Davis,@coho3188 I'm so fucking angry. I've had enoug...,0,0,Sat Apr 16 12:00:00 +0000 2021
798,Viola Davis,@Shashana80sKid I'm so fucking angry. I've had...,1,0,Sat Apr 16 12:00:00 +0000 2021


In [10]:
tweets_df[tweets_df["actress_name"] == "Andra Day"]

Unnamed: 0,actress_name,text,favorite_count,retweet_count,date
800,Andra Day,My #bestdressed at the #Oscars2021 \nI am so o...,0,0,Sat Apr 23 12:00:00 +0000 2021
801,Andra Day,RT @StarCreativeI: @FalconYourHero @GMA @Veriz...,0,1,Sat Apr 23 12:00:00 +0000 2021
802,Andra Day,#AndraDay should have won 🙄 #Oscars,0,0,Sat Apr 23 12:00:00 +0000 2021
803,Andra Day,RT @tgatp: @tonyenos @AndraDayMusic @DianaRoss...,0,1,Sat Apr 23 12:00:00 +0000 2021
804,Andra Day,@FalconYourHero @GMA @Verizon #AndraDay was al...,0,1,Sat Apr 23 12:00:00 +0000 2021
...,...,...,...,...,...
1595,Andra Day,So they snubbed #AndraDay AND #ChadwickBoseman...,0,0,Sat Apr 16 12:00:00 +0000 2021
1596,Andra Day,#Oscars what a complete let down at the end! ...,0,0,Sat Apr 16 12:00:00 +0000 2021
1597,Andra Day,#ChadwickBoseman should have won the #Oscars ...,0,0,Sat Apr 16 12:00:00 +0000 2021
1598,Andra Day,How tf does #ChadwickBoseman not win for #Best...,5,1,Sat Apr 16 12:00:00 +0000 2021


In [11]:
tweets_df[tweets_df["actress_name"] == "Vanessa Kirby"]

Unnamed: 0,actress_name,text,favorite_count,retweet_count,date
1600,Vanessa Kirby,RT @gucci: On the red carpet of @theacademy 20...,0,109,Sat Apr 23 12:00:00 +0000 2021
1601,Vanessa Kirby,RT @gucci: On the red carpet of @theacademy 20...,0,109,Sat Apr 23 12:00:00 +0000 2021
1602,Vanessa Kirby,RT @Cartier: In a rare moment before the #Osca...,0,29,Sat Apr 23 12:00:00 +0000 2021
1603,Vanessa Kirby,RT @Cartier: In a rare moment before the #Osca...,0,29,Sat Apr 23 12:00:00 +0000 2021
1604,Vanessa Kirby,RT @Cartier: In a rare moment before the #Osca...,0,29,Sat Apr 23 12:00:00 +0000 2021
...,...,...,...,...,...
2395,Vanessa Kirby,RT @gucci: On the red carpet of @theacademy 20...,0,109,Sat Apr 16 12:00:00 +0000 2021
2396,Vanessa Kirby,RT @gucci: On the red carpet of @theacademy 20...,0,109,Sat Apr 16 12:00:00 +0000 2021
2397,Vanessa Kirby,RT @gucci: On the red carpet of @theacademy 20...,0,109,Sat Apr 16 12:00:00 +0000 2021
2398,Vanessa Kirby,Did she know she’d match her surroundings? The...,0,0,Sat Apr 16 12:00:00 +0000 2021


In [12]:
tweets_df[tweets_df["actress_name"] == "Frances McDormand"]

Unnamed: 0,actress_name,text,favorite_count,retweet_count,date
2400,Frances McDormand,Great ! That’s just Great ! 🤨\n\nNow the price...,0,0,Sat Apr 23 12:00:00 +0000 2021
2401,Frances McDormand,Only Katharine Hepburn has more Best Actress O...,0,0,Sat Apr 23 12:00:00 +0000 2021
2402,Frances McDormand,"RT @MaisonValentino: #FrancesMcDormand, who wo...",0,20,Sat Apr 23 12:00:00 +0000 2021
2403,Frances McDormand,RT @AwardShowNews: #YuhJungYoun #DanielKaluuya...,0,8,Sat Apr 23 12:00:00 +0000 2021
2404,Frances McDormand,RT @EagleBrookCap: $SPRV\nBreaking News\nhttps...,0,1,Sat Apr 23 12:00:00 +0000 2021
...,...,...,...,...,...
3195,Frances McDormand,#Oscars : #FrancesMcDormand Wins Best Actress ...,2,0,Sat Apr 16 12:00:00 +0000 2021
3196,Frances McDormand,#FrancesMcDormand winning her 3rd Best Actress...,1,0,Sat Apr 16 12:00:00 +0000 2021
3197,Frances McDormand,RT @albinokid: I am very happy to have seen @n...,0,15,Sat Apr 16 12:00:00 +0000 2021
3198,Frances McDormand,I can’t wait to watch #Nomadland on 30th April...,0,0,Sat Apr 16 12:00:00 +0000 2021


In [13]:
tweets_df[tweets_df["actress_name"] == "Carey Mulligan"].iloc[0]

actress_name                                         Carey Mulligan
text              Glass picks the best dressed of the Oscars 202...
favorite_count                                                    1
retweet_count                                                     0
date                                 Sat Apr 23 12:00:00 +0000 2021
Name: 3200, dtype: object

In [14]:
tweets_df.to_pickle("best_actress.pkl")

In [2]:
train = pd.read_pickle("best_actress.pkl")

# Text Processing

In [3]:
train

Unnamed: 0,actress_name,text,favorite_count,retweet_count,date
0,Viola Davis,Anddddddd the Best Actress goesssss to 🏆@viola...,1,0,Sat Apr 23 12:00:00 +0000 2021
1,Viola Davis,RT @McQueen: See the stages of design of the c...,0,82,Sat Apr 23 12:00:00 +0000 2021
2,Viola Davis,RT @McQueen: See the stages of design of the c...,0,82,Sat Apr 23 12:00:00 +0000 2021
3,Viola Davis,"Divest from the white gaze, divorce from white...",0,0,Sat Apr 23 12:00:00 +0000 2021
4,Viola Davis,RT @McQueen: See the stages of design of the c...,0,82,Sat Apr 23 12:00:00 +0000 2021
...,...,...,...,...,...
3995,Carey Mulligan,RT @johnruns45: Let’s just take a moment to re...,0,71,Sat Apr 16 12:00:00 +0000 2021
3996,Carey Mulligan,#FrancesMcDormand triumphed over the following...,2,0,Sat Apr 16 12:00:00 +0000 2021
3997,Carey Mulligan,RT @johnruns45: Let’s just take a moment to re...,0,71,Sat Apr 16 12:00:00 +0000 2021
3998,Carey Mulligan,Although #FrancisMcDormand was gud in the film...,0,0,Sat Apr 16 12:00:00 +0000 2021


In [4]:
import nltk
from nltk.corpus import stopwords
additional  = ['rt','rts','retweet']
swords = set().union(stopwords.words('english'),additional)

In [5]:
train['processed_text'] = train['text'].str.lower()\
          .str.replace('(@[a-z0-9]+)\w+',' ')\
          .str.replace('(http\S+)', ' ')\
          .str.replace('([^0-9a-z \t])',' ')\
          .str.replace(' +',' ')\
          .str.replace('(@[0-9]+)\w+',' ')\
          .apply(lambda x: [i for i in x.split() if not i in swords])

In [6]:
from nltk.stem import PorterStemmer
ps = PorterStemmer()
train['stemmed'] = train['processed_text'].apply(lambda x: [ps.stem(i) for i in x if i != ''])

In [7]:
train["text2"] = train["processed_text"].str.join(" ")

In [8]:
train["text3"] = train["stemmed"].str.join(" ")

In [9]:
train

Unnamed: 0,actress_name,text,favorite_count,retweet_count,date,processed_text,stemmed,text2,text3
0,Viola Davis,Anddddddd the Best Actress goesssss to 🏆@viola...,1,0,Sat Apr 23 12:00:00 +0000 2021,"[anddddddd, best, actress, goesssss, violadavi...","[anddddddd, best, actress, goesssss, violadavi...",anddddddd best actress goesssss violadavis mar...,anddddddd best actress goesssss violadavi mara...
1,Viola Davis,RT @McQueen: See the stages of design of the c...,0,82,Sat Apr 23 12:00:00 +0000 2021,"[see, stages, design, custom, made, engineered...","[see, stage, design, custom, made, engin, whit...",see stages design custom made engineered white...,see stage design custom made engin white thist...
2,Viola Davis,RT @McQueen: See the stages of design of the c...,0,82,Sat Apr 23 12:00:00 +0000 2021,"[see, stages, design, custom, made, engineered...","[see, stage, design, custom, made, engin, whit...",see stages design custom made engineered white...,see stage design custom made engin white thist...
3,Viola Davis,"Divest from the white gaze, divorce from white...",0,0,Sat Apr 23 12:00:00 +0000 2021,"[divest, white, gaze, divorce, white, approval...","[divest, white, gaze, divorc, white, approv, d...",divest white gaze divorce white approval dejec...,divest white gaze divorc white approv deject w...
4,Viola Davis,RT @McQueen: See the stages of design of the c...,0,82,Sat Apr 23 12:00:00 +0000 2021,"[see, stages, design, custom, made, engineered...","[see, stage, design, custom, made, engin, whit...",see stages design custom made engineered white...,see stage design custom made engin white thist...
...,...,...,...,...,...,...,...,...,...
3995,Carey Mulligan,RT @johnruns45: Let’s just take a moment to re...,0,71,Sat Apr 16 12:00:00 +0000 2021,"[let, take, moment, recognize, actual, best, a...","[let, take, moment, recogn, actual, best, acto...",let take moment recognize actual best actor ac...,let take moment recogn actual best actor actre...
3996,Carey Mulligan,#FrancesMcDormand triumphed over the following...,2,0,Sat Apr 16 12:00:00 +0000 2021,"[francesmcdormand, triumphed, following, actre...","[francesmcdormand, triumph, follow, actress, w...",francesmcdormand triumphed following actresses...,francesmcdormand triumph follow actress win 2n...
3997,Carey Mulligan,RT @johnruns45: Let’s just take a moment to re...,0,71,Sat Apr 16 12:00:00 +0000 2021,"[let, take, moment, recognize, actual, best, a...","[let, take, moment, recogn, actual, best, acto...",let take moment recognize actual best actor ac...,let take moment recogn actual best actor actre...
3998,Carey Mulligan,Although #FrancisMcDormand was gud in the film...,0,0,Sat Apr 16 12:00:00 +0000 2021,"[although, francismcdormand, gud, film, feelin...","[although, francismcdormand, gud, film, feel, ...",although francismcdormand gud film feeling bad...,although francismcdormand gud film feel bad ca...


# Calculating Average Sentiment Scores

In [10]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [11]:
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Hojin\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [12]:
sia = SentimentIntensityAnalyzer()

In [13]:
def sentiment_calcnltk(text):
    try:
        return sia.polarity_scores(text)
    except:
        return None

In [14]:
train['NLTKsentiment_raw'] = train["text3"].apply(sentiment_calcnltk)

In [15]:
iterate = list(train["NLTKsentiment_raw"])
compoundList = []
for t in iterate:
    compound = t["compound"]
    compoundList.append(compound)
train["NLTKsentiment"] = compoundList

In [16]:
train

Unnamed: 0,actress_name,text,favorite_count,retweet_count,date,processed_text,stemmed,text2,text3,NLTKsentiment_raw,NLTKsentiment
0,Viola Davis,Anddddddd the Best Actress goesssss to 🏆@viola...,1,0,Sat Apr 23 12:00:00 +0000 2021,"[anddddddd, best, actress, goesssss, violadavi...","[anddddddd, best, actress, goesssss, violadavi...",anddddddd best actress goesssss violadavis mar...,anddddddd best actress goesssss violadavi mara...,"{'neg': 0.0, 'neu': 0.625, 'pos': 0.375, 'comp...",0.6369
1,Viola Davis,RT @McQueen: See the stages of design of the c...,0,82,Sat Apr 23 12:00:00 +0000 2021,"[see, stages, design, custom, made, engineered...","[see, stage, design, custom, made, engin, whit...",see stages design custom made engineered white...,see stage design custom made engin white thist...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0000
2,Viola Davis,RT @McQueen: See the stages of design of the c...,0,82,Sat Apr 23 12:00:00 +0000 2021,"[see, stages, design, custom, made, engineered...","[see, stage, design, custom, made, engin, whit...",see stages design custom made engineered white...,see stage design custom made engin white thist...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0000
3,Viola Davis,"Divest from the white gaze, divorce from white...",0,0,Sat Apr 23 12:00:00 +0000 2021,"[divest, white, gaze, divorce, white, approval...","[divest, white, gaze, divorc, white, approv, d...",divest white gaze divorce white approval dejec...,divest white gaze divorc white approv deject w...,"{'neg': 0.181, 'neu': 0.621, 'pos': 0.198, 'co...",0.0772
4,Viola Davis,RT @McQueen: See the stages of design of the c...,0,82,Sat Apr 23 12:00:00 +0000 2021,"[see, stages, design, custom, made, engineered...","[see, stage, design, custom, made, engin, whit...",see stages design custom made engineered white...,see stage design custom made engin white thist...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0000
...,...,...,...,...,...,...,...,...,...,...,...
3995,Carey Mulligan,RT @johnruns45: Let’s just take a moment to re...,0,71,Sat Apr 16 12:00:00 +0000 2021,"[let, take, moment, recognize, actual, best, a...","[let, take, moment, recogn, actual, best, acto...",let take moment recognize actual best actor ac...,let take moment recogn actual best actor actre...,"{'neg': 0.167, 'neu': 0.482, 'pos': 0.351, 'co...",0.6369
3996,Carey Mulligan,#FrancesMcDormand triumphed over the following...,2,0,Sat Apr 16 12:00:00 +0000 2021,"[francesmcdormand, triumphed, following, actre...","[francesmcdormand, triumph, follow, actress, w...",francesmcdormand triumphed following actresses...,francesmcdormand triumph follow actress win 2n...,"{'neg': 0.0, 'neu': 0.504, 'pos': 0.496, 'comp...",0.7845
3997,Carey Mulligan,RT @johnruns45: Let’s just take a moment to re...,0,71,Sat Apr 16 12:00:00 +0000 2021,"[let, take, moment, recognize, actual, best, a...","[let, take, moment, recogn, actual, best, acto...",let take moment recognize actual best actor ac...,let take moment recogn actual best actor actre...,"{'neg': 0.167, 'neu': 0.482, 'pos': 0.351, 'co...",0.6369
3998,Carey Mulligan,Although #FrancisMcDormand was gud in the film...,0,0,Sat Apr 16 12:00:00 +0000 2021,"[although, francismcdormand, gud, film, feelin...","[although, francismcdormand, gud, film, feel, ...",although francismcdormand gud film feeling bad...,although francismcdormand gud film feel bad ca...,"{'neg': 0.211, 'neu': 0.482, 'pos': 0.307, 'co...",0.1531


In [17]:
ob1 = train[train["actress_name"] == "Viola Davis"]
ob2 = train[train["actress_name"] == "Andra Day"]
ob3 = train[train["actress_name"] == "Vanessa Kirby"]
ob4 = train[train["actress_name"] == "Frances McDormand"]
ob5 = train[train["actress_name"] == "Carey Mulligan"]

In [18]:
score1 = ob1["NLTKsentiment"].mean()
score2 = ob2["NLTKsentiment"].mean()
score3 = ob3["NLTKsentiment"].mean()
score4 = ob4["NLTKsentiment"].mean()
score5 = ob5["NLTKsentiment"].mean()

print(actresses[0], "Average Sentiment Score", score1)
print(actresses[1], "Average Sentiment Score", score2)
print(actresses[2], "Average Sentiment Score", score3)
print(actresses[3], "Average Sentiment Score", score4)
print(actresses[4], "Average Sentiment Score", score5)

NameError: name 'actresses' is not defined

In [24]:
sentiment = {actresses[0]: score1,
             actresses[1]: score2,
             actresses[2]: score3,
             actresses[3]: score4,
             actresses[4]: score5}

# IMDB Movie Ratings, Rotten Tomatoes Critic Scores

In [25]:
# Scores were collected on April 24th

# Out of 10
imdb = {"Viola Davis": 7.0,
        "Andra Day": 6.3,
        "Vanessa Kirby": 7.1,
        "Frances McDormand": 7.5,
        "Carey Mulligan": 7.5}

# Percentage
rt_critics = {"Viola Davis": 0.98,
              "Andra Day": 0.55,
              "Vanessa Kirby": 0.75,
              "Frances McDormand": 0.94,
              "Carey Mulligan": 0.91}

# Percentage
rt_audience = {"Viola Davis": 0.74,
               "Andra Day": 0.84,
               "Vanessa Kirby": 0.85,
               "Frances McDormand": 0.82,
               "Carey Mulligan": 0.88}

# Scoring Model

In [26]:
# We will calculate the percentage chance that an Actress will win the Oscars.
# Our paper is a commentary on public opinion 
# Therefore we need to scale our metrics  to 0.25 each

def scaling(score, old_range):
    new_range = (0, 0.25)
    mini = old_range[0]
    maxi = old_range[1]
    percent = (score - (mini)) / (maxi - (mini))
    # Scaling formula
    weighted = new_range[1] * percent + new_range[0]
    return weighted

In [27]:
actress_win = {}
actress_df = pd.DataFrame(columns = ["name", "category", "imdb_audience_score", "rt_critic_score", "rt_audience_score", "sentiment_score", "oscar_win"])

In [28]:
for actress in actresses:
    sentiment_score = scaling(sentiment[actress], (-1, 1))
    imdb_score = scaling(imdb[actress], (0, 10))
    rt_critics_score = scaling(rt_critics[actress], (0, 1))
    rt_audience_score = scaling(rt_audience[actress], (0, 1))
    percentage_win = sentiment_score + imdb_score + rt_critics_score + rt_audience_score
    actress_win[actress] = percentage_win
    actress_df.loc[len(actress_df.index)] = [actress, "actress", imdb_score, rt_critics_score, rt_audience_score, 
                                           sentiment_score, percentage_win]
actress_df.to_csv("actress_results.csv", index = False)

In [29]:
for key in actress_win:
    print(key, "score is", actress_win[key])

Viola Davis score is 0.7289405
Andra Day score is 0.632385375
Vanessa Kirby score is 0.7672851249999996
Frances McDormand score is 0.8180726249999996
Carey Mulligan score is 0.8172432500000003
