# Tweet Collection from the Twitter API

The categories that we are predicting for this project are **Best Picture**, **Best Actor**, **Best Actress**, **Best Supporting Actor**, and **Best Supporting Actress**.

The technical goal is to store tweets under jsons for each Oscar Nominee. There will be one json for each category.

tweets -> keyword -> nominee -> json

### Getting authentication to collect

In [1]:
import json
import tweepy
import pandas as pd

In [2]:
#keys/secrets
credentials = {}
credentials['CONSUMER_KEY'] = ""
credentials['CONSUMER_SECRET'] = ""
credentials['ACCESS_TOKEN'] = ""
credentials['ACCESS_SECRET'] = ""

with open("twitter_credentials.json", "w") as f:
    json.dump(credentials, f)

In [3]:
with open("twitter_credentials.json", "r") as f:
    creds = json.load(f)
    
auth = tweepy.OAuthHandler(creds['CONSUMER_KEY'], creds['CONSUMER_SECRET'])
auth.set_access_token(creds['ACCESS_TOKEN'], creds['ACCESS_SECRET'])

api = tweepy.API(auth, wait_on_rate_limit = True,
                wait_on_rate_limit_notify = True)
try:
    api.verify_credentials()
    print("Authentication OK")
except:
    print("Error during Authentication")

Authentication OK


In [4]:
# Code for collecting tweets for a list of keywords
def collectTweets(key, query, df, dateList):
    for date in dateList:
        for tweet in api.search(q=query, lang = "en", count = 100, toDate = date):
            actress_name = key
            text = tweet.text
            favorite_count = tweet.favorite_count
            retweet_count = tweet.retweet_count
            date_posted = tweet.created_at 
            df.loc[len(df.index)] = [key, text, favorite_count, retweet_count, date]       

In [18]:
n = ["Nomadland", 
     "The Father", 
     "Judas and the Black Messiah", 
     "Mank", 
     "Minari", 
     "Promising Young Woman", 
     "Sound of Metal", 
     "The Trial of the Chicago 7"]

nominees_keyword_dict = {n[0]:"%23oscars%20%23nomadland%20-filter%3Aretweets", 
                         n[1]:"%23oscars%20%23thefather%20-filter%3Aretweets", 
                         n[2]:"%23oscars%20%23judasandtheblackmessiah%20-filter%3Aretweets", 
                         n[3]:"%23oscars%20%23mank%20-filter%3Aretweets", 
                         n[4]:"%23oscars%20%23minari%20-filter%3Aretweets",
                         n[5]:"%23oscars%20%23promisingyoungwoman%20-filter%3Aretweets",
                         n[6]:"%23oscars%20%23soundofmetal%20-filter%3Aretweets",
                         n[7]:"%23oscars%20%23thetrialofthechicago7%20-filter%3Aretweets"}

tweets_df = pd.DataFrame(columns = ["movie_name", "text", "favorite_count", "retweet_count", "date"])

dates = ["Sat Apr 23 12:00:00 +0000 2021",
         "Sat Apr 22 12:00:00 +0000 2021",
         "Sat Apr 21 12:00:00 +0000 2021",
         "Sat Apr 20 12:00:00 +0000 2021",
         "Sat Apr 19 12:00:00 +0000 2021",
         "Sat Apr 18 12:00:00 +0000 2021",
         "Sat Apr 17 12:00:00 +0000 2021",
         "Sat Apr 16 12:00:00 +0000 2021"]

In [6]:
# Collecting tweets for best actress
for key in nominees_keyword_dict:
    collectTweets(key, nominees_keyword_dict[key], tweets_df, dates)

In [7]:
tweets_df

Unnamed: 0,movie_name,text,favorite_count,retweet_count,date
0,Nomadland,RT @baraju_SuperHit: Best Picture : #Nomadland...,0,12,Sat Apr 23 12:00:00 +0000 2021
1,Nomadland,RT @ETPanache: Woman of no colour #FrancesMcDo...,0,1,Sat Apr 23 12:00:00 +0000 2021
2,Nomadland,RT @RealMomOfSFV: Best Picture Winner #Nomadla...,0,1,Sat Apr 23 12:00:00 +0000 2021
3,Nomadland,RT @badiucao: Designed a new poster for #Oscar...,0,23,Sat Apr 23 12:00:00 +0000 2021
4,Nomadland,Just watched the #OSCARS winner for best pictu...,1,0,Sat Apr 23 12:00:00 +0000 2021
...,...,...,...,...,...
6395,The Trial of the Chicago 7,Best Picture nominee #TheTrialOfTheChicago7 #...,0,0,Sat Apr 16 12:00:00 +0000 2021
6396,The Trial of the Chicago 7,Rewatching #TheTrialOfTheChicago7 I can't beli...,0,0,Sat Apr 16 12:00:00 +0000 2021
6397,The Trial of the Chicago 7,Watching #TheTrialOfTheChicago7 #Oscars Will m...,2,0,Sat Apr 16 12:00:00 +0000 2021
6398,The Trial of the Chicago 7,RT @needledesign: Heres the 7th piece in my #F...,0,6,Sat Apr 16 12:00:00 +0000 2021


In [8]:
# Number of tweets for Carey Mulligan for the Oscars prior to them taking place
tweets_df[tweets_df["movie_name"] == n[0]]

Unnamed: 0,movie_name,text,favorite_count,retweet_count,date
0,Nomadland,RT @baraju_SuperHit: Best Picture : #Nomadland...,0,12,Sat Apr 23 12:00:00 +0000 2021
1,Nomadland,RT @ETPanache: Woman of no colour #FrancesMcDo...,0,1,Sat Apr 23 12:00:00 +0000 2021
2,Nomadland,RT @RealMomOfSFV: Best Picture Winner #Nomadla...,0,1,Sat Apr 23 12:00:00 +0000 2021
3,Nomadland,RT @badiucao: Designed a new poster for #Oscar...,0,23,Sat Apr 23 12:00:00 +0000 2021
4,Nomadland,Just watched the #OSCARS winner for best pictu...,1,0,Sat Apr 23 12:00:00 +0000 2021
...,...,...,...,...,...
795,Nomadland,"@theacademy Still think it was a boring movie,...",0,0,Sat Apr 16 12:00:00 +0000 2021
796,Nomadland,RT @APEntertainment: SMALL BUT STYLISH: Alan S...,0,23,Sat Apr 16 12:00:00 +0000 2021
797,Nomadland,"Poignant, yet heart-breaking. \n\nThe story be...",0,0,Sat Apr 16 12:00:00 +0000 2021
798,Nomadland,RT @APEntertainment: SMALL BUT STYLISH: Alan S...,0,1,Sat Apr 16 12:00:00 +0000 2021


In [9]:
tweets_df[tweets_df["movie_name"] == n[1]]

Unnamed: 0,movie_name,text,favorite_count,retweet_count,date
800,The Father,RT @alzassociation: Last night’s Academy Award...,0,47,Sat Apr 23 12:00:00 +0000 2021
801,The Father,RT @alzassociation: Last night’s Academy Award...,0,47,Sat Apr 23 12:00:00 +0000 2021
802,The Father,My campaign worked! Congratulation Sir Anthony...,0,0,Sat Apr 23 12:00:00 +0000 2021
803,The Father,RT @alzassociation: Last night’s Academy Award...,0,47,Sat Apr 23 12:00:00 +0000 2021
804,The Father,RT @EW: .@AnthonyHopkins wins Best Actor in a ...,0,96,Sat Apr 23 12:00:00 +0000 2021
...,...,...,...,...,...
1595,The Father,RT @alzassociation: Last night’s Academy Award...,0,47,Sat Apr 16 12:00:00 +0000 2021
1596,The Father,RT @GMA: BREAKING: @TheAcademy Award for Best ...,0,19,Sat Apr 16 12:00:00 +0000 2021
1597,The Father,RT @ABC: BREAKING: @TheAcademy Award for Best ...,0,59,Sat Apr 16 12:00:00 +0000 2021
1598,The Father,RT @JoshSabarra: OSCAR THROWBACK: Anthony Hopk...,0,31,Sat Apr 16 12:00:00 +0000 2021


In [10]:
tweets_df[tweets_df["movie_name"] == n[2]]

Unnamed: 0,movie_name,text,favorite_count,retweet_count,date
1600,Judas and the Black Messiah,RT @TheFilmUpdates: Daniel Kaluuya responds to...,0,1662,Sat Apr 23 12:00:00 +0000 2021
1601,Judas and the Black Messiah,"RT @Variety: Backstage at the #Oscars, one rep...",0,41,Sat Apr 23 12:00:00 +0000 2021
1602,Judas and the Black Messiah,RT @TheFilmUpdates: Daniel Kaluuya responds to...,0,1662,Sat Apr 23 12:00:00 +0000 2021
1603,Judas and the Black Messiah,RT @TheFilmUpdates: Daniel Kaluuya responds to...,0,1662,Sat Apr 23 12:00:00 +0000 2021
1604,Judas and the Black Messiah,RT @TheFilmUpdates: Daniel Kaluuya responds to...,0,1662,Sat Apr 23 12:00:00 +0000 2021
...,...,...,...,...,...
2395,Judas and the Black Messiah,RT @TheFilmUpdates: Daniel Kaluuya responds to...,0,1662,Sat Apr 16 12:00:00 +0000 2021
2396,Judas and the Black Messiah,RT @TheFilmUpdates: Daniel Kaluuya responds to...,0,1662,Sat Apr 16 12:00:00 +0000 2021
2397,Judas and the Black Messiah,RT @HERMusicx: Grateful!!! https://t.co/njxys5...,0,535,Sat Apr 16 12:00:00 +0000 2021
2398,Judas and the Black Messiah,RT @TheFilmUpdates: Daniel Kaluuya responds to...,0,1662,Sat Apr 16 12:00:00 +0000 2021


In [11]:
tweets_df[tweets_df["movie_name"] == n[3]]

Unnamed: 0,movie_name,text,favorite_count,retweet_count,date
2400,Mank,RT @enews: And the Oscar for Best Production D...,0,15,Sat Apr 23 12:00:00 +0000 2021
2401,Mank,"RT @enews: ""I got to go home and feel like I g...",0,18,Sat Apr 23 12:00:00 +0000 2021
2402,Mank,RT @INA24x7: The 93rd #AcademyAwards2021\n#Osc...,0,3,Sat Apr 23 12:00:00 +0000 2021
2403,Mank,RT @Viswanth94: Good to see you sir 💙\n\n#Mank...,0,1,Sat Apr 23 12:00:00 +0000 2021
2404,Mank,RT @filmmakerspod: HUGE congrats to @e_messers...,0,5,Sat Apr 23 12:00:00 +0000 2021
...,...,...,...,...,...
3195,Mank,RT @FilmstoFilms_: Amanda Seyfried #Oscars #Ma...,0,182,Sat Apr 16 12:00:00 +0000 2021
3196,Mank,RT @empiremagazine: Now it's Production Design...,0,25,Sat Apr 16 12:00:00 +0000 2021
3197,Mank,RT @NYCVotes: #DidYouKnow that @TheAcademy use...,0,5,Sat Apr 16 12:00:00 +0000 2021
3198,Mank,RT @FilmstoFilms_: Amanda Seyfried #Oscars #Ma...,0,182,Sat Apr 16 12:00:00 +0000 2021


In [12]:
tweets_df[tweets_df["movie_name"] == n[4]]

Unnamed: 0,movie_name,text,favorite_count,retweet_count,date
3200,Minari,RT @TheFilmUpdates: #Minari star and Academy A...,0,7032,Sat Apr 23 12:00:00 +0000 2021
3201,Minari,RT @TheFilmUpdates: #Minari star and Academy A...,0,7032,Sat Apr 23 12:00:00 +0000 2021
3202,Minari,RT @TheFilmUpdates: #Minari star and Academy A...,0,7032,Sat Apr 23 12:00:00 +0000 2021
3203,Minari,"RT @alfonsoespina: ""Even a rainbow has seven c...",0,32,Sat Apr 23 12:00:00 +0000 2021
3204,Minari,"RT @alfonsoespina: ""Even a rainbow has seven c...",0,32,Sat Apr 23 12:00:00 +0000 2021
...,...,...,...,...,...
3995,Minari,RT @TheFilmUpdates: #Minari star and Academy A...,0,7032,Sat Apr 16 12:00:00 +0000 2021
3996,Minari,RT @hookent3: Congratulations on winning the 9...,0,247,Sat Apr 16 12:00:00 +0000 2021
3997,Minari,RT @TheFilmUpdates: #Minari star and Academy A...,0,7032,Sat Apr 16 12:00:00 +0000 2021
3998,Minari,RT @TheFilmUpdates: #Minari star and Academy A...,0,7032,Sat Apr 16 12:00:00 +0000 2021


In [14]:
tweets_df[tweets_df["movie_name"] == n[5]]

Unnamed: 0,movie_name,text,favorite_count,retweet_count,date
4000,Promising Young Woman,"Okay, I just found out that @emeraldfennell wa...",0,0,Sat Apr 23 12:00:00 +0000 2021
4001,Promising Young Woman,#PromisingYoungWoman\nRating: 7.5/10\n\nThis w...,5,0,Sat Apr 23 12:00:00 +0000 2021
4002,Promising Young Woman,RT @IMDb: Congratulations to @emeraldfennell f...,0,90,Sat Apr 23 12:00:00 +0000 2021
4003,Promising Young Woman,"RT @enews: ""There’s a movie that we want, that...",0,17,Sat Apr 23 12:00:00 +0000 2021
4004,Promising Young Woman,RT @enews: The first award of the night! @emer...,0,45,Sat Apr 23 12:00:00 +0000 2021
...,...,...,...,...,...
4795,Promising Young Woman,RT @Chrissuccess: #Oscars| Winner\n\nOriginal ...,0,11,Sat Apr 16 12:00:00 +0000 2021
4796,Promising Young Woman,RT @LetsOTT: #Oscars for the Best Original Sc...,0,15,Sat Apr 16 12:00:00 +0000 2021
4797,Promising Young Woman,"RT @Chrissuccess: ""I’m not really interested i...",0,11,Sat Apr 16 12:00:00 +0000 2021
4798,Promising Young Woman,RT @TVLine: #Oscars: Emerald Fennell wins Best...,0,6,Sat Apr 16 12:00:00 +0000 2021


In [15]:
tweets_df[tweets_df["movie_name"] == n[6]]

Unnamed: 0,movie_name,text,favorite_count,retweet_count,date
4800,Sound of Metal,RT @Variety: #Oscars: #SoundOfMetal wins best ...,0,636,Sat Apr 23 12:00:00 +0000 2021
4801,Sound of Metal,RT @empiremagazine: The Oscar for Sound approp...,0,205,Sat Apr 23 12:00:00 +0000 2021
4802,Sound of Metal,RT @mishacollins: The first of #SoundOfMetal’s...,0,908,Sat Apr 23 12:00:00 +0000 2021
4803,Sound of Metal,RT @Variety: #Oscars: #SoundOfMetal wins best ...,0,636,Sat Apr 23 12:00:00 +0000 2021
4804,Sound of Metal,RT @EW: The cast of #SoundOfMetal discuss how ...,0,22,Sat Apr 23 12:00:00 +0000 2021
...,...,...,...,...,...
5595,Sound of Metal,RT @Mishalocked24: Do you all realize that Dar...,0,52,Sat Apr 16 12:00:00 +0000 2021
5596,Sound of Metal,RT @Mishalocked24: Do you all realize that Dar...,0,52,Sat Apr 16 12:00:00 +0000 2021
5597,Sound of Metal,RT @Mishalocked24: Do you all realize that Dar...,0,52,Sat Apr 16 12:00:00 +0000 2021
5598,Sound of Metal,RT @peterhowellfilm: For those keeping score a...,0,17,Sat Apr 16 12:00:00 +0000 2021


In [16]:
tweets_df[tweets_df["movie_name"] == n[7]]

Unnamed: 0,movie_name,text,favorite_count,retweet_count,date
5600,The Trial of the Chicago 7,RT @OllyGibbs: Presenting the #Oscars illustra...,0,1143,Sat Apr 23 12:00:00 +0000 2021
5601,The Trial of the Chicago 7,"RT @IBC365: Ahead of the #Oscars, go behind th...",0,2,Sat Apr 23 12:00:00 +0000 2021
5602,The Trial of the Chicago 7,RT @OllyGibbs: Presenting the #Oscars illustra...,0,1143,Sat Apr 23 12:00:00 +0000 2021
5603,The Trial of the Chicago 7,All the movies I have seen that are in nominat...,0,0,Sat Apr 23 12:00:00 +0000 2021
5604,The Trial of the Chicago 7,"Sorry for #TheTrialOfTheChicago7, damn it's ve...",0,0,Sat Apr 23 12:00:00 +0000 2021
...,...,...,...,...,...
6395,The Trial of the Chicago 7,Best Picture nominee #TheTrialOfTheChicago7 #...,0,0,Sat Apr 16 12:00:00 +0000 2021
6396,The Trial of the Chicago 7,Rewatching #TheTrialOfTheChicago7 I can't beli...,0,0,Sat Apr 16 12:00:00 +0000 2021
6397,The Trial of the Chicago 7,Watching #TheTrialOfTheChicago7 #Oscars Will m...,2,0,Sat Apr 16 12:00:00 +0000 2021
6398,The Trial of the Chicago 7,RT @needledesign: Heres the 7th piece in my #F...,0,6,Sat Apr 16 12:00:00 +0000 2021


In [17]:
tweets_df[tweets_df["movie_name"] == n[7]].iloc[0]

movie_name                               The Trial of the Chicago 7
text              RT @OllyGibbs: Presenting the #Oscars illustra...
favorite_count                                                    0
retweet_count                                                  1143
date                                 Sat Apr 23 12:00:00 +0000 2021
Name: 5600, dtype: object

In [18]:
tweets_df.to_pickle("best_picture.pkl")

In [2]:
train = pd.read_pickle("best_picture.pkl")

# Text Processing

In [3]:
train

Unnamed: 0,movie_name,text,favorite_count,retweet_count,date
0,Nomadland,RT @baraju_SuperHit: Best Picture : #Nomadland...,0,12,Sat Apr 23 12:00:00 +0000 2021
1,Nomadland,RT @ETPanache: Woman of no colour #FrancesMcDo...,0,1,Sat Apr 23 12:00:00 +0000 2021
2,Nomadland,RT @RealMomOfSFV: Best Picture Winner #Nomadla...,0,1,Sat Apr 23 12:00:00 +0000 2021
3,Nomadland,RT @badiucao: Designed a new poster for #Oscar...,0,23,Sat Apr 23 12:00:00 +0000 2021
4,Nomadland,Just watched the #OSCARS winner for best pictu...,1,0,Sat Apr 23 12:00:00 +0000 2021
...,...,...,...,...,...
6395,The Trial of the Chicago 7,Best Picture nominee #TheTrialOfTheChicago7 #...,0,0,Sat Apr 16 12:00:00 +0000 2021
6396,The Trial of the Chicago 7,Rewatching #TheTrialOfTheChicago7 I can't beli...,0,0,Sat Apr 16 12:00:00 +0000 2021
6397,The Trial of the Chicago 7,Watching #TheTrialOfTheChicago7 #Oscars Will m...,2,0,Sat Apr 16 12:00:00 +0000 2021
6398,The Trial of the Chicago 7,RT @needledesign: Heres the 7th piece in my #F...,0,6,Sat Apr 16 12:00:00 +0000 2021


In [4]:
import nltk
from nltk.corpus import stopwords
additional  = ['rt','rts','retweet']
swords = set().union(stopwords.words('english'),additional)

In [5]:
train['processed_text'] = train['text'].str.lower()\
          .str.replace('(@[a-z0-9]+)\w+',' ')\
          .str.replace('(http\S+)', ' ')\
          .str.replace('([^0-9a-z \t])',' ')\
          .str.replace(' +',' ')\
          .str.replace('(@[0-9]+)\w+',' ')\
          .apply(lambda x: [i for i in x.split() if not i in swords])

In [6]:
from nltk.stem import PorterStemmer
ps = PorterStemmer()
train['stemmed'] = train['processed_text'].apply(lambda x: [ps.stem(i) for i in x if i != ''])

In [7]:
train["text2"] = train["processed_text"].str.join(" ")

In [8]:
train["text3"] = train["stemmed"].str.join(" ")

In [9]:
train

Unnamed: 0,movie_name,text,favorite_count,retweet_count,date,processed_text,stemmed,text2,text3
0,Nomadland,RT @baraju_SuperHit: Best Picture : #Nomadland...,0,12,Sat Apr 23 12:00:00 +0000 2021,"[best, picture, nomadland, oscars, oscars2021]","[best, pictur, nomadland, oscar, oscars2021]",best picture nomadland oscars oscars2021,best pictur nomadland oscar oscars2021
1,Nomadland,RT @ETPanache: Woman of no colour #FrancesMcDo...,0,1,Sat Apr 23 12:00:00 +0000 2021,"[woman, colour, francesmcdormand, bestactress,...","[woman, colour, francesmcdormand, bestactress,...",woman colour francesmcdormand bestactress well...,woman colour francesmcdormand bestactress well...
2,Nomadland,RT @RealMomOfSFV: Best Picture Winner #Nomadla...,0,1,Sat Apr 23 12:00:00 +0000 2021,"[best, picture, winner, nomadland, available, ...","[best, pictur, winner, nomadland, avail, blu, ...",best picture winner nomadland available blu ra...,best pictur winner nomadland avail blu ray apr...
3,Nomadland,RT @badiucao: Designed a new poster for #Oscar...,0,23,Sat Apr 23 12:00:00 +0000 2021,"[designed, new, poster, oscars, winning, nomad...","[design, new, poster, oscar, win, nomadland, a...",designed new poster oscars winning nomadland a...,design new poster oscar win nomadland also tot...
4,Nomadland,Just watched the #OSCARS winner for best pictu...,1,0,Sat Apr 23 12:00:00 +0000 2021,"[watched, oscars, winner, best, picture, nomad...","[watch, oscar, winner, best, pictur, nomadland...",watched oscars winner best picture nomadland s...,watch oscar winner best pictur nomadland say w...
...,...,...,...,...,...,...,...,...,...
6395,The Trial of the Chicago 7,Best Picture nominee #TheTrialOfTheChicago7 #...,0,0,Sat Apr 16 12:00:00 +0000 2021,"[best, picture, nominee, thetrialofthechicago7...","[best, pictur, nomine, thetrialofthechicago7, ...",best picture nominee thetrialofthechicago7 osc...,best pictur nomine thetrialofthechicago7 oscar...
6396,The Trial of the Chicago 7,Rewatching #TheTrialOfTheChicago7 I can't beli...,0,0,Sat Apr 16 12:00:00 +0000 2021,"[rewatching, thetrialofthechicago7, believe, m...","[rewatch, thetrialofthechicago7, believ, movi,...",rewatching thetrialofthechicago7 believe movie...,rewatch thetrialofthechicago7 believ movi anyt...
6397,The Trial of the Chicago 7,Watching #TheTrialOfTheChicago7 #Oscars Will m...,2,0,Sat Apr 16 12:00:00 +0000 2021,"[watching, thetrialofthechicago7, oscars, make...","[watch, thetrialofthechicago7, oscar, make, 2n...",watching thetrialofthechicago7 oscars make 2nd...,watch thetrialofthechicago7 oscar make 2nd wat...
6398,The Trial of the Chicago 7,RT @needledesign: Heres the 7th piece in my #F...,0,6,Sat Apr 16 12:00:00 +0000 2021,"[heres, 7th, piece, foryourconsideration, osca...","[here, 7th, piec, foryourconsider, oscars2021,...",heres 7th piece foryourconsideration oscars202...,here 7th piec foryourconsider oscars2021 poste...


# Calculating Average Sentiment Scores

In [10]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [11]:
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Hojin\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [12]:
sia = SentimentIntensityAnalyzer()

In [13]:
def sentiment_calcnltk(text):
    try:
        return sia.polarity_scores(text)
    except:
        return None

In [14]:
train['NLTKsentiment_raw'] = train["text3"].apply(sentiment_calcnltk)

In [15]:
iterate = list(train["NLTKsentiment_raw"])
compoundList = []
for t in iterate:
    compound = t["compound"]
    compoundList.append(compound)
train["NLTKsentiment"] = compoundList

In [16]:
train

Unnamed: 0,movie_name,text,favorite_count,retweet_count,date,processed_text,stemmed,text2,text3,NLTKsentiment_raw,NLTKsentiment
0,Nomadland,RT @baraju_SuperHit: Best Picture : #Nomadland...,0,12,Sat Apr 23 12:00:00 +0000 2021,"[best, picture, nomadland, oscars, oscars2021]","[best, pictur, nomadland, oscar, oscars2021]",best picture nomadland oscars oscars2021,best pictur nomadland oscar oscars2021,"{'neg': 0.0, 'neu': 0.488, 'pos': 0.512, 'comp...",0.6369
1,Nomadland,RT @ETPanache: Woman of no colour #FrancesMcDo...,0,1,Sat Apr 23 12:00:00 +0000 2021,"[woman, colour, francesmcdormand, bestactress,...","[woman, colour, francesmcdormand, bestactress,...",woman colour francesmcdormand bestactress well...,woman colour francesmcdormand bestactress well...,"{'neg': 0.0, 'neu': 0.811, 'pos': 0.189, 'comp...",0.2732
2,Nomadland,RT @RealMomOfSFV: Best Picture Winner #Nomadla...,0,1,Sat Apr 23 12:00:00 +0000 2021,"[best, picture, winner, nomadland, available, ...","[best, pictur, winner, nomadland, avail, blu, ...",best picture winner nomadland available blu ra...,best pictur winner nomadland avail blu ray apr...,"{'neg': 0.0, 'neu': 0.5, 'pos': 0.5, 'compound...",0.8402
3,Nomadland,RT @badiucao: Designed a new poster for #Oscar...,0,23,Sat Apr 23 12:00:00 +0000 2021,"[designed, new, poster, oscars, winning, nomad...","[design, new, poster, oscar, win, nomadland, a...",designed new poster oscars winning nomadland a...,design new poster oscar win nomadland also tot...,"{'neg': 0.169, 'neu': 0.618, 'pos': 0.213, 'co...",0.2023
4,Nomadland,Just watched the #OSCARS winner for best pictu...,1,0,Sat Apr 23 12:00:00 +0000 2021,"[watched, oscars, winner, best, picture, nomad...","[watch, oscar, winner, best, pictur, nomadland...",watched oscars winner best picture nomadland s...,watch oscar winner best pictur nomadland say w...,"{'neg': 0.0, 'neu': 0.404, 'pos': 0.596, 'comp...",0.9153
...,...,...,...,...,...,...,...,...,...,...,...
6395,The Trial of the Chicago 7,Best Picture nominee #TheTrialOfTheChicago7 #...,0,0,Sat Apr 16 12:00:00 +0000 2021,"[best, picture, nominee, thetrialofthechicago7...","[best, pictur, nomine, thetrialofthechicago7, ...",best picture nominee thetrialofthechicago7 osc...,best pictur nomine thetrialofthechicago7 oscar...,"{'neg': 0.0, 'neu': 0.656, 'pos': 0.344, 'comp...",0.6369
6396,The Trial of the Chicago 7,Rewatching #TheTrialOfTheChicago7 I can't beli...,0,0,Sat Apr 16 12:00:00 +0000 2021,"[rewatching, thetrialofthechicago7, believe, m...","[rewatch, thetrialofthechicago7, believ, movi,...",rewatching thetrialofthechicago7 believe movie...,rewatch thetrialofthechicago7 believ movi anyt...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0000
6397,The Trial of the Chicago 7,Watching #TheTrialOfTheChicago7 #Oscars Will m...,2,0,Sat Apr 16 12:00:00 +0000 2021,"[watching, thetrialofthechicago7, oscars, make...","[watch, thetrialofthechicago7, oscar, make, 2n...",watching thetrialofthechicago7 oscars make 2nd...,watch thetrialofthechicago7 oscar make 2nd wat...,"{'neg': 0.217, 'neu': 0.602, 'pos': 0.181, 'co...",-0.1531
6398,The Trial of the Chicago 7,RT @needledesign: Heres the 7th piece in my #F...,0,6,Sat Apr 16 12:00:00 +0000 2021,"[heres, 7th, piece, foryourconsideration, osca...","[here, 7th, piec, foryourconsider, oscars2021,...",heres 7th piece foryourconsideration oscars202...,here 7th piec foryourconsider oscars2021 poste...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0000


In [19]:
ob1 = train[train["movie_name"] == n[0]]
ob2 = train[train["movie_name"] == n[1]]
ob3 = train[train["movie_name"] == n[2]]
ob4 = train[train["movie_name"] == n[3]]
ob5 = train[train["movie_name"] == n[4]]
ob6 = train[train["movie_name"] == n[5]]     
ob7 = train[train["movie_name"] == n[6]]     
ob8 = train[train["movie_name"] == n[7]]     

In [20]:
score1 = ob1["NLTKsentiment"].mean()
score2 = ob2["NLTKsentiment"].mean()
score3 = ob3["NLTKsentiment"].mean()
score4 = ob4["NLTKsentiment"].mean()
score5 = ob5["NLTKsentiment"].mean()
score6 = ob6["NLTKsentiment"].mean()
score7 = ob7["NLTKsentiment"].mean()
score8 = ob8["NLTKsentiment"].mean()

print(n[0], "Average Sentiment Score", score1)
print(n[1], "Average Sentiment Score", score2)
print(n[2], "Average Sentiment Score", score3)
print(n[3], "Average Sentiment Score", score4)
print(n[4], "Average Sentiment Score", score5)
print(n[5], "Average Sentiment Score", score6)
print(n[6], "Average Sentiment Score", score7)
print(n[7], "Average Sentiment Score", score8)

Nomadland Average Sentiment Score 0.4483799999999986
The Father Average Sentiment Score 0.662438000000001
Judas and the Black Messiah Average Sentiment Score 0.041378000000000005
Mank Average Sentiment Score 0.44561199999999923
Minari Average Sentiment Score 0.7129479999999959
Promising Young Woman Average Sentiment Score 0.5793889999999977
Sound of Metal Average Sentiment Score 0.47397700000000187
The Trial of the Chicago 7 Average Sentiment Score 0.4322959999999996


In [21]:
sentiment = {n[0]: score1,
             n[1]: score2,
             n[2]: score3,
             n[3]: score4,
             n[4]: score5,
             n[5]: score6,
             n[6]: score7,
             n[7]: score8}

# IMDB Movie Ratings, Rotten Tomatoes Critic Scores

In [22]:
# Scores were collected on April 24th

# Out of 10
imdb = {n[0]: 7.5,
        n[1]: 8.3,
        n[2]: 7.6,
        n[3]: 6.9,
        n[4]: 7.6,
        n[5]: 7.5,
        n[6]: 7.8,
        n[7]: 7.8}

# Percentage
rt_critics = {n[0]: 0.94,
              n[1]: 0.98,
              n[2]: 0.96,
              n[3]: 0.83,
              n[4]: 0.98,
              n[5]: 0.91,
              n[6]: 0.97,
              n[7]: 0.89}

# Percentage
rt_audience = {n[0]: 0.82,
               n[1]: 0.91,
               n[2]: 0.95,
               n[3]: 0.59,
               n[4]: 0.88,
               n[5]: 0.88,
               n[6]: 0.91,
               n[7]: 0.91}

# Scoring Model

In [23]:
# We will calculate the percentage chance that an Actress will win the Oscars.
# Our paper is a commentary on public opinion 
# Therefore we need to scale our metrics  to 0.25 each

def scaling(score, old_range):
    new_range = (0, 0.25)
    mini = old_range[0]
    maxi = old_range[1]
    percent = (score - (mini)) / (maxi - (mini))
    # Scaling formula
    weighted = new_range[1] * percent + new_range[0]
    return weighted

In [24]:
movie_win = {}
movies_df = pd.DataFrame(columns = ["name", "category", "imdb_audience_score", "rt_critic_score", "rt_audience_score", "sentiment_score", "oscar_win"])

In [25]:
for movie in n:
    sentiment_score = scaling(sentiment[movie], (-1, 1))
    imdb_score = scaling(imdb[movie], (0, 10))
    rt_critics_score = scaling(rt_critics[movie], (0, 1))
    rt_audience_score = scaling(rt_audience[movie], (0, 1))
    percentage_win = sentiment_score + imdb_score + rt_critics_score + rt_audience_score
    movie_win[movie] = percentage_win
    movies_df.loc[len(movies_df.index)] = [movie, "movie", imdb_score, rt_critics_score, rt_audience_score, 
                                           sentiment_score, percentage_win]
movies_df.to_csv("picture_results.csv", index = False)

In [26]:
for key in movie_win:
    print(key, "score is", movie_win[key])

Nomadland score is 0.8085474999999998
The Father score is 0.8878047500000001
Judas and the Black Messiah score is 0.79767225
Mank score is 0.7082014999999999
Minari score is 0.8691184999999995
Promising Young Woman score is 0.8324236249999997
Sound of Metal score is 0.8492471250000002
The Trial of the Chicago 7 score is 0.824037
