# Import Required Libraries

In [1]:
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from sklearn.preprocessing import MinMaxScaler

# Load Dataset

In [2]:
# Load the critic reviews file (rotten_tomatoes_critic_reviews.csv) into a pandas DataFrame.
# The path is relative, so it is resolved against the current working directory.
df = pd.read_csv('rotten_tomatoes_critic_reviews.csv')
# Preview the first rows as the cell output
df.head()

Unnamed: 0,rotten_tomatoes_link,critic_name,top_critic,publisher_name,review_type,review_score,review_date,review_content
0,m/0814255,Andrew L. Urban,False,Urban Cinefile,Fresh,,2010-02-06,A fantasy adventure that fuses Greek mythology...
1,m/0814255,Louise Keller,False,Urban Cinefile,Fresh,,2010-02-06,"Uma Thurman as Medusa, the gorgon with a coiff..."
2,m/0814255,,False,FILMINK (Australia),Fresh,,2010-02-09,With a top-notch cast and dazzling special eff...
3,m/0814255,Ben McEachen,False,Sunday Mail (Australia),Fresh,3.5/5,2010-02-09,Whether audiences will get behind The Lightnin...
4,m/0814255,Ethan Alter,True,Hollywood Reporter,Rotten,,2010-02-10,What's really lacking in The Lightning Thief i...


# Preprocess dataset

In [3]:
# Discard the critic/publisher metadata and the critic's own rating columns;
# the movie link, review date and review text remain.
columns_to_remove = [
    'critic_name',
    'top_critic',
    'publisher_name',
    'review_type',
    'review_score',
]
df = df.drop(columns_to_remove, axis='columns')
df.head()

Unnamed: 0,rotten_tomatoes_link,review_date,review_content
0,m/0814255,2010-02-06,A fantasy adventure that fuses Greek mythology...
1,m/0814255,2010-02-06,"Uma Thurman as Medusa, the gorgon with a coiff..."
2,m/0814255,2010-02-09,With a top-notch cast and dazzling special eff...
3,m/0814255,2010-02-09,Whether audiences will get behind The Lightnin...
4,m/0814255,2010-02-10,What's really lacking in The Lightning Thief i...


In [4]:
# Reshape the reviews in one pass:
#   1. add a 'year' column taken from the parsed 'review_date'
#   2. drop the raw 'review_date' column
#   3. rename 'review_content' to 'text'
#   4. discard every row that still contains a NaN value
df = (
    df.assign(year=pd.to_datetime(df['review_date']).dt.year)
    .drop(columns=['review_date'])
    .rename(columns={'review_content': 'text'})
    .dropna()
)

# Preview the reshaped DataFrame
df.head()

Unnamed: 0,rotten_tomatoes_link,text,year
0,m/0814255,A fantasy adventure that fuses Greek mythology...,2010
1,m/0814255,"Uma Thurman as Medusa, the gorgon with a coiff...",2010
2,m/0814255,With a top-notch cast and dazzling special eff...,2010
3,m/0814255,Whether audiences will get behind The Lightnin...,2010
4,m/0814255,What's really lacking in The Lightning Thief i...,2010


In [5]:
# Create one VADER analyzer and reuse it for every review
analyzer = SentimentIntensityAnalyzer()

# Score each review text, keeping only VADER's 'compound' value,
# and store the result in a new 'sentiment' column
compound_scores = df['text'].apply(
    lambda review: analyzer.polarity_scores(review)['compound']
)
df['sentiment'] = compound_scores
df.head()

Unnamed: 0,rotten_tomatoes_link,text,year,sentiment
0,m/0814255,A fantasy adventure that fuses Greek mythology...,2010,0.7579
1,m/0814255,"Uma Thurman as Medusa, the gorgon with a coiff...",2010,0.0
2,m/0814255,With a top-notch cast and dazzling special eff...,2010,0.4019
3,m/0814255,Whether audiences will get behind The Lightnin...,2010,0.705
4,m/0814255,What's really lacking in The Lightning Thief i...,2010,-0.5267


In [6]:
# Rescale the VADER compound scores into the interval 0 to 1.
# MinMaxScaler is fitted on this column, so the mapping is driven by the smallest
# and largest scores present in the data rather than by a fixed range: in the
# outputs of this notebook a raw score of 0.0 becomes 0.499874, not exactly 0.5.
# NOTE(review): the 0.4 / 0.6 cut-offs applied later are therefore relative to this
# data-dependent scale — confirm that is intended.
scaler = MinMaxScaler()
# Double brackets pass a one-column DataFrame, since the scaler works on 2-D input
df['sentiment'] = scaler.fit_transform(df[['sentiment']])
df.head()

Unnamed: 0,rotten_tomatoes_link,text,year,sentiment
0,m/0814255,A fantasy adventure that fuses Greek mythology...,2010,0.881707
1,m/0814255,"Uma Thurman as Medusa, the gorgon with a coiff...",2010,0.499874
2,m/0814255,With a top-notch cast and dazzling special eff...,2010,0.702353
3,m/0814255,Whether audiences will get behind The Lightnin...,2010,0.855056
4,m/0814255,What's really lacking in The Lightning Thief i...,2010,0.234521


In [7]:
# Read the movie metadata table (rotten_tomatoes_movies.csv)
movies_df = pd.read_csv('rotten_tomatoes_movies.csv')

# Narrow it to the join key plus the title, genres and directors columns
movie_columns = ['rotten_tomatoes_link', 'movie_title', 'genres', 'directors']
movies_df = movies_df[movie_columns]
movies_df.head()

Unnamed: 0,rotten_tomatoes_link,movie_title,genres,directors
0,m/0814255,Percy Jackson & the Olympians: The Lightning T...,"Action & Adventure, Comedy, Drama, Science Fic...",Chris Columbus
1,m/0878835,Please Give,Comedy,Nicole Holofcener
2,m/10,10,"Comedy, Romance",Blake Edwards
3,m/1000013-12_angry_men,12 Angry Men (Twelve Angry Men),"Classics, Drama",Sidney Lumet
4,m/1000079-20000_leagues_under_the_sea,"20,000 Leagues Under The Sea","Action & Adventure, Drama, Kids & Family",Richard Fleischer


In [8]:
# Attach each movie's title, genres and directors to its reviews.
# The inner join on 'rotten_tomatoes_link' keeps only the reviews whose link
# also appears in movies_df.
merged_df = df.merge(movies_df, how='inner', on='rotten_tomatoes_link')
merged_df.head()

Unnamed: 0,rotten_tomatoes_link,text,year,sentiment,movie_title,genres,directors
0,m/0814255,A fantasy adventure that fuses Greek mythology...,2010,0.881707,Percy Jackson & the Olympians: The Lightning T...,"Action & Adventure, Comedy, Drama, Science Fic...",Chris Columbus
1,m/0814255,"Uma Thurman as Medusa, the gorgon with a coiff...",2010,0.499874,Percy Jackson & the Olympians: The Lightning T...,"Action & Adventure, Comedy, Drama, Science Fic...",Chris Columbus
2,m/0814255,With a top-notch cast and dazzling special eff...,2010,0.702353,Percy Jackson & the Olympians: The Lightning T...,"Action & Adventure, Comedy, Drama, Science Fic...",Chris Columbus
3,m/0814255,Whether audiences will get behind The Lightnin...,2010,0.855056,Percy Jackson & the Olympians: The Lightning T...,"Action & Adventure, Comedy, Drama, Science Fic...",Chris Columbus
4,m/0814255,What's really lacking in The Lightning Thief i...,2010,0.234521,Percy Jackson & the Olympians: The Lightning T...,"Action & Adventure, Comedy, Drama, Science Fic...",Chris Columbus


In [9]:
# Remove the 'rotten_tomatoes_link' join key now that the merge is done
# (only this column is dropped; 'text' stays in the DataFrame)
merged_df = merged_df.drop(columns=['rotten_tomatoes_link'])
merged_df.head()

Unnamed: 0,text,year,sentiment,movie_title,genres,directors
0,A fantasy adventure that fuses Greek mythology...,2010,0.881707,Percy Jackson & the Olympians: The Lightning T...,"Action & Adventure, Comedy, Drama, Science Fic...",Chris Columbus
1,"Uma Thurman as Medusa, the gorgon with a coiff...",2010,0.499874,Percy Jackson & the Olympians: The Lightning T...,"Action & Adventure, Comedy, Drama, Science Fic...",Chris Columbus
2,With a top-notch cast and dazzling special eff...,2010,0.702353,Percy Jackson & the Olympians: The Lightning T...,"Action & Adventure, Comedy, Drama, Science Fic...",Chris Columbus
3,Whether audiences will get behind The Lightnin...,2010,0.855056,Percy Jackson & the Olympians: The Lightning T...,"Action & Adventure, Comedy, Drama, Science Fic...",Chris Columbus
4,What's really lacking in The Lightning Thief i...,2010,0.234521,Percy Jackson & the Olympians: The Lightning T...,"Action & Adventure, Comedy, Drama, Science Fic...",Chris Columbus


In [11]:
# Function to categorize sentiment
def categorize_sentiment(score, negative_below=0.4, positive_above=0.6):
    """Map a normalized sentiment score to a 'Negative', 'Neutral' or 'Positive' label.

    Parameters
    ----------
    score : float
        Sentiment score to classify (the notebook passes values scaled to 0..1).
    negative_below : float, default 0.4
        Scores strictly below this cut-off are labelled 'Negative'.
    positive_above : float, default 0.6
        Scores strictly above this cut-off are labelled 'Positive'.

    Returns
    -------
    str
        'Negative', 'Neutral' or 'Positive'. Both cut-offs are inclusive on the
        neutral side, so a score equal to either one is 'Neutral'.

    Notes
    -----
    A NaN score compares false against both cut-offs and therefore falls
    through to 'Positive'; remove missing values before calling.
    """
    if score < negative_below:
        return 'Negative'
    # Reaching this point means score >= negative_below, so only the upper
    # cut-off still needs checking.
    if score <= positive_above:
        return 'Neutral'
    return 'Positive'

# Replace the numeric 'sentiment' scores with their Negative/Neutral/Positive labels.
# NOTE: this overwrites the column in place, so the cell is not re-runnable on its
# own — a second run would pass the string labels back into categorize_sentiment,
# whose numeric comparisons fail on str input. Re-run from the merge cell instead.
merged_df['sentiment'] = merged_df['sentiment'].apply(categorize_sentiment)
merged_df.head()

Unnamed: 0,text,year,sentiment,movie_title,genres,directors
0,A fantasy adventure that fuses Greek mythology...,2010,Positive,Percy Jackson & the Olympians: The Lightning T...,"Action & Adventure, Comedy, Drama, Science Fic...",Chris Columbus
1,"Uma Thurman as Medusa, the gorgon with a coiff...",2010,Neutral,Percy Jackson & the Olympians: The Lightning T...,"Action & Adventure, Comedy, Drama, Science Fic...",Chris Columbus
2,With a top-notch cast and dazzling special eff...,2010,Positive,Percy Jackson & the Olympians: The Lightning T...,"Action & Adventure, Comedy, Drama, Science Fic...",Chris Columbus
3,Whether audiences will get behind The Lightnin...,2010,Positive,Percy Jackson & the Olympians: The Lightning T...,"Action & Adventure, Comedy, Drama, Science Fic...",Chris Columbus
4,What's really lacking in The Lightning Thief i...,2010,Negative,Percy Jackson & the Olympians: The Lightning T...,"Action & Adventure, Comedy, Drama, Science Fic...",Chris Columbus


# Save Preprocessed dataset

In [13]:
# Build the sentiment export: drop the 'genres' and 'directors' columns, then
# relabel the remaining ones:
#   'text'        -> 'Title'
#   'year'        -> 'Year'
#   'sentiment'   -> 'Conference'
#   'movie_title' -> 'Author Names'
# NOTE(review): the target names presumably follow the schema expected by
# whatever consumes the exported .tsv — confirm.
merged_df1 = (
    merged_df
    .drop(columns=['genres', 'directors'])
    .rename(columns={
        'text': 'Title',
        'year': 'Year',
        'sentiment': 'Conference',
        'movie_title': 'Author Names',
    })
)
merged_df1.head()

Unnamed: 0,Title,Year,Conference,Author Names
0,A fantasy adventure that fuses Greek mythology...,2010,Positive,Percy Jackson & the Olympians: The Lightning T...
1,"Uma Thurman as Medusa, the gorgon with a coiff...",2010,Neutral,Percy Jackson & the Olympians: The Lightning T...
2,With a top-notch cast and dazzling special eff...,2010,Positive,Percy Jackson & the Olympians: The Lightning T...
3,Whether audiences will get behind The Lightnin...,2010,Positive,Percy Jackson & the Olympians: The Lightning T...
4,What's really lacking in The Lightning Thief i...,2010,Negative,Percy Jackson & the Olympians: The Lightning T...


In [14]:
# Export the sentiment DataFrame (merged_df1) to a tab-separated .tsv file,
# without writing the row index
merged_df1.to_csv('RottenTomatoes_Sent.tsv', sep='\t', index=False)

In [15]:
# Build the genres export: drop the 'sentiment' and 'directors' columns, then
# relabel the remaining ones:
#   'text'        -> 'Title'
#   'year'        -> 'Year'
#   'genres'      -> 'Conference'
#   'movie_title' -> 'Author Names'
# NOTE(review): the target names presumably follow the schema expected by
# whatever consumes the exported .tsv — confirm.
merged_df2 = (
    merged_df
    .drop(columns=['sentiment', 'directors'])
    .rename(columns={
        'text': 'Title',
        'year': 'Year',
        'genres': 'Conference',
        'movie_title': 'Author Names',
    })
)
merged_df2.head()

Unnamed: 0,Title,Year,Author Names,Conference
0,A fantasy adventure that fuses Greek mythology...,2010,Percy Jackson & the Olympians: The Lightning T...,"Action & Adventure, Comedy, Drama, Science Fic..."
1,"Uma Thurman as Medusa, the gorgon with a coiff...",2010,Percy Jackson & the Olympians: The Lightning T...,"Action & Adventure, Comedy, Drama, Science Fic..."
2,With a top-notch cast and dazzling special eff...,2010,Percy Jackson & the Olympians: The Lightning T...,"Action & Adventure, Comedy, Drama, Science Fic..."
3,Whether audiences will get behind The Lightnin...,2010,Percy Jackson & the Olympians: The Lightning T...,"Action & Adventure, Comedy, Drama, Science Fic..."
4,What's really lacking in The Lightning Thief i...,2010,Percy Jackson & the Olympians: The Lightning T...,"Action & Adventure, Comedy, Drama, Science Fic..."


In [16]:
# Export the genres DataFrame (merged_df2) to a tab-separated .tsv file,
# without writing the row index
merged_df2.to_csv('RottenTomatoes_Genres.tsv', sep='\t', index=False)

In [17]:
# Build the directors export: drop the 'sentiment' and 'genres' columns, then
# relabel the remaining ones:
#   'text'        -> 'Title'
#   'year'        -> 'Year'
#   'directors'   -> 'Conference'
#   'movie_title' -> 'Author Names'
# NOTE(review): the target names presumably follow the schema expected by
# whatever consumes the exported .tsv — confirm.
merged_df3 = (
    merged_df
    .drop(columns=['sentiment', 'genres'])
    .rename(columns={
        'text': 'Title',
        'year': 'Year',
        'directors': 'Conference',
        'movie_title': 'Author Names',
    })
)
merged_df3.head()

Unnamed: 0,Title,Year,Author Names,Conference
0,A fantasy adventure that fuses Greek mythology...,2010,Percy Jackson & the Olympians: The Lightning T...,Chris Columbus
1,"Uma Thurman as Medusa, the gorgon with a coiff...",2010,Percy Jackson & the Olympians: The Lightning T...,Chris Columbus
2,With a top-notch cast and dazzling special eff...,2010,Percy Jackson & the Olympians: The Lightning T...,Chris Columbus
3,Whether audiences will get behind The Lightnin...,2010,Percy Jackson & the Olympians: The Lightning T...,Chris Columbus
4,What's really lacking in The Lightning Thief i...,2010,Percy Jackson & the Olympians: The Lightning T...,Chris Columbus


In [18]:
# Export the directors DataFrame (merged_df3) to a tab-separated .tsv file,
# without writing the row index
merged_df3.to_csv('RottenTomatoes_Directors.tsv', sep='\t', index=False)