# Movie Review Sentiment Prediction

## 📦 Import required libraries



In [None]:
#Basic Python libs
import pandas as pd
import numpy as np

#Text pre-processing
import re
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer

#Machine Learning model
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

## *Loading the dataset*

In [None]:
#Read the IMDB reviews CSV (the path is under /content/drive — assumes Google Drive is already mounted there)
reviews_csv = "/content/drive/MyDrive/IMDB Dataset.csv"
df = pd.read_csv(reviews_csv)

#Preview the first rows
df.head()

NameError: name 'pd' is not defined

## Cleaning the dataset

In [None]:
#Function to clean reviews
def clean_text(text):
  """Normalize one raw review into lowercase, letters-only text.

  Steps: lowercase, drop HTML tags such as <br />, replace every
  non-letter character with a space, then collapse runs of whitespace.

  Args:
    text: Raw review. Missing values (None or float NaN) are treated as
      empty text; any other non-string value is converted with str().

  Returns:
    The cleaned string, without leading or trailing whitespace.
  """
  #Missing values have no .lower(); NaN is the only float not equal to itself
  if text is None or (isinstance(text, float) and text != text):
    return ""
  text = str(text).lower()
  text = re.sub(r"<.*?>", " ", text) #removes HTML tags
  text = re.sub(r"[^a-zA-Z]", " ", text) #keep only letters (digits/punctuation become spaces)
  text = re.sub(r"\s+", " ", text) #collapse repeated whitespace
  return text.strip()

#Store a cleaned version of every review in a new column
df['clean_review'] = df['review'].map(clean_text)

#Preview raw text, cleaned text and label side by side
df[['review', 'clean_review', 'sentiment']].head()

Unnamed: 0,review,clean_review,sentiment
0,One of the other reviewers has mentioned that ...,one of the other reviewers has mentioned that ...,positive
1,A wonderful little production. <br /><br />The...,a wonderful little production the filming tech...,positive
2,I thought this was a wonderful way to spend ti...,i thought this was a wonderful way to spend ti...,positive
3,Basically there's a family where a little boy ...,basically there s a family where a little boy ...,negative
4,"Petter Mattei's ""Love in the Time of Money"" is...",petter mattei s love in the time of money is a...,positive


# Convert Text to TF-IDF Features


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

#Labels to predict
y = df['sentiment']

#TF-IDF vectorizer with the vocabulary capped at 5000 terms
vectorizer = TfidfVectorizer(max_features=5000)

#Learn the vocabulary from the cleaned reviews and build the feature matrix
X = vectorizer.fit_transform(df['clean_review'])

# Train & Evaluate Logistic Regression Model


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

#Split the cleaned reviews (rather than the pre-computed matrix X) so that
#the TF-IDF vocabulary and IDF weights are learned from training rows only.
reviews_train, reviews_test, y_train, y_test = train_test_split(
    df['clean_review'], y, test_size=0.2, random_state=42)

#Re-create the vectorizer with the settings chosen above and fit it on the
#training reviews only; fitting on every row leaks test-set statistics into
#the features. This re-fitted vectorizer replaces the earlier one, so it is
#the one that is saved and used for predictions further down.
vectorizer = TfidfVectorizer(**vectorizer.get_params())
X_train = vectorizer.fit_transform(reviews_train)
X_test = vectorizer.transform(reviews_test)

#train logistic regression
model = LogisticRegression(max_iter=200)
model.fit(X_train, y_train)

#Prediction on the held-out reviews
y_pred = model.predict(X_test)

#Evaluation
print('Accuracy:', accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Accuracy: 0.8947
              precision    recall  f1-score   support

    negative       0.90      0.88      0.89      4961
    positive       0.89      0.91      0.90      5039

    accuracy                           0.89     10000
   macro avg       0.89      0.89      0.89     10000
weighted avg       0.89      0.89      0.89     10000



# Save Model and Vectorizer


In [None]:
import pickle

#Write the trained classifier and the fitted vectorizer to pickle files
for path, artifact in (
    ("sentiment_model.pkl", model),
    ("tfidf_vectorizer.pkl", vectorizer),
):
  with open(path, "wb") as f:
    pickle.dump(artifact, f)

# Load Saved Model and Vectorizer


In [None]:
#Restore the classifier and the vectorizer from their pickle files
#NOTE: pickle.load can run arbitrary code, so only load files you trust
with open("sentiment_model.pkl", "rb") as model_file:
    loaded_model = pickle.load(model_file)

with open("tfidf_vectorizer.pkl", "rb") as vectorizer_file:
    loaded_vectorizer = pickle.load(vectorizer_file)

# Test Model on Custom Reviews


In [None]:
#Testing on custom reviews
#NOTE: the classifier was trained on 'positive'/'negative' labels only, so the
#reviews marked "neutral" below will still be assigned one of those two labels.
sample_reviews = [
    "This movie kept me in to the end!", #positive
    "It was intresting, but not my cup of tea!", #neutral
    "I dont like this kinda movies.", #negative
    "This movie was okish" #neutral
]

#Apply the same cleaning that was used on the training text, then
#transform using the loaded vectorizer
cleaned_samples = [clean_text(review) for review in sample_reviews]
sample_vecs = loaded_vectorizer.transform(cleaned_samples)

#predicting using loaded model
predictions = loaded_model.predict(sample_vecs)

#Show the original (uncleaned) review next to its predicted label
for review, sentiment in zip(sample_reviews, predictions):
  print(f"Review: {review}\nPredicted Sentiment: {sentiment}\n")

Review: This movie kept me in to the end!
Predicted Sentiment: positive

Review: It was intresting, but not my cup of tea!
Predicted Sentiment: negative

Review: I dont like this kinda movies.
Predicted Sentiment: negative

Review: This movie was okish
Predicted Sentiment: negative



# Install Streamlit & PyCloudflared


In [None]:
!pip -q install pycloudflared streamlit

# Check Files and Current Directory


In [None]:
import os, glob
#Report the working directory and whether each file the app needs is present
print("cwd:", os.getcwd())
for label, path in (
    ("app.py", "app.py"),
    ("model", "sentiment_model.pkl"),
    ("vectorizer", "tfidf_vectorizer.pkl"),
):
  print(f"Have {label}?", os.path.exists(path))

cwd: /content
Have app.py? True
Have model? True
Have vectorizer? True


# Launch Streamlit App via PyCloudflared


In [None]:
from pycloudflared import try_cloudflare
#Stop any Streamlit server left over from an earlier run (|| true ignores "nothing to kill")
!pkill -f "streamlit run" || true
#Start the app in the background on port 8501, sending its output to /content/s.log
!streamlit run app.py --server.address 0.0.0.0 --server.port 8501 --browser.gatherUsageStats false &>/content/s.log &
#Open a Cloudflare tunnel to port 8501; the tunnel URL appears in the cell output
try_cloudflare(port=8501)

^C


Download cloudflared...:   0%|          | 0/41194413 [00:00<?, ?it/s]

 * Running on https://baker-emphasis-reviewer-targeted.trycloudflare.com
 * Traffic stats available on http://127.0.0.1:20241/metrics


Urls(tunnel='https://baker-emphasis-reviewer-targeted.trycloudflare.com', metrics='http://127.0.0.1:20241/metrics', process=<Popen: returncode: None args: ['/usr/local/lib/python3.12/dist-packages/pyc...>)