In [12]:
# ==========================================
# MOVIE REVIEW SENTIMENT & RATING PREDICTION
# ==========================================

# -------- IMPORT LIBRARIES --------
import pandas as pd
import re

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# -------- LOAD DATASET --------
df = pd.read_csv(
    "/content/IMDB Dataset.csv",
    engine="python",
    encoding="utf-8",
    on_bad_lines="skip"
)

print("Dataset Loaded Successfully!")
print("Shape:", df.shape)

# -------- PREPROCESS DATA --------
df['sentiment'] = df['sentiment'].str.lower()

def clean_text(text):
    text = str(text).lower()
    text = re.sub(r'<.*?>', '', text)
    text = re.sub(r'[^a-z\s]', '', text)
    return text

df['clean_review'] = df['review'].apply(clean_text)

# -------- FEATURE EXTRACTION --------
tfidf = TfidfVectorizer(max_features=5000)
X = tfidf.fit_transform(df['clean_review'])
y = df['sentiment']

# -------- TRAIN-TEST SPLIT --------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# -------- TRAIN MODEL --------
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# -------- MODEL EVALUATION --------
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print("\nModel Accuracy:", round(accuracy * 100, 2), "%")

# -------- USER REVIEW PREDICTION --------
def predict_movie_review(review):
    cleaned = clean_text(review)
    vector = tfidf.transform([cleaned])

    sentiment = model.predict(vector)[0]
    confidence = model.predict_proba(vector).max()

    # Rating estimation
    if sentiment == 'positive':
        rating = round(5.5 + confidence * 4.5, 1)
    else:
        rating = round(5.5 - confidence * 4.5, 1)

    return sentiment.capitalize(), rating

# -------- USER INPUT --------
print("\n==============================")
print(" MOVIE REVIEW ANALYSIS SYSTEM ")
print("==============================")

user_review = input("\nEnter your movie review: ")

sentiment, rating = predict_movie_review(user_review)

print("\n----- RESULT -----")
print("Sentiment:", sentiment)
print("Predicted Rating:", rating, "/ 10")


Dataset Loaded Successfully!
Shape: (50000, 2)

Model Accuracy: 89.36 %

 MOVIE REVIEW ANALYSIS SYSTEM 

Enter your movie review: “The film was boring and a complete waste of time.”

----- RESULT -----
Sentiment: Negative
Predicted Rating: 1.0 / 10
