In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [2]:
# Read the IMDB movie-review CSV into a DataFrame.
# NOTE: despite its name, `url` holds a relative file path, not a web address.
url = "IMDB Dataset.csv"
df = pd.read_csv(filepath_or_buffer=url)

In [3]:
# Data preparation: hold out 20% of the rows for evaluation.
# The fixed random_state keeps the split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    df["review"],
    df["sentiment"],
    test_size=0.2,
    random_state=42,
)


In [4]:
# Convert the raw review text to TF-IDF features.
# max_features caps the vocabulary size; tune it to trade accuracy against memory.
tfidf_vectorizer = TfidfVectorizer(max_features=5000)

# Fit on the training reviews only, then reuse that fitted vectorizer for the
# test reviews so nothing is learned from the held-out text.
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)


In [5]:
# Fit a logistic-regression classifier on the TF-IDF training features.
clf = LogisticRegression(random_state=42)
clf.fit(X=X_train_tfidf, y=y_train)

In [6]:
# Predict a sentiment label for every review in the held-out test set.
y_pred = clf.predict(X=X_test_tfidf)

In [7]:
# Score the test-set predictions against the true labels.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Each report is printed under its own heading. For the confusion matrix,
# rows are the true labels and columns the predicted labels, in sorted label order.
for heading, metric in (
    ("\nClassification Report:", classification_report),
    ("\nConfusion Matrix:", confusion_matrix),
):
    print(heading)
    print(metric(y_test, y_pred))

Accuracy: 0.8959

Classification Report:
              precision    recall  f1-score   support

    negative       0.90      0.88      0.89      4961
    positive       0.89      0.91      0.90      5039

    accuracy                           0.90     10000
   macro avg       0.90      0.90      0.90     10000
weighted avg       0.90      0.90      0.90     10000


Confusion Matrix:
[[4380  581]
 [ 460 4579]]


In [8]:
# Example: classify a single hand-written review with the fitted vectorizer and model.
custom_review = "This movie was absolutely fantastic! I loved every moment of it."

# transform() takes an iterable of documents, so the lone review is wrapped in a list.
custom_review_tfidf = tfidf_vectorizer.transform([custom_review])

# One row went in, so exactly one predicted label comes back; unpack it directly.
(custom_sentiment,) = clf.predict(custom_review_tfidf)

In [9]:
# Report the predicted label; any value other than 'positive' is shown as Negative.
print(
    "\nCustom Review Sentiment: Positive"
    if custom_sentiment == 'positive'
    else "\nCustom Review Sentiment: Negative"
)



Custom Review Sentiment: Positive
