In [9]:
# Install and Import Required Libraries
import nltk
import joblib
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from nltk.corpus import movie_reviews
nltk.download('movie_reviews')

[nltk_data] Downloading package movie_reviews to
[nltk_data]     C:\Users\lalit\AppData\Roaming\nltk_data...
[nltk_data]   Package movie_reviews is already up-to-date!


True

In [10]:
# Load dataset from NLTK
documents = [(movie_reviews.raw(fileid), category)
             for category in movie_reviews.categories()
             for fileid in movie_reviews.fileids(category)]

# Shuffle dataset
random.shuffle(documents)

# Split into texts and labels
texts = [text for (text, label) in documents]
labels = [1 if label == 'pos' else 0 for (text, label) in documents]

# Create DataFrame
df = pd.DataFrame({'review': texts, 'sentiment': labels})
df.head()

Unnamed: 0,review,sentiment
0,sometimes you just have to tip your hat to a f...,1
1,i heard actor skeet ulrich discussing this fil...,0
2,not a great twelve months for either of the pr...,0
3,i'm currently accepting all future names for d...,0
4,have you ever been in an automobile accident w...,0


In [11]:
# Preprocessing and train-test split
X_train, X_test, y_train, y_test = train_test_split(
    df['review'], df['sentiment'], test_size=0.2, random_state=42)


In [12]:
# TF-IDF Vectorization
tfidf = TfidfVectorizer(stop_words='english', max_df=0.7)
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

In [13]:
# Logistic Regression Model
model = LogisticRegression(max_iter=1000)
model.fit(X_train_tfidf, y_train)

In [14]:
# Model Evaluation
y_pred = model.predict(X_test_tfidf)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.81

Confusion Matrix:
 [[165  51]
 [ 25 159]]

Classification Report:
               precision    recall  f1-score   support

           0       0.87      0.76      0.81       216
           1       0.76      0.86      0.81       184

    accuracy                           0.81       400
   macro avg       0.81      0.81      0.81       400
weighted avg       0.82      0.81      0.81       400



In [15]:
# Predict custom review
your_review = ["I absolutely loved the movie! It was fantastic and inspiring."]
your_review_tfidf = tfidf.transform(your_review)
prediction = model.predict(your_review_tfidf)
print("Sentiment:", "Positive" if prediction[0] == 1 else "Negative")

Sentiment: Positive


In [16]:
# Save the trained model and vectorizer
joblib.dump(model, 'sentiment_model.pkl')
joblib.dump(tfidf, 'tfidf_vectorizer.pkl')

['tfidf_vectorizer.pkl']