In [None]:
!pip install pandas scikit-learn nltk


In [2]:

import nltk
import random
from nltk.corpus import movie_reviews
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Single seed reused by every stochastic step below so that a fresh
# "Restart Kernel -> Run All" reproduces the same split and the same metrics.
SEED = 42

# Download dataset (only first time)
nltk.download('movie_reviews')

# Load data: one (raw review text, category label) pair per corpus file.
documents = [
    (movie_reviews.raw(fileid), category)
    for category in movie_reviews.categories()
    for fileid in movie_reviews.fileids(category)
]

# Shuffle data with a dedicated, seeded generator. An unseeded random.shuffle
# reorders the documents differently on every run, which makes the fixed
# random_state of train_test_split ineffective (the split still changes).
# NOTE(review): full reproducibility also assumes the corpus reader returns
# categories and file ids in a stable order -- confirm.
random.Random(SEED).shuffle(documents)

# Separate features and labels
texts = [doc[0] for doc in documents]
labels = [doc[1] for doc in documents]

# Split data: 80% train / 20% test
X_train, X_test, y_train, y_test = train_test_split(
    texts, labels, test_size=0.2, random_state=SEED
)

# TF-IDF vectorization. The vectorizer is fitted on the training texts only;
# the test texts are merely transformed with the already-fitted vocabulary.
vectorizer = TfidfVectorizer(stop_words='english', max_df=0.7)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Train model
model = LogisticRegression()
model.fit(X_train_tfidf, y_train)

# Evaluate on the held-out test split
y_pred = model.predict(X_test_tfidf)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n")
print(classification_report(y_test, y_pred))

# Test custom text
def predict_sentiment(text):
    """Return the label predicted for a single piece of text.

    The text is wrapped in a one-element list, transformed with the
    module-level ``vectorizer`` and passed to the module-level ``model``;
    the first (and only) prediction is returned.
    """
    features = vectorizer.transform([text])
    predictions = model.predict(features)
    return predictions[0]

# Print the predicted label for a couple of hand-written reviews.
print("\nCustom Tests:")
for sample_review in (
    "This movie was amazing and wonderful!",
    "This was the worst film I have ever seen.",
):
    print(predict_sentiment(sample_review))


[nltk_data] Downloading package movie_reviews to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package movie_reviews is already up-to-date!


Accuracy: 0.8375

Classification Report:

              precision    recall  f1-score   support

         neg       0.85      0.82      0.83       198
         pos       0.83      0.86      0.84       202

    accuracy                           0.84       400
   macro avg       0.84      0.84      0.84       400
weighted avg       0.84      0.84      0.84       400


Custom Tests:
pos
neg
