In [1]:
# Install necessary libraries
!pip install nltk scikit-learn

# Import libraries
import nltk
from nltk.corpus import movie_reviews
import random
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

# Download NLTK data
nltk.download('movie_reviews')

# One seed shared by the shuffle and the train/test split so that reruns
# produce the same partition and therefore comparable metrics.
SEED = 42

# Load the dataset as (token list, category) pairs, one per review file
documents = [(list(movie_reviews.words(fileid)), category)
             for category in movie_reviews.categories()
             for fileid in movie_reviews.fileids(category)]

# Shuffle with a dedicated, seeded generator. An unseeded shuffle changes the
# input order on every run, which makes the split below differ between runs
# even though the split itself uses a fixed random_state.
random.Random(SEED).shuffle(documents)

# Prepare data: one space-joined string per review, plus its label
texts = [" ".join(tokens) for tokens, _ in documents]
labels = [category for _, category in documents]

# Split dataset into train and test (80/20); stratify keeps the class
# proportions of `labels` the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    texts, labels, test_size=0.2, random_state=SEED, stratify=labels)

# Bag-of-words features: the vocabulary is fitted on the training texts only
# and then reused unchanged to encode the test texts.
vectorizer = CountVectorizer()
X_train_counts = vectorizer.fit_transform(X_train)
X_test_counts = vectorizer.transform(X_test)

# Fit a multinomial Naive Bayes classifier on the training counts
nb = MultinomialNB().fit(X_train_counts, y_train)

# Predict labels for the held-out test set
y_pred = nb.predict(X_test_counts)

# Report overall accuracy and the per-class precision/recall/F1 table
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print("Accuracy:", accuracy)
print("\nClassification Report:\n", report)

# Sanity check on a hand-written review, encoded with the same vectorizer
test_review = ["I loved the movie, it was fantastic!"]
test_count = vectorizer.transform(test_review)
prediction = nb.predict(test_count)
print("Prediction for test review:", prediction[0])




[nltk_data] Downloading package movie_reviews to /root/nltk_data...
[nltk_data]   Unzipping corpora/movie_reviews.zip.


Accuracy: 0.815

Classification Report:
               precision    recall  f1-score   support

         neg       0.80      0.83      0.82       200
         pos       0.83      0.80      0.81       200

    accuracy                           0.81       400
   macro avg       0.82      0.81      0.81       400
weighted avg       0.82      0.81      0.81       400

Prediction for test review: pos
