In [3]:
import nltk
import pandas as pd
from nltk.corpus import movie_reviews
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Download the movie_reviews dataset from nltk
nltk.download('movie_reviews')

# Load movie reviews dataset
documents = [(list(movie_reviews.words(fileid)), category)
             for category in movie_reviews.categories()
             for fileid in movie_reviews.fileids(category)]

# Shuffle the documents
import random
random.shuffle(documents)

# Separate features and labels
all_words = nltk.FreqDist(w.lower() for w in movie_reviews.words())
word_features = list(all_words)[:3000]

def document_features(document):
    document_words = set(document)
    features = {word: (word in document_words) for word in word_features}
    return features

featuresets = [(document_features(d), c) for (d,c) in documents]

# Split the dataset into training and testing sets
train_set, test_set = train_test_split(featuresets, test_size=0.25, random_state=42)

# Train a Naive Bayes classifier
classifier = nltk.NaiveBayesClassifier.train(train_set)

# Evaluate the classifier on the test set
accuracy = nltk.classify.accuracy(classifier, test_set)
print(f"Accuracy: {accuracy:.2%}")

# Example: Predict sentiment for a movie review
example_review = "This movie was great! The acting and storyline were fantastic."
features = document_features(example_review.split())
sentiment = classifier.classify(features)
print(f"Predicted Sentiment: {sentiment}")

[nltk_data] Downloading package movie_reviews to
[nltk_data]     C:\Users\amart/nltk_data...
[nltk_data]   Package movie_reviews is already up-to-date!


Accuracy: 79.00%
Predicted Sentiment: neg
