In [None]:
import re
import random
import nltk
from nltk import NaiveBayesClassifier
from nltk.corpus import stopwords

# Fetch the NLTK stop-word corpus that preprocess_text reads through
# stopwords.words('english').
nltk.download('stopwords')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\HanDong\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\HanDong\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [None]:
def preprocess_text(text, stop_words=None):
    """Normalise a review into a list of lowercase, stop-word-free tokens.

    The text is lowercased, every character other than ``a``-``z`` and
    whitespace is deleted, the result is split on whitespace, and tokens
    found in the stop-word collection are dropped.

    Args:
        text: Raw review text.
        stop_words: Optional iterable of words to discard. When omitted,
            NLTK's English list (``stopwords.words('english')``) is used.

    Returns:
        list[str]: The surviving tokens, in their original order.
    """
    if stop_words is None:
        # Look the list up once per call; evaluating
        # stopwords.words('english') inside the filter below would repeat
        # the lookup (and a linear list scan) for every single token.
        stop_words = stopwords.words('english')
    stop_set = frozenset(stop_words)

    text = text.lower()
    # NOTE(review): punctuation (apostrophes included) is stripped before the
    # stop-word check, so "couldn't" becomes "couldnt"; the notebook's feature
    # listing shows "couldnt" surviving as a feature. Confirm whether
    # contractions are meant to be filtered out.
    text = re.sub(r'[^a-z\s]', '', text)
    return [word for word in text.split() if word not in stop_set]

In [3]:
def build_naive_bayes_classifier(positive_data, negative_data):
    """Train an NLTK Naive Bayes classifier on labelled review texts.

    Each review is tokenised with ``preprocess_text`` and turned into a
    bag-of-words feature dict (``{token: True}``). Reviews from
    ``positive_data`` are labelled ``'pos'`` and those from
    ``negative_data`` are labelled ``'neg'``.

    Args:
        positive_data: Iterable of review strings to label ``'pos'``.
        negative_data: Iterable of review strings to label ``'neg'``.

    Returns:
        The classifier produced by ``NaiveBayesClassifier.train``.
    """
    # Positives are consumed first, then negatives, before shuffling.
    labelled_sources = (('pos', positive_data), ('neg', negative_data))

    training_set = []
    for label, reviews in labelled_sources:
        for raw_review in reviews:
            tokens = preprocess_text(raw_review)
            training_set.append((dict.fromkeys(tokens, True), label))

    # Shuffles in place using the module-level random generator.
    random.shuffle(training_set)
    return NaiveBayesClassifier.train(training_set)

In [4]:
def predict_sentiments(classifier, test_data):
    """Classify every review in ``test_data`` and return the labels in order.

    Args:
        classifier: Object exposing ``classify(featureset)``.
        test_data: Iterable of raw review strings.

    Returns:
        list: One predicted label per review, in input order.
    """
    # Each review is tokenised with preprocess_text and presented to the
    # classifier as a {token: True} feature dict.
    return [
        classifier.classify(dict.fromkeys(preprocess_text(review), True))
        for review in test_data
    ]

In [5]:
def evaluate_classifier(predictions, true_labels):
    """Return the fraction of predictions that equal their true label.

    Args:
        predictions: Iterable of predicted labels.
        true_labels: Iterable of reference labels, aligned position by
            position with ``predictions``.

    Returns:
        float: ``correct / total``; ``0.0`` when there is nothing to score.

    Raises:
        ValueError: If the two inputs differ in length.
    """
    # Materialise so generators work and lengths can be compared.
    predictions = list(predictions)
    true_labels = list(true_labels)

    # zip() would silently drop the unmatched tail and yield a misleading
    # score, so reject misaligned inputs explicitly.
    if len(predictions) != len(true_labels):
        raise ValueError(
            "predictions and true_labels must have the same length "
            f"(got {len(predictions)} and {len(true_labels)})"
        )

    # Avoid dividing by zero when there are no samples.
    if not true_labels:
        return 0.0

    correct = sum(pred == label for pred, label in zip(predictions, true_labels))
    return correct / len(true_labels)

In [6]:
# Toy sentiment corpus.
# Reviews passed to the classifier builder as positive examples.
positive_reviews = [
    "I loved the movie! The acting was great.",
    "The plot was engaging and the characters were well-developed.",
    "The visuals were amazing. Highly recommended.",
    "The movie exceeded my expectations. A must-watch!",
]

# Reviews passed to the classifier builder as negative examples.
negative_reviews = [
    "The movie was a complete disappointment.",
    "I couldn't stand the acting and the plot was confusing.",
    "The film lacked depth and failed to impress.",
    "It was a waste of time and money.",
]

# Reviews the trained classifier is asked to label.
test_reviews = [
    "The movie was fantastic! The storyline was captivating.",
    "I didn't enjoy the film. The acting was mediocre.",
    "Overall, the movie was decent, but not exceptional.",
    "The plot had potential, but execution fell short.",
]

# Reference labels for test_reviews, position by position.
true_labels = [
    'pos',
    'neg',
    'pos',
    'neg',
]

In [7]:
# Seed the module-level RNG so the random.shuffle call made while building the
# classifier is repeatable on every "Restart & Run All".
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

# Every test review needs exactly one reference label.
assert len(test_reviews) == len(true_labels), \
    "test_reviews and true_labels differ in length"

# Build classifier
classifier = build_naive_bayes_classifier(positive_reviews, negative_reviews)

# Predict
predictions = predict_sentiments(classifier, test_reviews)

# Evaluate
accuracy = evaluate_classifier(predictions, true_labels)

print("Predictions:", predictions)
print("Accuracy:", accuracy)
classifier.show_most_informative_features()

Predictions: ['pos', 'neg', 'pos', 'pos']
Accuracy: 0.75
Most Informative Features
                   movie = True              pos : neg    =      1.7 : 1.0
                   movie = None              neg : pos    =      1.4 : 1.0
                 amazing = None              neg : pos    =      1.3 : 1.0
              characters = None              neg : pos    =      1.3 : 1.0
                complete = None              pos : neg    =      1.3 : 1.0
               confusing = None              pos : neg    =      1.3 : 1.0
                 couldnt = None              pos : neg    =      1.3 : 1.0
                   depth = None              pos : neg    =      1.3 : 1.0
          disappointment = None              pos : neg    =      1.3 : 1.0
                engaging = None              neg : pos    =      1.3 : 1.0
