In [1]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score

# Toy sentiment corpus: each entry pairs a review with its sentiment label.
samples = [
    ("I love this movie, it is amazing!", "positive"),
    ("What a fantastic experience, truly enjoyed it!", "positive"),
    ("This is the worst film I have ever seen.", "negative"),
    ("Absolutely terrible, I hated it.", "negative"),
    # Mixed/neutral wording, but labelled positive in this dataset.
    ("Not bad, but could have been better.", "positive"),
    ("The story was boring and dull.", "negative"),
]
texts = [review for review, _ in samples]
labels = [sentiment for _, sentiment in samples]

# Hold-out split: the first n_train samples train the model, the rest test it.
n_train = 4
train_texts, test_texts = texts[:n_train], texts[n_train:]
train_labels, test_labels = labels[:n_train], labels[n_train:]

# Bag-of-words features. The vectorizer is fitted on the training reviews
# only; the test reviews are merely transformed with that fitted vocabulary.
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(train_texts)
X_test = vectorizer.transform(test_texts)

# Fit a multinomial Naive Bayes model on the training word counts.
classifier = MultinomialNB()
classifier.fit(X_train, train_labels)

# Score the held-out reviews and report accuracy as a percentage.
predictions = classifier.predict(X_test)
accuracy = accuracy_score(test_labels, predictions)
print(f"Model Accuracy: {accuracy * 100:.2f}%")

# Classify one extra, hand-written review with the already-fitted pipeline.
custom_review = ["I found the movie quite enjoyable and engaging."]
custom_features = vectorizer.transform(custom_review)
custom_prediction = classifier.predict(custom_features)
print(f"Sentiment for custom review: {custom_prediction[0]}")


Model Accuracy: 50.00%
Sentiment for custom review: negative
