<a href="https://colab.research.google.com/github/SajlaKM/NaiveBayesClassifier/blob/main/NaiveBayesClassifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics

# Naive Bayes sentiment demo: build a tiny labelled corpus, turn the text into
# bag-of-words counts, fit a multinomial Naive Bayes model and report metrics
# on a held-out split.

# Sample dataset: Positive vs. Negative sentiment classification
data = {
    'message': [
        "I love this movie, it's amazing!",
        "The product is terrible, I hate it.",
        "Such a wonderful experience!",
        "This was the worst decision ever.",
        "I highly recommend this!",
        "Absolutely awful, never buying again."
    ],
    'label': ['pos', 'neg', 'pos', 'neg', 'pos', 'neg']
}

# Tunable settings, kept together so a reader can change them in one place.
LABEL_TO_NUM = {'pos': 1, 'neg': 0}   # string label -> integer class id
CLASS_IDS = [0, 1]                    # class ids, in report order
CLASS_NAMES = ['neg', 'pos']          # display names, aligned with CLASS_IDS
TEST_SIZE = 0.2                       # fraction of rows held out for testing
RANDOM_STATE = 42                     # fixed seed so the split is reproducible

# Convert dataset to DataFrame
msg = pd.DataFrame(data)

# Convert labels to numerical values (pos=1, neg=0)
msg['labelnum'] = msg['label'].map(LABEL_TO_NUM)

# Series.map() yields NaN for any label missing from the mapping; fail here
# with a clear message instead of later inside the classifier's fit().
assert msg['labelnum'].notna().all(), "unmapped value found in 'label' column"

# Extract features (X) and labels (y)
X = msg['message']
y = msg['labelnum']

# Print feature and label values
print("Messages:\n", X)
print("\nLabels:\n", y)

# Split into train and test sets (80% train, 20% test).
# NOTE: the split is not stratified, so with a different seed the test set
# may contain only one class; the evaluation below is written to cope with that.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# Convert text into numerical feature vectors using CountVectorizer.
# The vocabulary is learned from the training messages only; the test messages
# are transformed with that same vocabulary.
vectorizer = CountVectorizer()
X_train_transformed = vectorizer.fit_transform(X_train)
X_test_transformed = vectorizer.transform(X_test)

# Train Naïve Bayes model
nb_classifier = MultinomialNB()
nb_classifier.fit(X_train_transformed, y_train)

# Make predictions
y_pred = nb_classifier.predict(X_test_transformed)

# Evaluate model performance.
# zero_division=0 makes an undefined precision/recall (no predicted or no true
# positives in the test set) evaluate to 0 without emitting a warning.
accuracy = metrics.accuracy_score(y_test, y_pred)
precision = metrics.precision_score(y_test, y_pred, zero_division=0)
recall = metrics.recall_score(y_test, y_pred, zero_division=0)

# Print results
print(f"\nAccuracy: {accuracy * 100:.2f}%")
print(f"Precision: {precision * 100:.2f}%")
print(f"Recall: {recall * 100:.2f}%")

# Passing labels= pins the report to both classes in a fixed order, so the
# target_names always line up even if one class is absent from the test split.
report = metrics.classification_report(
    y_test,
    y_pred,
    labels=CLASS_IDS,
    target_names=CLASS_NAMES,
    zero_division=0,
)
print("\nClassification Report:\n", report)


Messages:
 0         I love this movie, it's amazing!
1      The product is terrible, I hate it.
2             Such a wonderful experience!
3        This was the worst decision ever.
4                 I highly recommend this!
5    Absolutely awful, never buying again.
Name: message, dtype: object

Labels:
 0    1
1    0
2    1
3    0
4    1
5    0
Name: labelnum, dtype: int64

Accuracy: 100.00%
Precision: 100.00%
Recall: 100.00%

Classification Report:
               precision    recall  f1-score   support

         neg       1.00      1.00      1.00         1
         pos       1.00      1.00      1.00         1

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2

