In [10]:
import re
from collections import defaultdict
import math

In [11]:
# Toy sentiment corpus: a list of (sentence, label) pairs, positives first.
data = [
    (sentence, "Positive")
    for sentence in (
        "I love this movie",
        "This film is fantastic",
        "What a wonderful experience",
    )
] + [
    (sentence, "Negative")
    for sentence in (
        "I hate this movie",
        "This film is boring and terrible",
    )
]

In [12]:
def preprocess(text):
    """Turn a sentence into a list of lowercase tokens.

    The text is lowercased, every character that is neither a word
    character (``\\w``) nor whitespace is deleted, and the remainder is
    split on runs of whitespace.

    Args:
        text: The raw sentence (a ``str``).

    Returns:
        A list of token strings; empty when the text has no tokens.
    """
    without_punctuation = re.sub(r"[^\w\s]", "", text.lower())
    return without_punctuation.split()

In [16]:
class NaiveBayesClassifier:
    """Multinomial Naive Bayes text classifier with additive smoothing.

    Sentences are tokenised with the module-level ``preprocess`` function.
    For each class the model keeps per-word counts, the total token count
    and the number of training documents; priors are derived from the
    document counts.

    Attributes:
        alpha: Additive (Laplace) smoothing constant, must be >= 0.
        vocab: Set of every token seen during training.
        class_word_counts: ``label -> token -> count``.
        class_totals: ``label -> total number of tokens``.
        class_doc_counts: ``label -> number of training documents``.
        total_docs: Number of training documents seen so far.
        class_priors: ``label -> class_doc_counts[label] / total_docs``.
    """

    def __init__(self, alpha=1):
        """Create an untrained classifier.

        Args:
            alpha: Smoothing constant added to every word count.

        Raises:
            ValueError: If ``alpha`` is negative.
        """
        if alpha < 0:
            raise ValueError("alpha must be non-negative")
        self.alpha = alpha
        self.vocab = set()
        self.class_word_counts = defaultdict(lambda: defaultdict(int))
        self.class_totals = defaultdict(int)
        self.class_priors = defaultdict(float)
        # Document counts are kept on the instance so that repeated calls to
        # train() keep the priors consistent with the accumulated word counts.
        self.class_doc_counts = defaultdict(int)
        self.total_docs = 0

    def train(self, dataset):
        """Update the model with labelled sentences.

        Calling ``train`` more than once is incremental: word counts and
        document counts accumulate, and the priors are recomputed over every
        document seen so far.

        Args:
            dataset: Iterable of ``(sentence, label)`` pairs.
        """
        for sentence, label in dataset:
            words = preprocess(sentence)
            self.class_doc_counts[label] += 1
            self.total_docs += 1
            self.class_totals[label] += len(words)
            self.vocab.update(words)

            label_counts = self.class_word_counts[label]
            for word in words:
                label_counts[word] += 1

        # Recompute every prior from the cumulative counts (not just the
        # labels present in this batch) so they always sum to 1.
        for label, doc_count in self.class_doc_counts.items():
            self.class_priors[label] = doc_count / self.total_docs

    def predict(self, sentence):
        """Score a sentence against every known class.

        Args:
            sentence: The raw sentence to classify.

        Returns:
            A ``(label, scores)`` tuple where ``scores`` maps each class to
            its unnormalised log-probability and ``label`` is the class with
            the highest score (the first such class on ties).

        Raises:
            ValueError: If the model has not been trained yet.
        """
        if not self.class_priors:
            raise ValueError(
                "predict() called before train(): no classes have been learned"
            )

        words = preprocess(sentence)
        vocab_size = len(self.vocab)
        scores = {}

        for label, prior in self.class_priors.items():
            # .get() avoids inserting empty entries into the defaultdicts.
            word_counts = self.class_word_counts.get(label, {})
            # The denominator does not depend on the word, so compute it once.
            denominator = self.class_totals.get(label, 0) + self.alpha * vocab_size

            # Start from the log prior, then add one log-likelihood per token.
            log_prob = math.log(prior)
            for word in words:
                numerator = word_counts.get(word, 0) + self.alpha
                if numerator <= 0 or denominator <= 0:
                    # Zero probability (only possible with alpha == 0):
                    # log(0) is -inf rather than a math domain error.
                    log_prob = -math.inf
                    break
                log_prob += math.log(numerator / denominator)
            scores[label] = log_prob

        return max(scores, key=scores.get), scores

# Fit the classifier on the sample corpus.
nb = NaiveBayesClassifier(alpha=1)
nb.train(data)

# Classify an unseen sentence and report the score of every class.
test_sentence = "this movie is wonderful"
label, scores = nb.predict(test_sentence)

print(f"Predicted Class:{label}")
print("class scores(log-probabilities):")
for cls in scores:
    score = scores[cls]
    print(f"{cls}:{score:.4f}")


    

Predicted Class:Positive
class scores(log-probabilities):
Positive:-10.5161
Negative:-11.3069
