In [58]:
import numpy as np
import pandas as pd
from collections import defaultdict

In [63]:
data = {
    "text": [
        "This coffee is fantastic",
        "I am not happy with this service",
        "The weather today is perfect",
        "I hate waiting in long lines",
        "The concert was incredible",
        "I cannot stand this movie",
        "The food here is excellent",
        "This book is really boring",
        "I adore this new gadget",
        "The hotel room is quite dirty",
        "What a lovely garden",
        "This restaurant is overrated",
        "The party was so much fun",
        "I am frustrated with the delay",
        "The view from the top is stunning",
        "This place is a total disappointment",
        "I enjoyed the vacation",
        "The instructions were unclear",
        "This smartphone is amazing",
        "I am so happy with my new job",
        "This experience was terrible",
        "The show was really entertaining",
        "I am not impressed with the results",
        "The music at the event was great",
        "I dislike this city's traffic",
        "This artwork is captivating",
        "The product quality is poor",
        "I had a fantastic time at the festival",
        "This is the worst restaurant I've been to",
        "I love the ambiance of this café",
        "The service was incredibly slow",
        "The beach was absolutely gorgeous",
        "I feel let down by the service",
        "The hike was challenging but rewarding",
        "This phone's camera is fantastic",
        "I am really upset with the customer support",
        "The meal was delightful",
        "The traffic was unbearable",
        "The team did an excellent job",
        "I am disappointed with the outcome",
        "The presentation was impressive",
        "I don't like the design of this product",
        "The experience was truly remarkable",
        "I am dissatisfied with the purchase",
        "The tour guide was informative and friendly",
        "The class was very engaging",
        "The new app is user-friendly",
        "I am not happy with the quality of this service",
        "The festival was a blast",
        "I am really pleased with the new features",
        "The wait was too long",
        "This location is perfect for a vacation",
        "The customer service was terrible",
        "The movie was a masterpiece",
        "The quality of the product was subpar",
        "The event was a huge success",
        "I am frustrated with the constant changes",
        "The decor of the restaurant was stunning",
        "The book was a real page-turner",
        "I am really happy with the outcome",
        "The city is too crowded",
        "The chef did a great job",
        "I am not satisfied with the event planning",
        "The tour was well-organized",
        "I am pleased with the quick resolution"
    ],
    "label": [
        "pos", "neg", "pos", "neg", "pos",
        "neg", "pos", "neg", "pos", "neg",
        "pos", "neg", "pos", "neg", "pos",
        "neg", "pos", "neg", "pos", "pos",
        "neg", "pos", "neg", "pos", "neg",
        "pos", "neg", "pos", "neg", "pos",
        "neg", "pos", "neg", "pos", "neg",
        "pos", "neg", "pos", "neg", "pos",
        "neg", "pos", "neg", "pos", "neg",
        "pos", "neg", "pos", "neg", "pos",
        "neg", "pos", "neg", "pos", "neg",
        "pos", "neg", "pos", "neg", "pos",
        "neg", "pos", "neg", "pos", "pos"
    ]
}

df = pd.DataFrame(data)
df

Unnamed: 0,text,label
0,This coffee is fantastic,pos
1,I am not happy with this service,neg
2,The weather today is perfect,pos
3,I hate waiting in long lines,neg
4,The concert was incredible,pos
...,...,...
60,The city is too crowded,neg
61,The chef did a great job,pos
62,I am not satisfied with the event planning,neg
63,The tour was well-organized,pos


In [67]:
class NaiveBayesClassifier:

    def __init__(self):
        self.vocab = set()
        self.label_count = defaultdict(int)
        self.labelled_word_count = defaultdict(lambda: defaultdict(int))
        self.word_count = defaultdict(int)
        self.label_prob = {}

    def fit(self, X, y):

        for text, label in zip(X, y):

            self.label_count[label]+=1
            words = text.split()

            for word in words:
                self.vocab.add(word)
                self.labelled_word_count[label][word] += 1
                self.word_count[word] += 1
        
        total_count = sum(self.label_count.values())
        for label, count in self.label_count.items():
            self.label_prob[label] = count/total_count

    def predict(self, X):
        prediction = []
        for text in X:
            
            words = text.split()
            class_scores = {}
            for label in self.label_count.keys():
                score = self.label_prob[label]

                for word in words:
                    word_count =  self.labelled_word_count[label][word]+1
                    total_count = self.label_count[label]+len(self.vocab)
                    score *= (word_count)/total_count
                class_scores[label] = score

            l=""
            s=float('-inf')
            for label, score in class_scores.items():
               if(score>s):
                   l=label
                   s=score  
            prediction.append((l, s))
            
        return prediction

In [68]:
X = df['text'].values
y = df['label'].values

nb = NaiveBayesClassifier()

nb.fit(X, y)

input = [
    "This coffee is fantastic",
    "I am not happy with this service",
    "The weather today is perfect",
    "I hate waiting in long lines",
    "The concert was incredible"
]

# predictions = [
#     "pos",  # Prediction for "This coffee is fantastic"
#     "neg",  # Prediction for "I am not happy with this service"
#     "pos",  # Prediction for "The weather today is perfect"
#     "neg",  # Prediction for "I hate waiting in long lines"
#     "pos"   # Prediction for "The concert was incredible"
# ]

y_pred = nb.predict(input)
print(*y_pred, sep = '\n')

('pos', 7.846153846153846e-08)
('pos', 3.707307692307693e-12)
('pos', 2.667692307692308e-09)
('neg', 3.91644667395273e-12)
('pos', 2.8900000000000006e-07)
