In [1]:
# Toy dataset for sentiment analysis.
# Each entry is a (text, label) pair; the labels used here are
# "positive", "negative" and "neutral", with two documents per label.
documents = [
    ("I love this product", "positive"),
    ("This is amazing", "positive"),
    ("I hate this product", "negative"),
    ("This is terrible", "negative"),
    ("This product is okay", "neutral"),
    # NOTE(review): this sentence reads as negative sentiment but is
    # labeled "neutral" -- confirm the label is intentional.
    ("I don't like it", "neutral")
]

from collections import defaultdict
import re

def preprocess_text(text):
    """Lowercase *text* and split it into a list of word tokens.

    Every non-word character (anything that is not a letter, digit or
    underscore) is replaced by a space before splitting on whitespace, so
    punctuation acts as a token separator: "don't" yields "don" and "t".
    """
    normalized = re.sub(r'\W', ' ', text.lower())
    return normalized.split()

# Train a bag-of-words Naive Bayes model from `documents`.
#
# word_counts maps each token to its per-class occurrence counts, in the
# order [positive_count, negative_count, neutral_count].
word_counts = defaultdict(lambda: [0, 0, 0])
# Number of training documents per class, same index order as word_counts.
class_doc_counts = [0, 0, 0]
for doc, label in documents:
    # Any label other than 'positive' / 'negative' is counted as neutral.
    if label == 'positive':
        class_index = 0
    elif label == 'negative':
        class_index = 1
    else:
        class_index = 2
    class_doc_counts[class_index] += 1
    tokens = preprocess_text(doc)
    for token in tokens:
        word_counts[token][class_index] += 1

# Total number of token occurrences seen in each class.
total_positive = sum(counts[0] for counts in word_counts.values())
total_negative = sum(counts[1] for counts in word_counts.values())
total_neutral = sum(counts[2] for counts in word_counts.values())
total_docs = len(documents)

# Maximum-likelihood estimate of P(word | class). A class with no tokens
# gets probability 0.0 for every word instead of raising ZeroDivisionError.
positive_probs = {
    word: counts[0] / total_positive if total_positive else 0.0
    for word, counts in word_counts.items()
}
negative_probs = {
    word: counts[1] / total_negative if total_negative else 0.0
    for word, counts in word_counts.items()
}
neutral_probs = {
    word: counts[2] / total_neutral if total_neutral else 0.0
    for word, counts in word_counts.items()
}

# Class priors P(class) are the fraction of training *documents* in each
# class. Dividing the per-class token totals by the document count (as was
# done before) yields values above 1 that are not probabilities.
prior_positive = class_doc_counts[0] / total_docs if total_docs else 0.0
prior_negative = class_doc_counts[1] / total_docs if total_docs else 0.0
prior_neutral = class_doc_counts[2] / total_docs if total_docs else 0.0

In [2]:
def classify(text):
    """Return the most likely sentiment label for *text*.

    Each class is scored with multinomial Naive Bayes in log space:

        log(prior) + sum(log((count + 1) / (class_total + V)))

    where ``count`` is how often the token occurred in that class,
    ``class_total`` is the number of token occurrences in that class and
    ``V`` is the vocabulary size (add-one / Laplace smoothing). Tokens that
    never appeared in training are skipped.

    Returns "positive", "negative" or "neutral"; ties are resolved in that
    order.
    """
    import math  # local import: the file's import block does not provide math

    class_names = ("positive", "negative", "neutral")
    priors = (prior_positive, prior_negative, prior_neutral)
    class_totals = (total_positive, total_negative, total_neutral)
    vocab_size = len(word_counts)

    # Log space avoids floating-point underflow on long inputs; a class with
    # a zero prior can never win, so it starts at -inf.
    scores = [
        math.log(prior) if prior > 0 else float("-inf")
        for prior in priors
    ]

    for token in preprocess_text(text):
        # .get() instead of [] so the defaultdict is not grown by lookups.
        counts = word_counts.get(token)
        if counts is None:
            # Out-of-vocabulary tokens carry no class information; scoring
            # them would only favor the class with the fewest training tokens.
            continue
        for index, (count, class_total) in enumerate(zip(counts, class_totals)):
            scores[index] += math.log((count + 1) / (class_total + vocab_size))

    # max() returns the first maximal index, preserving the original
    # positive > negative > neutral tie-breaking order.
    best_index = max(range(len(class_names)), key=scores.__getitem__)
    return class_names[best_index]

# Test the classifier: classify one sample sentence with the model trained
# above and print the predicted label.
test_text = "This product is amazing"
predicted_sentiment = classify(test_text)
print(f"Predicted sentiment: {predicted_sentiment}")

Predicted sentiment: positive
