In [1]:
# Basic libraries
import pandas as pd
import numpy as np

# For text processing
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score

# For preprocessing
import re


In [2]:
# Toy corpus for the demo (replace with rows loaded from your own CSV later).
# Each review is written next to its label so the pairs are easy to audit.
labelled_reviews = [
    ("I love this product, it's amazing!", 'positive'),
    ("Worst experience ever.", 'negative'),
    ("Not bad, could be better.", 'neutral'),
    ("Absolutely fantastic!", 'positive'),
    ("I hate it so much.", 'negative'),
    ("It's okay, nothing special.", 'neutral'),
    ("Best thing I bought this year!", 'positive'),
    ("Terrible service and rude staff.", 'negative'),
    ("Very happy with the results.", 'positive'),
    ("It doesn’t work at all!", 'negative'),
]

# Column-oriented view of the same pairs: one list per DataFrame column.
data = {
    'text': [review for review, label in labelled_reviews],
    'sentiment': [label for review, label in labelled_reviews],
}

df = pd.DataFrame(data)
df.head()


Unnamed: 0,text,sentiment
0,"I love this product, it's amazing!",positive
1,Worst experience ever.,negative
2,"Not bad, could be better.",neutral
3,Absolutely fantastic!,positive
4,I hate it so much.,negative


In [3]:
# Basic text cleaning
def clean_text(text):
    """Lowercase ``text`` and remove everything except ASCII letters and whitespace.

    Args:
        text: The raw string to normalise. A value that is not a ``str``
            (for example ``None`` or a float NaN standing in for a missing
            cell) is treated as an empty document.

    Returns:
        str: The lowercased text with digits, punctuation and non-ASCII
        characters removed. Whitespace is left untouched, so a removed
        character that sat between two spaces leaves both spaces behind.
    """
    # Non-string values have no .lower(); return an empty document instead
    # of raising AttributeError part-way through a column-wide apply.
    if not isinstance(text, str):
        return ""
    text = text.lower()
    # The text is already lowercase, so a-z covers every letter worth keeping.
    text = re.sub(r"[^a-z\s]", "", text)
    return text

# Store a normalised copy of every review alongside the raw text.
df['cleaned_text'] = df['text'].map(clean_text)
df.head()


Unnamed: 0,text,sentiment,cleaned_text
0,"I love this product, it's amazing!",positive,i love this product its amazing
1,Worst experience ever.,negative,worst experience ever
2,"Not bad, could be better.",neutral,not bad could be better
3,Absolutely fantastic!,positive,absolutely fantastic
4,I hate it so much.,negative,i hate it so much


In [4]:
# Target labels: the sentiment column is used as-is (plain strings; no
# encoding step is applied here).
y = df['sentiment']

# Bag-of-words features: learn a vocabulary from the cleaned reviews and
# count how often each vocabulary word occurs in each review.
# NOTE(review): the vocabulary is fitted on every row, i.e. before any
# train/test split; consider fitting on the training rows only.
corpus = df['cleaned_text']
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(corpus)


In [5]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a multinomial Naive Bayes classifier on the training rows.
# fit() returns the estimator itself, so construction and training chain.
model = MultinomialNB().fit(X_train, y_train)

# Label the held-out rows and report the share that was predicted correctly.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")


Model Accuracy: 50.00%


In [6]:
# Predict your own text: put one or more strings in `sample`.
sample = ["This is the best app ever!"]
# Clean every entry with the same function that prepared the training text,
# then project it onto the vocabulary the vectorizer has already learned.
sample_clean = [clean_text(entry) for entry in sample]
sample_vector = vectorizer.transform(sample_clean)
result = model.predict(sample_vector)
# One line per input, in the same order as `sample`.
for label in result:
    print(f"Sentiment: {label}")


Sentiment: positive
