In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report


In [2]:
data = {
    'review': [
        "This phone is amazing, battery lasts long",
        "Worst product ever, waste of money",
        "Excellent sound quality and great design",
        "Terrible customer service and bad quality",
        "I love this laptop, super fast and smooth",
        "The product stopped working after 2 days",
        "Really good camera and easy to use",
        "Very disappointed, not worth the price"
    ],
    'sentiment': [
        "positive", "negative", "positive", "negative",
        "positive", "negative", "positive", "negative"
    ]
}

df = pd.DataFrame(data)
print(df)


                                      review sentiment
0  This phone is amazing, battery lasts long  positive
1         Worst product ever, waste of money  negative
2   Excellent sound quality and great design  positive
3  Terrible customer service and bad quality  negative
4  I love this laptop, super fast and smooth  positive
5   The product stopped working after 2 days  negative
6         Really good camera and easy to use  positive
7     Very disappointed, not worth the price  negative


In [3]:
X = df['review']
y = df['sentiment']

vectorizer = CountVectorizer(stop_words='english')
X_vectorized = vectorizer.fit_transform(X)

# Split into train/test
X_train, X_test, y_train, y_test = train_test_split(
    X_vectorized, y, test_size=0.25, random_state=42
)


In [4]:
model = MultinomialNB()
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)


In [5]:
new_reviews = [
    "Battery backup is great and phone works fast",
    "Worst experience ever, it broke in a week",
    "Really happy with the purchase",
    "Not good, very low quality"
]

new_vectorized = vectorizer.transform(new_reviews)
predictions = model.predict(new_vectorized)

for review, sentiment in zip(new_reviews, predictions):
    print(f"Review: {review} --> Sentiment: {sentiment}")


Review: Battery backup is great and phone works fast --> Sentiment: positive
Review: Worst experience ever, it broke in a week --> Sentiment: positive
Review: Really happy with the purchase --> Sentiment: positive
Review: Not good, very low quality --> Sentiment: positive


In [6]:
# using if-idf

In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report


In [8]:
data = {
    'review': [
        "This phone is amazing, battery lasts long",
        "Worst product ever, waste of money",
        "Excellent sound quality and great design",
        "Terrible customer service and bad quality",
        "I love this laptop, super fast and smooth",
        "The product stopped working after 2 days",
        "Really good camera and easy to use",
        "Very disappointed, not worth the price"
    ],
    'sentiment': [
        "positive", "negative", "positive", "negative",
        "positive", "negative", "positive", "negative"
    ]
}

df = pd.DataFrame(data)


TF-IDF Vectorization

TF (Term Frequency): How often a word appears in a document.

IDF (Inverse Document Frequency): How rare a word is across all documents.

Together: Highlights important unique words and reduces weight of common words.

In [9]:
X = df['review']
y = df['sentiment']

vectorizer = TfidfVectorizer(stop_words='english')
X_tfidf = vectorizer.fit_transform(X)

# Split into train/test
X_train, X_test, y_train, y_test = train_test_split(
    X_tfidf, y, test_size=0.25, random_state=42
)


In [10]:
model = LogisticRegression()
model.fit(X_train, y_train)

# Predictions
y_pred = model.predict(X_test)


In [11]:
new_reviews = [
    "Battery backup is great and phone works fast",
    "Worst experience ever, it broke in a week",
    "Really happy with the purchase",
    "Not good, very low quality"
]

new_tfidf = vectorizer.transform(new_reviews)
predictions = model.predict(new_tfidf)

for review, sentiment in zip(new_reviews, predictions):
    print(f"Review: {review} --> Sentiment: {sentiment}")


Review: Battery backup is great and phone works fast --> Sentiment: positive
Review: Worst experience ever, it broke in a week --> Sentiment: positive
Review: Really happy with the purchase --> Sentiment: positive
Review: Not good, very low quality --> Sentiment: positive
