In [1]:
!pip install pandas nltk scikit-learn




In [3]:
import pandas as pd
import nltk
import string

from nltk.corpus import stopwords
from nltk.stem import PorterStemmer

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, accuracy_score


In [None]:
# Fetch the NLTK stop-word corpus used by the text-cleaning step.
nltk.download("stopwords")


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\user\AppData\Roaming\nltk_data...


In [None]:
# Load the labelled reviews. Later cells read the "Review" column as the text
# and the "Sentiment" column as the label, so fail fast with a clear message
# if either one is absent instead of erroring further down the notebook.
df = pd.read_csv("reviews.csv")

missing_columns = {"Review", "Sentiment"} - set(df.columns)
if missing_columns:
    raise KeyError(
        f"reviews.csv is missing required column(s): {sorted(missing_columns)}"
    )

df.head()


In [None]:
# Shared state for text cleaning: the English stop-word list held as a set
# (cheap membership tests) and a single Porter stemmer instance.
stop_words = set(stopwords.words("english"))
stemmer = PorterStemmer()

# Translation table that deletes every ASCII punctuation character. Built once
# here instead of on every clean_text call.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


def clean_text(text):
    """Normalise one review into a space-separated string of stemmed words.

    The text is lower-cased, ASCII punctuation is removed, the result is split
    on whitespace, words found in ``stop_words`` are dropped, and the remaining
    words are stemmed with ``stemmer``.

    Args:
        text: The raw review. Missing values (``None`` / ``NaN``) are treated
            as an empty review; any other non-string value is converted with
            ``str()`` before cleaning.

    Returns:
        The cleaned review as one string, or ``""`` when no words remain.
    """
    if not isinstance(text, str):
        # A missing cell is not a string: calling .lower() on it would raise
        # AttributeError and abort the whole column-wide apply.
        if pd.isna(text):
            return ""
        text = str(text)

    text = text.lower().translate(_PUNCTUATION_TABLE)

    # NOTE(review): punctuation is removed before the stop-word lookup, so a
    # contraction such as "don't" is compared as "dont". If the stop-word list
    # stores contractions with their apostrophes they will not be filtered
    # here; confirm whether that is intended.
    return " ".join(
        stemmer.stem(word) for word in text.split() if word not in stop_words
    )

# Store the cleaned form of every review next to the raw text, then preview
# the first rows to sanity-check the result.
df['Cleaned_Review'] = df['Review'].map(clean_text)
df.head()


In [None]:
y = df['Sentiment']

# Learn the vocabulary from the training rows only, so that words appearing
# solely in held-out reviews do not leak into the feature space.
# The split arguments (test_size=0.25, random_state=42) must mirror the
# train_test_split call that later splits X and y: for the same row count and
# seed, scikit-learn selects the same rows, so "training rows" means the same
# thing in both places.
train_text, _held_out_text = train_test_split(
    df['Cleaned_Review'], test_size=0.25, random_state=42
)
vectorizer = CountVectorizer()
vectorizer.fit(train_text)

# Encode every review with that vocabulary; X keeps one row per row of df, so
# it stays aligned with y for the later split.
X = vectorizer.transform(df['Cleaned_Review'])


In [None]:
# Hold out a quarter of the rows for evaluation; the fixed seed makes the
# partition reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.25)


In [None]:
# Train a multinomial Naive Bayes classifier on the training split.
# fit() returns the estimator itself, so construction and fitting can be chained.
model = MultinomialNB().fit(X_train, y_train)
model


In [None]:
# Predict on the held-out split, then report overall accuracy followed by the
# per-class precision / recall / F1 table.
y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print(report)


In [None]:
# Try the trained model on a few hand-written reviews.
new_reviews = [
    "The product quality is amazing",
    "Terrible experience and bad service",
    "It is okay, not great not bad",
]

# Unseen text goes through the same cleaning function and the already-fitted
# vectorizer (transform only, no refitting) before prediction.
new_reviews_cleaned = list(map(clean_text, new_reviews))
new_reviews_vectorized = vectorizer.transform(new_reviews_cleaned)
predictions = model.predict(new_reviews_vectorized)

# One "Review / Sentiment" pair per input, with a blank line after each pair.
for review, sentiment in zip(new_reviews, predictions):
    print(f"Review: {review}", f"Sentiment: {sentiment}\n", sep="\n")
