In [32]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize


nltk.download('punkt_tab')
nltk.download('stopwords')

[nltk_data] Downloading package punkt_tab to
[nltk_data]     /Users/guilhermepifferchristo/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/guilhermepifferchristo/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [33]:
def preprocess_text(text):
    tokens = word_tokenize(text.lower())
    tokens = [word for word in tokens if word.isalnum()]

    stop_words = set(stopwords.words('english'))
    tokens = [word for word in tokens if word not in stop_words]
    return ' '.join(tokens)

In [34]:

data = {
    "text": [
        "I love this product, it is amazing!",
        "This is the worst experience I've ever had.",
        "The quality is excellent, I highly recommend it.",
        "I hated the service, very disappointing.",
        "Absolutely fantastic, exceeded my expectations!",
        "Not worth the money, very poor quality."
    ],
    "sentiment": ["positive", "negative", "positive", "negative", "positive", "negative"]
}
df = pd.DataFrame(data)
df.describe()

df['cleaned_text'] = df['text'].apply(preprocess_text)

X = df['cleaned_text']
y = df['sentiment']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42)

In [35]:
vectorizer = TfidfVectorizer()
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

model = MultinomialNB()
model.fit(X_train_tfidf, y_train)

y_pred = model.predict(X_test_tfidf)
print("Classification Report:")
print(classification_report(y_test, y_pred))

Classification Report:
              precision    recall  f1-score   support

    negative       0.00      0.00      0.00         2
    positive       0.33      1.00      0.50         1

    accuracy                           0.33         3
   macro avg       0.17      0.50      0.25         3
weighted avg       0.11      0.33      0.17         3



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [36]:
new_text = "I really enjoyed the product, it was fantastic!"
cleaned_new_text = preprocess_text(new_text)
new_text_vectorized = vectorizer.transform([cleaned_new_text])
prediction = model.predict(new_text_vectorized)
print(f"The sentiment for the new text is: {prediction[0]}")

The sentiment for the new text is: positive
