In [8]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer


# Load the labelled dataset. The CSV must provide a 'content' column (raw text)
# and a 'sentiment' column (the label to predict).
data = pd.read_csv('sentiment_emotions.csv')

# Shared NLTK helpers used by the text-cleaning step below.
lemmatizer = WordNetLemmatizer()
stop_words = {*stopwords.words('english')}  # set membership tests are O(1)

def preprocess_text(content):
    """Normalize raw text into a space-separated string of lemmatized tokens.

    The text is tokenized with NLTK; tokens that are not purely alphabetic
    or that are English stopwords are dropped, and the remaining tokens are
    lowercased and lemmatized.

    Args:
        content: Raw text to clean. Non-string values (for example ``None``
            or the float ``NaN`` that pandas yields for an empty CSV cell)
            are treated as empty text instead of raising ``TypeError``.

    Returns:
        The cleaned tokens joined by single spaces, or ``''`` when the input
        is not a string or no token survives filtering.
    """
    # Guard first: nltk.word_tokenize only accepts strings.
    if not isinstance(content, str):
        return ''

    # Lowercase each alphabetic token exactly once, then reuse that form for
    # both the stopword check and lemmatization.
    lowered_tokens = (
        token.lower()
        for token in nltk.word_tokenize(content)
        if token.isalpha()
    )
    return ' '.join(
        lemmatizer.lemmatize(token)
        for token in lowered_tokens
        if token not in stop_words
    )

# Clean every document once up front; the cleaned column feeds the vectorizer.
data['cleaned_text'] = data['content'].apply(preprocess_text)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    data['cleaned_text'], data['sentiment'], test_size=0.2, random_state=42
)

# Learn the vocabulary from the training split only, then reuse it for the
# test split so no information from held-out rows leaks into the features.
vectorizer = CountVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Multinomial Naive Bayes over the bag-of-words counts.
model = MultinomialNB()
model.fit(X_train_vec, y_train)

y_pred = model.predict(X_test_vec)

print(f'Accuracy: {accuracy_score(y_test, y_pred)}')
print('Classification Report:')
# Some labels are never predicted, which leaves their precision undefined.
# zero_division=0 reports those as 0.0 (the same value shown by default)
# without emitting an UndefinedMetricWarning for each averaged metric.
print(classification_report(y_test, y_pred, zero_division=0))

def predict_sentiment(text):
    """Return the label the trained model predicts for a single piece of text.

    The text goes through the same cleaning function and the already-fitted
    vectorizer that were used for training before it is handed to the model.

    Args:
        text: Raw input text to classify.

    Returns:
        The first (and only) element of the model's prediction for the text.
    """
    # The vectorizer expects an iterable of documents, hence the one-item list.
    features = vectorizer.transform([preprocess_text(text)])
    predictions = model.predict(features)
    return predictions[0]

# Demo: classify one hand-written sentence with the trained model.
new_text = "I love this product! It's amazing."
predicted_label = predict_sentiment(new_text)
print(f'Sentiment: {predicted_label}')

Accuracy: 0.321
Classification Report:
              precision    recall  f1-score   support

       anger       0.00      0.00      0.00        19
     boredom       0.00      0.00      0.00        31
       empty       0.00      0.00      0.00       162
  enthusiasm       0.00      0.00      0.00       163
         fun       0.00      0.00      0.00       338
   happiness       0.33      0.33      0.33      1028
        hate       0.38      0.01      0.02       268
        love       0.49      0.31      0.38       762
     neutral       0.35      0.40      0.37      1740
      relief       0.00      0.00      0.00       352
     sadness       0.32      0.14      0.20      1046
    surprise       0.33      0.00      0.01       425
       worry       0.29      0.68      0.40      1666

    accuracy                           0.32      8000
   macro avg       0.19      0.15      0.13      8000
weighted avg       0.30      0.32      0.27      8000

Sentiment: love


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
