In [1]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score

# Download NLTK resources if not already downloaded
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

# Example dataset
reviews = [
    ("This product is great", "positive"),
    ("Worst product ever", "negative"),
    ("I love this product", "positive"),
    ("Disappointed with the quality", "negative")
]

# Preprocessing function
def preprocess(text):
    # Tokenization
    tokens = word_tokenize(text.lower())
    # Remove stopwords
    stop_words = set(stopwords.words('english'))
    filtered_tokens = [word for word in tokens if word not in stop_words]
    # Lemmatization
    lemmatizer = WordNetLemmatizer()
    lemmatized_tokens = [lemmatizer.lemmatize(word) for word in filtered_tokens]
    return ' '.join(lemmatized_tokens)

# Preprocess reviews
preprocessed_reviews = [(preprocess(text), sentiment) for text, sentiment in reviews]

# Split dataset into features and labels
X = [review[0] for review in preprocessed_reviews]
y = [review[1] for review in preprocessed_reviews]

# Vectorize features
vectorizer = CountVectorizer()
X_vectorized = vectorizer.fit_transform(X)

# Split dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_vectorized, y, test_size=0.2, random_state=42)

# Train Naive Bayes classifier
classifier = MultinomialNB()
classifier.fit(X_train, y_train)

# Predict on test set
y_pred = classifier.predict(X_test)

# Evaluate model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Output reviews with sentiment analysis result
print("\nReviews with Sentiment Analysis Result:")
for review_text, sentiment in zip([review[0] for review in reviews], y_pred):
    sentiment_label = "positive" if sentiment == 'positive' else "negative"
    print(f"Review: {review_text}, Sentiment: {sentiment_label}")


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


Accuracy: 0.0

Reviews with Sentiment Analysis Result:
Review: This product is great, Sentiment: positive
