%pip install -U nltk

In [None]:
import nltk
import warnings
# Silence every warning category for the rest of the session so that
# library deprecation notices do not clutter the cell output.
warnings.filterwarnings(action='ignore')

%pip install -U pandas sklearn seaborn matplotlib

In [None]:
import pandas as pd
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Load the review dataset; later cells read its `reviewText` and `Positive`
# columns.
REVIEWS_CSV = "amazon_review.csv"
df = pd.read_csv(REVIEWS_CSV)

In [None]:
# Download only the NLTK resources this notebook uses instead of the whole
# collection ('all' pulls every corpus and model):
#   punkt / punkt_tab - tokenizer models for word_tokenize (the required
#                       name depends on the installed NLTK version)
#   stopwords         - English stop-word list
#   wordnet / omw-1.4 - data for WordNetLemmatizer
#   vader_lexicon     - lexicon for SentimentIntensityAnalyzer
for resource in ('punkt', 'punkt_tab', 'stopwords', 'wordnet', 'omw-1.4', 'vader_lexicon'):
    nltk.download(resource)

In [None]:
# Lazily-filled cache for the stop-word set and lemmatizer so they are built
# once rather than on every call.
_PREPROCESS_RESOURCES = {}


def _get_preprocess_resources():
    """Return the cached ``(stop_words, lemmatizer)`` pair, building it on first use."""
    if not _PREPROCESS_RESOURCES:
        _PREPROCESS_RESOURCES['stop_words'] = frozenset(stopwords.words('english'))
        _PREPROCESS_RESOURCES['lemmatizer'] = WordNetLemmatizer()
    return _PREPROCESS_RESOURCES['stop_words'], _PREPROCESS_RESOURCES['lemmatizer']


def preprocess_text(text):
    """Normalize a review into a space-joined string of lemmatized tokens.

    The text is lower-cased and tokenized; tokens that are not purely
    alphabetic or that are English stop words are dropped, and the rest
    are lemmatized.

    Args:
        text: The raw review. Non-string values are converted with ``str``;
            ``None`` and float NaN (how pandas represents a missing cell)
            are treated as an empty review.

    Returns:
        The processed text, or ``''`` when the input is missing, blank, or
        contains no tokens that survive filtering.
    """
    # Without this guard str(NaN) would become the literal token "nan".
    if text is None or (isinstance(text, float) and text != text):
        return ''

    lowered = str(text).lower()
    if not lowered.strip():
        return ''

    stop_words, lemmatizer = _get_preprocess_resources()
    # NOTE(review): NLTK's English stop-word list appears to include negations
    # such as "not" and "no"; removing them may flip the polarity seen by a
    # downstream sentiment scorer - confirm this is intended.
    return ' '.join(
        lemmatizer.lemmatize(token)
        for token in word_tokenize(lowered)
        if token.isalpha() and token not in stop_words
    )

In [None]:
# Run every raw review through preprocess_text and keep the cleaned text in a
# new column alongside the original.
df['processed_review'] = df['reviewText'].map(preprocess_text)

In [None]:
# Single shared sentiment analyzer instance; get_sentiment reads this
# module-level name on every call.
analyzer = SentimentIntensityAnalyzer()

In [None]:
def get_sentiment(text, threshold=0.0):
    """Classify text as positive (1) or negative (0) from its compound score.

    Args:
        text: The text to score with the module-level ``analyzer``.
        threshold: Minimum compound score that counts as positive. The
            default of ``0.0`` keeps the original behaviour, in which a
            neutral score of exactly 0 is labelled positive.

    Returns:
        ``1`` if the compound score is at least ``threshold``, else ``0``.
    """
    compound = analyzer.polarity_scores(text)['compound']
    return 1 if compound >= threshold else 0

In [None]:
# Score each cleaned review and store the resulting 0/1 label.
df['predicted_sentiment'] = df['processed_review'].map(get_sentiment)

In [None]:
# Cross-tabulate the dataset's `Positive` labels against the predicted ones.
cm = confusion_matrix(
    y_true=df['Positive'],
    y_pred=df['predicted_sentiment'],
)
print("Confusion Matrix:\n", cm)

In [None]:
# Print the per-class metrics report for the predictions.
print("\nClassification Report:\n")
print(
    classification_report(
        y_true=df['Positive'],
        y_pred=df['predicted_sentiment'],
    )
)

In [None]:
# Draw the confusion matrix as an annotated heatmap; the same class names
# label both axes.
sentiment_classes = ['Negative', 'Positive']
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=sentiment_classes,
    yticklabels=sentiment_classes,
)
plt.title('Confusion Matrix')
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.show()