# Amazon Product Reviews Sentiment Analysis
This notebook performs sentiment analysis on Amazon product reviews using the VADER sentiment analyzer from the NLTK library. It loads the review data with error handling, preprocesses the review text, scores each review, and visualizes the results.

In [None]:
# Import necessary packages
import pandas as pd
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import matplotlib.pyplot as plt
from wordcloud import WordCloud
import re

# Download the VADER lexicon only when it is not already installed.
# nltk.data.find raises LookupError when the resource is absent from every
# NLTK data directory, so re-running this cell (or running it offline with the
# lexicon already present) skips the network round-trip.
try:
    nltk.data.find('sentiment/vader_lexicon.zip')
except LookupError:
    nltk.download('vader_lexicon')

# Initialize the sentiment analyzer used to score the cleaned reviews
sentiments = SentimentIntensityAnalyzer()


In [None]:
# Load the dataset with error handling.
# A missing file is re-raised as an exception rather than calling exit():
# inside a notebook exit() asks the kernel to shut down, whereas an exception
# stops "Run All" at this cell and leaves the kernel alive for a retry.
try:
    data = pd.read_csv('amazon_reviews.csv')
except FileNotFoundError as err:
    raise FileNotFoundError(
        "Error: CSV file not found. Please ensure the file is in the correct location."
    ) from err
else:
    # Only reached when the read succeeded, so `data` is guaranteed to exist
    print(f"Data loaded successfully with {len(data)} entries")


In [None]:
# Basic data cleaning
# Drop rows whose review text is missing. The explicit .copy() makes the
# result an independent frame, so the column assignments made further down do
# not emit SettingWithCopyWarning on pandas versions that flag the filtered
# result as a possible copy of the original.
data = data.dropna(subset=['reviewText']).copy()
print(f"Data after removing missing reviews: {len(data)} entries")


In [None]:
# Text preprocessing function
def preprocess_text(text):
    """Normalize one review to lowercase alphabetic words.

    Every character that is neither an ASCII letter nor whitespace is removed,
    the result is lowercased, and leading/trailing whitespace is stripped.

    Args:
        text: The raw review. Normally a ``str``; other values (for example a
            number that ended up in a mixed-dtype column) are converted with
            ``str()`` instead of raising ``TypeError`` inside ``re.sub``.
            ``None`` and float NaN are treated as "no text".

    Returns:
        The cleaned string; ``''`` when nothing alphabetic remains or the
        input is missing.

    NOTE(review): removing punctuation and case happens before VADER scoring;
    VADER's heuristics presumably use those cues (e.g. "!!!" or ALL CAPS) --
    confirm whether scoring the raw text would be preferable.
    """
    if not isinstance(text, str):
        # `text != text` is only true for NaN; guard with isinstance so the
        # comparison is never evaluated on exotic objects.
        if text is None or (isinstance(text, float) and text != text):
            return ''
        text = str(text)
    # Remove non-alphabetic characters, convert to lowercase, and strip whitespace
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    return text.lower().strip()

# Clean every review and keep the result alongside the raw text
data['cleaned_reviews'] = data['reviewText'].map(preprocess_text)


In [None]:
# Sentiment Analysis: Adding Positive, Negative, and Neutral columns
# Score each review exactly once and reuse the returned dict for all three
# columns, instead of re-running the analyzer once per column.
polarity = data['cleaned_reviews'].map(sentiments.polarity_scores)
for column, key in (('Positive', 'pos'), ('Negative', 'neg'), ('Neutral', 'neu')):
    # Bind `key` as a default argument so each lambda reads its own key
    data[column] = polarity.map(lambda scores, key=key: scores[key])


In [None]:
# Total each sentiment column; the unpacking order matches the column order
positive_sum, negative_sum, neutral_sum = (
    data[column].sum() for column in ('Positive', 'Negative', 'Neutral')
)

# Report the three totals under a single heading, one per line
print(
    "Sentiment Summary:",
    f"Positive Sentiment: {positive_sum}",
    f"Negative Sentiment: {negative_sum}",
    f"Neutral Sentiment: {neutral_sum}",
    sep="\n",
)


In [None]:
# Pie chart of the summed sentiment scores
labels = ['Positive', 'Negative', 'Neutral']
sizes = [positive_sum, negative_sum, neutral_sum]
colors = ['#1f77b4', '#ff7f0e', '#2ca02c']
explode = (0.1, 0, 0)  # Pull the first slice (Positive) away from the centre

# Draw on an explicit figure/axes pair rather than the implicit pyplot state
fig, ax = plt.subplots(figsize=(8, 8))
ax.pie(
    sizes,
    explode=explode,
    labels=labels,
    colors=colors,
    autopct='%1.1f%%',
    shadow=True,
    startangle=140,
)
ax.set_title('Sentiment Distribution of Amazon Reviews')
plt.show()


In [None]:
# Build one word cloud from the text of every cleaned review
all_reviews = ' '.join(data['cleaned_reviews'])
cloud_builder = WordCloud(width=800, height=400, background_color='white')
wordcloud = cloud_builder.generate(all_reviews)

# Render the cloud as an image with the axes hidden
fig, ax = plt.subplots(figsize=(10, 5))
ax.imshow(wordcloud, interpolation='bilinear')
ax.axis('off')
ax.set_title('Word Cloud of Amazon Reviews')
plt.show()


In [None]:
# Show a heading followed by the leading rows of the processed frame
print('Processed Data Sample:', data.head(), sep='\n')