In [3]:
# -*- coding: utf-8 -*-
"""Amazon Review Analysis with spaCy - NER and Sentiment Analysis.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1ABC123XYZ
"""

# Install required packages
!pip install spacy textblob
!python -m spacy download en_core_web_sm

print("✅ Packages installed successfully!")

# Import libraries
import spacy
from textblob import TextBlob
from IPython.display import HTML, display
import sys

# Load spaCy model
try:
    nlp = spacy.load("en_core_web_sm")
    print("✅ spaCy model loaded successfully!")
except Exception as e:
    # Report the failure, then re-raise. Without `nlp` nothing below can run,
    # and swallowing the error here would only surface later as a confusing
    # NameError inside analyze_reviews().
    print(f"❌ Error loading model: {e}")
    raise

# Sample Amazon reviews used as input for the analysis below.
# Each review is written as adjacent string literals (one sentence per line);
# Python joins them into a single string at compile time.
reviews = [
    (
        "I absolutely love the new Kindle Paperwhite. "
        "The battery life is incredible and the screen is so easy on the eyes. "
        "Amazon has outdone itself."
    ),
    (
        "Do not buy the SuperClean vacuum cleaner. "
        "It broke after two weeks and the customer service from SuperClean was terrible. "
        "Worst purchase ever!"
    ),
    (
        "The Samsung Galaxy Watch is a decent product. "
        "The display is nice but the battery doesn't last as long as I'd hoped."
    ),
    (
        "Apple's iPhone 15 Pro Max has an amazing camera system. "
        "The photos are stunning and the performance is blazing fast. "
        "Highly recommended!"
    ),
    (
        "The Bose QuietComfort headphones are disappointing. "
        "The noise cancellation doesn't work well and they're uncomfortable for long periods."
    ),
]

# Report banner: blank line, rule, two title lines, rule.
print(
    "\n" + "=" * 70,
    "AMAZON PRODUCT REVIEW ANALYSIS",
    "Named Entity Recognition & Sentiment Analysis",
    "=" * 70,
    sep="\n",
)

# Analysis function
def _classify_sentiment(polarity, positive_threshold=0.2, negative_threshold=-0.2):
    """Map a polarity score to a ``(label, emoji)`` pair.

    A score strictly above ``positive_threshold`` is POSITIVE, a score strictly
    below ``negative_threshold`` is NEGATIVE, and anything else (including a
    score exactly on a threshold) is NEUTRAL.
    """
    if polarity > positive_threshold:
        return "POSITIVE", "😊"
    if polarity < negative_threshold:
        return "NEGATIVE", "😠"
    return "NEUTRAL", "😐"


def analyze_reviews(reviews, positive_threshold=0.2, negative_threshold=-0.2,
                    entity_labels=("PRODUCT", "ORG")):
    """Run entity extraction and sentiment scoring over each review.

    For every review this prints a short report and collects the findings.
    Entities come from the module-level ``nlp`` pipeline; polarity and
    subjectivity come from ``TextBlob``.

    Args:
        reviews: Iterable of review strings.
        positive_threshold: Polarity strictly above this is labelled POSITIVE.
        negative_threshold: Polarity strictly below this is labelled NEGATIVE.
        entity_labels: Entity labels to keep; entities with any other label
            are dropped.

    Returns:
        A list with one dict per review, in input order, with the keys
        ``review_number`` (1-based), ``text``, ``entities`` (list of
        ``(text, label)`` tuples), ``sentiment``, ``polarity`` and
        ``subjectivity``.
    """
    wanted_labels = frozenset(entity_labels)
    results = []

    for i, review in enumerate(reviews, 1):
        print(f"\n🔍 REVIEW {i}")
        print(f"📝 Text: {review}")

        # NER: keep only the entity types the caller asked for
        doc = nlp(review)
        entities = [(ent.text, ent.label_) for ent in doc.ents if ent.label_ in wanted_labels]
        print(f"🏷️  Extracted Entities: {entities}")

        # Sentiment scores from TextBlob
        scores = TextBlob(review).sentiment
        polarity = scores.polarity
        subjectivity = scores.subjectivity

        sentiment, emoji = _classify_sentiment(polarity, positive_threshold, negative_threshold)

        print(f"💬 Sentiment: {sentiment} {emoji}")
        print(f"📊 Polarity: {polarity:.3f} | Subjectivity: {subjectivity:.3f}")
        print("-" * 60)

        # Keep a structured record so later steps (summary, PDF) can reuse it
        results.append({
            'review_number': i,
            'text': review,
            'entities': entities,
            'sentiment': sentiment,
            'polarity': polarity,
            'subjectivity': subjectivity
        })

    return results

# Analyze every sample review and keep the structured results
results = analyze_reviews(reviews)

# Summary header: blank line, rule, title, rule
print("\n" + "=" * 70, "📈 ANALYSIS SUMMARY", "=" * 70, sep="\n")

# Number of reviews per sentiment label
positive_count = len([r for r in results if "POSITIVE" in r['sentiment']])
negative_count = len([r for r in results if "NEGATIVE" in r['sentiment']])
neutral_count = len([r for r in results if "NEUTRAL" in r['sentiment']])

# Number of extracted entities across all reviews
total_entities = sum(map(len, (r['entities'] for r in results)))

print(
    f"📊 Total Reviews Analyzed: {len(reviews)}",
    f"✅ Positive Reviews: {positive_count}",
    f"❌ Negative Reviews: {negative_count}",
    f"⚖️  Neutral Reviews: {neutral_count}",
    f"🏷️  Total Entities Found: {total_entities}",
    sep="\n",
)

# Entity breakdown by label (second element of each (text, label) tuple)
product_count = len([entity for r in results for entity in r['entities'] if entity[1] == "PRODUCT"])
org_count = len([entity for r in results for entity in r['entities'] if entity[1] == "ORG"])
print(
    "\n🔍 ENTITY BREAKDOWN:",
    f"   Products: {product_count}",
    f"   Brands/Organizations: {org_count}",
    sep="\n",
)

# Generate and download PDF report
!pip install fpdf

from fpdf import FPDF
from datetime import datetime

# Create PDF
pdf = FPDF()
pdf.add_page()
pdf.set_auto_page_break(auto=True, margin=15)

# Title
pdf.set_font('Arial', 'B', 16)
pdf.cell(0, 10, 'Amazon Review Analysis Report', 0, 1, 'C')
pdf.ln(5)

# Date
pdf.set_font('Arial', 'I', 10)
pdf.cell(0, 10, f'Generated on: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}', 0, 1, 'C')
pdf.ln(10)

# Summary
pdf.set_font('Arial', 'B', 14)
pdf.cell(0, 10, 'Executive Summary', 0, 1)
pdf.set_font('Arial', '', 12)
pdf.multi_cell(0, 8, f'''
Total Reviews Analyzed: {len(reviews)}
Positive Reviews: {positive_count}
Negative Reviews: {negative_count}
Neutral Reviews: {neutral_count}
Total Entities Found: {total_entities}
''')
pdf.ln(10)

# Detailed Results
pdf.set_font('Arial', 'B', 14)
pdf.cell(0, 10, 'Detailed Analysis', 0, 1)

for result in results:
    pdf.set_font('Arial', 'B', 12)
    pdf.cell(0, 10, f'Review {result["review_number"]}', 0, 1)

    pdf.set_font('Arial', 'I', 10)
    pdf.multi_cell(0, 6, f'Text: {result["text"]}')

    pdf.set_font('Arial', '', 10)
    pdf.cell(0, 6, f'Entities: {result["entities"]}', 0, 1)
    pdf.cell(0, 6, f'Sentiment: {result["sentiment"]} (Polarity: {result["polarity"]:.3f})', 0, 1)
    pdf.ln(5)

# Save PDF
pdf_output = "amazon_review_analysis_report.pdf"
pdf.output(pdf_output)

# Download the PDF through the browser. This only works inside Google Colab,
# so fall back to just reporting the saved file when google.colab is missing.
try:
    from google.colab import files
except ImportError:
    print(f"✅ PDF report generated: {pdf_output} (not running in Colab, browser download skipped)")
else:
    files.download(pdf_output)
    print(f"✅ PDF report generated and downloaded: {pdf_output}")

Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 12.8/12.8 MB 97.7 MB/s eta 0:00:00
✔ Download and installation successful
You can now load the package via spacy.load('en_core_web_sm')
⚠ Restart to reload dependencies
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.
✅ Packages installed successfully!
✅ spaCy model loaded successfully!

AMAZON PRODUCT REVIEW ANALYSIS
Named Entity Recognition & Sentiment Analysis

🔍 REVIEW 1
📝 Text: I absolutely love the new Kindle Paperwhite. The battery life is incredible and the screen is so easy on the eyes. Amazon has outdone itself.
🏷️  Extracted Entities: [

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

✅ PDF report generated and downloaded: amazon_review_analysis_report.pdf
