<a href="https://colab.research.google.com/github/Anngladys/AGPLP/blob/main/NLP_with_spaCy_(NER_%26_Sentiment_Analysis).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install spacy
!python -m spacy download en_core_web_sm

# Import necessary libraries
import spacy

# --- 1. Load spaCy Model ---
# It's important to download the model first if you haven't:
# In your terminal or command prompt, run:
# python -m spacy download en_core_web_sm
# This small English model includes capabilities for NER.
# Load the small English pipeline; everything below depends on `nlp`.
# Only the load itself is guarded so that unrelated errors are not mistaken
# for a missing model.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError as err:
    # spacy.load signals a missing/uninstalled model package with OSError.
    print("spaCy model 'en_core_web_sm' not found. Please run:")
    print("python -m spacy download en_core_web_sm")
    print("Then restart your environment and try again.")
    # Raise SystemExit explicitly instead of calling exit(): exit() is a
    # convenience injected by `site`/IPython, is not guaranteed to exist, and
    # in a notebook kernel may not stop the rest of the cell from running
    # (which would then fail with a NameError on `nlp`). A non-zero status
    # also marks the run as failed when executed as a script.
    raise SystemExit(1) from err
else:
    print("spaCy model 'en_core_web_sm' loaded successfully.")

# --- 2. Sample Amazon Product Review Text ---
# A handful of hand-written reviews used purely for demonstration; a real
# pipeline would read these from a dataset instead.
# Each entry is a single string; adjacent literals inside the parentheses are
# joined by Python at compile time, so the values contain no line breaks.
amazon_reviews = [
    (
        "I absolutely love my new Echo Dot! "
        "The sound quality is surprisingly good for its size."
    ),
    (
        "This Samsung Galaxy phone is amazing. "
        "The camera takes stunning photos, but the battery life could be better."
    ),
    (
        "The Apple AirPods Pro are comfortable and have great noise cancellation. "
        "A bit pricey though."
    ),
    (
        "Worst purchase ever. "
        "My XYZ Brand coffee maker broke after a week. "
        "Do not recommend this product!"
    ),
    (
        "Great value for money. "
        "The Logitech MX Master mouse is very ergonomic and precise."
    ),
    (
        "The Kindle Paperwhite is perfect for reading, "
        "but I wish the screen was larger."
    ),
]

# --- 3. Perform Named Entity Recognition (NER) & Sentiment Analysis ---

# Entity labels accepted as candidate product names / brands.
# 'PRODUCT' is the ideal label and 'ORG' commonly covers brands;
# 'WORK_OF_ART' can sometimes capture product models/series. The remaining
# labels (NORP, FAC, LOC, GPE) are deliberately broad fallbacks and will
# produce false positives; a robust solution needs custom rules or a
# fine-tuned model.
PRODUCT_ENTITY_LABELS = frozenset(
    {"ORG", "PRODUCT", "WORK_OF_ART", "NORP", "FAC", "LOC", "GPE"}
)

# Rule-based sentiment lexicon. Defined once, outside the loop, because it
# does not depend on the review being processed.
# For a real application, consider TextBlob, VADER, or a pre-trained model.
positive_keywords = ["love", "amazing", "great", "excellent", "perfect", "good", "precise", "comfortable", "stunning", "value"]
negative_keywords = ["worst", "broke", "not recommend", "pricey", "could be better", "wish"]


def _split_keywords(keywords):
    """Partition keywords into single words and multi-word phrases.

    Returns a ``(words, phrases)`` pair: ``words`` is a set of lowercase
    single-word keywords, ``phrases`` is a list of tuples of lowercase words
    (one tuple per multi-word keyword).
    """
    words = set()
    phrases = []
    for keyword in keywords:
        parts = tuple(keyword.lower().split())
        if len(parts) == 1:
            words.add(parts[0])
        elif parts:
            phrases.append(parts)
    return words, phrases


_POSITIVE_WORDS, _POSITIVE_PHRASES = _split_keywords(positive_keywords)
_NEGATIVE_WORDS, _NEGATIVE_PHRASES = _split_keywords(negative_keywords)


def _score_sentiment(doc):
    """Return the keyword-based sentiment score for a processed review.

    Each positive keyword hit adds 1 and each negative hit subtracts 1.

    Compared with a plain per-token lemma lookup this:
      * matches multi-word keywords (e.g. "not recommend") across consecutive
        tokens -- a single token can never equal a multi-word string;
      * compares both the surface form and the lemma of every token, so a
        keyword written as an inflected form (e.g. "broke", "worst") still
        matches when the lemmatizer normalises the token to something else,
        while inflections such as "loved" still match "love" via the lemma;
      * does not score a token individually once it is part of a matched
        phrase, so the words of "could be better" cannot also be counted as
        positive hits.
    """
    # For each token, the set of lowercase strings it may match against.
    forms = [{token.text.lower(), token.lemma_.lower()} for token in doc]
    in_phrase = [False] * len(forms)
    score = 0

    # Phrases first (negative before positive), marking the tokens they cover.
    for phrases, weight in ((_NEGATIVE_PHRASES, -1), (_POSITIVE_PHRASES, 1)):
        for phrase in phrases:
            width = len(phrase)
            for start in range(len(forms) - width + 1):
                window = range(start, start + width)
                if any(in_phrase[i] for i in window):
                    continue  # tokens already claimed by another phrase
                if all(word in forms[i] for word, i in zip(phrase, window)):
                    score += weight
                    for i in window:
                        in_phrase[i] = True

    # Then single-word keywords on the remaining tokens; a token counts once,
    # with positive taking precedence over negative.
    for claimed, token_forms in zip(in_phrase, forms):
        if claimed:
            continue
        if token_forms & _POSITIVE_WORDS:
            score += 1
        elif token_forms & _NEGATIVE_WORDS:
            score -= 1
    return score


print("\n--- Performing NER and Sentiment Analysis on Sample Reviews ---")

for i, review_text in enumerate(amazon_reviews):
    print(f"\nReview {i+1}: \"{review_text}\"")

    # Process the review text with the spaCy model
    doc = nlp(review_text)

    # Extract product names and brands using the pipeline's NER component.
    # NOTE: product names the default model does not recognise will simply be
    # absent here; catching them requires custom NER rules or training.
    extracted_entities = [
        (ent.text, ent.label_)
        for ent in doc.ents
        if ent.label_ in PRODUCT_ENTITY_LABELS
    ]

    if extracted_entities:
        print("  Extracted Entities (Product Names/Brands):")
        for entity, label in extracted_entities:
            print(f"    - {entity} (Type: {label})")
    else:
        print("  No significant product names or brands extracted by NER for this review.")

    # Rule-based sentiment: the sign of the keyword score decides the label.
    sentiment_score = _score_sentiment(doc)

    if sentiment_score > 0:
        sentiment = "Positive"
    elif sentiment_score < 0:
        sentiment = "Negative"
    else:
        sentiment = "Neutral"

    print(f"  Sentiment: {sentiment} (Score: {sentiment_score})")

print("\n--- Analysis Complete ---")

Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m89.6 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.
spaCy model 'en_core_web_sm' loaded successfully.

--- Performing NER and Sentiment Analysis on Sample Reviews ---

Review 1: "I absolutely love my new Echo Dot! The sound quality is surprisingly good for its size."
  No significant product names or brands extracted by NER for this review.
  Sentiment: Positive (Score: 2)

Review 2: 