In [1]:
import spacy
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import pandas as pd # Used for a cleaner deliverable table

# Load the spaCy English model, downloading it only when it is missing.
# The download goes through spacy.cli so it installs into the interpreter
# that runs this kernel; a shell "!python" may resolve to a different
# interpreter and would re-download on every run.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # spacy.load raises OSError when the model package is not installed.
    spacy.cli.download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

# Fetch the VADER lexicon used by SentimentIntensityAnalyzer; this is a
# no-op when the lexicon is already up to date. quiet=True keeps the
# downloader's log lines out of the notebook output.
nltk.download('vader_lexicon', quiet=True)

# Initialize the sentiment analyzer
sid = SentimentIntensityAnalyzer()

print("Setup Complete. Tools are ready for analysis.")

Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m103.0 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


Setup Complete. Tools are ready for analysis.


In [2]:
def process_reviews(text_list, labels=("ORG", "PRODUCT"), pos_threshold=0.05,
                    neg_threshold=-0.05, nlp_model=None, analyzer=None):
    """Extract entities and classify sentiment for each review text.

    Parameters
    ----------
    text_list : iterable of str
        Review texts to analyse, processed in order.
    labels : str or iterable of str, optional
        Entity labels to keep. Defaults to ORG and PRODUCT. A single
        label may be given as a plain string.
    pos_threshold : float, optional
        A compound score at or above this value is labelled "Positive".
    neg_threshold : float, optional
        A compound score at or below this value is labelled "Negative".
    nlp_model : callable, optional
        Called as ``nlp_model(text)``; must return an object whose ``ents``
        yield items with ``text`` and ``label_`` attributes. Defaults to the
        notebook-level ``nlp`` pipeline.
    analyzer : object, optional
        Must provide ``polarity_scores(text)`` returning a mapping with a
        ``'compound'`` entry. Defaults to the notebook-level ``sid``.

    Returns
    -------
    list of dict
        One record per input text with the keys "Review Text",
        "Extracted Entities", "Sentiment" and "Confidence Score".
        "Confidence Score" holds the raw compound score used for the
        classification.
    """
    # Fall back to the notebook-level tools only when none were injected, so
    # the function can also be exercised without the global state.
    if nlp_model is None:
        nlp_model = nlp
    if analyzer is None:
        analyzer = sid

    # A bare string would otherwise be split into single characters.
    if isinstance(labels, str):
        labels = (labels,)
    wanted_labels = frozenset(labels)

    results = []

    for text in text_list:
        # 1. NER - keep only entities whose label was requested
        doc = nlp_model(text)
        entities = [
            f"{ent.text} ({ent.label_})"
            for ent in doc.ents
            if ent.label_ in wanted_labels
        ]

        # 2. Sentiment - rule-based classification on the compound score
        compound = analyzer.polarity_scores(text)['compound']

        if compound >= pos_threshold:
            sentiment = "Positive"
        elif compound <= neg_threshold:
            sentiment = "Negative"
        else:
            sentiment = "Neutral"

        results.append({
            "Review Text": text,
            "Extracted Entities": ", ".join(entities) if entities else "None Detected",
            "Sentiment": sentiment,
            "Confidence Score": compound
        })
    return results

In [3]:
# Sample product reviews that serve as the input corpus for this demo.
amazon_reviews = [
    "The Apple MacBook Pro is incredibly fast, though the price is steep.",
    "Avoid this Samsung fridge at all costs; it stopped cooling after a week.",
    "Sony WH-1000XM5 headphones offer the best silence I have ever experienced.",
    "The delivery was late, but the Nike shoes fit perfectly.",
    "This generic charger didn't even work with my Dell laptop.",
]

# Run entity extraction and sentiment scoring over every review, then wrap
# the per-review records in a DataFrame so they render as a table.
analysis_results = process_reviews(amazon_reviews)
df = pd.DataFrame(data=analysis_results)

# Banner first, then the rich table rendering of the results.
print("--- AMAZON PRODUCT REVIEW ANALYSIS DELIVERABLE ---")
display(df)

--- AMAZON PRODUCT REVIEW ANALYSIS DELIVERABLE ---


Unnamed: 0,Review Text,Extracted Entities,Sentiment,Confidence Score
0,"The Apple MacBook Pro is incredibly fast, thou...",The Apple MacBook Pro (ORG),Neutral,0.0
1,Avoid this Samsung fridge at all costs; it sto...,Samsung (ORG),Negative,-0.4767
2,Sony WH-1000XM5 headphones offer the best sile...,Sony (ORG),Positive,0.6369
3,"The delivery was late, but the Nike shoes fit ...",Nike (ORG),Positive,0.8765
4,This generic charger didn't even work with my ...,Dell (ORG),Neutral,0.0
