# Part 2 - Task 3: NLP with spaCy (Laptop Reviews)
**Goal:** Use Named Entity Recognition (NER) to extract product names and brands from laptop reviews, then label each review's sentiment with a rule-based analyzer.


In [ ]:
!pip install -q spacy
!python -m spacy download en_core_web_sm
!pip install -q vaderSentiment

import spacy
from spacy.matcher import PhraseMatcher
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# Notebook-wide NLP resources: each is created once here and reused by the
# cells that follow.
analyzer = SentimentIntensityAnalyzer()  # VADER sentiment scorer
nlp = spacy.load("en_core_web_sm")  # the spaCy model downloaded at the top of this cell


In [ ]:
# Load the reviews into `df`. Source priority:
#   1. a 'laptop_reviews.csv' uploaded through the Colab widget,
#   2. (outside Colab only) a 'laptop_reviews.csv' in the working directory,
#   3. the built-in five-review sample.
import io
from pathlib import Path

try:
    from google.colab import files
except ImportError:
    # Not running in Colab: there is no upload widget, so fall back to a
    # local file or the sample instead of failing on the import.
    files = None

if files is not None:
    print("If you have 'laptop_reviews.csv', upload it now. Otherwise the sample will be used.")
    uploaded = files.upload()  # indexed by uploaded filename; values are the file bytes
else:
    print("google.colab is not available; looking for a local 'laptop_reviews.csv'. Otherwise the sample will be used.")
    uploaded = {}

if 'laptop_reviews.csv' in uploaded:
    df = pd.read_csv(io.BytesIO(uploaded['laptop_reviews.csv']))
elif files is None and Path('laptop_reviews.csv').is_file():
    df = pd.read_csv('laptop_reviews.csv')
else:
    data = {
        "review_text": [
            "I recently bought a Dell XPS 13 and the battery life is amazing. Highly recommend!",
            "The MacBook Pro keyboard keeps failing after only a month. Very disappointed.",
            "HP Pavilion gives good value for money. Performance is decent but fan is noisy.",
            "Acer Aspire is lightweight and great for travel, battery could be better though.",
            "Lenovo ThinkPad has an excellent keyboard and robust build. Great business laptop."
        ]
    }
    df = pd.DataFrame(data)

# The analysis cells read df['review_text']; report a wrong CSV layout here
# with a clear message rather than as a bare KeyError further down.
if 'review_text' not in df.columns:
    raise ValueError(
        "laptop_reviews.csv must contain a 'review_text' column; "
        f"found columns: {list(df.columns)}"
    )
df.head()


In [ ]:
def extract_entities(text):
    """Return the named entities spaCy finds in ``text``.

    Args:
        text: Review text. Non-string values (``None``, or the float NaN that
            pandas produces for an empty CSV cell) are treated as containing
            no entities.

    Returns:
        A list of ``(entity_text, entity_label)`` tuples in the order spaCy
        reports them; empty when nothing is recognised or ``text`` is not a
        string.
    """
    # The pipeline only accepts strings; short-circuit on missing values so
    # one empty row does not abort the whole DataFrame.apply.
    if not isinstance(text, str):
        return []
    return [(ent.text, ent.label_) for ent in nlp(text).ents]
# Run NER over every review and store the result next to the text.
entity_lists = df['review_text'].apply(extract_entities)
df['entities'] = entity_lists
# Preview the first ten text/entity pairs.
df.loc[:, ['review_text', 'entities']].head(10)


In [ ]:
# Brand and product-line names to look for in the reviews.
brands = [
    "Dell", "Apple", "MacBook", "MacBook Pro", "HP", "Acer",
    "Lenovo", "ThinkPad", "Aspire", "XPS", "Pavilion",
]

# attr="LOWER" compares lower-cased token text, so matching ignores case.
matcher = PhraseMatcher(nlp.vocab, attr="LOWER")
# Each phrase becomes a tokenized Doc pattern registered under the "BRAND" key.
patterns = list(map(nlp.make_doc, brands))
matcher.add("BRAND", patterns)
def extract_brand_phrases(text):
    """Return the distinct brand/product phrases the matcher finds in ``text``.

    Args:
        text: Review text. Non-string values (``None``, or the float NaN that
            pandas produces for an empty CSV cell) yield no brands.

    Returns:
        A list of matched phrases as they are written in ``text``, without
        duplicates, ordered by where each phrase first appears.
    """
    if not isinstance(text, str):
        return []
    doc = nlp(text)
    # Order matches by token position so the result is reproducible; turning
    # a set of strings into a list gave an order that could differ per run.
    spans = sorted((start, end) for _match_id, start, end in matcher(doc))
    # dict.fromkeys drops repeats while keeping first-seen order.
    return list(dict.fromkeys(doc[start:end].text for start, end in spans))
# Tag every review with the brand/product phrases it mentions.
brand_lists = df['review_text'].apply(extract_brand_phrases)
df['brands'] = brand_lists
# Preview the first ten rows with both extraction results side by side.
df.loc[:, ['review_text', 'entities', 'brands']].head(10)


In [ ]:
def vader_sentiment(text, threshold=0.05):
    """Label ``text`` as positive, negative or neutral from its VADER compound score.

    Args:
        text: Review text. Non-string values (``None``, or the float NaN that
            pandas produces for an empty CSV cell) are labelled ``'neutral'``
            because there is nothing to score.
        threshold: Non-negative cut-off applied symmetrically to the compound
            score. Defaults to 0.05, the value this function previously
            hard-coded.

    Returns:
        ``'positive'`` when the compound score is at least ``threshold``,
        ``'negative'`` when it is at most ``-threshold``, otherwise
        ``'neutral'``.
    """
    # The analyzer expects a string; skip it for missing values so one empty
    # row does not abort the whole DataFrame.apply.
    if not isinstance(text, str):
        return 'neutral'
    score = analyzer.polarity_scores(text)['compound']
    if score >= threshold:
        return 'positive'
    if score <= -threshold:
        return 'negative'
    return 'neutral'
# Label every review with its sentiment class.
sentiment_labels = df['review_text'].apply(vader_sentiment)
df['sentiment'] = sentiment_labels
# Preview the first ten rows: text, detected brands and sentiment label.
df.loc[:, ['review_text', 'brands', 'sentiment']].head(10)


In [ ]:
# How many reviews fall into each sentiment class.
sentiment_counts = df['sentiment'].value_counts()
print("Sentiment distribution:\n", sentiment_counts)

# One row per (brand, review) pair; reviews with no detected brand explode to
# a missing value and are removed by dropna().
brand_mentions = df.explode('brands')[['brands', 'review_text']].dropna()
print("\nBrand mentions sample:\n", brand_mentions.head(10))


**Screenshot instructions for report:**

- Table showing sample reviews with `brands`, `entities`, and `sentiment`.
- Counts of sentiment distribution.
