In [29]:
!pip install spacy
!python -m spacy download en_core_web_sm



huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting en-core-web-sm==3.8.0
  Using cached https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[38;5;2m‚úî Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')


In [30]:
import torch
import spacy
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
from peft import PeftModel

# Load spaCy for aspect extraction
nlp = spacy.load("en_core_web_sm")

# Your fine-tuned LoRA model
MODEL_ID = "Shiva-k22/sentiment-lora-distilbert"
BASE_MODEL = "distilbert-base-uncased"

# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
base_model = AutoModelForSequenceClassification.from_pretrained(BASE_MODEL, num_labels=3)
model = PeftModel.from_pretrained(base_model, MODEL_ID)

# Define label mapping
LABEL_MAP = {
    "LABEL_0": "Negative",
    "LABEL_1": "Neutral",
    "LABEL_2": "Positive"
}

# Create pipeline
pipe = pipeline("text-classification", model=model, tokenizer=tokenizer, return_all_scores=False)


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Device set to use mps:0


In [31]:
# Extract aspects using spaCy
def extract_aspects(text):
    """Extract candidate aspects using spaCy noun phrases + named entities."""
    doc = nlp(text)
    aspects = set()
    for chunk in doc.noun_chunks:
        aspects.add(chunk.text.strip())
    for ent in doc.ents:
        aspects.add(ent.text.strip())
    return list(aspects)


In [40]:
def aspect_sentiment_analysis(text):
    aspects = extract_aspects(text)
    results = []
    for aspect in aspects:
        prompt = f"Aspect: {aspect}. Sentence: {text}"
        output = pipe(prompt)[0]
        label = LABEL_MAP.get(output["label"], output["label"])
        results.append({
            "aspect": aspect,
            "sentiment": label,
            "score": round(float(output['score']), 3)
        })
    return results


In [41]:
sentences = [
    "Battery life is good but camera is disappointing.",
    "Shell's $70 Billion BG deal meets shareholder skepticism.",
    "$MSFT SQL Server revenue grew double-digit.",
    "The bank posted a net profit of 8.2 million euros."
]

for s in sentences:
    print("\nSentence:", s)
    for res in aspect_sentiment_analysis(s):
        print(f" ‚Üí {res['aspect']} ‚Üí {res['sentiment']} ({res['score']})")



Sentence: Battery life is good but camera is disappointing.
 ‚Üí camera ‚Üí Neutral (0.674)
 ‚Üí Battery life ‚Üí Positive (0.524)

Sentence: Shell's $70 Billion BG deal meets shareholder skepticism.
 ‚Üí Shell's $70 Billion BG deal ‚Üí Negative (0.563)
 ‚Üí shareholder skepticism ‚Üí Neutral (0.643)
 ‚Üí Shell ‚Üí Neutral (0.799)
 ‚Üí $70 Billion ‚Üí Negative (0.543)

Sentence: $MSFT SQL Server revenue grew double-digit.
 ‚Üí SQL Server revenue ‚Üí Neutral (0.445)
 ‚Üí SQL Server ‚Üí Neutral (0.518)
 ‚Üí $MSFT ‚Üí Negative (0.574)

Sentence: The bank posted a net profit of 8.2 million euros.
 ‚Üí The bank ‚Üí Neutral (0.98)
 ‚Üí 8.2 million ‚Üí Neutral (0.951)
 ‚Üí a net profit ‚Üí Neutral (0.954)
 ‚Üí 8.2 million euros ‚Üí Neutral (0.988)


In [42]:
import pandas as pd
dataset = pd.read_csv("/Users/shivakumarkaranam/Documents/infosys/cleaned_sentiment_data.csv")

In [43]:
for i in range(10):
    text = dataset['Cleaned_Sentence'][i]
    print("\nSentence:", text)
    for res in aspect_sentiment_analysis(text):
        print(f" ‚Üí {res['aspect']} ‚Üí {res['sentiment']} ({res['score']})")
        


Sentence: geosolutions technology leverage benefon gps solution provide location base search technology community platform location relevant multimedia content new powerful commercial model .
 ‚Üí solution ‚Üí Neutral (0.777)
 ‚Üí geosolutions technology leverage benefon ‚Üí Neutral (0.646)
 ‚Üí location base search technology community platform location relevant multimedia content new powerful commercial model ‚Üí Neutral (0.712)

Sentence: $ esi low $ 1.50 $ 2.50 bk real possibility
 ‚Üí real possibility ‚Üí Negative (0.649)
 ‚Üí $ esi low ‚Üí Negative (0.888)
 ‚Üí 2.50 ‚Üí Negative (0.839)

Sentence: last quarter 2010 componenta net sale double eur131m eur76m period year earlier move zero pretax profit pretax loss eur7m .
 ‚Üí m ‚Üí Neutral (0.695)
 ‚Üí period year earlier ‚Üí Positive (0.547)
 ‚Üí eur76 ‚Üí Positive (0.628)
 ‚Üí double eur131m eur76 ‚Üí Positive (0.917)
 ‚Üí eur7 ‚Üí Positive (0.53)
 ‚Üí last quarter 2010 ‚Üí Positive (0.679)
 ‚Üí zero pretax profit pretax loss ‚Ü

In [44]:
def analyze_aspects_with_overall(text):
    aspects = extract_aspects(text)
    results = []

    # üåü 1Ô∏è‚É£ Predict Overall Sentiment
    overall_res = pipe(text)[0]
    overall_label = LABEL_MAP.get(overall_res["label"], overall_res["label"])
    overall_score = round(float(overall_res["score"]), 3)

    # üåü 2Ô∏è‚É£ Predict Aspect-Level Sentiment
    for aspect in aspects:
        prompt = f"Aspect: {aspect}. Sentence: {text}"
        res = pipe(prompt)[0]
        label = LABEL_MAP.get(res["label"], res["label"])
        score = round(float(res["score"]), 3)
        results.append({
            "aspect": aspect,
            "aspect_sentiment": label,
            "aspect_score": score
        })

    # üåü Combine Results
    return {
        "sentence": text,
        "overall_sentiment": overall_label,
        "overall_score": overall_score,
        "aspects": results
    }



In [49]:

sentences = [
    "Battery life is good but camera is disappointing.",
]

results = [analyze_aspects_with_overall(s) for s in sentences]




In [50]:

# Convert to DataFrame

rows = []
for r in results:
    for a in r["aspects"]:
        rows.append({
            "Sentence": r["sentence"],
            "Aspect": a["aspect"],
            "Aspect Sentiment": a["aspect_sentiment"],
            "Aspect Score": a["aspect_score"],
            "Overall Sentiment": r["overall_sentiment"],
            "Overall Score": r["overall_score"]
        })

df = pd.DataFrame(rows)
print(df)

                                            Sentence        Aspect  \
0  Battery life is good but camera is disappointing.        camera   
1  Battery life is good but camera is disappointing.  Battery life   

  Aspect Sentiment  Aspect Score Overall Sentiment  Overall Score  
0          Neutral         0.674           Neutral          0.747  
1         Positive         0.524           Neutral          0.747  
