# **Installing Libraries**

In [12]:
!pip install pandas transformers torch scikit-learn sentencepiece





# **Loading CSV Dataset**

In [14]:
import pandas as pd

df = pd.read_csv("amazon_reviews.csv")

print("Total rows:", len(df))
df.head()


Total rows: 4915


Unnamed: 0,reviewerID,asin,reviewerName,helpful,reviewText,overall,summary,unixReviewTime,reviewTime,day_diff,helpful_yes,total_vote
0,A3SBTW3WS4IQSN,B007WTAJTO,,"[0, 0]",No issues.,4.0,Four Stars,1406073600,2014-07-23,138,0,0
1,A18K1ODH1I2MVB,B007WTAJTO,0mie,"[0, 0]","Purchased this for my device, it worked as adv...",5.0,MOAR SPACE!!!,1382659200,2013-10-25,409,0,0
2,A2FII3I2MBMUIA,B007WTAJTO,1K3,"[0, 0]",it works as expected. I should have sprung for...,4.0,nothing to really say....,1356220800,2012-12-23,715,0,0
3,A3H99DFEG68SR,B007WTAJTO,1m2,"[0, 0]",This think has worked out great.Had a diff. br...,5.0,Great buy at this price!!! *** UPDATE,1384992000,2013-11-21,382,0,0
4,A375ZM4U047O79,B007WTAJTO,2&amp;1/2Men,"[0, 0]","Bought it with Retail Packaging, arrived legit...",5.0,best deal around,1373673600,2013-07-13,513,0,0


# **Checking Column Names**

In [15]:
print(df.columns)


Index(['reviewerID', 'asin', 'reviewerName', 'helpful', 'reviewText',
       'overall', 'summary', 'unixReviewTime', 'reviewTime', 'day_diff',
       'helpful_yes', 'total_vote'],
      dtype='object')


# **Extracting Reviews & Labels**

In [18]:
reviews = df["reviewText"].astype(str).tolist()
ratings = df["overall"].tolist()

labels = []
for r in ratings:
    if r >= 4:
        labels.append("positive")
    elif r <= 2:
        labels.append("negative")
    else:
        labels.append("neutral")

print("Sample review:", reviews[0])
print("Label:", labels[0])


Sample review: No issues.
Label: positive


# **Loading NLP Models**

In [19]:
from transformers import pipeline

sentiment_model = pipeline("sentiment-analysis")
summarizer = pipeline("summarization")


No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

Device set to use cpu
No model was supplied, defaulted to sshleifer/distilbart-cnn-12-6 and revision a4f8f3e (https://huggingface.co/sshleifer/distilbart-cnn-12-6).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

Device set to use cpu


# **NLP Tools**

In [20]:
def analyze_sentiment(text):
    result = sentiment_model(text[:512])[0]
    return result["label"], result["score"]

def summarize_text(text):
    summary = summarizer(text[:1000], max_length=60, min_length=25, do_sample=False)
    return summary[0]["summary_text"]


# **Agentic AI System**

In [21]:
class AgenticAI:
    def __init__(self):
        self.memory = {}

    def perceive(self, user_input):
        return user_input.lower()

    def plan(self, text):
        actions = []
        if len(text.split()) > 15:
            actions.append("summarize")
        if "good" in text or "bad" in text or "great" in text or "poor" in text:
            actions.append("sentiment")
        return actions

    def act(self, text, actions):
        results = {}

        if "sentiment" in actions:
            label, score = analyze_sentiment(text)
            results["sentiment"] = label

        if "summarize" in actions:
            results["summary"] = summarize_text(text)

        self.memory = results
        return results

    def run(self, user_input):
        perceived = self.perceive(user_input)
        actions = self.plan(perceived)
        results = self.act(user_input, actions)
        return results


# **Running Agent on Real Review**

In [22]:
agent = AgenticAI()

sample_review = reviews[10]
print("Review:")
print(sample_review)

print("\nAgent Output:")
print(agent.run(sample_review))


Review:
I like this SD Card because it can take music video downloads, personal videos, files,docs, and multimedia images with a fast transfer rate of Class 10 speed. It can take games with large files very easily and still have enough space for apps. It&#34;s great for video cameras and camcorders with the supplied adapter. Fits very easily into smartphones and tablets SD Card slots. I recommend this 32GB SD Card to everyone.

Agent Output:
{'sentiment': 'POSITIVE', 'summary': ' I like this SD Card because it can take music video downloads, personal videos, files, docs, and multimedia images with a fast transfer rate of Class 10 speed . It can take games with large files very easily and still have enough space for apps . Fits very easily into smartphones and'}


# **Evaluating Sentiment Model**

In [23]:
from sklearn.metrics import accuracy_score, f1_score

true = []
pred = []

for i in range(50):
    text = reviews[i]
    label = labels[i]

    prediction = sentiment_model(text[:512])[0]["label"]

    if prediction == "POSITIVE":
        pred.append("positive")
    else:
        pred.append("negative")

    true.append(label)

accuracy = accuracy_score(true, pred)
f1 = f1_score(true, pred, average="weighted")

print("Accuracy:", accuracy)
print("F1 Score:", f1)


Accuracy: 0.68
F1 Score: 0.7464935064935065


# **Showing Multiple Agent Interactions**

In [24]:
for i in range(3):
    print("\nReview", i+1)
    print(reviews[i])
    print("Agent:", agent.run(reviews[i]))


Your max_length is set to 60, but your input_length is only 39. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=19)



Review 1
No issues.
Agent: {}

Review 2
Purchased this for my device, it worked as advertised. You can never have too much phone memory, since I download a lot of stuff this was a no brainer for me.


Your max_length is set to 60, but your input_length is only 38. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=19)


Agent: {'summary': ' You can never have too much phone memory, since I download a lot of stuff, this was a no brainer for me . Purchased this for my device, it worked as advertised .'}

Review 3
it works as expected. I should have sprung for the higher capacity.  I think its made a bit cheesier than the earlier versions; the paint looks not as clean as before
Agent: {'summary': ' It works as expected. I should have sprung for the higher capacity . I think its made a bit cheesier than the earlier versions; the paint looks not as clean as before .'}
