In [1]:
!pip install torch transformers scikit-learn pandas



In [2]:
import os
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from transformers import BertTokenizer, BertForSequenceClassification, get_linear_schedule_with_warmup
from torch.optim import AdamW
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import pandas as pd
import numpy as np

In [3]:
# Location of the reviews CSV. Set the AMAZON_REVIEWS_CSV environment variable to
# point at your own copy; when it is unset the original local path is used.
DATA_PATH = os.environ.get("AMAZON_REVIEWS_CSV", r"C:\Users\Hp\Downloads\Amazon Reviews.csv")

df = pd.read_csv(DATA_PATH)
df

Unnamed: 0,s.no,age,division_name,department_name,class_name,clothing_id,title,review_text,alike_feedback_count,rating,recommend_index
0,0,40,General,Bottoms,Jeans,1028,Amazing fit and wash,Like other reviewers i was hesitant to spend t...,0,5,1
1,1,62,General Petite,Tops,Blouses,850,Lovely and unique!,As is true of a bunch of the fall clothing pho...,12,5,1
2,2,47,General Petite,Bottoms,Skirts,993,Meh,"I so wanted this skirt to work, love the desig...",3,1,0
3,3,45,General Petite,Bottoms,Pants,1068,Wow,Love love this! i was hesitant to buy this at ...,0,5,1
4,4,37,Initmates,Intimate,Swim,24,Great for bigger busts,I absolutely love the retro look of this swims...,0,5,1
...,...,...,...,...,...,...,...,...,...,...,...
23481,23481,44,General Petite,Dresses,Dresses,1081,Love it!,I oot this dress in the blue. it fits great--h...,0,5,1
23482,23482,39,General,Dresses,Dresses,1110,Great piece,I was very patient with this dress. i was wait...,1,5,1
23483,23483,29,General Petite,Tops,Knits,862,So soft and flattering,"The deep v doesn't gape, and flatters the neck...",0,5,1
23484,23484,57,General,Dresses,Dresses,1082,Another winner from isabella sinclair,"I saw this dress online this morning, went int...",10,5,1


In [4]:
# Some exports of this dataset carry stray whitespace in the header
# (e.g. "recommend_index "), so normalise every column name instead of
# special-casing a single one.
df = df.rename(columns=lambda name: name.strip() if isinstance(name, str) else name)

# Keep only the review text and the target, drop incomplete rows, and cast the
# target to int. Doing this in one chain (instead of assigning a column onto the
# result of a slice + dropna) avoids pandas' SettingWithCopyWarning.
df = (
    df[["review_text", "recommend_index"]]
    .dropna()
    .astype({"recommend_index": int})
)

print(df.head())

                                         review_text  recommend_index
0  Like other reviewers i was hesitant to spend t...                1
1  As is true of a bunch of the fall clothing pho...                1
2  I so wanted this skirt to work, love the desig...                0
3  Love love this! i was hesitant to buy this at ...                1
4  I absolutely love the retro look of this swims...                1


In [5]:
# Hold out 20% of the reviews for testing, stratified on the target column.
review_texts = df["review_text"].astype(str).tolist()
review_labels = df["recommend_index"].tolist()

X_train, X_test, y_train, y_test = train_test_split(
    review_texts,
    review_labels,
    test_size=0.2,
    random_state=42,
    stratify=df["recommend_index"],
)

In [6]:
# Tokenization with Pretrained BERT

# Settings shared by the tokenizer, the data loaders and the model cell.
BATCH_SIZE = 16                    # examples per DataLoader batch
MAX_LEN = 256                      # token budget per review (longer ones are truncated)
MODEL_NAME = "bert-base-uncased"   # checkpoint used for both tokenizer and model

tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)

def encode_data(texts, labels, max_len=MAX_LEN):
    """Tokenize texts and bundle them with their labels as a ``TensorDataset``.

    Args:
        texts: A string, or any iterable of strings (list, tuple, pandas
            Series, ...). Iterables are materialised into a list because the
            tokenizer only accepts ``str`` / list-like batches.
        labels: Sequence of integer class ids, one per text.
        max_len: Maximum number of tokens per text; longer texts are truncated.

    Returns:
        A ``TensorDataset`` whose items are
        ``(input_ids, attention_mask, label)`` tensors.
    """
    if not isinstance(texts, str):
        texts = list(texts)

    encodings = tokenizer(
        texts,
        truncation=True,
        padding=True,  # pad to the longest text in this call
        max_length=max_len,
        return_tensors="pt"
    )
    # Class-index targets must be int64 for the cross-entropy loss; make that
    # explicit rather than relying on dtype inference from the input.
    label_tensor = torch.tensor(labels, dtype=torch.long)
    return TensorDataset(encodings["input_ids"], encodings["attention_mask"], label_tensor)

# Encode each split once up front.
train_ds = encode_data(texts=X_train, labels=y_train)
test_ds = encode_data(texts=X_test, labels=y_test)

# Shuffle only the training batches; the test set keeps its original order.
train_loader = DataLoader(dataset=train_ds, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_ds, batch_size=BATCH_SIZE, shuffle=False)



In [7]:
# Load Pretrained BERT Model

# Seed PyTorch before the classification head is randomly initialised, so that
# the head weights, dropout and DataLoader shuffling are repeatable on a fresh
# "Restart & Run All" (as far as the backend allows). Uses the same value as the
# random_state of the train/test split.
SEED = 42
torch.manual_seed(SEED)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = BertForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2).to(device)

# Fine-tuning hyperparameters, named so they are easy to find and tune.
LEARNING_RATE = 2e-5
WARMUP_RATIO = 0.1  # fraction of all optimizer steps spent warming the LR up

optimizer = AdamW(model.parameters(), lr=LEARNING_RATE)
epochs = 3
# The scheduler is stepped once per batch, so the schedule spans every batch of every epoch.
total_steps = epochs * len(train_loader)
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=int(WARMUP_RATIO * total_steps),
    num_training_steps=total_steps
)



Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Training Loop

# NOTE: this criterion is not used below -- the model computes its own loss
# because `labels` is passed to the forward call.
loss_fn = torch.nn.CrossEntropyLoss()

for epoch in range(epochs):
    model.train()
    total_loss = 0
    for batch in train_loader:
        # Move the (input_ids, attention_mask, labels) triple onto the training device.
        input_ids, attention_mask, labels = (tensor.to(device) for tensor in batch)

        optimizer.zero_grad()
        loss = model(input_ids, attention_mask=attention_mask, labels=labels).loss
        loss.backward()

        # One optimizer update and one learning-rate update per batch.
        optimizer.step()
        scheduler.step()

        total_loss += loss.item()

    mean_loss = total_loss / len(train_loader)
    print(f"Epoch {epoch+1}/{epochs} | Loss: {mean_loss:.4f}")


In [None]:
# Evaluation

model.eval()
preds = []
true_labels = []

# Gradients are not needed for scoring the held-out set.
with torch.no_grad():
    for batch in test_loader:
        input_ids, attention_mask, labels = (tensor.to(device) for tensor in batch)
        logits = model(input_ids, attention_mask=attention_mask).logits
        # The predicted class is the index of the largest logit.
        preds.extend(logits.argmax(dim=-1).cpu().numpy())
        true_labels.extend(labels.cpu().numpy())

accuracy = accuracy_score(true_labels, preds)
print("✅ Accuracy:", accuracy)
print(classification_report(true_labels, preds, digits=4))


In [None]:
# Prediction

def predict_sentiment(texts):
    """Classify one or more review texts with the fine-tuned model.

    Args:
        texts: A single string, or an iterable of strings.

    Returns:
        A tuple ``(labels, probs)``. ``labels`` is a list holding, for each
        text, the index of its most probable class. ``probs`` is a NumPy array
        of softmax probabilities with one row per text. Empty input yields
        ``([], <array of shape (0, num_labels)>)``.
    """
    model.eval()

    # Accept a bare string as a batch of one, and materialise other iterables
    # (tuples, pandas Series, generators) into the list the tokenizer expects.
    texts = [texts] if isinstance(texts, str) else list(texts)
    if not texts:
        # Nothing to score: skip the tokenizer/model, which cannot handle an
        # empty batch, and return empty results of the matching shape.
        return [], np.empty((0, model.config.num_labels), dtype=np.float32)

    enc = tokenizer(texts, truncation=True, padding=True, max_length=MAX_LEN, return_tensors="pt").to(device)
    with torch.no_grad():
        logits = model(**enc).logits
        probs = torch.softmax(logits, dim=-1).cpu().numpy()
        labels = probs.argmax(axis=1).tolist()
    return labels, probs


In [None]:
# Example Prediction

# Score a single hand-written review and show the predicted class with its probabilities.
sample_text = "This product is amazing, I love it!"
label, prob = predict_sentiment(texts=sample_text)

print("Text:", sample_text)
print("Predicted Label:", label, "| Probabilities:", prob)