In [1]:

# 1. Imports (same structure)
import os

import evaluate
import numpy as np
import torch
from datasets import load_dataset
from transformers import (AutoTokenizer,AutoModelForSequenceClassification,TrainingArguments,Trainer,DataCollatorWithPadding)


  from .autonotebook import tqdm as notebook_tqdm


In [2]:

# 2. Load Amazon Reviews Dataset

# Location of the Amazon reviews CSV. Set the REVIEWS_CSV environment variable
# to run the notebook on another machine; when it is unset the original local
# path is used, so existing runs behave exactly as before.
reviews_csv = os.environ.get(
    "REVIEWS_CSV",
    r"C:\Projects\Sentiment_project\data\Reviews.csv",
)

# Load the whole CSV as a single "train" split; the train/validation split is
# carved out of it later in the notebook.
dataset_dict = load_dataset("csv", data_files={"train": reviews_csv})

# Show the available splits, column names and row count.
dataset_dict




DatasetDict({
    train: Dataset({
        features: ['Id', 'ProductId', 'UserId', 'ProfileName', 'HelpfulnessNumerator', 'HelpfulnessDenominator', 'Score', 'Time', 'Summary', 'Text'],
        num_rows: 568454
    })
})

In [3]:
# 3. Label Mapping (Tri-class)

# Class index -> human-readable sentiment name, in class-id order.
id2label = dict(enumerate(("Negative", "Neutral", "Positive")))

# Reverse lookup (name -> class index), derived from id2label so the two
# mappings cannot drift apart.
label2id = {sentiment: class_id for class_id, sentiment in id2label.items()}


In [4]:

# Step 3b: Map ratings to sentiment
def map_sentiment(example):
    """Attach a three-class sentiment label derived from the star rating.

    Reads ``example["Score"]`` and writes ``example["label"]``:
    a score of 2 or less becomes 0, exactly 3 becomes 1, and anything else
    becomes 2. The same dict is modified in place and returned.
    """
    score = example["Score"]
    example["label"] = 0 if score <= 2 else (1 if score == 3 else 2)
    return example

# Apply map_sentiment to every row (one example at a time), adding the
# "label" column that the model is trained on.
dataset_dict = dataset_dict.map(function=map_sentiment)


In [5]:
# 4. Load RoBERTa Model

# Pretrained checkpoint shared by the tokenizer and the classifier.
model_path = "roberta-base"
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Sequence-classification model with one output per sentiment class; the
# label mappings are stored in the model config so predictions can be decoded
# through model.config.id2label.
model = AutoModelForSequenceClassification.from_pretrained(
    model_path,
    num_labels=len(id2label),  # three classes: Negative / Neutral / Positive
    id2label=id2label,
    label2id=label2id,
)


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [6]:

# 5. Freeze Base Model (same idea)

# Freeze the whole base model: none of its weights receive gradient updates.
for base_param in model.base_model.parameters():
    base_param.requires_grad = False

# Make sure the classification head stays trainable; head parameters are
# identified by "classifier" appearing in their name.
head_params = [p for n, p in model.named_parameters() if "classifier" in n]
for head_param in head_params:
    head_param.requires_grad = True


In [7]:
# 6. Preprocessing

def preprocess_function(examples):
    """Tokenize a batch of reviews as "<Summary> <Text>".

    ``examples`` is a batch with parallel "Summary" and "Text" columns. A
    missing (None) summary or text is treated as an empty string before the
    two are joined with a single space. Returns the tokenizer output for the
    joined strings, with truncation enabled.
    """
    def _or_empty(value):
        # None marks a missing field in the source data.
        return "" if value is None else value

    combined = []
    for summary, body in zip(examples["Summary"], examples["Text"]):
        combined.append(_or_empty(summary) + " " + _or_empty(body))
    return tokenizer(combined, truncation=True)


# Tokenize the dataset in batches (preprocess_function receives lists of
# column values rather than single rows).
tokenized_data = dataset_dict.map(function=preprocess_function, batched=True)

# Tokenization above does not pad, so padding is left to the collator when
# batches are assembled.
data_collator = DataCollatorWithPadding(tokenizer)


In [9]:

# Train–Validation Split

# Hold out 10% of the tokenized reviews for validation; the fixed seed keeps
# the split reproducible across runs.
dataset_split = tokenized_data["train"].train_test_split(test_size=0.1, seed=42)

# train_test_split names the held-out part "test"; it serves as the
# validation set in this notebook.
train_dataset, val_dataset = dataset_split["train"], dataset_split["test"]


In [10]:

# 7. Metrics (3-class accuracy)

# Accuracy metric object, loaded once and reused by compute_metrics.
accuracy = evaluate.load("accuracy")


def compute_metrics(eval_pred):
    """Compute accuracy for the Trainer from a (logits, labels) pair.

    The predicted class of each example is the index of its largest logit
    along axis 1. Returns ``{"accuracy": <value>}``.
    """
    logits, labels = eval_pred
    predicted_ids = np.argmax(logits, axis=1)
    result = accuracy.compute(predictions=predicted_ids, references=labels)
    return dict(accuracy=result["accuracy"])


In [11]:
# 8. Training Arguments

# Hyper-parameters for training the classification head.
training_args = TrainingArguments(
    output_dir="roberta-amazon-sentiment",
    learning_rate=2e-4,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=5,
    # Mixed precision is enabled only when a CUDA device is present, so this
    # cell also works on the CPU fallback the notebook allows for when it
    # picks `device`; on a GPU machine the value is True, as before.
    fp16=torch.cuda.is_available(),
    # Log, evaluate and checkpoint once per epoch; the best checkpoint is
    # restored when training finishes.
    logging_strategy="epoch",
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)


In [12]:


# Prefer the GPU when PyTorch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

model.to(device)  # move the model onto the chosen device

# Report where the weights actually live ('cuda:0' on a GPU machine).
first_param = next(model.parameters())
print(first_param.device)


cuda:0


In [16]:
# 9. Trainer



# Cap the subset sizes at what each split actually holds, so `select` cannot
# index past the end if the notebook is run on a smaller dataset. With the
# full reviews file the caps are 500000 and 50000, exactly as before.
train_subset_size = min(500000, len(train_dataset))
eval_subset_size = min(50000, len(val_dataset))

trainer = Trainer(
    model=model,
    args=training_args,
    # Fixed-seed shuffle before selecting, so the subsets are reproducible.
    train_dataset=train_dataset.shuffle(seed=42).select(range(train_subset_size)),
    eval_dataset=val_dataset.shuffle(seed=42).select(range(eval_subset_size)),
    # `processing_class` replaces the deprecated `tokenizer` argument.
    processing_class=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

# Run training; the returned TrainOutput is displayed as the cell result.
trainer.train()

  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.397,0.304585,0.88176
2,0.3786,0.302343,0.88492
3,0.3726,0.311136,0.87804
4,0.3694,0.301251,0.88288
5,0.3663,0.299621,0.88332


TrainOutput(global_step=156250, training_loss=0.3767756, metrics={'train_runtime': 18231.9536, 'train_samples_per_second': 137.122, 'train_steps_per_second': 8.57, 'total_flos': 4.306875825657828e+17, 'train_loss': 0.3767756, 'epoch': 5.0})

In [17]:

# 10. Evaluation
# Evaluate on the first 55000 rows of the validation split (taken in stored
# order, without shuffling).
final_eval_subset = val_dataset.select(range(55000))
metrics = trainer.evaluate(eval_dataset=final_eval_subset)

print(metrics)


{'eval_loss': 0.29998713731765747, 'eval_accuracy': 0.8831272727272728, 'eval_runtime': 384.4137, 'eval_samples_per_second': 143.075, 'eval_steps_per_second': 8.943, 'epoch': 5.0}


In [18]:
# 11. Inference on New Review

input_text = "The product quality is decent but delivery was slow"

# truncation=True mirrors the training-time preprocessing and the batch
# inference cell, so an over-long review is cut instead of failing.
inputs = tokenizer(input_text, truncation=True, return_tensors="pt").to(device)

# Put the model in eval mode explicitly (disables dropout) instead of relying
# on an earlier evaluate/predict call having left it that way.
model.eval()
with torch.no_grad():  # no gradients are needed for inference
    outputs = model(**inputs)
    prediction = torch.argmax(outputs.logits, dim=-1)

# Decode the predicted class index through the mapping stored in the config.
print("Predicted Sentiment:", model.config.id2label[prediction.item()])


Predicted Sentiment: Positive


In [23]:
reviews = [
    "Excellent food!",
    "Delivery was late and food  was also smelling!!.",
    "Okay, but not great."
]

# Tokenize the reviews together; padding makes them the same length so they
# can be stacked into one tensor batch.
inputs = tokenizer(reviews, padding=True, truncation=True, return_tensors="pt").to(device)

# Put the model in eval mode explicitly (disables dropout) instead of relying
# on an earlier evaluate/predict call having left it that way.
model.eval()
with torch.no_grad():  # no gradients are needed for inference
    outputs = model(**inputs)
    predictions = torch.argmax(outputs.logits, dim=-1)

# Print each review next to its decoded sentiment.
for text, pred in zip(reviews, predictions):
    print(text, ":", model.config.id2label[pred.item()])


Excellent food! : Positive
Delivery was late and food  was also smelling!!. : Negative
Okay, but not great. : Positive


In [20]:
from sklearn.metrics import classification_report, confusion_matrix

# Predict on the first 10000 validation rows and take the arg-max class.
output = trainer.predict(val_dataset.select(range(10000)))
preds = np.argmax(output.predictions, axis=1)
labels = output.label_ids

# Pin the class ids and names from id2label. Without an explicit `labels=`
# the report raises an error when a class is absent from the evaluated slice
# (the number of classes found would not match target_names), and the
# confusion matrix would shrink below 3x3.
class_ids = sorted(id2label)
class_names = [id2label[class_id] for class_id in class_ids]

print(classification_report(labels, preds, labels=class_ids, target_names=class_names))
print(confusion_matrix(labels, preds, labels=class_ids))


              precision    recall  f1-score   support

    Negative       0.77      0.75      0.76      1480
     Neutral       0.39      0.10      0.16       745
    Positive       0.91      0.98      0.94      7775

    accuracy                           0.88     10000
   macro avg       0.69      0.61      0.62     10000
weighted avg       0.85      0.88      0.85     10000

[[1114   68  298]
 [ 192   73  480]
 [ 148   46 7581]]


In [24]:
# Write the model and the tokenizer to the same directory so they can be
# reloaded together.
finetuned_dir = "../models/roberta_finetuned/"
model.save_pretrained(finetuned_dir)
tokenizer.save_pretrained(finetuned_dir)


('../models/roberta_finetuned/tokenizer_config.json',
 '../models/roberta_finetuned/special_tokens_map.json',
 '../models/roberta_finetuned/vocab.json',
 '../models/roberta_finetuned/merges.txt',
 '../models/roberta_finetuned/added_tokens.json',
 '../models/roberta_finetuned/tokenizer.json')

In [25]:
from sklearn.metrics import classification_report
import json

# Per-class and aggregate scores as a nested dict instead of a text table.
report = classification_report(
    labels, preds, target_names=['Negative', 'Neutral', 'Positive'], output_dict=True
)

# Summary for this model: overall accuracy plus the support-weighted
# averages, stored under the model's name.
metrics = {
    "Fine-tune_roberta": {
        "accuracy": report["accuracy"],
        **{
            score_name: report["weighted avg"][score_name]
            for score_name in ("precision", "recall", "f1-score")
        },
    }
}

# Save the summary next to the stored model files.
with open("../models/roberta_finetuned/metrics.json", "w") as metrics_file:
    json.dump(metrics, metrics_file, indent=4)

metrics


{'Fine-tune_roberta': {'accuracy': 0.8768,
  'precision': 0.847610392422031,
  'recall': 0.8768,
  'f1-score': 0.8547169525656833}}