In [1]:
%pip install -q transformers==4.39.0
%pip install -q peft==0.12.0
%pip install -q datasets==2.20.0
%pip install -q evaluate==0.4.2
%pip install -q accelerate==0.34.2

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [2]:
import numpy as np
import evaluate
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import TrainingArguments, Trainer, DataCollatorWithPadding
from peft import LoraConfig, get_peft_model, TaskType
from transformers import EarlyStoppingCallback

In [3]:
# Load dataset
dataset = load_dataset("yelp_review_full")
print(f"Dataset labels: {set(dataset['train']['label'])}")
print(f"Number of classes: {len(set(dataset['train']['label']))}")

Dataset labels: {0, 1, 2, 3, 4}
Number of classes: 5


In [4]:
# Select smaller subsets for training and evaluation
training_dataset = dataset["train"].shuffle(seed=42).select(range(500))
evaluation_dataset = dataset["test"].shuffle(seed=42).select(range(500))

In [5]:

# Load model and tokenizer
model_name = "FacebookAI/roberta-base"
num_labels = 5  # Yelp has 5 classes (0-4)

In [6]:
# Initialize model with correct number of labels
model = AutoModelForSequenceClassification.from_pretrained(
    model_name, 
    num_labels=num_labels,
    id2label={0: "1 star", 1: "2 stars", 2: "3 stars", 3: "4 stars", 4: "5 stars"},
    label2id={"1 star": 0, "2 stars": 1, "3 stars": 2, "4 stars": 3, "5 stars": 4}
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:

# Add padding token if it doesn't exist
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_toke

In [8]:
# Tokenization function
def tokenize_function(examples):
    # Tokenize the text
    tokenized = tokenizer(
        examples["text"], 
        padding=True,  # We'll use data collator for dynamic padding
        truncation=True, 
        max_length=512,
        return_tensors=None  # Return as lists, not tensors
    )
    
    # Add labels to the output
    tokenized["labels"] = examples["label"]
    return tokenized

In [9]:
# Tokenize datasets
tokenized_train = training_dataset.map(tokenize_function, batched=True)
tokenized_eval = evaluation_dataset.map(tokenize_function, batched=True)

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

In [10]:
# Remove the original text column to save memory
tokenized_train = tokenized_train.remove_columns(["text"])
tokenized_eval = tokenized_eval.remove_columns(["text"])

In [11]:
# Set format for PyTorch
tokenized_train.set_format("torch")
tokenized_eval.set_format("torch")

In [12]:
# LoRA configuration
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["query", "value", "key", "dense"],  # Added more target modules
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.SEQ_CLS,
)

In [13]:
# Apply LoRA to model
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

trainable params: 3,273,221 || all params: 127,947,274 || trainable%: 2.5583


In [14]:
# Data collator for dynamic padding
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [15]:
# Training arguments
training_args = TrainingArguments(
    output_dir="roberta_peft_yelp",
    overwrite_output_dir=True,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    num_train_epochs=3,
    learning_rate=2e-4,  # Slightly higher learning rate for LoRA
    per_device_train_batch_size=4,  # Reduced batch size
    per_device_eval_batch_size=4,
    optim="adamw_torch",
    gradient_accumulation_steps=2,  # Effective batch size of 8
    gradient_checkpointing=True,
    max_grad_norm=1.0,  # More conservative gradient clipping
    weight_decay=0.01,
    warmup_ratio=0.1,  # Add warmup
    logging_steps=10,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    fp16=False,
    bf16=False,
    dataloader_pin_memory=False,
)

In [16]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_eval,
    data_collator=data_collator,
    # Removed compute_metrics parameter
    callbacks=[EarlyStoppingCallback(early_stopping_patience=2)],
)

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


In [17]:
# Train the model
print("Starting training...")
trainer.train()

Starting training...


  return fn(*args, **kwargs)


Epoch,Training Loss,Validation Loss
0,1.6358,1.599941
2,1.5624,1.586587


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


TrainOutput(global_step=186, training_loss=1.6049273296069073, metrics={'train_runtime': 2645.4532, 'train_samples_per_second': 0.567, 'train_steps_per_second': 0.07, 'total_flos': 406594474573824.0, 'train_loss': 1.6049273296069073, 'epoch': 2.98})

In [18]:
# Evaluate the model (will only show loss without accuracy)
print("Evaluating model...")
results = trainer.evaluate()
print(f"Evaluation results: {results}")

Evaluating model...


Evaluation results: {'eval_loss': 1.5865873098373413, 'eval_runtime': 406.7429, 'eval_samples_per_second': 1.229, 'eval_steps_per_second': 0.307, 'epoch': 2.98}


In [19]:
# Save the model
trainer.save_model("roberta_peft_yelp_final")
tokenizer.save_pretrained("roberta_peft_yelp_final")
print("Model saved successfully!")



Model saved successfully!


In [12]:
from transformers import AutoTokenizer, AutoModel
from peft import PeftModel
import torch
import torch.nn as nn


In [13]:
# Path to your fine-tuned PEFT model
model_path = r"C:\Users\User\Desktop\ABC\roberta_peft_yelp_final"

# Device (CPU)
device = "cpu"

try:
    # Load base Roberta model
    base_model = AutoModel.from_pretrained("roberta-base")
    
    # Load PEFT adapters on top
    model = PeftModel.from_pretrained(base_model, model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    
    model.to(device)
    model.eval()
    print("✅ PEFT model loaded successfully!")

except Exception as e:
    print(f"❌ Loading failed: {e}")


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


✅ PEFT model loaded successfully!


In [None]:
# CPU device
device = "cpu"

# Simple linear classifier on top of CLS embedding
classifier = nn.Linear(model.base_model.config.hidden_size, 5).to(device)
classifier.eval()  # eval mode

rating_map = {
    0: ("Very Negative 😠", "⭐"),
    1: ("Negative 😞", "⭐⭐"),
    2: ("Neutral 😐", "⭐⭐⭐"),
    3: ("Positive 🙂", "⭐⭐⭐⭐"),
    4: ("Very Positive 😄", "⭐⭐⭐⭐⭐")
}

def analyze_sentiment():
    print("🤖 Yelp Review Sentiment Analyzer (CPU mode)")
    print("Type 'quit' to exit")
    print("=" * 40)
    
    while True:
        user_input = input("\n📝 Enter your review: ").strip()
        if user_input.lower() in ['quit', 'exit', 'q']:
            print("Goodbye! 👋")
            break
        if not user_input:
            continue
        
        try:
            # Tokenize input
            inputs = tokenizer(
                user_input,
                return_tensors="pt",
                truncation=True,
                padding=True,
                max_length=512
            )
            inputs = {k: v.to(device) for k, v in inputs.items()}
            
            with torch.no_grad():
                # Forward pass through the PEFT model but only get hidden states
                outputs = model.base_model(**inputs)  # NOTE: use base_model
                cls_emb = outputs.last_hidden_state[:, 0, :]  # CLS token
                
                # Pass through linear classifier
                logits = classifier(cls_emb)
                predictions = torch.nn.functional.softmax(logits, dim=-1)
                
                predicted_class = torch.argmax(predictions, dim=1).item()
                confidence = predictions[0][predicted_class].item()
            
            sentiment_text, stars = rating_map[predicted_class]
            print(f"\n🎯 Sentiment: {sentiment_text}")
            print(f"⭐ Rating: {stars}")
            print(f"📊 Confidence: {confidence:.1%}")
            print("=" * 40)
            
        except Exception as e:
            print(f"❌ Error: {e}")

# Run analyzer
analyze_sentiment()


🤖 Yelp Review Sentiment Analyzer (CPU mode)
Type 'quit' to exit



📝 Enter your review:  This product is so good



🎯 Sentiment: Neutral 😐
⭐ Rating: ⭐⭐⭐
📊 Confidence: 26.6%



📝 Enter your review:  Eww i don't like this product at all



🎯 Sentiment: Neutral 😐
⭐ Rating: ⭐⭐⭐
📊 Confidence: 26.2%



📝 Enter your review:  bad



🎯 Sentiment: Neutral 😐
⭐ Rating: ⭐⭐⭐
📊 Confidence: 26.3%



📝 Enter your review:  ?



🎯 Sentiment: Neutral 😐
⭐ Rating: ⭐⭐⭐
📊 Confidence: 26.2%
