<a href="https://colab.research.google.com/github/Sirisha-R-N/Transformers/blob/main/sentiment_analysis_lora.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install transformers datasets accelerate peft trl

In [None]:
!pip install evaluate

In [None]:
!pip install -U transformers

In [None]:
# Print the installed transformers release so the environment used by the
# rest of the notebook is recorded in the cell output.
# NOTE(review): the training cell uses `eval_strategy`, which presumably
# needs a recent release - confirm the minimum version.
import transformers
print(transformers.__version__)


In [None]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
from peft import get_peft_model, LoraConfig, TaskType
import torch
import evaluate

# Checkpoint shared by the tokenizer and the classifier
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)


def _label_as_int(row):
    """Return the row's ``label`` field cast to a plain ``int``."""
    return {'label': int(row['label'])}


# Fetch IMDb and normalise the label column to integers
dataset = load_dataset("imdb")
dataset = dataset.map(_label_as_int)

def tokenize_function(example):
    """Tokenize the ``text`` field of a (batched) example.

    Every sequence is truncated and padded to exactly 512 tokens, so all
    encoded rows have the same length.

    Args:
        example: mapping with a ``"text"`` entry, as passed by ``dataset.map``.

    Returns:
        The tokenizer's encoding for ``example["text"]``.
    """
    encode_options = {
        "padding": "max_length",
        "truncation": True,
        "max_length": 512,
    }
    return tokenizer(example["text"], **encode_options)

# Tokenize every split in batches, then expose only the model inputs and the
# label as torch tensors
tokenized_datasets = dataset.map(tokenize_function, batched=True)
tensor_columns = ["input_ids", "attention_mask", "label"]
tokenized_datasets.set_format(type="torch", columns=tensor_columns)

# Sequence-classification model with two output labels
base_model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=2,
)

# LoRA hyperparameters for the sequence-classification task
lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    target_modules=["query", "value"],  # typical for BERT
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
)

# Wrap the base model with the LoRA adapters and report what is trainable
model = get_peft_model(base_model, lora_config)
model.print_trainable_parameters()

# Metric object used by compute_metrics
accuracy = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    """Compute accuracy from an evaluation prediction pair.

    Args:
        eval_pred: pair ``(logits, labels)`` that unpacks into the raw class
            scores and the reference labels.

    Returns:
        The result of ``accuracy.compute`` for the arg-max predictions.
    """
    scores, references = eval_pred
    # Highest-scoring class along the last axis is the predicted label
    predicted = torch.tensor(scores).argmax(dim=-1)
    return accuracy.compute(predictions=predicted, references=references)

# Training configuration, grouped by concern
training_args = TrainingArguments(
    # Where checkpoints and logs go
    output_dir="./results",
    logging_dir='./logs',
    logging_steps=100,
    report_to="none",
    # Optimisation schedule
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=64,
    weight_decay=0.01,
    # Evaluate and checkpoint once per epoch; keep a single checkpoint and
    # reload the best one when training finishes
    eval_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=1,
    load_best_model_at_end=True,
)

# Create Trainer
# Both subsets are shuffled before taking the first N rows. The IMDb splits
# are stored grouped by label, so an unshuffled `select(range(N))` on the test
# split would yield a single-class evaluation set and a meaningless accuracy.
train_subset = tokenized_datasets["train"].shuffle(seed=42).select(range(20000))  # Subset for faster training
eval_subset = tokenized_datasets["test"].shuffle(seed=42).select(range(5000))

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_subset,
    eval_dataset=eval_subset,
    # `processing_class` replaces the deprecated `tokenizer` argument in
    # current transformers releases (the notebook upgrades to the latest).
    processing_class=tokenizer,
    compute_metrics=compute_metrics,
)

# Train
trainer.train()


In [None]:
# Run evaluation on the eval dataset the Trainer was constructed with;
# the returned value is kept in `results` for later inspection
results = trainer.evaluate()

# Print the results
print("Evaluation results:", results)


In [None]:
def predict_sentiment(text):
    """Classify a single review as positive or negative.

    The model is switched to eval mode for the forward pass so dropout is
    disabled and repeated calls give the same answer; its previous
    train/eval mode is restored afterwards. Gradients are not tracked.

    Args:
        text: the review to classify, as one string.

    Returns:
        A ``(label, confidence)`` tuple where ``label`` is ``"Positive"``
        (class 1) or ``"Negative"`` and ``confidence`` is the softmax
        probability of the predicted class as a Python float.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(model.device)

    was_training = model.training
    model.eval()
    try:
        # Inference only: skip building the autograd graph
        with torch.no_grad():
            logits = model(**inputs).logits
    finally:
        # Leave the model in whatever mode the caller had it in
        model.train(was_training)

    # Class probabilities of the (single) input sequence
    probs = torch.nn.functional.softmax(logits, dim=-1)[0]
    # Reduce over the class axis rather than over a flattened tensor
    pred = int(torch.argmax(probs, dim=-1))
    label = "Positive" if pred == 1 else "Negative"
    return label, probs[pred].item()

# Example: run predict_sentiment on an enthusiastic review and print the
# predicted label with its confidence rounded to two decimals
text = "This movie was absolutely amazing!"
label, confidence = predict_sentiment(text)
print(f"Prediction: {label} (Confidence: {confidence:.2f})")

In [None]:
def predict_sentiment(text):
    """Classify a single review as positive or negative.

    The model is switched to eval mode for the forward pass so dropout is
    disabled and repeated calls give the same answer; its previous
    train/eval mode is restored afterwards. Gradients are not tracked.

    Args:
        text: the review to classify, as one string.

    Returns:
        A ``(label, confidence)`` tuple where ``label`` is ``"Positive"``
        (class 1) or ``"Negative"`` and ``confidence`` is the softmax
        probability of the predicted class as a Python float.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(model.device)

    was_training = model.training
    model.eval()
    try:
        # Inference only: skip building the autograd graph
        with torch.no_grad():
            logits = model(**inputs).logits
    finally:
        # Leave the model in whatever mode the caller had it in
        model.train(was_training)

    # Class probabilities of the (single) input sequence
    probs = torch.nn.functional.softmax(logits, dim=-1)[0]
    # Reduce over the class axis rather than over a flattened tensor
    pred = int(torch.argmax(probs, dim=-1))
    label = "Positive" if pred == 1 else "Negative"
    return label, probs[pred].item()

# Example: run predict_sentiment on a negative review and print the
# predicted label with its confidence rounded to two decimals
text = "This movie was horrible"
label, confidence = predict_sentiment(text)
print(f"Prediction: {label} (Confidence: {confidence:.2f})")