<a href="https://colab.research.google.com/github/KaifAhmad1/code-test/blob/main/Llumo_AI_Assignment_Mohd_Kaif.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#### **Fine Tuning Meta's Llama 3.2B Model on Meta Review Summarization Task**

In [16]:
!pip install -qU transformers datasets evaluate rouge_score trl peft bitsandbytes accelerate

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/330.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m330.9/330.9 kB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [17]:
import torch
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from trl import SFTTrainer
import evaluate
import matplotlib.pyplot as plt
from accelerate import Accelerator

In [18]:
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [20]:
# Load and Explore the Dataset
dataset = load_dataset("zqz979/meta-review")
print(f"Dataset size: {len(dataset['train'])} train, {len(dataset['validation'])} validation, {len(dataset['test'])} test")

print("\nSample Meta-Review:")
print(dataset['train'][0]['Input'][:500] + "...")
print("\nSample Summary:")
print(dataset['train'][0]['Output'])

Dataset size: 7692 train, 1648 validation, 1649 test

Sample Meta-Review:
In this paper, the author investigates how to utilize large-scale human video to train dexterous robot manipulation skills. To leverage the information from the Internet videos, the author proposes a handful of techniques to pre-process the video data to extract the action information. Then the network is trained on the extracted hand data and deployed to the real robot with some human demonstration collected by teleoperation for fine-tuning. Experiments show that the proposed pipeline can solve...

Sample Summary:
This paper studies how to learn dexterous manipulation from human videos.    In the initial review, the reviewer appreciated the direction and real-world experiment but also raised  concerns about the need of special sensor for tracking. During rebuttal, the authors effectively addressed this concern by providing additional experiment results, and reviewers were satisfied with the response.  AC would l

In [23]:
# Load tokenizer
model_name = "meta-llama/Llama-2-7b-hf"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Configure quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Load model with 4-bit quantization and bf16 compute dtype
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
    use_auth_token=True
)

# Enable gradient checkpointing for memory efficiency
model.gradient_checkpointing_enable()

# Disable caching to save memory
model.config.use_cache = False



AttributeError: 'HfTrainerDeepSpeedConfig' object has no attribute 'is_zero3'

In [24]:
# Preprocess Data
def preprocess_function(examples):
    inputs = [f"Summarize the following meta-review:\n\n{review}\n\nSummary:" for review in examples["Input"]]
    model_inputs = tokenizer(inputs, max_length=512, truncation=True, padding="max_length")
    labels = tokenizer(examples["Output"], max_length=128, truncation=True, padding="max_length")
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

tokenized_dataset = dataset.map(preprocess_function, batched=True, remove_columns=dataset["train"].column_names, num_proc=4)

In [25]:
# Define LoRA Configuration
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

In [26]:
# Set Up Training Arguments
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=6,
    per_device_train_batch_size=8,  # Increased batch size
    gradient_accumulation_steps=8,  # Increased gradient accumulation steps
    learning_rate=2e-4,
    fp16=True,
    logging_steps=10,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    warmup_steps=100,
    weight_decay=0.01,
    lr_scheduler_type="cosine",
    report_to="tensorboard",
)



In [28]:
# Set Up Trainer
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)

# Define evaluation metric
rouge = evaluate.load('rouge')

def compute_metrics(pred):
    labels_ids = pred.label_ids
    pred_ids = pred.predictions

    pred_str = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)
    labels_ids[labels_ids == -100] = tokenizer.pad_token_id
    label_str = tokenizer.batch_decode(labels_ids, skip_special_tokens=True)

    rouge_output = rouge.compute(predictions=pred_str, references=label_str, use_stemmer=True)
    return {
        'rouge1': rouge_output['rouge1'].mid.fmeasure,
        'rouge2': rouge_output['rouge2'].mid.fmeasure,
        'rougeL': rouge_output['rougeL'].mid.fmeasure,
    }

from transformers import DataCollatorForSeq2Seq

data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

# Initialize Accelerator
accelerator = Accelerator()

# Prepare model, optimizer, and dataloader with Accelerator
model, optimizer, train_dataloader, eval_dataloader = accelerator.prepare(
    model,
    training_args.optimizer,
    tokenized_dataset["train"],
    tokenized_dataset["validation"]
)

# Train the Model
print("Starting model training...")
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    peft_config=lora_config,
    dataset_text_field="Input",
    max_seq_length=512,
    compute_metrics=compute_metrics,
    data_collator=data_collator
)

# Train the model
print("Starting model training...")
trainer.train()

AttributeError: 'TrainingArguments' object has no attribute 'optimizer'

In [None]:
# Save the Fine-tuned Model
model.save_pretrained("./fine_tuned_model")
tokenizer.save_pretrained("./fine_tuned_model")
print("Fine-tuned model saved.")

In [None]:
# Define Summarization Prompt
def generate_summary(meta_review):
    prompt = f"""You are an AI assistant trained to summarize meta-reviews of academic papers. Your task is to provide a concise and informative summary that captures the key points of the meta-review. Focus on the following aspects:

1. Overall assessment of the paper
2. Main strengths highlighted in the review
3. Primary weaknesses or concerns raised
4. Any specific recommendations for improvement or future work

Please ensure your summary is clear, objective, and captures the essence of the meta-review without including specific details about individual reviewers' comments.

Meta-review:
{meta_review}

Summary:"""

    return prompt

# Generate Summaries
from transformers import pipeline

summarizer = pipeline("text-generation", model="./fine_tuned_model", tokenizer=tokenizer, device="cuda" if torch.cuda.is_available() else "cpu")

def generate_summary_with_model(meta_review):
    prompt = generate_summary(meta_review)
    response = summarizer(prompt, max_new_tokens=150, do_sample=True, top_p=0.9, temperature=0.7)
    return response[0]["generated_text"].split("Summary:")[-1].strip()

In [None]:
# Evaluate Model Performance
# Evaluate on test set
test_results = trainer.evaluate(tokenized_dataset['test'])

print("\nTest Set Evaluation Results:")
print(f"Loss: {test_results['eval_loss']:.4f}")
print(f"ROUGE-1: {test_results['eval_rouge1']:.4f}")
print(f"ROUGE-2: {test_results['eval_rouge2']:.4f}")
print(f"ROUGE-L: {test_results['eval_rougeL']:.4f}")

# Generate and Compare Summaries
num_samples = 5
for i in range(num_samples):
    test_meta_review = dataset["test"][i]["Input"]
    generated_summary = generate_summary_with_model(test_meta_review)
    actual_summary = dataset["test"][i]["Output"]

    print(f"\nSample {i+1}:")
    print("Generated Summary:", generated_summary)
    print("Actual Summary:", actual_summary)

    # Compute ROUGE scores for this sample
    rouge_scores = rouge.compute(predictions=[generated_summary], references=[actual_summary], use_stemmer=True)
    print(f"ROUGE-1: {rouge_scores['rouge1'].mid.fmeasure:.4f}")
    print(f"ROUGE-2: {rouge_scores['rouge2'].mid.fmeasure:.4f}")
    print(f"ROUGE-L: {rouge_scores['rougeL'].mid.fmeasure:.4f}")

In [None]:
# Visualize Training Progress
# Plot training loss
plt.figure(figsize=(10, 6))
plt.plot(trainer.state.log_history)
plt.title('Training Loss')
plt.xlabel('Step')
plt.ylabel('Loss')
plt.show()

# Plot ROUGE scores
plt.figure(figsize=(10, 6))
for metric in ['rouge1', 'rouge2', 'rougeL']:
    plt.plot([log[f'eval_{metric}'] for log in trainer.state.log_history if f'eval_{metric}' in log], label=metric)
plt.title('ROUGE Scores During Training')
plt.xlabel('Evaluation Step')
plt.ylabel('Score')
plt.legend()
plt.show()