In [None]:
from google.colab import drive

# Attach Google Drive to the Colab filesystem; the model is saved under this mount later on.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
# Install required libraries
!pip install transformers datasets accelerate peft



Collecting datasets
  Using cached datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Using cached dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Using cached xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Using cached multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Using cached fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Using cached nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Using cached nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.12

In [None]:
# Import libraries
from transformers import (
    AutoTokenizer,
    AutoModelForQuestionAnswering,
    TrainingArguments,
    Trainer,
    default_data_collator,
)
from datasets import load_dataset, DatasetDict
from peft import get_peft_model, LoraConfig, TaskType
import torch

# Fetch SQuAD; its "train" and "validation" splits are sampled further down
SQUAD_DATASET_ID = "squad"
dataset = load_dataset(SQUAD_DATASET_ID)

# Take a reproducible random fraction of one split (40% by default)
def take_subset(dataset, split, subset_ratio=0.4, seed=42):
    """Return a shuffled, fixed-size fraction of ``dataset[split]``.

    Args:
        dataset: Mapping from split name to a dataset object that supports
            ``len()``, ``shuffle(seed=...)`` and ``select(indices)``.
        split: Name of the split to sample from, e.g. ``"train"``.
        subset_ratio: Fraction of the split to keep, between 0 and 1.
            The default keeps 40% of the rows.
        seed: Seed handed to ``shuffle`` so every run picks the same rows.

    Returns:
        The first ``int(len(dataset[split]) * subset_ratio)`` rows of the
        shuffled split.

    Raises:
        ValueError: If ``subset_ratio`` is outside ``[0, 1]``.
    """
    if not 0 <= subset_ratio <= 1:
        raise ValueError(f"subset_ratio must be between 0 and 1, got {subset_ratio!r}")

    split_data = dataset[split]
    # int() truncates, so the subset never exceeds the requested fraction.
    subset_size = int(len(split_data) * subset_ratio)
    return split_data.shuffle(seed=seed).select(range(subset_size))

# Create a 40% subset of each split (take_subset's default ratio is 0.4, not 0.5)
subset_dataset = DatasetDict({
    split_name: take_subset(dataset, split_name)
    for split_name in ("train", "validation")
})

# Load the Qwen1.5-0.5B-Chat model and tokenizer
model_name = "Qwen/Qwen1.5-0.5B-Chat"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# Request the loading report so a backbone that silently failed to load is detected
# here instead of after hours of training on random weights.
model, loading_info = AutoModelForQuestionAnswering.from_pretrained(
    model_name,
    trust_remote_code=True,
    output_loading_info=True,
)

# The span-prediction head (qa_outputs) is not part of the causal-LM checkpoint, so it
# is expected to be newly initialized. Any other missing key means pretrained backbone
# weights were not mapped onto this architecture.
missing_backbone_keys = [
    key for key in loading_info["missing_keys"] if "qa_outputs" not in key
]
if missing_backbone_keys:
    raise RuntimeError(
        f"{len(missing_backbone_keys)} backbone weights of {model_name} were not loaded "
        f"from the checkpoint (e.g. {missing_backbone_keys[:3]}). Fine-tuning would start "
        "from random weights; try a different transformers version or load the backbone "
        "weights separately before continuing."
    )

tokenizer_config.json:   0%|          | 0.00/1.29k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/661 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.24G [00:00<?, ?B/s]

Some weights of Qwen2ForQuestionAnswering were not initialized from the model checkpoint at Qwen/Qwen1.5-0.5B-Chat and are newly initialized: ['embed_tokens.weight', 'layers.0.input_layernorm.weight', 'layers.0.mlp.down_proj.weight', 'layers.0.mlp.gate_proj.weight', 'layers.0.mlp.up_proj.weight', 'layers.0.post_attention_layernorm.weight', 'layers.0.self_attn.k_proj.bias', 'layers.0.self_attn.k_proj.weight', 'layers.0.self_attn.o_proj.weight', 'layers.0.self_attn.q_proj.bias', 'layers.0.self_attn.q_proj.weight', 'layers.0.self_attn.v_proj.bias', 'layers.0.self_attn.v_proj.weight', 'layers.1.input_layernorm.weight', 'layers.1.mlp.down_proj.weight', 'layers.1.mlp.gate_proj.weight', 'layers.1.mlp.up_proj.weight', 'layers.1.post_attention_layernorm.weight', 'layers.1.self_attn.k_proj.bias', 'layers.1.self_attn.k_proj.weight', 'layers.1.self_attn.o_proj.weight', 'layers.1.self_attn.q_proj.bias', 'layers.1.self_attn.q_proj.weight', 'layers.1.self_attn.v_proj.bias', 'layers.1.self_attn.v_proj

In [None]:
# Define LoRA configuration
lora_config = LoraConfig(
    task_type=TaskType.QUESTION_ANS,  # Task type for question answering
    r=8,                              # Rank of the low-rank matrices
    lora_alpha=32,                    # Scaling factor
    lora_dropout=0.1,                 # Dropout rate
    target_modules=["q_proj", "v_proj"],  # Target modules for LoRA (query and value projections)
)

# Apply LoRA to the model
# NOTE: this rebinds `model`, so re-running this cell without re-running the loading
# cell would wrap an already wrapped model.
model = get_peft_model(model, lora_config)

# Enable gradient checkpointing to save memory
model.gradient_checkpointing_enable()

# The base weights are frozen, so the embedding output does not require grad. With
# checkpointing enabled, blocks whose inputs carry no grad can yield no gradients for
# the LoRA adapters inside them. Forcing the embedding output to require grad keeps
# gradients flowing to the adapters.
model.enable_input_require_grads()

In [None]:
# Tokenize the subset dataset
def preprocess_function(examples):
    """Tokenize question/context pairs and attach answer-span token labels.

    Uses the module-level ``tokenizer``.

    Args:
        examples: Batch dict with parallel lists under ``"question"``,
            ``"context"`` and ``"answers"``. Each answers entry holds an
            ``"answer_start"`` list (character offsets into the context) and
            a ``"text"`` list.

    Returns:
        The tokenizer output without ``offset_mapping``, extended with
        ``start_positions`` and ``end_positions`` (one token index per
        example). Examples with no answer, or whose answer is not fully inside
        the context tokens kept after truncation, are labelled ``(0, 0)``.
    """
    inputs = tokenizer(
        examples["question"],
        examples["context"],
        # Truncate only the context so the question is always kept intact.
        truncation="only_second",
        padding="max_length",
        max_length=512,
        return_offsets_mapping=True,
    )

    offset_mapping = inputs.pop("offset_mapping")
    answers = examples["answers"]
    start_positions = []
    end_positions = []

    for i, answer in enumerate(answers):
        # Handle cases where there are no answers (SQuAD 2.0)
        if not answer["answer_start"]:
            start_positions.append(0)
            end_positions.append(0)
            continue

        # Only the first listed answer is used as the training target
        start_char = answer["answer_start"][0]
        end_char = start_char + len(answer["text"][0])

        # Convert character positions to token positions. Only context tokens
        # (sequence id 1) are considered: question-token offsets index into the
        # question string and would otherwise match answer positions by accident.
        start_token = None
        end_token = None
        token_info = zip(inputs.sequence_ids(i), offset_mapping[i])
        for token_idx, (sequence_id, (start, end)) in enumerate(token_info):
            if sequence_id != 1:
                continue
            if start <= start_char < end:
                start_token = token_idx
            if start < end_char <= end:
                end_token = token_idx

        if start_token is None or end_token is None:
            # The answer was (partly) truncated away: use the same (0, 0) label
            # as unanswerable examples rather than pointing at unrelated tokens.
            start_positions.append(0)
            end_positions.append(0)
        else:
            start_positions.append(start_token)
            end_positions.append(end_token)

    inputs["start_positions"] = start_positions
    inputs["end_positions"] = end_positions
    return inputs

# Tokenize every split in batches and drop the raw columns so only the
# fields returned by preprocess_function remain
raw_columns = subset_dataset["train"].column_names
tokenized_subset = subset_dataset.map(
    preprocess_function,
    batched=True,
    remove_columns=raw_columns,
)

Map:   0%|          | 0/35039 [00:00<?, ? examples/s]

Map:   0%|          | 0/4228 [00:00<?, ? examples/s]

In [None]:
# Training arguments: evaluate and checkpoint once per epoch, keep the two latest checkpoints
training_args = TrainingArguments(
    output_dir="./qwen1.5-0.5B-squad-lora-finetuned",
    eval_strategy="epoch",  # replaces the deprecated `evaluation_strategy` name
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    save_total_limit=2,
    fp16=True,
    logging_dir="./logs",
    logging_steps=10,
    save_strategy="epoch",
    report_to="none",
)





In [None]:
# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_subset["train"],
    eval_dataset=tokenized_subset["validation"],
    # `tokenizer=` is deprecated for Trainer.__init__; `processing_class` is its replacement.
    processing_class=tokenizer,
    # Inputs are already padded to a fixed length during preprocessing.
    data_collator=default_data_collator,
)

# Fine-tune the model
trainer.train()

  trainer = Trainer(
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Epoch,Training Loss,Validation Loss
1,5.6941,5.764335
2,5.7641,5.674832
3,5.5787,5.653283




TrainOutput(global_step=13140, training_loss=5.761590609267422, metrics={'train_runtime': 4187.172, 'train_samples_per_second': 25.105, 'train_steps_per_second': 3.138, 'total_flos': 9.984532318083072e+16, 'train_loss': 5.761590609267422, 'epoch': 3.0})

In [None]:
# Destination on the mounted Google Drive so the result outlives the Colab session
model_save_path = "/content/drive/MyDrive/models/qwen"
# Save the fine-tuned model
trainer.save_model(model_save_path)
tokenizer.save_pretrained(model_save_path)

# f-string so the actual path is printed instead of the literal "{model_save_path}"
print(f"Fine-tuning with LoRA complete! Model saved to {model_save_path}")

Fine-tuning with LoRA complete! Model saved to {model_save_path}


In [None]:
# Evaluate the model with the trainer configured above (its eval_dataset is the
# tokenized validation subset). In the recorded run the returned dict held only the
# loss plus runtime/throughput figures, i.e. no exact-match or F1 score.
results = trainer.evaluate()
print(f"Evaluation Results: {results}")

Evaluation Results: {'eval_loss': 5.653282642364502, 'eval_runtime': 144.125, 'eval_samples_per_second': 29.336, 'eval_steps_per_second': 3.67, 'epoch': 3.0}
