In [1]:
!pip install -q transformers datasets accelerate peft bitsandbytes

[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m76.0/76.0 MB[0m [31m22.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, DataCollatorForLanguageModeling
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training, PeftModel
from datasets import load_dataset
import torch
import os

print("üîß Starting continued LoRA fine-tuning script...")

# --- Settings ---
base_model_name = "TinyLLaMA/TinyLLaMA-1.1B-Chat-v1.0"
lora_path = "/kaggle/input/finetuned/tinyllama-lora-finetuned"
dataset_path = "/kaggle/input/medidata"
dataset_file = "second_half.json"
output_dir = "/kaggle/working/tinyllama-lora-finetuned-v2"

print(f"üìÅ Base Model: {base_model_name}")
print(f"üîÅ Continue LoRA fine-tuning from: {lora_path}")
print(f"üìÇ Dataset path: {os.path.join(dataset_path, dataset_file)}")
print(f"üíæ Output will be saved to: {output_dir}")

# --- Load tokenizer ---
print("üì¶ Step 1: Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(base_model_name, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token
print("‚úÖ Tokenizer loaded.")

# --- Load base model ---
print("üì¶ Step 2: Loading base model in 4-bit...")
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    load_in_4bit=True,
    device_map="auto",
    torch_dtype=torch.float16,
)
print("‚úÖ Base model loaded.")

# --- Prepare model for further LoRA training ---
print("üõ† Step 3: Preparing model for LoRA stacking...")
base_model = prepare_model_for_kbit_training(base_model)

print("üîå Step 4: Loading existing LoRA weights...")
model = PeftModel.from_pretrained(base_model, lora_path)
print("‚úÖ Existing LoRA weights loaded.")

# --- Apply new LoRA config ---
print("üß© Step 5: Applying new LoRA configuration for stacking...")
lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()
print("‚úÖ New LoRA configuration applied.")

# --- Load and preprocess dataset ---
print("üìÑ Step 6: Loading dataset...")
raw_dataset = load_dataset("json", data_files=os.path.join(dataset_path, dataset_file))["train"]
print(f"üìä Original dataset size: {len(raw_dataset)} samples")

# --- Tokenize dataset ---
print("‚úç Step 7: Formatting and tokenizing...")

def format_alpaca(example):
    instruction = example["instruction"]
    input_text = example["input"]
    output_text = example["output"]

    if input_text:
        prompt = f"### Instruction:\n{instruction}\n\n### Input:\n{input_text}\n\n### Response:\n"
    else:
        prompt = f"### Instruction:\n{instruction}\n\n### Response:\n"

    return {
        "input_ids": tokenizer(prompt + output_text, truncation=True, padding="max_length", max_length=512, return_tensors="pt")["input_ids"][0],
        "labels": tokenizer(output_text, truncation=True, padding="max_length", max_length=512, return_tensors="pt")["input_ids"][0]
    }

tokenized_dataset = raw_dataset.map(format_alpaca, num_proc=4)
print("‚úÖ Tokenization complete.")

# --- Training ---
print("üöÄ Step 8: Beginning training...")

training_args = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=1,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    warmup_steps=10,
    logging_dir=f"{output_dir}/logs",
    logging_steps=10,
    save_strategy="epoch",
    evaluation_strategy="no",
    save_total_limit=1,
    learning_rate=2e-4,
    fp16=True,
    report_to="none",
)

data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

trainer = Trainer(
    model=model,
    train_dataset=tokenized_dataset,
    args=training_args,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

print("üèÅ Training in progress...")
trainer.train()
print("‚úÖ Training complete.")

# --- Save Model ---
print("üíæ Step 9: Saving new LoRA weights...")
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)
print(f"‚úÖ Final stacked LoRA model saved to: {output_dir}")

üîß Starting continued LoRA fine-tuning script...
üìÅ Base Model: TinyLLaMA/TinyLLaMA-1.1B-Chat-v1.0
üîÅ Continue LoRA fine-tuning from: /kaggle/input/finetuned/tinyllama-lora-finetuned
üìÇ Dataset path: /kaggle/input/medidata/second_half.json
üíæ Output will be saved to: /kaggle/working/tinyllama-lora-finetuned-v2
üì¶ Step 1: Loading tokenizer...


tokenizer_config.json:   0%|          | 0.00/1.29k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

‚úÖ Tokenizer loaded.
üì¶ Step 2: Loading base model in 4-bit...


config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

‚úÖ Base model loaded.
üõ† Step 3: Preparing model for LoRA stacking...
üîå Step 4: Loading existing LoRA weights...
‚úÖ Existing LoRA weights loaded.
üß© Step 5: Applying new LoRA configuration for stacking...
trainable params: 1,126,400 || all params: 1,101,174,784 || trainable%: 0.1023
‚úÖ New LoRA configuration applied.
üìÑ Step 6: Loading dataset...


Generating train split: 0 examples [00:00, ? examples/s]

üìä Original dataset size: 62734 samples
‚úç Step 7: Formatting and tokenizing...


Map (num_proc=4):   0%|          | 0/62734 [00:00<?, ? examples/s]

‚úÖ Tokenization complete.
üöÄ Step 8: Beginning training...
üèÅ Training in progress...


  trainer = Trainer(
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


Step,Training Loss
10,10.9051
20,10.4157
30,9.0726
40,8.9852
50,8.9002
60,8.5583
70,8.7288
80,9.0691
90,9.0385
100,8.9058


‚úÖ Training complete.
üíæ Step 9: Saving new LoRA weights...
‚úÖ Final stacked LoRA model saved to: /kaggle/working/tinyllama-lora-finetuned-v2
