In [1]:
!pip install -q transformers datasets accelerate peft bitsandbytes

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.0/76.0 MB[0m [31m22.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, DataCollatorForLanguageModeling
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from datasets import load_dataset
import torch
import os

print("🔧 Starting LoRA fine-tuning script...")

# --- Settings ---
model_name = "TinyLLaMA/TinyLLaMA-1.1B-Chat-v1.0"
dataset_path = "/kaggle/input/medidata"
dataset_file = "dataset.json"
output_dir = "/kaggle/working/tinyllama-lora-finetuned"

print(f"📁 Model: {model_name}")
print(f"📂 Dataset path: {os.path.join(dataset_path, dataset_file)}")
print(f"💾 Output will be saved to: {output_dir}")

# --- Load tokenizer and base model ---
print("📦 Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token  # Set pad token

print("📦 Loading base model in 4-bit...")
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_4bit=True,
    device_map="auto",
    torch_dtype=torch.float16,
)

# --- Prepare model for LoRA ---
print("🛠️ Preparing model for LoRA training...")
base_model = prepare_model_for_kbit_training(base_model)

lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

print("🔌 Applying LoRA configuration...")
model = get_peft_model(base_model, lora_config)
model.print_trainable_parameters()

# --- Load dataset (Alpaca format) ---
print("📄 Loading dataset...")
raw_dataset = load_dataset("json", data_files=os.path.join(dataset_path, dataset_file))["train"]
print(f"📊 Original dataset size: {len(raw_dataset)} samples")

# --- Reduce to 3,000 samples ---
raw_dataset = raw_dataset.shuffle(seed=42).select(range(63000))
print(f"📉 Reduced dataset size: {len(raw_dataset)} samples")

# --- Format dataset ---
print("✍️ Tokenizing dataset...")

def format_alpaca(example):
    instruction = example["instruction"]
    input_text = example["input"]
    output_text = example["output"]

    if input_text:
        prompt = f"### Instruction:\n{instruction}\n\n### Input:\n{input_text}\n\n### Response:\n"
    else:
        prompt = f"### Instruction:\n{instruction}\n\n### Response:\n"

    return {
        "input_ids": tokenizer(prompt + output_text, truncation=True, padding="max_length", max_length=512, return_tensors="pt")["input_ids"][0],
        "labels": tokenizer(output_text, truncation=True, padding="max_length", max_length=512, return_tensors="pt")["input_ids"][0]
    }

tokenized_dataset = raw_dataset.map(format_alpaca, num_proc=4)
print("✅ Dataset tokenization complete.")

# --- Training ---
print("🚀 Starting training...")

training_args = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=1,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    warmup_steps=10,
    logging_dir=f"{output_dir}/logs",
    logging_steps=10,
    save_strategy="epoch",
    evaluation_strategy="no",
    save_total_limit=1,
    learning_rate=2e-4,
    fp16=True,
    report_to="none",
)

data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

trainer = Trainer(
    model=model,
    train_dataset=tokenized_dataset,
    args=training_args,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

trainer.train()
print("✅ Training complete.")

# --- Save LoRA model ---
print("💾 Saving model and tokenizer...")
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)

print(f"✅ LoRA fine-tuned TinyLLaMA model saved to: {output_dir}")


🔧 Starting LoRA fine-tuning script...
📁 Model: TinyLLaMA/TinyLLaMA-1.1B-Chat-v1.0
📂 Dataset path: /kaggle/input/medidata/dataset.json
💾 Output will be saved to: /kaggle/working/tinyllama-lora-finetuned
📦 Loading tokenizer...


tokenizer_config.json:   0%|          | 0.00/1.29k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

📦 Loading base model in 4-bit...


config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

🛠️ Preparing model for LoRA training...
🔌 Applying LoRA configuration...
trainable params: 1,126,400 || all params: 1,101,174,784 || trainable%: 0.1023
📄 Loading dataset...


Generating train split: 0 examples [00:00, ? examples/s]

📊 Original dataset size: 125734 samples
📉 Reduced dataset size: 63000 samples
✍️ Tokenizing dataset...


Map (num_proc=4):   0%|          | 0/63000 [00:00<?, ? examples/s]

✅ Dataset tokenization complete.
🚀 Starting training...


  trainer = Trainer(
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


Step,Training Loss
10,11.2447
20,10.4601
30,9.6438
40,9.3229
50,9.0365
60,9.1647
70,8.8384
80,9.1388
90,9.1561
100,8.8511


✅ Training complete.
💾 Saving model and tokenizer...
✅ LoRA fine-tuned TinyLLaMA model saved to: /kaggle/working/tinyllama-lora-finetuned
