# 2nd run of fine tuning.
### This notebook is the second iteration of the fine-tuning workflow. It is inspired by online learning (although it is not true online learning): the model is trained and its checkpoints are saved, so that when a new run is initialized the progress does not reset. This was done to work around the limitations of the free tier of Google Colab.
---



### Prerequisite to run the notebook
- Mount Google Drive and authenticate it.
- Change the runtime to use T4 GPU.
- Create a folder in your Drive named "llm_finetuning", and create the directory structure shown below.
```
Drive/
└── llm_finetuning/
      ├── finetuning_dataset/
      │   └── final_dataset.json
      └── llama3-finetuned/
          └── <this is where all the adapter model files will be   stored>
```
- Add a Hugging Face token to the Colab secrets under the name "HF_TOKEN" (this is the secret name the authentication cell reads) and grant the notebook access to it. Make sure the token is in "READ" mode and has permission to use the "Meta-Llama-3-8B-Instruct" model.
- The dataset must be uploaded to Drive and named "final_dataset.json".


In [None]:
# 1. Install dependencies
!pip install -q transformers datasets peft accelerate bitsandbytes
!pip install -q huggingface_hub

In [None]:
# 2. Hugging Face Login
from huggingface_hub import login

In [None]:
# Authenticate with the Hugging Face Hub using a secret stored in Google Colab.
from google.colab import userdata
# Reads the Colab secret named 'HF_TOKEN' and uses it to log in.
token=userdata.get('HF_TOKEN')
login(token=token)

In [None]:
# 3. Imports
import json
import os
import re

import torch
from datasets import Dataset
from peft import LoraConfig, get_peft_model, TaskType, prepare_model_for_kbit_training
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer,
)


In [None]:
# Mount Google Drive at /content/drive so the dataset, checkpoints and adapter
# files live in persistent storage.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# List the contents of the Drive root (e.g. to check that the llm_finetuning
# folder is visible).
import os
os.listdir("/content/drive/MyDrive")


['llm_finetuning']

In [None]:

# Output directory in Google Drive: passed to TrainingArguments as output_dir
# (where checkpoints are written) and used as the parent of the saved adapter folder.
output_dir = "/content/drive/MyDrive/llm_finetuning/llama3-finetuned"

In [None]:
# Function to find latest checkpoint
def get_latest_checkpoint(output_dir):
    """Return the path of the highest-numbered ``checkpoint-<step>`` directory.

    Args:
        output_dir: Directory that holds the ``checkpoint-<step>`` folders.

    Returns:
        ``output_dir`` joined with the name of the checkpoint folder that has
        the largest step number, or ``None`` when ``output_dir`` does not exist
        or contains no checkpoint directory.
    """
    if not os.path.isdir(output_dir):
        # First run: nothing has been written yet, so there is nothing to resume.
        return None

    latest_step = -1
    latest_path = None
    for name in os.listdir(output_dir):
        # Match the bare entry name in full, so that neither a parent folder
        # containing "checkpoint-N" nor a name such as "checkpoint-5-old"
        # can influence which step number is parsed.
        match = re.fullmatch(r"checkpoint-(\d+)", name)
        if match is None:
            continue
        path = os.path.join(output_dir, name)
        if not os.path.isdir(path):
            # A stray file with a checkpoint-like name cannot be resumed from.
            continue
        step = int(match.group(1))
        if step > latest_step:
            latest_step, latest_path = step, path
    return latest_path

In [None]:
# 4. Load dataset (JSON list of records, each with 'input' and 'output')
dataset_path ="/content/drive/MyDrive/llm_finetuning/finetuning_dataset/final_dataset.json"
# Read the file as UTF-8 explicitly so non-ASCII text is decoded the same way
# regardless of the runtime's default locale encoding.
with open(dataset_path, "r", encoding="utf-8") as f:
    data = json.load(f)

In [None]:
# Build the training text for every record: the prompt wrapped in
# [INST] ... [/INST], followed by the expected answer, then convert the
# records into a Dataset.
# NOTE(review): this [INST] template looks like the Llama-2 style prompt format
# rather than the Llama 3 Instruct chat template - confirm it is intentional
# before changing it, since existing checkpoints were trained on this text.
for record in data:
    prompt, answer = record["input"], record["output"]
    record["text"] = f"<s>[INST] {prompt} [/INST] {answer}</s>"

dataset = Dataset.from_list(data)

In [None]:
# 5. Load tokenizer and model
base_model = "meta-llama/Meta-Llama-3-8B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
# Reuse the EOS token as the padding token so fixed-length padding can be applied.
tokenizer.pad_token = tokenizer.eos_token

# 4-bit NF4 quantization with double quantization. The settings are passed via
# quantization_config because handing load_in_4bit / bnb_4bit_* directly to
# from_pretrained is deprecated.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=quantization_config,
    device_map="auto",
    torch_dtype=torch.float16,
)

tokenizer_config.json:   0%|          | 0.00/51.0k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/654 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/187 [00:00<?, ?B/s]

In [None]:
# 6. Apply LoRA config
# LoRA adapters (rank 8, alpha 16, dropout 0.05, no bias training) attached to
# the modules named q_proj and v_proj.
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    target_modules=["q_proj", "v_proj"],
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
)

# Prepare the quantized model for k-bit training, then wrap it with the adapters.
model = get_peft_model(prepare_model_for_kbit_training(model), lora_config)

In [None]:
# 7. Tokenization function
def tokenize(example):
    """Tokenize one record's ``text`` to a fixed length and build its labels.

    Args:
        example: Mapping with a ``"text"`` entry holding the formatted prompt
            and answer.

    Returns:
        The tokenizer output (``input_ids`` and ``attention_mask``, padded or
        truncated to 512 tokens) with an added ``labels`` list. Labels equal
        ``input_ids`` at real-token positions and are -100 at padding
        positions, except for the first padding token after the last real
        token, which is kept as a target.
    """
    tokenized = tokenizer(
        example["text"],
        truncation=True,
        padding="max_length",
        max_length=512
    )
    input_ids = tokenized["input_ids"]
    attention_mask = tokenized["attention_mask"]

    # -100 is the label value the loss ignores. Without this mask most of the
    # loss would come from predicting padding rather than the actual answer.
    labels = [
        token_id if keep else -100
        for token_id, keep in zip(input_ids, attention_mask)
    ]

    # The pad token is the EOS token in this notebook, so the first pad that
    # follows the last real token is the only end-of-sequence target available.
    # Keep it supervised so the model still learns where an answer stops.
    real_positions = [i for i, keep in enumerate(attention_mask) if keep]
    if real_positions:
        first_after = real_positions[-1] + 1
        if first_after < len(labels):
            labels[first_after] = input_ids[first_after]

    tokenized["labels"] = labels
    return tokenized

# Run tokenize over the dataset to produce the model inputs and labels.
tokenized_dataset = dataset.map(tokenize)

Map:   0%|          | 0/2835 [00:00<?, ? examples/s]

In [None]:
# 8. Split into train/test
# A fixed seed keeps the split identical in every Colab session. Without it each
# resumed run would draw a new random split, moving held-out examples into the
# training set and changing the data the restored checkpoint continues on.
data_split = tokenized_dataset.train_test_split(test_size=0.1, seed=42)



In [None]:
# Print the installed transformers version.
import transformers
print(transformers.__version__)

4.53.2


In [None]:

# 9. Training arguments
# Resume from the most recent checkpoint in output_dir, if one exists
resume_from_checkpoint = get_latest_checkpoint(output_dir)

# The run targets 3 epochs in total. A checkpoint is written at the end of every
# epoch, so an interrupted session continues from the last saved epoch when the
# notebook is run again.
training_args = TrainingArguments(
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    num_train_epochs=3,  # total number of epochs, not epochs per session
    logging_steps=5,
    output_dir=output_dir,
    save_total_limit=2,  # keep at most two checkpoints on disk
    save_strategy="epoch",
    fp16=True,
    report_to="none",
    # Trainer cannot infer the label names through the PeftModel wrapper and
    # otherwise falls back to an empty list (see its warning), so name them here.
    label_names=["labels"],
)


In [None]:

# Create Trainer
# The tokenizer is passed as processing_class; the older tokenizer= argument is
# deprecated in the installed transformers version.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=data_split["train"],
    eval_dataset=data_split["test"],
    processing_class=tokenizer,
)


  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [None]:
# Train, resuming from the latest checkpoint when one was found.
# resume_from_checkpoint is None when output_dir holds no checkpoint yet.
trainer.train(resume_from_checkpoint=resume_from_checkpoint)

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(

Step,Training Loss
1280,0.3731
1285,0.1762
1290,0.2837
1295,0.2903
1300,0.299
1305,0.2926
1310,0.4012
1315,0.2098
1320,0.237
1325,0.256


TrainOutput(global_step=1913, training_loss=0.09448050648768536, metrics={'train_runtime': 6359.8007, 'train_samples_per_second': 1.203, 'train_steps_per_second': 0.301, 'total_flos': 1.767286086209372e+17, 'train_loss': 0.09448050648768536, 'epoch': 3.0})

In [None]:
# Save the LoRA adapter and the tokenizer into a fixed "lora_adapter_latest"
# folder inside output_dir; each run writes to the same folder.
adapter_output_dir = os.path.join(output_dir, "lora_adapter_latest")
model.save_pretrained(adapter_output_dir)
tokenizer.save_pretrained(adapter_output_dir)

('/content/drive/MyDrive/llm_finetuning/llama3-finetuned/lora_adapter_latest/tokenizer_config.json',
 '/content/drive/MyDrive/llm_finetuning/llama3-finetuned/lora_adapter_latest/special_tokens_map.json',
 '/content/drive/MyDrive/llm_finetuning/llama3-finetuned/lora_adapter_latest/chat_template.jinja',
 '/content/drive/MyDrive/llm_finetuning/llama3-finetuned/lora_adapter_latest/tokenizer.json')

In [None]:
# import shutil
# from google.colab import files

# # Zip the folder
# shutil.make_archive('llama3-finetuned', 'zip', '/content/llama3-finetuned')


# # Download the zipped folder
# # files.download('llama3-finetuned.zip') #uncomment this line if you want to download the fine tuning data locally


In [None]:

# Then move the folder to your Google Drive
# !cp -r /content/llama3-finetuned /content/drive/MyDrive/llm_finetuning
