# Initial run of fine tunin.
###  this file was the first iteration to fine tune the llama 3 8b instruct model and trains the whole model in one go so its difficult to run on machines which are decently speced out for the similar purposes.

### Prerequisite to run the notebook
- Mount the Google drive  and autheticate it.
- Change the runtime to use T4 GPU.
- Create a folder in you drive named as "llm_finetuning",and create the directory as shown below.
```
Drive/
└── llm_finetuning/
      ├── finetuning_dataset/
      │   └── final_dataset.json
      └── llama3-finetuned/
          └── <this is where all the adapter model files will be   stored>
```
- Add hugging face token to secrets named as "llama_3_token" and allow it access to the notebook, make sure the token is in "READ" mode and  has the permissions to use "Meta-Llama-3-8B-Instruct" model.
-the dataset must be uploaded to the drive  and named as "final_dataset.json".


In [None]:
# 1. Install dependencies
!pip install -q transformers datasets peft accelerate bitsandbytes
!pip install -q huggingface_hub

In [None]:
# 2. Hugging Face Login
from huggingface_hub import login

In [None]:
# autheticating to access HF using secrests from google colab
from google.colab import userdata
token=userdata.get('HF_TOKEN')
login(token=token)

In [None]:
# 3. Imports
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model, TaskType, prepare_model_for_kbit_training
from datasets import Dataset
import json
import os

In [None]:
# mounting the google drive for persistent storage of the required assets
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
os.listdir("/content/drive/MyDrive")


['Colab Notebooks', 'llm_finetuning']

In [None]:
# 4. Load dataset (JSON with 'input' and 'output')
dataset_path ="/content/drive/MyDrive/llm_finetuning/finetuning_dataset/final_dataset.json"
with open(dataset_path, "r") as f:
    data = json.load(f)

In [None]:
# Format to LLaMA3 Instruct style
for example in data:
    example["text"] = f"<s>[INST] {example['input']} [/INST] {example['output']}</s>"


dataset = Dataset.from_list(data)

In [None]:
# 5. Load tokenizer and model
base_model = "meta-llama/Meta-Llama-3-8B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    base_model,
    load_in_4bit=True,
    device_map="auto",
    torch_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/654 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/187 [00:00<?, ?B/s]

In [None]:
# 6. Apply LoRA config
model = prepare_model_for_kbit_training(model)

lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.CAUSAL_LM
)

model = get_peft_model(model, lora_config)

In [None]:
# 7. Tokenization function
def tokenize(example):
    tokenized = tokenizer(
        example["text"],
        truncation=True,
        padding="max_length",
        max_length=512
    )
    tokenized["labels"] = tokenized["input_ids"].copy()
    return tokenized

tokenized_dataset = dataset.map(tokenize)

Map:   0%|          | 0/2835 [00:00<?, ? examples/s]

In [None]:
# 8. Split into train/test
data_split = tokenized_dataset.train_test_split(test_size=0.1)



In [None]:
import transformers
print(transformers.__version__)

4.53.2


In [None]:

# 9. Training arguments
training_args = TrainingArguments(
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    num_train_epochs=3,
    logging_steps=5,
    output_dir="./llama3-finetuned",
    save_total_limit=1,
    save_strategy="epoch",
    fp16=True,
    report_to="none"
)

In [None]:

# 10. Trainer setup
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=data_split["train"],
    eval_dataset=data_split["test"],
    tokenizer=tokenizer,
)

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [None]:
# 11. Train
trainer.train()

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(

Step,Training Loss
5,9.3397
10,7.7929
15,7.0624
20,6.7203
25,4.1219
30,2.0125
35,0.8318
40,0.5042
45,0.5935
50,0.5727


  return fn(*args, **kwargs)


In [None]:
# 12. Save model and tokenizer
model.save_pretrained("./llama3-finetuned")
tokenizer.save_pretrained("./llama3-finetuned")

('./llama3-finetuned/tokenizer_config.json',
 './llama3-finetuned/special_tokens_map.json',
 './llama3-finetuned/chat_template.jinja',
 './llama3-finetuned/tokenizer.json')

In [None]:
import shutil
from google.colab import files

# Zip the folder
shutil.make_archive('llama3-finetuned', 'zip', '/content/llama3-finetuned')


# Download the zipped folder
# files.download('llama3-finetuned.zip') #uncomment this line if you want to download the fine tuning data locally


'/content/llama3-finetuned.zip'

In [None]:

# Then move the folder to your Google Drive
!cp -r /content/llama3-finetuned /content/drive/MyDrive/llm_finetuning
