## Instruction fine-tuning of pre-trained model.

In [56]:
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    # Do this only in Colab notebooks! Otherwise use pip install unsloth
    import torch; v = re.match(r"[0-9\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + "0.0.32.post2" if v == "2.8.0" else "0.0.29.post3"
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth

In [None]:
import os
from dotenv import load_dotenv
load_dotenv()

HF_TOKEN = os.getenv("HF_TOKEN")
COMET_API_KEY = os.getenv("COMET_API_KEY")

In [57]:
from unsloth import FastLanguageModel
import torch
max_seq_length = 2048 ## Choose Any! We auto support RoPE Scaling Internally
dtype = None          ## None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Amerpe +
load_in_4bit = True   ## Use 4bit quantization to reduce memory usage. Can be false.

In [58]:
import torch, gc
gc.collect()
torch.cuda.empty_cache()

In [59]:
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Phi-3.5-mini-instruct",
    max_seq_length = 1024,
    load_in_4bit = True,
)

==((====))==  Unsloth 2025.8.9: Fast Llama patching. Transformers: 4.55.2.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [60]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 32,
    lora_alpha = 32,
    lora_dropout = 0,
    target_modules = ["q_proj","k_proj","v_proj","up_proj","down_proj","o_proj","gate_proj"]
)

In [61]:
from datasets import load_dataset,concatenate_datasets

In [62]:
dataset1 = load_dataset("mlabonne/llmtwin",split = "train")
dataset2 = load_dataset("mlabonne/FineTome-Alpaca-100k",split="train[:10000]")
dataset = concatenate_datasets([dataset1,dataset2])

In [63]:
dataset.column_names

['instruction', 'output', 'source', 'score']

In [64]:
dataset.shape

(13001, 4)

In [65]:
alpaca_template = """Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
{}
### Response:
{}
"""

EOS_TOKEN = tokenizer.eos_token

def format_samples(examples):
  text = []
  for instruction, output in zip(examples["instruction"],examples["output"],strict=False):
    message = alpaca_template.format(instruction,output) + EOS_TOKEN
    text.append(message)
  return {"text": text}

In [66]:
dataset = dataset.map(format_samples,batched=True,remove_columns = dataset.column_names)

Map:   0%|          | 0/13001 [00:00<?, ? examples/s]

In [67]:
dataset = dataset.train_test_split(test_size=0.05)

In [68]:
dataset

DatasetDict({
    train: Dataset({
        features: ['text'],
        num_rows: 12350
    })
    test: Dataset({
        features: ['text'],
        num_rows: 651
    })
})

In [72]:
!pip install comet-ml

Collecting comet-ml
  Downloading comet_ml-3.51.0-py3-none-any.whl.metadata (4.1 kB)
Collecting dulwich!=0.20.33,>=0.20.6 (from comet-ml)
  Downloading dulwich-0.24.1-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (5.2 kB)
Collecting everett<3.2.0,>=1.0.1 (from everett[ini]<3.2.0,>=1.0.1->comet-ml)
  Downloading everett-3.1.0-py2.py3-none-any.whl.metadata (17 kB)
Collecting python-box<7.0.0 (from comet-ml)
  Downloading python_box-6.1.0-py3-none-any.whl.metadata (7.8 kB)
Collecting configobj (from everett[ini]<3.2.0,>=1.0.1->comet-ml)
  Downloading configobj-5.0.9-py2.py3-none-any.whl.metadata (3.2 kB)
Downloading comet_ml-3.51.0-py3-none-any.whl (731 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m731.9/731.9 kB[0m [31m50.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dulwich-0.24.1-cp312-cp312-manylinux_2_28_x86_64.whl (1.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m71.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading 

In [74]:
import comet_ml

experiment = comet_ml.Experiment(
    api_key = COMET_API_KEY,
    project_name = "unsloth-training",
)

[1;38;5;39mCOMET INFO:[0m Experiment is live on comet.com https://www.comet.com/dev-mangukiya/unsloth-training/f7fab330464242a384c34e9dba8f384f



In [76]:
from trl import SFTConfig, SFTTrainer

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset["train"],
    eval_dataset = dataset["test"],
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    packing = False,     ## Can make training 5x faster for short sequences
    args = SFTConfig(
      per_device_train_batch_size = 2,
      gradient_accumulation_steps = 8,
      warmup_steps = 5,
      num_train_epochs = 1,
      max_steps = 60,
      learning_rate = 3e-4,
      logging_steps = 1,
      optim = "adamw_8bit",
      weight_decay = 0.01,
      lr_scheduler_type = "linear",
      seed = 3407,
      output_dir = "outputs",
    ),
)

Unsloth: Tokenizing ["text"] (num_proc=2):   0%|          | 0/12350 [00:00<?, ? examples/s]

Unsloth: Tokenizing ["text"] (num_proc=2):   0%|          | 0/651 [00:00<?, ? examples/s]

In [77]:
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 12,350 | Num Epochs = 1 | Total steps = 60
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 8
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 8 x 1) = 16
 "-____-"     Trainable parameters = 59,768,832 of 3,880,848,384 (1.54% trained)
  | |_| | '_ \/ _` / _` |  _/ -_)


<IPython.core.display.Javascript object>

wandb: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
wandb: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


wandb: No netrc file found, creating one.
wandb: Appending key for api.wandb.ai to your netrc file: /root/.netrc
wandb: Currently logged in as: devmangukiya561 (devmangukiya561-nirma-university) to https://api.wandb.ai. Use `wandb login --relogin` to force relogin


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
1,1.0844
2,1.0619
3,0.9993
4,1.2802
5,0.8099
6,0.79
7,0.8603
8,0.8765
9,0.9596
10,0.7033


## Let's run the model! You can change the instruction and input - leave the output blank!

In [83]:
from transformers import TextStreamer

In [84]:
FastLanguageModel.for_inference(model)  ## Enable native 2x faster inference

message = alpaca_template.format("Write a paragraph to introduce supervised fine-tuning.","")
inputs = tokenizer([message],return_tensors="pt").to("cuda")
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs,streamer=text_streamer,max_new_tokens=256,use_cache=True)

Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
Write a paragraph to introduce supervised fine-tuning.
### Response:

Supervised fine-tuning is a technique used to improve the performance of pre-trained models on specific tasks. It involves using labeled data to further train the model, allowing it to learn task-specific features and improve its accuracy. This approach is particularly useful when dealing with complex tasks that require a deep understanding of the data. By leveraging the knowledge gained from pre-training, supervised fine-tuning enables models to achieve better results and adapt to new domains or datasets.
<|endoftext|>


In [86]:
# ✅ Save LoRA after SFT
model.save_pretrained("Phi-3.5-sft-lora")
tokenizer.save_pretrained("Phi-3.5-sft-lora")

# ✅ Push to Hugging Face Hub (optional)
model.push_to_hub("devmangukiya/Phi-3.5-sft-lora", token=HF_TOKEN)
tokenizer.push_to_hub("devmangukiya/Phi-3.5-sft-lora", token=HF_TOKEN)


Saved model to https://huggingface.co/devmangukiya/Phi-3.5-sft-lora
