In [None]:
# =================================================================
# 1. INSTALL & IMPORT LIBRARIES
# =================================================================
# Install the necessary libraries with the --upgrade flag to ensure you have the latest versions.
!pip install -q --upgrade transformers datasets accelerate peft bitsandbytes trl

import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, BitsAndBytesConfig, pipeline
from peft import LoraConfig, get_peft_model, PeftModel
from trl import SFTTrainer


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.0/42.0 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.3/11.3 MB[0m [31m75.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m374.9/374.9 kB[0m [31m29.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m504.9/504.9 kB[0m [31m35.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.3/61.3 MB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m511.9/511.9 kB[0m [31m26.6 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# =================================================================
!pip install -q --upgrade transformers datasets accelerate peft bitsandbytes trl

import torch
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    BitsAndBytesConfig,
)
from peft import LoraConfig, get_peft_model
from trl import SFTTrainer


In [None]:

# =================================================================
# 2. CONFIGURATION
# =================================================================
# The base model from Hugging Face
BASE_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# The local path to the JSONL dataset used for training. This is the file the
# dataset-loading step reads (that step also spells the path out literally).
DATASET_FILE = "/content/instruction_response_swapped.jsonl"

# The name for your fine-tuned model (also used as the save folder name)
NEW_MODEL_NAME = "tinyllama-chat-finetuned"


In [None]:
# =================================================================
# 3. LOAD & PREPARE THE DATASET
# =================================================================
print("Step 3: Loading and preparing the dataset...")

# Read the JSONL file with the "json" loader and keep its "train" split.
# The file path is written out literally here.
dataset = load_dataset(
    "json",
    split="train",
    data_files="/content/instruction_response_swapped.jsonl",
)

def format_prompt(example):
    """Build the training text for a single dataset record.

    Args:
        example: Mapping that provides an ``instruction`` entry and a
            ``response`` entry.

    Returns:
        A one-key dict ``{"text": ...}`` in which the instruction and the
        response are laid out under ``### Instruction:`` and ``### Response:``
        headings.
    """
    template = "### Instruction:\n{}\n\n### Response:\n{}"
    prompt_text = template.format(example["instruction"], example["response"])
    return {"text": prompt_text}

# Run format_prompt over the dataset, then report completion.
dataset = dataset.map(function=format_prompt)
print("Dataset prepared successfully.")


Step 3: Loading and preparing the dataset...


Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/769 [00:00<?, ? examples/s]

Dataset prepared successfully.


In [None]:

# =================================================================
# 4. LOAD MODEL & TOKENIZER
# =================================================================
print("\nStep 4: Loading the base model and tokenizer...")

# Quantization settings handed to the model loader: 4-bit loading with the
# "nf4" quant type, double quantization enabled, float16 as compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# Base checkpoint, loaded with the config above and device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    device_map="auto",
    quantization_config=bnb_config,
)

# Tokenizer of the same checkpoint: pad on the right, reusing the EOS token
# as the padding token.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

print("Base model and tokenizer loaded successfully.")



Step 4: Loading the base model and tokenizer...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

Base model and tokenizer loaded successfully.


In [None]:

# =================================================================
# 5. CONFIGURE LoRA
# =================================================================
print("\nStep 5: Configuring LoRA...")

# Adapter settings: rank 8, alpha 16, dropout 0.05, bias="none", applied to
# the q/k/v/o projection modules for a causal-LM task.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
)

# Wrap the loaded model with the adapters described above.
model = get_peft_model(model, lora_config)

# Be sure to keep this call, placed after the LoRA wrapping (original
# author's note, translated).
model.enable_input_require_grads()

print("LoRA configured. Trainable parameters:")
model.print_trainable_parameters()



Step 5: Configuring LoRA...
LoRA configured. Trainable parameters:
trainable params: 2,252,800 || all params: 1,102,301,184 || trainable%: 0.2044


In [None]:




# =================================================================
# 6. CONFIGURE THE TRAINER
# =================================================================
print("\nStep 6: Configuring the SFTTrainer...")

# Training setup starts here.
from transformers import DataCollatorForLanguageModeling, TrainingArguments
from trl import SFTTrainer

# Hyper-parameters for the run; checkpoints go to ./results.
training_args = TrainingArguments(
    output_dir="./results",
    # batching
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    # optimisation
    num_train_epochs=3,
    learning_rate=2e-4,
    optim="paged_adamw_8bit",
    bf16=True,
    # logging and checkpointing
    logging_steps=10,
    save_steps=100,
    save_total_limit=2,
    report_to="none",
)

# Collator built on the tokenizer, with masked-language-modelling turned off.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Trainer wiring: LoRA-wrapped model, prepared dataset, arguments, collator.
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    data_collator=data_collator,
)









Step 6: Configuring the SFTTrainer...


Adding EOS to train dataset:   0%|          | 0/769 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/769 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/769 [00:00<?, ? examples/s]

In [None]:

# =================================================================
# 7. TRAIN MODEL
# =================================================================
# Print the status line, then start fine-tuning. trainer.train() stays the
# last expression of the cell so its return value is shown as the cell output.
print("Trainer configured successfully. Now you can run trainer.train()")
trainer.train()


Trainer configured successfully. Now you can run trainer.train()


Step,Training Loss
10,3.4352
20,3.3987
30,3.5159
40,3.5741
50,3.4993
60,3.4922
70,3.5444
80,3.6021
90,3.5266
100,3.5076


TrainOutput(global_step=291, training_loss=3.5130938467700865, metrics={'train_runtime': 505.9941, 'train_samples_per_second': 4.559, 'train_steps_per_second': 0.575, 'total_flos': 891615984709632.0, 'train_loss': 3.5130938467700865})

In [None]:

# =================================================================
# 8. SAVE MODEL
# =================================================================
# Save the trainer's model into a folder named after NEW_MODEL_NAME (relative
# to the current working directory) and report the location.
trainer.save_model("./{}".format(NEW_MODEL_NAME))
print("Model saved to ./{}".format(NEW_MODEL_NAME))




Model saved to ./tinyllama-chat-finetuned


In [None]:
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hub id of the base checkpoint, folder holding the trained LoRA adapters,
# and the folder that receives the merged model.
BASE_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
FINETUNED_MODEL = "./tinyllama-chat-finetuned"
MERGED_DIR = "./tinyllama-merged"

# Load the base weights as float16 with device_map="cpu".
print("Loading base model...")
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    device_map="cpu",
    torch_dtype=torch.float16,
)

# Attach the saved adapters to that base model.
print("Loading LoRA adapters...")
lora_model = PeftModel.from_pretrained(base_model, FINETUNED_MODEL)

# Merge the adapters into the base model and unload the adapter wrapper.
print("Merging LoRA into base...")
merged_model = lora_model.merge_and_unload()

# Write the merged weights, then the base checkpoint's tokenizer, to MERGED_DIR.
print("Saving merged model to {}...".format(MERGED_DIR))
merged_model.save_pretrained(MERGED_DIR)

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
tokenizer.save_pretrained(MERGED_DIR)

print("✅ Merge complete")


Loading base model...
Loading LoRA adapters...
Merging LoRA into base...
Saving merged model to ./tinyllama-merged...
✅ Merge complete


In [None]:
!git clone https://github.com/ggerganov/llama.cpp
%cd llama.cpp


Cloning into 'llama.cpp'...
remote: Enumerating objects: 60239, done.[K
remote: Counting objects: 100% (147/147), done.[K
remote: Compressing objects: 100% (121/121), done.[K
remote: Total 60239 (delta 92), reused 26 (delta 26), pack-reused 60092 (from 3)[K
Receiving objects: 100% (60239/60239), 150.24 MiB | 23.98 MiB/s, done.
Resolving deltas: 100% (43681/43681), done.
/content/llama.cpp


In [None]:
# Forcefully remove the old directory if it exists.
# `llama.cpp` is a relative path, so what gets removed depends on the current
# working directory at the time this cell runs.
!rm -rf llama.cpp
# This message is printed unconditionally; it does not check what was removed.
print("✅ Old directory removed.")


✅ Old directory removed.


In [None]:
# Print the converter's command-line help; the usage text lists the values
# accepted by --outtype.
!python3 llama.cpp/convert_hf_to_gguf.py --help

usage: convert_hf_to_gguf.py [-h] [--vocab-only] [--outfile OUTFILE]
                             [--outtype {f32,f16,bf16,q8_0,tq1_0,tq2_0,auto}]
                             [--bigendian] [--use-temp-file] [--no-lazy]
                             [--model-name MODEL_NAME] [--verbose]
                             [--split-max-tensors SPLIT_MAX_TENSORS]
                             [--split-max-size SPLIT_MAX_SIZE] [--dry-run]
                             [--no-tensor-first-split] [--metadata METADATA]
                             [--print-supported-models] [--remote] [--mmproj]
                             [--mistral-format]
                             [--disable-mistral-community-chat-template]
                             [model]

Convert a huggingface model to a GGML compatible file

positional arguments:
  model                 directory containing model file or huggingface
                        repository ID (if --remote)

options:
  -h, --help            show this help messag

In [None]:

# Step 2: Clone the latest version of the llama.cpp repository
!git clone https://github.com/ggerganov/llama.cpp

# Step 3: Run the conversion with the updated script
# The new script will recognize 'q4_K_M'
!python3 llama.cpp/convert_hf_to_gguf.py \
    ./tinyllama-merged \
    --outfile ./tinyllama-q4.gguf \
    --outtype q4_K_M

print("✅ GGUF conversion complete! Find your model at ./tinyllama-q4.gguf")

Cloning into 'llama.cpp'...
remote: Enumerating objects: 60239, done.[K
remote: Counting objects: 100% (140/140), done.[K
remote: Compressing objects: 100% (119/119), done.[K
remote: Total 60239 (delta 88), reused 21 (delta 21), pack-reused 60099 (from 4)[K
Receiving objects: 100% (60239/60239), 150.22 MiB | 19.56 MiB/s, done.
Resolving deltas: 100% (43697/43697), done.
usage: convert_hf_to_gguf.py [-h] [--vocab-only] [--outfile OUTFILE]
                             [--outtype {f32,f16,bf16,q8_0,tq1_0,tq2_0,auto}]
                             [--bigendian] [--use-temp-file] [--no-lazy]
                             [--model-name MODEL_NAME] [--verbose]
                             [--split-max-tensors SPLIT_MAX_TENSORS]
                             [--split-max-size SPLIT_MAX_SIZE] [--dry-run]
                             [--no-tensor-first-split] [--metadata METADATA]
                             [--print-supported-models] [--remote] [--mmproj]
                             [--mistr

In [None]:
 !python3 convert_hf_to_gguf.py ../tinyllama-merged \
    --outfile ../tinyllama-merged.gguf \
    --outtype auto


ERROR:hf-to-gguf:Error: ../tinyllama-merged is not a directory


In [None]:

# Quick manual test of the fine-tuned model

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Folder the save step wrote the fine-tuned model to
MODEL_PATH = "./tinyllama-chat-finetuned"

# Both the tokenizer and the model are read from that folder.
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    device_map="auto",
)

# Function to get response
def chat_with_bot(instruction, max_new_tokens=200, temperature=0.4, top_p=0.9):
    """Generate the model's reply to a single instruction.

    The instruction is wrapped in the ``### Instruction:`` / ``### Response:``
    layout and a reply is sampled from the module-level ``model`` using the
    module-level ``tokenizer``.

    Args:
        instruction: Text placed under the ``### Instruction:`` heading.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature forwarded to ``model.generate``.
        top_p: Nucleus-sampling threshold forwarded to ``model.generate``.

    Returns:
        The newly generated text with surrounding whitespace stripped.
    """
    prompt = f"### Instruction:\n{instruction}\n\n### Response:\n"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
        )

    # Decode only the tokens that follow the prompt. Splitting the full decoded
    # text on "### Response:" and keeping the last piece would throw away the
    # beginning of the reply whenever the model emits that marker again.
    generated_ids = outputs[0][prompt_length:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

# Try the helper on a sample instruction and print the reply
print(chat_with_bot(instruction="how easy was it for you to get used to living"))

I found it very easy to get used to living in this new place. The people were friendly and welcoming, and the surroundings were beautiful. The weather was also perfect for outdoor activities, and I enjoyed spending my days exploring the city and its parks. Overall, I felt like I was at home in no time.


In [None]:
import json

# Input and output file paths
input_file = "/content/instruction_response_dataset.jsonl"
output_file = "/content/instruction_response_swapped.jsonl"

# Rewrite every record with the values of "instruction" and "response"
# exchanged. The dataset-loading cell reads `output_file`, so this cell has to
# be run before it.
# Both files are opened explicitly as UTF-8 so the result does not depend on
# the platform's default encoding.
with open(input_file, "r", encoding="utf-8") as infile, \
        open(output_file, "w", encoding="utf-8") as outfile:
    for line in infile:
        # Blank lines (for instance an empty line at the end of the file) are
        # not valid JSON; skip them instead of crashing in json.loads.
        if not line.strip():
            continue

        data = json.loads(line)

        # Swap values
        swapped = {
            "instruction": data["response"],
            "response": data["instruction"],
        }

        # Write back as JSONL
        outfile.write(json.dumps(swapped) + "\n")

print(f"Swapped dataset saved to {output_file}")


Swapped dataset saved to /content/instruction_response_swapped.jsonl
