# Loading Dataset

In [16]:
from datasets import load_dataset

# Fetch only the "train" split of the Guanaco 1k dataset from the Hugging Face Hub.
ds = load_dataset(path="mlabonne/guanaco-llama2-1k", split="train")
ds

Dataset({
    features: ['text'],
    num_rows: 1000
})

# Loading Model and tokenizer

In [17]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

Performing quantization

In [18]:
!pip install bitsandbytes

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # Enable 4-bit loading
    bnb_4bit_quant_type="nf4",              # Use NormalFloat4 quantization
    bnb_4bit_compute_dtype=torch.float16,   # Compute in float16
    bnb_4bit_use_double_quant=True,         # Double quantization for more memory savings
)



Model name

In [19]:
# Hugging Face Hub id of the base checkpoint; reused when loading both the
# tokenizer and the model below.
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

Loading tokenizer

In [20]:
# Tokenizer that belongs to the base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Pad on the right-hand side of each sequence and reuse the end-of-sequence
# token as the padding token.
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

Loading model

In [21]:
!pip install -U bitsandbytes
!pip install -U transformers accelerate


model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quantization_config,  # Apply 4-bit quantization
    device_map="auto",                        # Automatically use GPU
    dtype=torch.float16,                      # Use dtype instead of deprecated torch_dtype
)



# Model preparation with LoRA

In [22]:
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# Ready the quantized base model for k-bit training before any adapter is attached.
model = prepare_model_for_kbit_training(model)

# LoRA adapter settings: rank 32 with alpha 32, applied to the four attention
# projection layers, with light dropout and no bias terms trained.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=32,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
)

# Wrap the model with the adapter and report how many parameters will be trained.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

trainable params: 9,011,200 || all params: 1,109,059,584 || trainable%: 0.8125


# Model training

In [23]:
from transformers import TrainingArguments

In [27]:
training_args = TrainingArguments(
    # Where checkpoints are written, how often, and how many are kept.
    output_dir="./tinyllama-finetuned",
    save_steps=100,
    save_total_limit=2,
    # Schedule: 4 sequences per step x 4 accumulation steps = 16 sequences per
    # optimizer update on each device.
    num_train_epochs=20,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    # Optimizer and numerics.
    optim="paged_adamw_8bit",
    learning_rate=2e-4,
    warmup_steps=50,
    fp16=True,
    # Reporting.
    logging_steps=10,
    report_to="none",  # Change to "wandb" if you want to use Weights & Biases
)

In [28]:
!pip install -q trl

from trl import SFTTrainer

def formatting_func(example):
    return example["text"]

trainer = SFTTrainer(
    model=model,
    train_dataset=ds,
    peft_config=lora_config,
    args=training_args,
    formatting_func=formatting_func
)



In [29]:
# Run fine-tuning with the trainer built above; the returned TrainOutput
# (step count, average loss, runtime metrics) is displayed as the cell result.
trainer.train()

  return fn(*args, **kwargs)


Step,Training Loss
10,0.6139
20,0.6322
30,0.5988
40,0.561
50,0.5764
60,0.5348
70,0.4929
80,0.495
90,0.5235
100,0.5174


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


TrainOutput(global_step=1260, training_loss=0.2502554873625437, metrics={'train_runtime': 9279.3539, 'train_samples_per_second': 2.155, 'train_steps_per_second': 0.136, 'total_flos': 9.068594778631373e+16, 'train_loss': 0.2502554873625437, 'epoch': 20.0})

Saving model

In [30]:
# Write the trained model and the tokenizer side by side into one directory.
final_dir = "./tinyllama-finetuned-final"
trainer.model.save_pretrained(final_dir)
tokenizer.save_pretrained(final_dir)

('./tinyllama-finetuned-final/tokenizer_config.json',
 './tinyllama-finetuned-final/special_tokens_map.json',
 './tinyllama-finetuned-final/chat_template.jinja',
 './tinyllama-finetuned-final/tokenizer.model',
 './tinyllama-finetuned-final/added_tokens.json',
 './tinyllama-finetuned-final/tokenizer.json')