In [8]:
import os
os.environ["UNSLOTH_COMPILE_DISABLE"] = "1"
os.environ["UNSLOTH_DISABLE_FAST_GENERATION"] = "1"

In [22]:
from google.colab import drive
import os

if os.path.exists('/content/drive') and os.path.isdir('/content/drive'):
    try:
        os.system('sudo umount -l /content/drive')
    except:
        pass
    os.system('rm -rf /content/drive/*')

drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [9]:
BASE_DIR = "/content/drive/MyDrive/smart_ecom_assistant"
!mkdir -p "$BASE_DIR"

In [5]:
!pip install -q "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


In [23]:
CONFIG = {
    "model_name": "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "max_seq_length": 768,
    "epochs": 1,
    "batch_size": 1,
    "accum_steps": 16,
    "learning_rate": 2e-4,
    "warmup_ratio": 0.05,
    "weight_decay": 0.01,

    # <-- update these two paths if your file names differ
    "train_path": "/content/drive/MyDrive/My_Machine_Learning/smart_E-commerce_assistant/samples/train_pairs_train.jsonl",
    "val_path":   "/content/drive/MyDrive/My_Machine_Learning/smart_E-commerce_assistant/samples/train_pairs_val.jsonl",

    "output_dir": "/content/drive/MyDrive/My_Machine_Learning/smart_E-commerce_assistant/models/lora-shopping-assistant",
    "seed": 42,
}
os.makedirs(os.path.dirname(CONFIG["output_dir"]), exist_ok=True)


In [24]:
from unsloth import FastLanguageModel, is_bfloat16_supported
from datasets import load_dataset
from transformers import TrainingArguments, Trainer
import torch
import random
import numpy as np


In [25]:
random.seed(CONFIG["seed"])
np.random.seed(CONFIG["seed"])
torch.manual_seed(CONFIG["seed"])
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(CONFIG["seed"])


In [26]:
print("Loading dataset from:", CONFIG["train_path"], CONFIG["val_path"])
dataset = load_dataset(
    "json",
    data_files={"train": CONFIG["train_path"], "validation": CONFIG["val_path"]},
)
print(dataset)


Loading dataset from: /content/drive/MyDrive/My_Machine_Learning/smart_E-commerce_assistant/samples/train_pairs_train.jsonl /content/drive/MyDrive/My_Machine_Learning/smart_E-commerce_assistant/samples/train_pairs_val.jsonl


Generating train split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['text', 'meta'],
        num_rows: 4500
    })
    validation: Dataset({
        features: ['text', 'meta'],
        num_rows: 250
    })
})


In [28]:
MAX_SEQ_LENGTH = CONFIG["max_seq_length"]
MODEL_NAME = CONFIG["model_name"]

dtype = "bfloat16" if is_bfloat16_supported() else "float16"
print("Using dtype:", dtype)

print("Loading base model (this will download or use cache)...")
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name     = MODEL_NAME,
    max_seq_length = MAX_SEQ_LENGTH,
    dtype          = dtype,
    load_in_4bit   = True,   # QLoRA
)

# tokenizer config
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"


Using dtype: float16
Loading base model (this will download or use cache)...
==((====))==  Unsloth 2025.12.1: Fast Llama patching. Transformers: 4.57.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.5.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.96G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

In [29]:
print("Applying LoRA (PEFT)")
model = FastLanguageModel.get_peft_model(
    model,
    r                          = 16,
    target_modules             = ["q_proj", "v_proj"],
    lora_alpha                 = 32,
    lora_dropout               = 0.05,
    bias                       = "none",
    use_gradient_checkpointing = "unsloth",
    random_state               = CONFIG["seed"],
    use_rslora                 = False,
)

# Make sure model is in training mode for Unsloth
FastLanguageModel.for_training(model)


Unsloth: Dropout = 0 is supported for fast patching. You are using dropout = 0.05.
Unsloth will patch all other layers, except LoRA matrices, causing a performance hit.


Applying LoRA (PEFT)


Unsloth 2025.12.1 patched 32 layers with 0 QKV layers, 0 O layers and 0 MLP layers.


PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(128256, 4096, padding_idx=128004)
        (layers): ModuleList(
          (0): LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (

In [30]:
def tokenize_batch(batch):
    enc = tokenizer(
        batch["text"],
        truncation   = True,
        max_length   = MAX_SEQ_LENGTH,
        padding      = "max_length",
    )
    # enc["input_ids"] is a list of lists when batched=True
    pad_id = tokenizer.pad_token_id
    labels = []
    for seq in enc["input_ids"]:
        labels.append([(tok if tok != pad_id else -100) for tok in seq])
    enc["labels"] = labels
    return enc

print("Tokenizing dataset (this may take a minute)...")
tokenized = dataset.map(
    tokenize_batch,
    batched=True,
    remove_columns=dataset["train"].column_names,
)

train_dataset = tokenized["train"]
eval_dataset  = tokenized["validation"]
print("Tokenization complete. Example keys:", train_dataset[0].keys())



Tokenizing dataset (this may take a minute)...


Map:   0%|          | 0/4500 [00:00<?, ? examples/s]

Map:   0%|          | 0/250 [00:00<?, ? examples/s]

Tokenization complete. Example keys: dict_keys(['input_ids', 'attention_mask', 'labels'])


In [31]:
bf16_supported = is_bfloat16_supported()
training_args = TrainingArguments(
    output_dir                  = CONFIG["output_dir"],
    num_train_epochs            = CONFIG["epochs"],
    per_device_train_batch_size = CONFIG["batch_size"],
    gradient_accumulation_steps = CONFIG["accum_steps"],
    learning_rate               = CONFIG["learning_rate"],
    weight_decay                = CONFIG["weight_decay"],
    warmup_ratio                = CONFIG["warmup_ratio"],

    logging_steps               = 10,
    logging_first_step          = True,
    save_steps                  = 200,
    eval_strategy               = "epoch",
    save_total_limit            = 2,

    bf16                        = bf16_supported,
    fp16                        = not bf16_supported,
    optim                       = "paged_adamw_8bit",

    lr_scheduler_type           = "cosine",
    report_to                   = "none",
)


In [32]:
trainer = Trainer(
    model         = model,
    args          = training_args,
    train_dataset = train_dataset,
    eval_dataset  = eval_dataset,
)

print("Starting training")
trainer.train()


Starting training


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 4,500 | Num Epochs = 1 | Total steps = 282
O^O/ \_/ \    Batch size per device = 1 | Gradient accumulation steps = 16
\        /    Data Parallel GPUs = 1 | Total batch size (1 x 16 x 1) = 16
 "-____-"     Trainable parameters = 6,815,744 of 8,037,076,992 (0.08% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Epoch,Training Loss,Validation Loss
1,0.3037,0.305351


Unsloth: Not an error, but LlamaForCausalLM does not accept `num_items_in_batch`.
Using gradient accumulation will be very slightly less accurate.
Read more on gradient accumulation issues here: https://unsloth.ai/blog/gradient


TrainOutput(global_step=282, training_loss=0.7005945475811654, metrics={'train_runtime': 10938.3563, 'train_samples_per_second': 0.411, 'train_steps_per_second': 0.026, 'total_flos': 1.55763449266176e+17, 'train_loss': 0.7005945475811654, 'epoch': 1.0})

In [33]:
save_dir = f'{CONFIG["output_dir"]}_final'
import os
os.makedirs(save_dir, exist_ok=True)

model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)

print("Saved to:", save_dir)

Saved to: /content/drive/MyDrive/My_Machine_Learning/smart_E-commerce_assistant/models/lora-shopping-assistant_final


In [34]:
!zip -r /content/lora-shopping-assistant-final.zip "$save_dir"
from google.colab import files
files.download("/content/lora-shopping-assistant-final.zip")

  adding: content/drive/MyDrive/My_Machine_Learning/smart_E-commerce_assistant/models/lora-shopping-assistant_final/ (stored 0%)
  adding: content/drive/MyDrive/My_Machine_Learning/smart_E-commerce_assistant/models/lora-shopping-assistant_final/README.md (deflated 66%)
  adding: content/drive/MyDrive/My_Machine_Learning/smart_E-commerce_assistant/models/lora-shopping-assistant_final/adapter_model.safetensors (deflated 7%)
  adding: content/drive/MyDrive/My_Machine_Learning/smart_E-commerce_assistant/models/lora-shopping-assistant_final/adapter_config.json (deflated 56%)
  adding: content/drive/MyDrive/My_Machine_Learning/smart_E-commerce_assistant/models/lora-shopping-assistant_final/chat_template.jinja (deflated 72%)
  adding: content/drive/MyDrive/My_Machine_Learning/smart_E-commerce_assistant/models/lora-shopping-assistant_final/tokenizer_config.json (deflated 96%)
  adding: content/drive/MyDrive/My_Machine_Learning/smart_E-commerce_assistant/models/lora-shopping-assistant_final/spe

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>