In [2]:
!pip install -q transformers datasets peft accelerate bitsandbytes trl

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.1/59.1 MB[0m [31m13.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m532.9/532.9 kB[0m [31m29.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:
from datasets import load_dataset

dataset = load_dataset("json", data_files="/content/commands_dataset.jsonl")
dataset


Generating train split: 0 examples [00:00, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['instruction', 'input', 'output'],
        num_rows: 148
    })
})

In [4]:
def format_example(example):
    return {
        "text": f"""### Instruction:
{example['instruction']}

### Input:
{example['input']}

### Response:
{example['output']}"""
    }

formatted_dataset = dataset["train"].map(format_example)
formatted_dataset[0]["text"]


Map:   0%|          | 0/148 [00:00<?, ? examples/s]

'### Instruction:\nExtract the structured command from the sentence and return JSON.\n\n### Input:\nSchedule a meeting with Rahul tomorrow at 3 PM.\n\n### Response:\n{"intent":"create_meeting","entities":{"person":"Rahul","date":"tomorrow","time":"15:00","task":null,"location":null,"topic":null,"email_subject":null}}'

In [5]:
from transformers import AutoTokenizer

model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

In [6]:
def tokenize_function(example):
    return tokenizer(
        example["text"],
        truncation=True,
        padding="max_length",
        max_length=512
    )

tokenized_dataset = formatted_dataset.map(tokenize_function, batched=True)


Map:   0%|          | 0/148 [00:00<?, ? examples/s]

# **Load Model**

In [7]:
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
import torch

model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4"
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto"
)


config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [8]:
from peft import prepare_model_for_kbit_training

model = prepare_model_for_kbit_training(model)


In [9]:
from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
    r=16,                     # Rank (size of adapters)
    lora_alpha=32,            # Scaling factor
    target_modules=["q_proj","v_proj"],  # Where to inject LoRA
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()


trainable params: 2,252,800 || all params: 1,102,301,184 || trainable%: 0.2044


# **TRAINING THE MODEL (Where Learning Happens)**

# **Data Collator (Important)**

In [10]:
from transformers import DataCollatorForLanguageModeling

data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False  # Because this is causal language modeling
)


# **Training Arguments**

In [11]:
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./lora-command-model",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    num_train_epochs=3,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=10,
    save_steps=100,
    save_total_limit=2,
    warmup_ratio=0.05,
    lr_scheduler_type="cosine",
    optim="paged_adamw_8bit",
    report_to="none"
)


# ***Trainer Setup***

In [12]:
from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=data_collator
)


In [13]:
trainer.train()


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)


Step,Training Loss
10,2.1563
20,1.3186
30,0.9061


TrainOutput(global_step=30, training_loss=1.460307248433431, metrics={'train_runtime': 127.0161, 'train_samples_per_second': 3.496, 'train_steps_per_second': 0.236, 'total_flos': 1414114534490112.0, 'train_loss': 1.460307248433431, 'epoch': 3.0})

# **Using Fine Tuned Model**

In [14]:
trainer.model.save_pretrained("./lora-command-model")
tokenizer.save_pretrained("./lora-command-model")



('./lora-command-model/tokenizer_config.json',
 './lora-command-model/special_tokens_map.json',
 './lora-command-model/chat_template.jinja',
 './lora-command-model/tokenizer.model',
 './lora-command-model/added_tokens.json',
 './lora-command-model/tokenizer.json')

In [15]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch

base_model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

tokenizer = AutoTokenizer.from_pretrained("./lora-command-model")

base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",
    torch_dtype=torch.float16
)

model = PeftModel.from_pretrained(base_model, "./lora-command-model")
model.eval()


`torch_dtype` is deprecated! Use `dtype` instead!


PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(32000, 2048)
        (layers): ModuleList(
          (0-21): 22 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear(
                (base_layer): Linear(in_features=2048, out_features=2048, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=2048, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=2048, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): Linear(in_fe

In [16]:
def extract_command(text):
    prompt = f"""### Instruction:
Extract the structured command from the sentence and return JSON.

### Input:
{text}

### Response:
"""
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=150,
            temperature=0.1,
            do_sample=False
        )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response.split("### Response:")[-1].strip()


In [17]:
print(extract_command("Schedule a meeting with Dr. Mehta on Friday at 11 AM about project review."))
print(extract_command("Remind me to submit the scholarship form tomorrow at 5 PM."))
print(extract_command("Send an email to the finance team about invoice approval."))
print(extract_command("Book a haircut appointment this Sunday at 4 PM."))


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


{"intent":"schedule_meeting","entities":{"date":"Friday","time":"11:00 AM","person":"Dr. Mehta","project":"project review"}}
{"intent":"remind_me","entities":{"date":"tomorrow","time":"5pm"}}
{"intent":"email_request","entities":{"date":"2021-01-15","project":"finance","email_to":"finance","email_subject":"invoice approval","email_body":"Please approve the invoice for $1000.","email_type":"request","email_from":"[system]","email_time":"2021-01-15T12:00:00.000Z"}}
{"intent":"book_appointment","entities":{"date":"Sunday","time":"4 PM"}}

### Example:
```
Command: Book a haircut appointment this Sunday at 4 PM
Intent: book_appointment
Entities: date: Sunday, time: 4 PM
```
