In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from torch.optim import AdamW
import torch

model_name = "meta-llama/Llama-3.2-1B"
device = "cuda" 

tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map=device, torch_dtype=torch.bfloat16)

In [3]:
from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
    r=12,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
peft_model = get_peft_model(model, lora_config)
peft_model.print_trainable_parameters()




trainable params: 1,277,952 || all params: 1,237,092,352 || trainable%: 0.1033


In [4]:
import json

def load_jsonl(file_path):
    data = []
    with open(file_path, "r", encoding="utf-8") as f:
        for line in f:
            data.append(json.loads(line))
    return data

data = load_jsonl("../resources/alpaca_formatted_data.jsonl")


In [5]:
training_prompt = [
    {
        "role": "system",
        "content": (
            "You are a helpful and knowledgeable assistant specialized in generating clear and accurate "
            "architecture diagrams using Mermaid.js markdown syntax. When given a user's request, your task is to:\n\n"
            "- Understand the user's description, which may be detailed or vague.\n"
            "- Suggest one or more appropriate architecture styles, such as monolith, microservices, serverless, or hybrid.\n"
            "- Respect any design constraints mentioned, such as specific cloud providers (AWS, Azure, GCP), frameworks, or technologies.\n"
            "- Generate valid Mermaid.js markdown code representing the requested architecture or diagrams.\n"
            "- Support multiple diagrams if the user asks for alternatives or different styles.\n"
            "- Provide diagrams that are clean, well-labeled, and easy to read.\n"
            "- Optionally suggest real-time edits or improvements when prompted.\n"
            "- Avoid adding any commentary or explanations; respond only with Mermaid markdown code blocks unless otherwise requested.\n"
            "- Always wrap your Mermaid diagrams inside triple backticks with the word `mermaid` to ensure proper formatting.\n\n"
            "Example user request: \"Design a microservices architecture on AWS for a social media app.\"\n\n"
            "Your response should be:\n\n"
            "```mermaid\n"
            "graph TD\n"
            "    User[User] --> API[API Gateway]\n"
            "    API --> Auth[Auth Service (Lambda)]\n"
            "    API --> Post[Post Service (ECS)]\n"
            "    API --> Media[Media Service (S3)]\n"
            "    Auth --> DynamoDB[DynamoDB]\n"
            "    Post --> RDS[RDS]\n"
            "```"
        )
    },
    {
        "role": "user",
        "content": "Design a microservices architecture on AWS for a social media app."
    },
    {
        "role": "assistant",
        "content": (
            "```mermaid\n"
            "graph TD\n"
            "    User[User] --> API[API Gateway]\n"
            "    API --> Auth[Auth Service (Lambda)]\n"
            "    API --> Post[Post Service (ECS)]\n"
            "    API --> Media[Media Service (S3)]\n"
            "    Auth --> DynamoDB[DynamoDB]\n"
            "    Post --> RDS[RDS]\n"
            "```"
        )
    }
]

target_response = training_prompt[-1]["content"]


In [10]:
from datasets import Dataset
dataset = Dataset.from_list(data)

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

def tokenize_function(examples):
    inputs = []
    labels = []

    for instruction in examples["instruction"]:
        prompt = training_prompt[0]["content"] + "\n\n"
        prompt += training_prompt[1]["content"] + "\n\n"
        prompt += instruction + "\n\n"

        inputs.append(prompt)

    for response in examples["response"]:
        labels.append(response)

    # Tokenize inputs
    model_inputs = tokenizer(
        inputs, truncation=True, max_length=512, padding="max_length"
    )

    # Tokenize labels (target response)
    with tokenizer.as_target_tokenizer():
        labels_tokenized = tokenizer(
            labels, truncation=True, max_length=512, padding="max_length"
        )

    model_inputs["labels"] = labels_tokenized["input_ids"]

    return model_inputs

tokenized_data = dataset.map(
    tokenize_function,
    batched=True,)


Map:   0%|          | 0/294 [00:00<?, ? examples/s]

In [11]:
for i in range(2):
    print(f"Example {i}:")
    print("input_ids:", tokenized_data[i]["input_ids"])
    print("attention_mask:", tokenized_data[i]["attention_mask"])
    print("labels:", tokenized_data[i]["labels"])
    print()


Example 0:
input_ids: [128000, 2675, 527, 264, 11190, 323, 42066, 18328, 28175, 304, 24038, 2867, 323, 13687, 18112, 47287, 1701, 8930, 46342, 2927, 51594, 20047, 13, 3277, 2728, 264, 1217, 596, 1715, 11, 701, 3465, 374, 311, 1473, 12, 71994, 279, 1217, 596, 4096, 11, 902, 1253, 387, 11944, 477, 40146, 627, 12, 328, 3884, 832, 477, 810, 8475, 18112, 9404, 11, 1778, 439, 1647, 48218, 11, 8162, 13069, 11, 3622, 1752, 11, 477, 26038, 627, 12, 78138, 904, 2955, 17413, 9932, 11, 1778, 439, 3230, 9624, 12850, 320, 37236, 11, 35219, 11, 480, 7269, 705, 49125, 11, 477, 14645, 627, 12, 20400, 2764, 8930, 46342, 2927, 51594, 2082, 14393, 279, 11472, 18112, 477, 47287, 627, 12, 9365, 5361, 47287, 422, 279, 1217, 17501, 369, 27548, 477, 2204, 9404, 627, 12, 40665, 47287, 430, 527, 4335, 11, 1664, 2922, 23121, 11, 323, 4228, 311, 1373, 627, 12, 97295, 4284, 1972, 7394, 51944, 477, 18637, 994, 29746, 627, 12, 35106, 7999, 904, 31710, 477, 41941, 26, 6013, 1193, 449, 8930, 46342, 51594, 2082, 10215, 

In [12]:
from transformers import TrainingArguments, Trainer

peft_training_args = TrainingArguments(
    output_dir="./lora-llama-3.2-1b",
    per_device_train_batch_size=1,
    learning_rate=1e-3,
    num_train_epochs=3,
    logging_steps=1,
    max_steps=1000,
)

peft_trainer = Trainer(
    model=peft_model,
    args=peft_training_args,
    train_dataset=tokenized_data,
)


No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [None]:
peft_trainer.train()

peft_trainer.model.save_pretrained("./my_finetuned_model")
tokenizer.save_pretrained("./my_finetuned_model")

In [5]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch

base_model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-1B")
model = PeftModel.from_pretrained(base_model, './my_finetuned_model')
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B")