In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from torch.optim import AdamW
import torch

# Hugging Face Hub identifier of the base checkpoint that is loaded below.
model_name = "meta-llama/Llama-3.2-1B"
# Prefer the GPU, but fall back to the CPU so this cell still runs on machines
# without a CUDA device instead of failing inside from_pretrained().
device = "cuda" if torch.cuda.is_available() else "cpu"

# Tokenizer and causal-LM weights both come from the same checkpoint id.
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map=device,           # place the whole model on the selected device
    torch_dtype=torch.bfloat16,  # load the weights in bfloat16
)

model.safetensors:   0%|          | 0.00/2.47G [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


generation_config.json:   0%|          | 0.00/185 [00:00<?, ?B/s]

In [4]:
from peft import LoraConfig, get_peft_model

# LoRA adapter settings: adapters are attached only to the attention query and
# value projections of a causal language model.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],
    r=12,               # adapter rank
    lora_alpha=16,      # LoRA scaling numerator
    lora_dropout=0.05,  # dropout applied on the adapter path
    bias="none",        # no bias terms are made trainable
)

# NOTE(review): this rebinds `model` to the PEFT-wrapped model, so re-running the
# cell feeds the already-wrapped model back into get_peft_model(). Reload the base
# model (or restart the kernel) before executing this cell again.
model = get_peft_model(model, lora_config)

# Report how many parameters are trainable versus the full parameter count.
model.print_trainable_parameters()


trainable params: 1,277,952 || all params: 1,237,092,352 || trainable%: 0.1033


In [21]:
import json

def load_jsonl(file_path):
    """Read a JSON Lines file into a list of parsed records.

    Args:
        file_path: Path of a UTF-8 encoded text file holding one JSON document
            per line.

    Returns:
        list: The parsed value of every non-blank line, in file order. An empty
        file (or one containing only blank lines) yields an empty list.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON. The message
            is prefixed with the file path and the 1-based line number.
    """
    records = []
    with open(file_path, "r", encoding="utf-8") as f:
        for line_number, line in enumerate(f, start=1):
            # Blank or whitespace-only lines (e.g. a trailing empty line) are
            # not JSON documents; skip them instead of crashing in json.loads.
            if not line.strip():
                continue
            try:
                records.append(json.loads(line))
            except json.JSONDecodeError as exc:
                # Re-raise the same exception type so existing handlers still
                # match, but say which line of which file is malformed.
                raise json.JSONDecodeError(
                    f"{file_path}:{line_number}: {exc.msg}", exc.doc, exc.pos
                ) from exc
    return records

# Parse the dataset file into a list with one entry per JSONL line. The path is
# relative, so it resolves against the notebook's current working directory.
# NOTE(review): the file name suggests Alpaca-style records — confirm the schema.
data = load_jsonl("../resources/alpaca_formatted_data.jsonl")


In [10]:
# Canonical example diagram. It is defined once and reused by both the system
# prompt and the assistant turn so the two copies cannot drift apart.
# Node labels that contain parentheses are wrapped in double quotes: Mermaid's
# flowchart parser rejects unquoted "(" / ")" inside a [...] label, so the
# unquoted form would make the training target invalid Mermaid.
_EXAMPLE_MERMAID_DIAGRAM = (
    "```mermaid\n"
    "graph TD\n"
    "    User[User] --> API[API Gateway]\n"
    '    API --> Auth["Auth Service (Lambda)"]\n'
    '    API --> Post["Post Service (ECS)"]\n'
    '    API --> Media["Media Service (S3)"]\n'
    "    Auth --> DynamoDB[DynamoDB]\n"
    "    Post --> RDS[RDS]\n"
    "```"
)

# Chat-style conversation: system instructions, one user request, and the
# assistant answer for that request.
training_prompt = [
    {
        "role": "system",
        "content": (
            "You are a helpful and knowledgeable assistant specialized in generating clear and accurate "
            "architecture diagrams using Mermaid.js markdown syntax. When given a user's request, your task is to:\n\n"
            "- Understand the user's description, which may be detailed or vague.\n"
            "- Suggest one or more appropriate architecture styles, such as monolith, microservices, serverless, or hybrid.\n"
            "- Respect any design constraints mentioned, such as specific cloud providers (AWS, Azure, GCP), frameworks, or technologies.\n"
            "- Generate valid Mermaid.js markdown code representing the requested architecture or diagrams.\n"
            "- Support multiple diagrams if the user asks for alternatives or different styles.\n"
            "- Provide diagrams that are clean, well-labeled, and easy to read.\n"
            "- Optionally suggest real-time edits or improvements when prompted.\n"
            "- Avoid adding any commentary or explanations; respond only with Mermaid markdown code blocks unless otherwise requested.\n"
            "- Always wrap your Mermaid diagrams inside triple backticks with the word `mermaid` to ensure proper formatting.\n\n"
            "Example user request: \"Design a microservices architecture on AWS for a social media app.\"\n\n"
            "Your response should be:\n\n"
        ) + _EXAMPLE_MERMAID_DIAGRAM,
    },
    {
        "role": "user",
        "content": "Design a microservices architecture on AWS for a social media app."
    },
    {
        "role": "assistant",
        "content": _EXAMPLE_MERMAID_DIAGRAM,
    },
]

# The assistant turn (last message) is the text kept as the target response.
target_response = training_prompt[-1]["content"]
