<a href="https://colab.research.google.com/github/Manoj2409/RISC-V-Random-Assembly-Generator/blob/main/LlamaFinal.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Step 1: Install Required Libraries
!pip install numpy==1.24.0 fsspec==2024.9.0
!pip install --upgrade transformers huggingface-hub accelerate torchvision torch datasets
!pip install peft

import os
import json
import torch
from datasets import Dataset

def load_dataset_from_json(json_file):
    """Loads dataset from a JSON file."""
    with open(json_file, "r") as f:
        data_dict = json.load(f)
    return Dataset.from_dict(data_dict)

def main():
    ############################################################################
    # 1. Load Dataset from JSON File
    ############################################################################
    dataset_file = "dataset.json"
    if not os.path.exists(dataset_file):
        # Default dataset if the file does not exist
        default_data = {
            "data": [
                "lw x1, 5(x2)",
                "sw x1, 4(x2)",
                "lw x3, 8(x4)",
                "sw x5, 12(x6)",
                "lw x7, 16(x8)",
                "sw x9, 20(x10)",
                "lw x11, 24(x12)",
                "sw x13, 28(x14)",
                "lw x15, 32(x16)",
                "sw x17, 36(x18)"
            ]
        }
        with open(dataset_file, "w") as f:
            json.dump(default_data, f, indent=4)
        print(f"Dataset file '{dataset_file}' created with default data.")

    dataset = load_dataset_from_json(dataset_file)
    train_dataset = dataset  # Using full dataset for training

    model_name = 'meta-llama/Llama-3.2-3B-Instruct'
    from huggingface_hub import login
    hf_token = ""
    #hf_token = os.getenv("HF_HUB_TOKEN")
    #if not hf_token:
    #    raise ValueError("HuggingFace Hub token not found. Set it as an environment variable.")
    login(token=hf_token)

    from transformers import AutoTokenizer, AutoModelForCausalLM

    print(f"Loading tokenizer for model: {model_name}")
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Ensure the tokenizer has a pad_token_id
    if tokenizer.pad_token_id is None:
        tokenizer.pad_token_id = tokenizer.eos_token_id

    print(f"Loading base model from {model_name} ...")
    base_model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True
    )

    # Move the model out of `meta` using to_empty()
    if any(p.device.type == "meta" for p in base_model.parameters()):
        print("Model is on meta device, initializing properly...")
        base_model = base_model.to_empty(device=torch.device("cuda" if torch.cuda.is_available() else "cpu"))
    else:
        print("Model loaded properly on non-meta device.")

    ############################################################################
    # 4. LoRA Configuration
    ############################################################################
    from peft import LoraConfig, get_peft_model
    lora_config = LoraConfig(
        r=8,
        lora_alpha=32,
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
    )
    print("Wrapping the base model with LoRA...")
    lora_model = get_peft_model(base_model, lora_config)
    print("LoRA model is ready.")

    ############################################################################
    # 5. Data Collator
    ############################################################################
    def data_collator(batch):
        texts = [item["data"] for item in batch]

        # Tokenize the input texts
        tokenized = tokenizer(
            texts,
            padding=True,
            truncation=True,
            return_tensors="pt",
        )

        # For causal language modeling, labels are the same as input_ids
        tokenized["labels"] = tokenized["input_ids"].clone()

        return tokenized

    ############################################################################
    # 6. Training Arguments and Trainer
    ############################################################################
    from transformers import Trainer, TrainingArguments
    training_args = TrainingArguments(
        output_dir="lora-llama3.2-1b-riscv-ckpt",
        per_device_train_batch_size=2,
        gradient_accumulation_steps=1,
        num_train_epochs=5,
        learning_rate=1e-6,  # Very minimal learning rate
        fp16=True,
        logging_steps=1,
        save_steps=10,
        save_total_limit=2,
        optim="adamw_torch",
        report_to="none",
        remove_unused_columns=False,
        dataloader_pin_memory=True,  # Ensure this only applies to CPU tensors
    )

    trainer = Trainer(
        model=lora_model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=None,
        data_collator=data_collator
    )

    ############################################################################
    # 7. Fine-Tune with Debugging
    ############################################################################
    print("Starting training on LLaMA3.2 3B ...")
    for batch in trainer.get_train_dataloader():
        inputs = batch["input_ids"].to(training_args.device)
        labels = batch["labels"].to(training_args.device)

        # Forward pass
        outputs = lora_model(input_ids=inputs, labels=labels)
        loss = outputs.loss

        # Debug shapes
        print(f"Input IDs shape: {inputs.shape}")
        print(f"Labels shape: {labels.shape}")
        print(f"Logits shape: {outputs.logits.shape}")

        # Print loss
        print(f"Loss: {loss.item()}")

    print("Training completed.")

    # Save final LoRA adapter weights
    print("Saving final LoRA adapter weights...")
    trainer.save_model("lora-llama3.2-1b-riscv-final")
    print("LoRA weights saved to lora-llama3.2-1b-riscv-final.")

    ############################################################################
    # 8. Quick Inference / Generation Test
    ############################################################################
    test_prompt = "Generate a random rv32 assembly test with load store instructions"
    print(f"\nTest prompt: {test_prompt}")
    inputs = tokenizer(test_prompt, return_tensors="pt").to(base_model.device)
    with torch.no_grad():
        output_tokens = lora_model.generate(
            **inputs,
            max_new_tokens=100,
            do_sample=True,
            temperature=0.7,
        )
    print("=== Generated output ===")
    print(tokenizer.decode(output_tokens[0], skip_special_tokens=True))

if __name__ == "__main__":
    main()

Loading tokenizer for model: meta-llama/Llama-3.2-3B-Instruct


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading base model from meta-llama/Llama-3.2-3B-Instruct ...


Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:  33%|###3      | 1.65G/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.46G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]

Model loaded properly on non-meta device.
Wrapping the base model with LoRA...
LoRA model is ready.
Starting training on LLaMA3.2 3B ...
Input IDs shape: torch.Size([2, 10])
Labels shape: torch.Size([2, 10])
Logits shape: torch.Size([2, 10, 128256])
Loss: 5.805235862731934
Input IDs shape: torch.Size([2, 10])
Labels shape: torch.Size([2, 10])
Logits shape: torch.Size([2, 10, 128256])
Loss: 5.821253776550293
Input IDs shape: torch.Size([2, 10])
Labels shape: torch.Size([2, 10])
Logits shape: torch.Size([2, 10, 128256])
Loss: 6.230808734893799
Input IDs shape: torch.Size([2, 10])
Labels shape: torch.Size([2, 10])
Logits shape: torch.Size([2, 10, 128256])
Loss: 6.017599105834961
Input IDs shape: torch.Size([2, 10])
Labels shape: torch.Size([2, 10])
Logits shape: torch.Size([2, 10, 128256])
Loss: 6.282090663909912
Training completed.
Saving final LoRA adapter weights...


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


LoRA weights saved to lora-llama3.2-1b-riscv-final.

Test prompt: Generate a random rv32 assembly test with load store instructions
=== Generated output ===
Generate a random rv32 assembly test with load store instructions

Below is a simple assembly code snippet in RV32I that generates a random number and stores it in memory.

```assembly
.data
    result:.word 0
    seed:.word 0
    state:.word 0
    state2:.word 0

.text
   .globl main
    main:
        li $t0, 0
        li $t1, 0
        li $t2,
