In [None]:
# 1. FIRST RUN THESE COMMANDS
!pip uninstall -y -q bitsandbytes transformers
!pip install -U bitsandbytes
!pip install datasets
!pip install -q transformers==4.49.0 accelerate==0.34 peft==0.8.2
!pip install trl
!pip install ijson
from huggingface_hub import login
login('login')  # Keep your login

[0mCollecting bitsandbytes
  Downloading bitsandbytes-0.45.3-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3,>=2.0->bitsandbytes)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3,>=2.0->bitsandbytes)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<3,>=2.0->bitsandbytes)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<3,>=2.0->bitsandbytes)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch<3,>=2.0->bitsandbytes)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvi

In [None]:
from google.colab import drive
drive.mount("/content/drive")
file_path = "/content/drive/MyDrive/cleaned_verilog_dataset.json"

Mounted at /content/drive


In [None]:
import json
import os
import random
import torch
import numpy as np
from datasets import Dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    DataCollatorForLanguageModeling
)
from trl import SFTTrainer
import ijson
# Clear CUDA cache to ensure we have enough memory
import gc
gc.collect()
torch.cuda.empty_cache()

# Set seeds for reproducibility
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

set_seed(42)


In [None]:
def load_and_prepare_dataset(file_path, tokenizer, max_length=512, max_samples=50):
    """Load and tokenize dataset properly"""

    def data_generator():
        count = 0
        with open(file_path, 'r') as f:
            for item in ijson.items(f, 'item'):
                if count >= max_samples:
                    break
                if 'prompt' not in item or 'completion' not in item:
                    continue

                # Format text without tokenization
                text = (
                    f"### Instruction: {item['prompt']}\n\n"
                    f"### Response: {item['completion']}"
                    f"{tokenizer.eos_token}"
                )
                yield {"text": text}
                count += 1

    # Create dataset
    dataset = Dataset.from_generator(data_generator)

    # Tokenize the dataset
    def tokenize_function(examples):
        return tokenizer(
            examples["text"],
            truncation=True,
            max_length=max_length,
            padding="max_length",
            return_tensors=None,  # Don't convert to tensors here
        )

    tokenized_dataset = dataset.map(
        tokenize_function,
        batched=True,
        remove_columns=["text"]
    )

    return tokenized_dataset

In [None]:
# 2. Prepare the model with 4-bit quantization and LoRA (safer approach)
def prepare_model_and_tokenizer():
    """Load and prepare the model and tokenizer with 4-bit quantization and LoRA"""

    print("Loading model with 4-bit quantization...")

    # Define 4-bit quantization configuration
    compute_dtype = torch.float16
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=compute_dtype,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4"
    )

    # Load model with 4-bit quantization
    model = AutoModelForCausalLM.from_pretrained(
        "meta-llama/Llama-3.2-1B",
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True
    )

    print("Model loaded successfully.")

    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(
        "meta-llama/Llama-3.2-1B",
        padding_side="right",
        truncation_side="left"
    )

    # Set padding token
    if not tokenizer.pad_token:
        tokenizer.pad_token = tokenizer.eos_token

    model.config.use_cache = False
    model.config.pretraining_tp = 1

    # LoRA configuration
    lora_config = LoraConfig(
        r=8,  # Rank dimension
        lora_alpha=16,  # Alpha parameter for LoRA scaling
        target_modules=["q_proj", "v_proj"],
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM"
    )

    # Prepare model for k-bit training and apply LoRA
    print("Preparing model for 4-bit training...")
    model = prepare_model_for_kbit_training(model)
    model = get_peft_model(model, lora_config)
    print("Model prepared with LoRA successfully.")

    return model, tokenizer



In [None]:
from transformers import DataCollatorForLanguageModeling, AutoTokenizer, AutoModelForCausalLM
from trl import SFTConfig, SFTTrainer
import torch

def train_model(model, tokenizer, train_dataset, output_dir):
    """Train the model using the SFTTrainer from the TRL library"""

    training_args = TrainingArguments(
        output_dir=output_dir,
        num_train_epochs=1,
        per_device_train_batch_size=1,
        gradient_accumulation_steps=32,
        learning_rate=1e-4,
        fp16=True,
        logging_steps=10,
        optim="paged_adamw_8bit",
        max_grad_norm=0.3,
        warmup_ratio=0.01,
        lr_scheduler_type="constant",
        report_to="none",
        gradient_checkpointing=True
    )

    trainer = SFTTrainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        tokenizer=tokenizer,
        data_collator=DataCollatorForLanguageModeling(
            tokenizer=tokenizer,
            mlm=False,
            pad_to_multiple_of=8  # Critical for alignment
        )
    )

    model.config.use_cache = False
    torch.cuda.empty_cache()

    print("Starting training...")
    trainer.train()


    return trainer


In [None]:
# 4. Generate Verilog code function
def generate_verilog_code(model, tokenizer, prompt, max_new_tokens=1024):
    """Generate Verilog code based on a prompt"""
    formatted_prompt = f"### Instruction: Generate Verilog code based on the following specification.\n\n### Input:\n{prompt}\n\n### Response:"

    inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=max_new_tokens,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.pad_token_id
        )

    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    response = generated_text.split("### Response:")[-1].strip()

    return response



In [None]:
# Print system info for debugging
print("Python version:", os.sys.version)
print("PyTorch version:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("CUDA version:", torch.version.cuda)
    print("GPU device:", torch.cuda.get_device_name(0))
    print("GPU memory allocated:", torch.cuda.memory_allocated(0))
    print("GPU memory reserved:", torch.cuda.memory_reserved(0))

# Set file paths
file_path = "/content/drive/MyDrive/cleaned_verilog_dataset.json"
output_dir = "llama3-verilog-finetuned"

# Create output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)


Python version: 3.11.11 (main, Dec  4 2024, 08:55:07) [GCC 11.4.0]
PyTorch version: 2.6.0+cu124
CUDA available: True
CUDA version: 12.4
GPU device: Tesla T4
GPU memory allocated: 0
GPU memory reserved: 0


In [None]:
# 1. Load and prepare dataset
tokenizer = AutoTokenizer.from_pretrained(
    "meta-llama/Llama-3.2-1B",
    padding_side="right",
    truncation_side="left",
    use_fast=True  # Ensure fast tokenizer
)
tokenizer.pad_token = tokenizer.eos_token
print("\nLoading and preparing dataset...")
train_dataset = load_and_prepare_dataset(file_path, tokenizer)
print(f"Train dataset size: {len(train_dataset)}")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/50.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/301 [00:00<?, ?B/s]


Loading and preparing dataset...


Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/50 [00:00<?, ? examples/s]

Train dataset size: 50


In [None]:
print("\nSample dataset entry:")
print(train_dataset[0]["text"])


Sample dataset entry:


KeyError: 'text'

In [None]:
# 2. Prepare model and tokenizer
print("\nPreparing model and tokenizer...")
model, tokenizer = prepare_model_and_tokenizer()



Preparing model and tokenizer...
Loading model with 4-bit quantization...


config.json:   0%|          | 0.00/843 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.47G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/185 [00:00<?, ?B/s]

Model loaded successfully.
Preparing model for 4-bit training...
Model prepared with LoRA successfully.


In [None]:
# 3. Train the model
print("\nTraining the model...")
trainer = train_model(model, tokenizer, train_dataset, output_dir)



Training the model...


  trainer = SFTTrainer(


Converting train dataset to ChatML:   0%|          | 0/50 [00:00<?, ? examples/s]

Applying chat template to train dataset:   0%|          | 0/50 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/50 [00:00<?, ? examples/s]

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Starting training...


Step,Training Loss


In [None]:
# 4. Test the model with an example prompt
print("\nTesting the model with an example prompt...")
example_prompt = "Design a simple AND gate with 2 inputs (a, b) and 1 output (y)"
generated_code = generate_verilog_code(model, tokenizer, example_prompt)

print("\nExample Verilog Code Generation:")
print("Prompt:", example_prompt)
print("Generated Code:\n", generated_code)


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.



Testing the model with an example prompt...





Example Verilog Code Generation:
Prompt: Design a simple AND gate with 2 inputs (a, b) and 1 output (y)
Generated Code:
 Verilog code for the OR gate
```
module OR (a, b, y);
  input a;
  input b;
  output y;
  wire y;
  y = a || b;
endmodule
```


In [None]:
print("\nFine-tuning completed successfully!")



Fine-tuning completed successfully!


In [None]:
# Complete model saving and uploading process

# 1. After training, save the adapter model
output_dir = "llama-3.2-finetuned-adapter"
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)

# 2. Save the model configuration
model_config = model.config.to_dict()
with open(f"{output_dir}/config.json", "w") as f:
    json.dump(model_config, f)

# 3. Save training arguments - fixed approach
if hasattr(trainer, "args"):
    # Save as JSON instead of using save_pretrained
    training_args_dict = trainer.args.to_dict()
    with open(f"{output_dir}/training_args.json", "w") as f:
        json.dump(training_args_dict, f)

# 4. Create a README.md with model information
readme_content = f"""
# Fine-tuned Llama-3.2-1B Model

This is a fine-tuned version of [meta-llama/Llama-3.2-1B](https://huggingface.co/meta-llama/Llama-3.2-1B) with PEFT/LoRA.

## Model Details
- Base model: meta-llama/Llama-3.2-1B
- Fine-tuning method: PEFT with LoRA
- Rank (r): 8
- Alpha: 16
- Target modules: q_proj, v_proj

## Usage
```python
from transformers import AutoTokenizer
from peft import AutoPeftModelForCausalLM

# Load model and tokenizer
model = AutoPeftModelForCausalLM.from_pretrained("MangoLassi/llama-3.2-1b-finetuned")
tokenizer = AutoTokenizer.from_pretrained("MangoLassi/llama-3.2-1b-finetuned")

# Generate text
prompt = "Your prompt here"
inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_length=200)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```
"""

with open(f"{output_dir}/README.md", "w") as f:
    f.write(readme_content)

# 5. Login to Hugging Face Hub
from huggingface_hub import login

login(token="login")

# 6. Push to Hugging Face Hub
from huggingface_hub import HfApi

api = HfApi()
api.upload_folder(
    folder_path=output_dir,
    repo_id="MangoLassi/llama-3.2-1b-finetuned",
    repo_type="model"
)

# 7. Verify the model can be loaded
print("Attempting to load the model from Hugging Face to verify upload...")
try:
    from peft import AutoPeftModelForCausalLM

    # Test loading the model (with reduced size if needed for verification)
    test_model = AutoPeftModelForCausalLM.from_pretrained(
        "MangoLassi/llama-3.2-1b-finetuned",
        device_map="auto"
    )

    test_tokenizer = AutoTokenizer.from_pretrained("MangoLassi/llama-3.2-1b-finetuned")

    # Test with a sample input
    test_prompt = "Generate verilog code to Design a simple AND gate with 2 inputs (a, b) and 1 output (y)"
    test_inputs = test_tokenizer(test_prompt, return_tensors="pt").to(test_model.device)
    test_outputs = test_model.generate(**test_inputs, max_length=200)
    print("Model loaded and tested successfully!")
    print(test_tokenizer.decode(test_outputs[0], skip_special_tokens=True))
except Exception as e:
    print(f"Note: Verification failed with error: {str(e)}")
    print("This might be because the model is still uploading or indexing on Hugging Face.")
    print("Check your model page after a few minutes and try loading it manually.")

- empty or missing yaml metadata in repo card


Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

adapter_model.safetensors:   0%|          | 0.00/3.42M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

Attempting to load the model from Hugging Face to verify upload...


adapter_config.json:   0%|          | 0.00/598 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/335 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/3.42M [00:00<?, ?B/s]

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model loaded and tested successfully!
Design a simple AND gate with 2 inputs (a, b) and 1 output (y) using only 1 NOR gate and 1 OR gate.
We have a 2-input AND gate that has the following truth table:
1. Draw the schematic diagram of the circuit.
2. Draw the truth table of the circuit.
3. Draw the schematic diagram of the circuit using only 1 NOR gate and 1 OR gate.
4. Draw the truth table of the circuit using only 1 NOR gate and 1 OR gate.
5. Draw the schematic diagram of the circuit using only 1 NOR gate and 1 OR gate.
6. Draw the truth table of the circuit using only 1 NOR gate and 1 OR gate.
7. Draw the schematic diagram of the circuit using only 1 NOR gate and 1 OR gate.
8. Draw the truth table of the circuit using only 1 NOR gate and 1 OR gate.
9. Draw the schematic


In [None]:
test_prompt = " Generate Verilog code no explanation for AND gate with 2 inputs (a, b) and 1 output (y)"
test_inputs = test_tokenizer(test_prompt, return_tensors="pt").to(test_model.device)
test_outputs = test_model.generate(**test_inputs, max_length=200)
print("Model loaded and tested successfully!")
print(test_tokenizer.decode(test_outputs[0], skip_special_tokens=True))

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model loaded and tested successfully!
 Generate Verilog code no explanation for AND gate with 2 inputs (a, b) and 1 output (y) and a function (f) for a, b, y.

# Solution

This problem is quite simple. We can simply write a Verilog code for the AND gate. However, we need to add a function for the AND gate. We can add a function for the AND gate. The function will take three inputs, a, b, and y. It will then return a boolean value of whether the AND gate is true or false. This boolean value will then be used to determine whether to output y or not. The Verilog code is as follows:

module AND_gate (a, b, y);

input a;
input b;
output y;

always @ (a, b)
begin
y = a && b;
end

endmodule
