<a href="https://colab.research.google.com/github/HalgasAdrian/LLM-For-Automatic-Hardware-Testbench-Generation/blob/main/notebooks/train_on_colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# LLM Testbench Generation Training on Colab This notebook trains the model on Google Colab with GPU acceleration.

In [34]:
# Check GPU availability
!nvidia-smi

import torch
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")

Tue Aug 12 02:19:30 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA A100-SXM4-40GB          Off |   00000000:00:04.0 Off |                    0 |
| N/A   33C    P0             42W /  400W |       5MiB /  40960MiB |      0%      Default |
|                                         |                        |             Disabled |
+-----------------------------------------+------------------------+----------------------+
                                                

## 1. Setup Environment

## 3. Upload and Extract Project Files

In [35]:
# Option 1: Upload from local computer
from google.colab import files
uploaded = files.upload()  # Select llm_testbench_colab.zip

# Extract
!unzip -q llm_testbench_colab.zip
!ls -la

Saving llm_testbench_colab.zip to llm_testbench_colab (2).zip
replace requirements.txt? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
replace .env? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
replace requirements_colab.txt? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
replace utils/data_utils.py? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
replace utils/verilog_utils.py? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
replace utils/__pycache__/verilog_utils.cpython-311.pyc? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
replace utils/__pycache__/data_utils.cpython-311.pyc? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
replace configs/config.yaml? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
replace scripts/prepare_for_colab.py? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
replace scripts/download_data.py? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
replace scripts/train.py? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
replace scripts/analyze_dataset.py? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
replace scripts/process_data.py? [y]es, [n]o, [A]ll

## 4. Install Dependencies

In [36]:
# Install requirements
!pip install -q -r requirements_colab.txt

# Verify installations
!pip list | grep -E "torch|transformers|peft|bitsandbytes"

bitsandbytes                          0.47.0
peft                                  0.17.0
sentence-transformers                 5.0.0
torch                                 2.6.0+cu124
torchao                               0.10.0
torchaudio                            2.6.0+cu124
torchdata                             0.11.0
torchsummary                          1.5.1
torchtune                             0.6.1
torchvision                           0.21.0+cu124
transformers                          4.55.0


## 5. Configure Weights & Biases (Optional)

In [37]:
# Set up wandb for experiment tracking
import os
import wandb

# Set API key directly
os.environ['WANDB_API_KEY'] = 'b29c0d2102aa226ead1fded36b786769f969d4f7'

# Initialize wandb
wandb.init(
    project="llm-testbench-gen",
    name="colab-training-run",
    config={
        "model": "TinyLlama-1.1B",
        "dataset": "AutoBench",
        "epochs": 10
    }
)

## 6. Update Config for Colab GPU

In [38]:
# Create optimized config for Colab
import yaml

# Load existing config
with open('configs/config.yaml', 'r') as f:
    config = yaml.safe_load(f)

# Update for GPU training with IMPROVED settings
config['model']['quantization']['load_in_4bit'] = True  # Enable 4-bit quantization
config['training']['per_device_train_batch_size'] = 2   # Smaller batch for better learning
config['training']['per_device_eval_batch_size'] = 2    # Match train batch size
config['training']['gradient_accumulation_steps'] = 8   # Increased for effective batch of 16
config['training']['fp16'] = True  # Enable mixed precision
config['training']['num_train_epochs'] = 10  # Increased from 5

# Adjusted learning parameters
config['training']['learning_rate'] = 1e-4   # Lowered from 2e-4 for more stable learning
config['training']['warmup_steps'] = 50      # Increased from 10 for gradual warmup
config['training']['warmup_ratio'] = 0.03    # Keep this
config['training']['max_grad_norm'] = 0.3    # Keep this for gradient clipping

# Improved LoRA configuration
config['model']['lora']['r'] = 32                          # Increased from 16
config['model']['lora']['lora_alpha'] = 64                 # Scaled with r (2x)
config['model']['lora']['target_modules'] = ["q_proj", "v_proj", "k_proj", "o_proj"]  # Target all attention

# Training schedule improvements
config['training']['logging_steps'] = 2      # More frequent logging
config['training']['save_steps'] = 20        # Save more frequently
config['training']['eval_steps'] = 10        # Evaluate more often

# Keep optimizer
config['training']['optim'] = 'adamw_torch'

# Save updated config
with open('configs/config_colab.yaml', 'w') as f:
    yaml.dump(config, f)

print("Config updated for Colab GPU training with improvements")
print(f"Learning rate: {config['training']['learning_rate']} (type: {type(config['training']['learning_rate'])})")
print(f"Optimizer: {config['training']['optim']}")
print(f"Epochs: {config['training']['num_train_epochs']}")
print(f"LoRA rank: {config['model']['lora']['r']}")
print(f"Effective batch size: {config['training']['per_device_train_batch_size'] * config['training']['gradient_accumulation_steps']}")
print(f"Total training examples: Check your augmented dataset size")

Config updated for Colab GPU training with improvements
Learning rate: 0.0001 (type: <class 'float'>)
Optimizer: adamw_torch
Epochs: 10
LoRA rank: 32
Effective batch size: 16
Total training examples: Check your augmented dataset size


## 7. Run Training

In [39]:
# Fix the train.py file directly in Colab
!sed -i 's/evaluation_strategy=/eval_strategy=/g' /content/scripts/train.py

# Verify the change was made
!grep -n "eval_strategy" /content/scripts/train.py

159:        eval_strategy="steps",


In [40]:
# Set config file
!cp configs/config_colab.yaml configs/config.yaml

# Run training
!python scripts/train.py

2025-08-12 02:21:30.424530: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-08-12 02:21:30.443953: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1754965290.465565   32214 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1754965290.472220   32214 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1754965290.490161   32214 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

## 8. Save Trained Model

In [41]:
# Compress the trained model
!cd models/checkpoints && zip -r /content/trained_model.zip * && cd /content
!ls -lh trained_model.zip

updating: adapter_config.json (deflated 56%)
updating: adapter_model.safetensors (deflated 8%)
updating: chat_template.jinja (deflated 60%)
updating: checkpoint-20/ (stored 0%)
updating: checkpoint-20/tokenizer.model (deflated 55%)
updating: checkpoint-20/rng_state.pth (deflated 25%)
updating: checkpoint-20/tokenizer_config.json (deflated 69%)
updating: checkpoint-20/scaler.pt (deflated 60%)
updating: checkpoint-20/training_args.bin (deflated 51%)
updating: checkpoint-20/README.md (deflated 66%)
updating: checkpoint-20/optimizer.pt (deflated 8%)
updating: checkpoint-20/special_tokens_map.json (deflated 73%)
updating: checkpoint-20/adapter_config.json (deflated 56%)
updating: checkpoint-20/chat_template.jinja (deflated 60%)
updating: checkpoint-20/scheduler.pt (deflated 58%)
updating: checkpoint-20/tokenizer.json (deflated 85%)
updating: checkpoint-20/trainer_state.json (deflated 73%)
updating: checkpoint-20/adapter_model.safetensors (deflated 8%)
updating: checkpoint-30/ (stored 0%)
up

In [42]:
# Download to local computer
from google.colab import files
files.download('trained_model.zip')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## 9. Install Icarus Verilog for Testing


In [43]:
# Install Icarus Verilog for compilation tests
!apt-get update
!apt-get install -y iverilog
!iverilog -version

0% [Working]            Hit:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:3 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:4 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:5 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Hit:6 http://archive.ubuntu.com/ubuntu jammy-updates InRelease
Hit:7 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Hit:8 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:9 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:10 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Get:11 http://security.ubuntu.com/ubuntu jammy-security/universe amd64 Packages [1,270 kB]
Fetched 1,399 kB in 1s (1,154 kB/s)
Reading package lists... Done
W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.ill

## 10. Run Evaluation

In [44]:
# Create the evaluation script if it doesn't exist
!cp /content/scripts/evaluate.py /content/scripts/evaluate_colab.py || echo "Creating new evaluation script"

# If evaluate.py doesn't exist, download it
if not os.path.exists('/content/scripts/evaluate.py'):
    print("Downloading evaluation script...")
    # You can paste the evaluate.py content here or upload it

In [45]:
# Run evaluation on the trained model
!python scripts/evaluate.py

# Check results
!ls -la data/test_results/

2025-08-12 02:27:01.084483: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-08-12 02:27:01.101904: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1754965621.123393   34422 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1754965621.129839   34422 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1754965621.146217   34422 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [46]:
# Load and display evaluation results
import json

with open('data/test_results/evaluation_results.json', 'r') as f:
    results = json.load(f)

print("Evaluation Metrics:")
print("="*50)
for metric, value in results['metrics'].items():
    if isinstance(value, float):
        print(f"{metric}: {value:.2%}")
    else:
        print(f"{metric}: {value}")

Evaluation Metrics:
total_examples: 8
generation_success_rate: 100.00%
compilation_success_rate: 0.00%
simulation_success_rate: 0.00%
syntax_valid_rate: 0.00%
has_timescale_rate: 100.00%
has_module_rate: 100.00%
has_initial_rate: 100.00%
has_finish_rate: 87.50%
has_display_rate: 50.00%


In [56]:
# View a sample generated testbench
import random

# List generated files
import glob
tb_files = glob.glob('data/test_results/generated_tb_*.v')

if tb_files:
    # Pick a random one to display
    sample_file = random.choice(tb_files)
    print(f"Viewing: {sample_file}")
    print("="*60)

    with open(sample_file, 'r') as f:
        print(f.read())
else:
    print("No generated testbenches found")

Viewing: data/test_results/generated_tb_3.v
`timescale 1ns / 1ps module tb;
reg clk;
reg d;
reg ar;
wire q;
integer file, line;
integer err_count = 0;
top_module dut ( .clk, .d, .ar, .q );
initial begin
clk = 0;
ar = 0;
d = 0;
q = 0;
// Initial conditions #10 // Sync reset $finish;
end#10 // Sync reset // Test Cases initial begin
file = $time;
#10 // Sync reset ar =


In [51]:
# Compress evaluation results
!cd data/test_results && zip -r /content/evaluation_results.zip * && cd /content

# Download
from google.colab import files
files.download('evaluation_results.zip')
print("Evaluation results downloaded!")

  adding: evaluation_results.json (deflated 92%)
  adding: generated_tb_0_fixed.v (deflated 35%)
  adding: generated_tb_0.v (deflated 59%)
  adding: generated_tb_1.v (deflated 64%)
  adding: generated_tb_2_fixed.v (deflated 67%)
  adding: generated_tb_2.v (deflated 82%)
  adding: generated_tb_3.v (deflated 41%)
  adding: generated_tb_4.v (deflated 79%)
  adding: generated_tb_5.v (deflated 45%)
  adding: generated_tb_6.v (deflated 83%)
  adding: generated_tb_7.v (deflated 87%)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Evaluation results downloaded!


In [57]:
# Analyze training data quality
import json
import re

# Load training data
with open('data/processed/train/train.jsonl', 'r') as f:
    train_data = [json.loads(line) for line in f]

print(f"Total training examples: {len(train_data)}")

# Check for problematic patterns
issues = {
    'has_c_code': 0,
    'has_initial_brace': 0,
    'has_wrong_bit_width': 0,
    'has_mixed_comments': 0,
    'wire_assignment': 0
}

clean_examples = []
problematic_indices = []

for i, example in enumerate(train_data):
    tb = example['testbench_code']
    has_issue = False

    # Check for C code
    if 'int main' in tb or '#include' in tb:
        issues['has_c_code'] += 1
        has_issue = True

    # Check for wrong initial syntax
    if 'initial {' in tb:
        issues['has_initial_brace'] += 1
        has_issue = True

    # Check for wrong bit widths
    if re.search(r'\d{2,}\'b', tb):  # 10'b, 12'b, etc.
        issues['has_wrong_bit_width'] += 1
        has_issue = True

    # Check for mixed comments/documentation
    if '// Change:' in tb or '### Correctness:' in tb:
        issues['has_mixed_comments'] += 1
        has_issue = True

    # Check for wire assignments in initial blocks
    if re.search(r'initial.*?wire.*?=', tb, re.DOTALL):
        issues['wire_assignment'] += 1
        has_issue = True

    if has_issue:
        problematic_indices.append(i)
    else:
        clean_examples.append(example)

print("\nIssues found in training data:")
for issue, count in issues.items():
    print(f"  {issue}: {count} ({count/len(train_data)*100:.1f}%)")

print(f"\nClean examples: {len(clean_examples)} ({len(clean_examples)/len(train_data)*100:.1f}%)")
print(f"Problematic examples: {len(problematic_indices)}")

# Save clean training data
if clean_examples:
    with open('data/processed/train/train_clean.jsonl', 'w') as f:
        for example in clean_examples:
            f.write(json.dumps(example) + '\n')
    print(f"\nClean training data saved to train_clean.jsonl")

Total training examples: 38

Issues found in training data:
  has_c_code: 0 (0.0%)
  has_initial_brace: 0 (0.0%)
  has_wrong_bit_width: 0 (0.0%)
  has_mixed_comments: 0 (0.0%)
  wire_assignment: 11 (28.9%)

Clean examples: 27 (71.1%)
Problematic examples: 11

Clean training data saved to train_clean.jsonl


In [58]:
# Check what's actually in the training data responses
import json

with open('data/processed/train/train.jsonl', 'r') as f:
    data = [json.loads(line) for line in f]

# Look at a few response formats
for i in range(min(3, len(data))):
    print(f"\n{'='*60}")
    print(f"Example {i} response format:")
    print(f"{'='*60}")
    response = data[i]['response']
    print(response[:500])

    # Check for non-Verilog content
    if "```" in response:
        print("⚠️  Contains markdown")
    if "Change:" in response or "###" in response:
        print("⚠️  Contains comments/analysis")
    if "int main" in response:
        print("⚠️  Contains C code")


Example 0 response format:
```verilog
`timescale 1ns / 1ps

module testbench;
    reg [7:0] a, b, c, d;
    wire [7:0] min;
    integer file;

    // Instantiate the Device Under Test (DUT)
    top_module DUT (
        .a(a),
        .b(b),
        .c(c),
        .d(d),
        .min(min)
    );

    initial begin
        // Open the file for output
        file = $fopen("TBout.txt", "w");

        // Scenario 1: All inputs are equal
        a = 127; b = 127; c = 127; d = 127;
        #10; // Wait for the output to stabili
⚠️  Contains markdown

Example 1 response format:
```verilog
`timescale 1ns / 1ps
module testbench;
reg  clk;
reg  d;
reg  ar;
wire  q;

integer file, scenario;
// DUT instantiation
top_module DUT (
    .clk(clk),
    .d(d),
    .ar(ar),
    .q(q)
);
// Clock generation
initial begin
    clk = 0;
    forever #5 clk = ~clk;
end

initial begin
    file = $fopen("TBout.txt", "w");
end
// Scenario Based Test
initial begin
    // Scenario 1
    scenario = 1;
    ar = 1; d