<a href="https://colab.research.google.com/github/HalgasAdrian/LLM-For-Automatic-Hardware-Testbench-Generation/blob/main/notebooks/train_on_colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# LLM Testbench Generation Training on Colab

This notebook trains the model on Google Colab with GPU acceleration.

In [None]:
# Check GPU availability
!nvidia-smi

import torch
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")

Mon Aug 11 04:53:57 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   41C    P8             10W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

## 1. Setup Environment

## 2. Mount Google Drive (Optional)

In [None]:
# Attach Google Drive to the runtime so files can be saved to / loaded from it
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## 3. Upload and Extract Project Files

In [None]:
# Option 1: Upload from local computer
from google.colab import files
uploaded = files.upload()  # Select llm_testbench_colab.zip

# Extract
!unzip -q llm_testbench_colab.zip
!ls -la

Saving llm_testbench_colab.zip to llm_testbench_colab (2).zip
replace requirements.txt? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
replace .env? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
replace requirements_colab.txt? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
replace utils/data_utils.py? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
replace utils/verilog_utils.py? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
replace utils/__pycache__/verilog_utils.cpython-311.pyc? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
replace utils/__pycache__/data_utils.cpython-311.pyc? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
replace configs/config.yaml? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
replace scripts/prepare_for_colab.py? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
replace scripts/download_data.py? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
replace scripts/train.py? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
replace scripts/analyze_dataset.py? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
replace scripts/process_data.py? [y]es, [n]o, [A]ll

## 4. Install Dependencies

In [None]:
# Install requirements
!pip install -q -r requirements_colab.txt

# Verify installations
!pip list | grep -E "torch|transformers|peft|bitsandbytes"

bitsandbytes                          0.46.1
peft                                  0.17.0
sentence-transformers                 5.0.0
torch                                 2.6.0+cu124
torchao                               0.10.0
torchaudio                            2.6.0+cu124
torchdata                             0.11.0
torchsummary                          1.5.1
torchtune                             0.6.1
torchvision                           0.21.0+cu124
transformers                          4.55.0


## 5. Configure Weights & Biases (Optional)

In [None]:
# Set up wandb for experiment tracking
import os
from getpass import getpass

import wandb

# Never hardcode the API key in the notebook: it ends up in version control and
# in every shared copy. Any key that was previously committed in this cell must
# be treated as compromised and revoked in the W&B account settings.
# The key is taken from the environment when already set; otherwise it is
# prompted for without echoing. It is stored in os.environ so that subprocesses
# started from this notebook inherit it.
if not os.environ.get('WANDB_API_KEY'):
    os.environ['WANDB_API_KEY'] = getpass('Enter your W&B API key: ')

# Initialize wandb
wandb.init(
    project="llm-testbench-gen",
    name="colab-training-run",
    config={
        "model": "TinyLlama-1.1B",
        "dataset": "AutoBench",
        "epochs": 10
    }
)

[34m[1mwandb[0m: Currently logged in as: [33mhalgas-a[0m ([33mhalgas-a-northeastern-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


## 6. Update Config for Colab GPU

In [None]:
# Derive a Colab-tuned config from the project's base config
import yaml

# Read the base configuration shipped with the project
with open('configs/config.yaml', 'r') as base_file:
    config = yaml.safe_load(base_file)

# Trainer overrides, listed in the order they are applied
training_overrides = {
    'per_device_train_batch_size': 2,   # small per-device batch
    'per_device_eval_batch_size': 2,    # same as the train batch size
    'gradient_accumulation_steps': 8,   # 2 * 8 = effective batch of 16
    'fp16': True,                       # mixed precision
    'num_train_epochs': 10,
    'learning_rate': 1e-4,
    # NOTE(review): both warmup_steps and warmup_ratio are set; confirm which
    # one scripts/train.py actually honours.
    'warmup_steps': 50,
    'warmup_ratio': 0.03,
    'max_grad_norm': 0.3,               # gradient clipping threshold
    'logging_steps': 2,
    'save_steps': 20,
    'eval_steps': 10,
    'optim': 'adamw_torch',
}

# LoRA adapter overrides: alpha is kept at 2x the rank, all four attention
# projections are targeted
lora_overrides = {
    'r': 32,
    'lora_alpha': 64,
    'target_modules': ["q_proj", "v_proj", "k_proj", "o_proj"],
}

config['model']['quantization']['load_in_4bit'] = True  # 4-bit quantization
config['training'].update(training_overrides)
config['model']['lora'].update(lora_overrides)

# Write the result next to the base config under a separate name
with open('configs/config_colab.yaml', 'w') as colab_file:
    yaml.dump(config, colab_file)

# Echo the key settings (the learning-rate type is printed to confirm it is a float)
training_cfg = config['training']
learning_rate = training_cfg['learning_rate']
effective_batch = (
    training_cfg['per_device_train_batch_size']
    * training_cfg['gradient_accumulation_steps']
)

print("Config updated for Colab GPU training with improvements")
print(f"Learning rate: {learning_rate} (type: {type(learning_rate)})")
print(f"Optimizer: {training_cfg['optim']}")
print(f"Epochs: {training_cfg['num_train_epochs']}")
print(f"LoRA rank: {config['model']['lora']['r']}")
print(f"Effective batch size: {effective_batch}")
print("Total training examples: Check your augmented dataset size")

Config updated for Colab GPU training
Learning rate: 0.0002 (type: <class 'float'>)
Optimizer: adamw_torch


## 7. Run Training

In [None]:
# Patch scripts/train.py in place: rename every `evaluation_strategy=` keyword
# argument to `eval_strategy=`. Re-running the cell is harmless because the old
# name no longer matches after the first substitution.
# NOTE(review): presumably required because the installed transformers release
# expects the newer keyword name — confirm against the installed version.
!sed -i 's/evaluation_strategy=/eval_strategy=/g' /content/scripts/train.py

# Print the matching lines (with line numbers) so the rename can be checked by eye
!grep -n "eval_strategy" /content/scripts/train.py

159:        eval_strategy="steps",


In [None]:
# Overwrite the base config with the Colab-tuned copy.
# NOTE(review): train.py is invoked without arguments below, so it presumably
# reads configs/config.yaml by default — confirm in scripts/train.py.
!cp configs/config_colab.yaml configs/config.yaml

# Launch training as a subprocess; its log output streams into this cell
!python scripts/train.py

2025-08-11 04:58:46.590895: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1754888326.612670   13107 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1754888326.618993   13107 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1754888326.636022   13107 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1754888326.636054   13107 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1754888326.636057   13107 computation_placer.cc:177] computation placer alr

## 8. Save Trained Model

In [None]:
# Compress the trained model
!cd models/checkpoints && zip -r /content/trained_model.zip * && cd /content
!ls -lh trained_model.zip

  adding: adapter_config.json (deflated 57%)
  adding: adapter_model.safetensors (deflated 8%)
  adding: chat_template.jinja (deflated 60%)
  adding: checkpoint-10/ (stored 0%)
  adding: checkpoint-10/trainer_state.json (deflated 56%)
  adding: checkpoint-10/adapter_config.json (deflated 57%)
  adding: checkpoint-10/tokenizer.json (deflated 85%)
  adding: checkpoint-10/README.md (deflated 66%)
  adding: checkpoint-10/training_args.bin (deflated 51%)
  adding: checkpoint-10/chat_template.jinja (deflated 60%)
  adding: checkpoint-10/tokenizer_config.json (deflated 69%)
  adding: checkpoint-10/optimizer.pt (deflated 7%)
  adding: checkpoint-10/adapter_model.safetensors (deflated 8%)
  adding: checkpoint-10/special_tokens_map.json (deflated 73%)
  adding: checkpoint-10/scaler.pt (deflated 60%)
  adding: checkpoint-10/scheduler.pt (deflated 58%)
  adding: checkpoint-10/rng_state.pth (deflated 25%)
  adding: checkpoint-10/tokenizer.model (deflated 55%)
  adding: README.md (deflated 66%)
  ad

In [None]:
# Option 1: send the model archive to the local computer via a browser download
from google.colab import files

MODEL_ARCHIVE = 'trained_model.zip'
files.download(MODEL_ARCHIVE)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Option 2: Save to Google Drive
import shutil
from pathlib import Path

drive_dir = Path('/content/drive/MyDrive')

# Mounting Drive is optional in this notebook, so only report success when the
# copy can actually happen; a missing archive raises FileNotFoundError instead
# of being reported as saved.
if drive_dir.is_dir():
    shutil.copy('trained_model.zip', drive_dir)
    print("Model saved to Google Drive")
else:
    print("Google Drive is not mounted at /content/drive; run the mount cell in section 2 first")

## 9. Install Icarus Verilog for Testing


In [None]:
# Install Icarus Verilog for compilation tests
!apt-get update
!apt-get install -y iverilog
!iverilog -version

0% [Working]            Get:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,632 B]
0% [Connecting to archive.ubuntu.com] [Connecting to security.ubuntu.com (185.10% [Connecting to archive.ubuntu.com] [Connecting to security.ubuntu.com (185.1                                                                               Get:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease [1,581 B]
Get:3 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Hit:4 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:5 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:6 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  Packages [1,923 kB]
Get:7 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Get:8 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease [18.1 kB]
Get:9 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease [24.3 kB]
Get

## 10. Run Evaluation

In [None]:
# Create the evaluation script if it doesn't exist
!cp /content/scripts/evaluate.py /content/scripts/evaluate_colab.py || echo "Creating new evaluation script"

# If evaluate.py doesn't exist, download it
if not os.path.exists('/content/scripts/evaluate.py'):
    print("Downloading evaluation script...")
    # You can paste the evaluate.py content here or upload it

In [None]:
# Run the evaluation script as a subprocess; its log output streams into this cell
!python scripts/evaluate.py

# List data/test_results/ to check what the run left there
# (later cells read evaluation_results.json and generated_tb_*.v from this directory)
!ls -la data/test_results/

2025-08-11 05:48:45.937745: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1754891325.959316   26453 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1754891325.965611   26453 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1754891325.982769   26453 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1754891325.982795   26453 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1754891325.982798   26453 computation_placer.cc:177] computation placer alr

In [None]:
# Read the evaluation summary from disk and print one line per metric
import json

RESULTS_PATH = 'data/test_results/evaluation_results.json'

with open(RESULTS_PATH, 'r') as results_file:
    results = json.load(results_file)

print("Evaluation Metrics:")
print("=" * 50)
for metric, value in results['metrics'].items():
    # Floats are rendered as percentages; everything else is printed as-is
    rendered = f"{value:.2%}" if isinstance(value, float) else f"{value}"
    print(f"{metric}: {rendered}")

Evaluation Metrics:
total_examples: 8
generation_success_rate: 100.00%
compilation_success_rate: 0.00%
simulation_success_rate: 0.00%
syntax_valid_rate: 0.00%
has_timescale_rate: 37.50%
has_module_rate: 100.00%
has_initial_rate: 100.00%
has_finish_rate: 75.00%
has_display_rate: 62.50%


In [None]:
# Print one generated testbench, picked at random, for a quick visual check
import glob
import random

# Collect every generated testbench file in the results directory
tb_files = glob.glob('data/test_results/generated_tb_*.v')

if not tb_files:
    print("No generated testbenches found")
else:
    sample_file = random.choice(tb_files)
    print(f"Viewing: {sample_file}")
    print("=" * 60)

    with open(sample_file, 'r') as tb_source:
        print(tb_source.read())

Viewing: data/test_results/generated_tb_5.v
module top_module_tb;
Bit#(1) clk_in = 1'b0;
Bit#(1) clk_in_neg = 1'b0;
Bit#(1) d_in = 0;
Bit#(1) ar_in = 1;
Bit#(1) q_out = 1;
wire clk = 1'b0;
wire d = 1'b0;
wire ar = 1'b0;
wire q = 1'b0;
wire q_out_neg = 1'b0;
top_module dut( .clk(clk_in), .d(d), .ar(ar_in), .q(q_out) );
initial begin
clk_in_neg = 1'b1;
d_in = 1;
ar_in = 1;
clk = 1'b0;
#5 clk = 1'b1;
$display("DUT: %0d %0d %0d", dut.q, dut.d, dut.ar);
$finish(0);
endinitial begin
$display("DUT: %0d %0d %0d", dut.q, dut.d, dut.ar);
$finish(0);
endinitial begin
clk_in = 1'b1;
$display("DUT: %0d %0d %0d", dut.q, dut.d, dut.ar);
$finish(0);
endinitial begin
clk_in_neg = 1'b0;
#5 clk_in_neg = 1'b1;
$display("DUT: %0d %0d %0d", dut.q, dut.d, dut.ar);
$finish(0);
endinitial begin
$display("DUT: %0d %0d %0d", dut.q, dut.d, dut.ar);
$finish(0);
endinitial begin
clk_in = 1'b1;
$display("DUT: %0d %0d %0d", dut.q, dut.d, dut.ar);
$finish(0);
endinitial begin
clk_in_neg = 1'b0;
#5 clk_in_neg = 1'b1;
$

In [None]:
# Compress evaluation results
!cd data/test_results && zip -r /content/evaluation_results.zip * && cd /content

# Download
from google.colab import files
files.download('evaluation_results.zip')
print("Evaluation results downloaded!")

  adding: evaluation_results.json (deflated 95%)
  adding: generated_tb_0.v (deflated 98%)
  adding: generated_tb_1.v (deflated 93%)
  adding: generated_tb_2.v (deflated 97%)
  adding: generated_tb_3.v (deflated 93%)
  adding: generated_tb_4.v (deflated 93%)
  adding: generated_tb_5.v (deflated 92%)
  adding: generated_tb_6.v (deflated 94%)
  adding: generated_tb_7.v (deflated 94%)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Evaluation results downloaded!


## 11. Test with Custom Verilog Design


In [None]:
# Build a testbench-generation prompt for a hand-written Verilog design
custom_dut = """
module mux2to1(
    input a,
    input b,
    input sel,
    output y
);
    assign y = sel ? b : a;
endmodule
"""

print("Generating testbench for custom DUT...")
print("="*60)

# Both the markdown code fence and the triple-quoted f-string must be closed;
# leaving them open makes the whole cell fail with
# "SyntaxError: incomplete input".
test_prompt = f"""Generate a Verilog testbench for the following design under test (DUT):
```verilog
{custom_dut}
```
"""

# NOTE(review): this notebook never loads the trained model into the kernel, so
# the generation call that should consume `test_prompt` still has to be added.
# Until then the cell shows the prompt that would be sent.
print(test_prompt)

SyntaxError: incomplete input (ipython-input-4186297528.py, line 17)