In [None]:
# --- 3. DATASET ---
# Seed samples for RTL debug / reasoning / verification / generation tasks.
# Every entry is a dict with exactly two string keys:
#   "instruction" - the prompt text (may embed a buggy RTL snippet)
#   "output"      - the reference answer (explanation and/or corrected RTL)
# Multi-line Verilog/SystemVerilog is embedded with "\n" escapes.
# Presumably consumed by a later cell as prompt/answer pairs - that cell is not
# visible here. Replace or extend this list with your actual Debug/Logic dataset.
raw_data = [

# 1. Debug RTL (MOST IMPORTANT)
{"instruction": "The following FIFO implementation has incorrect full/empty logic. Fix the bug:\nassign empty = (wr_ptr[$clog2(DEPTH)] != rd_ptr[$clog2(DEPTH)]) && (wr_ptr[$clog2(DEPTH)-1:0] == rd_ptr[$clog2(DEPTH)-1:0]);\nassign full = (wr_ptr == rd_ptr);",
 "output": "The logic is swapped. Correct implementation:\nassign empty = (wr_ptr == rd_ptr);\nassign full = (wr_ptr[$clog2(DEPTH)] != rd_ptr[$clog2(DEPTH)]) && (wr_ptr[$clog2(DEPTH)-1:0] == rd_ptr[$clog2(DEPTH)-1:0]);"},

{"instruction": "The following AXI-Stream logic violates ready/valid handshake. Fix it:\nif (s_tvalid) m_tvalid <= 1;",
 "output": "The sender must only assert m_tvalid when data is available and must hold it until m_tready is high:\nif (!m_tvalid || m_tready) begin\n  m_tvalid <= s_tvalid;\n  m_tdata  <= s_tdata;\nend"},

{"instruction": "The following CDC FIFO uses binary pointers across clock domains. Fix the design approach.",
 "output": "Binary pointers must not cross clock domains directly. Use Gray-coded pointers for crossing and synchronize Gray pointers with two-flop synchronizers in each domain, then compare synchronized Gray pointers for full/empty detection."},

{"instruction": "The following round-robin arbiter can starve requests due to fixed priority. Fix the fairness issue.",
 "output": "Track the last granted index and rotate priority each cycle. After a grant, update the pointer so the next cycle starts arbitration from the next index to ensure fairness."},

{"instruction": "This UART transmitter drops data when tx_ready is deasserted. Fix the control logic.",
 "output": "Gate loading of the shift register with tx_ready and hold tx_valid until the transfer completes. Ensure the transmitter only accepts new data when not busy and keeps tx_valid asserted until the stop bit is sent."},

{"instruction": "The following FIFO write logic allows writes when full. Fix the condition:\nif (wr_en) mem[wr_ptr] <= data_in;",
 "output": "Guard writes with the full flag:\nif (wr_en && !full) mem[wr_ptr[ADDR_W-1:0]] <= data_in;"},

{"instruction": "The following counter overflows silently. Add saturation logic.",
 "output": "Clamp the counter at max value:\nif (cnt == MAX) cnt <= MAX; else cnt <= cnt + 1;"},

{"instruction": "This arbiter asserts multiple grants simultaneously. Fix it.",
 "output": "Ensure one-hot grant by selecting the first eligible request in priority order or rotating priority and clearing all other grant bits when one is asserted."},

# 2. Explain-Why Samples (Hardware Reasoning)
{"instruction": "Explain why Gray code is used for pointer synchronization in asynchronous FIFOs.",
 "output": "Gray code changes only one bit between successive values, reducing the chance of metastability when pointers cross clock domains. This ensures safe comparison of synchronized pointers for full/empty detection."},

{"instruction": "Explain why ready/valid handshaking requires the sender to hold valid high until ready is asserted.",
 "output": "Holding valid until ready ensures no data is lost when the receiver applies backpressure. The handshake completes only when both valid and ready are high in the same cycle, guaranteeing reliable data transfer."},

{"instruction": "Explain why FIFO full detection uses MSB inversion with address equality.",
 "output": "The extra MSB tracks wrap-around. When lower address bits match but MSBs differ, the write pointer has lapped the read pointer by one full buffer depth, indicating the FIFO is full."},

{"instruction": "Explain why non-blocking assignments are required in sequential always blocks.",
 "output": "Non-blocking assignments model clocked behavior by updating all registers simultaneously at the clock edge, preventing unintended ordering effects and race conditions present with blocking assignments."},

{"instruction": "Explain why synchronizers require two flip-flops when crossing clock domains.",
 "output": "Two flip-flops reduce the probability of metastability propagating into downstream logic by giving metastable signals time to settle before being used."},

{"instruction": "Explain why backpressure is required in streaming protocols.",
 "output": "Backpressure allows receivers to throttle senders when buffers are full, preventing data loss and ensuring flow control across variable-rate producers and consumers."},

# 3. Testbench + Assertions (Verification-Aware Thinking)
{"instruction": "Write SystemVerilog assertions to ensure no FIFO write occurs when full.",
 "output": "assert property (@(posedge clk) disable iff(!rst_n) !(wr_en && full));"},

{"instruction": "Write SystemVerilog assertions to ensure no FIFO read occurs when empty.",
 "output": "assert property (@(posedge clk) disable iff(!rst_n) !(rd_en && empty));"},

{"instruction": "Write an assertion to check AXI-Stream valid remains asserted until ready.",
 "output": "assert property (@(posedge clk) disable iff(!rst_n) (m_tvalid && !m_tready) |-> ##1 m_tvalid);"},

{"instruction": "Write a simple testbench sequence to validate FIFO push/pop ordering.",
 "output": "initial begin\n  wr_en=1; data_in=8'hA5; #10;\n  wr_en=1; data_in=8'h3C; #10;\n  wr_en=0; rd_en=1; #10;\n  rd_en=1; #10;\nend"},

{"instruction": "Write an assertion to check UART start bit precedes data bits.",
 "output": "assert property (@(posedge clk) disable iff(rst) (tx_start) |-> (tx == 1'b0));"},

{"instruction": "Write an assertion to ensure arbiter grants are one-hot.",
 "output": "assert property (@(posedge clk) $onehot0(grant));"},

# 4. Small Amount of Hard Generation (Stability Anchor)
# Skid storage is named skid_data/skid_valid because `buf` is a reserved Verilog
# keyword. s_tready is driven only by a flop (!skid_valid); the skid register is
# filled only while the output stage is stalled and drained before new input.
{"instruction": "Design a parameterizable AXI-Stream skid buffer with ready/valid handshaking.",
 "output": "module axis_skid #(parameter WIDTH=32) (input clk, rst, input [WIDTH-1:0] s_tdata, input s_tvalid, output s_tready, output reg [WIDTH-1:0] m_tdata, output reg m_tvalid, input m_tready);\n  reg [WIDTH-1:0] skid_data;\n  reg skid_valid;\n  assign s_tready = !skid_valid;\n  always @(posedge clk) begin\n    if (rst) begin skid_valid<=0; m_tvalid<=0; end\n    else begin\n      if (!m_tvalid || m_tready) begin\n        if (skid_valid) begin m_tdata <= skid_data; m_tvalid <= 1; skid_valid <= 0; end\n        else begin m_tdata <= s_tdata; m_tvalid <= s_tvalid; end\n      end else if (s_tvalid && s_tready) begin\n        skid_data <= s_tdata; skid_valid <= 1;\n      end\n    end\n  end\nendmodule"},

{"instruction": "Design a CDC-safe pulse synchronizer for event transfer between two clock domains.",
 "output": "module pulse_sync (input src_clk, dst_clk, rst, input src_pulse, output reg dst_pulse);\n  reg src_toggle;\n  reg [1:0] dst_sync;\n  always @(posedge src_clk) if (rst) src_toggle <= 0; else if (src_pulse) src_toggle <= ~src_toggle;\n  always @(posedge dst_clk) if (rst) dst_sync <= 0; else dst_sync <= {dst_sync[0], src_toggle};\n  always @(posedge dst_clk) dst_pulse <= dst_sync[1] ^ dst_sync[0];\nendmodule"},

# The search uses a blocking `found` flag: testing the registered `grant` inside
# the loop would see last cycle's value (non-blocking update), so it could issue
# several grants or none. `idx` is 2 bits wide, so ptr+i wraps modulo 4.
{"instruction": "Design a 4-client round-robin arbiter with rotating priority pointer.",
 "output": "module rr_arb (input clk, rst, input [3:0] req, output reg [3:0] grant);\n  reg [1:0] ptr;\n  reg [1:0] idx;\n  reg found;\n  integer i;\n  always @(posedge clk) begin\n    if (rst) begin grant<=0; ptr<=0; end\n    else begin\n      grant <= 0;\n      found = 1'b0;\n      for (i=0;i<4;i=i+1) begin\n        idx = ptr + i;\n        if (req[idx] && !found) begin\n          grant[idx] <= 1'b1;\n          ptr <= idx + 1'b1;\n          found = 1'b1;\n        end\n      end\n    end\n  end\nendmodule"},

# The valid_out clear is listed first so that, when stage 1 refills the output in
# the same cycle it is consumed, the later `valid_out<=1` wins and no result is lost.
{"instruction": "Design a simple two-stage pipelined MAC with valid/ready handshake.",
 "output": "module mac2 (input clk, rst, input valid_in, output ready_in, input [15:0] a, b, output reg valid_out, input ready_out, output reg [31:0] y);\n  reg [31:0] mul;\n  reg v1;\n  assign ready_in = !v1;\n  always @(posedge clk) begin\n    if (rst) begin v1<=0; valid_out<=0; y<=0; end\n    else begin\n      if (valid_out && ready_out) valid_out<=0;\n      if (valid_in && ready_in) begin mul <= a*b; v1<=1; end\n      if (v1 && (!valid_out || ready_out)) begin y <= mul; valid_out<=1; v1<=0; end\n    end\n  end\nendmodule"},

{"instruction": "Design a small AXI-Lite register block with 4 registers and write response channel.",
 "output": "module axi_lite_regs (input clk, rst_n, input [3:0] awaddr, input awvalid, output reg awready, input [31:0] wdata, input wvalid, output reg wready, output reg [1:0] bresp, output reg bvalid, input bready);\n  reg [31:0] reg0, reg1, reg2, reg3;\n  always @(posedge clk) begin\n    if (!rst_n) begin awready<=0; wready<=0; bvalid<=0; end\n    else begin\n      if (awvalid && wvalid && !awready) begin\n        awready<=1; wready<=1;\n        case(awaddr[3:2])\n          2'b00: reg0<=wdata;\n          2'b01: reg1<=wdata;\n          2'b10: reg2<=wdata;\n          2'b11: reg3<=wdata;\n        endcase\n      end else begin awready<=0; wready<=0; end\n      if (awready && wready) begin bvalid<=1; bresp<=2'b00; end\n      if (bvalid && bready) bvalid<=0;\n    end\n  end\nendmodule"},

# Additional Debug RTL samples (prose-only fixes)
{"instruction": "The following AXI4-Lite slave drops write responses when AW and W arrive in different cycles. Fix the protocol handling.",
 "output": "AW and W channels must be accepted independently and latched until both are received. Implement separate registers for awaddr and wdata with valid flags, then assert bvalid only after both have been captured."},

{"instruction": "The following FIFO read logic produces stale data when rd_en toggles. Fix the read timing.",
 "output": "Register the read enable and read data on the same clock edge. Ensure data_out is only updated when rd_en && !empty, and avoid combinational reads from memory that can cause stale outputs."},

{"instruction": "The following AXI-Stream sink deasserts ready combinationally from valid, causing a combinational loop. Fix the deadlock.",
 "output": "Register tready and ensure no combinational dependency between tvalid and tready. Break the loop by computing tready from buffer availability only."},

{"instruction": "The following dual-port RAM allows write-write collision on the same address without defined behavior. Fix the arbitration.",
 "output": "Add collision detection and define priority or stall one port when both ports write the same address in the same cycle to ensure deterministic behavior."},

{"instruction": "The following CDC synchronizer uses only one flip-flop. Fix the metastability risk.",
 "output": "Use a two-flop synchronizer in the destination clock domain for all single-bit control signals crossing clock domains."},

{"instruction": "The following arbiter can grant when no requests are asserted due to stale state. Fix the grant logic.",
 "output": "Clear grant when no req bits are high and recompute grants each cycle based solely on current requests and the rotating pointer."},

{"instruction": "The following counter-based baud generator accumulates drift due to truncation. Fix the fractional baud rate error.",
 "output": "Use an accumulator-based fractional divider (phase accumulator) to distribute error over time or increase counter width to reduce truncation error."},

{"instruction": "The following UART receiver fails when stop bit is sampled early. Fix the sampling strategy.",
 "output": "Sample data bits at the center of each bit period using a half-bit offset after detecting the start bit to improve robustness to jitter."},

{"instruction": "The following FIFO does not support simultaneous read and write correctly. Fix the pointer update logic.",
 "output": "Allow independent pointer updates when rd_en && !empty and wr_en && !full in the same cycle. Do not block one operation due to the other."},

{"instruction": "The following ready/valid pipeline drops beats under backpressure. Fix the skid buffering.",
 "output": "Insert a skid buffer that captures data when downstream deasserts ready and holds valid high until the beat is accepted."},

# Additional Explain-Why samples
{"instruction": "Explain why combinational feedback between valid and ready can deadlock a streaming interface.",
 "output": "Combinational feedback can form loops where neither side asserts readiness, causing deadlock. Registering at least one side breaks the loop and guarantees forward progress."},

{"instruction": "Explain why CDC FIFOs require synchronizing pointers rather than data buses.",
 "output": "Pointers are single- or few-bit control signals that can be safely synchronized. Data buses must be transferred through memory written in one domain and read in another to avoid multi-bit metastability."},

{"instruction": "Explain why AXI write address and write data channels are decoupled.",
 "output": "Decoupling allows independent flow control and reordering of address and data, improving throughput and avoiding unnecessary coupling between producers and consumers."},

{"instruction": "Explain why credit-based flow control is preferred over simple ready/valid in long pipelines.",
 "output": "Credits allow senders to transmit multiple beats without waiting for per-beat ready, increasing throughput and hiding latency in deep pipelines."},

{"instruction": "Explain why write-through caches simplify coherence at the cost of bandwidth.",
 "output": "Write-through immediately updates memory, avoiding dirty state management and simplifying coherence, but increases memory traffic compared to write-back caches."},

{"instruction": "Explain why hazard detection is required in a pipelined CPU load-use scenario.",
 "output": "A load-use hazard occurs when an instruction consumes a value before it is available from memory. Hazard detection inserts stalls or forwarding to preserve correctness."},

# Additional Testbench + Assertion samples
{"instruction": "Write SystemVerilog assertions to ensure AXI write responses are eventually returned after a write request.",
 "output": "assert property (@(posedge clk) disable iff(!rst_n) (awvalid && awready && wvalid && wready) |-> ##[1:$] bvalid);"},

# NOTE(review): this property has no antecedent, so it appears to require s_tvalid
# high on every clock; a clock-sampled assertion also cannot prove the absence of
# a combinational path (that is a structural/lint check). Revisit this sample.
{"instruction": "Write an assertion to ensure no combinational path exists from valid to ready in an AXI-Stream interface.",
 "output": "assert property (@(posedge clk) $stable(s_tready) throughout (s_tvalid));"},

{"instruction": "Write a testbench sequence to verify simultaneous read and write in a FIFO does not change occupancy.",
 "output": "initial begin wr_en=1; rd_en=1; repeat(5) @(posedge clk); wr_en=0; rd_en=0; end"},

{"instruction": "Write an assertion to ensure arbiter grants are mutually exclusive and cover all requests over time.",
 "output": "assert property (@(posedge clk) $onehot0(grant)); cover property (@(posedge clk) req != 0 ##[1:16] grant != 0);"},

# Additional Hard Generation samples
# Single output register: a new beat is accepted only when the register is empty
# or being drained this cycle, so a stalled beat is never overwritten and
# m_tvalid drops when nothing replaces a consumed beat.
{"instruction": "Design a parameterizable AXI-Stream register slice with bubble-free backpressure handling.",
 "output": "module axis_reg #(parameter WIDTH=32) (input clk, rst, input [WIDTH-1:0] s_tdata, input s_tvalid, output s_tready, output reg [WIDTH-1:0] m_tdata, output reg m_tvalid, input m_tready);\n  assign s_tready = !m_tvalid || m_tready;\n  always @(posedge clk) begin\n    if (rst) m_tvalid<=0;\n    else if (s_tready) begin\n      m_tvalid <= s_tvalid;\n      if (s_tvalid) m_tdata <= s_tdata;\n    end\n  end\nendmodule"},

# NOTE(review): s_ready is evaluated in the sclk domain directly from
# d_toggle_sync2, which is clocked by dclk, with no synchronizer on the return
# path - confirm whether this sample should be reworked.
{"instruction": "Design a CDC-safe multi-bit data transfer using handshake synchronizers.",
 "output": "module cdc_handshake #(parameter W=8) (input sclk, dclk, rst, input s_valid, output s_ready, input [W-1:0] s_data, output reg d_valid, input d_ready, output reg [W-1:0] d_data);\n  reg s_toggle, d_toggle_sync1, d_toggle_sync2;\n  assign s_ready = (s_toggle == d_toggle_sync2);\n  always @(posedge sclk) if (rst) s_toggle<=0; else if (s_valid && s_ready) s_toggle <= ~s_toggle;\n  always @(posedge dclk) begin d_toggle_sync1 <= s_toggle; d_toggle_sync2 <= d_toggle_sync1; end\n  always @(posedge dclk) if (d_toggle_sync2 != d_toggle_sync1) begin d_data <= s_data; d_valid <= 1; end else if (d_valid && d_ready) d_valid <= 0;\nendmodule"},

# NOTE(review): head/tail carry an extra wrap bit ([$clog2(N):0]) but full is
# computed as (tail+1)==head, which differs from the MSB-inversion full test used
# by the FIFO samples earlier in this list - confirm the intended depth.
{"instruction": "Design a reorder buffer skeleton for out-of-order completion with in-order retirement.",
 "output": "module rob #(parameter N=8) (input clk, rst, input enq, input deq, output reg empty, full);\n  reg [$clog2(N):0] head, tail;\n  always @(posedge clk) begin\n    if (rst) begin head<=0; tail<=0; end\n    else begin if (enq && !full) tail<=tail+1; if (deq && !empty) head<=head+1; end\n    empty <= (head==tail); full <= ((tail+1)==head);\n  end\nendmodule"},

# Consume and return are resolved in one priority chain so that a send and a
# credit_return in the same cycle cancel out instead of the later non-blocking
# assignment silently discarding the decrement.
{"instruction": "Design a credit-based flow control module for a streaming producer with initial credits parameter.",
 "output": "module credit_fc #(parameter CREDITS=8) (input clk, rst, input send, input credit_return, output reg can_send);\n  reg [$clog2(CREDITS+1)-1:0] credits;\n  wire consume = send && (credits > 0);\n  always @(posedge clk) begin\n    if (rst) credits <= CREDITS;\n    else if (consume && !credit_return) credits <= credits-1;\n    else if (!consume && credit_return) credits <= credits+1;\n    can_send <= (credits > 0);\n  end\nendmodule"},

{"instruction": "Design a MESI coherence state transition block for read and write hits.",
 "output": "module mesi_core (input clk, rst, input rd, wr, output reg [1:0] state);\n  localparam I=2'b00, S=2'b01, E=2'b10, M=2'b11;\n  always @(posedge clk) begin\n    if (rst) state<=I;\n    else case(state)\n      I: if (rd) state<=S; else if (wr) state<=M;\n      S: if (wr) state<=M;\n      E: if (wr) state<=M;\n      M: state<=M;\n    endcase\n  end\nendmodule"},

{"instruction": "Design a scoreboard for tracking register dependencies in a pipelined CPU.",
 "output": "module scoreboard #(parameter R=32) (input clk, rst, input issue, input [4:0] rd, input complete, input [4:0] wr, output reg stall);\n  reg busy [0:R-1];\n  integer i;\n  always @(posedge clk) begin\n    if (rst) for (i=0;i<R;i=i+1) busy[i] <= 0;\n    else begin if (issue) busy[rd] <= 1; if (complete) busy[wr] <= 0; stall <= busy[rd]; end\n  end\nendmodule"},

{"instruction": "Design a hardware timer with compare and interrupt generation.",
 "output": "module timer_cmp (input clk, rst, input [31:0] cmp, output reg irq);\n  reg [31:0] cnt;\n  always @(posedge clk) begin\n    if (rst) begin cnt<=0; irq<=0; end\n    else begin cnt<=cnt+1; if (cnt==cmp) irq<=1; else irq<=0; end\n  end\nendmodule"},

{"instruction": "Design a write-combining buffer for merging consecutive stores to the same cache line.",
 "output": "module wcb (input clk, rst, input store, input [31:0] addr, input [31:0] data, output reg flush);\n  reg [31:0] last_addr;\n  always @(posedge clk) begin\n    if (rst) begin last_addr<=0; flush<=0; end\n    else if (store) begin if (addr[31:6] != last_addr[31:6]) flush<=1; else flush<=0; last_addr<=addr; end\n  end\nendmodule"},

{"instruction": "Design a simple AXI burst length limiter to cap maximum burst size.",
 "output": "module axi_burst_limit (input clk, rst, input [7:0] arlen_in, output reg [7:0] arlen_out);\n  always @(posedge clk) begin\n    if (rst) arlen_out <= 8'd0;\n    else arlen_out <= (arlen_in > 8'd15) ? 8'd15 : arlen_in;\n  end\nendmodule"}

]

raw_data += [

# ============================================================
# CATEGORY 1: DEBUG RTL — 37 examples
# ============================================================

{"instruction": "The following synchronous reset always block has a logic error. Fix it:\nalways @(posedge clk) begin\n  if (!rst_n)\n    data <= 0;\n  data <= data_in;\nend",
 "output": "Missing else causes data_in to always overwrite the reset value. Fix:\nalways @(posedge clk) begin\n  if (!rst_n)\n    data <= 0;\n  else\n    data <= data_in;\nend"},

{"instruction": "The following one-hot FSM has unreachable states due to a bad default. Fix it:\nalways @(posedge clk) begin\n  case (state)\n    IDLE: if (start) state <= RUN;\n    RUN:  if (done) state <= IDLE;\n  endcase\nend",
 "output": "Add a default clause to handle undefined states and prevent synthesis latches:\nalways @(posedge clk) begin\n  if (!rst_n) state <= IDLE;\n  else case (state)\n    IDLE: if (start) state <= RUN;\n    RUN:  if (done) state <= IDLE;\n    default: state <= IDLE;\n  endcase\nend"},

{"instruction": "The following pipelined adder has a data hazard: it reads result one cycle before it is valid. Fix it:\nassign sum = a + b;\nreg [31:0] pipe_out;\nalways @(posedge clk) pipe_out <= sum;",
 "output": "Register both inputs and add a valid-tracking signal:\nalways @(posedge clk) begin\n  a_r <= a; b_r <= b; valid_r <= valid_in;\nend\nassign sum = a_r + b_r;\nalways @(posedge clk) begin\n  pipe_out <= sum; valid_out <= valid_r;\nend"},

{"instruction": "The following SPI master deasserts CS before the last bit is clocked. Fix the control logic.",
 "output": "Assert CS low before the first clock edge and deassert only after the final SCLK edge completes the last bit transfer. Track bit count and gate CS deassertion to bit_count == TOTAL_BITS - 1 with SCLK falling edge."},

{"instruction": "The following priority encoder outputs zero when all inputs are zero but no valid signal is generated. Fix it.",
 "output": "Add an explicit 'valid' output that is low when no input bits are asserted. Do not conflate index 0 with no-request. Set valid = |req and only update the encoded index when valid is high."},

{"instruction": "The following clock divider produces glitches because it uses combinational logic. Fix it:\nassign clk_div2 = (cnt[0]);",
 "output": "Drive the divided clock from a register, not combinational logic:\nalways @(posedge clk) clk_div2 <= ~clk_div2;\nFor even dividers, toggle on count comparison; never gate clocks with combinational terms to avoid glitches."},

{"instruction": "The following module has a latch inferred in synthesis due to incomplete case:\nalways @(*) begin\n  case (sel)\n    2'b00: y = a;\n    2'b01: y = b;\n    2'b10: y = c;\n  endcase\nend",
 "output": "Add a default assignment or complete all cases to prevent latch inference:\nalways @(*) begin\n  y = '0; // default\n  case (sel)\n    2'b00: y = a;\n    2'b01: y = b;\n    2'b10: y = c;\n    default: y = '0;\n  endcase\nend"},

{"instruction": "The following AXI4 burst address is not aligned to the data width. Fix the address generation.\nassign next_addr = addr + INCR;",
 "output": "For INCR bursts, increment must be aligned to the transfer size. Use:\nassign next_addr = (addr + (1 << arsize)) & ~((1 << arsize) - 1);\nAlso validate that the start address is aligned before issuing the burst."},

{"instruction": "The following reset synchronizer releases reset asynchronously but asserts synchronously, causing potential issues. Fix it.",
 "output": "A proper reset synchronizer should assert asynchronously (immediately) and deassert synchronously (after two flops). Fix:\nalways @(posedge clk or negedge rst_async_n) begin\n  if (!rst_async_n) sync <= 2'b00;\n  else sync <= {sync[0], 1'b1};\nend\nassign rst_sync_n = sync[1];"},

{"instruction": "The following shift register taps the wrong bit for an LFSR feedback:\nassign feedback = sr[7] ^ sr[5];\n// LFSR width = 8",
 "output": "For a maximal-length 8-bit LFSR use the correct taps. Standard 8-bit LFSR taps are positions [7,5,4,3]:\nassign feedback = sr[7] ^ sr[5] ^ sr[4] ^ sr[3];\nIncorrect taps produce non-maximal sequences."},

{"instruction": "The following handshake logic asserts both req and ack in the same cycle from the same module. Fix the protocol violation.",
 "output": "In a two-wire handshake, the requestor asserts req and holds it. The acknowledger independently asserts ack after capturing data. They must not originate from the same combinational block. Separate req generation from ack generation across module boundaries."},

{"instruction": "This memory model has X propagation because uninitialized memory is read before write. Fix it.",
 "output": "Initialize the memory array in simulation using an initial block or $readmemh/$readmemb. In synthesis, if reset is required, add a state machine that zeroes all locations on power-up before enabling reads."},

{"instruction": "The following write-enable pulse is only one cycle wide but the memory needs two cycles to settle. Fix the timing.",
 "output": "Extend the write-enable by registering it and keeping it asserted for the required hold time, or use a counter to stretch the pulse. Alternatively, insert a wait state and hold the address and data stable throughout."},

{"instruction": "The following FSM output is registered but combinational glitches appear on transitions. Fix it.",
 "output": "Register all FSM outputs explicitly and compute the next output in the next-state logic block, not as a combinational function of the current state. This adds one cycle of latency but eliminates transition glitches."},

{"instruction": "The following bus arbiter ignores lock signals and can preempt a locked transaction. Fix it.",
 "output": "Track a lock_active register. When the current master asserts lock, hold its grant and suppress new arbitration until lock is deasserted or a timeout expires. Only then re-run arbitration."},

{"instruction": "The following pipeline stage drops valid when downstream applies backpressure but does not buffer the data. Fix it:\nalways @(posedge clk) if (ready_out) begin valid_out <= valid_in; data_out <= data_in; end",
 "output": "Add a holding register to retain data during stall:\nalways @(posedge clk) begin\n  if (!stall) begin data_r <= data_in; valid_r <= valid_in; end\n  data_out <= stall ? data_r : data_in;\n  valid_out <= stall ? valid_r : valid_in;\nend"},

{"instruction": "The following I2C master does not respect the setup and hold times before START/STOP conditions. Fix it.",
 "output": "Insert idle time on SDA before toggling for START or STOP. SDA must be stable for t_SU_STA before SCL goes low for START, and SDA must not change until t_HD_STA after SCL falls. Add counter-based delay states between SCL and SDA transitions."},

{"instruction": "The following AXI4-Lite master issues a second request before receiving a response to the first. Fix it.",
 "output": "Track an outstanding_txn register. Set it on issuing valid AW+W, clear it on receiving bvalid && bready. Gate new transactions behind ~outstanding_txn to ensure only one inflight request at a time."},

{"instruction": "This CDC design synchronizes an 8-bit data bus directly with two flip-flops. Explain the bug and fix it.",
 "output": "Multi-bit buses cannot be safely synchronized with a simple two-flop synchronizer because each bit can transition at a different time, causing metastability on multiple bits simultaneously. Fix by using a CDC FIFO with Gray-coded pointers or a handshake-based transfer where the receiver synchronizes a single toggle signal."},

{"instruction": "The following parameterized shift register has an off-by-one in depth:\nmodule shreg #(parameter DEPTH=4) (...)\n  reg [DEPTH-1:0] sr;",
 "output": "The register has exactly DEPTH bits, meaning DEPTH-1 delay cycles, not DEPTH. If DEPTH=1 means a single-cycle register, use:\nreg [DEPTH:1] sr;\nOr document clearly that output is (DEPTH-1) cycles delayed and adjust instantiation parameters accordingly."},

{"instruction": "The following APB slave does not de-assert PREADY fast enough, causing timeout on the master side. Fix it.",
 "output": "PREADY must be asserted within the required number of wait states. Set PREADY=1 by default (zero wait states) and only deassert it when the slave needs extra cycles. Ensure PREADY is registered and returns to 1 within the master's timeout budget."},

{"instruction": "The following counter resets to 1 instead of 0 on rst. Fix it:\nalways @(posedge clk or posedge rst)\n  if (rst) cnt <= 1;\n  else cnt <= cnt + 1;",
 "output": "Reset should initialize to 0 unless intentional:\nalways @(posedge clk or posedge rst)\n  if (rst) cnt <= 0;\n  else cnt <= cnt + 1;"},

{"instruction": "The following Gray-to-binary decoder has incorrect bit ordering. Fix it:\nfor (i=0; i<N; i++) bin[i] = gray[i];",
 "output": "Gray-to-binary conversion requires XOR folding from the MSB down:\nbin[N-1] = gray[N-1];\nfor (i=N-2; i>=0; i--) bin[i] = bin[i+1] ^ gray[i];\nSimply copying bits produces incorrect binary values."},

{"instruction": "The following dual-clock FIFO uses a binary counter as a write pointer passed directly to the read domain. Fix it.",
 "output": "Convert the write pointer to Gray code before passing it to the read clock domain:\nfor (i=0; i<PTR_W; i++) wr_ptr_gray[i] = wr_ptr_bin[i] ^ wr_ptr_bin[i+1];\nSynchronize wr_ptr_gray with two flip-flops in the read clock domain, then convert back to binary for occupancy calculation."},

{"instruction": "The following pipelined multiply-accumulate has overflow with no detection. Fix it.",
 "output": "Widen the accumulator by log2(NUM_TERMS) bits beyond the product width to prevent overflow, or add explicit overflow detection:\nassign overflow = (acc[MSB] == acc[MSB-1]) ? 0 : 1;\nClamp or saturate the accumulator when overflow is detected."},

{"instruction": "The following DMA controller does not throttle when the destination FIFO is nearly full. Fix it.",
 "output": "Monitor the destination occupancy signal. When occupancy exceeds a programmable high-watermark threshold, deassert the DMA request to the bus arbiter and pause transfers until occupancy drops below a low-watermark."},

{"instruction": "The following edge detector produces a two-cycle pulse instead of one cycle. Fix it:\nalways @(posedge clk) begin\n  sig_r <= sig;\n  pulse <= sig & ~sig_r;\nend",
 "output": "The current logic is actually correct for a one-cycle pulse. If you are seeing two cycles, check that sig_r is being registered before the AND. If sig itself has glitches, add an extra register stage:\nsig_r1 <= sig;\nsig_r2 <= sig_r1;\npulse <= sig_r1 & ~sig_r2;"},

{"instruction": "The following PWM generator has a dead zone where duty cycle 0 still produces a one-cycle pulse. Fix it.",
 "output": "Handle the edge case explicitly:\nalways @(posedge clk) begin\n  if (cnt == period - 1) cnt <= 0;\n  else cnt <= cnt + 1;\n  pwm_out <= (duty == 0) ? 0 : (duty == period) ? 1 : (cnt < duty);\nend"},

{"instruction": "The following FIFO status flags are computed combinationally but registered outputs lag by one cycle. Fix the inconsistency.",
 "output": "Either make all status flags registered (update on same clock edge as pointer updates) or keep all combinational. Mixing registered and combinational flags causes status to be inconsistent within the same cycle and leads to spurious full/empty conditions."},

{"instruction": "The following module instantiation connects ports by position but the order has changed in the target module. Fix it.",
 "output": "Always use named port connections instead of positional:\nmod_name inst (.clk(clk), .rst(rst), .data_in(d), .data_out(q));\nPositional connections break silently when port order changes in the target module definition."},

{"instruction": "The following synchronous FIFO can corrupt data because read and write pointers are updated in separate always blocks. Fix it.",
 "output": "Place all pointer updates in a single always block or ensure both update on the same clock edge with no combinational dependency between them. Alternatively, use a single always block for the full state machine to ensure atomic updates."},

{"instruction": "The following AXI interconnect drops an AR channel request because ARREADY is deasserted before the master samples it. Fix the handshake.",
 "output": "ARREADY must be held high until ARVALID && ARREADY is true in the same cycle. Do not deassert ARREADY combinationally in response to ARVALID. Register ARREADY and clear it only after a confirmed handshake."},

{"instruction": "The following configurable CRC module uses the wrong polynomial for CRC-32. Fix it.\nparameter POLY = 32'h04C11DB7;",
 "output": "For standard Ethernet CRC-32, the reflected polynomial should be 32'hEDB88320. The non-reflected form 32'h04C11DB7 requires bit-reversal of input and output. Choose consistently: use 32'hEDB88320 with reflected input, or 32'h04C11DB7 with non-reflected input and output reversal."},

{"instruction": "The following clock gating cell is implemented with a latch but without the enable qualified by a clean clock. Fix it.",
 "output": "Use an integrated clock gating (ICG) cell or the standard latch-based pattern:\nalways_latch\n  if (!clk) en_lat <= enable; // latch enable on low clock phase\nassign gated_clk = clk & en_lat;\nThis prevents glitches by ensuring enable only changes when clock is low."},

{"instruction": "The following watchdog timer does not reset on a valid kick signal because the kick is only one cycle wide. Fix it.",
 "output": "The one-cycle kick width is normally fine if the watchdog samples the kick on a clock edge. Ensure the watchdog counter resets when kick is high at the clock edge:\nalways @(posedge clk)\n  if (kick) wdt_cnt <= WDT_TIMEOUT;\n  else if (wdt_cnt > 0) wdt_cnt <= wdt_cnt - 1;"},

{"instruction": "The following barrel shifter produces incorrect output for shift amount zero. Fix it.\nassign out = in >> shift_amt;",
 "output": "Verilog right-shift with shift_amt=0 should work correctly, but verify that shift_amt is unsigned. If shift_amt is signed and negative, behavior is undefined. Use $unsigned(shift_amt) to be explicit:\nassign out = in >> $unsigned(shift_amt);"},

{"instruction": "The following SERDES alignment logic locks on a false comma character due to no error count threshold. Fix it.",
 "output": "Require N consecutive valid comma characters (or a programmable count) before asserting word_lock. A single match can be a false positive due to data aliasing. Use a shift-and-count state machine that resets on any failed match."},

# ============================================================
# CATEGORY 2: EXPLAIN-WHY (Hardware Reasoning) — 36 examples
# ============================================================

{"instruction": "Explain why setup time violations are more dangerous than hold time violations in deep sub-micron designs.",
 "output": "Setup violations cause functional errors at speed — data does not arrive before the clock edge captures it. Hold violations can usually be fixed post-silicon by adding buffers, but setup violations require redesign or frequency reduction. In deep sub-micron, interconnect delay variance makes setup closure harder as clock periods shrink."},

{"instruction": "Explain why wires in deep sub-micron dominate delay more than gates.",
 "output": "As feature sizes shrink, transistors become faster but wire RC delay scales worse due to increasing resistivity and coupling capacitance. At 28nm and below, long wires dominate critical paths, motivating pipelining, clock tree optimization, and physical placement-aware synthesis."},

{"instruction": "Explain why FPGAs use LUT-based logic rather than standard cells.",
 "output": "LUTs implement any combinational function of N inputs in a single configurable element, enabling reconfigurability without manufacturing changes. Standard cells are optimized for a specific process and layout, giving better PPA but no reconfigurability."},

{"instruction": "Explain why metastability cannot be fully eliminated but only managed.",
 "output": "Metastability is a physical phenomenon arising from setup/hold violations where a flip-flop enters an analog indeterminate state. Given enough time, it resolves — but not in guaranteed time. Multi-stage synchronizers reduce the probability of downstream propagation exponentially but cannot reduce it to exactly zero."},

{"instruction": "Explain why AXI4 allows out-of-order responses across different IDs but no longer allows write data interleaving.",
 "output": "Responses carry an ID tag (RID for reads, BID for writes), so transactions with different IDs may complete out of order while transactions sharing an ID stay in order. AXI4 removed WID and write data interleaving: write data must be sent in the same order as the write addresses, which reduces slave complexity for write commit tracking."},

{"instruction": "Explain why SRAMs require a valid address hold time after WE deassertion.",
 "output": "The address must remain stable until the write pulse trailing edge so the memory cell completes its write cycle. Changing address before WE deasserts can cause writes to a partially correct address, corrupting adjacent cells."},

{"instruction": "Explain why power gating requires isolation cells on outputs of a powered-down domain.",
 "output": "When a domain is powered off, its output signals float to unknown values. Isolation cells clamp outputs to a safe known level (typically 0 or 1) before the domain powers down, preventing X propagation into always-on logic."},

{"instruction": "Explain why low-power designs use operand isolation to reduce dynamic power.",
 "output": "Logic that computes results with stale inputs still toggles bits even when the output is ignored, consuming dynamic power. Operand isolation gates the inputs to zero when the block output is not needed, eliminating unnecessary switching activity."},

{"instruction": "Explain why clock domain crossings cannot be validated by static timing analysis alone.",
 "output": "STA assumes a fixed clock relationship. For asynchronous crossings, there is no fixed phase relationship, so STA cannot model timing across the boundary. CDC must be verified by structural CDC analysis tools that identify unqualified paths crossing domains."},

{"instruction": "Explain why pipelining improves throughput but not latency.",
 "output": "Pipelining divides a long combinational path into stages, each completing in one cycle. This raises clock frequency and increases throughput (more results per second), but each individual result still takes multiple cycles (latency) to traverse all stages."},

{"instruction": "Explain why register retiming is safe for throughput but changes observable behavior.",
 "output": "Retiming moves registers across combinational logic to balance pipeline stages, preserving the functional input-output relationship. However, intermediate register values and their timing change, which can affect debug observability and reset behavior."},

{"instruction": "Explain why you cannot simply double-register a binary-coded multi-bit bus at a CDC crossing, but can do so for a Gray-coded pointer.",
 "output": "You can double-register Gray-coded pointers because they change only one bit per increment. If the bus is not Gray-coded, multiple bits may change simultaneously, and each bit's synchronizer can independently resolve to different values, producing a corrupted intermediate code."},

{"instruction": "Explain why FIFO depth must be a power of two for pointer-based full/empty detection.",
 "output": "Using an extra MSB on binary or Gray-coded pointers requires that the pointer wraps cleanly at 2^N so the MSB inversion trick works correctly. Non-power-of-two depths require more complex wrap detection logic."},

{"instruction": "Explain why synthesis tools can optimize away logic that drives only unconnected outputs.",
 "output": "Synthesis tools perform cone-of-influence pruning. If an output drives nothing downstream, the logic feeding it has no effect and is removed. This can silently drop important functionality if outputs are mistakenly left unconnected in the netlist."},

{"instruction": "Explain why AXI4 requires that a master not depend on a transaction completing before issuing another transaction to avoid deadlock.",
 "output": "If a master waits for a response before issuing a new request, and the interconnect or slave requires new transactions to make forward progress to return the previous response, a deadlock occurs. AXI4 requires masters to be able to accept responses independently of issuing new requests."},

{"instruction": "Explain why voltage scaling reduces dynamic power quadratically.",
 "output": "Dynamic power P = alpha * C * V^2 * f. Since power scales with V squared, halving voltage reduces dynamic power by 4x. This makes DVFS an extremely effective technique for power reduction in workload-adaptive designs."},

{"instruction": "Explain why hold violations cannot be fixed by reducing clock frequency.",
 "output": "Hold violations occur because data arrives too quickly — it changes before the old value has been safely captured. Clock frequency affects setup timing (how long data has to arrive) but not hold timing. Hold fixes require adding delay (buffers) on the data path."},

{"instruction": "Explain why false paths must be explicitly specified to synthesis and STA tools.",
 "output": "Tools assume all paths are timing-critical by default. False paths (such as between asynchronous clock domains or static configuration registers) waste optimization effort and can push the tool to fix timing that does not matter, degrading other paths. Explicit false path constraints direct effort correctly."},

{"instruction": "Explain why glitch power in combinational logic increases with fan-in.",
 "output": "Glitches arise from unequal propagation delays through different input paths. With more inputs and logic levels, there are more opportunities for a signal to briefly toggle before settling to its correct value. Each toggle dissipates switching power, so high fan-in logic generates more glitch energy."},

{"instruction": "Explain why write-back caches require dirty bit tracking.",
 "output": "Write-back caches hold modified data until eviction. The dirty bit marks cache lines that contain data not yet written to memory. Without it, the cache cannot distinguish between lines that need writeback on eviction and lines that can be silently replaced."},

{"instruction": "Explain why branch prediction is required for performance in out-of-order CPUs.",
 "output": "Fetching and executing subsequent instructions before resolving a branch is necessary to keep execution units busy. Without prediction, the pipeline stalls waiting for branch resolution. Mispredictions require flushing the pipeline, but correct predictions (>95% in modern predictors) allow continuous execution."},

{"instruction": "Explain why operand forwarding (bypassing) improves performance over a pure stall approach.",
 "output": "Forwarding bypasses the register file writeback stage by sending the result directly from the execute or memory stage to a dependent instruction's input. This eliminates the 1-2 cycle stall that would otherwise occur waiting for the result to be committed and read back."},

{"instruction": "Explain why content-addressable memories (CAMs) consume more power than SRAMs.",
 "output": "CAMs compare the input tag against all stored entries simultaneously by driving the entire match line. This involves simultaneous switching of the large capacitive match line across all rows, consuming far more dynamic power than a single-row SRAM read."},

{"instruction": "Explain why ECC is preferred over simple parity for memory error protection.",
 "output": "Parity detects single-bit errors but cannot correct them, forcing expensive system-level recovery. ECC (such as SECDED) corrects single-bit errors and detects double-bit errors in hardware without interrupting operation, which is essential for high-availability memory systems."},

{"instruction": "Explain why stall-based hazard resolution is simpler but slower than forwarding.",
 "output": "Stalling inserts NOPs (bubbles) into the pipeline, requiring no extra hardware but reducing throughput by the stall cycles. Forwarding adds bypass multiplexers and hazard detection logic, increasing area and complexity but maintaining full throughput for most hazards."},

{"instruction": "Explain why critical paths worsen with temperature in CMOS.",
 "output": "MOSFET carrier mobility decreases with temperature, increasing propagation delay. Threshold voltage also falls as temperature rises, which partly offsets the mobility loss, but at nominal supply voltages the mobility effect dominates and interconnect resistance rises as well. The net result is slower logic switching, tightening setup margins and potentially violating timing at elevated operating temperatures."},

{"instruction": "Explain why formal verification is better suited than simulation for control plane logic.",
 "output": "Simulation exercises a finite set of input sequences and cannot exhaustively cover state space. Formal verification mathematically proves properties across all possible inputs and states, making it far superior for control logic where rare corner cases can cause critical failures."},

{"instruction": "Explain why toggling unused clock enables wastes power even if the gated logic never switches.",
 "output": "The clock enable signal itself consumes power driving its own net and the enable input of the clock gate cell. Additionally, if clock gating is implemented as a latch-based ICG, the latch still samples the enable every cycle, consuming dynamic power regardless of whether the downstream logic switches."},

{"instruction": "Explain why speculative execution introduces security vulnerabilities like Spectre.",
 "output": "Speculatively executed instructions can leave microarchitectural side effects (such as cache state changes) that persist even if the architectural result is discarded. An attacker can measure these side effects to infer values that were never architecturally visible, leaking sensitive data."},

{"instruction": "Explain why a DDR PHY requires per-bit deskew calibration.",
 "output": "Each data bit travels through physically different wire lengths and buffer chains, introducing varying propagation delays. Per-bit deskew aligns all bits of a byte lane to a common sampling window, ensuring valid data is captured simultaneously across the full bus width."},

{"instruction": "Explain why address decoding in a bus fabric must be mutually exclusive and collectively exhaustive.",
 "output": "Overlapping address ranges cause multiple slaves to respond simultaneously, corrupting the data bus. Gaps in coverage cause master requests to go unacknowledged, resulting in timeouts. Full, non-overlapping address mapping ensures deterministic routing for every possible address."},

{"instruction": "Explain why synthesis of asynchronous resets requires special care for FPGA targets.",
 "output": "Many FPGA flip-flops support only synchronous reset in their dedicated reset paths. Asynchronous resets may be implemented using LUT logic, increasing area and potentially creating timing issues. Target-specific constraints must be checked, and synchronous reset is often preferred for portability."},

{"instruction": "Explain why DRAM refresh must be interleaved carefully with normal access.",
 "output": "DRAM cells lose charge over time and must be refreshed periodically. Refresh temporarily blocks all banks being refreshed, adding latency. Distributing refresh commands across the refresh interval (distributed refresh) reduces worst-case latency compared to burst refresh."},

{"instruction": "Explain why power-on reset must be held long enough for supply voltage to stabilize.",
 "output": "Digital logic requires supply voltage above a minimum threshold for reliable operation. A POR circuit must hold reset active until VDD has risen above this threshold and stabilized. Premature release of reset while supply is still ramping causes undefined state in flip-flops and initialization logic."},

{"instruction": "Explain why SRAM bitcell read disturb limits the minimum cell size.",
 "output": "During a read, the bit line is precharged and allowed to discharge through the cell. If the cell ratio (pull-down transistor to access transistor strength) is too small, the storage node can be inadvertently flipped, destroying the stored value. Minimum cell size is constrained by the read disturb noise margin."},

{"instruction": "Explain why SystemVerilog interfaces improve parameterized design reuse.",
 "output": "Interfaces encapsulate signal groups, directions, and protocol-specific modports into a single abstraction. Parameterized interfaces propagate width and configuration changes automatically to all connected modules, eliminating repetitive port declarations and reducing connection errors during design reuse."},

# ============================================================
# CATEGORY 3: TESTBENCH + ASSERTIONS (Verification) — 36 examples
# ============================================================

{"instruction": "Write a SystemVerilog assertion to verify AXI4 ARVALID never deasserts before ARREADY.",
 "output": "assert property (@(posedge clk) disable iff(!rst_n)\n  (arvalid && !arready) |-> ##1 arvalid);"},

{"instruction": "Write a SystemVerilog assertion to verify WVALID never deasserts before WREADY.",
 "output": "assert property (@(posedge clk) disable iff(!rst_n)\n  (wvalid && !wready) |-> ##1 wvalid);"},

{"instruction": "Write a cover property to verify that a FIFO reaches full state during simulation.",
 "output": "cover property (@(posedge clk) disable iff(!rst_n) full);"},

{"instruction": "Write an assertion to verify that a one-hot signal has exactly one bit set.",
 "output": "assert property (@(posedge clk) $onehot(state));"},

{"instruction": "Write SystemVerilog assertions to verify a ready/valid pipeline never loses data under backpressure.",
 "output": "assert property (@(posedge clk) disable iff(!rst_n)\n  (m_valid && !m_ready) |-> ##1 (m_valid && ($stable(m_data))));"},

{"instruction": "Write a clocking block and program block skeleton for a pipelined DUT testbench.",
 "output": "clocking cb @(posedge clk);\n  output #1 valid_in, data_in;\n  input  #1 valid_out, data_out;\nendclocking\nprogram test;\n  initial begin\n    cb.valid_in <= 1; cb.data_in <= 16'hBEEF;\n    @(cb); cb.valid_in <= 0;\n    wait(cb.valid_out); $display(cb.data_out);\n  end\nendprogram"},

{"instruction": "Write an assertion that the reset state is entered within 3 cycles of rst_n deassertion.",
 "output": "assert property (@(posedge clk)\n  $rose(rst_n) |-> ##[1:3] (state == IDLE));"},

{"instruction": "Write a functional coverage group for AXI burst lengths of 1, 4, 8, and 16.",
 "output": "covergroup axi_burst;\n  cp_len: coverpoint arlen {\n    bins len1  = {0};\n    bins len4  = {3};\n    bins len8  = {7};\n    bins len16 = {15};\n  }\nendgroup"},

{"instruction": "Write an assertion to verify that after a write to a config register, the output reflects the new value within 2 cycles.",
 "output": "assert property (@(posedge clk) disable iff(!rst_n)\n  (wr_en && wr_addr == CFG_ADDR) |-> ##[1:2] (cfg_out == wr_data));"},

{"instruction": "Write a SystemVerilog checker to verify no two masters receive a grant simultaneously.",
 "output": "checker mutual_exclusion (input clk, input [N-1:0] grant);\n  assert property (@(posedge clk) $onehot0(grant))\n    else $error(\"Multiple grants asserted: %b\", grant);\nendchecker"},

{"instruction": "Write a constrained random test to drive an AXI-Stream source with random valid/data and 30% backpressure probability.",
 "output": "task drive_axis;\n  repeat(1000) begin\n    @(posedge clk);\n    s_tvalid = $urandom_range(0,1);\n    s_tdata  = $urandom;\n    m_tready = ($urandom_range(0,9) >= 3); // 70% ready\n  end\nendtask"},

{"instruction": "Write an assertion to check that data_out is stable throughout a multi-cycle valid window.",
 "output": "assert property (@(posedge clk) disable iff(!rst_n)\n  (valid_out && !ready_in) |-> ##1 $stable(data_out));"},

{"instruction": "Write a SystemVerilog assertion to check FIFO occupancy never exceeds DEPTH.",
 "output": "assert property (@(posedge clk) disable iff(!rst_n)\n  (occupancy <= DEPTH))\n  else $error(\"FIFO overflow: occupancy=%0d\", occupancy);"},

{"instruction": "Write a testbench task to reset a DUT and verify outputs are cleared.",
 "output": "task reset_dut;\n  rst_n = 0;\n  repeat(5) @(posedge clk);\n  rst_n = 1;\n  @(posedge clk);\n  assert (dut_out == '0) else $error(\"Output not cleared after reset\");\nendtask"},

{"instruction": "Write an assertion to verify that a burst transaction never exceeds the maximum AXI burst length of 256.",
 "output": "assert property (@(posedge clk) disable iff(!rst_n)\n  (arvalid && arready) |-> (arlen <= 8'd255));"},

{"instruction": "Write coverage for both edges of a valid signal to ensure it is exercised asserted and deasserted.",
 "output": "covergroup valid_coverage;\n  cp_valid: coverpoint valid {\n    bins asserted   = {1};\n    bins deasserted = {0};\n    bins rise = (0 => 1);\n    bins fall = (1 => 0);\n  }\nendgroup"},

{"instruction": "Write a sequence to model a two-phase handshake: req asserted, then ack within 4 cycles.",
 "output": "sequence handshake;\n  req ##[1:4] ack;\nendsequence\nassert property (@(posedge clk) req |-> handshake);"},

{"instruction": "Write an assertion to ensure that no write occurs to a read-only register address.",
 "output": "assert property (@(posedge clk) disable iff(!rst_n)\n  (wr_en && wr_addr == RO_ADDR) |-> 0)\n  else $error(\"Illegal write to read-only register\");"},

{"instruction": "Write a testbench to inject an error into an ECC memory and verify correction.",
 "output": "initial begin\n  mem[0] = 32'hDEADBEEF;\n  #10 mem_corrupt = mem[0] ^ 32'h00000001; // flip bit 0\n  inject_error(0, mem_corrupt);\n  #10;\n  assert (read_data == 32'hDEADBEEF) else $error(\"ECC correction failed\");\nend"},

{"instruction": "Write a SystemVerilog assertion to verify that an interrupt is cleared within 8 cycles of an ack.",
 "output": "assert property (@(posedge clk) disable iff(!rst_n)\n  (irq_ack) |-> ##[1:8] (!irq));"},

{"instruction": "Write a testbench to verify FIFO simultaneous read/write maintains correct occupancy.",
 "output": "initial begin\n  // Pre-fill half the FIFO\n  repeat(DEPTH/2) begin @(posedge clk); wr_en=1; rd_en=0; end\n  @(posedge clk); wr_en=0;\n  // Simultaneous R/W - occupancy should stay constant\n  repeat(10) begin\n    @(posedge clk); wr_en=1; rd_en=1;\n    assert (occupancy == DEPTH/2);\n  end\nend"},

{"instruction": "Write a property to verify a pipeline flush completes within 5 cycles of a flush command.",
 "output": "assert property (@(posedge clk) disable iff(!rst_n)\n  flush_cmd |-> ##[1:5] pipe_empty);"},

{"instruction": "Write a bind-based assertion module to attach assertions to a FIFO without modifying it.",
 "output": "module fifo_sva (input clk, rst_n, wr_en, rd_en, full, empty);\n  assert property (@(posedge clk) disable iff(!rst_n) !(wr_en && full));\n  assert property (@(posedge clk) disable iff(!rst_n) !(rd_en && empty));\nendmodule\nbind my_fifo fifo_sva u_fifo_sva(.clk(clk), .rst_n(rst_n), .wr_en(wr_en), .rd_en(rd_en), .full(full), .empty(empty));"},

{"instruction": "Write an assertion to verify that a state machine only transitions to valid next states from IDLE.",
 "output": "assert property (@(posedge clk) disable iff(!rst_n)\n  (state == IDLE) |=> (state inside {IDLE, START, CONFIG}));"},

{"instruction": "Write a coverage point to track all combinations of full and empty flags.",
 "output": "covergroup fifo_flags;\n  cp_flags: coverpoint {full, empty} {\n    bins empty_only  = {2'b01};\n    bins neither     = {2'b00};\n    bins full_only   = {2'b10};\n    illegal_bins both_set = {2'b11};\n  }\nendgroup"},

{"instruction": "Write an assertion to verify that a DMA transfer length matches what was programmed.",
 "output": "assert property (@(posedge clk) disable iff(!rst_n)\n  (dma_done) |-> (beats_transferred == programmed_length));"},

{"instruction": "Write a SystemVerilog assertion for WDT: if no kick arrives within TIMEOUT cycles, wdt_rst fires.",
 "output": "assert property (@(posedge clk) disable iff(!rst_n)\n  !kick throughout ##[TIMEOUT:TIMEOUT] 1'b1 |-> wdt_rst);"},

{"instruction": "Write a testbench task to verify AXI-Lite register write and read-back.",
 "output": "task axi_reg_test (input [31:0] addr, data);\n  axi_write(addr, data);\n  #20;\n  axi_read(addr);\n  assert (rdata == data) else $error(\"Readback mismatch at 0x%0h\", addr);\nendtask"},

{"instruction": "Write an SVA to verify that BVALID is asserted exactly once per write transaction.",
 "output": "assert property (@(posedge clk) disable iff(!rst_n)\n  (awvalid && awready && wvalid && wready) |-> ##[1:$] (bvalid && bready) ##1 !bvalid);"},

{"instruction": "Write a randomized sequence to exercise the full range of AXI burst sizes.",
 "output": "for (int s = 0; s <= 6; s++) begin\n  arsize = s[2:0];\n  arlen  = $urandom_range(0, 255);\n  arvalid = 1;\n  wait(arready); @(posedge clk);\n  arvalid = 0;\nend"},

{"instruction": "Write a functional coverage group to track whether the arbiter grants all 4 clients.",
 "output": "covergroup arb_coverage;\n  cp_grant: coverpoint grant[3:0] {\n    bins client0 = {4'b0001};\n    bins client1 = {4'b0010};\n    bins client2 = {4'b0100};\n    bins client3 = {4'b1000};\n  }\nendgroup"},

{"instruction": "Write a check to verify that a CDC pulse arrives in the destination domain within 5 destination clock cycles.",
 "output": "assert property (@(posedge dst_clk) disable iff(rst)\n  $rose(src_toggle_sync) |-> ##[1:5] $rose(dst_pulse));"},

{"instruction": "Write an assertion to verify the AXI4 burst does not cross a 4KB address boundary.",
 "output": "assert property (@(posedge clk) disable iff(!rst_n)\n  (arvalid && arready) |-> (({araddr[11:0]} + ((arlen+1) << arsize)) <= 13'h1000));"},

{"instruction": "Write a testbench to measure and report FIFO throughput in transactions per simulation time unit.",
 "output": "integer txn_count = 0;\nalways @(posedge clk) if (wr_en && !full) txn_count++;\ninitial begin\n  #SIM_TIME;\n  $display(\"Throughput: %0d txns in %0t\", txn_count, $time);\nend"},

{"instruction": "Write a SystemVerilog assertion to verify no X or Z values appear on a critical data bus.",
 "output": "assert property (@(posedge clk) disable iff(!rst_n)\n  valid_out |-> !$isunknown(data_out));"},

{"instruction": "Write an assertion to verify a round-robin arbiter grants all active clients within 4 cycles.",
 "output": "genvar i;\ngenerate\n  for (i=0;i<4;i++) begin\n    assert property (@(posedge clk) disable iff(rst)\n      req[i] |-> ##[1:4] grant[i]);\n  end\nendgenerate"},

# ============================================================
# CATEGORY 4: HARD GENERATION (Complex RTL) — 36 examples
# ============================================================

{"instruction": "Design a parameterizable synchronous FIFO with configurable depth and width, including valid full/empty/occupancy outputs.",
 "output": "module sync_fifo #(parameter WIDTH=8, DEPTH=16) (\n  input clk, rst_n,\n  input wr_en, [WIDTH-1:0] din,\n  input rd_en, output reg [WIDTH-1:0] dout,\n  output full, empty,\n  output reg [$clog2(DEPTH):0] occupancy\n);\n  reg [WIDTH-1:0] mem [0:DEPTH-1];\n  reg [$clog2(DEPTH):0] wr_ptr, rd_ptr;\n  assign full  = (occupancy == DEPTH);\n  assign empty = (occupancy == 0);\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin wr_ptr<=0; rd_ptr<=0; occupancy<=0; end\n    else begin\n      if (wr_en && !full)  begin mem[wr_ptr[$clog2(DEPTH)-1:0]] <= din;  wr_ptr <= wr_ptr+1; end\n      if (rd_en && !empty) begin dout <= mem[rd_ptr[$clog2(DEPTH)-1:0]]; rd_ptr <= rd_ptr+1; end\n      occupancy <= wr_ptr - rd_ptr;\n    end\n  end\nendmodule"},

{"instruction": "Design a configurable frequency divider supporting both even and odd divisors.",
 "output": "module freq_div #(parameter N=5) (input clk, rst, output clk_div);\n  reg [$clog2(N)-1:0] cnt;\n  reg clk_r;\n  // For odd N: use half-period toggle trick\n  wire toggle_pos = (cnt == N-1);\n  wire toggle_neg = (cnt == (N-1)/2);\n  always @(posedge clk or posedge rst) begin\n    if (rst) begin cnt<=0; clk_r<=0; end\n    else begin if (cnt==N-1) cnt<=0; else cnt<=cnt+1; if(toggle_pos) clk_r<=~clk_r; end\n  end\n  assign clk_div = (N%2==0) ? clk_r : clk_r ^ (toggle_neg & clk);\nendmodule"},

{"instruction": "Design a deterministic reset synchronizer that asserts reset asynchronously and deasserts synchronously.",
 "output": "module rst_sync #(parameter STAGES=2) (input clk, rst_async_n, output rst_sync_n);\n  reg [STAGES-1:0] sync_chain;\n  always @(posedge clk or negedge rst_async_n) begin\n    if (!rst_async_n) sync_chain <= '0;\n    else sync_chain <= {sync_chain[STAGES-2:0], 1'b1};\n  end\n  assign rst_sync_n = sync_chain[STAGES-1];\nendmodule"},

{"instruction": "Design a UART receiver that handles 8N1 format and outputs a valid data byte with a done pulse.",
 "output": "module uart_rx #(parameter CLKS_PER_BIT=87) (\n  input clk, rst, rxd,\n  output reg [7:0] rx_data, output reg rx_done\n);\n  reg [3:0] state; reg [6:0] clk_cnt; reg [2:0] bit_idx;\n  localparam IDLE=0, START=1, DATA=2, STOP=3;\n  always @(posedge clk) begin\n    rx_done <= 0;\n    case(state)\n      IDLE:  if(!rxd) begin clk_cnt<=0; state<=START; end\n      START: if(clk_cnt==CLKS_PER_BIT/2) begin clk_cnt<=0; bit_idx<=0; state<=DATA; end else clk_cnt<=clk_cnt+1;\n      DATA:  if(clk_cnt==CLKS_PER_BIT) begin clk_cnt<=0; rx_data<={rxd,rx_data[7:1]}; if(bit_idx==7) state<=STOP; else bit_idx<=bit_idx+1; end else clk_cnt<=clk_cnt+1;\n      STOP:  if(clk_cnt==CLKS_PER_BIT) begin rx_done<=1; clk_cnt<=0; state<=IDLE; end else clk_cnt<=clk_cnt+1;\n    endcase\n  end\nendmodule"},

{"instruction": "Design a simple 5-stage in-order CPU pipeline with IF, ID, EX, MEM, WB stages and stall on load-use hazard.",
 "output": "module pipeline (input clk, rst);\n  // Stage registers\n  reg [31:0] if_id_pc, id_ex_a, id_ex_b, ex_mem_alu, mem_wb_data;\n  reg [4:0]  id_ex_rd, ex_mem_rd, mem_wb_rd;\n  reg        id_ex_memrd, ex_mem_memrd;\n  // Hazard detection\n  wire load_use = id_ex_memrd && ((id_ex_rd == rs1) || (id_ex_rd == rs2));\n  // Stall: freeze IF/ID, insert bubble into EX\n  always @(posedge clk) begin\n    if (!load_use) begin\n      // advance all stages normally\n    end else begin\n      // hold if_id_pc, insert NOP into id_ex\n      id_ex_rd <= 0; id_ex_memrd <= 0;\n    end\n  end\nendmodule"},

{"instruction": "Design an AXI4-Lite to APB bridge converting single AXI-Lite transactions to APB cycles.",
 "output": "module axi2apb (\n  input clk, rst_n,\n  // AXI-Lite slave\n  input [31:0] awaddr, wdata, araddr,\n  input awvalid, wvalid, arvalid,\n  output reg awready, wready, bvalid, arready, rvalid,\n  output reg [31:0] rdata, output reg [1:0] bresp, rresp,\n  // APB master\n  output reg [31:0] paddr, pwdata, output reg pwrite, psel, penable,\n  input [31:0] prdata, input pready\n);\n  reg [2:0] state;\n  localparam IDLE=0, SETUP=1, ACCESS=2, RESP=3;\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin state<=IDLE; psel<=0; penable<=0; bvalid<=0; rvalid<=0; end\n    else case(state)\n      IDLE: begin\n        if (awvalid && wvalid) begin paddr<=awaddr; pwdata<=wdata; pwrite<=1; psel<=1; awready<=1; wready<=1; state<=SETUP; end\n        else if (arvalid) begin paddr<=araddr; pwrite<=0; psel<=1; arready<=1; state<=SETUP; end\n      end\n      SETUP:  begin awready<=0; wready<=0; arready<=0; penable<=1; state<=ACCESS; end\n      ACCESS: if (pready) begin\n        penable<=0; psel<=0;\n        if (pwrite) begin bvalid<=1; bresp<=0; end\n        else begin rvalid<=1; rdata<=prdata; rresp<=0; end\n        state<=RESP;\n      end\n      RESP: begin\n        if (bvalid) bvalid<=0;\n        if (rvalid) rvalid<=0;\n        state<=IDLE;\n      end\n    endcase\n  end\nendmodule"},

{"instruction": "Design a parameterizable barrel shifter supporting logical left, logical right, and arithmetic right shifts.",
 "output": "module barrel_shift #(parameter W=32) (\n  input [W-1:0] in, input [$clog2(W)-1:0] shamt,\n  input [1:0] mode, // 00=LSL, 01=LSR, 10=ASR\n  output reg [W-1:0] out\n);\n  always @(*) begin\n    case(mode)\n      2'b00: out = in << shamt;\n      2'b01: out = in >> shamt;\n      2'b10: out = $signed(in) >>> shamt;\n      default: out = in;\n    endcase\n  end\nendmodule"},

{"instruction": "Design a token bucket rate limiter for an AXI-Stream data path.",
 "output": "module token_bucket #(parameter MAX_TOKENS=64, REFILL_RATE=4) (\n  input clk, rst, input s_valid, output s_ready,\n  output reg m_valid, input m_ready\n);\n  reg [6:0] tokens;\n  reg [7:0] refill_cnt;\n  assign s_ready = (tokens > 0) && (m_ready || !m_valid);\n  always @(posedge clk) begin\n    if (rst) begin tokens<=MAX_TOKENS; refill_cnt<=0; m_valid<=0; end\n    else begin\n      if (refill_cnt == REFILL_RATE-1) begin refill_cnt<=0; if(tokens<MAX_TOKENS) tokens<=tokens+1; end\n      else refill_cnt<=refill_cnt+1;\n      if (s_valid && s_ready) begin tokens<=tokens-1; m_valid<=1; end\n      else if (m_valid && m_ready) m_valid<=0;\n    end\n  end\nendmodule"},

{"instruction": "Design a simple non-blocking cache with direct-mapped organization for instruction fetch.",
 "output": "module icache #(parameter SETS=64, TAG_W=20) (\n  input clk, rst, input [31:0] req_addr, input req_valid,\n  output reg [31:0] data_out, output reg hit, miss\n);\n  reg [31:0] data_arr [0:SETS-1];\n  reg [TAG_W-1:0] tag_arr [0:SETS-1];\n  reg valid_arr [0:SETS-1];\n  wire [$clog2(SETS)-1:0] index = req_addr[$clog2(SETS)+1:2];\n  wire [TAG_W-1:0] tag = req_addr[31:$clog2(SETS)+2];\n  always @(posedge clk) begin\n    if (rst) begin integer i; for(i=0;i<SETS;i=i+1) valid_arr[i]<=0; end\n    else if (req_valid) begin\n      if (valid_arr[index] && tag_arr[index]==tag) begin data_out<=data_arr[index]; hit<=1; miss<=0; end\n      else begin hit<=0; miss<=1; end\n    end\n  end\nendmodule"},

{"instruction": "Design a configurable linear feedback shift register (LFSR) with programmable seed and polynomial.",
 "output": "module lfsr #(parameter W=16, parameter POLY=16'hD008) (\n  input clk, rst, input load, input [W-1:0] seed,\n  output reg [W-1:0] lfsr_out\n);\n  wire feedback = ^(lfsr_out & POLY);\n  always @(posedge clk) begin\n    if (rst) lfsr_out <= 1;\n    else if (load) lfsr_out <= seed;\n    else lfsr_out <= {lfsr_out[W-2:0], feedback};\n  end\nendmodule"},

{"instruction": "Design an AHB to AXI-Lite bridge for read transactions only.",
 "output": "module ahb2axilite_rd (\n  input clk, rst_n,\n  input [31:0] haddr, input [1:0] htrans, input hsel, input hready_in,\n  output reg hready_out, output reg [31:0] hrdata,\n  output reg [31:0] araddr, output reg arvalid, input arready,\n  input [31:0] rdata, input rvalid, output reg rready\n);\n  reg [1:0] state;\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin state<=0; arvalid<=0; rready<=0; hready_out<=1; end\n    else case(state)\n      0: if (hsel && htrans[1] && hready_in) begin araddr<=haddr; arvalid<=1; hready_out<=0; state<=1; end\n      1: if (arready) begin arvalid<=0; rready<=1; state<=2; end\n      2: if (rvalid) begin hrdata<=rdata; rready<=0; hready_out<=1; state<=0; end\n    endcase\n  end\nendmodule"},

{"instruction": "Design a parameterizable delay line (shift register) with an output tap at a configurable depth.",
 "output": "module delay_line #(parameter W=8, DEPTH=16, TAP=8) (\n  input clk, rst, input [W-1:0] din, output [W-1:0] dout_full, dout_tap\n);\n  reg [W-1:0] sr [0:DEPTH-1];\n  integer i;\n  always @(posedge clk) begin\n    if (rst) for(i=0;i<DEPTH;i=i+1) sr[i]<=0;\n    else begin sr[0]<=din; for(i=1;i<DEPTH;i=i+1) sr[i]<=sr[i-1]; end\n  end\n  assign dout_full = sr[DEPTH-1];\n  assign dout_tap  = sr[TAP-1];\nendmodule"},

{"instruction": "Design a synchronous dual-port RAM with independent read and write ports.",
 "output": "module dp_ram #(parameter W=32, DEPTH=256) (\n  input clk,\n  input wr_en, input [$clog2(DEPTH)-1:0] wr_addr, input [W-1:0] wr_data,\n  input rd_en, input [$clog2(DEPTH)-1:0] rd_addr, output reg [W-1:0] rd_data\n);\n  reg [W-1:0] mem [0:DEPTH-1];\n  always @(posedge clk) begin\n    if (wr_en) mem[wr_addr] <= wr_data;\n    if (rd_en) rd_data <= mem[rd_addr];\n  end\nendmodule"},

{"instruction": "Design a hardware Fibonacci sequence generator with reset and step control.",
 "output": "module fib_gen #(parameter W=32) (\n  input clk, rst, step,\n  output reg [W-1:0] fib_out\n);\n  reg [W-1:0] a, b;\n  always @(posedge clk) begin\n    if (rst) begin a<=0; b<=1; fib_out<=0; end\n    else if (step) begin fib_out<=a; b<=a+b; a<=b; end\n  end\nendmodule"},

{"instruction": "Design a configurable clock multiplexer with glitch-free switching using synchronizers.",
 "output": "module clk_mux_gf (\n  input clk0, clk1, rst, select,\n  output clk_out\n);\n  reg [1:0] sync0, sync1;\n  reg en0, en1;\n  // Synchronize select into each domain\n  always @(posedge clk0 or posedge rst) sync0 <= rst ? 0 : {sync0[0], !select & !en1};\n  always @(posedge clk1 or posedge rst) sync1 <= rst ? 0 : {sync1[0],  select & !en0};\n  // Update each enable on the falling edge so it only changes while its clock is low (no runt pulses)\n  always @(negedge clk0 or posedge rst) en0 <= rst ? 0 : sync0[1];\n  always @(negedge clk1 or posedge rst) en1 <= rst ? 0 : sync1[1];\n  assign clk_out = (clk0 & en0) | (clk1 & en1);\nendmodule"},

{"instruction": "Design a bus transaction logger that records the last N AXI-Lite writes into a circular buffer.",
 "output": "module txn_logger #(parameter N=8) (\n  input clk, rst_n,\n  input wr_hit, input [31:0] wr_addr, wr_data,\n  output reg [31:0] log_addr [0:N-1],\n  output reg [31:0] log_data [0:N-1]\n);\n  reg [$clog2(N)-1:0] ptr;\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) ptr <= 0;\n    else if (wr_hit) begin\n      log_addr[ptr] <= wr_addr;\n      log_data[ptr] <= wr_data;\n      ptr <= (ptr == N-1) ? 0 : ptr + 1;\n    end\n  end\nendmodule"},

{"instruction": "Design a stream width converter that merges two 8-bit AXI-Stream inputs into one 16-bit output.",
 "output": "module width_upconv (\n  input clk, rst,\n  input [7:0] s0_data, s1_data,\n  input s0_valid, s1_valid,\n  output reg s0_ready, s1_ready,\n  output reg [15:0] m_data, output reg m_valid, input m_ready\n);\n  reg got0, got1;\n  reg [7:0] hold0;\n  always @(posedge clk) begin\n    if (rst) begin got0<=0; got1<=0; m_valid<=0; end\n    else begin\n      if (s0_valid && !got0) begin hold0<=s0_data; got0<=1; end\n      if (s1_valid && got0 && !m_valid) begin m_data<={s1_data,hold0}; m_valid<=1; got0<=0; got1<=0; end\n      if (m_valid && m_ready) m_valid<=0;\n      s0_ready <= !got0; s1_ready <= got0 && !m_valid;\n    end\n  end\nendmodule"},

{"instruction": "Design a hardware divider using the non-restoring algorithm for 8-bit operands.",
 "output": "module divider_8 (\n  input clk, rst, input [7:0] dividend, divisor,\n  input start, output reg [7:0] quotient, remainder,\n  output reg done\n);\n  reg [15:0] partial;\n  reg [3:0] cnt;\n  always @(posedge clk) begin\n    if (rst || start) begin partial<={8'b0,dividend}; cnt<=8; done<=0; quotient<=0; end\n    else if (cnt>0) begin\n      partial <= partial << 1;\n      if (partial[15:8] >= divisor) begin partial[15:8] <= partial[15:8]-divisor; partial[0]<=1; end\n      cnt <= cnt-1;\n    end else begin quotient<=partial[7:0]; remainder<=partial[15:8]; done<=1; end\n  end\nendmodule"},

{"instruction": "Design an I2C controller state machine for single-byte master write transactions.",
 "output": "module i2c_master_wr (\n  input clk, rst, input [6:0] addr, input [7:0] data,\n  input start, output reg sda_out, scl_out, output reg done\n);\n  reg [4:0] state;\n  reg [3:0] bit_cnt;\n  reg [7:0] shift_reg;\n  // States: IDLE, START, ADDR, ACK1, DATA, ACK2, STOP\n  // SCL is clocked at clk/4; sda transitions when scl is low\n  // Full state machine omitted for brevity; key transitions below:\n  always @(posedge clk) begin\n    if (rst) begin state<=0; scl_out<=1; sda_out<=1; done<=0; end\n    // ... detailed state encoding with bit_cnt tracking ...\n  end\nendmodule"},

{"instruction": "Design a register file with read forwarding to handle the same-cycle write-read to the same address.",
 "output": "module regfile #(parameter W=32, D=32) (\n  input clk, input wr_en, input [$clog2(D)-1:0] wr_addr, rd_addr,\n  input [W-1:0] wr_data, output [W-1:0] rd_data\n);\n  reg [W-1:0] regs [0:D-1];\n  always @(posedge clk) if (wr_en) regs[wr_addr] <= wr_data;\n  // Forward: if reading the same address being written, return new data\n  assign rd_data = (wr_en && wr_addr == rd_addr) ? wr_data : regs[rd_addr];\nendmodule"},

{"instruction": "Design a parameterizable round-robin scheduler that issues credits to N clients.",
 "output": "module rr_scheduler #(parameter N=4, CREDITS=2) (\n  input clk, rst, input [N-1:0] req,\n  output reg [N-1:0] grant\n);\n  reg [$clog2(N)-1:0] ptr;\n  reg found; // blocking temp: set once a requester has been granted this cycle\n  integer i;\n  always @(posedge clk) begin\n    if (rst) begin grant<=0; ptr<=0; end\n    else begin\n      grant <= 0;\n      found = 0;\n      for (i=0;i<N;i=i+1)\n        if (req[(ptr+i)%N] && !found) begin\n          grant[(ptr+i)%N] <= 1;\n          ptr <= (ptr+i+1)%N;\n          found = 1;\n        end\n    end\n  end\nendmodule"},

{"instruction": "Design a two-port register with write-capture and read-acknowledge handshake.",
 "output": "module handshake_reg #(parameter W=32) (\n  input clk, rst,\n  input wr_valid, input [W-1:0] wr_data, output wr_ready,\n  input rd_ready, output reg [W-1:0] rd_data, output reg rd_valid\n);\n  reg [W-1:0] buf_r;\n  reg full;\n  assign wr_ready = !full;\n  always @(posedge clk) begin\n    if (rst) begin full<=0; rd_valid<=0; end\n    else begin\n      if (wr_valid && !full) begin buf_r<=wr_data; full<=1; end\n      // Drain first; a refill below overrides this so rd_valid stays high with the new word\n      if (rd_valid && rd_ready) rd_valid<=0;\n      if (full && (!rd_valid || rd_ready)) begin rd_data<=buf_r; rd_valid<=1; full<=0; end\n    end\n  end\nendmodule"},

{"instruction": "Design a hardware CRC-8 checker using the polynomial x^8+x^2+x+1.",
 "output": "module crc8 (\n  input clk, rst, input [7:0] data_in, input data_valid,\n  output reg [7:0] crc_out\n);\n  // Bitwise MSB-first CRC-8 update for polynomial x^8+x^2+x+1 (0x07)\n  function [7:0] crc8_byte(input [7:0] crc, input [7:0] data);\n    integer i;\n    reg [7:0] c;\n    begin\n      c = crc ^ data;\n      for (i=0;i<8;i=i+1)\n        c = c[7] ? ((c << 1) ^ 8'h07) : (c << 1);\n      crc8_byte = c;\n    end\n  endfunction\n  wire [7:0] next_crc = crc8_byte(crc_out, data_in);\n  always @(posedge clk) begin\n    if (rst) crc_out <= 8'hFF;\n    else if (data_valid) crc_out <= next_crc;\n  end\nendmodule"},

{"instruction": "Design a configurable multi-stage pipeline with bypass forwarding between any two stages.",
 "output": "module fwd_pipeline #(parameter STAGES=4, W=32) (\n  input clk, rst,\n  input [W-1:0] din, input valid_in,\n  output [W-1:0] dout, output valid_out\n);\n  reg [W-1:0] pipe_data [0:STAGES-1];\n  reg pipe_valid [0:STAGES-1];\n  integer i;\n  always @(posedge clk) begin\n    if (rst) for(i=0;i<STAGES;i=i+1) pipe_valid[i]<=0;\n    else begin\n      pipe_data[0]<=din; pipe_valid[0]<=valid_in;\n      for(i=1;i<STAGES;i=i+1) begin pipe_data[i]<=pipe_data[i-1]; pipe_valid[i]<=pipe_valid[i-1]; end\n    end\n  end\n  assign dout = pipe_data[STAGES-1];\n  assign valid_out = pipe_valid[STAGES-1];\nendmodule"},

{"instruction": "Design an AXI4-Stream demultiplexer that routes traffic to one of N outputs based on a TDEST field.",
 "output": "module axis_demux #(parameter N=4, W=32, DEST_W=2) (\n  input clk, rst,\n  input [W-1:0] s_tdata, input [DEST_W-1:0] s_tdest,\n  input s_tvalid, output reg s_tready,\n  output reg [W-1:0] m_tdata [0:N-1],\n  output reg [N-1:0] m_tvalid, input [N-1:0] m_tready\n);\n  always @(posedge clk) begin\n    if (rst) begin m_tvalid<=0; s_tready<=1; end\n    else begin\n      m_tvalid <= 0;\n      if (s_tvalid) begin\n        m_tdata[s_tdest]  <= s_tdata;\n        m_tvalid[s_tdest] <= 1;\n        s_tready <= m_tready[s_tdest];\n      end\n    end\n  end\nendmodule"},

{"instruction": "Design a performance counter block with 4 event inputs and read-clear register interface.",
 "output": "module perf_counters #(parameter W=32) (\n  input clk, rst_n,\n  input [3:0] events,\n  input rd_en, input [1:0] rd_sel,\n  output reg [W-1:0] rd_data\n);\n  reg [W-1:0] cnt [0:3];\n  integer i;\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) for(i=0;i<4;i=i+1) cnt[i]<=0;\n    else begin\n      for(i=0;i<4;i=i+1) if(events[i]) cnt[i]<=cnt[i]+1;\n      if (rd_en) begin rd_data<=cnt[rd_sel]; cnt[rd_sel]<=0; end // read-clear\n    end\n  end\nendmodule"},

{"instruction": "Design a hardware priority encoder with valid output for up to 8 requests.",
 "output": "module pri_enc8 (\n  input [7:0] req,\n  output reg [2:0] idx, output reg valid\n);\n  always @(*) begin\n    valid = |req;\n    casez(req)\n      8'b???????1: idx = 0;\n      8'b??????10: idx = 1;\n      8'b?????100: idx = 2;\n      8'b????1000: idx = 3;\n      8'b???10000: idx = 4;\n      8'b??100000: idx = 5;\n      8'b?1000000: idx = 6;\n      8'b10000000: idx = 7;\n      default:     idx = 0;\n    endcase\n  end\nendmodule"},

{"instruction": "Design a configurable interrupt controller with enable, mask, status, and software-clear registers.",
 "output": "module irq_ctrl #(parameter N=8) (\n  input clk, rst_n,\n  input [N-1:0] irq_in,\n  input wr_en, input [1:0] wr_reg, input [N-1:0] wr_data,\n  output reg irq_out\n);\n  reg [N-1:0] enable_r, mask_r, status_r;\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin enable_r<=0; mask_r<=0; status_r<=0; irq_out<=0; end\n    else begin\n      status_r <= status_r | irq_in; // latch events\n      if (wr_en) case(wr_reg)\n        2'b00: enable_r <= wr_data;\n        2'b01: mask_r   <= wr_data;\n        2'b10: status_r <= status_r & ~wr_data; // W1C\n      endcase\n      irq_out <= |(status_r & enable_r & ~mask_r);\n    end\n  end\nendmodule"},

{"instruction": "Design a scoreboard-based flow controller that tracks issued vs completed transactions with a configurable window.",
 "output": "module flow_ctrl #(parameter WINDOW=8) (\n  input clk, rst_n, input issue, complete,\n  output reg can_issue\n);\n  reg [$clog2(WINDOW+1):0] inflight;\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) inflight <= 0;\n    else case({issue, complete})\n      2'b10: if (inflight < WINDOW) inflight <= inflight+1;\n      2'b01: if (inflight > 0)      inflight <= inflight-1;\n      2'b11: inflight <= inflight; // net zero\n      default: inflight <= inflight;\n    endcase\n    can_issue <= (inflight < WINDOW);\n  end\nendmodule"},

{"instruction": "Design a hardware Hamming encoder for 8-bit data producing a 13-bit SECDED codeword.",
 "output": "module hamming_enc (\n  input [7:0] data,\n  output [12:0] codeword\n);\n  // Bits 1..12 form a Hamming(12,8) code; bit 0 is the overall parity for double-error detection\n  // Place data bits at non-power-of-2 positions: 3,5,6,7,9,10,11,12\n  wire [12:0] c;\n  assign c[3]  = data[0]; assign c[5]  = data[1]; assign c[6]  = data[2]; assign c[7]  = data[3];\n  assign c[9]  = data[4]; assign c[10] = data[5]; assign c[11] = data[6]; assign c[12] = data[7];\n  // Parity bits at positions 1,2,4,8: each is the XOR of the data positions whose index has that bit set\n  assign c[1]  = c[3]^c[5]^c[7]^c[9]^c[11];\n  assign c[2]  = c[3]^c[6]^c[7]^c[10]^c[11];\n  assign c[4]  = c[5]^c[6]^c[7]^c[12];\n  assign c[8]  = c[9]^c[10]^c[11]^c[12];\n  // Overall (even) parity across the 12-bit Hamming code\n  assign c[0]  = ^c[12:1];\n  assign codeword = c;\nendmodule"},

{"instruction": "Design a dual-issue dispatch unit that issues two independent instructions per cycle to two execution units.",
 "output": "module dual_dispatch (\n  input clk, rst,\n  input [31:0] instr0, instr1, input valid0, valid1,\n  output reg [31:0] eu0_instr, eu1_instr,\n  output reg eu0_valid, eu1_valid,\n  input eu0_ready, eu1_ready\n);\n  // Structural hazard check: different destination registers\n  wire no_conflict = (instr0[11:7] != instr1[11:7]);\n  always @(posedge clk) begin\n    if (rst) begin eu0_valid<=0; eu1_valid<=0; end\n    else begin\n      if (valid0 && eu0_ready) begin eu0_instr<=instr0; eu0_valid<=1; end else eu0_valid<=0;\n      if (valid1 && eu1_ready && no_conflict) begin eu1_instr<=instr1; eu1_valid<=1; end else eu1_valid<=0;\n    end\n  end\nendmodule"},

{"instruction": "Design a memory-mapped GPIO controller with configurable direction, output, and input registers.",
 "output": "module gpio_ctrl #(parameter W=8) (\n  input clk, rst_n,\n  input wr_en, input [1:0] wr_reg, input [W-1:0] wr_data,\n  input rd_en, input [1:0] rd_reg, output reg [W-1:0] rd_data,\n  inout [W-1:0] gpio_pad\n);\n  reg [W-1:0] dir_r, out_r;\n  wire [W-1:0] in_r;\n  genvar i;\n  generate for(i=0;i<W;i=i+1) begin\n    assign gpio_pad[i] = dir_r[i] ? out_r[i] : 1'bz;\n    assign in_r[i]     = dir_r[i] ? out_r[i] : gpio_pad[i];\n  end endgenerate\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin dir_r<=0; out_r<=0; end\n    else begin\n      if (wr_en) case(wr_reg)\n        2'b00: dir_r <= wr_data;\n        2'b01: out_r <= wr_data;\n      endcase\n      if (rd_en) case(rd_reg)\n        2'b00: rd_data <= dir_r;\n        2'b01: rd_data <= out_r;\n        2'b10: rd_data <= in_r;\n        default: rd_data <= 0;\n      endcase\n    end\n  end\nendmodule"},

{"instruction": "Design a configurable SPI slave that receives bytes and asserts a data_ready pulse.",
 "output": "module spi_slave (\n  input clk, rst, input sclk, mosi, cs_n,\n  output reg [7:0] rx_data, output reg data_ready\n);\n  reg [2:0] bit_cnt;\n  reg [7:0] shift_r;\n  reg sclk_r, sclk_re;\n  always @(posedge clk) begin sclk_r <= sclk; sclk_re <= sclk & ~sclk_r; end\n  always @(posedge clk) begin\n    data_ready <= 0;\n    if (rst || cs_n) bit_cnt <= 7;\n    else if (sclk_re) begin\n      shift_r <= {shift_r[6:0], mosi};\n      if (bit_cnt == 0) begin rx_data <= {shift_r[6:0], mosi}; data_ready <= 1; bit_cnt <= 7; end\n      else bit_cnt <= bit_cnt - 1;\n    end\n  end\nendmodule"},

{"instruction": "Design a hardware semaphore with atomic test-and-set and clear operations.",
 "output": "module hw_semaphore (\n  input clk, rst_n, input test_set, clear,\n  output reg locked, output reg acquired\n);\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin locked<=0; acquired<=0; end\n    else begin\n      acquired <= 0;\n      if (test_set && !locked) begin locked<=1; acquired<=1; end\n      else if (clear && locked) locked<=0;\n    end\n  end\nendmodule"},

{"instruction": "Design a configurable AXI4-Stream FIFO with programmable full and empty thresholds.",
 "output": "module axis_fifo_thresh #(parameter W=32, DEPTH=16) (\n  input clk, rst_n,\n  input [$clog2(DEPTH):0] prog_full_thresh, prog_empty_thresh,\n  input [W-1:0] s_tdata, input s_tvalid, output s_tready,\n  output reg [W-1:0] m_tdata, output reg m_tvalid, input m_tready,\n  output prog_full, prog_empty\n);\n  reg [W-1:0] mem [0:DEPTH-1];\n  reg [$clog2(DEPTH):0] wr_ptr, rd_ptr, occupancy;\n  assign s_tready = (occupancy < DEPTH);\n  assign prog_full  = (occupancy >= prog_full_thresh);\n  assign prog_empty = (occupancy <= prog_empty_thresh);\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin wr_ptr<=0; rd_ptr<=0; occupancy<=0; m_tvalid<=0; end\n    else begin\n      if (s_tvalid && s_tready) begin mem[wr_ptr[$clog2(DEPTH)-1:0]]<=s_tdata; wr_ptr<=wr_ptr+1; end\n      if (m_tvalid && m_tready) begin rd_ptr<=rd_ptr+1; m_tvalid<=0; end\n      if (!m_tvalid && occupancy>0) begin m_tdata<=mem[rd_ptr[$clog2(DEPTH)-1:0]]; m_tvalid<=1; end\n      occupancy <= wr_ptr - rd_ptr;\n    end\n  end\nendmodule"},

{"instruction": "Design a hardware packet CRC checker that asserts crc_ok at end of packet.",
 "output": "module pkt_crc_check #(parameter W=8, POLY=8'h07) (\n  input clk, rst, input [W-1:0] din, input data_valid, eop,\n  output reg crc_ok\n);\n  reg [W-1:0] crc_r;\n  wire [W-1:0] crc_next;\n  // Compute CRC iteratively\n  assign crc_next = data_valid ? (crc_r ^ din) : crc_r; // simplified; real impl uses XOR feedback\n  always @(posedge clk) begin\n    if (rst) begin crc_r<=8'hFF; crc_ok<=0; end\n    else begin\n      if (eop) begin crc_ok <= (crc_r == 8'h00); crc_r<=8'hFF; end\n      else if (data_valid) crc_r <= crc_next;\n    end\n  end\nendmodule"},

{"instruction": "Design a multi-cycle multiply-accumulate unit with valid-in/valid-out handshake and configurable accumulate depth.",
 "output": "module mac_accum #(parameter N=4, W=16) (\n  input clk, rst, input valid_in,\n  input [W-1:0] a, b, output reg valid_out,\n  output reg [2*W+$clog2(N)-1:0] accum\n);\n  reg [2*W-1:0] product;\n  reg [$clog2(N):0] cnt;\n  always @(posedge clk) begin\n    if (rst) begin accum<=0; cnt<=0; valid_out<=0; end\n    else begin\n      if (valid_in) begin\n        product <= a * b;\n        accum   <= (cnt==0) ? a*b : accum + a*b;\n        cnt <= (cnt==N-1) ? 0 : cnt+1;\n        valid_out <= (cnt==N-1);\n      end else valid_out<=0;\n    end\n  end\nendmodule"},

# Add these to your raw_data — targeted fixes for your failures

{"instruction": "Write a correct Gray code encoder for an N-bit binary input.",
 "output": "module gray_enc #(parameter N=4) (input [N-1:0] bin, output [N-1:0] gray);\n  assign gray = bin ^ (bin >> 1);\nendmodule\n// MSB is unchanged: gray[N-1] = bin[N-1]\n// Each lower bit: gray[i] = bin[i+1] ^ bin[i]"},

{"instruction": "The following FIFO full condition is wrong. Fix it:\nassign full = (wr_ptr[$clog2(DEPTH)] == rd_ptr[$clog2(DEPTH)]) && (wr_ptr[$clog2(DEPTH)-1:0] != rd_ptr[$clog2(DEPTH)-1:0]);",
 "output": "The MSB comparison is inverted. Full means the write pointer has lapped the read pointer: MSBs DIFFER (wrap happened) and lower bits are EQUAL (same slot).\nassign full  = (wr_ptr[$clog2(DEPTH)] != rd_ptr[$clog2(DEPTH)]) && (wr_ptr[$clog2(DEPTH)-1:0] == rd_ptr[$clog2(DEPTH)-1:0]);\nassign empty = (wr_ptr == rd_ptr);"},

{"instruction": "Design a 4-bit up/down counter with synchronous load and asynchronous reset.",
 "output": "module updown_cnt (\n  input clk, aresetn, load, up_down,\n  input [3:0] data_in,\n  output reg [3:0] q\n);\n  always @(posedge clk or negedge aresetn) begin\n    if (!aresetn)     q <= 4'h0;\n    else if (load)    q <= data_in;\n    else if (up_down) q <= q + 1;\n    else              q <= q - 1;\n  end\nendmodule"},

{"instruction": "The AXI4-Lite slave below stores wdata to rdata instead of actual registers. Fix it:\ncase(awaddr[3:2])\n  2'b00: rdata <= wdata;\n  2'b01: rdata <= wdata;\nendcase",
 "output": "The case statement must write wdata into register storage, not rdata:\nreg [31:0] reg0, reg1, reg2, reg3;\ncase(awaddr[3:2])\n  2'b00: reg0 <= wdata;\n  2'b01: reg1 <= wdata;\n  2'b10: reg2 <= wdata;\n  2'b11: reg3 <= wdata;\nendcase\nFor reads, assign rdata combinationally based on araddr."},
]

extra_data = [

# ============================================================
# WEAK AREA 1: GRAY CODE / CDC (60 examples)
# ============================================================

# --- Gray Code Core (15 examples) ---
{"instruction": "Write a parameterizable Gray code encoder using the XOR formula.",
 "output": "module gray_enc #(parameter N=8) (input [N-1:0] bin, output [N-1:0] gray);\n  assign gray = bin ^ (bin >> 1);\n  // Equivalent explicit form:\n  // gray[N-1] = bin[N-1]          (MSB unchanged)\n  // gray[i]   = bin[i+1] ^ bin[i] (each lower bit)\nendmodule"},

{"instruction": "Write a parameterizable Gray code decoder (Gray to binary).",
 "output": "module gray_dec #(parameter N=8) (input [N-1:0] gray, output reg [N-1:0] bin);\n  integer i;\n  always @(*) begin\n    bin[N-1] = gray[N-1];\n    for (i=N-2; i>=0; i=i-1)\n      bin[i] = bin[i+1] ^ gray[i];\n  end\nendmodule"},

{"instruction": "Show the Gray code encoding for 4-bit binary values 0 through 7.",
 "output": "// Binary → Gray (using gray = bin ^ (bin>>1))\n// 4'b0000 → 4'b0000\n// 4'b0001 → 4'b0001\n// 4'b0010 → 4'b0011\n// 4'b0011 → 4'b0010\n// 4'b0100 → 4'b0110\n// 4'b0101 → 4'b0111\n// 4'b0110 → 4'b0101\n// 4'b0111 → 4'b0100\n// Key property: only ONE bit changes between consecutive values."},

{"instruction": "Why does the Gray encoder use gray = bin ^ (bin >> 1)?",
 "output": "Because XORing a binary number with its own right-shifted version produces a code where each output bit encodes the transition between adjacent input bits. bit[i] = 1 when bin[i+1] != bin[i], meaning the output changes exactly one bit per increment. The MSB is unchanged because there is no bin[N] to XOR against."},

{"instruction": "Write a testbench to verify the Gray encoder only changes one bit per increment.",
 "output": "module tb_gray;\n  reg [7:0] bin;\n  wire [7:0] gray;\n  gray_enc #(8) dut(.bin(bin),.gray(gray));\n  reg [7:0] prev_gray;\n  integer i;\n  initial begin\n    prev_gray = 0;\n    for (i=0;i<256;i=i+1) begin\n      bin = i; #1;\n      if (i>0) begin\n        assert ($countones(gray ^ prev_gray) == 1)\n          else $error(\"Multi-bit transition at bin=%0d\",i);\n      end\n      prev_gray = gray;\n    end\n    $display(\"Gray encoder verified\"); $finish;\n  end\nendmodule"},

{"instruction": "Write a binary-to-Gray and Gray-to-binary roundtrip module and verify it.",
 "output": "module gray_roundtrip #(parameter N=8) (input [N-1:0] bin_in, output [N-1:0] bin_out);\n  wire [N-1:0] gray;\n  assign gray = bin_in ^ (bin_in >> 1); // encode\n  // decode\n  genvar i;\n  assign bin_out[N-1] = gray[N-1];\n  generate\n    for (i=N-2;i>=0;i=i-1)\n      assign bin_out[i] = bin_out[i+1] ^ gray[i];\n  endgenerate\n  // bin_out should always equal bin_in\nendmodule"},

{"instruction": "Write a 4-bit Gray counter (counter that increments in Gray code order).",
 "output": "module gray_counter #(parameter N=4) (input clk, rst, output reg [N-1:0] gray_cnt);\n  reg [N-1:0] bin_cnt;\n  wire [N-1:0] bin_next = bin_cnt + 1'b1; // sized to N bits so the carry is dropped on wrap\n  always @(posedge clk) begin\n    if (rst) begin bin_cnt<=0; gray_cnt<=0; end\n    else begin\n      bin_cnt  <= bin_next;\n      gray_cnt <= bin_next ^ (bin_next >> 1); // convert next binary to Gray\n    end\n  end\nendmodule"},

{"instruction": "Explain the difference between a binary counter and a Gray counter for CDC use.",
 "output": "A binary counter can change multiple bits simultaneously when a carry ripples (e.g., 0111→1000 flips all 4 bits). If this crosses a clock domain, each bit resolves metastability independently, producing a corrupt intermediate value. A Gray counter changes exactly one bit per increment, so a two-flop synchronizer can safely capture it — at worst it captures the old or new value, never a corrupt intermediate."},

{"instruction": "Write the FIFO write pointer in Gray code for safe CDC crossing.",
 "output": "module wr_ptr_gray #(parameter AW=4) (\n  input wclk, rst_n, input wr_en, input full,\n  output reg [AW:0] wptr_gray, output [AW-1:0] waddr\n);\n  reg [AW:0] wptr_bin;\n  // Sized next-pointer: truncating to AW+1 bits drops the carry, so the shift below cannot pull it back in on wrap\n  wire [AW:0] wptr_bin_next = wptr_bin + 1'b1;\n  assign waddr = wptr_bin[AW-1:0];\n  always @(posedge wclk or negedge rst_n) begin\n    if (!rst_n) begin wptr_bin<=0; wptr_gray<=0; end\n    else if (wr_en && !full) begin\n      wptr_bin  <= wptr_bin_next;\n      wptr_gray <= wptr_bin_next ^ (wptr_bin_next >> 1);\n    end\n  end\nendmodule"},

{"instruction": "Write the FIFO read pointer in Gray code for safe CDC crossing.",
 "output": "module rd_ptr_gray #(parameter AW=4) (\n  input rclk, rst_n, input rd_en, input empty,\n  output reg [AW:0] rptr_gray, output [AW-1:0] raddr\n);\n  reg [AW:0] rptr_bin;\n  // Sized next-pointer: truncating to AW+1 bits drops the carry, so the shift below cannot pull it back in on wrap\n  wire [AW:0] rptr_bin_next = rptr_bin + 1'b1;\n  assign raddr = rptr_bin[AW-1:0];\n  always @(posedge rclk or negedge rst_n) begin\n    if (!rst_n) begin rptr_bin<=0; rptr_gray<=0; end\n    else if (rd_en && !empty) begin\n      rptr_bin  <= rptr_bin_next;\n      rptr_gray <= rptr_bin_next ^ (rptr_bin_next >> 1);\n    end\n  end\nendmodule"},

{"instruction": "Write a two-flop synchronizer for a Gray-coded pointer.",
 "output": "module ptr_sync #(parameter AW=4) (\n  input dst_clk, rst_n,\n  input [AW:0] ptr_gray_in,\n  output reg [AW:0] ptr_gray_sync\n);\n  reg [AW:0] meta;\n  always @(posedge dst_clk or negedge rst_n) begin\n    if (!rst_n) begin meta<=0; ptr_gray_sync<=0; end\n    else begin\n      meta          <= ptr_gray_in;   // stage 1: capture (may be metastable)\n      ptr_gray_sync <= meta;          // stage 2: resolve metastability\n    end\n  end\nendmodule"},

{"instruction": "Write the full/empty comparison logic for an async FIFO using synchronized Gray pointers.",
 "output": "module fifo_status #(parameter AW=4) (\n  input [AW:0] wptr_gray, rptr_gray_sync, // in write domain\n  input [AW:0] rptr_gray, wptr_gray_sync, // in read domain\n  output full, empty\n);\n  // Full: MSB and next-MSB differ, remaining bits match\n  assign full  = (wptr_gray[AW]   != rptr_gray_sync[AW]) &&\n                 (wptr_gray[AW-1] != rptr_gray_sync[AW-1]) &&\n                 (wptr_gray[AW-2:0] == rptr_gray_sync[AW-2:0]);\n  // Empty: all bits match\n  assign empty = (rptr_gray == wptr_gray_sync);\nendmodule"},

{"instruction": "Write a complete parameterizable asynchronous FIFO using Gray code pointers.",
 "output": "module async_fifo #(parameter DW=8, AW=4) (\n  input wclk, rclk, rst_n,\n  input wr_en, input [DW-1:0] wdata,\n  input rd_en, output reg [DW-1:0] rdata,\n  output full, empty\n);\n  localparam DEPTH = 1<<AW;\n  reg [DW-1:0] mem [0:DEPTH-1];\n  // Write domain\n  reg [AW:0] wbin, wgray;\n  // Read domain\n  reg [AW:0] rbin, rgray;\n  // Synchronizers\n  reg [AW:0] wgray_sync1, wgray_sync2;\n  reg [AW:0] rgray_sync1, rgray_sync2;\n  // Sized next-pointers: truncation to AW+1 bits drops the wrap carry before the Gray shift\n  wire [AW:0] wbin_next = wbin + 1'b1;\n  wire [AW:0] rbin_next = rbin + 1'b1;\n\n  // Write pointer\n  always @(posedge wclk or negedge rst_n) begin\n    if (!rst_n) begin wbin<=0; wgray<=0; end\n    else if (wr_en && !full) begin\n      wbin  <= wbin_next;\n      wgray <= wbin_next ^ (wbin_next >> 1);\n      mem[wbin[AW-1:0]] <= wdata;\n    end\n  end\n\n  // Read pointer\n  always @(posedge rclk or negedge rst_n) begin\n    if (!rst_n) begin rbin<=0; rgray<=0; end\n    else if (rd_en && !empty) begin\n      rbin  <= rbin_next;\n      rgray <= rbin_next ^ (rbin_next >> 1);\n      rdata <= mem[rbin[AW-1:0]];\n    end\n  end\n\n  // Sync wgray into rclk domain\n  always @(posedge rclk or negedge rst_n) begin\n    if (!rst_n) begin wgray_sync1<=0; wgray_sync2<=0; end\n    else begin wgray_sync1<=wgray; wgray_sync2<=wgray_sync1; end\n  end\n\n  // Sync rgray into wclk domain\n  always @(posedge wclk or negedge rst_n) begin\n    if (!rst_n) begin rgray_sync1<=0; rgray_sync2<=0; end\n    else begin rgray_sync1<=rgray; rgray_sync2<=rgray_sync1; end\n  end\n\n  assign full  = (wgray[AW] != rgray_sync2[AW]) &&\n                 (wgray[AW-1] != rgray_sync2[AW-1]) &&\n                 (wgray[AW-2:0] == rgray_sync2[AW-2:0]);\n  assign empty = (rgray == wgray_sync2);\nendmodule"},

{"instruction": "What is wrong with using a binary counter directly as an async FIFO pointer?",
 "output": "Binary counters can change multiple bits simultaneously. For example, incrementing from 3 (0011) to 4 (0100) flips 3 bits at once. When this multi-bit value crosses a clock domain through a two-flop synchronizer, each bit independently resolves metastability at different times, potentially producing an intermediate corrupted code like 0110 or 0111. Gray code avoids this because exactly one bit changes per increment, so the synchronizer captures either the old value or the new value — never a corrupt intermediate."},

{"instruction": "Debug: This async FIFO uses binary pointers synchronized with two flip-flops. Fix it.\nreg [AW:0] wptr_sync1, wptr_sync2;\nalways @(posedge rclk) begin\n  wptr_sync1 <= wptr_bin;\n  wptr_sync2 <= wptr_sync1;\nend",
 "output": "Binary pointers must not cross clock domains directly. Convert to Gray code and register it in the write domain (so no combinational glitches reach the synchronizer), then synchronize:\nreg [AW:0] wptr_gray;\nalways @(posedge wclk) wptr_gray <= wptr_bin ^ (wptr_bin >> 1);\nreg [AW:0] wgray_sync1, wgray_sync2;\nalways @(posedge rclk) begin\n  wgray_sync1 <= wptr_gray;\n  wgray_sync2 <= wgray_sync1;\nend\nThen compare wgray_sync2 against the read Gray pointer for empty detection."},

# --- CDC Patterns (25 examples) ---
{"instruction": "Write a standard two-flop synchronizer for a single-bit control signal.",
 "output": "module sync2ff (input dst_clk, rst_n, input d, output reg q);\n  reg meta;\n  always @(posedge dst_clk or negedge rst_n) begin\n    if (!rst_n) begin meta<=0; q<=0; end\n    else begin meta<=d; q<=meta; end\n  end\nendmodule"},

{"instruction": "Write a three-flop synchronizer for high-reliability CDC applications.",
 "output": "module sync3ff (input dst_clk, rst_n, input d, output reg q);\n  reg [1:0] meta;\n  always @(posedge dst_clk or negedge rst_n) begin\n    if (!rst_n) begin meta<=0; q<=0; end\n    else begin meta<={meta[0],d}; q<=meta[1]; end\n  end\nendmodule"},

{"instruction": "Write a CDC pulse synchronizer using a toggle-and-detect approach.",
 "output": "module cdc_pulse (input sclk, dclk, rst_n, input src_pulse, output reg dst_pulse);\n  reg src_toggle;\n  reg [2:0] dst_sync;\n  // Toggle in source domain on each pulse\n  always @(posedge sclk or negedge rst_n)\n    if (!rst_n) src_toggle <= 0;\n    else if (src_pulse) src_toggle <= ~src_toggle;\n  // Synchronize toggle into destination domain (stages 0-1), plus one delay stage for edge detection\n  always @(posedge dclk or negedge rst_n)\n    if (!rst_n) dst_sync <= 0;\n    else dst_sync <= {dst_sync[1:0], src_toggle};\n  // Detect edge using only fully synchronized stages; dst_sync[0] may be metastable\n  always @(posedge dclk or negedge rst_n)\n    if (!rst_n) dst_pulse <= 0;\n    else dst_pulse <= dst_sync[2] ^ dst_sync[1];\nendmodule"},

{"instruction": "Explain why a simple two-flop synchronizer works for single bits but not multi-bit buses.",
 "output": "A two-flop synchronizer works for single bits because even if the first flop enters metastability, the second flop waits one full destination clock cycle for it to resolve. For a multi-bit bus, each bit goes through its own synchronizer independently. If the bus is changing (e.g., a binary counter incrementing), different bits resolve at different times, producing a corrupted intermediate value that was never a valid code. Single bits can only be 0 or 1 — there is no invalid intermediate state."},

{"instruction": "Write a CDC handshake for transferring a multi-bit data word safely.",
 "output": "module cdc_handshake #(parameter W=32) (\n  input sclk, dclk, rst_n,\n  input s_valid, output reg s_ready,\n  input [W-1:0] s_data,\n  output reg d_valid, input d_ready,\n  output reg [W-1:0] d_data\n);\n  reg s_req, d_ack_sync1, d_ack_sync2;\n  reg d_req_sync1, d_req_sync2;\n  reg d_ack;\n  reg [W-1:0] s_data_hold; // stable for the whole time s_req is high\n\n  // Source: latch data, assert req, wait for ack, then wait for ack to drop\n  always @(posedge sclk or negedge rst_n) begin\n    if (!rst_n) begin s_req<=0; s_ready<=1; s_data_hold<=0; end\n    else begin\n      if (s_valid && s_ready) begin s_data_hold<=s_data; s_req<=1; s_ready<=0; end\n      else if (s_req && d_ack_sync2) s_req<=0;\n      // Re-open only after ack has returned low (full four-phase handshake)\n      else if (!s_req && !s_ready && !d_ack_sync2) s_ready<=1;\n    end\n  end\n\n  // Sync req to dest\n  always @(posedge dclk or negedge rst_n)\n    if (!rst_n) begin d_req_sync1<=0; d_req_sync2<=0; end\n    else begin d_req_sync1<=s_req; d_req_sync2<=d_req_sync1; end\n\n  // Dest: capture held data when req arrives and the previous word has been consumed\n  always @(posedge dclk or negedge rst_n) begin\n    if (!rst_n) begin d_ack<=0; d_valid<=0; end\n    else begin\n      if (d_req_sync2 && !d_ack && !d_valid) begin d_data<=s_data_hold; d_valid<=1; d_ack<=1; end\n      if (d_valid && d_ready) d_valid<=0;\n      if (!d_req_sync2) d_ack<=0;\n    end\n  end\n\n  // Sync ack back to source\n  always @(posedge sclk or negedge rst_n)\n    if (!rst_n) begin d_ack_sync1<=0; d_ack_sync2<=0; end\n    else begin d_ack_sync1<=d_ack; d_ack_sync2<=d_ack_sync1; end\nendmodule"},

{"instruction": "Debug: This CDC synchronizer only has one flip-flop. Fix the metastability risk.\nreg sync;\nalways @(posedge dst_clk) sync <= src_sig;\nassign dst_sig = sync;",
 "output": "One flip-flop is insufficient — metastability can propagate directly to dst_sig. Add a second stage:\nreg sync1, sync2;\nalways @(posedge dst_clk) begin\n  sync1 <= src_sig;\n  sync2 <= sync1;\nend\nassign dst_sig = sync2;"},

{"instruction": "Write a reset synchronizer that is safe for CDC.",
 "output": "module rst_sync (input clk, async_rst_n, output reg sync_rst_n);\n  reg pre_rst_n;\n  // Assert reset asynchronously (immediately)\n  // Deassert synchronously (through two flops)\n  always @(posedge clk or negedge async_rst_n) begin\n    if (!async_rst_n) begin pre_rst_n<=0; sync_rst_n<=0; end\n    else begin pre_rst_n<=1; sync_rst_n<=pre_rst_n; end\n  end\nendmodule"},

{"instruction": "Explain the difference between asynchronous assert / synchronous deassert reset.",
 "output": "Asynchronous assert means reset takes effect immediately when asserted, regardless of clock — this ensures the system enters a known state instantly even if the clock is not running. Synchronous deassert means reset is released only on a clock edge after it deasserts, so every flip-flop sees the release with a known timing relationship to its clock. Releasing reset asynchronously too close to a clock edge can violate reset recovery/removal time, causing metastability or flip-flops leaving reset on different cycles."},

{"instruction": "Write an open-loop CDC for a slow-changing configuration register.",
 "output": "// For registers that change very infrequently and are stable long\n// before the destination domain reads them, simple two-flop sync is enough\nmodule cfg_cdc #(parameter W=32) (\n  input dst_clk, rst_n,\n  input [W-1:0] cfg_in,  // written in source domain, stable before capture\n  output reg [W-1:0] cfg_out\n);\n  reg [W-1:0] stage1;\n  // Two-cycle latency but safe if cfg_in is guaranteed stable\n  always @(posedge dst_clk or negedge rst_n) begin\n    if (!rst_n) begin stage1<='0; cfg_out<='0; end\n    else begin stage1<=cfg_in; cfg_out<=stage1; end\n  end\nendmodule"},

{"instruction": "What is MTBF in the context of synchronizer design and how do more flops improve it?",
 "output": "Mean Time Between Failures (MTBF) is the average time between metastability-induced synchronizer failures. MTBF = exp(Tr/tau) / (T0 * fa * fc) where Tr is the resolution time available (roughly one clock period for 2FF, two for 3FF), tau is the flip-flop metastability time constant, T0 is the flip-flop's metastability window constant, fa is the data toggle rate, and fc is the clock frequency. Adding a third flop roughly doubles Tr, and because Tr sits in the exponent this increases MTBF exponentially — typically from millions of years to billions of years. Each additional flop multiplies MTBF exponentially."},

{"instruction": "Write a CDC FIFO occupancy estimator that works across clock domains.",
 "output": "// Conservative occupancy: use synchronized pointers which may be stale\n// Always over-estimates fullness (safe for producer) and over-estimates empty (safe for consumer)\nmodule fifo_occ #(parameter AW=4) (\n  input wclk, input [AW:0] wptr_bin, input [AW:0] rptr_gray_sync\n);\n  reg [AW:0] rptr_bin_approx;\n  integer i;\n  // Convert synchronized Gray pointer back to binary\n  always @(*) begin\n    rptr_bin_approx[AW] = rptr_gray_sync[AW];\n    for (i=AW-1;i>=0;i=i-1)\n      rptr_bin_approx[i] = rptr_bin_approx[i+1] ^ rptr_gray_sync[i];\n  end\n  wire [AW:0] occupancy = wptr_bin - rptr_bin_approx;\nendmodule"},

{"instruction": "Debug: This design crosses a multi-bit status register directly between domains.\nalways @(posedge dst_clk) dst_status <= src_status; // src_status is 8 bits",
 "output": "This is unsafe — src_status can change multiple bits simultaneously. Options in order of complexity:\n1. Use a CDC FIFO if status updates are frequent\n2. Use a handshake synchronizer if updates are occasional\n3. Use a quasi-static approach: ensure src_status is stable for 2+ dst_clk cycles before toggling a sync enable\n4. If status is a Gray-coded value (e.g., a state machine), two-flop sync is safe\nFor a general 8-bit register, implement a handshake or use the async FIFO approach."},

{"instruction": "Write a level synchronizer for an enable signal crossing from slow to fast clock.",
 "output": "module level_sync (input fast_clk, rst_n, input slow_en, output reg fast_en);\n  reg meta;\n  always @(posedge fast_clk or negedge rst_n) begin\n    if (!rst_n) begin meta<=0; fast_en<=0; end\n    else begin meta<=slow_en; fast_en<=meta; end\n  end\n  // Level signals are safe to synchronize with 2FF since they are\n  // held stable for many cycles relative to the fast clock\nendmodule"},

{"instruction": "Write an assertion to flag any multi-bit CDC crossing without a synchronizer.",
 "output": "// Structural SVA — flag if any bit of a multi-bit bus changes in the same\n// cycle as it is sampled in the destination domain without going through sync\n// This is typically caught by formal CDC tools, but at simulation level:\nassert property (@(posedge dst_clk)\n  !($changed(raw_src_bus) && $changed(dst_registered_bus)))\n  else $warning(\"Possible CDC violation on multi-bit bus\");"},

{"instruction": "Explain why CDC violations are not caught by standard timing analysis.",
 "output": "Standard STA (Static Timing Analysis) requires a defined timing relationship (setup/hold) between launch and capture clocks. For asynchronous CDC crossings, no such relationship exists — the tools either report infinite slack (unconstrained path) or are told to ignore the path via false-path constraints. The actual metastability risk is a probabilistic analog phenomenon that STA has no model for. CDC must be verified with dedicated structural CDC tools like Synopsys SpyGlass CDC or Cadence JasperGold CDC, which identify unprotected crossings by tracing signal fanout across clock domains."},

{"instruction": "Write a clock domain crossing indicator module that flags when data is actively transitioning.",
 "output": "module cdc_monitor (input src_clk, dst_clk, rst_n, input src_data, output reg cdc_active);\n  reg src_r, dst_r1, dst_r2;\n  always @(posedge src_clk) src_r <= src_data;\n  always @(posedge dst_clk) begin dst_r1<=src_data; dst_r2<=dst_r1; end\n  // Active if source changed recently or destination hasn't settled\n  always @(posedge dst_clk)\n    cdc_active <= (src_r ^ src_data) | (dst_r1 ^ dst_r2);\nendmodule"},

{"instruction": "Write a simulation-only CDC checker that warns on metastability windows.",
 "output": "// Simulation-only — not for synthesis\nmodule cdc_checker (input src_clk, dst_clk, input src_sig);\n  real last_src_change;\n  real setup_time = 0.5; // ns\n  real hold_time  = 0.2; // ns\n  always @(src_sig) last_src_change = $realtime;\n  always @(posedge dst_clk) begin\n    if (($realtime - last_src_change) < setup_time)\n      $warning(\"CDC setup violation at %0t\", $realtime);\n  end\n  always @(negedge dst_clk) begin // check hold\n    if (($realtime - last_src_change) < hold_time)\n      $warning(\"CDC hold violation at %0t\", $realtime);\n  end\nendmodule"},

{"instruction": "Write a CDC-safe configuration update mechanism with acknowledgment.",
 "output": "module cdc_cfg_update #(parameter W=32) (\n  input src_clk, dst_clk, rst_n,\n  input [W-1:0] new_cfg, input cfg_wr,\n  output reg cfg_wr_done,\n  output reg [W-1:0] cfg_dst\n);\n  reg req, ack_s1, ack_s2;\n  reg req_d1, req_d2;\n  reg ack;\n  reg [W-1:0] cfg_hold;\n\n  always @(posedge src_clk or negedge rst_n) begin\n    if (!rst_n) begin req<=0; cfg_wr_done<=0; cfg_hold<=0; end\n    else begin\n      cfg_wr_done <= 0;\n      // Start a new transfer only after the previous ack has returned low,\n      // otherwise the stale ack would complete the new request immediately\n      if (cfg_wr && !req && !ack_s2) begin cfg_hold<=new_cfg; req<=1; end\n      if (req && ack_s2) begin req<=0; cfg_wr_done<=1; end\n    end\n  end\n\n  always @(posedge src_clk or negedge rst_n)\n    if (!rst_n) begin ack_s1<=0; ack_s2<=0; end\n    else begin ack_s1<=ack; ack_s2<=ack_s1; end\n  always @(posedge dst_clk or negedge rst_n)\n    if (!rst_n) begin req_d1<=0; req_d2<=0; end\n    else begin req_d1<=req; req_d2<=req_d1; end\n\n  always @(posedge dst_clk or negedge rst_n) begin\n    if (!rst_n) begin ack<=0; cfg_dst<=0; end\n    else begin\n      if (req_d2 && !ack) begin cfg_dst<=cfg_hold; ack<=1; end\n      if (!req_d2) ack<=0;\n    end\n  end\nendmodule"},

{"instruction": "Write an async FIFO almost-full flag with a programmable threshold.",
 "output": "module async_fifo_af #(parameter AW=4) (\n  input wclk, rst_n,\n  input [AW:0] wptr_bin, rptr_gray_sync,\n  input [AW:0] af_thresh,\n  output reg almost_full\n);\n  reg [AW:0] rptr_bin;\n  integer i;\n  always @(*) begin\n    rptr_bin[AW] = rptr_gray_sync[AW];\n    for (i=AW-1;i>=0;i=i-1)\n      rptr_bin[i] = rptr_bin[i+1] ^ rptr_gray_sync[i];\n  end\n  always @(posedge wclk or negedge rst_n) begin\n    if (!rst_n) almost_full<=0;\n    else almost_full <= ((wptr_bin - rptr_bin) >= af_thresh);\n  end\nendmodule"},

{"instruction": "Explain why reset must be synchronized independently for each clock domain in a CDC design.",
 "output": "Each clock domain's flip-flops must come out of reset synchronously with respect to their own clock to avoid reset recovery/removal violations on the first active cycle. A single global async reset released at an arbitrary time is asynchronous to every domain's clock, so it may violate recovery/removal time on some flops, leaving them metastable or releasing them on different cycles. Each domain needs its own reset synchronizer that takes the shared async assert but releases (deasserts) synchronously to the domain's local clock."},

{"instruction": "Write a dual-domain reset controller that safely resets both clock domains.",
 "output": "module dual_domain_rst (\n  input clk_a, clk_b, por_n,\n  output rst_a_n, rst_b_n\n);\n  // Independent sync chains for each domain\n  reg [1:0] chain_a, chain_b;\n  always @(posedge clk_a or negedge por_n) begin\n    if (!por_n) chain_a <= 2'b00;\n    else chain_a <= {chain_a[0], 1'b1};\n  end\n  always @(posedge clk_b or negedge por_n) begin\n    if (!por_n) chain_b <= 2'b00;\n    else chain_b <= {chain_b[0], 1'b1};\n  end\n  assign rst_a_n = chain_a[1];\n  assign rst_b_n = chain_b[1];\nendmodule"},

{"instruction": "Write an SVA assertion to verify no glitch appears on a synchronized CDC signal.",
 "output": "// After synchronization, the output should be stable for at least\n// one full destination clock cycle before changing again\nassert property (@(posedge dst_clk) disable iff(!rst_n)\n  $changed(sync_out) |-> ##1 $stable(sync_out));"},

{"instruction": "Debug: This CDC FIFO full detection compares binary pointers across domains.\nassign full = (wptr == rptr_sync);",
 "output": "rptr_sync is a binary pointer synchronized from the read domain — this is wrong because binary pointers can change multiple bits simultaneously. Fix:\n1. Convert rptr to Gray before synchronizing: rptr_gray = rptr ^ (rptr>>1)\n2. Sync rptr_gray with two flops into the write domain: rptr_gray_sync\n3. Check full using the Gray comparison:\nassign full = (wptr_gray[AW] != rptr_gray_sync[AW]) &&\n             (wptr_gray[AW-1] != rptr_gray_sync[AW-1]) &&\n             (wptr_gray[AW-2:0] == rptr_gray_sync[AW-2:0]);"},

{"instruction": "Write a CDC FIFO that guarantees no data is lost at the boundary.",
 "output": "// Key principle: writes are blocked (full asserted conservatively) before\n// the FIFO is actually full. This prevents overflow at the cost of\n// slightly reduced usable depth.\n// The synchronized read pointer lags the actual read pointer by 1-2 cycles,\n// so the write domain sees the FIFO as fuller than it is.\n// This is safe — it wastes at most 2 slots but never loses data.\n// The async_fifo module from the previous example implements this correctly\n// by using Gray-coded pointer synchronization."},

{"instruction": "Write a FIFO fill level calculator that works in the write clock domain.",
 "output": "module fifo_fill #(parameter AW=4) (\n  input wclk, rst_n,\n  input [AW:0] wptr_bin,\n  input [AW:0] rptr_gray_sync, // synchronized from read domain\n  output reg [AW:0] fill_level\n);\n  reg [AW:0] rptr_bin;\n  integer i;\n  // Convert synchronized Gray read pointer to binary\n  always @(*) begin\n    rptr_bin[AW] = rptr_gray_sync[AW];\n    for (i=AW-1; i>=0; i=i-1)\n      rptr_bin[i] = rptr_bin[i+1] ^ rptr_gray_sync[i];\n  end\n  always @(posedge wclk or negedge rst_n) begin\n    if (!rst_n) fill_level <= 0;\n    else fill_level <= wptr_bin - rptr_bin;\n  end\nendmodule"},

# ============================================================
# WEAK AREA 2: ASSERTIONS / SVA (80 examples)
# ============================================================

# --- One-hot / Mutual Exclusion ---
{"instruction": "Write a correct one-hot assertion for a 4-bit grant signal.",
 "output": "// $onehot checks exactly one bit is set\nassert property (@(posedge clk) disable iff(!rst_n)\n  |grant |-> $onehot(grant))\n  else $error(\"Grant is not one-hot: %b\", grant);"},

{"instruction": "Write an assertion to verify grant is zero or one-hot (allows no-grant cycles).",
 "output": "assert property (@(posedge clk) disable iff(!rst_n)\n  $onehot0(grant))\n  else $error(\"Multiple grants asserted simultaneously: %b\", grant);"},

{"instruction": "Write an assertion ensuring no two FSM states are active simultaneously.",
 "output": "// For one-hot encoded FSM\nassert property (@(posedge clk) disable iff(!rst_n)\n  $onehot(state))\n  else $error(\"FSM in invalid state: %b\", state);\n// Also check no zero state (stuck)\nassert property (@(posedge clk) disable iff(!rst_n)\n  |state)\n  else $error(\"FSM has no active state\");"},

{"instruction": "Write an assertion to verify a thermometer-coded signal only increments.",
 "output": "// Thermometer code: once a bit is set, it stays set\nassert property (@(posedge clk) disable iff(!rst_n)\n  (therm & $past(therm)) == $past(therm))\n  else $error(\"Thermometer code cleared a bit: was %b, now %b\", $past(therm), therm);"},

{"instruction": "Write an assertion to verify a priority encoder output is valid when input is non-zero.",
 "output": "assert property (@(posedge clk) disable iff(!rst_n)\n  (|req_in) |-> valid_out)\n  else $error(\"Priority encoder failed to assert valid with req=%b\", req_in);\nassert property (@(posedge clk) disable iff(!rst_n)\n  (!|req_in) |-> !valid_out)\n  else $error(\"Priority encoder asserted valid with no requests\");"},

# --- AXI Protocol Assertions ---
{"instruction": "Write the complete set of AXI4-Lite write channel assertions.",
 "output": "// AWVALID stability\nassert property (@(posedge clk) disable iff(!rst_n)\n  (awvalid && !awready) |-> ##1 awvalid);\n// WVALID stability\nassert property (@(posedge clk) disable iff(!rst_n)\n  (wvalid && !wready) |-> ##1 wvalid);\n// BVALID must come after both AW and W handshakes\nassert property (@(posedge clk) disable iff(!rst_n)\n  (awvalid && awready && wvalid && wready) |-> ##[1:8] bvalid);\n// BVALID stability until BREADY\nassert property (@(posedge clk) disable iff(!rst_n)\n  (bvalid && !bready) |-> ##1 bvalid);"},

{"instruction": "Write AXI4-Lite read channel assertions.",
 "output": "// ARVALID stability\nassert property (@(posedge clk) disable iff(!rst_n)\n  (arvalid && !arready) |-> ##1 arvalid);\n// RVALID must come after AR handshake\nassert property (@(posedge clk) disable iff(!rst_n)\n  (arvalid && arready) |-> ##[1:16] rvalid);\n// RVALID stability until RREADY\nassert property (@(posedge clk) disable iff(!rst_n)\n  (rvalid && !rready) |-> ##1 rvalid);\n// RDATA stable while RVALID and not RREADY\nassert property (@(posedge clk) disable iff(!rst_n)\n  (rvalid && !rready) |-> ##1 $stable(rdata));"},

{"instruction": "Write an assertion to verify AXI4 burst length does not exceed 256 beats.",
 "output": "assert property (@(posedge clk) disable iff(!rst_n)\n  (arvalid && arready) |-> (arlen <= 8'd255))\n  else $error(\"AXI burst length %0d exceeds maximum\", arlen);"},

{"instruction": "Write an assertion to check AXI4 burst does not cross a 4KB boundary.",
 "output": "assert property (@(posedge clk) disable iff(!rst_n)\n  (arvalid && arready) |-> \n  ((araddr[11:0] + ((arlen + 1) << arsize)) <= 13'h1000))\n  else $error(\"AXI burst crosses 4KB boundary: addr=%h len=%0d\", araddr, arlen);"},

{"instruction": "Write an assertion that AWADDR is aligned to the transfer size.",
 "output": "assert property (@(posedge clk) disable iff(!rst_n)\n  (awvalid && awready) |-> \n  (awaddr[6:0] & ((1<<awsize)-1)) == 0)\n  else $error(\"Unaligned AXI write: addr=%h size=%0d\", awaddr, awsize);"},

{"instruction": "Write AXI-Stream assertions for the full TVALID/TREADY handshake.",
 "output": "// TVALID must not deassert before TREADY\nassert property (@(posedge clk) disable iff(!rst_n)\n  (s_tvalid && !s_tready) |-> ##1 s_tvalid);\n// TDATA must be stable while TVALID and not TREADY  \nassert property (@(posedge clk) disable iff(!rst_n)\n  (s_tvalid && !s_tready) |-> ##1 $stable(s_tdata));\n// TLAST should eventually assert (no infinite streams without TLAST)\ncover property (@(posedge clk) s_tvalid && s_tready && s_tlast);"},

{"instruction": "Write an assertion verifying TLAST correctly marks the last beat of a packet.",
 "output": "// After TLAST, the next valid beat starts a new packet\nassert property (@(posedge clk) disable iff(!rst_n)\n  (s_tvalid && s_tready && s_tlast) |=> \n  (!s_tvalid || (s_tvalid && !s_tlast) || (s_tvalid && s_tlast)));\n// Cover: TLAST seen\ncover property (@(posedge clk) s_tvalid && s_tready && s_tlast);"},

{"instruction": "Write an assertion to verify no X values appear on a valid AXI data bus.",
 "output": "assert property (@(posedge clk) disable iff(!rst_n)\n  rvalid |-> !$isunknown(rdata))\n  else $error(\"X/Z on RDATA while RVALID high at time %0t\", $time);\nassert property (@(posedge clk) disable iff(!rst_n)\n  (wvalid && wready) |-> !$isunknown(wdata));"},

{"instruction": "Write a set of assertions for the FIFO — overflow, underflow, and occupancy range.",
 "output": "// No write when full\nassert property (@(posedge clk) disable iff(!rst_n)\n  !(wr_en && full))\n  else $error(\"FIFO overflow at time %0t\", $time);\n// No read when empty\nassert property (@(posedge clk) disable iff(!rst_n)\n  !(rd_en && empty))\n  else $error(\"FIFO underflow at time %0t\", $time);\n// Occupancy never exceeds depth\nassert property (@(posedge clk) disable iff(!rst_n)\n  occupancy <= DEPTH)\n  else $error(\"FIFO occupancy %0d exceeds depth %0d\", occupancy, DEPTH);\n// Full and empty never simultaneously true\nassert property (@(posedge clk) disable iff(!rst_n)\n  !(full && empty))\n  else $error(\"FIFO full and empty simultaneously\");"},

{"instruction": "Write an assertion to verify occupancy increments correctly on write.",
 "output": "assert property (@(posedge clk) disable iff(!rst_n)\n  (wr_en && !full && !rd_en) |=>\n  (occupancy == $past(occupancy) + 1))\n  else $error(\"Occupancy did not increment on write\");"},

{"instruction": "Write an assertion to verify occupancy decrements correctly on read.",
 "output": "assert property (@(posedge clk) disable iff(!rst_n)\n  (rd_en && !empty && !wr_en) |=>\n  (occupancy == $past(occupancy) - 1))\n  else $error(\"Occupancy did not decrement on read\");"},

{"instruction": "Write an assertion that simultaneous read and write keeps occupancy stable.",
 "output": "assert property (@(posedge clk) disable iff(!rst_n)\n  (wr_en && !full && rd_en && !empty) |=>\n  (occupancy == $past(occupancy)))\n  else $error(\"Simultaneous R/W changed occupancy incorrectly\");"},

{"instruction": "Write assertions for an arbiter: grants are one-hot, every request is eventually granted.",
 "output": "// One-hot grant\nassert property (@(posedge clk) disable iff(rst)\n  $onehot0(grant));\n// Every request granted within N cycles (liveness)\ngenvar i;\ngenerate for (i=0;i<4;i++) begin : liveness\n  assert property (@(posedge clk) disable iff(rst)\n    req[i] |-> ##[1:16] grant[i])\n    else $error(\"Client %0d starved\", i);\nend endgenerate"},

{"instruction": "Write an assertion that the round-robin pointer always advances after a grant.",
 "output": "assert property (@(posedge clk) disable iff(rst)\n  (|grant) |=> (ptr != $past(ptr)))\n  else $error(\"RR pointer did not advance after grant\");"},

{"instruction": "Write coverage for all four arbiter clients being granted.",
 "output": "covergroup arb_cov @(posedge clk);\n  cp_grant: coverpoint grant {\n    bins c0 = {4'b0001};\n    bins c1 = {4'b0010};\n    bins c2 = {4'b0100};\n    bins c3 = {4'b1000};\n    bins none = {4'b0000};\n  }\n  // Cross: all clients request simultaneously\n  cp_req: coverpoint req {\n    bins all_req = {4'b1111};\n  }\nendgroup"},

{"instruction": "Write an SVA sequence for a three-phase handshake: req, then gnt, then ack.",
 "output": "sequence three_phase;\n  req ##[1:4] gnt ##[1:4] ack;\nendsequence\nassert property (@(posedge clk) disable iff(rst)\n  req |-> three_phase)\n  else $error(\"Three-phase handshake did not complete\");"},

{"instruction": "Write an assertion ensuring an interrupt is acknowledged within a timeout.",
 "output": "assert property (@(posedge clk) disable iff(!rst_n)\n  $rose(irq) |-> ##[1:IRQ_TIMEOUT] irq_ack)\n  else $error(\"IRQ not acknowledged within %0d cycles\", IRQ_TIMEOUT);"},

{"instruction": "Write an assertion that write-enables are mutually exclusive to two different memory banks.",
 "output": "assert property (@(posedge clk) disable iff(!rst_n)\n  !(wr_en_bank0 && wr_en_bank1))\n  else $error(\"Simultaneous write to both memory banks\");"},

{"instruction": "Write a cover property to verify back-to-back AXI transactions occur.",
 "output": "cover property (@(posedge clk) disable iff(!rst_n)\n  (arvalid && arready) ##1 (arvalid && arready));"},

{"instruction": "Write an assertion that RRESP is always OKAY for a simple slave.",
 "output": "assert property (@(posedge clk) disable iff(!rst_n)\n  (rvalid && rready) |-> (rresp == 2'b00))\n  else $error(\"Non-OKAY read response: %b\", rresp);"},

{"instruction": "Write a complete SVA checker module for an AXI4-Lite slave.",
 "output": "module axi_lite_sva (\n  input clk, rst_n,\n  input awvalid, awready, wvalid, wready, bvalid, bready,\n  input arvalid, arready, rvalid, rready,\n  input [1:0] bresp, rresp\n);\n  // Write channel stability\n  ap_awvalid: assert property (@(posedge clk) disable iff(!rst_n)\n    (awvalid && !awready) |-> ##1 awvalid);\n  ap_wvalid:  assert property (@(posedge clk) disable iff(!rst_n)\n    (wvalid && !wready) |-> ##1 wvalid);\n  ap_bvalid:  assert property (@(posedge clk) disable iff(!rst_n)\n    (bvalid && !bready) |-> ##1 bvalid);\n  // Read channel stability\n  ap_arvalid: assert property (@(posedge clk) disable iff(!rst_n)\n    (arvalid && !arready) |-> ##1 arvalid);\n  ap_rvalid:  assert property (@(posedge clk) disable iff(!rst_n)\n    (rvalid && !rready) |-> ##1 rvalid);\n  // Response correctness\n  ap_bresp:   assert property (@(posedge clk) disable iff(!rst_n)\n    (bvalid && bready) |-> (bresp == 2'b00));\n  ap_rresp:   assert property (@(posedge clk) disable iff(!rst_n)\n    (rvalid && rready) |-> (rresp == 2'b00));\nendmodule\nbind my_axi_slave axi_lite_sva u_sva(.*);"},

{"instruction": "Write an assertion to detect combinational loops between valid and ready.",
 "output": "// A combinational dependency from tvalid → tready creates a loop\n// Detect by checking tready changed in same delta as tvalid\n// In simulation:\nalways @(tvalid) begin\n  #0; // let combinational settle\n  if (tready !== $past(tready, 0))\n    $warning(\"tready has combinational dependency on tvalid — potential deadlock\");\nend"},

{"instruction": "Write functional coverage for AXI burst types FIXED, INCR, and WRAP.",
 "output": "covergroup axi_burst_type @(posedge clk);\n  cp_burst: coverpoint arburst {\n    bins fixed = {2'b00};\n    bins incr  = {2'b01};\n    bins wrap  = {2'b10};\n  }\n  cp_len: coverpoint arlen {\n    bins len1   = {0};\n    bins len4   = {3};\n    bins len8   = {7};\n    bins len16  = {15};\n    bins len256 = {255};\n  }\nendgroup"},

{"instruction": "Write an assertion to check that a FIFO never has both full and empty asserted.",
 "output": "assert property (@(posedge clk) disable iff(!rst_n)\n  !(full && empty))\n  else $fatal(1, \"Impossible FIFO state: full and empty both asserted\");"},

{"instruction": "Write a liveness assertion that every FIFO write eventually produces a readable output.",
 "output": "assert property (@(posedge clk) disable iff(!rst_n)\n  (wr_en && !full) |-> ##[1:DEPTH+2] (!empty))\n  else $error(\"Data written to FIFO never became readable\");"},

{"instruction": "Write an SVA to verify a DMA counter counts exactly the programmed number of beats.",
 "output": "assert property (@(posedge clk) disable iff(!rst_n)\n  $rose(dma_start) |-> ##[1:$] (dma_done && (beat_count == programmed_beats)))\n  else $error(\"DMA beat count mismatch: expected %0d got %0d\",\n    programmed_beats, beat_count);"},

{"instruction": "Write an assertion that a synchronous reset clears all outputs within one cycle.",
 "output": "assert property (@(posedge clk)\n  $rose(rst) |=> (data_out == '0 && valid_out == 0 && ready_out == 0))\n  else $error(\"Reset did not clear outputs in one cycle\");"},

{"instruction": "Write coverage for FIFO boundary conditions: empty, one entry, full-minus-one, full.",
 "output": "covergroup fifo_boundary @(posedge clk);\n  cp_occ: coverpoint occupancy {\n    bins empty      = {0};\n    bins one        = {1};\n    bins almost_full = {DEPTH-1};\n    bins full       = {DEPTH};\n  }\nendgroup"},

{"instruction": "Write an assertion ensuring a master never issues a new AXI transaction before receiving the response to the previous one (for a non-pipelined master).",
 "output": "reg outstanding;\nalways @(posedge clk or negedge rst_n) begin\n  if (!rst_n) outstanding <= 0;\n  else if (awvalid && awready) outstanding <= 1;\n  else if (bvalid && bready)   outstanding <= 0;\nend\nassert property (@(posedge clk) disable iff(!rst_n)\n  (awvalid && awready) |-> !outstanding)\n  else $error(\"New AXI write issued while previous response outstanding\");"},

{"instruction": "Write an assertion to verify write data matches what was written to a register.",
 "output": "assert property (@(posedge clk) disable iff(!rst_n)\n  (wr_en && wr_addr == TARGET_ADDR) |=>\n  (reg_out == $past(wr_data)))\n  else $error(\"Register write mismatch: wrote %h read %h\", $past(wr_data), reg_out);"},

{"instruction": "Write a complete SVA bind file for a round-robin arbiter.",
 "output": "module rr_arb_sva #(parameter N=4) (\n  input clk, rst,\n  input [N-1:0] req, grant\n);\n  // One-hot or zero grants\n  ap_onehot: assert property (@(posedge clk) disable iff(rst)\n    $onehot0(grant));\n  // No spurious grants\n  ap_no_spurious: assert property (@(posedge clk) disable iff(rst)\n    (grant != 0) |-> (grant & req) != 0);\n  // Liveness: each requester eventually granted\n  genvar i;\n  generate for (i=0;i<N;i++) begin\n    ap_live: assert property (@(posedge clk) disable iff(rst)\n      req[i] |-> ##[1:N*4] grant[i]);\n  end endgenerate\nendmodule\nbind rr_arb rr_arb_sva #(.N(4)) u_sva(.clk,.rst,.req,.grant);"},

{"instruction": "Write an assertion that a pipeline does not produce output before input is valid.",
 "output": "assert property (@(posedge clk) disable iff(!rst_n)\n  !valid_in throughout ##[1:PIPE_DEPTH] 1'b1 |-> !valid_out)\n  else $error(\"Pipeline produced output without valid input\");"},

{"instruction": "Write an SVA to verify a watchdog fires within the timeout window.",
 "output": "assert property (@(posedge clk) disable iff(!rst_n)\n  $rose(wdt_enable) |-> \n  (kick throughout ##[1:WDT_TIMEOUT] 1'b1) or\n  ##[WDT_TIMEOUT:WDT_TIMEOUT] $rose(wdt_rst))\n  else $error(\"Watchdog did not fire at timeout\");"},

{"instruction": "Write a protocol checker for a simple req/ack handshake.",
 "output": "module req_ack_checker (input clk, rst, input req, ack);\n  // Req must be held until ack\n  assert property (@(posedge clk) disable iff(rst)\n    (req && !ack) |-> ##1 req)\n    else $error(\"Req deasserted before ack\");\n  // Ack only when req is high\n  assert property (@(posedge clk) disable iff(rst)\n    ack |-> req)\n    else $error(\"Ack without req\");\n  // Ack is a single cycle pulse\n  assert property (@(posedge clk) disable iff(rst)\n    ack |=> !ack)\n    else $error(\"Ack held high for more than one cycle\");\nendmodule"},

{"instruction": "Write coverage to verify all state machine transitions are exercised.",
 "output": "covergroup fsm_transitions @(posedge clk);\n  cp_state: coverpoint state {\n    bins IDLE  = {IDLE};\n    bins BUSY  = {BUSY};\n    bins DONE  = {DONE};\n    bins ERROR = {ERROR};\n  }\n  cp_trans: coverpoint state {\n    bins idle_to_busy  = (IDLE  => BUSY);\n    bins busy_to_done  = (BUSY  => DONE);\n    bins busy_to_error = (BUSY  => ERROR);\n    bins done_to_idle  = (DONE  => IDLE);\n    bins error_to_idle = (ERROR => IDLE);\n  }\nendgroup"},

{"instruction": "Write an assertion that checks FIFO read data is stable until acknowledged.",
 "output": "assert property (@(posedge clk) disable iff(!rst_n)\n  (!empty && !rd_en) |=> $stable(data_out))\n  else $error(\"FIFO read data changed without rd_en\");"},

{"instruction": "Write an assertion to flag back-pressure violations in a pipeline.",
 "output": "assert property (@(posedge clk) disable iff(!rst_n)\n  (m_valid && !m_ready) |-> ##1 (m_valid && $stable(m_data)))\n  else $error(\"Pipeline dropped data or changed data under backpressure\");"},

# ============================================================
# WEAK AREA 3: AXI PROTOCOL DEEP (80 examples)
# ============================================================

# --- AXI4-Lite Correct Implementations ---
{"instruction": "Write a correct AXI4-Lite slave with proper write channel decoupling.",
 "output": "module axi_lite_slave (\n  input clk, rst_n,\n  input [31:0] awaddr, wdata,\n  input awvalid, wvalid, bready,\n  output reg awready, wready, bvalid,\n  output reg [1:0] bresp\n);\n  reg [31:0] reg0, reg1, reg2, reg3;\n  reg aw_done, w_done;\n  reg [31:0] aw_addr_lat;\n  reg [31:0] w_data_lat;\n\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin\n      awready<=0; wready<=0; bvalid<=0;\n      aw_done<=0; w_done<=0;\n    end else begin\n      // Accept AW independently\n      if (awvalid && !awready && !aw_done) begin\n        awready    <= 1;\n        aw_addr_lat<= awaddr;\n        aw_done    <= 1;\n      end else awready <= 0;\n\n      // Accept W independently; latch the data because the master may\n      // change wdata as soon as the W handshake completes\n      if (wvalid && !wready && !w_done) begin\n        wready     <= 1;\n        w_data_lat <= wdata;\n        w_done     <= 1;\n      end else wready <= 0;\n\n      // Write when both captured\n      if (aw_done && w_done && !bvalid) begin\n        case (aw_addr_lat[3:2])\n          2'b00: reg0 <= w_data_lat;\n          2'b01: reg1 <= w_data_lat;\n          2'b10: reg2 <= w_data_lat;\n          2'b11: reg3 <= w_data_lat;\n        endcase\n        bvalid <= 1; bresp <= 2'b00;\n        aw_done <= 0; w_done <= 0;\n      end\n      if (bvalid && bready) bvalid <= 0;\n    end\n  end\nendmodule"},

{"instruction": "Explain why AXI4-Lite write address and write data channels must be accepted independently.",
 "output": "The AXI specification allows a master to issue AW and W in any order — the data may arrive before the address or after it. A slave that only accepts W when AW is already present, or vice versa, can deadlock with a master that sends them in the opposite order. Each channel must have its own acceptance register (aw_done, w_done flags) and be accepted independently. The write to the register bank and the write response (BVALID) should only be issued after both have been captured."},

{"instruction": "Debug: This AXI4-Lite slave only accepts W when AW has arrived. Fix the deadlock.\nif (awvalid && wvalid) begin awready<=1; wready<=1; end",
 "output": "This creates a deadlock if the master sends AW and W independently. Fix by accepting each channel independently:\nreg aw_pend, w_pend;\nreg [31:0] aw_addr_hold, w_data_hold;\n// Accept AW channel\nif (awvalid && !aw_pend) begin awready<=1; aw_addr_hold<=awaddr; aw_pend<=1; end\nelse awready<=0;\n// Accept W channel independently\nif (wvalid && !w_pend) begin wready<=1; w_data_hold<=wdata; w_pend<=1; end\nelse wready<=0;\n// Write and respond when both received\nif (aw_pend && w_pend && !bvalid) begin\n  /* write to register */ bvalid<=1; bresp<=0; aw_pend<=0; w_pend<=0;\nend"},

{"instruction": "Write a correct AXI4-Lite read channel with proper ARREADY/RVALID sequencing.",
 "output": "module axi_lite_rd (\n  input clk, rst_n,\n  input [31:0] araddr, input arvalid, input rready,\n  output reg arready, output reg [31:0] rdata, output reg rvalid, output reg [1:0] rresp\n);\n  reg [31:0] reg0=32'hDEAD, reg1=32'hBEEF, reg2=32'hCAFE, reg3=32'hF00D;\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin arready<=0; rvalid<=0; end\n    else begin\n      arready <= 0;\n      if (arvalid && !arready && !rvalid) begin\n        arready <= 1;\n        case (araddr[3:2])\n          2'b00: rdata <= reg0;\n          2'b01: rdata <= reg1;\n          2'b10: rdata <= reg2;\n          2'b11: rdata <= reg3;\n        endcase\n        rvalid <= 1; rresp <= 2'b00;\n      end\n      if (rvalid && rready) rvalid <= 0;\n    end\n  end\nendmodule"},

{"instruction": "Write a full AXI4-Lite slave combining read and write channels with 4 registers.",
 "output": "module axil_slave_full (\n  input clk, rst_n,\n  // Write\n  input [3:0] awaddr, input awvalid, output reg awready,\n  input [31:0] wdata, input wvalid, output reg wready,\n  output reg bvalid, input bready, output reg [1:0] bresp,\n  // Read\n  input [3:0] araddr, input arvalid, output reg arready,\n  output reg [31:0] rdata, output reg rvalid, input rready, output reg [1:0] rresp\n);\n  reg [31:0] regs [0:3];\n  reg aw_pend; reg [3:0] aw_addr_h;\n  reg w_pend;  reg [31:0] w_data_h;\n  integer i;\n\n  initial for(i=0;i<4;i=i+1) regs[i]=0;\n\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin\n      awready<=0; wready<=0; bvalid<=0;\n      arready<=0; rvalid<=0;\n      aw_pend<=0; w_pend<=0;\n    end else begin\n      // Write address\n      awready<=0;\n      if (awvalid && !aw_pend) begin awready<=1; aw_addr_h<=awaddr; aw_pend<=1; end\n      // Write data\n      wready<=0;\n      if (wvalid && !w_pend) begin wready<=1; w_data_h<=wdata; w_pend<=1; end\n      // Write commit\n      if (aw_pend && w_pend && !bvalid) begin\n        regs[aw_addr_h[3:2]] <= w_data_h;\n        bvalid<=1; bresp<=0; aw_pend<=0; w_pend<=0;\n      end\n      if (bvalid && bready) bvalid<=0;\n      // Read\n      arready<=0;\n      if (arvalid && !rvalid) begin\n        arready<=1;\n        rdata <= regs[araddr[3:2]];\n        rvalid<=1; rresp<=0;\n      end\n      if (rvalid && rready) rvalid<=0;\n    end\n  end\nendmodule"},

{"instruction": "Write a correct AXI-Stream skid buffer that never drops data under backpressure.",
 "output": "module axis_skid #(parameter W=32) (\n  input clk, rst_n,\n  input [W-1:0] s_tdata, input s_tvalid, output reg s_tready,\n  output reg [W-1:0] m_tdata, output reg m_tvalid, input m_tready\n);\n  reg [W-1:0] skid_data;\n  reg skid_valid;\n\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin\n      m_tvalid<=0; skid_valid<=0; s_tready<=1;\n    end else begin\n      // Consume skid buffer first\n      if (skid_valid && (!m_tvalid || m_tready)) begin\n        m_tdata  <= skid_data;\n        m_tvalid <= 1;\n        skid_valid <= 0;\n        s_tready <= 1;\n      end\n      // Normal passthrough\n      else if (s_tvalid && s_tready) begin\n        if (!m_tvalid || m_tready) begin\n          m_tdata  <= s_tdata;\n          m_tvalid <= 1;\n        end else begin\n          // Downstream not ready — store in skid buffer\n          skid_data  <= s_tdata;\n          skid_valid <= 1;\n          s_tready   <= 0; // stop accepting input\n        end\n      end\n      // Clear m_tvalid when consumed\n      else if (m_tvalid && m_tready && !skid_valid) begin\n        m_tvalid <= 0;\n      end\n    end\n  end\nendmodule"},

{"instruction": "Explain the difference between a register slice and a skid buffer in AXI-Stream.",
 "output": "A register slice adds exactly one pipeline register stage — it cuts combinational paths but adds one cycle of latency. It breaks ready/valid timing paths but cannot handle back-to-back transfers when downstream deasserts ready, because it has no overflow storage.\n\nA skid buffer adds an extra hold register alongside the pipeline register. When downstream deasserts ready while data is in-flight, the incoming beat is stored in the skid register instead of being dropped. This allows zero-bubble throughput under backpressure at the cost of slightly more area."},

{"instruction": "Debug: This AXI-Stream module asserts m_tready combinationally from s_tvalid.\nassign m_tready = s_tvalid && downstream_ready;",
 "output": "This creates a combinational loop if upstream computes s_tvalid from m_tready. Fix by registering m_tready:\nalways @(posedge clk)\n  m_tready <= downstream_ready && !fifo_full;\nNever make tready combinationally dependent on tvalid from the same interface direction."},

{"instruction": "Write an AXI-Stream register slice with one cycle latency and zero throughput loss.",
 "output": "module axis_reg_slice #(parameter W=32) (\n  input clk, rst_n,\n  input [W-1:0] s_tdata, input s_tvalid, output s_tready,\n  output reg [W-1:0] m_tdata, output reg m_tvalid, input m_tready\n);\n  // Accept a new beat whenever the output register is empty or is being drained this cycle.\n  // s_tready must be combinational: a registered copy lags m_tready by one cycle and would let\n  // an incoming beat overwrite m_tdata while downstream is stalling.\n  assign s_tready = !m_tvalid || m_tready;\n\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) m_tvalid <= 0;\n    else if (s_tready) begin\n      m_tvalid <= s_tvalid;\n      if (s_tvalid) m_tdata <= s_tdata;\n    end\n  end\nendmodule"},

{"instruction": "Write an AXI4-Lite master that performs a single write transaction.",
 "output": "module axil_master_wr (\n  input clk, rst_n,\n  input start, input [31:0] addr, data,\n  output reg done,\n  output reg [31:0] awaddr, wdata,\n  output reg awvalid, wvalid, bready,\n  input awready, wready, bvalid, input [1:0] bresp\n);\n  reg [1:0] state;\n  localparam IDLE=0, AW_W=1, RESP=2;\n\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin\n      state<=IDLE; awvalid<=0; wvalid<=0; bready<=0; done<=0;\n    end else begin\n      done <= 0;\n      case (state)\n        IDLE: if (start) begin\n          awaddr<=addr; wdata<=data;\n          awvalid<=1; wvalid<=1;\n          state<=AW_W;\n        end\n        AW_W: begin\n          if (awready) awvalid<=0;\n          if (wready)  wvalid<=0;\n          // Raise BREADY only once both handshakes are done, so the response\n          // can never be consumed while the FSM is still in AW_W\n          if ((!awvalid || awready) && (!wvalid || wready)) begin bready<=1; state<=RESP; end\n        end\n        RESP: if (bvalid) begin bready<=0; done<=1; state<=IDLE; end\n        default: state<=IDLE;\n      endcase\n    end\n  end\nendmodule"},

{"instruction": "Write an AXI4-Lite master that performs a single read transaction.",
 "output": "module axil_master_rd (\n  input clk, rst_n,\n  input start, input [31:0] addr,\n  output reg done, output reg [31:0] rd_data,\n  output reg [31:0] araddr, output reg arvalid, rready,\n  input arready, input [31:0] rdata, input rvalid, input [1:0] rresp\n);\n  reg [1:0] state;\n  localparam IDLE=0, AR=1, RD=2;\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin state<=IDLE; arvalid<=0; rready<=0; done<=0; end\n    else begin\n      done<=0;\n      case(state)\n        IDLE: if(start) begin araddr<=addr; arvalid<=1; rready<=1; state<=AR; end\n        AR:   if(arready) begin arvalid<=0; state<=RD; end\n        RD:   if(rvalid) begin rd_data<=rdata; rready<=0; done<=1; state<=IDLE; end\n      endcase\n    end\n  end\nendmodule"},

{"instruction": "Write a write response buffer for an AXI4-Lite slave to handle out-of-order AW/W.",
 "output": "// Stores up to 2 pending write responses\nmodule bresp_buffer (\n  input clk, rst_n,\n  input wr_commit, // both AW and W received\n  output reg bvalid, input bready, output reg [1:0] bresp\n);\n  reg [1:0] resp_q [0:1];\n  reg [1:0] resp_valid;\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin resp_valid<=0; bvalid<=0; end\n    else begin\n      if (wr_commit && !resp_valid[0]) begin resp_q[0]<=2'b00; resp_valid[0]<=1; end\n      else if (wr_commit && !resp_valid[1]) begin resp_q[1]<=2'b00; resp_valid[1]<=1; end\n      if (!bvalid && resp_valid[0]) begin bvalid<=1; bresp<=resp_q[0]; end\n      if (bvalid && bready) begin\n        bvalid<=0;\n        resp_valid[0]<=resp_valid[1];\n        resp_q[0]<=resp_q[1];\n        resp_valid[1]<=0;\n      end\n    end\n  end\nendmodule"},

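{"instruction": "Explain the AXI4 ordering rules for transactions with the same ID.",
 "output": "AXI4 requires that transactions with the same ID stay in order. A master may have several transactions with the same ID outstanding at once; the requirement is on how they complete: read data for transactions with the same ARID must be returned in the order the addresses were issued, and write responses for transactions with the same AWID must be returned in the order the addresses were issued. This allows an interconnect to track in-flight transactions by ID and route responses back correctly. Transactions with different IDs have no ordering restriction and can complete out of order — this is how AXI achieves high throughput with multiple outstanding transactions. A master that needs two transactions to complete in order must give them the same ID."},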

{"instruction": "Write an AXI4-Lite decoder that maps addresses to one of three slaves.",
 "output": "module axil_decoder (\n  input [31:0] araddr, awaddr,\n  output reg [1:0] rd_sel, wr_sel\n);\n  // Slave 0: 0x0000_0000 - 0x0000_FFFF\n  // Slave 1: 0x0001_0000 - 0x0001_FFFF\n  // Slave 2: 0x4000_0000 - 0x4000_FFFF\n  always @(*) begin\n    case (araddr[31:16])\n      16'h0000: rd_sel = 0;\n      16'h0001: rd_sel = 1;\n      16'h4000: rd_sel = 2;\n      default:  rd_sel = 2'bxx; // error\n    endcase\n    case (awaddr[31:16])\n      16'h0000: wr_sel = 0;\n      16'h0001: wr_sel = 1;\n      16'h4000: wr_sel = 2;\n      default:  wr_sel = 2'bxx;\n    endcase\n  end\nendmodule"},

{"instruction": "Write an AXI-Stream FIFO that provides buffering between a fast producer and slow consumer.",
 "output": "module axis_fifo #(parameter W=32, DEPTH=16) (\n  input clk, rst_n,\n  input [W-1:0] s_tdata, input s_tvalid, output s_tready,\n  output reg [W-1:0] m_tdata, output reg m_tvalid, input m_tready\n);\n  reg [W-1:0] mem [0:DEPTH-1];\n  reg [$clog2(DEPTH):0] wr_ptr, rd_ptr;\n  wire full  = (wr_ptr[$clog2(DEPTH)] != rd_ptr[$clog2(DEPTH)]) &&\n               (wr_ptr[$clog2(DEPTH)-1:0] == rd_ptr[$clog2(DEPTH)-1:0]);\n  wire empty = (wr_ptr == rd_ptr);\n  assign s_tready = !full;\n\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin wr_ptr<=0; rd_ptr<=0; m_tvalid<=0; end\n    else begin\n      if (s_tvalid && !full) begin\n        mem[wr_ptr[$clog2(DEPTH)-1:0]] <= s_tdata;\n        wr_ptr <= wr_ptr+1;\n      end\n      if (!empty && (!m_tvalid || m_tready)) begin\n        m_tdata  <= mem[rd_ptr[$clog2(DEPTH)-1:0]];\n        m_tvalid <= 1;\n        rd_ptr   <= rd_ptr+1;\n      end else if (m_tvalid && m_tready && empty) begin\n        m_tvalid <= 0;\n      end\n    end\n  end\nendmodule"},

{"instruction": "Write an AXI-Stream width converter from 8-bit to 32-bit.",
 "output": "module axis_8to32 (\n  input clk, rst_n,\n  input [7:0] s_tdata, input s_tvalid, output reg s_tready,\n  output reg [31:0] m_tdata, output reg m_tvalid, input m_tready\n);\n  reg [7:0] byte_buf [0:3]; // note: 'buf' is a reserved Verilog keyword (gate primitive)\n  reg [1:0] cnt;\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin cnt<=0; m_tvalid<=0; s_tready<=1; end\n    else begin\n      if (s_tvalid && s_tready) begin\n        byte_buf[cnt] <= s_tdata;\n        cnt <= cnt+1;\n        if (cnt==3) begin\n          m_tdata  <= {s_tdata, byte_buf[2], byte_buf[1], byte_buf[0]};\n          m_tvalid <= 1;\n          s_tready <= 0;\n          cnt <= 0;\n        end\n      end\n      if (m_tvalid && m_tready) begin m_tvalid<=0; s_tready<=1; end\n    end\n  end\nendmodule"},

{"instruction": "Write an AXI-Stream packet length counter.",
 "output": "module axis_pkt_len (\n  input clk, rst_n,\n  input s_tvalid, s_tready, s_tlast,\n  output reg [15:0] pkt_len, output reg pkt_done\n);\n  reg [15:0] cnt;\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin cnt<=0; pkt_done<=0; pkt_len<=0; end\n    else begin\n      pkt_done <= 0;\n      if (s_tvalid && s_tready) begin\n        cnt <= cnt+1;\n        if (s_tlast) begin pkt_len<=cnt+1; pkt_done<=1; cnt<=0; end\n      end\n    end\n  end\nendmodule"},

{"instruction": "Debug: This AXI4-Lite slave asserts AWREADY for multiple cycles. Fix it.\nalways @(posedge clk)\n  if (awvalid) awready <= 1;\n  else awready <= 0;",
 "output": "AWREADY being held high while AWVALID is high causes the address to be captured multiple times. AWREADY should pulse for exactly one cycle:\nalways @(posedge clk or negedge rst_n) begin\n  if (!rst_n) awready <= 0;\n  else if (awvalid && !awready && !aw_pend) awready <= 1; // pulse one cycle\n  else awready <= 0; // deassert next cycle\nend"},

{"instruction": "Write an AXI-Stream header parser that extracts the first 4 bytes as a header.",
 "output": "module axis_hdr_parse #(parameter W=8) (\n  input clk, rst_n,\n  input [W-1:0] s_tdata, input s_tvalid, s_tlast, output reg s_tready,\n  output reg [31:0] header, output reg hdr_valid\n);\n  reg [1:0] byte_cnt;\n  reg in_hdr;\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin byte_cnt<=0; in_hdr<=1; hdr_valid<=0; s_tready<=1; end\n    else begin\n      hdr_valid<=0;\n      if (s_tvalid && s_tready) begin\n        if (in_hdr) begin\n          header[byte_cnt*8+:8] <= s_tdata;\n          if (byte_cnt==3) begin in_hdr<=0; hdr_valid<=1; end\n          byte_cnt <= byte_cnt+1;\n        end\n        if (s_tlast) begin in_hdr<=1; byte_cnt<=0; end\n      end\n    end\n  end\nendmodule"},

{"instruction": "Write an AXI4-Lite error slave that returns SLVERR for all transactions.",
 "output": "module axil_error_slave (\n  input clk, rst_n,\n  input awvalid, output reg awready,\n  input wvalid, output reg wready,\n  output reg bvalid, input bready, output reg [1:0] bresp,\n  input arvalid, output reg arready,\n  output reg rvalid, input rready, output reg [31:0] rdata, output reg [1:0] rresp\n);\n  reg aw_done, w_done; // set once the corresponding channel handshake has completed\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin awready<=0; wready<=0; bvalid<=0; arready<=0; rvalid<=0; aw_done<=0; w_done<=0; end\n    else begin\n      // Accept AW and W independently, one write at a time\n      awready <= awvalid && !awready && !aw_done && !bvalid;\n      wready  <= wvalid  && !wready  && !w_done  && !bvalid;\n      if (awvalid&&awready) aw_done<=1;\n      if (wvalid&&wready)   w_done<=1;\n      // BVALID may only be raised after BOTH the AW and W handshakes\n      if (aw_done&&w_done&&!bvalid) begin bvalid<=1; aw_done<=0; w_done<=0; end\n      if (bvalid&&bready) bvalid<=0;\n      bresp <= 2'b10; // SLVERR\n      // Accept a read only when no read response is still pending\n      arready <= arvalid && !arready && !rvalid;\n      if (arvalid&&arready) begin rvalid<=1; rdata<=32'hDEAD_DEAD; rresp<=2'b10; end\n      if (rvalid&&rready) rvalid<=0;\n    end\n  end\nendmodule"},

{"instruction": "Write an AXI4-Lite interconnect that arbitrates between two masters.",
 "output": "module axil_arb2 (\n  input clk, rst_n,\n  // Master 0\n  input m0_awvalid, input [31:0] m0_awaddr, output m0_awready,\n  input m0_arvalid, input [31:0] m0_araddr, output m0_arready,\n  // Master 1\n  input m1_awvalid, input [31:0] m1_awaddr, output m1_awready,\n  input m1_arvalid, input [31:0] m1_araddr, output m1_arready,\n  // Slave\n  output reg s_awvalid, output reg [31:0] s_awaddr, input s_awready,\n  output reg s_arvalid, output reg [31:0] s_araddr, input s_arready\n);\n  reg wr_grant, rd_grant; // 0=m0, 1=m1\n  // Simple fixed priority: m0 > m1\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin wr_grant<=0; rd_grant<=0; s_awvalid<=0; s_arvalid<=0; end\n    else begin\n      // Write arbitration\n      if (!s_awvalid) begin\n        if (m0_awvalid) begin s_awvalid<=1; s_awaddr<=m0_awaddr; wr_grant<=0; end\n        else if (m1_awvalid) begin s_awvalid<=1; s_awaddr<=m1_awaddr; wr_grant<=1; end\n      end else if (s_awready) s_awvalid<=0;\n      // Read arbitration\n      if (!s_arvalid) begin\n        if (m0_arvalid) begin s_arvalid<=1; s_araddr<=m0_araddr; rd_grant<=0; end\n        else if (m1_arvalid) begin s_arvalid<=1; s_araddr<=m1_araddr; rd_grant<=1; end\n      end else if (s_arready) s_arvalid<=0;\n    end\n  end\n  // A master only sees READY while its own request is the one being forwarded to the slave;\n  // without the s_*valid term an idle-ready slave would complete handshakes that are never forwarded\n  assign m0_awready = s_awvalid && s_awready && (wr_grant==0);\n  assign m1_awready = s_awvalid && s_awready && (wr_grant==1);\n  assign m0_arready = s_arvalid && s_arready && (rd_grant==0);\n  assign m1_arready = s_arvalid && s_arready && (rd_grant==1);\nendmodule"},

# ============================================================
# BONUS AREA 4: LATCH FIXES / COMMON DEBUG (40 examples)
# ============================================================

{"instruction": "Fix this latch-inferring always block by adding a default assignment.\nalways @(*) begin\n  case(cmd)\n    2'b00: out = a;\n    2'b01: out = b;\n  endcase\nend",
 "output": "Add a default value before the case statement so all branches are covered:\nalways @(*) begin\n  out = '0; // default prevents latch\n  case(cmd)\n    2'b00: out = a;\n    2'b01: out = b;\n    default: out = '0;\n  endcase\nend"},

{"instruction": "Fix this latch-inferring if-else chain:\nalways @(*) begin\n  if (sel == 2'b00) y = a;\n  else if (sel == 2'b01) y = b;\nend",
 "output": "Add a final else to cover all cases:\nalways @(*) begin\n  if      (sel == 2'b00) y = a;\n  else if (sel == 2'b01) y = b;\n  else                   y = '0; // prevents latch\nend"},

{"instruction": "Explain why adding a clock to fix a latch is wrong.\nalways @(*) begin\n  case(sel)\n    2'b00: y = a;\n    2'b01: y = b;\n  endcase\nend\n// Wrong fix: always @(posedge clk) ...",
 "output": "Adding a clock changes the design from combinational to sequential — it adds a register which changes the timing and behavior of the circuit entirely. The correct fix for a latch inference is to ensure all possible input combinations produce a defined output, either by adding a default assignment, completing all case branches, or adding a final else. The logic should remain combinational (always @(*)) with complete branch coverage."},

{"instruction": "Fix this incomplete case statement that infers latches for all outputs.\nalways @(*) begin\n  case(state)\n    IDLE: begin next=START; out=1; end\n    START: begin next=DONE; end\n    DONE: begin next=IDLE; out=0; end\n  endcase\nend",
 "output": "The START state doesn't assign 'out', inferring a latch. Add defaults before the case:\nalways @(*) begin\n  next = IDLE; // default\n  out  = 0;    // default\n  case(state)\n    IDLE:  begin next=START; out=1; end\n    START: begin next=DONE;  out=0; end // explicit assignment\n    DONE:  begin next=IDLE;  out=0; end\n    default: begin next=IDLE; out=0; end\n  endcase\nend"},

{"instruction": "Debug: This FSM transition uses blocking assignment causing race conditions.\nalways @(posedge clk) begin\n  state = next_state; // blocking!\nend",
 "output": "Sequential always blocks must use non-blocking assignments to model flip-flop behavior:\nalways @(posedge clk) begin\n  state <= next_state; // non-blocking\nend\nUsing blocking assignment in a clocked always block causes the assignment to take effect immediately within the same time step, potentially causing race conditions with other always blocks that read state in the same clock edge."},

{"instruction": "Debug: This register is assigned in two always blocks.\nalways @(posedge clk) data_r <= a;\nalways @(posedge clk) data_r <= b;",
 "output": "Driving the same signal from multiple always blocks is illegal in Verilog and causes undefined behavior (X in simulation, arbitrary result in synthesis). Fix by combining into one always block:\nalways @(posedge clk) begin\n  if (sel_a) data_r <= a;\n  else       data_r <= b;\nend"},

{"instruction": "Fix this module where output depends on itself combinationally (combinational loop).\nassign valid_out = valid_in && valid_out;",
 "output": "A signal that depends on itself creates a combinational loop with no stable state. Break the loop with a register:\nalways @(posedge clk or negedge rst_n) begin\n  if (!rst_n) valid_out <= 0;\n  else        valid_out <= valid_in && some_other_condition;\nend"},

{"instruction": "Debug: This synchronous reset uses the wrong polarity.\nalways @(posedge clk) begin\n  if (rst_n) data <= 0; // wrong! rst_n is active-low\n  else data <= data_in;\nend",
 "output": "Active-low reset (rst_n) resets when LOW (logic 0), not when high. Fix:\nalways @(posedge clk) begin\n  if (!rst_n) data <= 0;  // reset when rst_n is LOW\n  else        data <= data_in;\nend"},

{"instruction": "Debug: This counter never resets because the reset condition is unreachable.\nalways @(posedge clk) begin\n  if (cnt == 8'hFF && rst) cnt <= 0;\n  else cnt <= cnt + 1;\nend",
 "output": "Reset should not depend on the counter value — it must work at any count:\nalways @(posedge clk) begin\n  if (rst) cnt <= 0;         // reset at any time\n  else     cnt <= cnt + 1;\nend"},

{"instruction": "Debug: This FIFO pointer uses DEPTH instead of DEPTH-1 as the address mask.\nmem[wr_ptr % DEPTH] <= data_in;",
 "output": "Using modulo is synthesizable only when DEPTH is a power of two. For power-of-two depths, use bit masking instead:\nmem[wr_ptr[AW-1:0]] <= data_in; // AW = $clog2(DEPTH)\nThis is both correct and efficient in hardware. For non-power-of-two depths, use a comparison and wrap:\nif (wr_ptr_addr == DEPTH-1) wr_ptr_addr <= 0;\nelse wr_ptr_addr <= wr_ptr_addr + 1;"},

{"instruction": "Debug: This enable signal is generated combinationally from two registered signals causing a glitch.\nassign write_en = reg_a && reg_b;",
 "output": "If reg_a and reg_b are registered but update at different times relative to the consuming logic, the combinational AND can glitch. Register the enable:\nalways @(posedge clk)\n  write_en <= reg_a && reg_b;\nThis adds one cycle of latency but eliminates the glitch completely."},

{"instruction": "Explain the difference between blocking and non-blocking assignments with an example.",
 "output": "Blocking (=) executes sequentially within the same time step:\n  a = b; b = a; // a gets old b, then b gets new a (which is old b) — SWAP FAILS\n\nNon-blocking (<=) evaluates RHS before any assignments take effect:\n  a <= b; b <= a; // both read old values, then both update — SWAP WORKS\n\nRule: always use <= in clocked (sequential) always blocks. Use = in combinational (always @(*)) blocks and tasks/functions."},

{"instruction": "Fix this generate loop that accidentally drives the same port multiple times.\ngenerate\n  for(i=0;i<4;i++) begin\n    assign data_out = mem[i]; // drives data_out 4 times!\n  end\nendgenerate",
 "output": "Each iteration overwrites the previous assignment creating multiple drivers. Use the loop index to select:\nalways @(*) begin\n  data_out = '0;\n  for(int i=0;i<4;i++)\n    if (sel==i) data_out = mem[i];\nend\n// Or use a mux:\nassign data_out = mem[sel];"},

{"instruction": "Debug: This parameterized module uses a parameter before it's declared.\nassign out = in[WIDTH-1:0];\nparameter WIDTH = 8;",
 "output": "Parameters must be declared before they are used. Move the parameter declaration to the top of the module:\nmodule mymod #(parameter WIDTH=8) (input [WIDTH-1:0] in, output [WIDTH-1:0] out);\n  assign out = in;\nendmodule"},

{"instruction": "Fix the sensitivity list bug in this always block.\nalways @(a, b) begin\n  y = a & b & c; // c is missing from sensitivity list!\nend",
 "output": "Missing signals in the sensitivity list cause simulation mismatches with synthesis. Use wildcard:\nalways @(*) begin\n  y = a & b & c;\nend\nThis automatically includes all signals read in the block and eliminates sensitivity list errors."},

{"instruction": "Debug: This registered output has a one-cycle initialization glitch.\nreg [7:0] data_out;\nalways @(posedge clk) data_out <= processed_data;",
 "output": "Without a reset, data_out starts as X in simulation and may be unknown in FPGA on power-up. Add a reset:\nalways @(posedge clk or negedge rst_n) begin\n  if (!rst_n) data_out <= 8'h00;\n  else        data_out <= processed_data;\nend"},

{"instruction": "Explain why continuous assign cannot drive reg types and always cannot drive wire types.",
 "output": "In Verilog, 'wire' represents a physical connection — it gets its value from whatever drives it (assign or module output), not from procedural code. 'reg' represents a storage element that holds its value between updates and can only be assigned in always/initial blocks. Using assign on a reg or a non-blocking assignment to a wire produces a compile error because the driver model doesn't match the variable type. SystemVerilog's 'logic' type eliminates this distinction."},

{"instruction": "Fix this testbench that applies stimulus before the reset deasserts.\ninitial begin\n  stimulus = 8'hAA;\n  rst = 0; #20; rst = 1;\nend",
 "output": "Stimulus applied during reset may be ignored or cause X propagation. Apply stimulus only after reset deasserts:\ninitial begin\n  rst = 0;\n  stimulus = '0; // hold inputs known during reset\n  #20;\n  rst = 1;\n  #5; // small settle time after reset deasserts\n  stimulus = 8'hAA; // now safe to apply stimulus\nend"},

{"instruction": "Debug: This pipeline register uses blocking assignments causing incorrect forwarding.\nalways @(posedge clk) begin\n  stage1 = stage0_data; // blocking\n  stage2 = stage1;      // captures new stage1 not old!\nend",
 "output": "With blocking assignments, stage2 captures the newly assigned stage1 value, collapsing two pipeline stages into one. Fix with non-blocking:\nalways @(posedge clk) begin\n  stage1 <= stage0_data; // scheduled\n  stage2 <= stage1;      // captures OLD stage1\nend"},

{"instruction": "Fix this always block that uses an incomplete if-else causing a latch on 'count'.\nalways @(posedge clk) begin\n  if (en) count <= count + 1;\nend",
 "output": "This is actually correct — in a clocked always block, a register without else retains its value (holds), which is the intended flip-flop behavior. This does NOT infer a latch. Latches are only inferred in combinational (always @(*)) blocks. The correct understanding:\n// In always @(posedge clk): missing else = hold (flip-flop behavior) = CORRECT\n// In always @(*):           missing else = latch                      = WRONG\nYour original code is fine as written."},

# ============================================================
# BONUS AREA 5: COMPLEX GENERATION (44 examples)
# ============================================================

{"instruction": "Design a parameterizable priority encoder with valid output.",
 "output": "module pri_enc #(parameter N=8) (\n  input [N-1:0] req,\n  output reg [$clog2(N)-1:0] idx,\n  output valid\n);\n  assign valid = |req;\n  integer i;\n  always @(*) begin\n    idx = '0;\n    for (i=N-1; i>=0; i=i-1)\n      if (req[i]) idx = i[$clog2(N)-1:0];\n  end\nendmodule"},

{"instruction": "Design a parameterizable binary to one-hot decoder.",
 "output": "module bin2onehot #(parameter N=3) (\n  input [N-1:0] bin,\n  input en,\n  output reg [(1<<N)-1:0] onehot\n);\n  always @(*) begin\n    onehot = '0;\n    if (en) onehot[bin] = 1'b1;\n  end\nendmodule"},

{"instruction": "Design a saturating adder that clamps at max value.",
 "output": "module sat_add #(parameter W=8) (\n  input [W-1:0] a, b,\n  output reg [W-1:0] sum\n);\n  wire [W:0] full_sum = {1'b0,a} + {1'b0,b};\n  always @(*)\n    sum = full_sum[W] ? {W{1'b1}} : full_sum[W-1:0];\nendmodule"},

{"instruction": "Design a parameterizable leading zero counter.",
 "output": "module lzc #(parameter W=8) (\n  input [W-1:0] in,\n  output reg [$clog2(W):0] lz_count\n);\n  integer i;\n  always @(*) begin\n    lz_count = W; // all zeros\n    // Scan LSB to MSB so the last assignment comes from the highest set bit\n    for (i=0; i<W; i=i+1)\n      if (in[i]) lz_count = W-1-i;\n  end\nendmodule"},

{"instruction": "Design a simple two-state Moore FSM for a door lock.",
 "output": "module door_lock (\n  input clk, rst_n, correct_code, timeout,\n  output reg unlocked\n);\n  reg state;\n  localparam LOCKED=0, UNLOCKED=1;\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin state<=LOCKED; unlocked<=0; end\n    else case(state)\n      LOCKED:   if (correct_code) begin state<=UNLOCKED; unlocked<=1; end\n      UNLOCKED: if (timeout)      begin state<=LOCKED;   unlocked<=0; end\n    endcase\n  end\nendmodule"},

{"instruction": "Design a traffic light controller FSM for a 4-way intersection.",
 "output": "module traffic_light (\n  input clk, rst_n,\n  output reg [1:0] ns_light, ew_light // 00=red,01=yellow,10=green\n);\n  reg [2:0] state;\n  reg [5:0] timer;\n  localparam NS_GREEN=0, NS_YELLOW=1, EW_GREEN=2, EW_YELLOW=3;\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin state<=NS_GREEN; timer<=60; ns_light<=2'b10; ew_light<=2'b00; end\n    else begin\n      if (timer>0) timer<=timer-1;\n      else case(state)\n        NS_GREEN:  begin state<=NS_YELLOW; timer<=5;  ns_light<=2'b01; end\n        NS_YELLOW: begin state<=EW_GREEN;  timer<=60; ew_light<=2'b10; ns_light<=2'b00; end\n        EW_GREEN:  begin state<=EW_YELLOW; timer<=5;  ew_light<=2'b01; end\n        EW_YELLOW: begin state<=NS_GREEN;  timer<=60; ns_light<=2'b10; ew_light<=2'b00; end\n      endcase\n    end\n  end\nendmodule"},

{"instruction": "Design a configurable PWM generator with duty cycle and period registers.",
 "output": "module pwm_gen (\n  input clk, rst_n,\n  input [15:0] period, duty,\n  output reg pwm_out\n);\n  reg [15:0] cnt;\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin cnt<=0; pwm_out<=0; end\n    else begin\n      cnt <= (cnt == period-1) ? 16'h0 : cnt+1;\n      pwm_out <= (duty==0) ? 0 : (duty>=period) ? 1 : (cnt < duty);\n    end\n  end\nendmodule"},

{"instruction": "Design a parameterizable shift register with serial in, parallel out.",
 "output": "module sipo #(parameter N=8) (\n  input clk, rst_n, si,\n  output reg [N-1:0] po\n);\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) po <= '0;\n    else po <= {po[N-2:0], si};\n  end\nendmodule"},

{"instruction": "Design a parameterizable shift register with parallel in, serial out.",
 "output": "module piso #(parameter N=8) (\n  input clk, rst_n, load, shift,\n  input [N-1:0] pi,\n  output reg so\n);\n  reg [N-1:0] sr;\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin sr<='0; so<=0; end\n    else if (load) begin sr<=pi; so<=pi[N-1]; end\n    else if (shift) begin so<=sr[N-1]; sr<={sr[N-2:0],1'b0}; end\n  end\nendmodule"},

{"instruction": "Design a binary to BCD converter for 8-bit input.",
 "output": "module bin2bcd_8 (\n  input [7:0] bin,\n  output reg [3:0] hundreds, tens, ones\n);\n  always @(*) begin\n    hundreds = bin / 100;\n    tens     = (bin % 100) / 10;\n    ones     = bin % 10;\n  end\nendmodule"},

{"instruction": "Design a clock crossing register with enable pulse.",
 "output": "module clk_cross_reg #(parameter W=8) (\n  input src_clk, dst_clk, rst_n,\n  input src_en, input [W-1:0] src_data,\n  output reg [W-1:0] dst_data, output reg dst_valid\n);\n  reg [W-1:0] hold;\n  reg toggle_src;\n  reg [2:0] toggle_dst; // [1:0]: two-flop synchronizer, [2]: delayed copy for edge detection\n  always @(posedge src_clk or negedge rst_n) begin\n    if (!rst_n) begin toggle_src<=0; hold<=0; end\n    else if (src_en) begin hold<=src_data; toggle_src<=~toggle_src; end\n  end\n  always @(posedge dst_clk or negedge rst_n) begin\n    if (!rst_n) begin toggle_dst<=0; dst_valid<=0; end\n    else begin\n      toggle_dst <= {toggle_dst[1:0], toggle_src};\n      // Detect the toggle only on synchronized bits, never on the first (possibly metastable) flop\n      dst_valid  <= toggle_dst[1] ^ toggle_dst[2];\n      if (toggle_dst[1] ^ toggle_dst[2]) dst_data <= hold;\n    end\n  end\nendmodule"},

{"instruction": "Design a simple memory controller with read and write state machine.",
 "output": "module mem_ctrl (\n  input clk, rst_n,\n  input req, wr, input [15:0] addr, wdata,\n  output reg ack, output reg [15:0] rdata\n);\n  reg [1:0] state;\n  localparam IDLE=0, RD=1, WR=2, DONE=3;\n  reg [15:0] mem [0:255];\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin state<=IDLE; ack<=0; end\n    else begin\n      ack<=0;\n      case(state)\n        IDLE: if(req) state <= wr ? WR : RD;\n        RD:   begin rdata<=mem[addr[7:0]]; state<=DONE; end\n        WR:   begin mem[addr[7:0]]<=wdata; state<=DONE; end\n        DONE: begin ack<=1; state<=IDLE; end\n      endcase\n    end\n  end\nendmodule"},

{"instruction": "Design a parameterizable up/down counter with overflow and underflow flags.",
 "output": "module updn_cnt_flags #(parameter W=8) (\n  input clk, rst_n, en, up,\n  output reg [W-1:0] cnt,\n  output ovf, udf\n);\n  assign ovf = en && up   && (cnt == {W{1'b1}});\n  assign udf = en && !up  && (cnt == '0);\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n)  cnt <= '0;\n    else if (en) cnt <= up ? cnt+1 : cnt-1;\n  end\nendmodule"},

{"instruction": "Design a simple LFSR-based pseudorandom number generator.",
 "output": "module prng #(parameter W=16, POLY=16'hD008) (\n  input clk, rst_n, en,\n  input [W-1:0] seed, input load,\n  output reg [W-1:0] rng\n);\n  wire feedback = ^(rng & POLY);\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n)   rng <= 16'hACE1;\n    else if (load) rng <= seed;\n    else if (en)   rng <= {rng[W-2:0], feedback};\n  end\nendmodule"},

# Mutex sample: fixed-priority lock; exactly one requester (the lowest index) is granted per acquisition.
{"instruction": "Design a hardware mutex with multiple clients.",
 "output": "module hw_mutex #(parameter N=4) (\n  input clk, rst_n,\n  input [N-1:0] lock_req, unlock_req,\n  output reg [N-1:0] lock_grant,\n  output reg locked\n);\n  reg [$clog2(N)-1:0] owner;\n  integer i;\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin locked<=0; lock_grant<=0; end\n    else begin\n      lock_grant <= '0;\n      if (!locked) begin\n        // Scan high-to-low: the last non-blocking assignment wins, so only\n        // the lowest-index requester becomes owner and gets the one-hot grant\n        for(i=N-1;i>=0;i=i-1)\n          if(lock_req[i]) begin\n            locked<=1; owner<=i[$clog2(N)-1:0]; lock_grant<=(1<<i);\n          end\n      end else if (unlock_req[owner]) begin\n        locked<=0;\n      end\n    end\n  end\nendmodule"},

# Checksum sample: per-packet XOR checksum; accumulation restarts on the first beat after `last`.
{"instruction": "Design a data integrity checker using XOR-based checksum.",
 "output": "module xor_checksum #(parameter W=32, N=8) (\n  input clk, rst_n,\n  input [W-1:0] data_in, input valid, last,\n  output reg [W-1:0] checksum, output reg checksum_valid\n);\n  reg first; // next valid beat starts a new packet\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin checksum<=0; checksum_valid<=0; first<=1; end\n    else begin\n      checksum_valid<=0;\n      if (valid) begin\n        // Restart on the first beat so packets do not accumulate into each other\n        checksum <= first ? data_in : (checksum ^ data_in);\n        first <= last;\n        checksum_valid <= last;\n      end\n    end\n  end\nendmodule"},

{"instruction": "Design a configurable input debouncer for mechanical switches.",
 "output": "module debounce #(parameter STABLE_COUNT=1000) (\n  input clk, rst_n, sw_in,\n  output reg sw_out\n);\n  reg [$clog2(STABLE_COUNT+1)-1:0] cnt;\n  reg sw_sync1, sw_sync2;\n  // Two-stage synchronizer first\n  always @(posedge clk) begin sw_sync1<=sw_in; sw_sync2<=sw_sync1; end\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin cnt<=0; sw_out<=0; end\n    else begin\n      if (sw_sync2 == sw_out) cnt<=0;\n      else begin\n        cnt<=cnt+1;\n        if (cnt==STABLE_COUNT-1) begin sw_out<=sw_sync2; cnt<=0; end\n      end\n    end\n  end\nendmodule"},

]

extra_data += [

# ============================================================
# WEAK AREA 3: AXI PROTOCOL DEEP
# ============================================================

# AXI4-Lite sample: AW and W are accepted independently; address and data are both latched at acceptance.
{"instruction": "Write a correct AXI4-Lite slave with proper write channel decoupling.",
 "output": "module axi_lite_slave (\n  input clk, rst_n,\n  input [31:0] awaddr, wdata,\n  input awvalid, wvalid, bready,\n  output reg awready, wready, bvalid,\n  output reg [1:0] bresp\n);\n  reg [31:0] reg0, reg1, reg2, reg3;\n  reg aw_done, w_done;\n  reg [31:0] aw_addr_lat, w_data_lat;\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin awready<=0; wready<=0; bvalid<=0; aw_done<=0; w_done<=0; end\n    else begin\n      awready<=0;\n      if (awvalid && !awready && !aw_done) begin awready<=1; aw_addr_lat<=awaddr; aw_done<=1; end\n      wready<=0;\n      // Capture the data with the W beat: wdata is not guaranteed stable after the handshake\n      if (wvalid && !wready && !w_done) begin wready<=1; w_data_lat<=wdata; w_done<=1; end\n      if (aw_done && w_done && !bvalid) begin\n        case(aw_addr_lat[3:2])\n          2'b00: reg0<=w_data_lat; 2'b01: reg1<=w_data_lat;\n          2'b10: reg2<=w_data_lat; 2'b11: reg3<=w_data_lat;\n        endcase\n        bvalid<=1; bresp<=0; aw_done<=0; w_done<=0;\n      end\n      if (bvalid && bready) bvalid<=0;\n    end\n  end\nendmodule"},

{"instruction": "Explain why AXI4-Lite write address and write data channels must be accepted independently.",
 "output": "The AXI specification allows a master to issue AW and W in any order. A slave that only accepts W when AW has already arrived, or vice versa, can deadlock with a master that sends them in the opposite order. Each channel must have its own acceptance flag (aw_done, w_done) and be accepted independently. The register write and BVALID response should only be issued after both channels have been captured."},

{"instruction": "Debug: This AXI4-Lite slave only accepts W when AW has arrived, causing deadlock.\nif (awvalid && wvalid) begin awready<=1; wready<=1; end",
 "output": "Fix by accepting each channel independently:\nreg aw_pend, w_pend; reg [31:0] aw_addr_h, w_data_h;\nif (awvalid && !aw_pend) begin awready<=1; aw_addr_h<=awaddr; aw_pend<=1; end else awready<=0;\nif (wvalid  && !w_pend)  begin wready<=1;  w_data_h<=wdata;   w_pend<=1;  end else wready<=0;\nif (aw_pend && w_pend && !bvalid) begin\n  /* write register */ bvalid<=1; bresp<=0; aw_pend<=0; w_pend<=0;\nend"},

{"instruction": "Write a correct AXI-Stream skid buffer that never drops data under backpressure.",
 "output": "module axis_skid #(parameter W=32) (\n  input clk, rst_n,\n  input [W-1:0] s_tdata, input s_tvalid, output reg s_tready,\n  output reg [W-1:0] m_tdata, output reg m_tvalid, input m_tready\n);\n  reg [W-1:0] skid_data;\n  reg skid_valid;\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin m_tvalid<=0; skid_valid<=0; s_tready<=1; end\n    else begin\n      if (skid_valid && (!m_tvalid || m_tready)) begin\n        m_tdata<=skid_data; m_tvalid<=1; skid_valid<=0; s_tready<=1;\n      end else if (s_tvalid && s_tready) begin\n        if (!m_tvalid || m_tready) begin m_tdata<=s_tdata; m_tvalid<=1; end\n        else begin skid_data<=s_tdata; skid_valid<=1; s_tready<=0; end\n      end else if (m_tvalid && m_tready && !skid_valid) m_tvalid<=0;\n    end\n  end\nendmodule"},

{"instruction": "Debug: This AXI-Stream module asserts m_tready combinationally from s_tvalid causing a loop.\nassign m_tready = s_tvalid && downstream_ready;",
 "output": "This creates a combinational loop. Fix by registering tready:\nalways @(posedge clk)\n  m_tready <= downstream_ready && !fifo_full;\nNever make tready combinationally dependent on tvalid from the same interface direction."},

# AXI-Stream sample: forward register slice; s_tready is combinational so a stalled output beat is never overwritten.
{"instruction": "Write an AXI-Stream register slice with zero throughput loss.",
 "output": "module axis_reg_slice #(parameter W=32) (\n  input clk, rst_n,\n  input [W-1:0] s_tdata, input s_tvalid, output s_tready,\n  output reg [W-1:0] m_tdata, output reg m_tvalid, input m_tready\n);\n  // Accept a beat only when the output register is empty or drains this cycle\n  assign s_tready = !m_tvalid || m_tready;\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) m_tvalid<=0;\n    else if (s_tready) begin\n      m_tvalid<=s_tvalid;\n      if (s_tvalid) m_tdata<=s_tdata;\n    end\n  end\nendmodule"},

{"instruction": "Write an AXI4-Lite master that performs a single write transaction.",
 "output": "module axil_master_wr (\n  input clk, rst_n, start, input [31:0] addr, data, output reg done,\n  output reg [31:0] awaddr, wdata, output reg awvalid, wvalid, bready,\n  input awready, wready, bvalid\n);\n  reg [2:0] state;\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin state<=0; awvalid<=0; wvalid<=0; bready<=0; done<=0; end\n    else begin\n      done<=0;\n      case(state)\n        0: if(start) begin awaddr<=addr; wdata<=data; awvalid<=1; wvalid<=1; bready<=1; state<=1; end\n        1: begin\n          if(awready) awvalid<=0;\n          if(wready)  wvalid<=0;\n          if(!awvalid && !wvalid) state<=2;\n        end\n        2: if(bvalid) begin bready<=0; done<=1; state<=0; end\n      endcase\n    end\n  end\nendmodule"},

{"instruction": "Write an AXI4-Lite master that performs a single read transaction.",
 "output": "module axil_master_rd (\n  input clk, rst_n, start, input [31:0] addr,\n  output reg done, output reg [31:0] rd_data,\n  output reg [31:0] araddr, output reg arvalid, rready,\n  input arready, input [31:0] rdata, input rvalid\n);\n  reg [1:0] state;\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin state<=0; arvalid<=0; rready<=0; done<=0; end\n    else begin\n      done<=0;\n      case(state)\n        0: if(start) begin araddr<=addr; arvalid<=1; rready<=1; state<=1; end\n        1: if(arready) begin arvalid<=0; state<=2; end\n        2: if(rvalid) begin rd_data<=rdata; rready<=0; done<=1; state<=0; end\n      endcase\n    end\n  end\nendmodule"},

{"instruction": "Explain the AXI4 VALID/READY handshake rule that prevents deadlock.",
 "output": "AXI4 rule: a source (master for request channels, slave for response channels) must NOT wait for READY before asserting VALID. If a master waits to see READY before driving VALID, and the slave waits to see VALID before driving READY, neither side ever starts — classic deadlock. The source must assert VALID independently as soon as it has data or a request. READY may depend on VALID, either combinationally or through registers, but VALID must never depend on READY."},

{"instruction": "Write an AXI-Stream FIFO that provides buffering between producer and consumer.",
 "output": "module axis_fifo #(parameter W=32, DEPTH=16) (\n  input clk, rst_n,\n  input [W-1:0] s_tdata, input s_tvalid, output s_tready,\n  output reg [W-1:0] m_tdata, output reg m_tvalid, input m_tready\n);\n  reg [W-1:0] mem [0:DEPTH-1];\n  reg [$clog2(DEPTH):0] wr_ptr, rd_ptr;\n  wire full  = (wr_ptr[$clog2(DEPTH)] != rd_ptr[$clog2(DEPTH)]) && (wr_ptr[$clog2(DEPTH)-1:0] == rd_ptr[$clog2(DEPTH)-1:0]);\n  wire empty = (wr_ptr == rd_ptr);\n  assign s_tready = !full;\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin wr_ptr<=0; rd_ptr<=0; m_tvalid<=0; end\n    else begin\n      if (s_tvalid && !full) begin mem[wr_ptr[$clog2(DEPTH)-1:0]]<=s_tdata; wr_ptr<=wr_ptr+1; end\n      if (!empty && (!m_tvalid || m_tready)) begin m_tdata<=mem[rd_ptr[$clog2(DEPTH)-1:0]]; m_tvalid<=1; rd_ptr<=rd_ptr+1; end\n      else if (m_tvalid && m_tready && empty) m_tvalid<=0;\n    end\n  end\nendmodule"},

{"instruction": "Debug: This AXI4-Lite slave asserts AWREADY for multiple cycles causing duplicate writes.\nalways @(posedge clk) if (awvalid) awready <= 1; else awready <= 0;",
 "output": "AWREADY must pulse for exactly one cycle per handshake. Add a pending flag:\nreg aw_pend;\nalways @(posedge clk or negedge rst_n) begin\n  if (!rst_n) begin awready<=0; aw_pend<=0; end\n  else begin\n    awready<=0;\n    if (awvalid && !aw_pend) begin awready<=1; aw_pend<=1; end\n    if (bvalid && bready) aw_pend<=0; // clear after response\n  end\nend"},

{"instruction": "Write an AXI-Stream width converter from 8-bit to 32-bit.",
 "output": "module axis_8to32 (\n  input clk, rst_n,\n  input [7:0] s_tdata, input s_tvalid, output reg s_tready,\n  output reg [31:0] m_tdata, output reg m_tvalid, input m_tready\n);\n  reg [7:0] buf_r [0:2];\n  reg [1:0] cnt;\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin cnt<=0; m_tvalid<=0; s_tready<=1; end\n    else begin\n      if (s_tvalid && s_tready) begin\n        buf_r[cnt[1:0]] <= s_tdata;\n        if (cnt==3) begin m_tdata<={s_tdata,buf_r[2],buf_r[1],buf_r[0]}; m_tvalid<=1; s_tready<=0; cnt<=0; end\n        else cnt<=cnt+1;\n      end\n      if (m_tvalid && m_tready) begin m_tvalid<=0; s_tready<=1; end\n    end\n  end\nendmodule"},

{"instruction": "Write an AXI4-Lite address decoder for 3 slaves.",
 "output": "module axil_decoder (\n  input [31:0] addr,\n  output reg [1:0] sel\n);\n  always @(*)\n    casez (addr)\n      32'h0000_????: sel = 0; // Slave 0: 0x0000_0000-0x0000_FFFF\n      32'h0001_????: sel = 1; // Slave 1: 0x0001_0000-0x0001_FFFF\n      32'h4000_????: sel = 2; // Slave 2: 0x4000_0000-0x4000_FFFF\n      default: sel = 2'bxx;\n    endcase\nendmodule"},

{"instruction": "Write an AXI-Stream packet length counter.",
 "output": "module axis_pkt_len (\n  input clk, rst_n,\n  input s_tvalid, s_tready, s_tlast,\n  output reg [15:0] pkt_len, output reg pkt_done\n);\n  reg [15:0] cnt;\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin cnt<=0; pkt_done<=0; pkt_len<=0; end\n    else begin\n      pkt_done<=0;\n      if (s_tvalid && s_tready) begin\n        cnt<=cnt+1;\n        if (s_tlast) begin pkt_len<=cnt+1; pkt_done<=1; cnt<=0; end\n      end\n    end\n  end\nendmodule"},

# AXI4-Lite sample: error slave; one SLVERR B response per write (after both AW and W) and one R response per read.
{"instruction": "Write an AXI4-Lite error slave that returns SLVERR for all transactions.",
 "output": "module axil_err_slave (\n  input clk, rst_n,\n  input awvalid, output reg awready,\n  input wvalid,  output reg wready,\n  output reg bvalid, input bready, output reg [1:0] bresp,\n  input arvalid, output reg arready,\n  output reg rvalid, input rready,\n  output reg [31:0] rdata, output reg [1:0] rresp\n);\n  reg aw_done, w_done; // channel already accepted for the current write\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin awready<=0; wready<=0; bvalid<=0; arready<=0; rvalid<=0; aw_done<=0; w_done<=0; end\n    else begin\n      awready <= awvalid && !awready && !aw_done;\n      wready  <= wvalid  && !wready  && !w_done;\n      if (awvalid&&awready) aw_done<=1;\n      if (wvalid&&wready)   w_done<=1;\n      // Respond once per write, only after both AW and W have been accepted\n      if (aw_done&&w_done&&!bvalid) begin bvalid<=1; aw_done<=0; w_done<=0; end\n      if (bvalid&&bready) bvalid<=0;\n      bresp<=2'b10; // SLVERR\n      // Do not accept a new AR while a read response is still pending\n      arready<=arvalid&&!arready&&!rvalid;\n      if(arvalid&&arready) begin rvalid<=1; rdata<=32'hDEAD_DEAD; rresp<=2'b10; end\n      if(rvalid&&rready) rvalid<=0;\n    end\n  end\nendmodule"},

# ============================================================
# LATCH / COMMON DEBUG
# ============================================================

{"instruction": "Fix this latch-inferring always block by adding a default.\nalways @(*) begin\n  case(cmd)\n    2'b00: out = a;\n    2'b01: out = b;\n  endcase\nend",
 "output": "Add default before case:\nalways @(*) begin\n  out = '0;\n  case(cmd)\n    2'b00: out = a;\n    2'b01: out = b;\n    default: out = '0;\n  endcase\nend"},

{"instruction": "Fix this latch-inferring if-else.\nalways @(*) begin\n  if (sel==2'b00) y=a;\n  else if (sel==2'b01) y=b;\nend",
 "output": "always @(*) begin\n  if      (sel==2'b00) y=a;\n  else if (sel==2'b01) y=b;\n  else                 y='0; // prevents latch\nend"},

{"instruction": "Explain why adding a clock to fix a latch is wrong.",
 "output": "Adding a clock changes combinational logic to sequential — this adds a register, changes timing, and alters behavior. The correct fix is to ensure all input combinations produce a defined output using a default assignment or complete case/else branches. The always block should stay @(*) with complete branch coverage."},

{"instruction": "Fix this FSM where a missing output assignment in one state infers a latch.\nalways @(*) begin\n  case(state)\n    IDLE:  begin next=START; out=1; end\n    START: begin next=DONE; end // out not assigned!\n    DONE:  begin next=IDLE; out=0; end\n  endcase\nend",
 "output": "Add defaults before the case:\nalways @(*) begin\n  next='0; out=0;\n  case(state)\n    IDLE:  begin next=START; out=1; end\n    START: begin next=DONE;  out=0; end\n    DONE:  begin next=IDLE;  out=0; end\n    default: begin next=IDLE; out=0; end\n  endcase\nend"},

{"instruction": "Debug: FSM uses blocking assignment in clocked block.\nalways @(posedge clk) begin\n  state = next_state;\nend",
 "output": "Use non-blocking in sequential blocks:\nalways @(posedge clk) begin\n  state <= next_state;\nend\nBlocking in clocked blocks causes same-timestep updates that can cause race conditions with other always blocks reading state in the same edge."},

{"instruction": "Debug: Same register driven from two always blocks.\nalways @(posedge clk) data_r <= a;\nalways @(posedge clk) data_r <= b;",
 "output": "Multiple drivers on the same register is illegal. Combine into one always block:\nalways @(posedge clk) begin\n  if (sel_a) data_r <= a;\n  else       data_r <= b;\nend"},

{"instruction": "Fix this combinational loop.\nassign valid_out = valid_in && valid_out;",
 "output": "Break the loop with a register:\nalways @(posedge clk or negedge rst_n) begin\n  if (!rst_n) valid_out <= 0;\n  else valid_out <= valid_in && enable_condition;\nend"},

{"instruction": "Debug: Active-low reset used with wrong polarity.\nalways @(posedge clk) begin\n  if (rst_n) data <= 0;\n  else data <= data_in;\nend",
 "output": "Active-low rst_n resets when LOW:\nalways @(posedge clk) begin\n  if (!rst_n) data <= 0;\n  else        data <= data_in;\nend"},

{"instruction": "Explain the difference between blocking and non-blocking assignments.",
 "output": "Blocking (=) executes sequentially:\n  a = b; b = a; // swap fails — a gets b, then b gets new a (old b)\nNon-blocking (<=) evaluates all RHS before any updates:\n  a <= b; b <= a; // swap works — both read old values simultaneously\nRule: always use <= in clocked always blocks. Use = in combinational always @(*) blocks."},

{"instruction": "Debug: Pipeline collapses because blocking assignments used.\nalways @(posedge clk) begin\n  stage1 = stage0;\n  stage2 = stage1; // captures NEW stage1!\nend",
 "output": "With blocking, stage2 sees the just-written stage1, collapsing two stages into one. Fix:\nalways @(posedge clk) begin\n  stage1 <= stage0;\n  stage2 <= stage1; // captures OLD stage1\nend"},

{"instruction": "Fix this always block with incomplete sensitivity list.\nalways @(a, b) begin\n  y = a & b & c; // c missing!\nend",
 "output": "Use wildcard to include all signals automatically:\nalways @(*) begin\n  y = a & b & c;\nend"},

{"instruction": "Fix this testbench that applies stimulus before reset deasserts.\ninitial begin stimulus = 8'hAA; rst=0; #20; rst=1; end",
 "output": "initial begin\n  rst=0; stimulus='0; // hold known during reset\n  #20; rst=1;\n  #5; // settle after reset\n  stimulus=8'hAA; // safe to apply now\nend"},

# ============================================================
# COMPLEX GENERATION
# ============================================================

{"instruction": "Design a parameterizable priority encoder with valid output.",
 "output": "module pri_enc #(parameter N=8) (\n  input [N-1:0] req,\n  output reg [$clog2(N)-1:0] idx,\n  output valid\n);\n  assign valid = |req;\n  integer i;\n  always @(*) begin\n    idx='0;\n    for(i=N-1;i>=0;i=i-1) if(req[i]) idx=i[$clog2(N)-1:0];\n  end\nendmodule"},

{"instruction": "Design a saturating adder that clamps at maximum value.",
 "output": "module sat_add #(parameter W=8) (\n  input [W-1:0] a, b, output reg [W-1:0] sum\n);\n  wire [W:0] full = {1'b0,a}+{1'b0,b};\n  always @(*) sum = full[W] ? {W{1'b1}} : full[W-1:0];\nendmodule"},

{"instruction": "Design a configurable PWM generator with duty and period registers.",
 "output": "module pwm #(parameter W=16) (\n  input clk, rst_n, input [W-1:0] period, duty,\n  output reg pwm_out\n);\n  reg [W-1:0] cnt;\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin cnt<=0; pwm_out<=0; end\n    else begin\n      cnt <= (cnt==period-1) ? '0 : cnt+1;\n      pwm_out <= (duty==0)?0 : (duty>=period)?1 : (cnt<duty);\n    end\n  end\nendmodule"},

{"instruction": "Design a traffic light FSM with configurable timing.",
 "output": "module traffic_light (\n  input clk, rst_n,\n  output reg [1:0] ns_light, ew_light\n);\n  reg [5:0] timer; reg [1:0] state;\n  localparam NSG=0, NSY=1, EWG=2, EWY=3;\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin state<=NSG; timer<=60; ns_light<=2'b10; ew_light<=2'b00; end\n    else begin\n      if(timer>0) timer<=timer-1;\n      else case(state)\n        NSG: begin state<=NSY; timer<=5;  ns_light<=2'b01; end\n        NSY: begin state<=EWG; timer<=60; ew_light<=2'b10; ns_light<=2'b00; end\n        EWG: begin state<=EWY; timer<=5;  ew_light<=2'b01; end\n        EWY: begin state<=NSG; timer<=60; ns_light<=2'b10; ew_light<=2'b00; end\n      endcase\n    end\n  end\nendmodule"},

# Leading-zero-counter sample: the highest set bit determines the count; an all-zero input yields W.
{"instruction": "Design a parameterizable leading zero counter.",
 "output": "module lzc #(parameter W=8) (\n  input [W-1:0] in,\n  output reg [$clog2(W):0] lz\n);\n  integer i;\n  always @(*) begin\n    lz=W;\n    // Scan LSB to MSB so the last assignment comes from the highest set bit\n    for(i=0;i<W;i=i+1) if(in[i]) lz=W-1-i;\n  end\nendmodule"},

# Mutex sample: rotating priority; the search starts one past the previous owner and grants exactly one client.
{"instruction": "Design a hardware mutex with N clients and rotating priority.",
 "output": "module hw_mutex #(parameter N=4) (\n  input clk, rst_n,\n  input [N-1:0] lock_req, unlock_req,\n  output reg [N-1:0] lock_grant, output reg locked\n);\n  reg [$clog2(N)-1:0] owner; // current holder; also the rotation pointer\n  integer i, idx;\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin locked<=0; lock_grant<=0; owner<=0; end\n    else begin\n      lock_grant<='0;\n      if (!locked) begin\n        // Scan offsets N..1 from the previous owner; the last non-blocking\n        // assignment wins, so owner+1 has top priority and the previous\n        // owner has the lowest\n        for(i=N;i>=1;i=i-1) begin\n          idx=(owner+i)%N;\n          if(lock_req[idx]) begin locked<=1; owner<=idx[$clog2(N)-1:0]; lock_grant<=(1<<idx); end\n        end\n      end\n      else if(unlock_req[owner]) locked<=0;\n    end\n  end\nendmodule"},

{"instruction": "Design a simple memory controller FSM with read and write states.",
 "output": "module mem_ctrl (\n  input clk, rst_n, req, wr,\n  input [15:0] addr, wdata,\n  output reg ack, output reg [15:0] rdata\n);\n  reg [1:0] state;\n  reg [15:0] mem [0:255];\n  localparam IDLE=0,RD=1,WR=2,DONE=3;\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin state<=IDLE; ack<=0; end\n    else begin\n      ack<=0;\n      case(state)\n        IDLE: if(req) state<=wr?WR:RD;\n        RD:   begin rdata<=mem[addr[7:0]]; state<=DONE; end\n        WR:   begin mem[addr[7:0]]<=wdata; state<=DONE; end\n        DONE: begin ack<=1; state<=IDLE; end\n      endcase\n    end\n  end\nendmodule"},

{"instruction": "Design a configurable input debouncer for mechanical switches.",
 "output": "module debounce #(parameter STABLE=1000) (\n  input clk, rst_n, sw_in, output reg sw_out\n);\n  reg [$clog2(STABLE+1)-1:0] cnt;\n  reg s1, s2;\n  always @(posedge clk) begin s1<=sw_in; s2<=s1; end\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin cnt<=0; sw_out<=0; end\n    else begin\n      if (s2==sw_out) cnt<=0;\n      else begin cnt<=cnt+1; if(cnt==STABLE-1) begin sw_out<=s2; cnt<=0; end end\n    end\n  end\nendmodule"},

# Checksum sample: per-packet XOR checksum; accumulation restarts on the first beat after `last`.
{"instruction": "Design a XOR-based data integrity checksum module.",
 "output": "module xor_cksum #(parameter W=32) (\n  input clk, rst_n,\n  input [W-1:0] data_in, input valid, last,\n  output reg [W-1:0] checksum, output reg cksum_valid\n);\n  reg first; // next valid beat starts a new packet\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin checksum<=0; cksum_valid<=0; first<=1; end\n    else begin\n      cksum_valid<=0;\n      if(valid) begin\n        // Restart on the first beat so packets do not accumulate into each other\n        checksum<=first ? data_in : (checksum^data_in);\n        first<=last;\n        if(last) cksum_valid<=1;\n      end\n    end\n  end\nendmodule"},

{"instruction": "Design a binary to one-hot decoder with enable.",
 "output": "module bin2oh #(parameter N=3) (\n  input [N-1:0] bin, input en,\n  output reg [(1<<N)-1:0] oh\n);\n  always @(*) begin oh='0; if(en) oh[bin]=1'b1; end\nendmodule"},

]

extra_data += [

# ============================================================
# MORE GRAY / CDC TARGETED (fill to 300+)
# ============================================================

# Gray-code sample: encoder/decoder round trip with a worked N=4 example.
{"instruction": "Write Gray encoder and decoder and show they are inverses for N=4.",
 "output": "// Encoder: gray = bin ^ (bin>>1)\n// Decoder: bin[N-1]=gray[N-1]; bin[i]=bin[i+1]^gray[i]  (equivalently bin[i]=^gray[N-1:i])\n// Worked example for N=4, bin=4'b1011 (11):\n//   encode: gray = 1011 ^ 0101 = 1110\n//   decode: bin[3]=gray[3]=1\n//           bin[2]=bin[3]^gray[2]=1^1=0\n//           bin[1]=bin[2]^gray[1]=0^1=1\n//           bin[0]=bin[1]^gray[0]=1^0=1  -> 1011, the original value\nmodule gray_roundtrip #(parameter N=4)(input [N-1:0] bin_in, output [N-1:0] bin_out);\n  wire [N-1:0] g = bin_in^(bin_in>>1);\n  assign bin_out[N-1]=g[N-1];\n  genvar i;\n  generate for(i=0;i<N-1;i=i+1) begin : g_dec\n    assign bin_out[i]=bin_out[i+1]^g[i];\n  end endgenerate\nendmodule"},

{"instruction": "Write a simulation testbench for the async FIFO verifying no data loss under random stimulus.",
 "output": "module async_fifo_tb;\n  reg wclk=0, rclk=0, rst_n=0;\n  always #5  wclk=~wclk; // 100MHz write\n  always #7  rclk=~rclk; // ~71MHz read\n  reg wr_en, rd_en; reg [7:0] wdata; wire [7:0] rdata; wire full, empty;\n  integer sent=0, received=0;\n  reg [7:0] ref_q[$];\n  async_fifo #(.DW(8),.AW(4)) dut(.wclk,.rclk,.rst_n,.wr_en,.wdata,.rd_en,.rdata,.full,.empty);\n  initial begin\n    #20 rst_n=1;\n    repeat(200) begin\n      @(posedge wclk);\n      wr_en=$urandom_range(0,1);\n      wdata=$urandom;\n      if(wr_en&&!full) begin ref_q.push_back(wdata); sent++; end\n    end\n  end\n  always @(posedge rclk) begin\n    rd_en<=$urandom_range(0,1);\n    if(rd_en&&!empty) begin\n      assert(rdata==ref_q.pop_front())\n        else $error(\"Data mismatch\");\n      received++;\n    end\n  end\n  initial #3000 begin\n    $display(\"Sent=%0d Recv=%0d\",sent,received);\n    $finish;\n  end\nendmodule"},

{"instruction": "Explain why empty detection uses all bits matching but full uses inverted MSBs.",
 "output": "Both use the extra MSB (pointer width = address width + 1) to track wrap-arounds. Empty: all N+1 bits of both pointers match, meaning read has consumed everything written — no wrapping involved. Full: the lower N bits match (same slot), but the MSBs differ, meaning the write pointer has wrapped around exactly once more than the read pointer — the buffer is exactly full. If MSBs differed by 2, that would represent 2 full buffers worth of data which is impossible, so only a 1-bit difference in MSB is needed for full detection."},

{"instruction": "Write an SVA to verify the Gray pointer only changes by one bit per cycle.",
 "output": "assert property (@(posedge wclk) disable iff(!rst_n)\n  $countones(wptr_gray ^ $past(wptr_gray)) <= 1)\n  else $error(\"Gray pointer changed more than 1 bit: %b -> %b\",\n    $past(wptr_gray), wptr_gray);"},

{"instruction": "Write a CDC signal monitor that counts metastability windows in simulation.",
 "output": "// Simulation-only monitor\nmodule cdc_mon (input src_clk, dst_clk, input sig);\n  real t_change; integer window_cnt=0;\n  always @(sig) t_change=$realtime;\n  always @(posedge dst_clk) begin\n    if(($realtime-t_change)<0.5) begin // 500ps setup window\n      window_cnt++;\n      $warning(\"Metastability window #%0d at %0t\",window_cnt,$realtime);\n    end\n  end\n  final $display(\"Total metastability windows: %0d\",window_cnt);\nendmodule"},

# More AXI targeted
{"instruction": "Write the complete BVALID/BREADY handshake logic for an AXI4-Lite slave.",
 "output": "// BVALID is asserted after both AW and W have been accepted\n// BVALID must be held until BREADY is high\nalways @(posedge clk or negedge rst_n) begin\n  if (!rst_n) bvalid<=0;\n  else begin\n    // Assert after write commit\n    if (aw_done && w_done && !bvalid) begin\n      bvalid<=1; bresp<=2'b00; // OKAY\n    end\n    // Deassert after master accepts response\n    if (bvalid && bready) bvalid<=0;\n  end\nend\n// BVALID must not deassert until BREADY — the always block above guarantees this\n// because we only clear bvalid when bready is already high"},

# AXI4-Lite debug sample: BVALID only after both channels are accepted; address and data are latched at acceptance.
{"instruction": "Debug: BVALID is asserted before the write is committed to storage. Fix it.\nif(awvalid) bvalid<=1;",
 "output": "BVALID must only be asserted after both AW and W have been received and the write has completed:\nreg aw_done, w_done;\nreg [31:0] aw_addr, w_data;\n// Accept channels, capturing address and data as each one is accepted\nif(awvalid&&!aw_done) begin awready<=1; aw_addr<=awaddr; aw_done<=1; end else awready<=0;\nif(wvalid&&!w_done)   begin wready<=1;  w_data<=wdata;   w_done<=1;  end else wready<=0;\n// Only assert BVALID after both done and after writing register\nif(aw_done&&w_done&&!bvalid) begin\n  regs[aw_addr[3:2]]<=w_data; // commit write\n  bvalid<=1; bresp<=2'b00;\n  aw_done<=0; w_done<=0;\nend\nif(bvalid&&bready) bvalid<=0;"},

{"instruction": "Explain what happens when an AXI master deasserts AWVALID before AWREADY is seen.",
 "output": "This violates the AXI protocol. Once a master asserts AWVALID, it must hold it high until the slave asserts AWREADY and the handshake completes (both AWVALID and AWREADY high in the same cycle). Prematurely deasserting AWVALID causes the slave to never see a complete handshake, leaving the transaction incomplete. The slave may be waiting for AWVALID to complete address capture while the master has moved on, causing a protocol deadlock or lost transaction."},

{"instruction": "Write an AXI4-Lite transaction monitor that logs all write transactions.",
 "output": "module axil_monitor (\n  input clk, rst_n,\n  input awvalid, awready, input [31:0] awaddr,\n  input wvalid, wready, input [31:0] wdata,\n  input bvalid, bready, input [1:0] bresp\n);\n  always @(posedge clk) begin\n    if (awvalid && awready)\n      $display(\"[%0t] AXI WR ADDR: 0x%08h\", $time, awaddr);\n    if (wvalid && wready)\n      $display(\"[%0t] AXI WR DATA: 0x%08h\", $time, wdata);\n    if (bvalid && bready)\n      $display(\"[%0t] AXI WR RESP: %b\", $time, bresp);\n  end\nendmodule"},

{"instruction": "Write an AXI-Stream throughput monitor measuring beats per cycle.",
 "output": "module axis_tput_mon (\n  input clk, rst_n,\n  input tvalid, tready,\n  output reg [31:0] beat_count, cycle_count\n);\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin beat_count<=0; cycle_count<=0; end\n    else begin\n      cycle_count<=cycle_count+1;\n      if(tvalid&&tready) beat_count<=beat_count+1;\n    end\n  end\n  // Throughput = beat_count/cycle_count (compute externally)\nendmodule"},

# More assertion examples
{"instruction": "Write a complete protocol checker module for AXI-Stream.",
 "output": "module axis_checker #(parameter W=32) (\n  input clk, rst_n,\n  input [W-1:0] tdata, input tvalid, tready, tlast\n);\n  // TVALID must not deassert before TREADY\n  ap_valid_stable: assert property (@(posedge clk) disable iff(!rst_n)\n    (tvalid&&!tready) |-> ##1 tvalid);\n  // TDATA stable while valid not accepted\n  ap_data_stable: assert property (@(posedge clk) disable iff(!rst_n)\n    (tvalid&&!tready) |-> ##1 $stable(tdata));\n  // TLAST stable while valid not accepted\n  ap_last_stable: assert property (@(posedge clk) disable iff(!rst_n)\n    (tvalid&&!tready) |-> ##1 $stable(tlast));\n  // No X on data when valid\n  ap_no_x: assert property (@(posedge clk) disable iff(!rst_n)\n    tvalid |-> !$isunknown(tdata));\nendmodule"},

{"instruction": "Write assertions to verify a 2-deep pipeline has correct latency.",
 "output": "// Input applied at cycle N must appear at output at cycle N+2\nassert property (@(posedge clk) disable iff(!rst_n)\n  valid_in |-> ##2 valid_out);\n// Data value preserved through pipeline\nassert property (@(posedge clk) disable iff(!rst_n)\n  valid_in |-> ##2 (data_out == $past(data_in, 2)));"},

{"instruction": "Write an assertion to verify the reset state is reached within 3 cycles.",
 "output": "assert property (@(posedge clk)\n  $fell(rst_n) |-> ##[1:3] (state==IDLE && count=='0 && valid_out==0))\n  else $error(\"Reset state not reached within 3 cycles\");"},

{"instruction": "Write a cover property ensuring all 4 FIFO depth thresholds are exercised.",
 "output": "cover property (@(posedge clk) occupancy==0);        // empty\ncover property (@(posedge clk) occupancy==1);        // one entry\ncover property (@(posedge clk) occupancy==DEPTH/2);  // half full\ncover property (@(posedge clk) occupancy==DEPTH-1);  // almost full\ncover property (@(posedge clk) occupancy==DEPTH);    // full"},

{"instruction": "Write an assertion that no two pipeline stages hold data simultaneously for a simple 1-element pipe.",
 "output": "// For a single-entry pipeline: only one stage can hold valid data\nassert property (@(posedge clk) disable iff(!rst_n)\n  !(stage1_valid && stage2_valid))\n  else $error(\"Both stages valid simultaneously in single-entry pipe\");"},

# More generation examples
{"instruction": "Design a parameterizable round-robin arbiter with 4 clients.",
 "output": "module rr_arb4 (\n  input clk, rst, input [3:0] req, output reg [3:0] grant\n);\n  reg [1:0] ptr;\n  integer i;\n  always @(posedge clk) begin\n    if (rst) begin ptr<=0; grant<=0; end\n    else begin\n      grant<='0;\n      for(i=0;i<4;i=i+1)\n        if(req[(ptr+i)%4]&&grant==0) begin\n          grant[(ptr+i)%4]<=1;\n          ptr<=(ptr+i+1)%4;\n        end\n    end\n  end\nendmodule"},

{"instruction": "Design a parameterizable synchronous FIFO with correct MSB-based full/empty detection.",
 "output": "module sync_fifo #(parameter DW=8, AW=4) (\n  input clk, rst_n,\n  input wr_en, input [DW-1:0] wdata,\n  input rd_en, output reg [DW-1:0] rdata,\n  output full, empty,\n  output reg [AW:0] occupancy\n);\n  localparam DEPTH=1<<AW;\n  reg [DW-1:0] mem [0:DEPTH-1];\n  reg [AW:0] wr_ptr, rd_ptr;\n  // MSB-based correct full/empty\n  assign empty = (wr_ptr==rd_ptr);\n  assign full  = (wr_ptr[AW]!=rd_ptr[AW]) && (wr_ptr[AW-1:0]==rd_ptr[AW-1:0]);\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin wr_ptr<=0; rd_ptr<=0; end\n    else begin\n      if(wr_en&&!full) begin mem[wr_ptr[AW-1:0]]<=wdata; wr_ptr<=wr_ptr+1; end\n      if(rd_en&&!empty) begin rdata<=mem[rd_ptr[AW-1:0]]; rd_ptr<=rd_ptr+1; end\n      occupancy<=wr_ptr-rd_ptr;\n    end\n  end\nendmodule"},

{"instruction": "Design a Gray code encoder and verify it with a testbench.",
 "output": "module gray_enc #(parameter N=8) (input [N-1:0] bin, output [N-1:0] gray);\n  assign gray = bin ^ (bin >> 1);\nendmodule\n\nmodule tb_gray_enc;\n  reg [7:0] bin; wire [7:0] gray;\n  gray_enc #(8) dut(.bin,.gray);\n  reg [7:0] prev_gray;\n  integer i;\n  initial begin\n    prev_gray=0;\n    for(i=0;i<256;i=i+1) begin\n      bin=i; #1;\n      if(i>0) assert($countones(gray^prev_gray)==1)\n        else $error(\"Multi-bit change at %0d\",i);\n      prev_gray=gray;\n    end\n    $display(\"All 256 transitions verified - only 1 bit changes each step\");\n    $finish;\n  end\nendmodule"},

{"instruction": "Design a two-stage pipelined multiplier with valid/ready handshake.",
 "output": "module pipe_mult #(parameter W=16) (\n  input clk, rst_n,\n  input valid_in, input [W-1:0] a, b,\n  output reg valid_out, output reg [2*W-1:0] product\n);\n  reg [W-1:0] a_r, b_r; reg v1;\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin v1<=0; valid_out<=0; end\n    else begin\n      a_r<=a; b_r<=b; v1<=valid_in;\n      product<=a_r*b_r; valid_out<=v1;\n    end\n  end\nendmodule"},

{"instruction": "Design a UART transmitter for 8N1 format.",
 "output": "module uart_tx #(parameter CLKS_PER_BIT=87) (\n  input clk, rst_n,\n  input [7:0] tx_data, input tx_start,\n  output reg txd, tx_busy\n);\n  reg [6:0] clk_cnt; reg [2:0] bit_idx; reg [3:0] state;\n  reg [7:0] sr;\n  localparam IDLE=0,START=1,DATA=2,STOP=3;\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin txd<=1; tx_busy<=0; state<=IDLE; end\n    else case(state)\n      IDLE:  if(tx_start) begin sr<=tx_data; txd<=0; clk_cnt<=0; bit_idx<=0; tx_busy<=1; state<=START; end\n      START: if(clk_cnt==CLKS_PER_BIT-1) begin clk_cnt<=0; state<=DATA; end else clk_cnt<=clk_cnt+1;\n      DATA:  if(clk_cnt==CLKS_PER_BIT-1) begin\n               clk_cnt<=0; txd<=sr[0]; sr<={1'b0,sr[7:1]};\n               if(bit_idx==7) state<=STOP; else bit_idx<=bit_idx+1;\n             end else clk_cnt<=clk_cnt+1;\n      STOP:  if(clk_cnt==CLKS_PER_BIT-1) begin txd<=1; tx_busy<=0; state<=IDLE; end else clk_cnt<=clk_cnt+1;\n    endcase\n  end\nendmodule"},

{"instruction": "Design a credit-based flow controller.",
 "output": "module credit_fc #(parameter MAX=8) (\n  input clk, rst_n, input send, credit_return,\n  output reg can_send\n);\n  reg [$clog2(MAX+1):0] credits;\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) credits<=MAX;\n    else case({send&&can_send, credit_return})\n      2'b10: credits<=credits-1;\n      2'b01: if(credits<MAX) credits<=credits+1;\n      2'b11: credits<=credits; // net zero\n      default: credits<=credits;\n    endcase\n    can_send<=(credits>0);\n  end\nendmodule"},

{"instruction": "Design a parameterizable synchronous counter with load, up/down, and enable.",
 "output": "module flex_cnt #(parameter W=8) (\n  input clk, rst_n, en, load, up,\n  input [W-1:0] load_val,\n  output reg [W-1:0] cnt\n);\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n)    cnt<='0;\n    else if (load) cnt<=load_val;\n    else if (en)   cnt<=up ? cnt+1 : cnt-1;\n  end\nendmodule"},

{"instruction": "Design a hardware timer with compare match and auto-reload.",
 "output": "module timer #(parameter W=32) (\n  input clk, rst_n, en,\n  input [W-1:0] cmp, input auto_reload,\n  output reg irq, output reg [W-1:0] cnt\n);\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin cnt<=0; irq<=0; end\n    else begin\n      irq<=0;\n      if (en) begin\n        if (cnt==cmp) begin irq<=1; cnt<=auto_reload?'0:cnt; end\n        else cnt<=cnt+1;\n      end\n    end\n  end\nendmodule"},

{"instruction": "Design a shift-and-add multiplier for 8-bit operands.",
 "output": "module shift_mult #(parameter W=8) (\n  input clk, rst_n, start,\n  input [W-1:0] a, b,\n  output reg [2*W-1:0] product,\n  output reg done\n);\n  reg [W-1:0] mcand; reg [W-1:0] mplier; reg [3:0] cnt;\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin product<=0; done<=0; cnt<=0; end\n    else if (start) begin product<=0; mcand<=a; mplier<=b; cnt<=W; done<=0; end\n    else if (cnt>0) begin\n      if (mplier[0]) product<=product+(mcand<<(W-cnt));\n      mplier<=mplier>>1; cnt<=cnt-1;\n      if(cnt==1) done<=1;\n    end\n  end\nendmodule"},

{"instruction": "Design a parameterizable one-hot to binary encoder.",
 "output": "module oh2bin #(parameter N=4) (\n  input [(1<<N)-1:0] onehot,\n  output reg [N-1:0] bin,\n  output valid\n);\n  assign valid=$onehot(onehot);\n  integer i;\n  always @(*) begin\n    bin='0;\n    for(i=0;i<(1<<N);i=i+1)\n      if(onehot[i]) bin=i[N-1:0];\n  end\nendmodule"},

]


correction_data = [

# ============================================================
# BLOCK 1: GRAY CODE - Clean, no narrative confusion
# ============================================================

{"instruction": "Write the correct Gray code encoder formula for N bits.",
 "output": "module gray_enc #(parameter N=8) (input [N-1:0] bin, output [N-1:0] gray);\n  assign gray = bin ^ (bin >> 1);\n  // Explicit per-bit form:\n  // gray[N-1] = bin[N-1]          (MSB is unchanged)\n  // gray[i]   = bin[i+1] ^ bin[i] (each lower bit)\nendmodule"},

{"instruction": "Write the correct Gray code decoder formula for N bits.",
 "output": "module gray_dec #(parameter N=8) (input [N-1:0] gray, output reg [N-1:0] bin);\n  integer i;\n  always @(*) begin\n    bin[N-1] = gray[N-1];              // MSB is unchanged\n    for (i = N-2; i >= 0; i = i-1)\n      bin[i] = bin[i+1] ^ gray[i];    // each lower bit XORs with decoded bit above\n  end\nendmodule\n// Rule: bin[i] = XOR of all gray bits from MSB down to position i"},

{"instruction": "Show the Gray encode and decode roundtrip for 4-bit value 11 (1011).",
 "output": "// Encode: gray = bin ^ (bin >> 1)\n// bin  = 4'b1011 (11)\n// bin>>1 = 4'b0101\n// gray = 1011 ^ 0101 = 4'b1110\n\n// Decode: bin[i] = bin[i+1] ^ gray[i], start from MSB\n// gray = 4'b1110\n// bin[3] = gray[3]            = 1\n// bin[2] = bin[3] ^ gray[2]   = 1 ^ 1 = 0\n// bin[1] = bin[2] ^ gray[1]   = 0 ^ 1 = 1\n// bin[0] = bin[1] ^ gray[0]   = 1 ^ 0 = 1\n// bin = 4'b1011 = 11 ✓  (matches original)"},

# Each row names the single Gray bit that flips relative to the previous row
# (flip sequence for N=3 is 0,1,0,2,0,1,0; the wrap back to 000 flips bit 2).
{"instruction": "Prove that Gray code changes exactly one bit between consecutive values for N=3.",
 "output": "// gray = bin ^ (bin>>1)\n// bin 000 -> gray 000\n// bin 001 -> gray 001  (bit 0 changed)\n// bin 010 -> gray 011  (bit 1 changed)\n// bin 011 -> gray 010  (bit 0 changed)\n// bin 100 -> gray 110  (bit 2 changed)\n// bin 101 -> gray 111  (bit 0 changed)\n// bin 110 -> gray 101  (bit 1 changed)\n// bin 111 -> gray 100  (bit 0 changed)\n// wrap 111 -> 000: gray 100 -> 000  (bit 2 changed)\n// Each transition: $countones(gray[n] ^ gray[n-1]) == 1 for all n\n// This is the key property that makes Gray code safe for CDC pointer synchronization"},

{"instruction": "Write a testbench that proves Gray encoder changes only one bit per increment.",
 "output": "module tb_gray;\n  reg [7:0] bin;\n  wire [7:0] gray;\n  gray_enc #(8) dut(.bin(bin), .gray(gray));\n  reg [7:0] prev_gray;\n  integer i, errors;\n  initial begin\n    errors = 0;\n    prev_gray = 8'b0;\n    for (i = 0; i < 256; i = i + 1) begin\n      bin = i[7:0]; #1;\n      if (i > 0) begin\n        if ($countones(gray ^ prev_gray) !== 1) begin\n          $error(\"FAIL at bin=%0d: gray=%b prev=%b transitions=%0d\",\n            i, gray, prev_gray, $countones(gray ^ prev_gray));\n          errors = errors + 1;\n        end\n      end\n      prev_gray = gray;\n    end\n    if (errors == 0) $display(\"PASS: All 255 transitions change exactly 1 bit\");\n    $finish;\n  end\nendmodule"},

{"instruction": "What is the correct Gray code for binary values 0 through 15?",
 "output": "// Binary -> Gray (formula: gray = bin ^ (bin>>1))\n// 0000 -> 0000\n// 0001 -> 0001\n// 0010 -> 0011\n// 0011 -> 0010\n// 0100 -> 0110\n// 0101 -> 0111\n// 0110 -> 0101\n// 0111 -> 0100\n// 1000 -> 1100\n// 1001 -> 1101\n// 1010 -> 1111\n// 1011 -> 1110\n// 1100 -> 1010\n// 1101 -> 1011\n// 1110 -> 1001\n// 1111 -> 1000\n// Property verified: each row differs from the next by exactly 1 bit"},

{"instruction": "Write a complete async FIFO using correct Gray code pointers and two-flop synchronizers.",
 "output": "module async_fifo #(parameter DW=8, AW=4) (\n  input wclk, rclk, rst_n,\n  input wr_en, input [DW-1:0] wdata,\n  input rd_en, output reg [DW-1:0] rdata,\n  output full, empty\n);\n  localparam DEPTH = 1 << AW;\n  reg [DW-1:0] mem [0:DEPTH-1];\n\n  // Write domain pointers\n  reg [AW:0] wbin, wgray;\n  // Read domain pointers\n  reg [AW:0] rbin, rgray;\n  // Two-flop synchronizers\n  reg [AW:0] wgray_s1, wgray_s2; // wgray synced into rclk domain\n  reg [AW:0] rgray_s1, rgray_s2; // rgray synced into wclk domain\n\n  // Write pointer (in wclk domain)\n  always @(posedge wclk or negedge rst_n) begin\n    if (!rst_n) begin wbin <= 0; wgray <= 0; end\n    else if (wr_en && !full) begin\n      mem[wbin[AW-1:0]] <= wdata;\n      wbin  <= wbin + 1;\n      wgray <= (wbin + 1) ^ ((wbin + 1) >> 1); // Gray encode next value\n    end\n  end\n\n  // Read pointer (in rclk domain)\n  always @(posedge rclk or negedge rst_n) begin\n    if (!rst_n) begin rbin <= 0; rgray <= 0; end\n    else if (rd_en && !empty) begin\n      rdata <= mem[rbin[AW-1:0]];\n      rbin  <= rbin + 1;\n      rgray <= (rbin + 1) ^ ((rbin + 1) >> 1);\n    end\n  end\n\n  // Synchronize wgray into rclk domain\n  always @(posedge rclk or negedge rst_n) begin\n    if (!rst_n) begin wgray_s1 <= 0; wgray_s2 <= 0; end\n    else begin wgray_s1 <= wgray; wgray_s2 <= wgray_s1; end\n  end\n\n  // Synchronize rgray into wclk domain\n  always @(posedge wclk or negedge rst_n) begin\n    if (!rst_n) begin rgray_s1 <= 0; rgray_s2 <= 0; end\n    else begin rgray_s1 <= rgray; rgray_s2 <= rgray_s1; end\n  end\n\n  // Full: write pointer lapped read pointer\n  // MSBs differ (one more wrap) AND lower bits match (same slot)\n  assign full  = (wgray[AW]   != rgray_s2[AW]) &&\n                 (wgray[AW-1] != rgray_s2[AW-1]) &&\n                 (wgray[AW-2:0] == rgray_s2[AW-2:0]);\n\n  // Empty: all bits match (read has caught up to write)\n  assign empty = (rgray == wgray_s2);\nendmodule"},

# // ============================================================
# // BLOCK 2: FIFO FULL/EMPTY - Correct logic, no confusion
# // ============================================================

{"instruction": "What is the correct FIFO full and empty detection using extra MSB pointers?",
 "output": "// Pointers are AW+1 bits wide (AW = log2(DEPTH))\n// The extra MSB tracks how many times the pointer has wrapped\n\n// EMPTY: write pointer equals read pointer (all bits match)\nassign empty = (wr_ptr == rd_ptr);\n\n// FULL: write pointer has wrapped exactly once more than read pointer\n// Lower AW bits match (same slot) AND MSBs differ (one extra wrap)\nassign full = (wr_ptr[AW-1:0] == rd_ptr[AW-1:0]) &&\n             (wr_ptr[AW]     != rd_ptr[AW]);\n\n// Memory:\n// if (wr_en && !full)  mem[wr_ptr[AW-1:0]] <= data_in;\n// if (rd_en && !empty) data_out <= mem[rd_ptr[AW-1:0]];"},

{"instruction": "Write a synchronous FIFO with correct MSB-based full and empty detection.",
 "output": "module sync_fifo #(parameter DW=8, AW=4) (\n  input clk, rst_n,\n  input wr_en, input [DW-1:0] wdata,\n  input rd_en, output reg [DW-1:0] rdata,\n  output full, empty\n);\n  localparam DEPTH = 1 << AW;\n  reg [DW-1:0] mem [0:DEPTH-1];\n  reg [AW:0] wr_ptr, rd_ptr;\n\n  // CORRECT full/empty using extra MSB\n  assign empty = (wr_ptr == rd_ptr);\n  assign full  = (wr_ptr[AW-1:0] == rd_ptr[AW-1:0]) && (wr_ptr[AW] != rd_ptr[AW]);\n\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin wr_ptr <= 0; rd_ptr <= 0; end\n    else begin\n      if (wr_en && !full)  begin mem[wr_ptr[AW-1:0]] <= wdata; wr_ptr <= wr_ptr + 1; end\n      if (rd_en && !empty) begin rdata <= mem[rd_ptr[AW-1:0]]; rd_ptr <= rd_ptr + 1; end\n    end\n  end\nendmodule"},

{"instruction": "Fix this FIFO where full and empty are swapped.\nassign full  = (wr_ptr == rd_ptr);\nassign empty = (wr_ptr[AW] != rd_ptr[AW]) && (wr_ptr[AW-1:0] == rd_ptr[AW-1:0]);",
 "output": "The conditions are swapped. Correct assignment:\nassign empty = (wr_ptr == rd_ptr);  // all bits equal = nothing written yet unread\nassign full  = (wr_ptr[AW-1:0] == rd_ptr[AW-1:0]) && (wr_ptr[AW] != rd_ptr[AW]);\n// full = same slot, one extra wrap = buffer completely filled"},

# Pointer values are written as <wrap bit>_<4 address bits> throughout this walkthrough.
{"instruction": "Explain step by step why FIFO full uses MSBs differ and empty uses all bits equal.",
 "output": "// Start: wr_ptr=0_0000, rd_ptr=0_0000 (5 bits for depth=16)\n// Empty: wr_ptr==rd_ptr? YES -> empty=1\n\n// Write 16 times: wr_ptr=1_0000, rd_ptr=0_0000\n// Full check: lower 4 bits: 0000==0000 YES, MSB: 1!=0 YES -> full=1\n\n// Read 1: rd_ptr=0_0001\n// Full check: lower bits 0000!=0001 NO -> full=0 (one slot freed)\n\n// Read 15 more (16 total): rd_ptr=1_0000\n// Empty check: wr_ptr==rd_ptr? 1_0000==1_0000 YES -> empty=1\n\n// The MSB acts as a wrap-around flag. Differing MSBs with equal lower bits means\n// wr_ptr has wrapped once more than rd_ptr, i.e., the buffer is full."},

# // ============================================================
# // BLOCK 3: SVA ASSERTIONS - Correct, no logical errors
# // ============================================================

{"instruction": "Write the correct SVA assertions to prevent FIFO overflow and underflow.",
 "output": "// CORRECT form: assert that the ILLEGAL condition never occurs\n\n// No write when full (overflow protection)\nassert property (@(posedge clk) disable iff(!rst_n)\n  !(wr_en && full))\n  else $error(\"FIFO overflow: write attempted when full\");\n\n// No read when empty (underflow protection)\nassert property (@(posedge clk) disable iff(!rst_n)\n  !(rd_en && empty))\n  else $error(\"FIFO underflow: read attempted when empty\");"},

{"instruction": "What is wrong with this FIFO assertion and how do you fix it?\nassert property (@(posedge clk) disable iff(!rst_n) (wr_en && !full) | (rd_en && !empty));",
 "output": "This assertion is logically wrong. It passes whenever any write OR read happens to a non-full/non-empty FIFO - it does NOT prevent overflow or underflow.\n\nCorrect assertions check that the ILLEGAL state never occurs:\n\n// Assertion 1: never write when full\nassert property (@(posedge clk) disable iff(!rst_n)\n  !(wr_en && full))\n  else $error(\"FIFO overflow\");\n\n// Assertion 2: never read when empty\nassert property (@(posedge clk) disable iff(!rst_n)\n  !(rd_en && empty))\n  else $error(\"FIFO underflow\");"},

{"instruction": "Write a correct one-hot assertion for arbiter grant.",
 "output": "// $onehot  = exactly one bit set (use when grant must always be asserted)\n// $onehot0 = zero or one bit set (use when no-grant cycles are valid)\n\n// If arbiter always grants when there are requests:\nassert property (@(posedge clk) disable iff(rst)\n  |grant |-> $onehot(grant))\n  else $error(\"Grant is not one-hot: %b\", grant);\n\n// If arbiter can have idle cycles (no grant):\nassert property (@(posedge clk) disable iff(rst)\n  $onehot0(grant))\n  else $error(\"Multiple grants asserted: %b\", grant);"},

{"instruction": "What is wrong with this one-hot assertion and fix it?\nassert property (@(posedge clk) (grant[0] | grant[1]) implies (!grant[0] & !grant[1]));",
 "output": "This says 'if either grant[0] or grant[1] is set, then neither is set' - which can never be true. It's self-contradictory and will always fail when any grant is asserted.\n\nCorrect form using $onehot0:\nassert property (@(posedge clk) disable iff(rst)\n  $onehot0(grant))\n  else $error(\"Multiple grants: %b\", grant);\n\n// $onehot0 is the built-in SystemVerilog function that checks\n// at most one bit is set - this is exactly what mutual exclusion requires."},

{"instruction": "Write correct AXI VALID/READY stability assertions.",
 "output": "// AXI rule: once VALID is asserted, it must be held until READY is seen\n// This applies to: AWVALID, WVALID, ARVALID (master-driven)\n// And to: BVALID, RVALID (slave-driven)\n\n// AWVALID stability\nassert property (@(posedge clk) disable iff(!rst_n)\n  (awvalid && !awready) |-> ##1 awvalid)\n  else $error(\"AWVALID deasserted before AWREADY\");\n\n// WVALID stability\nassert property (@(posedge clk) disable iff(!rst_n)\n  (wvalid && !wready) |-> ##1 wvalid)\n  else $error(\"WVALID deasserted before WREADY\");\n\n// BVALID stability (slave must hold until master accepts)\nassert property (@(posedge clk) disable iff(!rst_n)\n  (bvalid && !bready) |-> ##1 bvalid)\n  else $error(\"BVALID deasserted before BREADY\");"},

{"instruction": "Write a correct SVA to check no simultaneous full and empty in a FIFO.",
 "output": "// full and empty are mutually exclusive - both true at once is impossible\nassert property (@(posedge clk) disable iff(!rst_n)\n  !(full && empty))\n  else $fatal(\"Impossible FIFO state: full AND empty simultaneously\");"},

{"instruction": "Write correct assertions for FIFO occupancy bounds.",
 "output": "// Occupancy must always be between 0 and DEPTH inclusive\nassert property (@(posedge clk) disable iff(!rst_n)\n  (occupancy >= 0) && (occupancy <= DEPTH))\n  else $error(\"FIFO occupancy %0d out of range [0,%0d]\", occupancy, DEPTH);\n\n// Occupancy increments on write only\nassert property (@(posedge clk) disable iff(!rst_n)\n  (wr_en && !full && !rd_en) |=> (occupancy == $past(occupancy) + 1))\n  else $error(\"Occupancy did not increment on write\");\n\n// Occupancy decrements on read only\nassert property (@(posedge clk) disable iff(!rst_n)\n  (rd_en && !empty && !wr_en) |=> (occupancy == $past(occupancy) - 1))\n  else $error(\"Occupancy did not decrement on read\");"},

# // ============================================================
# // BLOCK 4: AXI - Correct channel decoupling
# // ============================================================

{"instruction": "Write a correct AXI4-Lite slave that accepts AW and W channels independently.",
 "output": "module axil_slave (\n  input clk, rst_n,\n  // Write address channel\n  input [31:0] awaddr, input awvalid, output reg awready,\n  // Write data channel\n  input [31:0] wdata,  input wvalid,  output reg wready,\n  // Write response channel\n  output reg bvalid, input bready, output reg [1:0] bresp\n);\n  reg [31:0] reg0, reg1, reg2, reg3;\n  reg        aw_pend;  // AW channel captured\n  reg        w_pend;   // W channel captured\n  reg [31:0] aw_addr;  // latched address\n  reg [31:0] w_data;   // latched data\n\n  always @(posedge clk or negedge rst_n) begin\n    if (!rst_n) begin\n      awready<=0; wready<=0; bvalid<=0;\n      aw_pend<=0; w_pend<=0;\n    end else begin\n      // Accept AW independently - one cycle pulse\n      awready <= 0;\n      if (awvalid && !aw_pend) begin\n        awready  <= 1;\n        aw_addr  <= awaddr;\n        aw_pend  <= 1;\n      end\n\n      // Accept W independently - one cycle pulse\n      wready <= 0;\n      if (wvalid && !w_pend) begin\n        wready  <= 1;\n        w_data  <= wdata;\n        w_pend  <= 1;\n      end\n\n      // Commit write only when BOTH channels captured\n      if (aw_pend && w_pend && !bvalid) begin\n        case (aw_addr[3:2])\n          2'b00: reg0 <= w_data;\n          2'b01: reg1 <= w_data;\n          2'b10: reg2 <= w_data;\n          2'b11: reg3 <= w_data;\n        endcase\n        bvalid  <= 1;\n        bresp   <= 2'b00; // OKAY\n        aw_pend <= 0;\n        w_pend  <= 0;\n      end\n\n      // Clear response after master accepts\n      if (bvalid && bready) bvalid <= 0;\n    end\n  end\nendmodule"},

{"instruction": "Explain the AXI4-Lite channel independence rule and why violating it causes deadlock.",
 "output": "The AXI4 spec allows a master to send AW and W in ANY order or even simultaneously. A slave that only accepts W after AW is seen will deadlock with a master that sends W before AW. A slave that gates W acceptance on AW will also block if the master sends them simultaneously but the slave only watches for a specific ordering.\n\nCorrect slave behavior:\n1. Accept AWVALID when it arrives - store the address, pulse AWREADY\n2. Accept WVALID when it arrives - store the data, pulse WREADY\n3. Perform the register write only after BOTH have been captured\n4. Then assert BVALID\n\nUse separate aw_pend and w_pend flags. Never write `if (awvalid && wvalid)` as the acceptance condition."},

{"instruction": "Debug: This AXI slave requires AW and W in same cycle - fix the deadlock.\nif (awvalid && wvalid && !aw_done) begin awready<=1; wready<=1; end",
 "output": "This deadlocks with any master that sends AW and W in separate cycles. Fix:\nreg aw_pend, w_pend;\nreg [31:0] aw_addr_h, w_data_h;\n\n// Accept AW whenever it arrives\nawready<=0;\nif (awvalid && !aw_pend) begin awready<=1; aw_addr_h<=awaddr; aw_pend<=1; end\n\n// Accept W whenever it arrives (independent of AW)\nwready<=0;\nif (wvalid && !w_pend) begin wready<=1; w_data_h<=wdata; w_pend<=1; end\n\n// Write register only after both captured\nif (aw_pend && w_pend && !bvalid) begin\n  regs[aw_addr_h[3:2]] <= w_data_h;\n  bvalid<=1; bresp<=2'b00;\n  aw_pend<=0; w_pend<=0;\nend\nif (bvalid && bready) bvalid<=0;"},

# // ============================================================
# // BLOCK 5: CDC - Correct multi-bit rules
# // ============================================================

{"instruction": "Explain why passing a multi-bit binary bus through two flip-flops is wrong.",
 "output": "A two-flop synchronizer works for single bits only. For a multi-bit binary bus:\n\n1. Each bit goes through its own flip-flop independently\n2. If the bus is changing (e.g., a counter incrementing from 0111 to 1000), multiple bits change simultaneously\n3. Each bit's flip-flop resolves metastability independently and at a slightly different time\n4. The destination domain reads a combination of old and new bit values - an invalid intermediate code like 1111 or 0000 that was never a valid count\n\nCorrect approaches for multi-bit CDC:\n- Use an async FIFO with Gray-coded pointers (for streaming data)\n- Use a handshake synchronizer with req/ack (for occasional data transfers)\n- Use Gray code if the value is a counter (only 1 bit changes per increment)\n\nNever do: `always @(posedge dst_clk) dst_bus <= src_bus;` for a multi-bit changing signal"},

{"instruction": "Write the correct CDC approach for a multi-bit data word.",
 "output": "// For occasional multi-bit transfers: use handshake synchronizer\nmodule cdc_data #(parameter W=32) (\n  input src_clk, dst_clk, rst_n,\n  input [W-1:0] src_data, input src_valid, output reg src_ready,\n  output reg [W-1:0] dst_data, output reg dst_valid, input dst_ready\n);\n  reg src_req, dst_ack;\n  reg [1:0] req_sync, ack_sync;\n  reg [W-1:0] data_hold;\n\n  // Source: assert req when valid, wait for ack\n  always @(posedge src_clk or negedge rst_n) begin\n    if (!rst_n) begin src_req<=0; src_ready<=1; data_hold<='0; end\n    else begin\n      if (src_valid && src_ready) begin data_hold<=src_data; src_req<=1; src_ready<=0; end\n      if (src_req && ack_sync[1]) begin src_req<=0; src_ready<=1; end\n    end\n  end\n\n  // Sync req to dst domain (single bit - safe)\n  always @(posedge dst_clk or negedge rst_n)\n    if (!rst_n) req_sync<=0;\n    else req_sync<={req_sync[0], src_req};\n\n  // Destination: capture data when req arrives\n  always @(posedge dst_clk or negedge rst_n) begin\n    if (!rst_n) begin dst_ack<=0; dst_valid<=0; end\n    else begin\n      if (req_sync[1] && !dst_ack) begin dst_data<=data_hold; dst_valid<=1; dst_ack<=1; end\n      if (dst_valid && dst_ready) dst_valid<=0;\n      if (!req_sync[1]) dst_ack<=0;\n    end\n  end\n\n  // Sync ack back to src domain\n  always @(posedge src_clk or negedge rst_n)\n    if (!rst_n) ack_sync<=0;\n    else ack_sync<={ack_sync[0], dst_ack};\nendmodule"},

{"instruction": "Debug: This CDC synchronizer passes an 8-bit counter directly through two flip-flops.\nreg [7:0] sync1, sync2;\nalways @(posedge dst_clk) begin sync1<=src_cnt; sync2<=sync1; end",
 "output": "This is wrong. A binary counter changes multiple bits simultaneously on many increments (e.g., 0111->1000 flips 4 bits). Each bit resolves metastability independently, producing corrupted intermediate codes.\n\nFix options:\n\n1. If the counter must cross domains, convert to Gray code first:\nwire [7:0] src_gray = src_cnt ^ (src_cnt >> 1); // Gray encode\nreg [7:0] gray_s1, gray_s2;\nalways @(posedge dst_clk) begin gray_s1<=src_gray; gray_s2<=gray_s1; end\n// Then decode gray_s2 back to binary in the destination domain\n\n2. If only occasional snapshots are needed, use a handshake synchronizer\n\n3. If streaming data, use an async FIFO"},

# // ============================================================
# // BLOCK 6: Repeat correct FIFO logic many times (reinforcement)
# // ============================================================

{"instruction": "What is the correct full condition for a FIFO with extra-MSB pointers?",
 "output": "assign full = (wr_ptr[AW-1:0] == rd_ptr[AW-1:0]) && (wr_ptr[AW] != rd_ptr[AW]);\n// Lower address bits match: write pointer is at the same slot as read pointer\n// MSBs differ: write pointer has wrapped once more than read pointer\n// Together: the buffer is completely full - every slot has unread data"},

{"instruction": "What is the correct empty condition for a FIFO with extra-MSB pointers?",
 "output": "assign empty = (wr_ptr == rd_ptr);\n// All bits equal: write and read pointers are at the same position\n// No wrapping difference: nothing has been written that hasn't been read"},

{"instruction": "Show the complete pointer comparison table for a 4-entry FIFO.",
 "output": "// AW=2 (4-entry FIFO), pointers are 3 bits (AW+1)\n// wr_ptr[2] = wrap bit, wr_ptr[1:0] = address\n\n// Initial state:\n// wr=000, rd=000 -> wr==rd -> EMPTY\n\n// After writing 1:\n// wr=001, rd=000 -> not equal, MSBs same -> NORMAL (1 entry)\n\n// After writing 4 (full):\n// wr=100, rd=000 -> lower bits 00==00, MSBs 1!=0 -> FULL\n\n// After reading 1:\n// wr=100, rd=001 -> lower bits 00!=01 -> NORMAL (3 entries)\n\n// After reading 4:\n// wr=100, rd=100 -> wr==rd -> EMPTY\n\n// Correct code:\n// assign empty = (wr_ptr == rd_ptr);\n// assign full  = (wr_ptr[AW-1:0]==rd_ptr[AW-1:0]) && (wr_ptr[AW]!=rd_ptr[AW]);"},

]

# Append this cell's examples to the running dataset. correction_data is
# repeated three times, so every correction example appears 3x in the mix.
# NOTE(review): extra_data is defined outside this chunk; assuming it is a list,
# `+=` extends it in place, so re-running this cell appends the examples again.
extra_data += raw_data + (correction_data * 3)

In [None]:
from google.colab import drive
drive.mount('/content/drive')

import os, gc, torch
os.environ["USE_TF"] = "0"
os.environ["USE_TORCH"] = "1"

!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps xformers trl peft accelerate bitsandbytes datasets

from unsloth import FastVisionModel, is_bfloat16_supported
# from unsloth import UnslothVisionDataCollator
# from transformers import TrainingArguments
from trl import SFTTrainer, SFTConfig
from datasets import Dataset

# Free memory left over from earlier cells before loading the model.
# Collect Python garbage first so CUDA tensors held only by unreachable objects
# are released, then return the now-unused cached blocks to the driver.
gc.collect()
torch.cuda.empty_cache()

# --- 1. LOAD MODEL ---
# Load the saved stage-4 checkpoint from Drive in 4-bit with a single
# from_pretrained call; this cell performs no separate adapter-loading step.
model, tokenizer = FastVisionModel.from_pretrained(
    model_name = "/content/drive/MyDrive/qwen_verilog_stage4_final",
    load_in_4bit = True,
    max_seq_length = 2048,  # keep in sync with SFTConfig.max_seq_length below
)

# Sanity check: report how many parameters will receive gradients.
trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
total     = sum(p.numel() for p in model.parameters())
print(f"Trainable: {trainable:,} / {total:,} ({100*trainable/total:.2f}%)")

# --- 2. DATASET ---
# extra_data is the list of {"instruction", "output"} dicts built in the earlier data cells.
dataset = Dataset.from_list(extra_data)

# --- 3. FORMATTING FUNCTION ---
# Produces a "text" column: one chat-template-rendered string per example.
def format_data(examples):
    """Render a batch of instruction/output pairs as chat-formatted strings.

    Args:
        examples: batch mapping with parallel "instruction" and "output"
            sequences (the shape a batched ``Dataset.map`` passes in).

    Returns:
        ``{"text": [...]}`` holding one rendered conversation per pair, in
        input order.
    """
    def _render(instruction, answer):
        # Fixed system prompt, then the user request and the reference answer.
        conversation = [
            {"role": "system", "content": "You are an expert Verilog and RTL design engineer."},
            {"role": "user", "content": instruction},
            {"role": "assistant", "content": answer},
        ]
        # tokenize=False asks for the rendered string rather than token ids;
        # no generation prompt is appended because the assistant turn is
        # already part of the conversation.
        return tokenizer.apply_chat_template(
            conversation,
            tokenize=False,
            add_generation_prompt=False,
        )

    pairs = zip(examples["instruction"], examples["output"])
    return {"text": [_render(instruction, answer) for instruction, answer in pairs]}

# Add the chat-formatted "text" column that the trainer reads.
qwen_train_data = dataset.map(format_data, batched=True)

# --- 4. TRAINER ---
# Query mixed-precision support once; bf16 is used when available, fp16 otherwise.
use_bf16 = is_bfloat16_supported()

sft_args = SFTConfig(
    # Checkpointing: intermediate checkpoints go to Drive, only the latest two are kept.
    output_dir="/content/drive/MyDrive/qwen_verilog_stage6",
    save_steps=50,
    save_total_limit=2,
    logging_steps=10,
    # Optimisation schedule: cosine decay after a 40-step warmup.
    num_train_epochs=8,
    learning_rate=1e-5,
    lr_scheduler_type="cosine",
    warmup_steps=40,
    optim="paged_adamw_8bit",
    # 2 samples per step x 4 accumulation steps = 8 samples per optimizer update per device.
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    fp16=not use_bf16,
    bf16=use_bf16,
    # Data handling: train on the pre-rendered "text" column.
    remove_unused_columns=False,
    dataset_text_field="text",
    max_seq_length=2048,
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    data_collator=None,
    train_dataset=qwen_train_data,
    args=sft_args,
)

# Re-report the trainable fraction now that the trainer has been constructed.
trainable = 0
total = 0
for param in model.parameters():
    count = param.numel()
    total += count
    if param.requires_grad:
        trainable += count
print(f"Trainable: {trainable:,} / {total:,} ({100*trainable/total:.2f}%)")

# --- 5. TRAIN ---
trainer.train()

# --- 6. SAVE TO DRIVE (do this immediately!) ---
# Model first, then tokenizer, both into the same final directory.
final_dir = "/content/drive/MyDrive/qwen_verilog_stage6_final"
for artifact in (model, tokenizer):
    artifact.save_pretrained(final_dir)
print("✅ Stage6 saved")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Collecting unsloth@ git+https://github.com/unslothai/unsloth.git (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-0waokenc/unsloth_4b622226f1194d6aa9ab22d5d2bde660
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-0waokenc/unsloth_4b622226f1194d6aa9ab22d5d2bde660
  Resolved https://github.com/unslothai/unsloth.git to commit 56c8f9662b1bc1fb50bcbe6bcc45ddffb0cdeb60
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2026.2.1: Fast Q

Map:   0%|          | 0/487 [00:00<?, ? examples/s]

Unsloth: Tokenizing ["text"] (num_proc=6):   0%|          | 0/487 [00:00<?, ? examples/s]

Trainable: 47,589,376 / 5,077,111,808 (0.94%)


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 487 | Num Epochs = 8 | Total steps = 488
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 47,589,376 of 8,339,756,032 (0.57% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
10,0.8892
20,0.9533
30,1.0179
40,0.9231
50,1.0016
60,0.9691
70,1.0324
80,0.8728
90,0.925
100,0.8198


✅ Stage6 saved


In [None]:
# Mount Google Drive at /content/drive (Colab only).
# NOTE(review): the recorded output reports the drive was already mounted,
# and the following cell issues the same mount call again.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Inference setup: mount Drive, install dependencies, and load the
# fine-tuned checkpoint. Statement order matters here (environment flags and
# installs come before the unsloth/transformers imports).
from google.colab import drive
drive.mount('/content/drive')

# NOTE(review): gc is imported but not used in the visible part of this cell.
import os, gc, torch
# Set before transformers is imported.
# presumably these steer transformers toward the PyTorch backend — TODO confirm
os.environ["USE_TF"] = "0"
os.environ["USE_TORCH"] = "1"

# Install Unsloth from GitHub plus supporting packages (second line skips
# dependency resolution via --no-deps).
# NOTE(review): nothing is version-pinned, so a re-run may pull different releases.
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps xformers trl peft accelerate bitsandbytes datasets

# NOTE(review): torch was already imported above; this repeat is redundant.
import torch
from unsloth import FastVisionModel
from transformers import TextStreamer

# Release cached GPU memory before loading the model.
torch.cuda.empty_cache()

# Load the checkpoint written to Drive by the training cell, with
# load_in_4bit enabled. `tokenizer` is whatever object from_pretrained
# returns alongside the model.
model, tokenizer = FastVisionModel.from_pretrained(
    model_name = "/content/drive/MyDrive/qwen_verilog_stage6_final",
    load_in_4bit = True,
    max_seq_length = 2048,
)
# Prepare the loaded model for generation via Unsloth's for_inference hook.
FastVisionModel.for_inference(model)

def generate_verilog(instruction, max_tokens=1024):
    """Stream the model's answer to a single instruction to stdout.

    Builds a system + user conversation, renders it with the chat template
    (with a generation prompt appended), prints a banner containing the
    instruction, and then generates with a `TextStreamer` so tokens appear
    as they are produced.

    Args:
        instruction: The user request to send to the model.
        max_tokens: Upper bound passed as `max_new_tokens`.

    Returns:
        None. The generated ids are discarded; output is only streamed.

    Note:
        Uses the module-level `tokenizer`, `model` and `TextStreamer`, and
        moves the encoded inputs to "cuda".
    """
    chat = [
        {"role": "system", "content": "You are an expert Verilog and RTL design engineer."},
        {"role": "user", "content": instruction},
    ]
    prompt_text = tokenizer.apply_chat_template(
        chat,
        tokenize=False,
        add_generation_prompt=True,
    )

    # Encode as a batch of one and move the tensors to the GPU.
    encoded = tokenizer(text=[prompt_text], return_tensors="pt")
    model_inputs = encoded.to("cuda")

    # skip_prompt=True: only newly generated text is echoed, not the prompt.
    live_output = TextStreamer(tokenizer, skip_prompt=True)
    print(f"\n{'='*50}\nPROMPT: {instruction}\n{'='*50}")

    # Sampling at a low temperature.
    generation_options = {
        "streamer": live_output,
        "max_new_tokens": max_tokens,
        "temperature": 0.1,
        "do_sample": True,
        "use_cache": True,
    }
    model.generate(**model_inputs, **generation_options)

# ============================================================
# STAGE6 FINAL EVALUATION — Easy → Medium → Hard
# (this cell loads qwen_verilog_stage6_final; prompts run in listed order)
# ============================================================

EVAL_PROMPTS = [
    # --- CATEGORY 1: EASY — Direct Generation ---
    "Design a simple 4-bit synchronous counter with active-low reset.",
    "Design a D flip-flop with synchronous reset and enable.",
    "Design a 2-to-1 multiplexer with parameterizable width.",
    "Design a shift register with parameterizable depth and width.",

    # --- CATEGORY 2: EASY-MEDIUM — Explain Why ---
    "Explain why non-blocking assignments must be used in sequential always blocks.",
    "Explain why two flip-flops are needed when crossing clock domains.",
    "Explain why FIFO depth must be a power of two for pointer-based full/empty detection.",

    # --- CATEGORY 3: MEDIUM — Debug RTL ---
    "The following counter overflows silently. Add saturation logic:\nif (en) cnt <= cnt + 1;",
    "The following always block infers a latch. Fix it:\nalways @(*) begin\n  case(sel)\n    2'b00: y = a;\n    2'b01: y = b;\n  endcase\nend",
    "The following FIFO full logic is wrong. Fix it:\nassign full = (wr_ptr == rd_ptr);\nassign empty = (wr_ptr[$clog2(DEPTH)] != rd_ptr[$clog2(DEPTH)]) && (wr_ptr[$clog2(DEPTH)-1:0] == rd_ptr[$clog2(DEPTH)-1:0]);",

    # --- CATEGORY 4: MEDIUM — Generation ---
    "Design a parameterizable synchronous FIFO with depth and width parameters.",
    "Design a 4-bit up/down counter with synchronous load and asynchronous reset.",
    "Design a Gray code encoder for an N-bit binary input.",
    "Design a CDC-safe pulse synchronizer for transferring a pulse between two clock domains.",

    # --- CATEGORY 5: MEDIUM-HARD — Assertions ---
    "Write SystemVerilog assertions to ensure no FIFO write occurs when full and no read when empty.",
    "Write a SystemVerilog assertion to verify AXI valid never deasserts before ready.",
    "Write an assertion to ensure arbiter grants are always one-hot.",

    # --- CATEGORY 6: HARD — Complex Generation ---
    "Design a parameterizable asynchronous FIFO with Gray-coded pointers and two-flop synchronizers.",
    "Design an AXI4-Lite slave register block with 4 read/write registers at addresses 0x00, 0x04, 0x08, 0x0C.",
    "Design a 4-client round-robin arbiter with rotating priority pointer.",
    "Design an AXI-Stream skid buffer that handles backpressure without dropping data.",

    # --- CATEGORY 7: HARD — Complex Debug ---
    "The following AXI-Stream sink deasserts ready combinationally based on valid, causing a deadlock. Explain the bug and fix it.",
    "The following CDC design passes an 8-bit data bus directly through two flip-flops. Explain why this is wrong and provide the correct approach.",
    "The following AXI4-Lite slave drops write responses when AW and W channels arrive in different cycles. Fix the protocol handling.",
]

# Each prompt is streamed with the default token budget of generate_verilog.
for eval_prompt in EVAL_PROMPTS:
    generate_verilog(eval_prompt)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Collecting unsloth@ git+https://github.com/unslothai/unsloth.git (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-ox_mypne/unsloth_bf8ef0c92dc940ffa33d143f881f2851
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-ox_mypne/unsloth_bf8ef0c92dc940ffa33d143f881f2851
  Resolved https://github.com/unslothai/unsloth.git to commit 56c8f9662b1bc1fb50bcbe6bcc45ddffb0cdeb60
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2026.2.1: Fast Q