In [13]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, Subset

import pennylane as qml
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tqdm.notebook import tqdm  # Use notebook-friendly tqdm
import matplotlib.pyplot as plt
import time, os, random
import pandas as pd

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Running on {device}")

def set_all_seeds(seed: int = 42) -> None:
    """Seed every RNG we rely on (Python, NumPy, Torch, PennyLane, CUDNN)."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
    os.environ["PL_GLOBAL_SEED"] = str(seed)
    # qml.numpy has been deprecated, direct use of np is preferred
    # qml.numpy.random.seed(seed)

Running on cuda


In [14]:
def generate_narma_data(n_samples, order, seed=None):
    """Generates NARMA time-series data."""
    if seed is not None:
        np.random.seed(seed)
    u = np.random.uniform(0, 0.5, n_samples)
    y = np.zeros(n_samples)

    for t in range(order, n_samples):
        term1 = 0.3 * y[t-1]
        term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))
        term3 = 1.5 * u[t-order] * u[t-1]
        term4 = 0.1
        y[t] = term1 + term2 + term3 + term4
        
    return y.reshape(-1, 1)

def transform_narma_data(data, seq_len):
    """Transforms NARMA data into input-output pairs."""
    x, y = [], []
    for i in range(len(data) - seq_len):
        _x = data[i:(i + seq_len)]
        _y = data[i + seq_len]
        x.append(_x)
        y.append(_y)

    x = np.array(x).transpose(0, 2, 1) # Reshape for Conv1d: (batch, channels, seq_len)
    x = torch.from_numpy(x).float()
    y = torch.from_numpy(np.array(y)).float()
    
    return x, y

def get_narma_dataloaders(n_samples=2000, order=10, seq_len=20, batch_size=32, train_p=0.7, val_p=0.15, seed=None):
    """Creates DataLoader objects for the NARMA dataset."""
    narma_series = generate_narma_data(n_samples, order, seed=seed)

    scaler = MinMaxScaler(feature_range=(-1, 1))
    dataset_scaled = scaler.fit_transform(narma_series)

    x, y = transform_narma_data(dataset_scaled, seq_len)
    full_dataset = TensorDataset(x, y)

    # Sequential split for time-series data
    train_end_idx = int(train_p * len(full_dataset))
    val_end_idx = train_end_idx + int(val_p * len(full_dataset))
    
    train_indices = list(range(train_end_idx))
    val_indices = list(range(train_end_idx, val_end_idx))
    test_indices = list(range(val_end_idx, len(full_dataset)))

    train_dataset = Subset(full_dataset, train_indices)
    val_dataset = Subset(full_dataset, val_indices)
    test_dataset = Subset(full_dataset, test_indices)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, drop_last=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, drop_last=True)
    
    print(f"Data loaded. Train: {len(train_dataset)}, Val: {len(val_dataset)}, Test: {len(test_dataset)}")
    
    return train_loader, val_loader, test_loader

In [15]:
class QTCN(nn.Module):
    def __init__(self, n_qubits, circuit_depth, input_dim, kernel_size, dilation=1):
        super(QTCN, self).__init__()
        self.n_qubits = n_qubits
        self.circuit_depth = circuit_depth
        
        self.input_channels = input_dim[1]
        self.time_steps = input_dim[2]
        self.kernel_size = kernel_size
        self.dilation = dilation
        
        # Classical linear layer for dimension reduction
        self.fc = nn.Linear(self.input_channels * self.kernel_size, n_qubits)
        
        # Quantum parameters
        self.conv_params = nn.Parameter(torch.randn(circuit_depth, n_qubits, 15))
        self.pool_params = nn.Parameter(torch.randn(circuit_depth, n_qubits // 2, 3))
        
        # Quantum device and QNode
        self.dev = qml.device("default.qubit", wires=n_qubits)
        self.qc = qml.QNode(self.circuit, self.dev, interface='torch', diff_method='backprop')

    def circuit(self, features):
        wires = list(range(self.n_qubits))    
        qml.AngleEmbedding(features, wires=wires, rotation='Y')
        for layer in range(self.circuit_depth):
            self._apply_convolution(self.conv_params[layer], wires)
            self._apply_pooling(self.pool_params[layer], wires)
            wires = wires[::2]
        return qml.expval(qml.PauliZ(wires[0])) # Measure the first qubit

    def _apply_convolution(self, weights, wires):
        n_wires = len(wires)
        for p in [0, 1]:
            for indx, w in enumerate(wires):
                if indx % 2 == p and indx < n_wires - 1:
                    qml.U3(*weights[indx, :3], wires=w)
                    qml.U3(*weights[indx + 1, 3:6], wires=wires[indx + 1])
                    qml.IsingZZ(weights[indx, 6], wires=[w, wires[indx + 1]])
                    qml.IsingYY(weights[indx, 7], wires=[w, wires[indx + 1]])
                    qml.IsingXX(weights[indx, 8], wires=[w, wires[indx + 1]])
                    qml.U3(*weights[indx, 9:12], wires=w)
                    qml.U3(*weights[indx + 1, 12:], wires=wires[indx + 1])

    def _apply_pooling(self, pool_weights, wires):
        n_wires = len(wires)
        assert n_wires >= 2, "Need at least two wires for pooling."
        for indx, w in enumerate(wires):
            if indx % 2 == 1:
                measurement = qml.measure(w)
                qml.cond(measurement, qml.U3)(*pool_weights[indx // 2], wires=wires[indx - 1])
                
    def forward(self, x):
        batch_size, _, time_steps = x.size()
        
        # Slide a dilated window across the time steps
        output_q_nodes = []
        num_windows = time_steps - (self.kernel_size - 1) * self.dilation
        for i in range(num_windows):
            start = i
            indices = [start + d * self.dilation for d in range(self.kernel_size)]
            
            window = x[:, :, indices].reshape(batch_size, -1)
            reduced_window = self.fc(window)
            output_q_nodes.append(self.qc(reduced_window))
            
        # Aggregate the outputs from each window
        # Taking the mean is a simple and effective aggregation strategy
        output = torch.stack(output_q_nodes, dim=1).float()
        return torch.mean(output, dim=1)

In [16]:
def train_epoch(model, dataloader, optimizer, criterion):
    model.train()
    total_loss = 0.0
    for inputs, labels in dataloader:
        inputs, labels = inputs.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels.squeeze())
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        
    return total_loss / len(dataloader)

def evaluate(model, dataloader, criterion):
    model.eval()
    total_loss = 0.0
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels.squeeze())
            total_loss += loss.item()
            
    return total_loss / len(dataloader)

# Tune kernel size and dilation

In [17]:
def train_and_evaluate(kernel_size, dilation, seed, hparams):
    """Trains the QTCN model for a given seed and returns the best validation loss."""
    print(f"--- Seed: {seed}, Testing: kernel_size={kernel_size}, dilation={dilation} ---")
    
    # --- Set seed for this specific run ---
    set_all_seeds(seed)

    # --- Constraint Check ---
    # CORRECTED: Use SEQ_LEN for the constraint check, not N_SAMPLES
    if (kernel_size - 1) * dilation >= hparams['SEQ_LEN']:
        print(f"Invalid combination: (kernel_size-1)*dilation ({ (kernel_size - 1) * dilation}) must be less than SEQ_LEN ({hparams['SEQ_LEN']}). Skipping.")
        return float('inf')

    # --- Data Loading ---
    train_loader, val_loader, _ = get_narma_dataloaders(
        n_samples=hparams['N_SAMPLES'], 
        order=hparams['ORDER'], 
        seq_len=hparams['SEQ_LEN'], 
        batch_size=hparams['BATCH_SIZE'], 
        seed=seed
    )

    # --- Model Initialization ---
    input_channels = 1 # NARMA is univariate
    input_dim = (hparams['BATCH_SIZE'], input_channels, hparams['SEQ_LEN']) 

    model = QTCN(
        n_qubits=hparams['N_QUBITS'], 
        circuit_depth=hparams['CIRCUIT_DEPTH'], 
        input_dim=input_dim, 
        kernel_size=kernel_size, 
        dilation=dilation
    ).to(device)
    
    # CORRECTED: Added eps=1e-8 for consistency with the main script
    optimizer = optim.Adam(model.parameters(), lr=hparams['LEARNING_RATE'], weight_decay=1e-4, eps=1e-8)
    criterion = nn.MSELoss()

    best_val_loss = float('inf')
    pbar = tqdm(range(hparams['EPOCHS']), desc=f'k={kernel_size}, d={dilation}, s={seed}')
    for epoch in pbar:
        train_loss = train_epoch(model, train_loader, optimizer, criterion)
        val_loss = evaluate(model, val_loader, criterion)
        if val_loss < best_val_loss:
            best_val_loss = val_loss
        pbar.set_postfix({"train_loss": f"{train_loss:.4f}", "val_loss": f"{val_loss:.4f}", "best_val": f"{best_val_loss:.4f}"})
    
    print(f"Finished seed {seed}. Best validation loss: {best_val_loss:.4f}")
    return best_val_loss

In [12]:
if __name__ == '__main__':
    # --- Define Hyperparameters and Search Grid ---
    HPARAMS = {
        'N_QUBITS': 8,
        'CIRCUIT_DEPTH': 2,
        'N_SAMPLES': 240,
        'ORDER': 10,
        'SEQ_LEN': 10,
        'BATCH_SIZE': 32,
        'EPOCHS': 50,
        'LEARNING_RATE': 0.005,
    }
    
    kernel_sizes_to_test = [3,4,5,6,7,8,9]
    dilations_to_test = [1, 2, 3] # Added dilation=1 to the test
    seeds_to_test = [2025, 2026, 2027]
    
    results = {}
    best_hyperparams = {'kernel_size': None, 'dilation': None}
    best_avg_loss = float('inf')

    print("=== Starting Grid Search for HQTCN Hyperparameters (avg over seeds) ===")

    for k in kernel_sizes_to_test:
        for d in dilations_to_test:
            losses_for_seeds = []
            print("-" * 60)
            print(f"Evaluating pair: kernel_size={k}, dilation={d}")

            for seed in seeds_to_test:
                val_loss = train_and_evaluate(kernel_size=k, dilation=d, seed=seed, hparams=HPARAMS)
                if val_loss != float('inf'):
                    losses_for_seeds.append(val_loss)
            
            if not losses_for_seeds: continue # Skip if all runs for this combo were invalid

            mean_val_loss = np.mean(losses_for_seeds)
            results[(k, d)] = mean_val_loss
            print(f"\n=> Average validation loss for (k={k}, d={d}): {mean_val_loss:.4f}\n")

            if mean_val_loss < best_avg_loss:
                best_avg_loss = mean_val_loss
                best_hyperparams['kernel_size'] = k
                best_hyperparams['dilation'] = d
    
    print("\n" + "="*60)
    print("      === Grid Search Complete ===      ")
    print("="*60)
    print(f"\nBest AVERAGE Validation Loss: {best_avg_loss:.4f}")
    print(f"Optimal Hyperparameters: kernel_size = {best_hyperparams['kernel_size']}, dilation = {best_hyperparams['dilation']}")
    
    print("\n--- Full Results (sorted by average loss) ---")
    sorted_results = sorted(results.items(), key=lambda item: item[1])
    for (k, d), loss in sorted_results:
        print(f"Kernel Size: {k}, Dilation: {d} -> Avg Val Loss: {loss:.4f}")

=== Starting Grid Search for HQTCN Hyperparameters (avg over seeds) ===
------------------------------------------------------------
Evaluating pair: kernel_size=3, dilation=1
--- Seed: 2025, Testing: kernel_size=3, dilation=1 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


k=3, d=1, s=2025:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2025. Best validation loss: 0.0844
--- Seed: 2026, Testing: kernel_size=3, dilation=1 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


k=3, d=1, s=2026:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2026. Best validation loss: 0.0942
--- Seed: 2027, Testing: kernel_size=3, dilation=1 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


k=3, d=1, s=2027:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2027. Best validation loss: 0.0881

=> Average validation loss for (k=3, d=1): 0.0889

------------------------------------------------------------
Evaluating pair: kernel_size=3, dilation=2
--- Seed: 2025, Testing: kernel_size=3, dilation=2 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


k=3, d=2, s=2025:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2025. Best validation loss: 0.0767
--- Seed: 2026, Testing: kernel_size=3, dilation=2 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


k=3, d=2, s=2026:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2026. Best validation loss: 0.0972
--- Seed: 2027, Testing: kernel_size=3, dilation=2 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


k=3, d=2, s=2027:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2027. Best validation loss: 0.0908

=> Average validation loss for (k=3, d=2): 0.0883

------------------------------------------------------------
Evaluating pair: kernel_size=3, dilation=3
--- Seed: 2025, Testing: kernel_size=3, dilation=3 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


k=3, d=3, s=2025:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2025. Best validation loss: 0.0669
--- Seed: 2026, Testing: kernel_size=3, dilation=3 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


k=3, d=3, s=2026:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2026. Best validation loss: 0.0982
--- Seed: 2027, Testing: kernel_size=3, dilation=3 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


k=3, d=3, s=2027:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2027. Best validation loss: 0.0875

=> Average validation loss for (k=3, d=3): 0.0842

------------------------------------------------------------
Evaluating pair: kernel_size=4, dilation=1
--- Seed: 2025, Testing: kernel_size=4, dilation=1 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


k=4, d=1, s=2025:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2025. Best validation loss: 0.0881
--- Seed: 2026, Testing: kernel_size=4, dilation=1 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


k=4, d=1, s=2026:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2026. Best validation loss: 0.0914
--- Seed: 2027, Testing: kernel_size=4, dilation=1 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


k=4, d=1, s=2027:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2027. Best validation loss: 0.0877

=> Average validation loss for (k=4, d=1): 0.0891

------------------------------------------------------------
Evaluating pair: kernel_size=4, dilation=2
--- Seed: 2025, Testing: kernel_size=4, dilation=2 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


k=4, d=2, s=2025:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2025. Best validation loss: 0.0676
--- Seed: 2026, Testing: kernel_size=4, dilation=2 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


k=4, d=2, s=2026:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2026. Best validation loss: 0.0934
--- Seed: 2027, Testing: kernel_size=4, dilation=2 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


k=4, d=2, s=2027:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2027. Best validation loss: 0.0869

=> Average validation loss for (k=4, d=2): 0.0826

------------------------------------------------------------
Evaluating pair: kernel_size=4, dilation=3
--- Seed: 2025, Testing: kernel_size=4, dilation=3 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


k=4, d=3, s=2025:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2025. Best validation loss: 0.0683
--- Seed: 2026, Testing: kernel_size=4, dilation=3 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


k=4, d=3, s=2026:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2026. Best validation loss: 0.0773
--- Seed: 2027, Testing: kernel_size=4, dilation=3 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


k=4, d=3, s=2027:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2027. Best validation loss: 0.0770

=> Average validation loss for (k=4, d=3): 0.0742

------------------------------------------------------------
Evaluating pair: kernel_size=5, dilation=1
--- Seed: 2025, Testing: kernel_size=5, dilation=1 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


k=5, d=1, s=2025:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2025. Best validation loss: 0.0861
--- Seed: 2026, Testing: kernel_size=5, dilation=1 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


k=5, d=1, s=2026:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2026. Best validation loss: 0.0952
--- Seed: 2027, Testing: kernel_size=5, dilation=1 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


k=5, d=1, s=2027:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2027. Best validation loss: 0.0836

=> Average validation loss for (k=5, d=1): 0.0883

------------------------------------------------------------
Evaluating pair: kernel_size=5, dilation=2
--- Seed: 2025, Testing: kernel_size=5, dilation=2 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


k=5, d=2, s=2025:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2025. Best validation loss: 0.0651
--- Seed: 2026, Testing: kernel_size=5, dilation=2 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


k=5, d=2, s=2026:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2026. Best validation loss: 0.0858
--- Seed: 2027, Testing: kernel_size=5, dilation=2 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


k=5, d=2, s=2027:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2027. Best validation loss: 0.0789

=> Average validation loss for (k=5, d=2): 0.0766

------------------------------------------------------------
Evaluating pair: kernel_size=5, dilation=3
--- Seed: 2025, Testing: kernel_size=5, dilation=3 ---
Invalid combination: (kernel_size-1)*dilation (12) must be less than SEQ_LEN (10). Skipping.
--- Seed: 2026, Testing: kernel_size=5, dilation=3 ---
Invalid combination: (kernel_size-1)*dilation (12) must be less than SEQ_LEN (10). Skipping.
--- Seed: 2027, Testing: kernel_size=5, dilation=3 ---
Invalid combination: (kernel_size-1)*dilation (12) must be less than SEQ_LEN (10). Skipping.
------------------------------------------------------------
Evaluating pair: kernel_size=6, dilation=1
--- Seed: 2025, Testing: kernel_size=6, dilation=1 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


k=6, d=1, s=2025:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2025. Best validation loss: 0.0697
--- Seed: 2026, Testing: kernel_size=6, dilation=1 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


k=6, d=1, s=2026:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2026. Best validation loss: 0.0940
--- Seed: 2027, Testing: kernel_size=6, dilation=1 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


k=6, d=1, s=2027:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2027. Best validation loss: 0.0901

=> Average validation loss for (k=6, d=1): 0.0846

------------------------------------------------------------
Evaluating pair: kernel_size=6, dilation=2
--- Seed: 2025, Testing: kernel_size=6, dilation=2 ---
Invalid combination: (kernel_size-1)*dilation (10) must be less than SEQ_LEN (10). Skipping.
--- Seed: 2026, Testing: kernel_size=6, dilation=2 ---
Invalid combination: (kernel_size-1)*dilation (10) must be less than SEQ_LEN (10). Skipping.
--- Seed: 2027, Testing: kernel_size=6, dilation=2 ---
Invalid combination: (kernel_size-1)*dilation (10) must be less than SEQ_LEN (10). Skipping.
------------------------------------------------------------
Evaluating pair: kernel_size=6, dilation=3
--- Seed: 2025, Testing: kernel_size=6, dilation=3 ---
Invalid combination: (kernel_size-1)*dilation (15) must be less than SEQ_LEN (10). Skipping.
--- Seed: 2026, Testing: kernel_size=6, dilation=3 ---
Invalid combination: (kernel_size-1)*dilatio

  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


k=7, d=1, s=2025:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2025. Best validation loss: 0.0695
--- Seed: 2026, Testing: kernel_size=7, dilation=1 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


k=7, d=1, s=2026:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2026. Best validation loss: 0.1002
--- Seed: 2027, Testing: kernel_size=7, dilation=1 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


k=7, d=1, s=2027:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2027. Best validation loss: 0.0975

=> Average validation loss for (k=7, d=1): 0.0891

------------------------------------------------------------
Evaluating pair: kernel_size=7, dilation=2
--- Seed: 2025, Testing: kernel_size=7, dilation=2 ---
Invalid combination: (kernel_size-1)*dilation (12) must be less than SEQ_LEN (10). Skipping.
--- Seed: 2026, Testing: kernel_size=7, dilation=2 ---
Invalid combination: (kernel_size-1)*dilation (12) must be less than SEQ_LEN (10). Skipping.
--- Seed: 2027, Testing: kernel_size=7, dilation=2 ---
Invalid combination: (kernel_size-1)*dilation (12) must be less than SEQ_LEN (10). Skipping.
------------------------------------------------------------
Evaluating pair: kernel_size=7, dilation=3
--- Seed: 2025, Testing: kernel_size=7, dilation=3 ---
Invalid combination: (kernel_size-1)*dilation (18) must be less than SEQ_LEN (10). Skipping.
--- Seed: 2026, Testing: kernel_size=7, dilation=3 ---
Invalid combination: (kernel_size-1)*dilatio

  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


k=8, d=1, s=2025:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2025. Best validation loss: 0.0631
--- Seed: 2026, Testing: kernel_size=8, dilation=1 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


k=8, d=1, s=2026:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2026. Best validation loss: 0.0899
--- Seed: 2027, Testing: kernel_size=8, dilation=1 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


k=8, d=1, s=2027:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2027. Best validation loss: 0.0965

=> Average validation loss for (k=8, d=1): 0.0831

------------------------------------------------------------
Evaluating pair: kernel_size=8, dilation=2
--- Seed: 2025, Testing: kernel_size=8, dilation=2 ---
Invalid combination: (kernel_size-1)*dilation (14) must be less than SEQ_LEN (10). Skipping.
--- Seed: 2026, Testing: kernel_size=8, dilation=2 ---
Invalid combination: (kernel_size-1)*dilation (14) must be less than SEQ_LEN (10). Skipping.
--- Seed: 2027, Testing: kernel_size=8, dilation=2 ---
Invalid combination: (kernel_size-1)*dilation (14) must be less than SEQ_LEN (10). Skipping.
------------------------------------------------------------
Evaluating pair: kernel_size=8, dilation=3
--- Seed: 2025, Testing: kernel_size=8, dilation=3 ---
Invalid combination: (kernel_size-1)*dilation (21) must be less than SEQ_LEN (10). Skipping.
--- Seed: 2026, Testing: kernel_size=8, dilation=3 ---
Invalid combination: (kernel_size-1)*dilatio

  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


k=9, d=1, s=2025:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2025. Best validation loss: 0.0681
--- Seed: 2026, Testing: kernel_size=9, dilation=1 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


k=9, d=1, s=2026:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2026. Best validation loss: 0.0882
--- Seed: 2027, Testing: kernel_size=9, dilation=1 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


k=9, d=1, s=2027:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2027. Best validation loss: 0.0942

=> Average validation loss for (k=9, d=1): 0.0835

------------------------------------------------------------
Evaluating pair: kernel_size=9, dilation=2
--- Seed: 2025, Testing: kernel_size=9, dilation=2 ---
Invalid combination: (kernel_size-1)*dilation (16) must be less than SEQ_LEN (10). Skipping.
--- Seed: 2026, Testing: kernel_size=9, dilation=2 ---
Invalid combination: (kernel_size-1)*dilation (16) must be less than SEQ_LEN (10). Skipping.
--- Seed: 2027, Testing: kernel_size=9, dilation=2 ---
Invalid combination: (kernel_size-1)*dilation (16) must be less than SEQ_LEN (10). Skipping.
------------------------------------------------------------
Evaluating pair: kernel_size=9, dilation=3
--- Seed: 2025, Testing: kernel_size=9, dilation=3 ---
Invalid combination: (kernel_size-1)*dilation (24) must be less than SEQ_LEN (10). Skipping.
--- Seed: 2026, Testing: kernel_size=9, dilation=3 ---
Invalid combination: (kernel_size-1)*dilatio

# Tune batch size and learning rate

In [20]:
def train_and_evaluate(learning_rate, batch_size, seed, hparams):
    """Trains the QTCN model for a given seed and returns the best validation loss."""
    print(f"--- Seed: {seed}, Testing: learning_rate={learning_rate}, batch_size={batch_size} ---")
    
    # --- Set seed for this specific run ---
    set_all_seeds(seed)

    # # --- Constraint Check ---
    # # CORRECTED: Use SEQ_LEN for the constraint check, not N_SAMPLES
    # if (kernel_size - 1) * dilation >= hparams['SEQ_LEN']:
    #     print(f"Invalid combination: (kernel_size-1)*dilation ({ (kernel_size - 1) * dilation}) must be less than SEQ_LEN ({hparams['SEQ_LEN']}). Skipping.")
    #     return float('inf')

    # --- Data Loading ---
    train_loader, val_loader, _ = get_narma_dataloaders(
        n_samples=hparams['N_SAMPLES'], 
        order=hparams['ORDER'], 
        seq_len=hparams['SEQ_LEN'], 
        batch_size=batch_size, 
        seed=seed
    )

    # --- Model Initialization ---
    input_channels = 1 # NARMA is univariate
    input_dim = (batch_size, input_channels, hparams['SEQ_LEN']) 

    model = QTCN(
        n_qubits=hparams['N_QUBITS'], 
        circuit_depth=hparams['CIRCUIT_DEPTH'], 
        input_dim=input_dim, 
        kernel_size=hparams['KERNEL_SIZE'], 
        dilation=hparams['DILATION']
    ).to(device)
    
    # CORRECTED: Added eps=1e-8 for consistency with the main script
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-4, eps=1e-8)
    criterion = nn.MSELoss()

    best_val_loss = float('inf')
    pbar = tqdm(range(hparams['EPOCHS']), desc=f'l={learning_rate}, b={batch_size}, s={seed}')
    for epoch in pbar:
        train_loss = train_epoch(model, train_loader, optimizer, criterion)
        val_loss = evaluate(model, val_loader, criterion)
        if val_loss < best_val_loss:
            best_val_loss = val_loss
        pbar.set_postfix({"train_loss": f"{train_loss:.4f}", "val_loss": f"{val_loss:.4f}", "best_val": f"{best_val_loss:.4f}"})
    
    print(f"Finished seed {seed}. Best validation loss: {best_val_loss:.4f}")
    return best_val_loss

In [21]:
if __name__ == '__main__':
    # --- Define Hyperparameters and Search Grid ---
    HPARAMS = {
        'N_QUBITS': 8,
        'CIRCUIT_DEPTH': 2,
        'N_SAMPLES': 240,
        'ORDER': 10,
        'SEQ_LEN': 10,
        'EPOCHS': 50,
        'KERNEL_SIZE': 4,
        'DILATION': 3,
    }
    
    learning_rates_to_test = [0.001,0.005,0.01,0.05]
    batch_sizes_to_test = [8, 16, 32]
    seeds_to_test = [2025, 2026, 2027]
    
    results = {}
    best_hyperparams = {'learning_rate': None, 'batch_size': None}
    best_avg_loss = float('inf')

    print("=== Starting Grid Search for HQTCN Hyperparameters (avg over seeds) ===")

    for l in learning_rates_to_test:
        for b in batch_sizes_to_test:
            losses_for_seeds = []
            print("-" * 60)
            print(f"Evaluating pair: learning_rate={l}, batch_size={b}")

            for seed in seeds_to_test:
                val_loss = train_and_evaluate(learning_rate=l, batch_size=b, seed=seed, hparams=HPARAMS)
                if val_loss != float('inf'):
                    losses_for_seeds.append(val_loss)
            
            if not losses_for_seeds: continue # Skip if all runs for this combo were invalid

            mean_val_loss = np.mean(losses_for_seeds)
            results[(l, b)] = mean_val_loss
            print(f"\n=> Average validation loss for (l={l}, b={b}): {mean_val_loss:.4f}\n")

            if mean_val_loss < best_avg_loss:
                best_avg_loss = mean_val_loss
                best_hyperparams['learning_rate'] = l
                best_hyperparams['batch_size'] = b
    
    print("\n" + "="*60)
    print("      === Grid Search Complete ===      ")
    print("="*60)
    print(f"\nBest AVERAGE Validation Loss: {best_avg_loss:.4f}")
    print(f"Optimal Hyperparameters: learning_rate = {best_hyperparams['learning_rate']}, batch_size = {best_hyperparams['batch_size']}")
    
    print("\n--- Full Results (sorted by average loss) ---")
    sorted_results = sorted(results.items(), key=lambda item: item[1])
    for (l, r), loss in sorted_results:
        print(f"Learning Rate: {l}, Batch Size: {r} -> Avg Val Loss: {loss:.4f}")

=== Starting Grid Search for HQTCN Hyperparameters (avg over seeds) ===
------------------------------------------------------------
Evaluating pair: learning_rate=0.001, batch_size=8
--- Seed: 2025, Testing: learning_rate=0.001, batch_size=8 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


l=0.001, b=8, s=2025:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2025. Best validation loss: 0.0712
--- Seed: 2026, Testing: learning_rate=0.001, batch_size=8 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


l=0.001, b=8, s=2026:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2026. Best validation loss: 0.0809
--- Seed: 2027, Testing: learning_rate=0.001, batch_size=8 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


l=0.001, b=8, s=2027:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2027. Best validation loss: 0.0771

=> Average validation loss for (l=0.001, b=8): 0.0764

------------------------------------------------------------
Evaluating pair: learning_rate=0.001, batch_size=16
--- Seed: 2025, Testing: learning_rate=0.001, batch_size=16 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


l=0.001, b=16, s=2025:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2025. Best validation loss: 0.0745
--- Seed: 2026, Testing: learning_rate=0.001, batch_size=16 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


l=0.001, b=16, s=2026:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2026. Best validation loss: 0.0867
--- Seed: 2027, Testing: learning_rate=0.001, batch_size=16 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


l=0.001, b=16, s=2027:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2027. Best validation loss: 0.0770

=> Average validation loss for (l=0.001, b=16): 0.0794

------------------------------------------------------------
Evaluating pair: learning_rate=0.001, batch_size=32
--- Seed: 2025, Testing: learning_rate=0.001, batch_size=32 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


l=0.001, b=32, s=2025:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2025. Best validation loss: 0.0875
--- Seed: 2026, Testing: learning_rate=0.001, batch_size=32 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


l=0.001, b=32, s=2026:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2026. Best validation loss: 0.0959
--- Seed: 2027, Testing: learning_rate=0.001, batch_size=32 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


l=0.001, b=32, s=2027:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2027. Best validation loss: 0.0771

=> Average validation loss for (l=0.001, b=32): 0.0868

------------------------------------------------------------
Evaluating pair: learning_rate=0.005, batch_size=8
--- Seed: 2025, Testing: learning_rate=0.005, batch_size=8 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


l=0.005, b=8, s=2025:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2025. Best validation loss: 0.0642
--- Seed: 2026, Testing: learning_rate=0.005, batch_size=8 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


l=0.005, b=8, s=2026:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2026. Best validation loss: 0.0772
--- Seed: 2027, Testing: learning_rate=0.005, batch_size=8 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


l=0.005, b=8, s=2027:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2027. Best validation loss: 0.0761

=> Average validation loss for (l=0.005, b=8): 0.0725

------------------------------------------------------------
Evaluating pair: learning_rate=0.005, batch_size=16
--- Seed: 2025, Testing: learning_rate=0.005, batch_size=16 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


l=0.005, b=16, s=2025:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2025. Best validation loss: 0.0674
--- Seed: 2026, Testing: learning_rate=0.005, batch_size=16 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


l=0.005, b=16, s=2026:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2026. Best validation loss: 0.0769
--- Seed: 2027, Testing: learning_rate=0.005, batch_size=16 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


l=0.005, b=16, s=2027:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2027. Best validation loss: 0.0769

=> Average validation loss for (l=0.005, b=16): 0.0738

------------------------------------------------------------
Evaluating pair: learning_rate=0.005, batch_size=32
--- Seed: 2025, Testing: learning_rate=0.005, batch_size=32 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


l=0.005, b=32, s=2025:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2025. Best validation loss: 0.0683
--- Seed: 2026, Testing: learning_rate=0.005, batch_size=32 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


l=0.005, b=32, s=2026:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2026. Best validation loss: 0.0773
--- Seed: 2027, Testing: learning_rate=0.005, batch_size=32 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


l=0.005, b=32, s=2027:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2027. Best validation loss: 0.0770

=> Average validation loss for (l=0.005, b=32): 0.0742

------------------------------------------------------------
Evaluating pair: learning_rate=0.01, batch_size=8
--- Seed: 2025, Testing: learning_rate=0.01, batch_size=8 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


l=0.01, b=8, s=2025:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2025. Best validation loss: 0.0607
--- Seed: 2026, Testing: learning_rate=0.01, batch_size=8 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


l=0.01, b=8, s=2026:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2026. Best validation loss: 0.0770
--- Seed: 2027, Testing: learning_rate=0.01, batch_size=8 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


l=0.01, b=8, s=2027:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2027. Best validation loss: 0.0758

=> Average validation loss for (l=0.01, b=8): 0.0712

------------------------------------------------------------
Evaluating pair: learning_rate=0.01, batch_size=16
--- Seed: 2025, Testing: learning_rate=0.01, batch_size=16 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


l=0.01, b=16, s=2025:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2025. Best validation loss: 0.0641
--- Seed: 2026, Testing: learning_rate=0.01, batch_size=16 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


l=0.01, b=16, s=2026:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2026. Best validation loss: 0.0786
--- Seed: 2027, Testing: learning_rate=0.01, batch_size=16 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


l=0.01, b=16, s=2027:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2027. Best validation loss: 0.0761

=> Average validation loss for (l=0.01, b=16): 0.0729

------------------------------------------------------------
Evaluating pair: learning_rate=0.01, batch_size=32
--- Seed: 2025, Testing: learning_rate=0.01, batch_size=32 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


l=0.01, b=32, s=2025:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2025. Best validation loss: 0.0643
--- Seed: 2026, Testing: learning_rate=0.01, batch_size=32 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


l=0.01, b=32, s=2026:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2026. Best validation loss: 0.0767
--- Seed: 2027, Testing: learning_rate=0.01, batch_size=32 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


l=0.01, b=32, s=2027:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2027. Best validation loss: 0.0768

=> Average validation loss for (l=0.01, b=32): 0.0726

------------------------------------------------------------
Evaluating pair: learning_rate=0.05, batch_size=8
--- Seed: 2025, Testing: learning_rate=0.05, batch_size=8 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


l=0.05, b=8, s=2025:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2025. Best validation loss: 0.0613
--- Seed: 2026, Testing: learning_rate=0.05, batch_size=8 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


l=0.05, b=8, s=2026:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2026. Best validation loss: 0.0765
--- Seed: 2027, Testing: learning_rate=0.05, batch_size=8 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


l=0.05, b=8, s=2027:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2027. Best validation loss: 0.0754

=> Average validation loss for (l=0.05, b=8): 0.0710

------------------------------------------------------------
Evaluating pair: learning_rate=0.05, batch_size=16
--- Seed: 2025, Testing: learning_rate=0.05, batch_size=16 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


l=0.05, b=16, s=2025:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2025. Best validation loss: 0.0604
--- Seed: 2026, Testing: learning_rate=0.05, batch_size=16 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


l=0.05, b=16, s=2026:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2026. Best validation loss: 0.0752
--- Seed: 2027, Testing: learning_rate=0.05, batch_size=16 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


l=0.05, b=16, s=2027:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2027. Best validation loss: 0.0747

=> Average validation loss for (l=0.05, b=16): 0.0701

------------------------------------------------------------
Evaluating pair: learning_rate=0.05, batch_size=32
--- Seed: 2025, Testing: learning_rate=0.05, batch_size=32 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


l=0.05, b=32, s=2025:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2025. Best validation loss: 0.0633
--- Seed: 2026, Testing: learning_rate=0.05, batch_size=32 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


l=0.05, b=32, s=2026:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2026. Best validation loss: 0.0761
--- Seed: 2027, Testing: learning_rate=0.05, batch_size=32 ---
Data loaded. Train: 161, Val: 34, Test: 35


  term2 = 0.05 * y[t-1] * np.sum(y[t-i-1] for i in range(order))


l=0.05, b=32, s=2027:   0%|          | 0/50 [00:00<?, ?it/s]

Finished seed 2027. Best validation loss: 0.0749

=> Average validation loss for (l=0.05, b=32): 0.0715


      === Grid Search Complete ===      

Best AVERAGE Validation Loss: 0.0701
Optimal Hyperparameters: learning_rate = 0.05, batch_size = 16

--- Full Results (sorted by average loss) ---
Learning Rate: 0.05, Batch Size: 32 -> Avg Val Loss: 0.0701
Learning Rate: 0.05, Batch Size: 32 -> Avg Val Loss: 0.0710
Learning Rate: 0.01, Batch Size: 32 -> Avg Val Loss: 0.0712
Learning Rate: 0.05, Batch Size: 32 -> Avg Val Loss: 0.0715
Learning Rate: 0.005, Batch Size: 32 -> Avg Val Loss: 0.0725
Learning Rate: 0.01, Batch Size: 32 -> Avg Val Loss: 0.0726
Learning Rate: 0.01, Batch Size: 32 -> Avg Val Loss: 0.0729
Learning Rate: 0.005, Batch Size: 32 -> Avg Val Loss: 0.0738
Learning Rate: 0.005, Batch Size: 32 -> Avg Val Loss: 0.0742
Learning Rate: 0.001, Batch Size: 32 -> Avg Val Loss: 0.0764
Learning Rate: 0.001, Batch Size: 32 -> Avg Val Loss: 0.0794
Learning Rate: 0.001, Batch Size: 32 -> 