# Module 6: AI for QEM (Sequence-Based LSTM)

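Standard zero-noise extrapolation (ZNE) assumes the effect of noise can be described by a simple function of a single noise-scaling factor. Real noise also depends on the *sequence* of gates (e.g., crosstalk, heating).
We upgrade from a simple support vector regression (SVR) baseline to a **Sequence-to-Scalar LSTM** that reads the actual circuit instructions.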

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.svm import SVR
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from qiskit_aer import AerSimulator
from qiskit import transpile
import utils 

# --- 1. Generate Dataset (Sequences + Features) ---
DATASET_SIZE = 2000
N_SHOTS = 1000   # shots per simulator run (ideal and noisy)
SEED = 42        # base seed for depth sampling and simulator sampling
tokenizer = utils.CircuitTokenizer(max_length=60)

# Seed NumPy's global RNG so the sampled depths are reproducible.
# NOTE(review): utils.create_random_clifford_circuit presumably also draws from
# NumPy's global RNG; if it uses its own generator, seed it there - TODO confirm.
np.random.seed(SEED)


def _parity_expectation(counts):
    """Return (N00 + N11 - N01 - N10) / total_shots for a 2-bit counts dict.

    Bitstrings that never occurred are treated as zero counts.
    """
    total = sum(counts.values())
    even = counts.get('00', 0) + counts.get('11', 0)
    odd = counts.get('01', 0) + counts.get('10', 0)
    return (even - odd) / total


X_sequences = []   # tokenized instruction sequences (LSTM input)
X_features = []    # [depth, cx count] per circuit (SVR input)
y_errors = []      # ideal - noisy expectation value (regression target)

print(f"Generating {DATASET_SIZE} samples with real gate sequences...")
sim_ideal = AerSimulator(method='stabilizer')
sim_noisy = AerSimulator(noise_model=utils.build_noise_model(scale=1.0))

for i in range(DATASET_SIZE):
    if i % 500 == 0: print(f"  ... {i}/{DATASET_SIZE}")
    depth = np.random.randint(5, 50)  # upper bound is exclusive: depth in [5, 49]
    # Get Circuit AND Instructions from UTILS
    qc, instructions = utils.create_random_clifford_circuit(2, depth)

    # Tokenize (The Real AI Part)
    X_sequences.append(tokenizer.tokenize(instructions))

    # Features for SVR baseline
    ops = qc.count_ops()
    X_features.append([depth, ops.get('cx', 0)])

    # Measure on a copy so `qc` itself stays measurement-free.
    qc_sim = qc.copy()
    qc_sim.measure_all()

    # Ideal run. A distinct, deterministic simulator seed per run keeps the
    # sampled counts reproducible without reusing the same random stream.
    res_ideal = sim_ideal.run(qc_sim, shots=N_SHOTS, seed_simulator=SEED + 2 * i).result()
    ideal_val = _parity_expectation(res_ideal.get_counts())

    # Noisy run: transpile for the noisy backend first.
    qc_noisy = transpile(qc_sim, sim_noisy)
    res_noisy = sim_noisy.run(qc_noisy, shots=N_SHOTS, seed_simulator=SEED + 2 * i + 1).result()
    noisy_val = _parity_expectation(res_noisy.get_counts())

    y_errors.append(ideal_val - noisy_val)

print("Dataset Generated.")

# Split sequences, hand-made features and targets with one call so the three
# stay row-aligned. A fixed random_state makes the partition (and therefore the
# reported test MSEs) reproducible across Restart & Run All.
SPLIT_SEED = 42
X_seq_train, X_seq_test, X_feat_train, X_feat_test, y_train, y_test = train_test_split(
    X_sequences, X_features, y_errors, test_size=0.2, random_state=SPLIT_SEED
)

# Convert to Tensors
X_seq_train_t = torch.tensor(X_seq_train, dtype=torch.long) # Long for Embedding
X_seq_test_t = torch.tensor(X_seq_test, dtype=torch.long)
# unsqueeze(1) turns the (N,) targets into (N, 1) to match the model's output.
y_train_t = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
y_test_t = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)

Generating 2000 samples with real gate sequences...
  ... 0/2000


  import pkg_resources


  ... 500/2000
  ... 1000/2000
  ... 1500/2000
Dataset Generated.


In [2]:
# --- 2. Train Baseline SVR ---
# Baseline regressor: standardize the [depth, cx count] features, then fit an
# SVR to predict the (ideal - noisy) error.
print("Training SVR Baseline...")
svr = make_pipeline(
    StandardScaler(),
    SVR(C=1.0, epsilon=0.01),
)
svr.fit(X_feat_train, y_train)

# Score on the held-out split with plain mean squared error.
svr_preds = svr.predict(X_feat_test)
svr_residuals = svr_preds - np.asarray(y_test)
svr_mse = np.mean(np.square(svr_residuals))
print(f"SVR MSE: {svr_mse:.5f}")

Training SVR Baseline...
SVR MSE: 0.03010


In [3]:
# --- 3. Train Sequence LSTM ---
class QEM_LSTM(nn.Module):
    """Sequence-to-scalar regressor: token ids -> embedding -> LSTM -> linear head.

    Args:
        vocab_size: Number of rows in the embedding table (largest token id + 1).
        embedding_dim: Size of each token embedding.
        hidden_size: Size of the LSTM hidden state.
        padding_idx: Optional id of the padding token. When ``None`` (default)
            the model reads the LSTM output at the final time step, exactly as
            before. When given, the padding embedding is held at zero and the
            readout uses the last non-padding step of each sequence, so a long
            run of padding cannot wash out the signal. This assumes padding is
            appended after the real tokens.
    """

    def __init__(self, vocab_size, embedding_dim=16, hidden_size=32, padding_idx=None):
        super().__init__()
        self.padding_idx = padding_idx
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=padding_idx)
        # batch_first=True -> (Batch, Seq, Feature)
        self.lstm = nn.LSTM(embedding_dim, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Map a (batch, seq_len) tensor of token ids to a (batch, 1) prediction."""
        embeds = self.embedding(x)
        lstm_out, _ = self.lstm(embeds)

        if self.padding_idx is None:
            # Take last time step
            last_out = lstm_out[:, -1, :]
        else:
            # Index of the last real token per row; rows that are entirely
            # padding fall back to step 0 instead of wrapping around to -1.
            lengths = (x != self.padding_idx).sum(dim=1)
            last_idx = (lengths - 1).clamp(min=0)
            rows = torch.arange(x.size(0), device=x.device)
            last_out = lstm_out[rows, last_idx]

        return self.fc(last_out)

# Training hyperparameters, gathered here so they are easy to tune.
TRAIN_SEED = 42
N_EPOCHS = 30          # 30 epochs enough for demo
LEARNING_RATE = 0.005
BATCH_SIZE = 32

# Seed torch so weight initialization and the per-epoch shuffles are reproducible.
torch.manual_seed(TRAIN_SEED)

vocab_size = len(tokenizer.vocab) + 1 # +1 for safety
model = QEM_LSTM(vocab_size)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
criterion = nn.MSELoss()

print(f"Training LSTM on {len(X_seq_train_t)} sequences...")
print(f"Input Shape: {X_seq_train_t.shape} (Batch, Seq_Len)")

loss_history = []   # mean training loss per epoch
n_train = X_seq_train_t.size(0)

for epoch in range(N_EPOCHS):
    model.train()
    # Mini-batching: a fresh shuffle of the sample indices every epoch.
    permutation = torch.randperm(n_train)
    epoch_loss = 0.0

    for i in range(0, n_train, BATCH_SIZE):
        indices = permutation[i:i+BATCH_SIZE]
        batch_x, batch_y = X_seq_train_t[indices], y_train_t[indices]

        optimizer.zero_grad()
        outputs = model(batch_x)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
        # MSELoss returns the batch mean; weight it by the batch size so a
        # short final batch does not count as much as a full one.
        epoch_loss += loss.item() * batch_x.size(0)

    # Per-sample mean over the epoch; correct even when n_train is not a
    # multiple of BATCH_SIZE.
    avg_loss = epoch_loss / n_train
    loss_history.append(avg_loss)
    if epoch % 5 == 0:
        print(f"Epoch {epoch}: Loss {avg_loss:.5f}")

# Evaluate on the held-out sequences: eval mode, no gradient tracking.
model.eval()
with torch.no_grad():
    test_outputs = model(X_seq_test_t)
lstm_preds = test_outputs.squeeze().numpy()

# Same metric as the SVR baseline: mean squared error against y_test.
lstm_residuals = lstm_preds - np.asarray(y_test)
lstm_mse = np.mean(np.square(lstm_residuals))
print(f"\nLSTM MSE: {lstm_mse:.5f}")

# Report which of the two models has the lower test MSE.
verdict = (
    "✅ LSTM Wins! (Sequence learning beats feature engineering)"
    if lstm_mse < svr_mse
    else "⚠️ SVR Wins (Typical for simple noise models or small data)"
)
print(verdict)

# Persist only the learned weights (state_dict), not the whole module object.
torch.save(model.state_dict(), "qem_lstm.pth")
print("Model saved to qem_lstm.pth")

Training LSTM on 1600 sequences...
Input Shape: torch.Size([1600, 60]) (Batch, Seq_Len)
Epoch 0: Loss 0.02631
Epoch 5: Loss 0.02565
Epoch 10: Loss 0.02558
Epoch 15: Loss 0.02564
Epoch 20: Loss 0.02565
Epoch 25: Loss 0.02559

LSTM MSE: 0.03012
⚠️ SVR Wins (Typical for simple noise models or small data)
Model saved to qem_lstm.pth
