In [8]:
import numpy as np
import pandas as pd
from sklearn.model_selection import GroupShuffleSplit
import numpy as np
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

**Split**

In [9]:
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedShuffleSplit

# Read the labelled dataset.
df = pd.read_csv("training_data_75.csv")

# Target for classification is the threshold value
y = df["min_threshold"].astype(int).values

# Group key (so all rows of the same circuit file stay together).
# NOTE(review): `groups` is not consumed below; the split operates on file names directly.
groups = df["file"].astype(str).values

# Drop columns we don't want as features
drop_cols = [
    "min_threshold",   # target
    "file",
    "family",
    "forward_runtime", # not for classification, only regression
    "max_fidelity_achieved",
    "forward_shots",
    "forward_peak_rss_mb",
    "n_thresholds_tested",
]
# Only keep the names that actually exist, so a CSV lacking some of them still loads.
drop_cols = [c for c in drop_cols if c in df.columns]

# X is the whole dataset minus the dropped columns
X = df.drop(columns=drop_cols).copy()

# encode categorical columns (backend/precision/etc.)
X = pd.get_dummies(X, columns=[c for c in X.columns if X[c].dtype == "object" or X[c].dtype == "str"])

# ---------------------------
# Stratified split BY FILE, stratified by n_qubits bucket
# ---------------------------

# 1) Build a file-level table for stratification (one row per file, first n_qubits seen)
file_info = df.groupby("file", as_index=False).agg(
    n_qubits=("n_qubits", "first")
)

# Bucketize n_qubits so stratification is stable (avoids classes with only 1 file)
file_info["qubit_bucket"] = pd.cut(
    file_info["n_qubits"],
    bins=[-1, 20, 60, 10**9],
    labels=["small", "medium", "large"]
)

# 2) Optional: force rare-threshold files into TRAIN (helps avoid "unseen class 256")
# NOTE(review): the recorded output lists no class 256, so this set looks empty
# for the current CSV - confirm whether the rule is still needed.
forced_train_files = set(df.loc[df["min_threshold"] == 256, "file"].unique())

# Split only on remaining files (pool); reset_index makes labels equal positions,
# which is what the .loc lookups with the splitter's positional output rely on.
pool = file_info[~file_info["file"].isin(forced_train_files)].reset_index(drop=True)

sss = StratifiedShuffleSplit(n_splits=1, test_size=0.25, random_state=42)
train_f_idx, test_f_idx = next(sss.split(pool["file"], pool["qubit_bucket"]))

train_files = set(pool.loc[train_f_idx, "file"])
test_files  = set(pool.loc[test_f_idx, "file"])

# Add forced files to train
train_files |= forced_train_files

# Convert file sets -> POSITIONAL row indices.
# The indices are consumed by X.iloc[...] and y[...], both of which are positional,
# so they are derived from boolean masks instead of from df.index labels (the two
# only coincide while df carries a default RangeIndex).
train_idx = np.flatnonzero(df["file"].isin(train_files).to_numpy())
test_idx  = np.flatnonzero(df["file"].isin(test_files).to_numpy())

# Final arrays
x_train = X.iloc[train_idx].values.astype(np.float32)
x_test  = X.iloc[test_idx].values.astype(np.float32)
y_train = y[train_idx]
y_test  = y[test_idx]

# sanity checks
print("Shapes:", x_train.shape, x_test.shape)
print("Train classes:", sorted(np.unique(y_train)))
print("Test classes:", sorted(np.unique(y_test)))

overlap = train_files.intersection(test_files)
print("Unique files train:", len(train_files), "test:", len(test_files), "overlap:", len(overlap))

# A file on both sides would leak circuit information into the evaluation.
assert not overlap, f"files present in both train and test: {sorted(overlap)}"


Shapes: (102, 66) (35, 66)
Train classes: [np.int64(1), np.int64(2), np.int64(4), np.int64(8), np.int64(16), np.int64(64)]
Test classes: [np.int64(1), np.int64(2), np.int64(4)]
Unique files train: 27 test: 9 overlap: 0


**Metrics**

In [10]:
import numpy as np
from sklearn.metrics import accuracy_score

def cls_metrics(y_true, y_pred, name="model", verbose=False):
    """Return classification accuracy, optionally printing under/over-prediction rates.

    Args:
        y_true: Array-like of true class labels; cast to int.
        y_pred: Array-like of predicted class labels; cast to int.
        name: Header line of the printed report (only used when ``verbose``).
        verbose: When true, print accuracy together with the fraction of samples
            predicted below and above the true label. Defaults to False, which
            keeps the function silent as before.

    Returns:
        The accuracy computed by ``accuracy_score(y_true, y_pred)``.
    """
    y_true = np.asarray(y_true).astype(int)
    y_pred = np.asarray(y_pred).astype(int)

    acc = accuracy_score(y_true, y_pred)

    if verbose:
        # Under-prediction is the outcome flagged as most important for this challenge.
        under = np.mean(y_pred < y_true)
        over = np.mean(y_pred > y_true)

        print(f"{name}")
        print("  Accuracy:", round(acc, 4))
        print("  Under-rate (pred < true):", round(float(under), 4))
        print("  Over-rate  (pred > true):", round(float(over), 4))

    return acc


In [11]:
# =============================================================================
# HOLDOUT TEST: Conservative 60% Strategy (RandomForest only)
# =============================================================================
# Train on all data EXCEPT two holdout files, test on those files
# Using conservative_60: bump prediction up when confidence < 60%
# =============================================================================

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
import warnings
# Silence every warning category for the rest of the kernel session.
# NOTE(review): this is a blanket filter, so deprecation and numerical warnings
# are hidden as well - confirm that is intended.
warnings.filterwarnings("ignore")

# -----------------------------------------------------------------------------
# 1. LOAD DATA AND FEATURE ENGINEERING
# -----------------------------------------------------------------------------
# Re-reads the CSV, replacing any `df` left behind by an earlier cell.
df = pd.read_csv("training_data_75.csv")

def engineer_features(df):
    """Return a copy of ``df`` extended with derived circuit features.

    The input frame is not modified. Twenty-three columns are appended (or
    overwritten in place if a column of the same name already exists), in the
    order they are listed below: interactions, ratios, polynomial/log
    transforms, complexity scores and pattern counts.
    """
    X = df.copy()

    # Source columns, fetched once in the order they are first needed.
    degree = X['avg_qubit_degree']
    qubits = X['n_qubits']
    depth = X['crude_depth']
    two_q = X['n_2q_gates']
    edges = X['n_unique_edges']
    cx = X['n_cx']
    total = X['n_total_gates']
    rotations = X['n_rotation_gates']
    three_q = X['n_3q_gates']
    edge_reps = X['n_edge_repetitions']
    pressure = X['entanglement_pressure']
    has_qft = X['has_qft_pattern']
    has_iqft = X['has_iqft_pattern']
    has_grover = X['has_grover_pattern']
    has_variational = X['has_variational_pattern']
    has_ghz = X['has_ghz_pattern']

    derived = {
        # Interaction features
        'degree_x_qubits': degree * qubits,
        'degree_x_depth': degree * depth,
        'degree_x_2q': degree * two_q,
        'entanglement_complexity': edges * degree,
        'entanglement_per_qubit': edges / (qubits + 1),
        # Ratio features (the +1 keeps denominators non-zero for empty circuits)
        'cx_ratio': cx / (total + 1),
        'rotation_ratio': rotations / (total + 1),
        'multi_qubit_ratio': (two_q + three_q) / (total + 1),
        'gates_per_depth': total / (depth + 1),
        'depth_per_qubit': depth / (qubits + 1),
        'edge_density': edges / (qubits * (qubits - 1) / 2 + 1),
        'edge_repetition_ratio': edge_reps / (edges + 1),
        # Polynomial and logarithmic transforms
        'degree_squared': degree ** 2,
        'qubits_squared': qubits ** 2,
        'depth_squared': depth ** 2,
        'log_qubits': np.log1p(qubits),
        'log_depth': np.log1p(depth),
        'log_gates': np.log1p(total),
        # Complexity scores
        'complexity_score': qubits * depth * degree / 1000,
        'entanglement_burden': two_q * degree / (qubits + 1),
        'sim_difficulty': qubits ** 1.5 * pressure,
        # Pattern features
        'n_patterns': has_qft + has_iqft + has_grover + has_variational + has_ghz,
        'variational_complexity': has_variational * rotations,
    }

    # Insert one at a time so the resulting column order matches the listing above.
    for column, values in derived.items():
        X[column] = values

    return X

# -----------------------------------------------------------------------------
# 2. DEFINE HOLDOUT FILES
# -----------------------------------------------------------------------------
# Files listed here are excluded from training; their rows become the test set.
holdout_files = [
    "wstate_indep_qiskit_30.qasm",
    "shor_15_4_indep_qiskit_18.qasm"
]

print("=" * 70)
print("HOLDOUT TEST: Conservative Strategy (RandomForest Only)")
print("=" * 70)
print()
print(f"Holdout files (test set):")
for f in holdout_files:
    # Number of dataset rows recorded for this file.
    n_rows = df[df['file'] == f].shape[0]
    print(f"  - {f} ({n_rows} samples)")
print()

# Split into train and test
# Row-level boolean masks keyed on the file name, so all rows of a file land on the same side.
train_mask = ~df['file'].isin(holdout_files)
test_mask = df['file'].isin(holdout_files)

# .copy() yields independent frames rather than selections of df.
df_train = df[train_mask].copy()
df_test = df[test_mask].copy()

print(f"Training samples: {len(df_train)}")
print(f"Test samples: {len(df_test)}")
print()

# -----------------------------------------------------------------------------
# 3. PREPARE FEATURES
# -----------------------------------------------------------------------------
# Derived features are computed independently for each split.
X_train_eng = engineer_features(df_train)
X_test_eng = engineer_features(df_test)

# Drop non-feature columns
drop_cols = ["min_threshold", "file", "family", "forward_runtime", 
             "max_fidelity_achieved", "forward_shots", "forward_peak_rss_mb", "n_thresholds_tested"]
# Keep only names present in the training frame so drop() cannot fail on a missing column.
drop_cols = [c for c in drop_cols if c in X_train_eng.columns]

X_train_eng = X_train_eng.drop(columns=drop_cols)
X_test_eng = X_test_eng.drop(columns=drop_cols)

# One-hot encode categoricals
# The categorical column list is taken from the training frame and reused for the test frame.
cat_cols = X_train_eng.select_dtypes(exclude=[np.number]).columns.tolist()
X_train_eng = pd.get_dummies(X_train_eng, columns=cat_cols)
X_test_eng = pd.get_dummies(X_test_eng, columns=cat_cols)

# Align columns (test may have different dummies)
# Columns missing from the test frame are created and filled with 0; extras are discarded.
X_test_eng = X_test_eng.reindex(columns=X_train_eng.columns, fill_value=0)

# Get labels
y_train_raw = df_train["min_threshold"].astype(int).values
y_test_raw = df_test["min_threshold"].astype(int).values

# Encode labels
# The encoder maps each threshold value to its rank among all thresholds in df,
# so encoded labels are consecutive integers ordered by threshold.
le = LabelEncoder()
le.fit(df["min_threshold"].astype(int).values)  # Fit on ALL data to know all classes
y_train = le.transform(y_train_raw)
y_test = le.transform(y_test_raw)

threshold_classes = le.classes_
print(f"Threshold classes: {list(threshold_classes)}")
print()

# -----------------------------------------------------------------------------
# 4. FEATURE SELECTION (Top 10 by RandomForest importance)
# -----------------------------------------------------------------------------
# NaN and +/-inf are replaced by 0 before fitting.
X_train_arr = X_train_eng.values.astype(np.float32)
X_train_arr = np.nan_to_num(X_train_arr, nan=0.0, posinf=0.0, neginf=0.0)

# Use RandomForest for feature importance
# This forest is fitted on the training rows only and is used solely to rank features.
clf_importance = RandomForestClassifier(
    n_estimators=500, max_depth=10, min_samples_leaf=2,
    class_weight='balanced', random_state=42, n_jobs=-1
)
clf_importance.fit(X_train_arr, y_train)

# Pair each column name with its importance and sort from most to least important.
feature_importance = clf_importance.feature_importances_
importance_df = pd.DataFrame({
    'feature': X_train_eng.columns.tolist(),
    'importance': feature_importance
}).sort_values('importance', ascending=False)

top_k = 10
top_features = importance_df.head(top_k)['feature'].tolist()

print(f"Top {top_k} features (by RandomForest importance):")
for i, feat in enumerate(top_features, 1):
    # Look the importance back up by feature name for display.
    imp = importance_df[importance_df['feature'] == feat]['importance'].values[0]
    print(f"  {i:2d}. {feat:<30} ({imp:.4f})")
print()

# Restrict both splits to the selected columns, in ranking order, and sanitise as above.
X_train_top = X_train_eng[top_features].values.astype(np.float32)
X_test_top = X_test_eng[top_features].values.astype(np.float32)
X_train_top = np.nan_to_num(X_train_top, nan=0.0, posinf=0.0, neginf=0.0)
X_test_top = np.nan_to_num(X_test_top, nan=0.0, posinf=0.0, neginf=0.0)

# -----------------------------------------------------------------------------
# 5. TRAIN MODEL AND CONSERVATIVE PREDICT
# -----------------------------------------------------------------------------
# Final classifier: same hyperparameters as the importance forest, but fitted on
# the top-k feature matrix only. Labels are the LabelEncoder-encoded thresholds.
clf = RandomForestClassifier(
    n_estimators=500, max_depth=10, min_samples_leaf=2,
    class_weight='balanced', random_state=42, n_jobs=-1
)
clf.fit(X_train_top, y_train)

def conservative_predict(clf, X, confidence_threshold=0.6, bump_steps=1):
    """Predict classes, moving low-confidence predictions up by ``bump_steps`` classes.

    For every sample the most probable class is taken from ``clf.predict_proba``.
    When its probability is below ``confidence_threshold`` the prediction is
    shifted ``bump_steps`` positions further along ``clf.classes_``; the shifted
    position is clamped to the valid range, so the last class stays the last class.

    Args:
        clf: Fitted classifier exposing ``predict_proba`` and ``classes_``.
        X: Feature matrix accepted by ``clf.predict_proba``.
        confidence_threshold: Probability below which a prediction is bumped.
        bump_steps: Number of positions in ``clf.classes_`` to move a
            low-confidence prediction.

    Returns:
        Tuple ``(predictions, confidences, bumped)`` of equal-length arrays:
        the predicted class labels (elements of ``clf.classes_``), the highest
        class probability per sample, and a boolean flag that is True only
        where the returned label differs from the most probable one. A
        zero-step bump or a prediction already at the top class is reported
        as not bumped.
    """
    proba = np.asarray(clf.predict_proba(X))
    classes = np.asarray(clf.classes_)

    # Nothing to score: return empty arrays of the matching kinds.
    if len(proba) == 0:
        return classes[:0], np.array([], dtype=float), np.array([], dtype=bool)

    top_idx = proba.argmax(axis=1)
    confidences = proba.max(axis=1)

    # Clamp on both ends so an out-of-range step can never wrap around via negative indexing.
    shifted_idx = np.clip(top_idx + bump_steps, 0, len(classes) - 1)
    final_idx = np.where(confidences < confidence_threshold, shifted_idx, top_idx)

    bumped = final_idx != top_idx
    return classes[final_idx], confidences, bumped

# Get predictions with different strategies
# Plain argmax predictions (encoded labels); used by the "always bump" strategies in the comparison loop.
y_pred_raw = clf.predict(X_test_top)

# Test multiple confidence thresholds
# Each entry maps a strategy name to (confidence threshold, bump steps).
# A threshold above 1.0 is the marker the comparison loop uses for "bump every prediction".
strategies = {
    'raw': (1.0, 0),           # No bumping
    'cons_80': (0.80, 1),      # Bump if conf < 80%
    'cons_70': (0.70, 1),      # Bump if conf < 70%
    'cons_60': (0.60, 1),      # Bump if conf < 60%
    'always_+1': (1.01, 1),    # Always bump by 1
    'always_+2': (1.01, 2),    # Always bump by 2
}

# Convert test labels
# Back from encoded labels to the original threshold values for scoring and display.
y_test_orig = le.inverse_transform(y_test)

# -----------------------------------------------------------------------------
# 6. COMPETITION SCORING
# -----------------------------------------------------------------------------
def competition_score(y_true, y_pred):
    """Score each prediction against its true threshold.

    Per element: 1.0 for an exact match, ``true / pred`` when the prediction is
    above the true value, and 0.0 when it is below.

    Returns:
        Float array with one score per input element.
    """
    truth = np.array(y_true)
    guess = np.array(y_pred)

    # Everything starts at 0.0, which is also the final score of under-predictions.
    result = np.zeros(len(truth))

    exact_hit = guess == truth
    overshoot = guess > truth

    result[exact_hit] = 1.0
    result[overshoot] = truth[overshoot] / guess[overshoot]
    return result

def get_outcome(true, pred):
    """Classify a prediction as "EXACT", "UNDER" or "OVER" relative to the true value."""
    if pred == true:
        return "EXACT"
    return "UNDER" if pred < true else "OVER"

# -----------------------------------------------------------------------------
# 7. DISPLAY RESULTS FOR ALL STRATEGIES
# -----------------------------------------------------------------------------
print("=" * 70)
print("DETAILED RESULTS FOR HOLDOUT TEST SET")
print("=" * 70)
print()

# Get raw predictions with probabilities for display
# The confidence of a sample is the largest class probability in its row.
proba = clf.predict_proba(X_test_top)
raw_confidences = [p.max() for p in proba]

print(f"{'File':<35} {'Backend':<5} {'Prec':<7} {'True':>5} {'Conf':>6}")
print("-" * 65)
# enumerate() provides the position used to index y_test_orig and raw_confidences,
# which were built in the same row order as df_test.
for i, (_, row) in enumerate(df_test.iterrows()):
    print(f"{row['file']:<35} {row['backend']:<5} {row['precision']:<7} "
          f"{y_test_orig[i]:>5} {raw_confidences[i]:>6.2f}")
print()

# Evaluate each strategy
print("=" * 70)
print("STRATEGY COMPARISON")
print("=" * 70)
print()

# One dict per strategy, collected for the summary table printed at the end.
results_summary = []

for strat_name, (thresh, bump) in strategies.items():
    if thresh > 1.0:  # Always bump
        # Shift the encoded argmax labels by `bump` and clamp to the last encoder class.
        y_pred = np.minimum(y_pred_raw + bump, len(le.classes_) - 1)
        y_pred_orig = le.inverse_transform(y_pred)
        bumped = [True] * len(y_pred)
    else:
        # Confidence-gated bump; note that 'raw' (threshold 1.0, 0 steps) also takes this path.
        y_pred, _, bumped = conservative_predict(clf, X_test_top, 
                                                   confidence_threshold=thresh, 
                                                   bump_steps=bump)
        y_pred_orig = le.inverse_transform(y_pred)
    
    # Score and tally outcomes in original threshold units.
    scores = competition_score(y_test_orig, y_pred_orig)
    n = len(y_test_orig)
    exact = np.sum(y_pred_orig == y_test_orig)
    under = np.sum(y_pred_orig < y_test_orig)
    over = np.sum(y_pred_orig > y_test_orig)
    n_bumped = sum(bumped)
    
    results_summary.append({
        'strategy': strat_name,
        'score': scores.sum(),
        'exact': exact,
        'under': under,
        'over': over,
        'bumped': n_bumped
    })
    
    print(f"{strat_name}:")
    print(f"  Score: {scores.sum():.2f}/{n} = {scores.mean():.4f}")
    print(f"  Exact={exact}, Under={under} [RISK!], Over={over}, Bumped={n_bumped}")
    
    # Show predictions
    preds_str = ', '.join([f"{p}" for p in y_pred_orig])
    print(f"  Predictions: [{preds_str}]")
    print()

# Summary table sorted by underprediction count
# Ties on the under-prediction count are broken by higher total score.
print("=" * 70)
print("SUMMARY - Sorted by Underprediction Risk")
print("=" * 70)
print(f"{'Strategy':<15} {'Score':>8} {'Exact':>6} {'Under':>6} {'Over':>6} {'Risk':<10}")
print("-" * 55)
for r in sorted(results_summary, key=lambda x: (x['under'], -x['score'])):
    # Risk label: no under-predictions -> LOW, exactly one -> MEDIUM, more -> HIGH.
    risk = "LOW" if r['under'] == 0 else ("MEDIUM" if r['under'] <= 1 else "HIGH")
    print(f"{r['strategy']:<15} {r['score']:>8.2f} {r['exact']:>6} {r['under']:>6} {r['over']:>6} {risk:<10}")

print()
print("True values:", list(y_test_orig))


HOLDOUT TEST: Conservative Strategy (RandomForest Only)

Holdout files (test set):
  - wstate_indep_qiskit_30.qasm (4 samples)
  - shor_15_4_indep_qiskit_18.qasm (3 samples)

Training samples: 130
Test samples: 7

Threshold classes: [np.int64(1), np.int64(2), np.int64(4), np.int64(8), np.int64(16), np.int64(64)]

Top 10 features (by RandomForest importance):
   1. log_depth                      (0.0374)
   2. depth_per_qubit                (0.0365)
   3. entanglement_per_qubit         (0.0336)
   4. n_h                            (0.0332)
   5. degree_squared                 (0.0317)
   6. crude_depth                    (0.0300)
   7. avg_qubit_degree               (0.0299)
   8. n_nonempty_lines               (0.0279)
   9. depth_squared                  (0.0275)
  10. n_lines                        (0.0273)

DETAILED RESULTS FOR HOLDOUT TEST SET

File                                Backend Prec     True   Conf
-----------------------------------------------------------------
shor_15_

In [12]:
# =============================================================================
# PRODUCTION DEMO: Threshold Classifier
# =============================================================================
# Usage: predict_threshold(qasm_path, backend, precision)
# =============================================================================

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from pathlib import Path
import warnings
# Silence every warning category for the rest of the kernel session.
# NOTE(review): blanket filter - it also hides deprecation and numerical warnings.
warnings.filterwarnings("ignore")

# Import the feature extractor
# Project-local module; it must be importable from the notebook's working directory.
from comprehensive_features import QASMFeatureExtractor

# -----------------------------------------------------------------------------
# 1. BEST HYPERPARAMETERS (from Optuna tuning - paste yours here!)
# -----------------------------------------------------------------------------
# Replace with your best params from the Optuna run
best_rf_params = {
    'n_estimators': 368,
    'max_depth': 5,
    'min_samples_split': 4,
    'min_samples_leaf': 1,
    'max_features': 'log2',
    'criterion': 'entropy',
    'class_weight': 'balanced_subsample',
    'bootstrap': False,
    'random_state': 42,
    'n_jobs': -1
}
# These two assignments repeat values already in the literal above; presumably a
# safeguard so the seed and parallelism survive when a pasted dict omits them.
best_rf_params['random_state'] = 42
best_rf_params['n_jobs'] = -1

print("=" * 70)
print("PRODUCTION THRESHOLD CLASSIFIER")
print("=" * 70)
print()
print("Best Hyperparameters:")
for k, v in best_rf_params.items():
    print(f"  {k}: {v}")
print()

# -----------------------------------------------------------------------------
# 2. LOAD TRAINING DATA
# -----------------------------------------------------------------------------
# Re-reads the CSV, replacing any `df` left behind by an earlier cell.
df = pd.read_csv("training_data_75.csv")

# Define holdout files for testing
# Rows of these files are withheld from training and used for the holdout evaluation in this cell.
holdout_files = [
    "wstate_indep_qiskit_30.qasm",
    "shor_15_4_indep_qiskit_18.qasm",
    "pricingcall_indep_qiskit_17.qasm",
    "portfoliovqe_indep_qiskit_18.qasm",
    "dj_indep_qiskit_30.qasm"
]














# Row-level masks keyed on the file name: holdout rows go to the test frame, the rest to training.
train_mask = ~df['file'].isin(holdout_files)
test_mask = df['file'].isin(holdout_files)

df_train = df[train_mask].copy()
df_test = df[test_mask].copy()

# -----------------------------------------------------------------------------
# 3. PRINT TRAINING AND TEST FILES
# -----------------------------------------------------------------------------
print("=" * 70)
print("TRAINING FILES")
print("=" * 70)
# Sorted list of distinct file names (note: an earlier cell binds this name to a set).
train_files = sorted(df_train['file'].unique())
print(f"Total: {len(train_files)} unique QASM files\n")
for i, f in enumerate(train_files, 1):
    n_samples = len(df_train[df_train['file'] == f])
    # Only the first row's threshold is shown for each file.
    thresh = df_train[df_train['file'] == f]['min_threshold'].iloc[0]
    print(f"  {i:2d}. {f:<45} (threshold={thresh:>2}, samples={n_samples})")

print()
print("=" * 70)
print("TEST/HOLDOUT FILES")
print("=" * 70)
# Same listing for the holdout frame.
test_files = sorted(df_test['file'].unique())
print(f"Total: {len(test_files)} unique QASM files\n")
for i, f in enumerate(test_files, 1):
    n_samples = len(df_test[df_test['file'] == f])
    # Only the first row's threshold is shown for each file.
    thresh = df_test[df_test['file'] == f]['min_threshold'].iloc[0]
    print(f"  {i:2d}. {f:<45} (threshold={thresh:>2}, samples={n_samples})")

print()

# -----------------------------------------------------------------------------
# 4. FEATURE ENGINEERING FUNCTION
# -----------------------------------------------------------------------------
def engineer_features(df):
    """Build the model's derived features on top of the raw circuit columns.

    Works on a copy, so the caller's frame is left untouched. The returned
    frame holds every input column followed by the derived ones, in the order
    given in the ``assign`` call below.
    """
    base = df.copy()

    # Raw inputs, read once in the order they are first needed.
    deg = base['avg_qubit_degree']
    nq = base['n_qubits']
    depth = base['crude_depth']
    g2 = base['n_2q_gates']
    edges = base['n_unique_edges']
    n_cx = base['n_cx']
    gates = base['n_total_gates']
    rot = base['n_rotation_gates']
    g3 = base['n_3q_gates']
    reps = base['n_edge_repetitions']
    pressure = base['entanglement_pressure']
    qft = base['has_qft_pattern']
    iqft = base['has_iqft_pattern']
    grover = base['has_grover_pattern']
    variational = base['has_variational_pattern']
    ghz = base['has_ghz_pattern']

    # assign() inserts the keyword columns one by one, in the order written.
    return base.assign(
        degree_x_qubits=deg * nq,
        degree_x_depth=deg * depth,
        degree_x_2q=deg * g2,
        entanglement_complexity=edges * deg,
        entanglement_per_qubit=edges / (nq + 1),
        cx_ratio=n_cx / (gates + 1),
        rotation_ratio=rot / (gates + 1),
        multi_qubit_ratio=(g2 + g3) / (gates + 1),
        gates_per_depth=gates / (depth + 1),
        depth_per_qubit=depth / (nq + 1),
        edge_density=edges / (nq * (nq - 1) / 2 + 1),
        edge_repetition_ratio=reps / (edges + 1),
        degree_squared=deg ** 2,
        qubits_squared=nq ** 2,
        depth_squared=depth ** 2,
        log_qubits=np.log1p(nq),
        log_depth=np.log1p(depth),
        log_gates=np.log1p(gates),
        complexity_score=nq * depth * deg / 1000,
        entanglement_burden=g2 * deg / (nq + 1),
        sim_difficulty=nq ** 1.5 * pressure,
        n_patterns=qft + iqft + grover + variational + ghz,
        variational_complexity=variational * rot,
    )

# -----------------------------------------------------------------------------
# 5. PREPARE TRAINING DATA
# -----------------------------------------------------------------------------
print("=" * 70)
print("TRAINING MODEL")
print("=" * 70)
print()

# Engineer features for training data
X_train_eng = engineer_features(df_train)

# Columns to drop (not features)
# After filtering, this list is also read by predict_threshold as a module-level name.
drop_cols = ["min_threshold", "file", "family", "forward_runtime", 
             "max_fidelity_achieved", "forward_shots", "forward_peak_rss_mb", "n_thresholds_tested"]
drop_cols = [c for c in drop_cols if c in X_train_eng.columns]
X_train_eng = X_train_eng.drop(columns=drop_cols)

# One-hot encode categorical columns
cat_cols = X_train_eng.select_dtypes(exclude=[np.number]).columns.tolist()
X_train_eng = pd.get_dummies(X_train_eng, columns=cat_cols)

# Store column order for prediction
# predict_threshold reindexes its single-row input to exactly this column list.
FEATURE_COLUMNS = X_train_eng.columns.tolist()

# Prepare arrays
# NaN and +/-inf are replaced by 0 before fitting.
X_train = X_train_eng.values.astype(np.float32)
X_train = np.nan_to_num(X_train, nan=0.0, posinf=0.0, neginf=0.0)

y_train_raw = df_train["min_threshold"].astype(int).values

# Label encoder
# Fitted on every threshold in df (training and holdout rows), then applied to the training labels.
le = LabelEncoder()
le.fit(df["min_threshold"].astype(int).values)  # Fit on ALL thresholds
y_train = le.transform(y_train_raw)

# Threshold values in encoder order; position i is the threshold for encoded label i.
THRESHOLD_CLASSES = le.classes_
print(f"Training samples: {len(X_train)}")
print(f"Feature dimensions: {X_train.shape[1]}")
print(f"Threshold classes: {list(THRESHOLD_CLASSES)}")
print()

# -----------------------------------------------------------------------------
# 6. TRAIN THE MODEL
# -----------------------------------------------------------------------------
print("Training RandomForest with tuned hyperparameters...")
# Fitted on the full engineered feature matrix (no top-k selection in this cell);
# predict_threshold reads this `clf` as a module-level name.
clf = RandomForestClassifier(**best_rf_params)
clf.fit(X_train, y_train)
print("Model trained successfully!")
print()

# -----------------------------------------------------------------------------
# 7. PREDICTION FUNCTION
# -----------------------------------------------------------------------------
def predict_threshold(qasm_path, backend, precision, conservative=False, confidence_threshold=0.6):
    """
    Predict the optimal threshold for a QASM circuit.

    Relies on module-level state prepared earlier in this cell: the fitted
    ``clf`` and ``le``, ``FEATURE_COLUMNS``, ``THRESHOLD_CLASSES``,
    ``drop_cols`` and ``engineer_features``.

    Args:
        qasm_path: Path to the QASM file
        backend: 'CPU' or 'GPU'
        precision: 'single' or 'double'
        conservative: If True, bump up prediction when confidence is low
        confidence_threshold: Threshold for conservative prediction (default 0.6)

    Returns:
        dict with keys 'predicted_threshold' (int), 'confidence' (float, the
        highest class probability), 'probabilities' (threshold -> probability
        for every known threshold; thresholds the model was not trained on get
        0.0) and 'conservative_mode' (the flag passed in).
    """
    # Extract features from QASM file
    extractor = QASMFeatureExtractor(qasm_path)
    circuit_features = extractor.extract_all()

    # Create a single record with circuit features + run configuration
    # (presumably a dict of feature name -> value; verify against the extractor).
    row = circuit_features.copy()
    row['backend'] = backend
    row['precision'] = precision

    # Convert to a one-row DataFrame and add the engineered features
    input_df = pd.DataFrame([row])
    input_eng = engineer_features(input_df)

    # Drop non-feature columns that happen to be present
    input_eng = input_eng.drop(columns=[c for c in drop_cols if c in input_eng.columns])

    # One-hot encode
    cat_cols_input = input_eng.select_dtypes(exclude=[np.number]).columns.tolist()
    input_eng = pd.get_dummies(input_eng, columns=cat_cols_input)

    # Align with training columns: dummies for unseen categories are dropped,
    # dummies missing from this single row are created as 0.
    input_eng = input_eng.reindex(columns=FEATURE_COLUMNS, fill_value=0)

    # Prepare array
    X_input = input_eng.values.astype(np.float32)
    X_input = np.nan_to_num(X_input, nan=0.0, posinf=0.0, neginf=0.0)

    # Get prediction and probabilities
    proba = clf.predict_proba(X_input)[0]
    pred_encoded = clf.predict(X_input)[0]
    confidence = proba.max()

    # Conservative prediction: bump up by one threshold class if not confident,
    # saturating at the largest known threshold.
    if conservative and confidence < confidence_threshold:
        pred_encoded = min(pred_encoded + 1, len(THRESHOLD_CLASSES) - 1)

    pred_threshold = le.inverse_transform([pred_encoded])[0]

    # Build probability dict.
    # The entries of `proba` follow clf.classes_ (the encoded labels present in the
    # training rows), which can be a strict subset of THRESHOLD_CLASSES when the
    # holdout removed every sample of a class. Map through clf.classes_ instead
    # of assuming position i of `proba` belongs to THRESHOLD_CLASSES[i].
    prob_dict = {int(threshold): 0.0 for threshold in THRESHOLD_CLASSES}
    for encoded_label, class_proba in zip(clf.classes_, proba):
        prob_dict[int(THRESHOLD_CLASSES[int(encoded_label)])] = float(class_proba)

    return {
        'predicted_threshold': int(pred_threshold),
        'confidence': float(confidence),
        'probabilities': prob_dict,
        'conservative_mode': conservative
    }

# -----------------------------------------------------------------------------
# 8. TEST ON HOLDOUT FILES
# -----------------------------------------------------------------------------
# Section banner for the holdout evaluation printed below.
print("=" * 70)
print("TESTING ON HOLDOUT FILES")
print("=" * 70)
print()

def competition_score(y_true, y_pred):
    """Score one prediction: 1.0 if exact, ``y_true / y_pred`` if too high, 0.0 if too low."""
    if y_pred == y_true:
        return 1.0
    # Over-prediction earns partial credit; anything else is an under-prediction.
    return y_true / y_pred if y_pred > y_true else 0.0

# Directory the holdout QASM files are read from (relative to the working directory).
circuits_dir = Path("circuits")

print(f"{'File':<45} {'Backend':<5} {'Prec':<7} {'True':>5} {'Pred':>5} {'Conf':>6} {'Score':>6}")
print("-" * 90)

total_score = 0
total_samples = 0

# One prediction per holdout row: features are re-extracted from the QASM file
# and combined with that row's backend/precision.
for _, row in df_test.iterrows():
    qasm_path = circuits_dir / row['file']
    
    result = predict_threshold(
        qasm_path=qasm_path,
        backend=row['backend'],
        precision=row['precision'],
        conservative=False
    )
    
    true_thresh = int(row['min_threshold'])
    pred_thresh = result['predicted_threshold']
    conf = result['confidence']
    score = competition_score(true_thresh, pred_thresh)
    
    total_score += score
    total_samples += 1
    
    # Marker: exact match, under-prediction (down arrow) or over-prediction (up arrow).
    status = "✓" if pred_thresh == true_thresh else ("↓" if pred_thresh < true_thresh else "↑")
    print(f"{row['file']:<45} {row['backend']:<5} {row['precision']:<7} "
          f"{true_thresh:>5} {pred_thresh:>5} {conf:>6.2f} {status}{score:>5.2f}")

print()
# Guard the mean: with no holdout rows in the dataset the division would raise.
if total_samples:
    print(f"Total Score: {total_score:.2f} / {total_samples} = {total_score/total_samples:.4f}")
else:
    print("Total Score: n/a (no holdout rows found in the dataset)")
print()

# -----------------------------------------------------------------------------
# 9. USAGE EXAMPLE
# -----------------------------------------------------------------------------
# Emit the usage banner and a copy-pasteable predict_threshold snippet in a single
# call; each list entry is one output line.
print("\n".join([
    "=" * 70,
    "USAGE EXAMPLE",
    "=" * 70,
    "",
    '# Basic prediction',
    'result = predict_threshold(',
    '    qasm_path="circuits/your_circuit.qasm",',
    '    backend="GPU",',
    '    precision="double"',
    ')',
    'print(f"Predicted threshold: {result[\'predicted_threshold\']}")',
    'print(f"Confidence: {result[\'confidence\']:.2f}")',
    "",
    '# Conservative prediction (bumps up when uncertain)',
    'result = predict_threshold(',
    '    qasm_path="circuits/your_circuit.qasm",',
    '    backend="GPU",',
    '    precision="double",',
    '    conservative=True,',
    '    confidence_threshold=0.6',
    ')',
]))


PRODUCTION THRESHOLD CLASSIFIER

Best Hyperparameters:
  n_estimators: 368
  max_depth: 5
  min_samples_split: 4
  min_samples_leaf: 1
  max_features: log2
  criterion: entropy
  class_weight: balanced_subsample
  bootstrap: False
  random_state: 42
  n_jobs: -1

TRAINING FILES
Total: 31 unique QASM files

   1. ae_indep_qiskit_130.qasm                      (threshold= 8, samples=1)
   2. ae_indep_qiskit_20.qasm                       (threshold= 1, samples=4)
   3. cutbell_n30_k6.qasm                           (threshold= 2, samples=4)
   4. dj_indep_qiskit_130.qasm                      (threshold= 1, samples=4)
   5. dj_indep_qiskit_15.qasm                       (threshold= 1, samples=4)
   6. ghz_indep_qiskit_100.qasm                     (threshold= 2, samples=4)
   7. ghz_indep_qiskit_130.qasm                     (threshold= 2, samples=4)
   8. ghz_indep_qiskit_15.qasm                      (threshold= 2, samples=4)
   9. ghz_indep_qiskit_30.qasm                      (threshold= 2, s

In [None]:
# Conservative prediction (bumps up when uncertain) for five extra test circuits.
demo_circuits = [
    "test_circuits/graphstate_indep_qiskit_30.qasm",
    "test_circuits/pricingcall_indep_qiskit_25.qasm",
    "test_circuits/qftentangled_indep_qiskit_130.qasm",
    "test_circuits/qnn_indep_qiskit_30.qasm",
    "test_circuits/shor_9_4_indep_qiskit_18.qasm",
]

demo_results = [
    predict_threshold(
        qasm_path=circuit_path,
        backend="CPU",
        precision="double",
        conservative=True,
        confidence_threshold=0.6
    )
    for circuit_path in demo_circuits
]

# Keep the individual names bound, in the same order as the circuit list.
result1, result2, result3, result4, result5 = demo_results

# Report every prediction made in this cell. Previously the cell printed `result`,
# a name it never assigns, so the output showed whatever an earlier cell left behind.
for demo_path, demo_result in zip(demo_circuits, demo_results):
    print(f"{demo_path}")
    print(f"Predicted threshold: {demo_result['predicted_threshold']}")
    print(f"Confidence: {demo_result['confidence']:.2f}")
    print()