In [2]:
# =============================================================================
# THRESHOLD CLASSIFIER - Model Comparison
# =============================================================================
# Goal: Predict min_threshold class given circuit features
# Dataset: training_data.csv (0.99 fidelity threshold, CPU-only)
# Scoring: competition score (exact=1, over=true/pred, under=0)
# =============================================================================

import numpy as np
import pandas as pd
from sklearn.ensemble import (
    RandomForestClassifier, GradientBoostingClassifier, ExtraTreesClassifier,
    AdaBoostClassifier, BaggingClassifier
)
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import StratifiedGroupKFold
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
import warnings
warnings.filterwarnings("ignore")

# -----------------------------------------------------------------------------
# 1. LOAD DATA
# -----------------------------------------------------------------------------
# Read the labelled training set; all later cells work off this frame.
df = pd.read_csv("training_data.csv")

# Banner followed by basic dataset statistics.
print("=" * 70, "THRESHOLD CLASSIFIER", "=" * 70, "", sep="\n")
print("Dataset: training_data.csv")
print(f"Total samples: {len(df)}")
print(f"Unique files: {df['file'].nunique()}")
print()

# Class balance of the target, listed in ascending threshold order.
print("Target: min_threshold")
threshold_dist = df['min_threshold'].value_counts().sort_index()
n_rows = len(df)
for level, n_level in threshold_dist.items():
    share = 100 * n_level / n_rows
    print(f"  {level:>3d}: {n_level:3d} samples ({share:5.1f}%)")
print()

# -----------------------------------------------------------------------------
# 2. FEATURE ENGINEERING
# -----------------------------------------------------------------------------
def engineer_features(df):
    """Return a copy of ``df`` extended with derived circuit features.

    The input frame is not modified. Derived columns are appended in a
    fixed order (interactions, ratios, polynomial/log terms, complexity
    scores, pattern features).

    Args:
        df: DataFrame holding the raw circuit columns read below, e.g.
            ``avg_qubit_degree``, ``n_qubits``, ``crude_depth``,
            ``n_total_gates``, ``entanglement_pressure`` and the
            ``has_*_pattern`` flags.

    Returns:
        A new DataFrame with all original columns plus the derived ones.

    Raises:
        KeyError: If a required raw column is missing.
    """
    X = df.copy()

    # Raw columns that feed several derived features.
    deg = X['avg_qubit_degree']
    qubits = X['n_qubits']
    depth = X['crude_depth']
    gates = X['n_total_gates']
    two_q = X['n_2q_gates']
    edges = X['n_unique_edges']
    rotations = X['n_rotation_gates']
    variational = X['has_variational_pattern']

    # Insertion order below is the column order of the result.
    # The "+ 1" in the denominators avoids division by zero.
    derived = {
        # Interaction features
        'degree_x_qubits': deg * qubits,
        'degree_x_depth': deg * depth,
        'degree_x_2q': deg * two_q,
        'entanglement_complexity': edges * deg,
        'entanglement_per_qubit': edges / (qubits + 1),
        # Ratio features
        'cx_ratio': X['n_cx'] / (gates + 1),
        'rotation_ratio': rotations / (gates + 1),
        'multi_qubit_ratio': (two_q + X['n_3q_gates']) / (gates + 1),
        'gates_per_depth': gates / (depth + 1),
        'depth_per_qubit': depth / (qubits + 1),
        # Unique edges relative to the number of possible qubit pairs.
        'edge_density': edges / (qubits * (qubits - 1) / 2 + 1),
        'edge_repetition_ratio': X['n_edge_repetitions'] / (edges + 1),
        # Polynomial / log features
        'degree_squared': deg ** 2,
        'qubits_squared': qubits ** 2,
        'depth_squared': depth ** 2,
        'log_qubits': np.log1p(qubits),
        'log_depth': np.log1p(depth),
        'log_gates': np.log1p(gates),
        # Complexity scores
        'complexity_score': qubits * depth * deg / 1000,
        'entanglement_burden': two_q * deg / (qubits + 1),
        'sim_difficulty': qubits ** 1.5 * X['entanglement_pressure'],
        # Pattern features
        'n_patterns': (X['has_qft_pattern'] + X['has_iqft_pattern'] +
                       X['has_grover_pattern'] + variational + X['has_ghz_pattern']),
        'variational_complexity': variational * rotations,
    }

    for column, values in derived.items():
        X[column] = values

    return X

# Raw columns plus engineered features.
X_eng = engineer_features(df)

# CV groups (one per source file) and the integer-valued target.
groups = df['file'].astype(str).values
y_raw = df['min_threshold'].astype(int).values

# Encode threshold values as consecutive class indices; le.classes_ maps back.
le = LabelEncoder()
y = le.fit_transform(y_raw)
THRESHOLD_CLASSES = le.classes_

# Remove the target, the file identifier and the remaining non-feature
# columns, keeping only the names that are actually present.
drop_cols = [
    col
    for col in ("min_threshold", "file", "forward_runtime",
                "max_fidelity_achieved", "n_thresholds_tested", "threshold_runtime")
    if col in X_eng.columns
]
X_eng = X_eng.drop(columns=drop_cols)

# Every non-numeric column is treated as categorical and one-hot encoded.
cat_cols = X_eng.select_dtypes(exclude=[np.number]).columns.tolist()
print(f"Categorical columns: {cat_cols}")
X_eng = pd.get_dummies(X_eng, columns=cat_cols)

# Dense float32 matrix with NaN and +/-inf replaced by 0.
X = np.nan_to_num(
    X_eng.values.astype(np.float32), nan=0.0, posinf=0.0, neginf=0.0
)

print(f"Feature matrix shape: {X.shape}")
print(f"Threshold classes: {list(THRESHOLD_CLASSES)}")
print()

# -----------------------------------------------------------------------------
# 3. SCORING FUNCTIONS
# -----------------------------------------------------------------------------
def competition_score(y_true, y_pred):
    """Per-sample competition score.

    An exact match scores 1, an over-prediction scores ``true / pred``,
    and an under-prediction scores 0.

    Args:
        y_true: Sequence of true threshold values.
        y_pred: Sequence of predicted threshold values, same length.

    Returns:
        1-D float array with one score per sample.
    """
    truth = np.array(y_true)
    guess = np.array(y_pred)

    # Start from 0 everywhere, which is already the under-prediction score.
    result = np.zeros(len(truth))

    overshoot = guess > truth
    result[overshoot] = truth[overshoot] / guess[overshoot]
    result[guess == truth] = 1.0
    return result

# -----------------------------------------------------------------------------
# 4. MODEL COMPARISON
# -----------------------------------------------------------------------------
# Size of the rarest class that actually occurs in y (count each class once).
class_counts = np.bincount(y)
min_class_count = int(class_counts[class_counts > 0].min())

# Use at most 5 folds and no more than the rarest class can fill, but never
# fewer than 2: a k-fold splitter needs at least two folds, so a class with a
# single sample must not push n_splits down to 1.
n_splits = max(2, min(5, min_class_count))

# Grouping by file keeps all rows of one circuit file in the same fold.
sgkf = StratifiedGroupKFold(n_splits=n_splits, shuffle=True, random_state=42)

print(f"Using {n_splits}-fold StratifiedGroupKFold (grouped by file)")
print()

# Settings shared by several candidates.
_boosted = dict(n_estimators=500, learning_rate=0.05, random_state=42)
_forest = dict(
    n_estimators=500, max_depth=15, min_samples_leaf=2,
    class_weight='balanced', random_state=42, n_jobs=-1,
)

# Candidate classifiers keyed by display name; insertion order is the
# order in which they are evaluated and reported.
models = dict([
    ('XGBoost', XGBClassifier(
        max_depth=6, verbosity=0, use_label_encoder=False,
        eval_metric='mlogloss', **_boosted
    )),
    ('LightGBM', LGBMClassifier(
        max_depth=6, verbose=-1, class_weight='balanced', **_boosted
    )),
    ('RandomForest', RandomForestClassifier(**_forest)),
    ('ExtraTrees', ExtraTreesClassifier(**_forest)),
    ('GradientBoosting', GradientBoostingClassifier(max_depth=5, **_boosted)),
    ('AdaBoost', AdaBoostClassifier(
        n_estimators=200, learning_rate=0.05, random_state=42
    )),
    ('LogisticReg', LogisticRegression(
        max_iter=1000, class_weight='balanced', random_state=42, n_jobs=-1
    )),
])

# Section banner.
print("=" * 70, "MODEL EVALUATION", "=" * 70, "", sep="\n")

results = []

for name, model in models.items():
    print(f"Evaluating {name}...")

    # Out-of-fold predictions in encoded-label space; -1 is a placeholder
    # that each test fold overwrites.
    y_pred_all = np.full(len(y), -1)

    for fold_idx, (train_idx, test_idx) in enumerate(sgkf.split(X, y, groups)):
        # Scaling statistics come from the training fold only; scaled
        # values are clipped to [-10, 10].
        scaler = StandardScaler()
        X_train = np.clip(scaler.fit_transform(X[train_idx]), -10, 10)
        X_test = np.clip(scaler.transform(X[test_idx]), -10, 10)

        # Fresh, unfitted estimator with the template's parameters.
        model_fold = type(model)(**model.get_params())
        model_fold.fit(X_train, y[train_idx])
        y_pred_all[test_idx] = model_fold.predict(X_test)

    # Convert back to original threshold values before scoring.
    y_true_orig = le.inverse_transform(y)
    y_pred_orig = le.inverse_transform(y_pred_all)

    results.append(dict(
        model=name,
        comp_score=competition_score(y_true_orig, y_pred_orig).mean(),
        accuracy=accuracy_score(y, y_pred_all),
        under_rate=np.mean(y_pred_orig < y_true_orig),
        over_rate=np.mean(y_pred_orig > y_true_orig),
    ))

# Results table, best competition score first.
print("", "=" * 70,
      "RESULTS - Sorted by Competition Score (higher is better)",
      "=" * 70, "", sep="\n")
print(f"{'Model':<20} {'CompScore':>10} {'Accuracy':>10} {'Under%':>8} {'Over%':>8}")
print("-" * 60)

for r in sorted(results, key=lambda entry: -entry['comp_score']):
    under_pct = r['under_rate'] * 100
    over_pct = r['over_rate'] * 100
    print(f"{r['model']:<20} {r['comp_score']:>10.4f} {r['accuracy']:>10.4f} "
          f"{under_pct:>7.1f} {over_pct:>7.1f}")

# Winner by competition score; later cells read `best`.
print()
best = max(results, key=lambda entry: entry['comp_score'])
print(f"Best Model: {best['model']}")
print(f"  Competition Score: {best['comp_score']:.4f}")
print(f"  Accuracy: {best['accuracy']:.4f}")
print(f"  Under-rate: {best['under_rate']*100:.1f}%")
print(f"  Over-rate: {best['over_rate']*100:.1f}%")

THRESHOLD CLASSIFIER

Dataset: training_data.csv
Total samples: 1107
Unique files: 576

Target: min_threshold
    1: 434 samples ( 39.2%)
    2: 336 samples ( 30.4%)
    4: 114 samples ( 10.3%)
    8:  80 samples (  7.2%)
   16:  56 samples (  5.1%)
   32:  58 samples (  5.2%)
   64:  29 samples (  2.6%)

Categorical columns: ['backend', 'precision']
Feature matrix shape: (1107, 88)
Threshold classes: [np.int64(1), np.int64(2), np.int64(4), np.int64(8), np.int64(16), np.int64(32), np.int64(64)]

Using 5-fold StratifiedGroupKFold (grouped by file)

MODEL EVALUATION

Evaluating XGBoost...
Evaluating LightGBM...
Evaluating RandomForest...
Evaluating ExtraTrees...
Evaluating GradientBoosting...
Evaluating AdaBoost...
Evaluating LogisticReg...

RESULTS - Sorted by Competition Score (higher is better)

Model                 CompScore   Accuracy   Under%    Over%
------------------------------------------------------------
GradientBoosting         0.9219     0.9033     5.8     3.9
XGBoost    

In [7]:
# =============================================================================
# FEATURE SELECTION - Find optimal feature count
# =============================================================================

# Section banner.
print("=" * 70, "FEATURE SELECTION", "=" * 70, "", sep="\n")

# Get feature importance using RandomForest, fitted on the full
# standardized feature matrix.
print("Training RandomForest to get feature importances...")
scaler_init = StandardScaler()
X_scaled_init = scaler_init.fit_transform(X)

rf_imp = RandomForestClassifier(
    n_estimators=500, max_depth=15, min_samples_leaf=2,
    class_weight='balanced', random_state=42, n_jobs=-1
).fit(X_scaled_init, y)

# One row per feature, most important first.
importance_df = pd.DataFrame({
    'feature': X_eng.columns.tolist(),
    'importance': rf_imp.feature_importances_
}).sort_values('importance', ascending=False)

print()
print("Top 20 most important features:")
print("-" * 50)
top20 = importance_df.head(20)
for feat_name, feat_imp in zip(top20['feature'], top20['importance']):
    print(f"  {feat_name:<35} {feat_imp:.4f}")
print()

# -----------------------------------------------------------------------------
# Sweep feature counts with the best model from cell 1
# -----------------------------------------------------------------------------
print("Optimal number of features (using best model from above):")
print("-" * 50)

# Template of the winning model from the comparison cell; a fresh copy is
# built for every fold below.
best_model_name = best['model']
best_model_template = models[best_model_name]

feature_sweep_results = []

# Candidate sizes: 10, 20, ... strictly below the total, then the full set.
# Every candidate is <= the total by construction, so no bounds check is needed.
n_features_total = len(importance_df)
candidate_ks = list(range(10, n_features_total, 10)) + [n_features_total]

for top_k in candidate_ks:
    top_features = importance_df.head(top_k)['feature'].tolist()
    X_top = X_eng[top_features].values.astype(np.float32)
    X_top = np.nan_to_num(X_top, nan=0.0, posinf=0.0, neginf=0.0)

    # Out-of-fold predictions; -1 is a placeholder overwritten per test fold.
    y_pred_all = np.full(len(y), -1)

    for train_idx, test_idx in sgkf.split(X_top, y, groups):
        # Scale on the training fold only, then clip to [-10, 10].
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_top[train_idx])
        X_test = scaler.transform(X_top[test_idx])
        X_train = np.clip(X_train, -10, 10)
        X_test = np.clip(X_test, -10, 10)

        model_fold = best_model_template.__class__(**best_model_template.get_params())
        model_fold.fit(X_train, y[train_idx])
        y_pred_all[test_idx] = model_fold.predict(X_test)

    # Score in original threshold units.
    y_true_orig = le.inverse_transform(y)
    y_pred_orig = le.inverse_transform(y_pred_all)
    cs = competition_score(y_true_orig, y_pred_orig).mean()
    acc = accuracy_score(y, y_pred_all)
    feature_sweep_results.append({'k': top_k, 'comp_score': cs, 'accuracy': acc})
    print(f"  Top {top_k:2d} features: CompScore = {cs:.4f}, Accuracy = {acc:.4f}")

# Pick best feature count (max() keeps the first, i.e. smallest, k on ties).
best_k_result = max(feature_sweep_results, key=lambda x: x['comp_score'])
BEST_K = best_k_result['k']
selected_rows = importance_df.head(BEST_K)
TOP_FEATURES = selected_rows['feature'].tolist()

print()
print(f"Best feature count: {BEST_K} (CompScore = {best_k_result['comp_score']:.4f})")
print()
print(f"Selected Top {BEST_K} Features:")
# Walk the selected rows directly instead of re-filtering importance_df
# once per feature.
for i, (feat, imp) in enumerate(
        zip(selected_rows['feature'], selected_rows['importance']), 1):
    print(f"  {i:2d}. {feat:<35} ({imp:.4f})")
print()

# Re-compare all models with the selected features
print("=" * 70, f"MODEL RE-COMPARISON WITH TOP {BEST_K} FEATURES", "=" * 70, "", sep="\n")

# Feature matrix restricted to the selected columns, NaN/inf replaced by 0.
X_top = np.nan_to_num(
    X_eng[TOP_FEATURES].values.astype(np.float32),
    nan=0.0, posinf=0.0, neginf=0.0,
)

results_top = []

for name, model in models.items():
    print(f"Evaluating {name}...")

    # Out-of-fold predictions; -1 is a placeholder overwritten per test fold.
    y_pred_all = np.full(len(y), -1)

    for train_idx, test_idx in sgkf.split(X_top, y, groups):
        # Scale on the training fold only, then clip to [-10, 10].
        scaler = StandardScaler()
        X_train = np.clip(scaler.fit_transform(X_top[train_idx]), -10, 10)
        X_test = np.clip(scaler.transform(X_top[test_idx]), -10, 10)

        # Fresh, unfitted estimator with the template's parameters.
        model_fold = type(model)(**model.get_params())
        model_fold.fit(X_train, y[train_idx])
        y_pred_all[test_idx] = model_fold.predict(X_test)

    # Score in original threshold units.
    y_true_orig = le.inverse_transform(y)
    y_pred_orig = le.inverse_transform(y_pred_all)

    results_top.append(dict(
        model=name,
        comp_score=competition_score(y_true_orig, y_pred_orig).mean(),
        accuracy=accuracy_score(y, y_pred_all),
        under_rate=np.mean(y_pred_orig < y_true_orig),
        over_rate=np.mean(y_pred_orig > y_true_orig),
    ))

# Results table, best competition score first.
print()
print(f"{'Model':<20} {'CompScore':>10} {'Accuracy':>10} {'Under%':>8} {'Over%':>8}")
print("-" * 60)

for r in sorted(results_top, key=lambda entry: -entry['comp_score']):
    under_pct = r['under_rate'] * 100
    over_pct = r['over_rate'] * 100
    print(f"{r['model']:<20} {r['comp_score']:>10.4f} {r['accuracy']:>10.4f} "
          f"{under_pct:>7.1f} {over_pct:>7.1f}")

# The winner on the reduced feature set is the model tuned in the next cell.
best_top = max(results_top, key=lambda entry: entry['comp_score'])
BEST_MODEL_NAME = best_top['model']
print()
print(f"Best model with top {BEST_K} features: {BEST_MODEL_NAME} (CompScore = {best_top['comp_score']:.4f})")

FEATURE SELECTION

Training RandomForest to get feature importances...

Top 20 most important features:
--------------------------------------------------
  entanglement_per_qubit              0.0428
  avg_gate_span                       0.0384
  degree_squared                      0.0372
  avg_qubit_degree                    0.0360
  degree_x_qubits                     0.0317
  entanglement_complexity             0.0312
  std_gate_span                       0.0286
  n_unique_edges                      0.0275
  gates_per_layer_estimate            0.0257
  max_gate_span                       0.0244
  midpoint_cut_crossings              0.0237
  max_qubit_degree                    0.0223
  n_connected_components              0.0217
  multi_qubit_ratio                   0.0182
  degree_x_2q                         0.0181
  depth_squared                       0.0179
  degree_x_depth                      0.0177
  depth_per_qubit                     0.0176
  edge_density                     

In [9]:
# =============================================================================
# HYPERPARAMETER TUNING - Optuna on best model
# =============================================================================

import optuna
from optuna.samplers import TPESampler
# Raise Optuna's log level to WARNING.
optuna.logging.set_verbosity(optuna.logging.WARNING)

# Section banner.
print("=" * 70,
      f"HYPERPARAMETER TUNING: {BEST_MODEL_NAME} with Top {BEST_K} Features",
      "=" * 70, "", sep="\n")

# Feature matrix restricted to the selected columns, NaN/inf replaced by 0.
X_top = np.nan_to_num(
    X_eng[TOP_FEATURES].values.astype(np.float32),
    nan=0.0, posinf=0.0, neginf=0.0,
)

def make_objective(model_name):
    """Build an Optuna objective that cross-validates ``model_name``.

    The returned callable samples hyperparameters for the named model,
    runs the module-level grouped CV (``sgkf`` over ``X_top``, ``y``,
    ``groups``) and returns the mean competition score. Accuracy and
    under-prediction rate are stored on the trial as user attributes.

    Args:
        model_name: One of 'XGBoost', 'LightGBM', 'GradientBoosting',
            'RandomForest' or 'ExtraTrees'.

    Returns:
        A function ``objective(trial) -> float`` for ``study.optimize``.
        Calling it with any other ``model_name`` raises ``ValueError``.
    """

    def _bagged_trees_space(trial):
        # RandomForest and ExtraTrees use the same search space.
        return {
            'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
            'max_depth': trial.suggest_int('max_depth', 3, 30),
            'min_samples_split': trial.suggest_int('min_samples_split', 2, 20),
            'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 10),
            'max_features': trial.suggest_categorical('max_features', ['sqrt', 'log2', None]),
            'criterion': trial.suggest_categorical('criterion', ['gini', 'entropy']),
            'class_weight': trial.suggest_categorical('class_weight', ['balanced', 'balanced_subsample']),
            'bootstrap': trial.suggest_categorical('bootstrap', [True, False]),
            'random_state': 42,
            'n_jobs': -1,
        }

    def _sample(trial):
        # Returns (estimator class, constructor kwargs) for this trial.
        if model_name == 'XGBoost':
            space = {
                'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
                'max_depth': trial.suggest_int('max_depth', 3, 20),
                'learning_rate': trial.suggest_float('learning_rate', 0.005, 0.5, log=True),
                'min_child_weight': trial.suggest_int('min_child_weight', 1, 15),
                'subsample': trial.suggest_float('subsample', 0.5, 1.0),
                'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
                'reg_alpha': trial.suggest_float('reg_alpha', 1e-8, 10.0, log=True),
                'reg_lambda': trial.suggest_float('reg_lambda', 1e-8, 10.0, log=True),
                'gamma': trial.suggest_float('gamma', 1e-8, 5.0, log=True),
                'random_state': 42,
                'verbosity': 0,
                'use_label_encoder': False,
                'eval_metric': 'mlogloss',
            }
            return XGBClassifier, space

        if model_name == 'LightGBM':
            space = {
                'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
                'max_depth': trial.suggest_int('max_depth', 3, 20),
                'learning_rate': trial.suggest_float('learning_rate', 0.005, 0.5, log=True),
                'num_leaves': trial.suggest_int('num_leaves', 10, 200),
                'min_child_samples': trial.suggest_int('min_child_samples', 5, 50),
                'subsample': trial.suggest_float('subsample', 0.5, 1.0),
                'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
                'reg_alpha': trial.suggest_float('reg_alpha', 1e-8, 10.0, log=True),
                'reg_lambda': trial.suggest_float('reg_lambda', 1e-8, 10.0, log=True),
                'class_weight': 'balanced',
                'random_state': 42,
                'verbose': -1,
            }
            return LGBMClassifier, space

        if model_name == 'GradientBoosting':
            space = {
                'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
                'max_depth': trial.suggest_int('max_depth', 3, 15),
                'learning_rate': trial.suggest_float('learning_rate', 0.005, 0.5, log=True),
                'min_samples_split': trial.suggest_int('min_samples_split', 2, 20),
                'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 15),
                'subsample': trial.suggest_float('subsample', 0.5, 1.0),
                'random_state': 42,
            }
            return GradientBoostingClassifier, space

        if model_name == 'RandomForest':
            space = _bagged_trees_space(trial)
            return RandomForestClassifier, space

        if model_name == 'ExtraTrees':
            space = _bagged_trees_space(trial)
            return ExtraTreesClassifier, space

        raise ValueError(f"No tuning defined for {model_name}")

    def objective(trial):
        model_class, params = _sample(trial)

        # Out-of-fold predictions; -1 is a placeholder overwritten per test fold.
        oof_pred = np.full(len(y), -1)

        for train_idx, test_idx in sgkf.split(X_top, y, groups):
            # Scale on the training fold only, then clip to [-10, 10].
            scaler = StandardScaler()
            X_train = np.clip(scaler.fit_transform(X_top[train_idx]), -10, 10)
            X_test = np.clip(scaler.transform(X_top[test_idx]), -10, 10)

            estimator = model_class(**params)
            estimator.fit(X_train, y[train_idx])
            oof_pred[test_idx] = estimator.predict(X_test)

        # Score in original threshold units.
        truth = le.inverse_transform(y)
        guess = le.inverse_transform(oof_pred)
        mean_score = competition_score(truth, guess).mean()

        # Secondary metrics, kept on the trial for reporting.
        trial.set_user_attr('accuracy', accuracy_score(y, oof_pred))
        trial.set_user_attr('under_rate', np.mean(guess < truth))
        return mean_score

    return objective

# Run Optuna
N_TRIALS = 40
print(f"Running {N_TRIALS} Optuna trials for {BEST_MODEL_NAME}...")
print()

# Seeded TPE sampler; the study maximizes the mean competition score
# returned by the objective.
sampler = TPESampler(seed=42)
study = optuna.create_study(direction='maximize', sampler=sampler)
study.optimize(make_objective(BEST_MODEL_NAME), n_trials=N_TRIALS, show_progress_bar=True)

print("", "=" * 70, "TUNING RESULTS", "=" * 70, "", sep="\n")

# Secondary metrics the objective stored on the winning trial.
best_attrs = study.best_trial.user_attrs
print(f"Best Competition Score: {study.best_value:.4f}")
print(f"Best Accuracy: {best_attrs['accuracy']:.4f}")
print(f"Best Under-rate: {best_attrs['under_rate']*100:.1f}%")
print()

# Copy the tuned parameters; later code adds the fixed ones to this dict.
print("Best Hyperparameters:")
BEST_PARAMS = study.best_params.copy()
for k, v in BEST_PARAMS.items():
    shown = f"{v:.6f}" if isinstance(v, float) else f"{v}"
    print(f"  {k}: {shown}")
print()

# Final evaluation with best params
print("=" * 70, "FINAL CROSS-VALIDATED METRICS", "=" * 70, "", sep="\n")

# Add fixed params back: study.best_params holds only the sampled values,
# so the constants used inside the objective are restored per model.
_fixed_by_model = {
    'XGBoost': (XGBClassifier, {
        'random_state': 42,
        'verbosity': 0,
        'use_label_encoder': False,
        'eval_metric': 'mlogloss',
    }),
    'LightGBM': (LGBMClassifier, {
        'random_state': 42,
        'verbose': -1,
        'class_weight': 'balanced',
    }),
    'GradientBoosting': (GradientBoostingClassifier, {'random_state': 42}),
    'RandomForest': (RandomForestClassifier, {'random_state': 42, 'n_jobs': -1}),
    'ExtraTrees': (ExtraTreesClassifier, {'random_state': 42, 'n_jobs': -1}),
}
if BEST_MODEL_NAME in _fixed_by_model:
    best_model_class, _fixed_params = _fixed_by_model[BEST_MODEL_NAME]
    BEST_PARAMS.update(_fixed_params)

# Out-of-fold predictions; -1 is a placeholder overwritten per test fold.
y_pred_final = np.full(len(y), -1)

for train_idx, test_idx in sgkf.split(X_top, y, groups):
    # Scale on the training fold only, then clip to [-10, 10].
    scaler = StandardScaler()
    X_train = np.clip(scaler.fit_transform(X_top[train_idx]), -10, 10)
    X_test = np.clip(scaler.transform(X_top[test_idx]), -10, 10)

    model = best_model_class(**BEST_PARAMS)
    model.fit(X_train, y[train_idx])
    y_pred_final[test_idx] = model.predict(X_test)

# Metrics in original threshold units.
y_true_orig = le.inverse_transform(y)
y_pred_orig = le.inverse_transform(y_pred_final)
final_cs = competition_score(y_true_orig, y_pred_orig).mean()
final_acc = accuracy_score(y, y_pred_final)
final_under = np.mean(y_pred_orig < y_true_orig)
final_over = np.mean(y_pred_orig > y_true_orig)

under_pct = final_under * 100
over_pct = final_over * 100
print(f"  Competition Score: {final_cs:.4f}")
print(f"  Accuracy:          {final_acc:.4f}")
print(f"  Under-rate:        {under_pct:.1f}%")
print(f"  Over-rate:         {over_pct:.1f}%")
print()
print(f"Baseline (all features, default params): CompScore = {best['comp_score']:.4f}")
print(f"After feature selection + tuning:         CompScore = {final_cs:.4f}")

HYPERPARAMETER TUNING: GradientBoosting with Top 88 Features

Running 40 Optuna trials for GradientBoosting...



Best trial: 32. Best value: 0.925023: 100%|██████████| 40/40 [2:23:22<00:00, 215.07s/it]  



TUNING RESULTS

Best Competition Score: 0.9250
Best Accuracy: 0.9097
Best Under-rate: 5.9%

Best Hyperparameters:
  n_estimators: 254
  max_depth: 8
  learning_rate: 0.020167
  min_samples_split: 8
  min_samples_leaf: 6
  subsample: 0.947884

FINAL CROSS-VALIDATED METRICS

  Competition Score: 0.9250
  Accuracy:          0.9097
  Under-rate:        5.9%
  Over-rate:         3.2%

Baseline (all features, default params): CompScore = 0.9219
After feature selection + tuning:         CompScore = 0.9250


In [12]:
# =============================================================================
# PRODUCTION THRESHOLD CLASSIFIER
# =============================================================================
# Trains on ALL data with tuned hyperparameters, exposes predict_threshold()
# =============================================================================

import joblib
from comprehensive_features import QASMFeatureExtractor
from pathlib import Path

print("=" * 70)
print("PRODUCTION THRESHOLD CLASSIFIER")
print("=" * 70)
print()
print(f"Model: {BEST_MODEL_NAME}")
print(f"Features: Top {BEST_K}")
print()
print("Hyperparameters:")
for k, v in BEST_PARAMS.items():
    if isinstance(v, float):
        print(f"  {k}: {v:.6f}")
    else:
        print(f"  {k}: {v}")
print()

# Train on ALL data: no held-out fold here, so the scaler is fitted on the
# full selected-feature matrix. Scaled values are clipped to [-10, 10], the
# same preprocessing used during cross-validation.
prod_scaler = StandardScaler()
X_prod = prod_scaler.fit_transform(X_top)
X_prod = np.clip(X_prod, -10, 10)

prod_model = best_model_class(**BEST_PARAMS)
prod_model.fit(X_prod, y)
print(f"Model trained on full dataset ({len(y)} samples).")
print(f"Classes: {list(THRESHOLD_CLASSES)}")
print()

# Save references for prediction (read by predict_threshold).
PRODUCTION_FEATURES = TOP_FEATURES
PRODUCTION_DROP_COLS = drop_cols

# -------------------------------------------------------------------------
# SAVE MODEL TO DISK
# -------------------------------------------------------------------------
# Bundle the fitted model, scaler, label encoder and feature list needed to
# repeat the preprocessing and decode predictions at inference time.
model_artifact = {
    'model': prod_model,
    'scaler': prod_scaler,
    'label_encoder': le,
    'threshold_classes': THRESHOLD_CLASSES,
    'features': PRODUCTION_FEATURES,
    'drop_cols': PRODUCTION_DROP_COLS,
    'model_name': BEST_MODEL_NAME,
    'best_params': BEST_PARAMS,
    'best_k': BEST_K,
}

save_path = Path("models/threshold_classifier.pkl")
# Create the output directory first: joblib.dump does not create missing
# parent directories, so a fresh checkout without "models/" would fail
# after the model has already been trained.
save_path.parent.mkdir(parents=True, exist_ok=True)
joblib.dump(model_artifact, save_path)
print(f"Model saved to {save_path}")
print(f"  File size: {save_path.stat().st_size / 1024:.1f} KB")
print()


def predict_threshold(file_path, precision, conservative=False, confidence_threshold=0.6):
    """
    Predict the optimal threshold for a QASM circuit.

    Features are extracted from the file, passed through the same
    engineering, one-hot encoding, scaling and clipping steps as the
    training data, and fed to the production model.

    Args:
        file_path: Path to the QASM file
        precision: 'single' or 'double'
        conservative: If True, move the prediction one class up when the
            model's top probability is below ``confidence_threshold``
            (only possible when the model exposes ``predict_proba``)
        confidence_threshold: Confidence threshold for bumping (default 0.6)

    Returns:
        dict with keys 'predicted_threshold' (int), 'confidence' (top class
        probability, or 0.0 without ``predict_proba``), 'probabilities'
        (threshold value -> probability) and 'conservative_mode'
    """
    raw = QASMFeatureExtractor(file_path).extract_all()
    raw['backend'] = 'CPU'
    raw['precision'] = precision

    frame = engineer_features(pd.DataFrame([raw]))

    # Drop non-feature cols that happen to be present.
    present = [col for col in PRODUCTION_DROP_COLS if col in frame.columns]
    frame = frame.drop(columns=present)

    # One-hot encode every non-numeric column.
    categorical = frame.select_dtypes(exclude=[np.number]).columns.tolist()
    frame = pd.get_dummies(frame, columns=categorical)

    # Align with training features: keep exactly the training columns in
    # training order; columns missing from this single row (e.g. the other
    # one-hot levels) are filled with 0.
    frame = frame.reindex(columns=PRODUCTION_FEATURES, fill_value=0)

    X_input = frame.values.astype(np.float32)
    X_input = np.nan_to_num(X_input, nan=0.0, posinf=0.0, neginf=0.0)
    X_input = np.clip(prod_scaler.transform(X_input), -10, 10)

    # Predicted class index in encoded-label space.
    pred_encoded = prod_model.predict(X_input)[0]

    if not hasattr(prod_model, 'predict_proba'):
        confidence = 0.0
        prob_dict = {}
    else:
        proba = prod_model.predict_proba(X_input)[0]
        confidence = float(proba.max())
        prob_dict = {
            int(THRESHOLD_CLASSES[cls]): float(p)
            for cls, p in zip(prod_model.classes_, proba)
        }

        # Conservative: bump up one class if not confident, capped at the
        # highest class. The reported confidence is left unchanged.
        if conservative and confidence < confidence_threshold:
            pred_encoded = min(pred_encoded + 1, len(THRESHOLD_CLASSES) - 1)

    pred_threshold = int(le.inverse_transform([pred_encoded])[0])

    return {
        'predicted_threshold': pred_threshold,
        'confidence': confidence,
        'probabilities': prob_dict,
        'conservative_mode': conservative
    }

# Sanity check on training files: these rows were part of the training
# data, so this checks the end-to-end prediction path, not generalization.
print("=" * 70, "SANITY CHECK ON TRAINING FILES", "=" * 70, "", sep="\n")

circuits_dir = Path("circuits_new")
test_rows = df.sample(n=min(15, len(df)), random_state=42)

print(f"{'File':<45} {'Prec':<7} {'True':>5} {'Pred':>5} {'Conf':>6} {'Result':<8}")
print("-" * 80)

total_score = 0
for file_name, prec, true_raw in zip(
        test_rows['file'], test_rows['precision'], test_rows['min_threshold']):
    result = predict_threshold(circuits_dir / file_name, prec)
    true_t = int(true_raw)
    pred_t = result['predicted_threshold']
    conf = result['confidence']

    # Same rule as the competition score: exact=1, over=true/pred, under=0.
    if pred_t < true_t:
        status, score = "UNDER", 0.0
    elif pred_t > true_t:
        status, score = "OVER", true_t / pred_t
    else:
        status, score = "EXACT", 1.0
    total_score += score

    print(f"{file_name:<45} {prec:<7} {true_t:>5} {pred_t:>5} {conf:>6.2f} {status:<8}")

n_checked = len(test_rows)
print()
print(f"Score: {total_score:.1f}/{n_checked} = {total_score/n_checked:.4f}")
print()
print("Usage:")
print('  result = predict_threshold("circuits_new/your_circuit.qasm", "single")')
print('  result = predict_threshold("circuits_new/your_circuit.qasm", "double", conservative=True)')

PRODUCTION THRESHOLD CLASSIFIER

Model: GradientBoosting
Features: Top 88

Hyperparameters:
  n_estimators: 254
  max_depth: 8
  learning_rate: 0.020167
  min_samples_split: 8
  min_samples_leaf: 6
  subsample: 0.947884
  random_state: 42

Model trained on full dataset (1107 samples).
Classes: [np.int64(1), np.int64(2), np.int64(4), np.int64(8), np.int64(16), np.int64(32), np.int64(64)]

Model saved to models\threshold_classifier.pkl
  File size: 9731.8 KB

SANITY CHECK ON TRAINING FILES

File                                          Prec     True  Pred   Conf Result  
--------------------------------------------------------------------------------
qftentangled_indep_qiskit_17.qasm             single      4     4   0.93 EXACT   
ae_indep_qiskit_36.qasm                       double      4     4   1.00 EXACT   
grover-noancilla_indep_qiskit_3.qasm          single      2     2   1.00 EXACT   
wstate_indep_qiskit_43.qasm                   double      2     2   1.00 EXACT   
qnn_indep_qiski