<a href="https://colab.research.google.com/github/RH00000/UH_RTS_Research_ML/blob/main/abcd_cascade_simulation_gridsearch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Upload the profiling NPZ file from your local machine (Colab)
from google.colab import files
uploaded = files.upload()

Saving abcd_profiling_validation_stats.npz to abcd_profiling_validation_stats.npz


In [3]:
# 1: Load profiling data
import numpy as np
import pandas as pd

data = np.load('abcd_profiling_validation_stats.npz')  # uses the uploaded file
confs = data['confs']    # shape (N,4)
times = data['times']    # shape (N,4)
oks   = data['oks']      # shape (N,4), bool
N, M = confs.shape       # N samples, M=4 models (A,B,C,D)

In [4]:
# 2: Simulation function
def simulate_cascade(confs, times, oks,
                     thr_a, thr_b, thr_c,
                     skip_b_thresh, skip_c_thresh):
    total_time = 0.0
    correct = 0
    skip_c_count = 0
    skip_bc_count = 0

    for j in range(N):
        # Model A
        conf_a = confs[j,0]; time_a = times[j,0]; ok_a = oks[j,0]
        if conf_a >= thr_a:
            total_time += time_a
            correct += ok_a
            continue

        # Decide skip B
        skip_b = (conf_a < skip_b_thresh)
        if not skip_b:
            total_time += times[j,1]              # run B
            if confs[j,1] >= thr_b:
                correct += oks[j,1]
                continue

        # Decide skip C
        conf_b = confs[j,1] if not skip_b else 0.0
        skip_c = skip_b or (conf_b < skip_c_thresh)
        if skip_c and not skip_b:
            skip_c_count += 1
        if skip_b and skip_c:
            skip_bc_count += 1

        if not skip_c:
            total_time += times[j,2]              # run C
            if confs[j,2] >= thr_c:
                correct += oks[j,2]
                continue

        # Fallback D
        total_time += times[j,3]
        correct += oks[j,3]

    avg_time = total_time / N
    accuracy = correct / N
    skip_c_rate = skip_c_count / N
    skip_bc_rate = skip_bc_count / N
    return accuracy, avg_time, skip_c_rate, skip_bc_rate

In [13]:
# 3: Define grid of thresholds to search
# np.linspace(start, end, num) generates 'num' evenly spaced values from start to end (inclusive).
# For example, np.linspace(0.5, 0.9, 5) produces [0.5, 0.6, 0.7, 0.8, 0.9].
# Step 3: Define grid of thresholds to search
# Use np.arange(start, stop, step) to generate values with a specific increment.
thr_as  = np.arange(0.50, 0.951, 0.05)    # A's IDK thresholds: [0.50,0.55,...,0.95]
thr_bs  = np.arange(0.50, 0.951, 0.05)    # B's IDK thresholds
thr_cs  = np.arange(0.50, 0.951, 0.05)    # C's IDK thresholds
# Skip thresholds
skip_bs = np.linspace(0.1, 0.5, 5)       # skip B when A is low: [0.1,0.2,0.3,0.4,0.5]
skip_cs = np.linspace(0.1, 0.5, 5)       # skip C when B is low

In [14]:
# 4: Grid search over all combinations
records = []
total_iters = len(thr_as)*len(thr_bs)*len(thr_cs)*len(skip_bs)*len(skip_cs)
iter_count = 0
for thr_a in thr_as:
    for thr_b in thr_bs:
        for thr_c in thr_cs:
            for skip_b in skip_bs:
                for skip_c in skip_cs:
                    acc, t, r_c, r_bc = simulate_cascade(
                        confs, times, oks,
                        thr_a, thr_b, thr_c,
                        skip_b, skip_c)
                    records.append({
                        'thr_a': thr_a, 'thr_b': thr_b, 'thr_c': thr_c,
                        'skip_b': skip_b, 'skip_c': skip_c,
                        'accuracy': acc, 'avg_time': t,
                        'skip_C→D_rate': r_c, 'skip_B,C→D_rate': r_bc
                    })
                    # Light logging every 500 simulations
                    iter_count += 1
                    if iter_count % 500 == 0:
                        print(f"Completed {iter_count}/{total_iters} grid points")

Completed 500/25000 grid points
Completed 1000/25000 grid points
Completed 1500/25000 grid points
Completed 2000/25000 grid points
Completed 2500/25000 grid points
Completed 3000/25000 grid points
Completed 3500/25000 grid points
Completed 4000/25000 grid points
Completed 4500/25000 grid points
Completed 5000/25000 grid points
Completed 5500/25000 grid points
Completed 6000/25000 grid points
Completed 6500/25000 grid points
Completed 7000/25000 grid points
Completed 7500/25000 grid points
Completed 8000/25000 grid points
Completed 8500/25000 grid points
Completed 9000/25000 grid points
Completed 9500/25000 grid points
Completed 10000/25000 grid points
Completed 10500/25000 grid points
Completed 11000/25000 grid points
Completed 11500/25000 grid points
Completed 12000/25000 grid points
Completed 12500/25000 grid points
Completed 13000/25000 grid points
Completed 13500/25000 grid points
Completed 14000/25000 grid points
Completed 14500/25000 grid points
Completed 15000/25000 grid points


In [16]:
# 5: Analyze results
# Convert to DataFrame for easy sorting/filtering
df = pd.DataFrame(records)
# For example: show top 10 fastest under a target_accuracy% accuracy constraint
target_accuracy = 0.79
best = df[df['accuracy'] >= target_accuracy].sort_values('avg_time').head(10)
print(best)

       thr_a  thr_b  thr_c  skip_b  skip_c  accuracy  avg_time  skip_C→D_rate  \
10043    0.7    0.5   0.55     0.4     0.4    0.7900  0.009059         0.0300   
10040    0.7    0.5   0.55     0.4     0.1    0.7900  0.009080         0.0000   
10041    0.7    0.5   0.55     0.4     0.2    0.7900  0.009080         0.0036   
10042    0.7    0.5   0.55     0.4     0.3    0.7900  0.009082         0.0132   
10068    0.7    0.5   0.60     0.4     0.4    0.7904  0.009083         0.0300   
10093    0.7    0.5   0.65     0.4     0.4    0.7904  0.009100         0.0300   
10119    0.7    0.5   0.70     0.4     0.5    0.7916  0.009115         0.0544   
10019    0.7    0.5   0.50     0.4     0.5    0.7916  0.009115         0.0544   
10069    0.7    0.5   0.60     0.4     0.5    0.7916  0.009115         0.0544   
10144    0.7    0.5   0.75     0.4     0.5    0.7916  0.009115         0.0544   

       skip_B,C→D_rate  
10043           0.1732  
10040           0.1732  
10041           0.1732  
10042   

In [17]:
# Save full results to CSV
df.to_csv('abcd_cascade_grid_search.csv', index=False)
print("Saved full grid results to 'cascade_grid_search.csv'")

Saved full grid results to 'cascade_grid_search.csv'


In [19]:
# # Download CSV to local machine (in Colab)
# from google.colab import files
# files.download('abcd_cascade_grid_search.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>