In [1]:
import os
import numpy as np
import ast
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.ticker as ticker
from collections import Counter
from tqdm import tqdm
sns.set_theme(style="white")
colors = ['#66c2a5', '#fc8d62', '#8da0cb', '#e78ac3', '#a6d854', 'gray']

In [2]:
task = "aime24"
model = "qwen3-8b"
root_dir = "/home/rsadhukh/ranajoy/sparse_scaling/Kinetics/cost_model"

In [20]:
dense_df = pd.read_csv(f"{root_dir}/{task}/dense/{task}_{model}_dense_ntrial_tradeoff.csv")

max_cost = dense_df["total_cost"].max()
min_cost = dense_df["total_cost"].min() 
cost_points = np.logspace(np.log10(min_cost), np.log10(max_cost), 100)

delta = 0.001
covs = []
covs = []
for i, cost in enumerate(cost_points):
    best_covs = []
    for query_id in dense_df["query_id"].unique():
        query_df = dense_df[dense_df["query_id"] == query_id]
        best_coverage = query_df[query_df["total_cost"] <= cost]["coverage"]
        if len(best_coverage) > 0:
            best_cov = best_coverage.max()
            best_covs.append(best_cov)
        else:
            best_covs.append(0)
    covs.append(np.mean(best_covs))

In [None]:
all_sparse_covs = {}
all_sparse_dfs = {}
for sparsity in ["topk", "blocktopk"]:
    
    sparse_df = pd.concat([
        pd.read_csv(f"{root_dir}/{task}/{sparsity}/{fname}")
        for fname in os.listdir(f"{root_dir}/{task}/{sparsity}")
        if fname.endswith("_ntrial_tradeoff.csv") and model in fname
    ])
    print(len(sparse_df))
    
    all_sparse_dfs[sparsity] = sparse_df
    sparse_covs = []
    
    for i, cost in enumerate(cost_points):
        best_covs = []
        best_genlen = []
        best_budgets = []
        median_generation_lengths = []
        for query_id in sparse_df["query_id"].unique():
            query_df = sparse_df[sparse_df["query_id"] == query_id]
            best_coverage = query_df[query_df["total_cost"] <= cost]["coverage"]
            if len(best_coverage) > 0:
                cur_best_cov = best_coverage.max()
                best_covs.append(cur_best_cov)
                cur_best_df = query_df[query_df["coverage"] >= cur_best_cov * (1 - delta)]
                min_cost_df = cur_best_df[cur_best_df["total_cost"] == cur_best_df["total_cost"].min()]
                best_genlen.append(min_cost_df["generation_length"].values[0])
                best_budgets.append(min_cost_df["budget"].values[0])
            else:
                best_covs.append(0)
                best_genlen.append(0)
                best_budgets.append(0)
                median_generation_lengths.append(0)
        sparse_covs.append(np.mean(best_covs))

    all_sparse_covs[sparsity] = sparse_covs

1080
> [0;32m/tmp/ipykernel_2071088/805976609.py[0m(24)[0;36m<module>[0;34m()[0m
[0;32m     22 [0;31m            [0mbest_cov_df[0m [0;34m=[0m [0mquery_df[0m[0;34m[[0m[0mquery_df[0m[0;34m[[0m[0;34m"total_cost"[0m[0;34m][0m [0;34m<=[0m [0mcost[0m[0;34m][0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     23 [0;31m            [0;32mimport[0m [0mpdb[0m[0;34m;[0m [0mpdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m---> 24 [0;31m            [0;32mif[0m [0mlen[0m[0;34m([0m[0mbest_coverage[0m[0;34m)[0m [0;34m>[0m [0;36m0[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     25 [0;31m                [0mcur_best_cov[0m [0;34m=[0m [0mbest_coverage[0m[0;34m.[0m[0mmax[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     26 [0;31m                [0mbest_covs[0m[0;34m.[0m[0mappend[0m[0;34m([0m[0mcur_best_cov[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m


   query_id                                  generation_length  trial  \
0         0  [2937, 10271, 9667, 8809, 16987, 12844, 11248,...      1   
1         0  [2937, 10271, 9667, 8809, 16987, 12844, 11248,...      2   
0         0  [4445, 5967, 4219, 3950, 7575, 7250, 4413, 459...      1   
1         0  [4445, 5967, 4219, 3950, 7575, 7250, 4413, 459...      2   
0         0  [4192, 3812, 4745, 4387, 3660, 4188, 4315, 438...      1   
0         0  [9330, 8707, 10825, 9632, 11609, 13185, 11500,...      1   
1         0  [9330, 8707, 10825, 9632, 11609, 13185, 11500,...      2   
2         0  [9330, 8707, 10825, 9632, 11609, 13185, 11500,...      4   
0         0  [7939, 30767, 30767, 14028, 13576, 20018, 1476...      1   
0         0  [5467, 6488, 6067, 6854, 5143, 5534, 5501, 493...      1   
1         0  [5467, 6488, 6067, 6854, 5143, 5534, 5501, 493...      2   
2         0  [5467, 6488, 6067, 6854, 5143, 5534, 5501, 493...      4   

   coverage  compute_cost  memory_cost  total_cost

In [15]:
df = all_sparse_dfs["topk"]
max_df = df.groupby("query_id").agg({"coverage": list, "trial": list}).reset_index()

In [17]:
df

Unnamed: 0,query_id,generation_length,trial,coverage,compute_cost,memory_cost,total_cost,budget
0,0,"[2937, 10271, 9667, 8809, 16987, 12844, 11248,...",1,0.937500,177.779911,228.413344,406.193255,256
1,0,"[2937, 10271, 9667, 8809, 16987, 12844, 11248,...",2,0.997984,355.559823,456.826687,812.386510,256
2,0,"[2937, 10271, 9667, 8809, 16987, 12844, 11248,...",4,1.000000,711.119646,913.653375,1624.773021,256
3,0,"[2937, 10271, 9667, 8809, 16987, 12844, 11248,...",8,1.000000,1422.239291,1827.306750,3249.546041,256
4,0,"[2937, 10271, 9667, 8809, 16987, 12844, 11248,...",16,1.000000,2844.478583,3654.613500,6499.092083,256
...,...,...,...,...,...,...,...,...
175,29,"[30767, 30767, 30767, 30767, 30338, 26918, 300...",2,0.000000,973.563796,628.128276,1601.692073,128
176,29,"[30767, 30767, 30767, 30767, 30338, 26918, 300...",4,0.000000,1947.127592,1256.256553,3203.384145,128
177,29,"[30767, 30767, 30767, 30767, 30338, 26918, 300...",8,0.000000,3894.255185,2512.513106,6406.768291,128
178,29,"[30767, 30767, 30767, 30767, 30338, 26918, 300...",16,0.000000,7788.510370,5025.026212,12813.536582,128
