In [2]:
import os
import numpy as np
import ast
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.ticker as ticker
from collections import Counter
from tqdm import tqdm
# Use seaborn's plain "white" style for the figures in this notebook.
sns.set_theme(style="white")

# Line colours, indexed by curve position when plotting.
colors = [
    '#66c2a5',
    '#fc8d62',
    '#8da0cb',
    '#e78ac3',
    '#a6d854',
    'gray',
]


In [17]:
# --- Run configuration -------------------------------------------------------
# Benchmark and model whose result CSVs are analysed; both strings are
# interpolated into file paths by the cells that read and write files.
task = "aime24"
model = "qwen3-8b"

# Model identifier -> size.
# NOTE(review): values look like parameter counts in billions -- confirm.
model_sizes = {
    "qwen3-32b": 32,
    "qwen3-8b": 8,
    "qwen3-4b": 4,
    "qwen3-14b": 14,
    "qwen3-1-7b": 1.7,
    "qwen3-0-6b": 0.6,
}

# Generation-length budgets: 2048, 4096, ..., 32768 (16 values, step 2048).
gen_len_budgets = list(range(2048, 32768 + 1, 2048))

# Directory that contains one sub-directory per task.
root_dir = "."

In [11]:
# Load the dense results for the configured task/model. The code below reads
# the columns "query_id", "total_cost" and "coverage".
dense_df = pd.read_csv(f"{root_dir}/{task}/dense/{task}_{model}_dense_genlen_tradeoff.csv")

# Cost grid: 100 log-spaced points from one tenth of the cheapest dense row up
# to the most expensive dense row.
max_cost = dense_df["total_cost"].max()
min_cost = dense_df["total_cost"].min() / 10
cost_points = np.logspace(np.log10(min_cost), np.log10(max_cost), 100)

# Relative tolerance constant. It is not used by the dense sweep itself; it is
# kept so that any cell referencing `delta` keeps working.
delta = 0.001

# Split the frame per query once. The split does not depend on the cost point,
# so doing it inside the sweep would repeat the same filter 100 times.
dense_query_frames = [
    dense_df[dense_df["query_id"] == query_id]
    for query_id in dense_df["query_id"].unique()
]

# covs[k] = mean over queries of the best coverage reachable with
# total_cost <= cost_points[k]; a query with no affordable row contributes 0.
covs = []
for cost in cost_points:
    best_covs = []
    for query_df in dense_query_frames:
        affordable_coverage = query_df[query_df["total_cost"] <= cost]["coverage"]
        if len(affordable_coverage) > 0:
            best_covs.append(affordable_coverage.max())
        else:
            best_covs.append(0)
    covs.append(np.mean(best_covs))

In [None]:
# Sparsity method -> list of mean best-coverage values, one per entry of
# `cost_points` (same layout as the dense curve).
all_sparse_covs = {}

for sparsity in ["topk", "blocktopk"]:
    sparse_dir = f"{root_dir}/{task}/{sparsity}"

    # Collect every trade-off CSV for the configured model. The names are
    # sorted because os.listdir returns entries in arbitrary order, which
    # would otherwise make the concatenation order machine-dependent.
    result_files = sorted(
        fname
        for fname in os.listdir(sparse_dir)
        if fname.endswith("_genlen_tradeoff.csv") and model in fname
    )
    if not result_files:
        # pd.concat([]) would raise a generic ValueError; keep the exception
        # type but say which directory and model produced no matches.
        raise ValueError(
            f"No '*_genlen_tradeoff.csv' files for model {model!r} found in {sparse_dir}"
        )

    sparse_df = pd.concat([
        pd.read_csv(f"{sparse_dir}/{fname}")
        for fname in result_files
    ])
    print(len(sparse_df))

    # Split per query once; the split is independent of the cost point.
    sparse_query_frames = [
        sparse_df[sparse_df["query_id"] == query_id]
        for query_id in sparse_df["query_id"].unique()
    ]

    # For each cost point: mean over queries of the best coverage among rows
    # with total_cost <= cost; a query with no affordable row contributes 0.
    # Only the coverage is recorded here, since the plot consumes nothing else.
    sparse_covs = []
    for cost in cost_points:
        best_covs = []
        for query_df in sparse_query_frames:
            affordable_coverage = query_df[query_df["total_cost"] <= cost]["coverage"]
            if len(affordable_coverage) > 0:
                best_covs.append(affordable_coverage.max())
            else:
                best_covs.append(0)
        sparse_covs.append(np.mean(best_covs))

    all_sparse_covs[sparsity] = sparse_covs


In [None]:
# Coverage-vs-cost comparison: the dense baseline plus one curve per sparse
# method, all drawn on a shared log-scaled cost axis.
fig, ax = plt.subplots(figsize=(12, 8))

sns.lineplot(x=cost_points, y=covs, label="dense", linewidth=4, color=colors[0], ax=ax)
# colors[0] is taken by the dense curve, so sparse curves start at index 1.
for color_idx, sparsity in enumerate(["topk", "blocktopk"], start=1):
    sns.lineplot(
        x=cost_points,
        y=all_sparse_covs[sparsity],
        label=sparsity,
        linewidth=4,
        color=colors[color_idx],
        ax=ax,
    )

ax.set_xscale("log")
ax.set_xlabel("Tera-eflops", fontsize=28)
ax.set_ylabel("Solving Rate", fontsize=28)
plt.xticks(fontsize=28)
plt.yticks(fontsize=28)
ax.legend(fontsize=23)
ax.grid(True, which="both", ls="-", alpha=0.5)

# Hide the axes frame; the grid lines stay visible.
for border in ax.spines.values():
    border.set_visible(False)

fig.tight_layout()
fig.savefig(f"{root_dir}/{task}/{task}_{model}_sparse_method_comparison_genlen.pdf")