# BayesAdapt experiment logs: metric aggregation and plots

In [None]:
import pandas as pd
import glob
import json
import matplotlib.pyplot as plt

In [None]:
def load_json(fname):
    """Read and parse a JSON file, falling back to an empty list on failure.

    Args:
        fname: Path of the JSON file to read.

    Returns:
        The parsed JSON value, or ``[]`` when the file cannot be opened or
        read (``OSError``) or its content cannot be decoded/parsed
        (``ValueError``, which includes ``json.JSONDecodeError`` and
        ``UnicodeDecodeError``).
    """
    try:
        with open(fname, 'r') as f:
            return json.load(f)
    # Best-effort loading is intentional (missing or partially written logs
    # should not stop the notebook), but only I/O and parse failures are
    # swallowed: a bare ``except:`` would also hide KeyboardInterrupt and
    # genuine programming errors.
    except (OSError, ValueError):
        return []

In [None]:
import numpy as np

def plot_with_err(x, y_mean, y_std, linestyle=None, label=None, color='blue', marker='.',ax=None):
    """Plot a mean curve with a shaded +/- one-standard-deviation band.

    The points are sorted by ``x`` before plotting, so the inputs may be given
    in any order.

    Args:
        x: Sequence of x positions (anything ``np.argsort`` can order).
        y_mean: Sequence of mean values, aligned with ``x``.
        y_std: Sequence of standard deviations, aligned with ``x``.
        linestyle, label, color, marker: Forwarded to ``ax.plot``; ``color``
            is also used for the shaded band.
        ax: Axes to draw on. When ``None`` the current pyplot axes are used.

    Returns:
        The axes that were drawn on.
    """
    if ax is None:
        # The signature advertises ax=None, so fall back to the current axes
        # instead of failing on ``None.plot``.
        import matplotlib.pyplot as plt
        ax = plt.gca()

    # Convert to plain arrays first: positional sorting must not depend on a
    # pandas index or on pandas' special handling of missing values in argsort.
    x_arr = np.asarray(x)
    mean_arr = np.asarray(y_mean)
    std_arr = np.asarray(y_std)

    order = np.argsort(x_arr)
    x_sorted = x_arr[order]
    y_mean_sorted = mean_arr[order]
    y_std_sorted = std_arr[order]

    y_upper = y_mean_sorted + y_std_sorted
    y_lower = y_mean_sorted - y_std_sorted

    ax.plot(x_sorted, y_mean_sorted, label=label, linestyle=linestyle, color=color, marker=marker)

    ax.fill_between(
        x_sorted,
        y_lower,
        y_upper,
        alpha=0.2,
        color=color
    )
    return ax

In [None]:
# Scratch check: the last character of the string 'seed0' converts to the integer 0.
int('seed0'[-1])

In [None]:
# Columns that identify one experiment configuration (everything in `keys`
# below except 'seed'); later cells group by these.
exp_keys = ['model', 'quant', 'wrapper', 'rank', 'prompt_type', 'dataset']

# NOTE(review): absolute, user-specific path; consider making it configurable.
root = '/workspace1/csamplawski/src/BayesAdapt/logs/'
json_fnames = glob.glob(f'{root}/**/metrics.json', recursive=True)

# Directory containing each metrics.json, de-duplicated. Sorted so that the
# row order of the resulting table is the same on every run.
expdirs = sorted({'/'.join(fname.split('/')[:-1]) for fname in json_fnames})

rows = []
for edir in expdirs:
    # Path components below `root`. tokens[0] is skipped; the configuration
    # fields start at tokens[1], in the order given by `keys`.
    tokens = edir.replace(root, '').split('/')
    keys = ['model', 'quant', 'wrapper', 'rank', 'prompt_type', 'seed', 'dataset']
    row = dict(zip(keys, tokens[1:]))
    row['rank'] = int(tokens[4].replace('rank', ''))

    # Parse the whole trailing run of digits, not just the final character,
    # so multi-digit seeds (e.g. 'seed10' -> 10) are read correctly.
    seed_token = tokens[6]
    row['seed'] = int(seed_token[len(seed_token.rstrip('0123456789')):])

    # load_json yields [] when the file cannot be read or parsed.
    data = load_json(f'{edir}/metrics.json')
    row['results'] = data
    rows.append(row)
df = pd.DataFrame(rows)

In [None]:
# Give every entry of each row's `results` list its own row, renumbering the
# index from 0.
df_exploded = df.explode('results').reset_index(drop=True)

# Expand the result records into one column per field, then discard the
# 'seed' field that comes from the records themselves.
metrics_df = pd.json_normalize(df_exploded['results'])
metrics_df = metrics_df.drop('seed', axis=1)

# Place the remaining columns of df_exploded next to the expanded metric
# columns; `df` now refers to this flattened table.
df = pd.concat((df_exploded.drop('results', axis=1), metrics_df), axis=1)

In [None]:
# Display the current contents of `df` for inspection.
df

In [None]:
# Configuration whose Brier score is summarised below.
model = 'Qwen3-8B'
wrapper = 'mle'
prompt_type = 'instruct'
dataset = 'winogrande_l'

# Row filter in DataFrame.query syntax, built from the four values above.
brier_filter = f"model == '{model}' and dataset == '{dataset}' and prompt_type == '{prompt_type}' and wrapper == '{wrapper}'"
q = df.query(brier_filter)

# Mean and standard deviation of 'Brier' for each combination of exp_keys
# among the selected rows (shown as the cell output).
q.groupby(exp_keys)['Brier'].agg(['mean', 'std'])


In [None]:
# One panel per metric (ACC / ECE / NLL); each wrapper is drawn as a mean line
# with a +/- std band, with the model name on the x axis.
#
# NOTE(review): `style_dict` is assigned in a cell that appears further down in
# this notebook, so on a fresh "Restart & Run All" this cell raises NameError.
# Move the `style_dict` cell above this one.

# Set the font size before the figure is created so it applies to every
# element of the new axes.
plt.rcParams.update({'font.size': 12})
fig, axes = plt.subplots(1, 3, figsize=(18, 5), sharey=False)
metrics = ['ACC', 'ECE', 'NLL']

dataset = 'winogrande_s'
prompt_type = 'instruct'

for ax, metric in zip(axes, metrics):
    # The aggregation does not depend on the wrapper, so compute it once per
    # metric instead of once per (metric, wrapper) pair.
    metric_df = df.groupby(exp_keys)[metric].agg(['mean', 'std'])

    for wrapper in ['mle', 'laplace', 'scalabl', 'tempscale', 'blob','tfb']:
        q = metric_df.query(f"dataset == '{dataset}' and prompt_type == '{prompt_type}' and wrapper == '{wrapper}'").reset_index()
        plot_with_err(q['model'], q['mean'], q['std'], **style_dict[wrapper], label=wrapper, ax=ax)

    # NOTE(review): the x values are model names (q['model']), not parameter
    # counts; confirm that this label is the intended one.
    ax.set_xlabel('# Parameters (Base + LoRA)')
    ax.set_ylabel(metric)
    ax.legend(
        loc='upper center',          # anchor point on the legend box itself
        bbox_to_anchor=(0.5, -0.15), # (x, y) in axes coordinates: centred, below the plot
        ncols=2,                     # lay the entries out in two columns
        frameon=True                 # draw the border around the legend
    )
    ax.grid()

In [None]:
# Line style per wrapper name. Each value is a dict of keyword arguments
# (color / linestyle / marker) that the plotting cells unpack into
# plot_with_err. The insertion order is the order in which wrappers are
# iterated when a cell loops over style_dict.keys().
style_dict = {
    wrapper_name: {'color': line_color, 'linestyle': line_style, 'marker': point_marker}
    for wrapper_name, line_color, line_style, point_marker in (
        ('laplace', 'black', '--', '.'),
        ('mle', 'red', ':', 'v'),
        ('tempscale', 'blue', 'dashdot', 'o'),
        ('blob', 'purple', '--', 's'),
        ('scalabl', 'green', 'solid', '^'),
        ('tfb', 'orange', 'dashdot', '^'),
    )
}
# Wrappers without a style entry yet (names as left by the author; presumably
# deep ensembles, MC dropout, SGLD?, MAP, zero-shot? -- TODO confirm).

In [None]:
# Scratch inspection: the 'model' column of the most recently assigned `q`,
# after moving its index back into ordinary columns.
q.reset_index()['model']

In [None]:
# Scratch inspection: the 'model' values of the most recently assigned `q`
# as a plain Python list.
list(q['model'])

In [None]:
# Build a per-experiment summary table (one row per experiment directory whose
# path contains 'scalabl'): median latency / peak memory and mean / std of
# ACC, ECE and NLL over all result records found below that directory.
# NOTE: this rebinds `df`, replacing the table built by the earlier cells.

# NOTE(review): absolute, user-specific path; consider making it configurable.
root = '/workspace1/csamplawski/src/BayesAdapt/logs/'
json_fnames = glob.glob(f'{root}/**/metrics.json', recursive=True)

# Directory containing each metrics.json, de-duplicated. Sorted so that the
# row order of the resulting table is the same on every run.
expdirs = sorted({'/'.join(fname.split('/')[:-1]) for fname in json_fnames})

rows = []
for edir in expdirs:
    if 'scalabl' not in edir:
        continue
    # Path components below `root`; tokens[0] is skipped and the remaining
    # components are labelled in the order given by `keys`.
    tokens = edir.replace(root, '').split('/')
    keys = ['model', 'quant', 'wrapper', 'rank', 'prompt_type', 'seed', 'dataset']
    row = dict(zip(keys, tokens[1:]))

    # Collect the result records of every entry directly below `edir`.
    records = []
    for seed_dir in glob.glob(f'{edir}/*'):
        # load_json yields [] for entries without a readable results.json.
        records += load_json(f'{seed_dir}/results.json')

        # Parameter counts from num_params.json, when that file exists and
        # has the expected keys ...
        params_info = load_json(f'{seed_dir}/num_params.json')
        try:
            row['trainable_params'] = params_info['trainable']
            row['total_params'] = params_info['total']
        except (KeyError, TypeError):
            # Missing file (load_json returned []) or missing keys.
            pass

        # ... overridden by the counts stored in the most recent result
        # record, when it provides them.
        try:
            row['trainable_params'] = records[-1]['num_trainable_params']
            row['total_params'] = records[-1]['num_total_params']
        except (IndexError, KeyError, TypeError):
            # No records yet, or the record does not carry these fields.
            pass

    if not records:
        # Without any record there is nothing to aggregate (the column
        # lookups below would raise KeyError on an empty frame).
        print(f'skipping {edir}: no results.json records found')
        continue

    results_df = pd.DataFrame(records)
    row['latency'] = results_df['latency'].median()
    row['peak_memory'] = results_df['peak_memory'].median()
    for metric in ['ACC', 'ECE', 'NLL']:
        row[f'{metric}_mean'] = results_df[metric].mean()
        row[f'{metric}_std']  = results_df[metric].std()
    rows.append(row)
df = pd.DataFrame(rows)


In [None]:
# Keep only the rows whose model name contains the literal text 'Qwen2.5'.
# regex=False: match the text literally (as a regular expression the '.'
# would match any character). na=False: rows with a missing model name are
# excluded instead of producing a non-boolean mask.
cond = df['model'].str.contains('Qwen2.5', regex=False, na=False)
df = df[cond]

In [None]:
# Single-panel plot of one metric against x_key for every wrapper in
# style_dict, restricted to one model family.
# Line styles accepted by matplotlib: '-', '--', '-.', ':', 'None', ' ', '',
# 'solid', 'dashed', 'dashdot', 'dotted'.

# Set the font size before the axes are created.
plt.rcParams.update({'font.size': 12})
fig, ax = plt.subplots()

metric = 'ACC'
dataset = 'winogrande_s'
prompt_type = 'instruct'
x_key = 'total_params'
model_family = 'Qwen2.5'

# Literal (non-regex) match on the model name; rows with a missing model name
# are excluded.
cond = df['model'].str.contains(model_family, regex=False, na=False)
family_df = df[cond]

for wrapper in style_dict.keys():
    q = family_df.query(f"dataset == '{dataset}' and prompt_type == '{prompt_type}' and wrapper == '{wrapper}'")
    plot_with_err(q[x_key], q[f'{metric}_mean'], q[f'{metric}_std'], **style_dict[wrapper], label=wrapper, ax=ax)

ax.grid()
ax.set_xlabel('# Parameters (Base + LoRA)')
ax.set_ylabel(metric)
ax.legend(
    loc='upper center',          # anchor point on the legend box itself
    bbox_to_anchor=(0.5, -0.15), # (x, y) in axes coordinates: centred, below the plot
    ncols=len(style_dict),       # one column per wrapper, i.e. a single row
    frameon=True                 # draw the border around the legend
)
# The title is derived from the selection above so it cannot drift from the
# data that is actually plotted.
# NOTE(review): 'rank = 8' is still hard-coded; the rows are not filtered by rank here.
ax.set_title(f'{model_family} Family | {prompt_type.capitalize()} | rank = 8 | {dataset}');

In [None]:
import matplotlib.pyplot as plt

# 1. Configuration
metrics = ['ACC', 'ECE', 'NLL']
prompt_type = 'base'
x_key = 'total_params'
# Used for the figure title only.
# NOTE(review): assumes `df` still holds just the Qwen2.5 rows selected by the
# earlier filter cell -- confirm if that filter changes.
model_family = 'Qwen2.5'

# Set once, before any figure is created.
plt.rcParams.update({'font.size': 12})

for dataset in ['winogrande_s', 'winogrande_m', 'ARC-Easy', 'ARC-Challenge', 'obqa']:
    # 2. One figure per dataset with 1 row and 3 columns (one panel per metric).
    # figsize is (width, height); the width accommodates the 3 panels.
    fig, axes = plt.subplots(1, 3, figsize=(18, 5), sharey=False)

    # 3. Draw every wrapper into each metric panel.
    for ax, metric in zip(axes, metrics):
        for wrapper in style_dict.keys():
            q = df.query(f"dataset == '{dataset}' and prompt_type == '{prompt_type}' and wrapper == '{wrapper}'")

            plot_with_err(
                q[x_key],
                q[f'{metric}_mean'],
                q[f'{metric}_std'],
                **style_dict[wrapper],
                label=wrapper,
                ax=ax
            )

        ax.grid(True)
        ax.set_xlabel('# Parameters (Base + LoRA)')
        ax.set_title(metric)

    # 4. Figure-level legend.
    # Handles and labels are taken from the first panel; every panel draws
    # the same wrappers in the same order.
    handles, labels = axes[0].get_legend_handles_labels()

    fig.legend(
        handles,
        labels,
        loc='lower center',           # anchor point of the legend
        bbox_to_anchor=(0.5, -0.1),   # (x, y) in figure coordinates ((0, 0) is bottom-left)
        ncols=len(style_dict),        # one column per wrapper, i.e. a single row
        frameon=True                  # draw the border around the legend
    )

    # 5. Formatting. The title is built from the settings above so it matches
    # the data that is plotted (prompt_type is 'base' here, not 'instruct').
    # NOTE(review): 'rank = 8' is still hard-coded; the rows are not filtered by rank here.
    fig.suptitle(f'{model_family} Family | {prompt_type.capitalize()} | rank = 8 | {dataset}', fontsize=16)
    fig.tight_layout()

# Adjust layout to make room for the legend at the bottom
# (tight_layout calculates spacing, then we shrink the bottom margin slightly)
#plt.subplots_adjust(bottom=0.2) 
