In [11]:
import os
import pandas as pd
import numpy as np
from scipy import stats

method = 'R2CCP'
models = ['4omini', 'dsr1', 'qwen']
datasets = ['Summeval', 'Dialsumm']
dimensions = ['consistency', 'coherence', 'fluency', 'relevance']
calsizes = ['0.25', '0.5', '0.75', '1']

def coverage_and_width(low, up, y_test):
    width = up - low
    coverage = np.mean((low <= y_test) & (y_test <= up))
    return width.mean(), coverage.mean()

def process_directory_cal(directory):
    results = []
    for model in models:
        for dataset in datasets:
            for dimension in dimensions:
                for seed in range(1, 31):
                    for calsize in calsizes:
                        if calsize == '1':
                            file_name = f"R2CCP_summeval_{dimension}_{seed}.csv"
                            file_path = os.path.join(directory, 'interval_results', model, method, file_name)
                        else:
                            file_name = f"R2CCP_{dataset}_{dimension}_{seed}_{calsize}.csv"
                            file_path = os.path.join(directory, 'calsize', model, file_name)
                        
                        if os.path.exists(file_path):
                            data = pd.read_csv(file_path)
                            data = data.round(2)
                            width, coverage = coverage_and_width(data['low'], data['up'], data['y_test'])
                            results.append((model, dataset, dimension, calsize, coverage))
    
    return pd.DataFrame(results, columns=['Model', 'Dataset', 'Dimension', 'Calsize', 'Coverage'])

def aggregate_results(df):
    aggregated = df.groupby(
        ['Model', 'Dataset', 'Dimension', 'Calsize']
    ).agg({
        'Coverage': ['mean', 'std']
    }).reset_index()

    aggregated.columns = [
        'Model', 'Dataset', 'Dimension', 'Calsize',
        'Coverage_mean', 'Coverage_std'
    ]

    return aggregated[
        ['Model', 'Dataset', 'Dimension', 'Calsize',
         'Coverage_mean', 'Coverage_std']
    ]

In [12]:
base_dir = os.getcwd() 
df_all = process_directory_cal(base_dir)
df_aggregated = aggregate_results(df_all)
df_aggregated.to_csv('Summary_calsize.csv', index=False)

In [13]:
print(df_aggregated)

     Model   Dataset    Dimension Calsize  Coverage_mean  Coverage_std
0   4omini  Dialsumm    coherence    0.25       0.887238      0.052089
1   4omini  Dialsumm    coherence     0.5       0.890714      0.042955
2   4omini  Dialsumm    coherence    0.75       0.902810      0.024175
3   4omini  Dialsumm    coherence       1       0.896339      0.031191
4   4omini  Dialsumm  consistency    0.25       0.876772      0.048817
..     ...       ...          ...     ...            ...           ...
91    qwen  Summeval      fluency       1       0.901750      0.019207
92    qwen  Summeval    relevance    0.25       0.876595      0.064757
93    qwen  Summeval    relevance     0.5       0.883125      0.023655
94    qwen  Summeval    relevance    0.75       0.895750      0.025719
95    qwen  Summeval    relevance       1       0.904542      0.024919

[96 rows x 6 columns]
