In [24]:
import pandas as pd
import glob

In [25]:
import os
import re
from pathlib import Path

# Gather the AUC report files for test loops 8 through 11 from the merged results tree
base_path = "/Users/rezadoobary/Downloads/bayes_results_merged"
results_root = Path(base_path)

# One recursive glob per loop number, flattened in loop order
all_auc_files = [
    auc_file
    for loop_num in (8, 9, 10, 11)
    for auc_file in results_root.glob(f"**/*_{loop_num}_auc.txt")
]

print(f"Found {len(all_auc_files)} AUC files")
all_auc_files

Found 15 AUC files


[PosixPath('/Users/rezadoobary/Downloads/bayes_results_merged/20251206-185245_train_567_test8_vstep1/merged_looporder_20251206-185246/test_loop_8_auc.txt'),
 PosixPath('/Users/rezadoobary/Downloads/bayes_results_merged/merged_looporder_20251206-182725/test_loop_8_auc.txt'),
 PosixPath('/Users/rezadoobary/Downloads/bayes_results_merged/merged_looporder_20251206-183808/test_loop_9_auc.txt'),
 PosixPath('/Users/rezadoobary/Downloads/bayes_results_merged/merged_looporder_20251206-183244/test_loop_9_auc.txt'),
 PosixPath('/Users/rezadoobary/Downloads/bayes_results_merged/20251206-185712_train_5678_test9_vstep12/merged_looporder_20251206-185712/test_loop_9_auc.txt'),
 PosixPath('/Users/rezadoobary/Downloads/bayes_results_merged/20251206-185321_train_5678_test9_vstep1/merged_looporder_20251206-185321/test_loop_9_auc.txt'),
 PosixPath('/Users/rezadoobary/Downloads/bayes_results_merged/20251206-185301_train_567_test9_vstep1/merged_looporder_20251206-185302/test_loop_9_auc.txt'),
 PosixPath('/Us

In [26]:
# Parse every AUC report into one record per file.
# Run configuration (train loops, vstep) comes from the file path; the test loop,
# AUC and sample count come from the file contents. Any field that cannot be
# found is recorded as None rather than aborting the scan.
results = []

for file_path in all_auc_files:
    path_str = str(file_path)

    # Extract vstep from the path (e.g. "..._vstep1" or "..._vstep12"); kept as a digit string
    vstep_match = re.search(r'vstep(\d+)', path_str)
    vstep = vstep_match.group(1) if vstep_match else None

    # Extract training loops from the path (e.g. "train_567", "train_5678"); kept as a digit string
    train_match = re.search(r'train_(\d+)', path_str)
    train_loops = train_match.group(1) if train_match else None

    # Read the report with an explicit encoding so parsing does not depend on the platform default
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Extract test loop number
    loop_match = re.search(r'Test Loop:\s*(\d+)', content)
    test_loop = int(loop_match.group(1)) if loop_match else None

    # Extract AUC score
    auc_match = re.search(r'Test AUC:\s*([\d.]+)', content)
    auc = float(auc_match.group(1)) if auc_match else None

    # Extract number of test samples (the count may contain thousands separators)
    samples_match = re.search(r'Number of test samples:\s*([\d,]+)', content)
    num_samples = samples_match.group(1).replace(',', '') if samples_match else None
    num_samples = int(num_samples) if num_samples else None

    results.append({
        'train_loops': train_loops,
        'vstep': vstep,
        'test_loop': test_loop,
        'auc': auc,
        'num_samples': num_samples,
        'file_path': path_str
    })

# Create DataFrame; naming the columns keeps the frame well-formed (and sortable)
# even when no files were found
df_results = pd.DataFrame(
    results,
    columns=['train_loops', 'vstep', 'test_loop', 'auc', 'num_samples', 'file_path'],
)

# train_loops and vstep are stored as digit strings, so a plain sort would be
# lexicographic ("12" before "2"). Compare by numeric value instead; the stored
# values themselves stay strings. Missing values still sort last.
df_results = df_results.sort_values(
    ['test_loop', 'train_loops', 'vstep'],
    key=lambda col: pd.to_numeric(col, errors='coerce'),
)

print(f"Collected {len(df_results)} results")
df_results


Collected 15 results


Unnamed: 0,train_loops,vstep,test_loop,auc,num_samples,file_path
0,567.0,1.0,8,0.718816,1432,/Users/rezadoobary/Downloads/bayes_results_mer...
1,,,8,0.718816,1432,/Users/rezadoobary/Downloads/bayes_results_mer...
6,567.0,1.0,9,0.706831,13972,/Users/rezadoobary/Downloads/bayes_results_mer...
5,5678.0,1.0,9,0.834584,13972,/Users/rezadoobary/Downloads/bayes_results_mer...
4,5678.0,12.0,9,0.828857,13972,/Users/rezadoobary/Downloads/bayes_results_mer...
2,,,9,0.821526,13972,/Users/rezadoobary/Downloads/bayes_results_mer...
3,,,9,0.834584,13972,/Users/rezadoobary/Downloads/bayes_results_mer...
8,5678.0,1.0,10,0.835772,153252,/Users/rezadoobary/Downloads/bayes_results_mer...
7,5678.0,12.0,10,0.812156,153252,/Users/rezadoobary/Downloads/bayes_results_mer...
10,56789.0,1.0,10,0.870177,153252,/Users/rezadoobary/Downloads/bayes_results_mer...


In [27]:
# Drop the rows that have no train_loops value
df_results = df_results.dropna(subset=['train_loops'])

In [28]:
# Arrange AUC as a grid: one row per (test_loop, train_loops) pair, one column per vstep
pivot_table = df_results.pivot_table(
    values='auc',
    index=['test_loop', 'train_loops'],
    columns='vstep',
    aggfunc='first',
)


def _vstep_order(label):
    # Digit-only labels are ordered by integer value; anything else is ordered as 0
    if label and label.isdigit():
        return int(label)
    return 0


# Order the vstep columns numerically (1, 12, ...) rather than as text
if not pivot_table.empty:
    sorted_cols = sorted(pivot_table.columns, key=_vstep_order)
    pivot_table = pivot_table[sorted_cols]

print("AUC Scores Table (Test Loop x Train Loops vs Vstep):")
pivot_table


AUC Scores Table (Test Loop x Train Loops vs Vstep):


Unnamed: 0_level_0,vstep,1,12
test_loop,train_loops,Unnamed: 2_level_1,Unnamed: 3_level_1
8,567,0.718816,
9,567,0.706831,
9,5678,0.834584,0.828857
10,5678,0.835772,0.812156
10,56789,0.870177,0.858537
11,56789,0.866293,0.843185
11,5678910,0.881619,0.881134


In [29]:
# Per-run listing of the parsed fields, leaving out the long file_path column
print("Detailed Results Table:")
detail_columns = ['train_loops', 'vstep', 'test_loop', 'auc', 'num_samples']
df_results[detail_columns]


Detailed Results Table:


Unnamed: 0,train_loops,vstep,test_loop,auc,num_samples
0,567,1,8,0.718816,1432
6,567,1,9,0.706831,13972
5,5678,1,9,0.834584,13972
4,5678,12,9,0.828857,13972
8,5678,1,10,0.835772,153252
7,5678,12,10,0.812156,153252
10,56789,1,10,0.870177,153252
9,56789,12,10,0.858537,153252
12,56789,1,11,0.866293,1697302
14,56789,12,11,0.843185,1697302


In [31]:
# Map each concatenated train-loop label (e.g. '5678') to the list of loop numbers it spells out.
# Every entry starts at loop 5 and ends at a final loop between 7 and 11 inclusive.
loop_mapper = {
    ''.join(map(str, range(5, last_loop + 1))): list(range(5, last_loop + 1))
    for last_loop in range(7, 12)
}

In [32]:
# The pivot keeps test_loop / train_loops in the row index, so they are not
# addressable as columns until the index is reset. Do that here (once) so this
# cell works on a fresh kernel instead of relying on an earlier ad-hoc reset.
if 'train_loops' not in pivot_table.columns:
    pivot_table = pivot_table.reset_index()

# Replace each concatenated label (e.g. '5678') with its list of loop numbers
pivot_table['train_loops'] = pivot_table['train_loops'].map(loop_mapper)

In [34]:
# Display the table after train_loops has been mapped through loop_mapper
pivot_table

vstep,test_loop,train_loops,1,12
0,8,"[5, 6, 7]",0.718816,
1,9,"[5, 6, 7]",0.706831,
2,9,"[5, 6, 7, 8]",0.834584,0.828857
3,10,"[5, 6, 7, 8]",0.835772,0.812156
4,10,"[5, 6, 7, 8, 9]",0.870177,0.858537
5,11,"[5, 6, 7, 8, 9]",0.866293,0.843185
6,11,"[5, 6, 7, 8, 9, 10]",0.881619,0.881134


In [41]:
# l_diff = test loop minus the last loop in the training list
last_train_loop = pivot_table['train_loops'].apply(lambda loops: loops[-1])
pivot_table['l_diff'] = pivot_table['test_loop'].sub(last_train_loop)

In [48]:
# vstep-1 AUC for the rows where l_diff is 1
(
    pivot_table
    .loc[pivot_table['l_diff'].eq(1), ['train_loops', 'test_loop', '1']]
    .rename(columns={'1': 'auc'})
)

vstep,train_loops,test_loop,auc
0,"[5, 6, 7]",8,0.718816
2,"[5, 6, 7, 8]",9,0.834584
4,"[5, 6, 7, 8, 9]",10,0.870177
6,"[5, 6, 7, 8, 9, 10]",11,0.881619


In [55]:
# vstep-1 AUC for the rows where l_diff is 2
is_two_loop_gap = pivot_table['l_diff'] == 2
pivot_table.loc[is_two_loop_gap, ['train_loops', 'test_loop', '1']].rename(columns={'1': 'auc'})

vstep,train_loops,test_loop,auc
1,"[5, 6, 7]",9,0.706831
3,"[5, 6, 7, 8]",10,0.835772
5,"[5, 6, 7, 8, 9]",11,0.866293
