In [93]:
import os
import pandas as pd
from pathlib import Path

# --- Parameters You Need to Set ---

# 1. The main folder containing all your experiment subdirectories
root_directory = '/home/eric/Documents/cervicalResearchIIP/result_test/MCUEffectivenessReal'

# 2. The exact name of the csv file you want to read from each directory
target_csv_name = 'Dice.csv'

# 3. The name of the column that holds the performance metric
performance_column_name = 'Dice_score'


# This list will store our results before creating the final DataFrame
results = []

results = []

print(f"Starting traversal of '{root_directory}'...")

# os.walk is the perfect tool for this. It goes into every subdirectory.
for dirpath, dirnames, filenames in os.walk(root_directory):
    # Check if our target CSV is in the list of files for the current directory
    if target_csv_name in filenames:
        experiment_name = os.path.basename(dirpath)
        file_path = os.path.join(dirpath, target_csv_name)
        
        try:
            # Read the CSV file, telling pandas there is NO header row
            temp_df = pd.read_csv(file_path, header=None)
            
            # Check if the DataFrame is not empty
            if not temp_df.empty:
                # Get the last value from the first row.
                # .iloc[0, -1] gets the item from the first row (index 0)
                # and the last column (index -1).
                last_performance_value = temp_df.iloc[0, -1]
                
                # Append the result as a dictionary to our list
                results.append({
                    'experiment': experiment_name,
                    'dice_score': last_performance_value
                })
                print(f"  [SUCCESS] Found and read data for '{experiment_name}'")
            else:
                 print(f"  [SKIPPED] File found for '{experiment_name}', but it is empty.")

        except Exception as e:
            # Catch potential errors like malformed files
            print(f"  [ERROR] Could not process file for '{experiment_name}'. Reason: {e}")

# Convert the list of dictionaries into a final, clean pandas DataFrame
mcu_df = pd.DataFrame(results)

print("\n--- Traversal Complete ---")
print("Final aggregated performance data:")
print(mcu_df)
 

# 1. The main folder containing all your experiment subdirectories
root_directory2 = '/home/eric/Documents/cervicalResearchIIP/result_test/UNetEffectivenessReal'

# 2. The exact name of the csv file you want to read from each directory
target_csv_name = 'Dice.csv'

# 3. The name of the column that holds the performance metric
performance_column_name = 'Dice_score'


# This list will store our results before creating the final DataFrame

results = []

print(f"Starting traversal of '{root_directory2}'...")

# os.walk is the perfect tool for this. It goes into every subdirectory.
for dirpath, dirnames, filenames in os.walk(root_directory2):
    # Check if our target CSV is in the list of files for the current directory
    if target_csv_name in filenames:
        experiment_name = os.path.basename(dirpath)
        file_path = os.path.join(dirpath, target_csv_name)
        
        try:
            # Read the CSV file, telling pandas there is NO header row
            temp_df = pd.read_csv(file_path, header=None)
            
            # Check if the DataFrame is not empty
            if not temp_df.empty:
                # Get the last value from the first row.
                # .iloc[0, -1] gets the item from the first row (index 0)
                # and the last column (index -1).
                last_performance_value = temp_df.iloc[0, -1]
                
                # Append the result as a dictionary to our list
                results.append({
                    'experiment': experiment_name,
                    'dice_score': last_performance_value
                })
                print(f"  [SUCCESS] Found and read data for '{experiment_name}'")
            else:
                 print(f"  [SKIPPED] File found for '{experiment_name}', but it is empty.")

        except Exception as e:
            # Catch potential errors like malformed files
            print(f"  [ERROR] Could not process file for '{experiment_name}'. Reason: {e}")

# Convert the list of dictionaries into a final, clean pandas DataFrame
u_df = pd.DataFrame(results)

print("\n--- Traversal Complete ---")
print("Final aggregated performance data:")
print(u_df)
mcu_df['dice_score'] = mcu_df['dice_score'] * 100
#mcu_df = mcu_df.drop(16)
print(f"Variance: {mcu_df['dice_score'].var()}")
print(f"Mean: {mcu_df['dice_score'].mean()}")
mcu_df
u_df['dice_score'] = u_df['dice_score'] * 100
print(f"Variance: {u_df['dice_score'].var()}")
print(f"Mean: {u_df['dice_score'].mean()}")
#u_df = u_df.drop(35)
u_df

Starting traversal of '/home/eric/Documents/cervicalResearchIIP/result_test/MCUEffectivenessReal'...
  [SUCCESS] Found and read data for '20250701-Study-MCUNet-9662'
  [SUCCESS] Found and read data for '20250701-Study-MCUNet-34518'
  [SUCCESS] Found and read data for '20250701-Study-MCUNet-68617'
  [SUCCESS] Found and read data for '20250701-Study-MCUNet-21392'
  [SUCCESS] Found and read data for '20250701-Study-MCUNet-2252'
  [SUCCESS] Found and read data for '20250701-Study-MCUNet-77996'
  [SUCCESS] Found and read data for '20250701-Study-MCUNet-81452'
  [SUCCESS] Found and read data for '20250701-Study-MCUNet-18497'
  [SUCCESS] Found and read data for '20250701-Study-MCUNet-73667'
  [SUCCESS] Found and read data for '20250701-Study-MCUNet-81108'
  [SUCCESS] Found and read data for '20250701-Study-MCUNet-81020'
  [SUCCESS] Found and read data for '20250701-Study-MCUNet-19041'
  [SUCCESS] Found and read data for '20250701-Study-MCUNet-86325'
  [SUCCESS] Found and read data for '202507

Unnamed: 0,experiment,dice_score
0,20250701-Study-UNet-51302,64.971677
1,20250701-Study-UNet-10400,80.229947
2,20250701-Study-UNet-7980,77.219381
3,20250701-Study-UNet-81108,83.698176
4,20250701-Study-UNet-54731,69.938701
...,...,...
259,20250701-Study-UNet-74556,69.273447
260,20250701-Study-UNet-44585,71.722932
261,20250701-Study-UNet-22763,76.516994
262,20250701-Study-UNet-53703,84.671587


In [94]:
unet_scores = u_df['dice_score'].values
mcunet_scores = mcu_df['dice_score'].values

In [95]:
print(unet_scores)

[64.97167704 80.2299469  77.21938119 83.69817602 69.93870126 79.16619963
 78.91703887  6.69842697 72.0712439  77.19536379 74.35075414 83.17849008
 67.80603905 82.89793824 76.93058082 68.79951457 80.57214286 85.44398298
 78.61723451 77.9248633  77.80869141 67.02789918 43.45407613 49.68830584
 80.41513843 43.32332412 49.65281739 69.38888743 57.66549994 67.31087318
 67.43160398 78.99216379 83.02723632  7.72073816 75.8627705  68.79496115
 52.43297812 82.60773391 72.48558773 22.69729701 78.24712644 74.45078769
 77.28496796 68.92482574 65.34417024 71.45262027 77.77547714 80.52638005
 76.27459805 81.85161135 85.73140288 72.53399876 80.90887504 80.7733307
 77.12528037 54.44271773 84.50554674 79.0218314  73.64988376 75.27495501
 78.73128445 75.86355142 70.10229166 71.76023254 42.27524937 79.52410767
 78.85542875 71.1691417  81.26262795 77.57304344 78.39322515 78.62369279
 75.14809677 75.76393242 68.71962011 68.26243285 80.93522971 85.96299391
 70.4628871  80.70122497 85.85420268 73.16313033 83.

In [96]:
print(mcunet_scores)

[76.18888104 89.56122275 76.89192215 82.67941125 85.05733561 77.67769056
 82.76010086 78.47809659 78.42597364 80.86638407 79.06322217 79.77383816
 38.62125507 68.94838838 72.38449277 82.19570092 79.8217418  67.3946199
 80.18136885 48.39333531 67.32178047 71.35415117 80.26290888 76.64914688
 75.20121596 78.59685956 73.57560674 79.13076112  1.09488381 84.76005886
 74.27897227 70.88777715 79.02109891 78.03292482 80.4349603  75.23960679
 15.02964533 81.83407966 56.51577917 83.47394798 79.00878772 75.35355486
 77.64388243 18.07425381 81.97130345 83.86381581 77.66586959 76.45278758
 62.11789747 74.80587839 69.24279729 69.3808873  75.9739063  77.44515899
 82.38716391 74.21046136 78.75940362 80.16318243 78.90476315 81.21001887
 75.39100065 84.02957833 80.83234068 69.45418054 82.46070603 77.30678381
 85.1717582  17.88176369 79.29376001 78.24781584 78.78215368 84.72761312
 83.18618691 78.63940957 61.61440413 84.30547832 78.28283782 13.7484786
 65.83279228 79.42774371 86.11883522 82.89137348 78.2

In [97]:
# diffs = mcunet_scores - unet_scores
# r = abs(np.median(diffs)) / np.std(diffs, ddof=1)  # r effect size approximation
# r

In [98]:
from statsmodels.stats.power import TTestPower


In [100]:
import numpy as np
from statsmodels.stats.power import TTestPower

def wilcoxon_sample_size_approximation(A, B, alpha=0.05, power=0.8, safety_factor=1.15, desired_diff = 4):
    """
    Estimate Wilcoxon sample size using t-test calculation + safety factor
    """
    print("Pilot data summary:")
    print(f"Algorithm A mean: {np.mean(A):.4f}")
    print(f"Algorithm B mean: {np.mean(B):.4f}")
    print(f"Difference in means: {abs(np.mean(A) - np.mean(B)):.4f}")
    print(f"Standard deviation of differences: {np.std(A-B, ddof=1):.4f}")

    # Step 1: Calculate Cohen's d as if it were a t-test
    diffs = A - B
    std_diffs = np.std(diffs, ddof=1)
    cohens_d = desired_diff / std_diffs
    #cohens_d = abs(np.mean(diffs) / np.std(diffs, ddof=1))
    
    # Step 2: CREATE INSTANCE FIRST, then call solve_power
    power_analysis = TTestPower()  # Create the instance
    n_ttest = power_analysis.solve_power(effect_size=cohens_d, 
                                        alpha=alpha, 
                                        power=power, 
                                        nobs=None,
                                        alternative='two-sided')
    
    # Step 3: Apply safety factor for non-parametric test
    n_wilcoxon = int(np.ceil(n_ttest * safety_factor))
    
    return n_wilcoxon, n_ttest, cohens_d

# Usage
n_wilcoxon, n_ttest, d = wilcoxon_sample_size_approximation(unet_scores, mcunet_scores)
print(f"T-test sample size: {n_ttest:.0f}")
print(f"Wilcoxon sample size (15% increase): {n_wilcoxon}")
print(f"Effect size (Cohen's d): {d:.3f}")

Pilot data summary:
Algorithm A mean: 73.1827
Algorithm B mean: 74.6067
Difference in means: 1.4241
Standard deviation of differences: 20.5557
T-test sample size: 209
Wilcoxon sample size (15% increase): 241
Effect size (Cohen's d): 0.195
