In [None]:
import os 
import pandas as pd
import re
import glob
from pathlib import Path

# Captures the number that follows "Prec@1" in a log line (e.g. "Prec@1 93.520").
# Unlike a bare "[\d.]+" it never captures text float() cannot parse (".", "93.5.").
_PREC1_PATTERN = re.compile(r"Prec@1\s+(\d+(?:\.\d*)?|\.\d+)")


def _select_test_files(test_files, augmentation):
    """Return the files named for ``augmentation``, or all files if none is."""
    if not augmentation:
        return test_files
    prefix = f"log_test_{augmentation}"
    matching = [
        name for name in test_files
        if name == f"{prefix}.csv" or name.startswith(f"{prefix}_")
    ]
    # Folders whose files do not carry the augmentation in their name
    # (e.g. a dedicated test_<augmentation>/ folder) keep every file.
    return matching or test_files


def _max_prec1_in_file(file_path):
    """Return the largest Prec@1 value found in ``file_path`` (None if absent)."""
    best = None
    with open(file_path, "r") as handle:
        for line in handle:
            match = _PREC1_PATTERN.search(line)
            if match:
                value = float(match.group(1))
                if best is None or value > best:
                    best = value
    return best


def parse_data(folder_path, dataset_name=None, strategy=None, augmentation=None):
    """
    Summarise the log_test*.csv files of one folder into a single CSV.

    For every matching file the highest ``Prec@1`` value is extracted and one
    row per file is written to ``<dataset>_<strategy>_<augmentation>_TEST.csv``
    (or ``test_summary_<augmentation>.csv`` when dataset/strategy are missing)
    inside ``folder_path``.

    Args:
        folder_path: Folder containing the ``log_test*.csv`` files.
        dataset_name: Label stored in the ``dataset`` column ('unknown' if None).
        strategy: Label stored in the ``strategy`` column ('unknown' if None).
        augmentation: Label stored in the ``augmentation`` column. When given
            and the folder contains files named ``log_test_<augmentation>.csv``
            or ``log_test_<augmentation>_*.csv``, only those files are read, so
            a folder mixing several augmentations yields one clean summary per
            augmentation. When None it is derived from a ``test_<aug>`` folder
            name, else from the first (sorted) file name.

    Returns:
        The summary ``pandas.DataFrame``, or None when the folder is missing,
        holds no test files, or no file contains a ``Prec@1`` value.
    """
    folder_path = Path(folder_path)

    if not folder_path.is_dir():
        print(f"❌ Folder does not exist: {folder_path}")
        return None

    print(f"📂 Processing test folder: {folder_path}")

    # Sorted so the row order of the summary is reproducible across runs.
    test_files = sorted(
        name for name in os.listdir(folder_path)
        if name.endswith('.csv') and name.startswith('log_test')
        and (folder_path / name).is_file()
    )

    if not test_files:
        print(f"  ⚠️ No test files found in {folder_path}")
        return None

    if augmentation is None:
        # Derive the label from the folder name, else from the first file.
        if folder_path.name.startswith('test_'):
            augmentation = folder_path.name[len('test_'):]
        else:
            first_stem = test_files[0].replace('log_test_', '').replace('.csv', '')
            augmentation = first_stem.split('_')[0] or 'unknown'
    else:
        # Caller asked for one augmentation: do not mix in the other ones.
        test_files = _select_test_files(test_files, augmentation)

    print(f"  🎨 Augmentation: {augmentation}")
    print(f"  📊 Found {len(test_files)} test files")

    prec1_data = []

    for file in test_files:
        print(f"    📄 Processing: {file}")

        try:
            max_prec1 = _max_prec1_in_file(folder_path / file)
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole folder.
            print(f"      ❌ Error processing {file}: {e}")
            continue

        if max_prec1 is None:
            print(f"      ⚠️ No Prec@1 found in {file}")
            continue

        prec1_data.append({
            'file': file,
            'dataset': dataset_name or 'unknown',
            'strategy': strategy or 'unknown',
            'augmentation': augmentation,
            'max_Prec@1': max_prec1
        })
        print(f"      ✅ Max Prec@1: {max_prec1}")

    if not prec1_data:
        print(f"  ⚠️ No valid test results found")
        return None

    df = pd.DataFrame(prec1_data)

    if dataset_name and strategy and augmentation:
        output_filename = f"{dataset_name}_{strategy}_{augmentation}_TEST.csv"
    else:
        output_filename = f"test_summary_{augmentation}.csv"

    # The summary name never starts with "log_test", so it is not re-read as input.
    df.to_csv(folder_path / output_filename, index=False)

    print(f"  ✅ Saved: {output_filename}")
    print(f"  📈 Summary: {len(prec1_data)} files processed")

    return df

def find_test_folders(root_dir):
    """
    Find the test folders below ``root_dir`` and summarise each via ``parse_data``.

    Three layouts are recognised for every direct subdirectory of ``root_dir``:
    1. ``<root>/test_<augmentation>/`` - a direct test folder (strategy unknown).
    2. ``<root>/<strategy>/mixed/`` - ``log_test_<augmentation>*.csv`` files of
       several augmentations side by side; ``parse_data`` is called once per
       augmentation found in the file names.
    3. ``<root>/<strategy>/test_<augmentation>/`` - organised test folders.

    The dataset name is the root folder name with ``log_`` removed.

    Args:
        root_dir: Dataset log directory, e.g. ``log_cinic10``.

    Returns:
        List of folder paths for which ``parse_data`` produced a summary. A
        ``mixed`` folder appears once per augmentation that was summarised.
        The list is empty (never None) when ``root_dir`` does not exist, so
        callers can always ``extend`` with the result.
    """
    root_dir = Path(root_dir)

    if not root_dir.exists():
        print(f"❌ Root directory does not exist: {root_dir}")
        return []

    dataset_name = root_dir.name.replace('log_', '')
    print(f"\n🔍 Searching for test folders in: {root_dir} (dataset: {dataset_name})")

    processed_folders = []

    # Sorted traversal keeps the processing order and log output reproducible.
    for subdir in sorted(os.listdir(root_dir)):
        subdir_path = root_dir / subdir

        if not subdir_path.is_dir():
            continue

        print(f"\n📂 Checking subdirectory: {subdir}")

        # Case 1: Direct test folders (test_*)
        if subdir.startswith('test_'):
            print(f"  ✅ Found direct test folder: {subdir}")

            result_df = parse_data(
                subdir_path,
                dataset_name=dataset_name,
                strategy='unknown',  # Can't determine strategy from this structure
                augmentation=subdir[len('test_'):]
            )

            if result_df is not None:
                processed_folders.append(subdir_path)
            continue

        strategy = subdir

        # Case 2: Check for mixed folder inside strategy folder
        mixed_folder_path = subdir_path / 'mixed'

        if mixed_folder_path.is_dir():
            print(f"  📁 Found mixed folder: {subdir}/mixed")

            test_files = sorted(mixed_folder_path.glob("log_test_*.csv"))

            if test_files:
                # The augmentation is the first "_"-separated token after "log_test_".
                augmentations = sorted({
                    test_file.name.replace('log_test_', '').replace('.csv', '').split('_')[0]
                    for test_file in test_files
                })

                print(f"    🎨 Found augmentations: {augmentations}")

                for augmentation in augmentations:
                    print(f"\n    🔄 Processing {strategy}/{augmentation}")

                    result_df = parse_data(
                        mixed_folder_path,
                        dataset_name=dataset_name,
                        strategy=strategy,
                        augmentation=augmentation
                    )

                    if result_df is not None:
                        processed_folders.append(mixed_folder_path)
            else:
                print(f"    ⚠️ No test files found in mixed folder")

        # Case 3: Check for organized test folders within strategy
        for item in sorted(os.listdir(subdir_path)):
            item_path = subdir_path / item
            if not (item_path.is_dir() and item.startswith('test_')):
                continue

            print(f"  ✅ Found organized test folder: {subdir}/{item}")

            result_df = parse_data(
                item_path,
                dataset_name=dataset_name,
                strategy=strategy,
                augmentation=item[len('test_'):]
            )

            if result_df is not None:
                processed_folders.append(item_path)

    print(f"\n🎉 Processed {len(processed_folders)} test folder(s) for {dataset_name}")
    return processed_folders

# Main execution
def process_all_test_folders(target_dirs=None):
    """
    Process all test folders across the target directories.

    Args:
        target_dirs: Iterable of dataset log directories to scan. Defaults to
            ``['log_cinic10', 'log_tiny200']`` (relative to the working
            directory) when None.

    Returns:
        List with every folder reported as processed by ``find_test_folders``,
        in the order the directories were scanned.
    """
    if target_dirs is None:
        target_dirs = ['log_cinic10', 'log_tiny200']

    print("🔍 PROCESSING ALL TEST FOLDERS")
    print("=" * 60)

    all_processed = []

    for target_dir in target_dirs:
        if not os.path.exists(target_dir):
            print(f"❌ Directory not found: {target_dir}")
            continue

        # find_test_folders may hand back None; treat that as "nothing processed"
        # instead of failing inside extend().
        all_processed.extend(find_test_folders(target_dir) or [])

    print(f"\n🎉 COMPLETE! Processed {len(all_processed)} test folders total")
    return all_processed

# Execute (trailing ';' keeps the returned list out of the cell output)
process_all_test_folders();

In [2]:
import os
import pandas as pd
import numpy as np
from pathlib import Path

def analyze_all_test_log(root_dir):
    """
    Add mean/std statistics to every test summary CSV found below ``root_dir``.

    The directory tree is searched recursively, so both direct test folders and
    the strategy-based structure are covered. A file counts as a summary when
    its name matches ``*_TEST.csv`` or ``test_summary_*.csv``; raw
    ``log_test*.csv`` logs are left untouched. Each summary is passed to
    ``analyze_test_file``, which rewrites it in place.

    Args:
        root_dir: Directory to search (str or Path).

    Returns:
        Sorted list of the summary paths handed to ``analyze_test_file``; an
        empty list when ``root_dir`` does not exist or holds no summaries.
    """
    root_dir = Path(root_dir)

    if not root_dir.exists():
        print(f"❌ Root directory does not exist: {root_dir}")
        return []

    # A set removes files that would match both patterns; sorting keeps the
    # processing order reproducible.
    summary_files = sorted(
        set(root_dir.rglob('*_TEST.csv')) | set(root_dir.rglob('test_summary_*.csv'))
    )

    for summary_file in summary_files:
        analyze_test_file(summary_file)

    return summary_files
    
def analyze_test_file(file_path):
    """
    Add mean/std of ``max_Prec@1`` to a test summary CSV, rewriting it in place.

    The file gains two columns, ``mean_Prec@1`` and ``std_Prec@1``, holding the
    same value in every row. The standard deviation is the sample standard
    deviation (ddof=1); it is NaN when the file has fewer than two rows.

    Args:
        file_path: Path of the summary CSV (str or Path).

    Returns:
        None. Problems (unreadable file, missing ``max_Prec@1`` column) are
        reported with a printed message and leave the file unchanged.
    """
    # Accept plain strings too: the messages below rely on Path.name.
    file_path = Path(file_path)

    try:
        df = pd.read_csv(file_path)

        if 'max_Prec@1' not in df.columns:
            print(f"      ⚠️ No 'max_Prec@1' column in {file_path.name}")
            return

        results = df['max_Prec@1'].values
        mean = np.mean(results) if len(results) > 0 else np.nan
        # ddof=1 needs at least two samples; avoid numpy's degrees-of-freedom warning.
        std = np.std(results, ddof=1) if len(results) > 1 else np.nan

        # Update the DataFrame with mean and std as new columns
        df['mean_Prec@1'] = mean
        df['std_Prec@1'] = std

        # Save updated file
        df.to_csv(file_path, index=False)

    except Exception as e:
        print(f"      ❌ Error analyzing {file_path.name}: {e}")


In [20]:
# Add the mean_Prec@1 / std_Prec@1 columns to one summary CSV, rewriting it in place.
# NOTE(review): absolute, machine-specific path - this cell only runs where that file exists.
analyze_test_file('/home/hamt/light_weight/imbalanced-DL/example/log_cinic10/ERM/test_None/cinic10_ERM_None_TEST.csv')

In [2]:
import os
import re
import ast
def _namespace_ast_to_dict(text):
    """Parse a ``Namespace(...)`` repr or a dict literal with the ``ast`` module."""
    node = ast.parse(text, mode="eval").body

    # "{Namespace(...)}" is syntactically a one-element set display; unwrap it.
    if isinstance(node, ast.Set) and len(node.elts) == 1:
        node = node.elts[0]

    if isinstance(node, ast.Dict):
        return ast.literal_eval(node)

    is_namespace_call = (
        isinstance(node, ast.Call)
        and isinstance(node.func, ast.Name)
        and node.func.id == "Namespace"
        and not node.args
    )
    if not is_namespace_call:
        raise ValueError("expected a Namespace(...) repr or a dict literal")

    parsed = {}
    for keyword in node.keywords:
        if keyword.arg is None:
            raise ValueError("'**' expansion is not supported")
        # literal_eval only accepts literals, so nothing from the file is executed.
        parsed[keyword.arg] = ast.literal_eval(keyword.value)
    return parsed


def _namespace_legacy_to_dict(text):
    """Textual key=value -> 'key': value rewrite, kept as a fallback parser."""
    # Remove outer braces { }
    if text.startswith("{") and text.endswith("}"):
        text = text[1:-1]

    # Remove Namespace( ... )
    if text.startswith("Namespace(") and text.endswith(")"):
        text = text[len("Namespace("):-1]

    # Convert key=value -> 'key': value, then wrap with { } to form a dict literal
    text = re.sub(r"(\w+)=([^,]+)", r"'\1': \2", text)
    return ast.literal_eval("{" + text + "}")


def convert_args_to_dict(dir_path):
    """
    Read the saved run arguments from a ``.txt`` file in ``dir_path``.

    The file is expected to contain the text of a ``Namespace(key=value, ...)``
    object (optionally wrapped in ``{ }``) or a dict literal. It is parsed
    structurally, so quoted values may contain commas or ``=`` signs; the
    earlier textual rewrite is used as a fallback for anything the structural
    parser rejects.

    ``.txt`` files are tried in sorted order and the first one that parses
    wins; a failure is reported and the next file is tried.

    Args:
        dir_path: Directory to search for ``.txt`` files.

    Returns:
        The parsed arguments (normally a dict), or None when the directory has
        no ``.txt`` file or none of them can be parsed.
    """
    for file in sorted(os.listdir(dir_path)):
        if not file.endswith('.txt'):
            continue

        with open(os.path.join(dir_path, file), 'r') as f:
            content = f.read().strip()

        try:
            try:
                return _namespace_ast_to_dict(content)
            except (SyntaxError, ValueError, TypeError):
                # Not valid call/dict syntax: fall back to the textual rewrite.
                return _namespace_legacy_to_dict(content)
        except Exception as e:
            print(f"Failed to parse {file}: {e}")

    return None

In [3]:
import os 
import re

import pandas as pd

# Collect the best test accuracy of every run of one strategy in one dataset.
# Run folders are named "<dataset>_<...>_<...>_<strategy>_<...>": the strategy
# is the 4th "_"-separated token and the dataset is the 2nd token of root_dir.
root_dir = 'log_tiny200'
target_strategy = 'ERM'
dataset = root_dir.split('_')[1]
results = []

# Sorted traversal makes the row order of the saved CSV reproducible.
for subdir in sorted(os.listdir(root_dir)):
    run_dir = os.path.join(root_dir, subdir)
    parts = subdir.split('_')
    if len(parts) <= 3 or parts[3] != target_strategy or not os.path.isdir(run_dir):
        continue

    arguments = convert_args_to_dict(run_dir)

    if arguments is None:
        print(f"⚠️ Could not parse arguments for {subdir}")
        continue

    data_augment = arguments.get('data_augment', 'unknown')

    for file in sorted(os.listdir(run_dir)):
        if not (file.endswith('.csv') and file.startswith('log_test')):
            continue

        max_training_result = None  # Reset for each file

        print(f"📄 Processing: {subdir}/{file}")

        try:
            with open(os.path.join(run_dir, file), 'r') as f:
                for line in f:
                    # Look for "Best Prec@1" results in log_test files
                    train_match = re.search(r"Best Prec@1:\s+([\d.]+)", line)

                    if train_match:
                        value = float(train_match.group(1))
                        if (max_training_result is None) or (value > max_training_result):
                            max_training_result = value

            # Add result for this file (max_training_result stays None when the
            # file holds no "Best Prec@1" line)
            results.append({
                "file": file,
                "subdir": subdir,
                "dataset": dataset,
                "strategy": parts[3],
                "augmentation": data_augment,
                "max_training_result": max_training_result,
            })

            print(f"    ✅ Max result: {max_training_result}")

        except Exception as e:
            print(f"    ❌ Error processing {file}: {e}")

print(f"\n🎉 PROCESSING COMPLETE!")
print(f"📊 Found {len(results)} files")

# Convert to DataFrame for easier analysis
df = pd.DataFrame(results)
print("\n📋 RESULTS SUMMARY:")
print(df)
# Every collected row belongs to target_strategy, so no lookup in df is needed
# (df has no 'strategy' column at all when nothing was collected).
strategy = target_strategy

if df.empty:
    print(f"⚠️ No {strategy} results found in {root_dir}; nothing saved")
else:
    os.makedirs(f'latex/{dataset}/{strategy}', exist_ok=True)
    # Save results
    df.to_csv(f'latex/{dataset}/{strategy}/{strategy}_results.csv', index=False)
    print(f"✅ Results saved to latex/{dataset}/{strategy}/{strategy}_results.csv")

📄 Processing: tiny200_exp_0.01_ERM_200_1448/log_test_randaugment.csv
    ✅ Max result: 36.06
Failed to parse args.txt: invalid syntax (<unknown>, line 1)
⚠️ Could not parse arguments for tiny200_exp_0.01_ERM_200_1916
📄 Processing: tiny200_exp_0.01_ERM_200_2618/log_test_cutout.csv
    ✅ Max result: 33.08
📄 Processing: tiny200_exp_0.01_ERM_200_2916/log_test_None.csv
    ✅ Max result: 34.44
📄 Processing: tiny200_exp_0.01_ERM_200_2591/log_test_autoaugment_imagenet.csv
    ✅ Max result: 34.25
📄 Processing: tiny200_exp_0.01_ERM_200_3448/log_test_randaugment.csv
    ✅ Max result: 35.4
📄 Processing: tiny200_exp_0.01_ERM_200_3618/log_test_cutout.csv
    ✅ Max result: 32.57
📄 Processing: tiny200_exp_0.01_ERM_200_3916/log_test_None.csv
    ✅ Max result: 33.91
Failed to parse args.txt: invalid syntax (<unknown>, line 1)
⚠️ Could not parse arguments for tiny200_exp_0.01_ERM_200_1618
📄 Processing: tiny200_exp_0.01_ERM_200_1591/log_test_autoaugment_imagenet.csv
    ✅ Max result: 34.1
📄 Processing: ti

In [12]:
import pandas as pd
from pathlib import Path

# Per-augmentation statistics of one strategy's results.
# The summary is written next to its input as "<strategy>_summary.csv", where
# <strategy> is the name of the folder holding the results file. Deriving the
# output from the input keeps one strategy's statistics from overwriting
# another strategy's summary.
results_csv = Path('/home/hamt/light_weight/imbalanced-DL/example/latex/tiny200/DRW/DRW_results.csv')
summary_csv = results_csv.with_name(f"{results_csv.parent.name}_summary.csv")

df = pd.read_csv(results_csv)
# dropna=False keeps the runs whose augmentation is missing (NaN) as their own group
stats = df.groupby('augmentation', dropna=False)['max_training_result'].agg([
    'mean',
    'std',
    'count',
    'min',
    'max'
]).round(2)
print(stats)
stats.to_csv(summary_csv)

                   mean    std  count    min    max
augmentation                                       
autoaugment_svhn  33.62   0.25      3  33.33  33.77
cutout            23.93  15.61      3   5.91  33.09
randaugment       36.26   0.26      3  36.09  36.56
NaN               33.90   0.41      3  33.59  34.37
