In [2]:
from pathlib import Path
import pandas as pd
import numpy as np

In [11]:
def calculate_combined_stats(df, column_patterns):
    """
    Calculate the mean, median, and standard deviation of columns that match specific patterns.
    
    Parameters
    ----------
    df : pandas.DataFrame
        Input DataFrame containing the data
    column_patterns : list of str
        List of patterns to match column names
        
    Returns
    -------
    pandas.DataFrame
        DataFrame containing the combined statistics for each pattern
    """
    results = []
    for pattern in column_patterns:
        # Find columns that match the pattern exactly
        matching_cols = [col for col in df.columns if col.startswith(pattern + '_') or col == pattern]
        if matching_cols:
            # Calculate statistics across matching columns
            combined_mean = df[matching_cols].mean(axis=1).mean()
            combined_median = df[matching_cols].median(axis=1).median()
            combined_std = df[matching_cols].std(axis=1).mean()
            results.append({
                'Pattern': pattern,
                'Combined Mean': combined_mean,
                'Combined Median': combined_median,
                'Combined Std': combined_std
            })
    
    return pd.DataFrame(results)

In [12]:
# Set pandas display options
pd.set_option('display.max_rows', None)  # Show all rows
pd.set_option('display.max_columns', None)  # Show all columns
pd.set_option('display.width', None)  # Auto-detect display width
pd.set_option('display.float_format', lambda x: '%.6f' % x)  # Format float numbers

base_dir = Path("/scratch/zsa8rk/logs")

# Get Mean, Median and STD from loss dataframes
RUNS = [
    "m-main-4-1",
]

for run in RUNS:
    eval_dir = base_dir / run / "eval"
    loss_df = pd.read_csv(eval_dir / "losses.csv", header=0)

    # # calc mean, median and std for each column
    # mean_loss = loss_df.mean()
    # median_loss = loss_df.median()
    # std_loss = loss_df.std()

    # # Create a DataFrame with the statistics
    # stats_df = pd.DataFrame({
    #     'Mean': mean_loss,
    #     'Median': median_loss,
    #     'Std': std_loss
    # })
    

    # Example usage:
    # Calculate combined means for different flow types
    flow_patterns = [
        'cylinder_sym_flow_water',
        'cylinder_pipe_flow_water',
        'object_periodic_flow_water',
        'object_sym_flow_water',
        'object_sym_flow_air',
        'rayleigh_benard',
        'rayleigh_benard_obstacle',
        'twophase_flow',
        'shear_flow',
        'euler_multi_quadrants_periodicBC',
        'heated_object_pipe_flow_air',
        'cooled_object_pipe_flow_air',
        'acoustic_scattering_inclusions'

    ]

    combined_means = calculate_combined_stats(loss_df, flow_patterns)

    # Calculate overall statistics across all columns
    overall_stats = pd.DataFrame([{
        'Pattern': 'OVERALL',
        'Combined Mean': np.nanmean(loss_df.values),
        'Combined Median': np.nanmedian(loss_df.values),
        'Combined Std': np.nanstd(loss_df.values)
    }])

    # Concatenate the overall stats with the pattern-specific stats
    combined_means = pd.concat([combined_means, overall_stats], ignore_index=True)

    display(combined_means)

Unnamed: 0,Pattern,Combined Mean,Combined Median,Combined Std
0,cylinder_sym_flow_water,7e-06,6e-06,1e-06
1,cylinder_pipe_flow_water,8e-06,7e-06,2e-06
2,object_periodic_flow_water,0.00072,7.2e-05,0.001304
3,object_sym_flow_water,0.000736,7e-05,0.00151
4,object_sym_flow_air,0.000307,0.000172,0.000353
5,rayleigh_benard,0.186362,0.001003,0.674342
6,rayleigh_benard_obstacle,0.000333,0.000254,0.000291
7,twophase_flow,0.007223,7.2e-05,0.019119
8,shear_flow,0.000408,3.3e-05,0.000852
9,euler_multi_quadrants_periodicBC,0.014264,0.006908,0.016501
