In [1]:
from pathlib import Path
import pandas as pd
import numpy as np

# Notebook-wide pandas display settings: never truncate rows or columns, let
# pandas pick the output width, and print floats with six decimal places.
for option_name, option_value in (
    ('display.max_rows', None),                  # show all rows
    ('display.max_columns', None),               # show all columns
    ('display.width', None),                     # auto-detect display width
    ('display.float_format', '{:.6f}'.format),   # fixed six-decimal floats
):
    pd.set_option(option_name, option_value)

from lpfm.utils.plotting.plot_lossVsTime import LossVsTimePlotter

# Names of the runs to evaluate; each one is looked up as a sub-directory of
# ``base_dir`` by the cells below.
RUNS = ["m-main-4-1"]

# Root directory that holds one log directory per run.
base_dir = Path("/scratch/zsa8rk/logs")


In [2]:
def calculate_combined_stats(df, column_patterns, level=0):
    """
    Summarise the columns belonging to each pattern with a mean, median and std.

    A column label belongs to a pattern when it equals the pattern or starts
    with ``pattern + '_'``. When several of the supplied patterns match the
    same label (e.g. ``'rayleigh_benard'`` and ``'rayleigh_benard_obstacle'``),
    the label is attributed to the longest matching pattern only, so the
    shorter pattern's statistics are not contaminated by the more specific one.

    For every pattern that owns at least one column the result contains:

    * ``Combined Mean``   - mean over rows of the per-row mean across columns
    * ``Combined Median`` - median over rows of the per-row median across columns
    * ``Combined Std``    - mean over rows of the per-row standard deviation
      across columns (pandas' default sample std, hence NaN when a pattern
      owns a single column)

    Parameters
    ----------
    df : pandas.DataFrame
        Input DataFrame containing the data
    column_patterns : iterable of str
        Patterns to match against the column labels
    level : int
        Column level whose labels are matched (0 for a flat column index)

    Returns
    -------
    pandas.DataFrame
        One row per pattern that matched at least one column, with columns
        ``Pattern``, ``Combined Mean``, ``Combined Median`` and
        ``Combined Std``. Patterns without matching columns are omitted; the
        columns are present even when no pattern matched.
    """
    stat_columns = ['Pattern', 'Combined Mean', 'Combined Median', 'Combined Std']
    patterns = list(column_patterns)
    labels = df.columns.get_level_values(level)

    def _owning_pattern(label):
        # Non-string labels can never match a string pattern.
        if not isinstance(label, str):
            return None
        hits = [p for p in patterns if label == p or label.startswith(p + '_')]
        # Longest match wins so overlapping prefixes do not double count.
        return max(hits, key=len) if hits else None

    owners = [_owning_pattern(label) for label in labels]

    results = []
    for pattern in patterns:
        # Select by position: each physical column is taken exactly once, even
        # when the chosen level repeats labels (MultiIndex) or ``level`` is not
        # the outermost level.
        positions = [i for i, owner in enumerate(owners) if owner == pattern]
        if not positions:
            continue
        selected = df.iloc[:, positions]
        results.append({
            'Pattern': pattern,
            'Combined Mean': selected.mean(axis=1).mean(),
            'Combined Median': selected.median(axis=1).median(),
            'Combined Std': selected.std(axis=1).mean(),
        })

    return pd.DataFrame(results, columns=stat_columns)

In [None]:


# Flow families to summarise. Each entry is matched against the column names
# of ``losses.csv`` by ``calculate_combined_stats``.
flow_patterns = [
    'cylinder_sym_flow_water',
    'cylinder_pipe_flow_water',
    'object_periodic_flow_water',
    'object_sym_flow_water',
    'object_sym_flow_air',
    'rayleigh_benard',
    'rayleigh_benard_obstacle',
    'twophase_flow',
    'shear_flow',
    'euler_multi_quadrants_periodicBC',
    'heated_object_pipe_flow_air',
    'cooled_object_pipe_flow_air',
    'acoustic_scattering_inclusions',
]

for run in RUNS:
    # Per-run evaluation losses, first CSV row used as the header.
    eval_dir = base_dir / run / "eval"
    loss_df = pd.read_csv(eval_dir / "losses.csv", header=0)

    # One summary row per flow family that has matching columns.
    combined_means = calculate_combined_stats(loss_df, flow_patterns)

    # Summary over every value in the file, ignoring NaNs.
    # NOTE(review): the std here is np.nanstd over the flattened values, while
    # the per-pattern rows use a mean of row-wise pandas std - confirm the two
    # are meant to share the 'Combined Std' column.
    all_values = loss_df.values
    overall_stats = pd.DataFrame({
        'Pattern': ['OVERALL'],
        'Combined Mean': [np.nanmean(all_values)],
        'Combined Median': [np.nanmedian(all_values)],
        'Combined Std': [np.nanstd(all_values)],
    })

    # Append the overall row below the per-pattern rows and show the table.
    combined_means = pd.concat([combined_means, overall_stats], ignore_index=True)

    display(combined_means)

In [None]:
def calculate_combined_stats_rollout(df, column_patterns, level=0):
    """
    Average multi-level columns per pattern while keeping levels 1 and 2.

    Columns are grouped by (pattern, level-1 label, level-2 label). Within each
    group the columns are averaged row by row, so the result keeps the input's
    row index and has one column per non-empty group.

    A label on ``level`` belongs to a pattern when it equals the pattern or
    starts with ``pattern + '_'``. When several supplied patterns match the
    same label (e.g. ``'rayleigh_benard'`` and ``'rayleigh_benard_obstacle'``),
    the label is attributed to the longest matching pattern only.

    Parameters
    ----------
    df : pandas.DataFrame
        Input DataFrame whose columns are a MultiIndex with at least three
        levels
    column_patterns : iterable of str
        Patterns to match against the labels on ``level``
    level : int
        Column level whose labels are matched (default=0 for first level).
        Levels 1 and 2 are always used as the preserved grouping levels.

    Returns
    -------
    pandas.DataFrame
        Row-wise means with MultiIndex columns named
        ``("pattern", "metric", "channel")``. Columns are ordered by pattern
        (in the order given), then by level-1 label, then by level-2 label,
        each in order of first appearance in ``df``.
    """
    patterns = list(column_patterns)

    def _owning_pattern(label):
        # Non-string labels can never match a string pattern.
        if not isinstance(label, str):
            return None
        hits = [p for p in patterns if label == p or label.startswith(p + '_')]
        # Longest match wins so overlapping prefixes do not double count.
        return max(hits, key=len) if hits else None

    # Unique labels of the two preserved levels, in order of first appearance.
    # They do not depend on the pattern, so they are computed once.
    metrics = df.columns.get_level_values(1).unique()
    channels = df.columns.get_level_values(2).unique()

    # Single pass over the columns: bucket each one by its group key instead of
    # rescanning every column for every (pattern, metric, channel) triple.
    buckets = {}
    for col in df.columns:
        owner = _owning_pattern(col[level])
        if owner is not None:
            buckets.setdefault((owner, col[1], col[2]), []).append(col)

    data = []
    index = []
    for pattern in patterns:
        for metric in metrics:
            for channel in channels:
                cols_to_combine = buckets.get((pattern, metric, channel))
                if cols_to_combine:
                    index.append((pattern, metric, channel))
                    # Row-wise mean across all columns of this group.
                    data.append(df[cols_to_combine].mean(axis=1))

    column_index = pd.MultiIndex.from_tuples(index, names=["pattern", "metric", "channel"])
    # Each entry of ``data`` is one output column; build them as rows, then
    # transpose so the input's row index becomes the result's row index.
    return pd.DataFrame(data, index=column_index).T


# Flow families whose rollout losses are averaged per run.
flow_patterns = [
    'cylinder_sym_flow_water',
    'cylinder_pipe_flow_water',
    'object_periodic_flow_water',
    'object_sym_flow_water',
    'object_sym_flow_air',
    'rayleigh_benard',
    'rayleigh_benard_obstacle',
    'twophase_flow',
    'shear_flow',
    'euler_multi_quadrants_periodicBC',
    'heated_object_pipe_flow_air',
    'cooled_object_pipe_flow_air',
    'acoustic_scattering_inclusions',
]

# Flow family that is plotted for each run.
plotted_pattern = "cylinder_sym_flow_water"

for run in RUNS:
    eval_dir = base_dir / run / "eval"

    # Both files carry a three-row column header (read as a 3-level MultiIndex).
    rollout_df = pd.read_csv(eval_dir / "rollout_losses.csv", header=[0, 1, 2])
    # NOTE(review): single_step_df is loaded but not used in this cell -
    # confirm whether a later cell relies on it.
    single_step_df = pd.read_csv(eval_dir / "single_step_losses.csv", header=[0, 1, 2])

    # Row-wise averages per (pattern, metric, channel).
    combined_df = calculate_combined_stats_rollout(rollout_df, column_patterns=flow_patterns)

    # Slice out the plotted family, then its "mean" and "std" metric entries.
    pattern_df = combined_df[plotted_pattern]
    data_mean = pattern_df["mean"]
    data_std = pattern_df["std"]

    # Plot the averaged loss over time.
    # NOTE(review): data_std is computed above but std_loss=None is passed -
    # confirm whether the std values are meant to be plotted.
    x_ticks, y_ticks = [0, 25, 50], [0, 0.2]
    plotter = LossVsTimePlotter(x_ticks=x_ticks, y_ticks=y_ticks)
    plotter.plot(mean_loss=data_mean.values, std_loss=None)
    plotter.legend(plotted_pattern)