In [None]:
import numpy as np
import re
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import scipy
import shutil
import h5py
import numpy as np
from typing import Dict, Any
from pathlib import Path
import subprocess
import sys

In [None]:

def get_lm(l,m,df):
  """Translate a mode pair (l, m) into a flat column index for `df`.

  The stride is ``int(sqrt(df.attrs['Extents'][1] / 2))``. Entries with
  ``m >= 0`` map to ``stride*l + m``; entries with ``m < 0`` map to
  ``stride*l + |m| + stride**2``.
  """
  stride = int(np.sqrt(df.attrs['Extents'][1]/2))
  row_offset = stride*l
  if m < 0:
    # Negative m is shifted by an extra stride**2 relative to the m >= 0 layout.
    return row_offset + np.abs(m) + stride**2
  return row_offset + m

def pow_in_l(l,df):
  """Return, for every row of `df`, the L2 norm over the columns of degree `l`.

  The columns are the ones `get_lm` assigns to m = -l, ..., l (2l+1 columns),
  and the norm is taken across those columns (axis=1).
  """
  columns = []
  for m in range(-l, l+1):
    columns.append(get_lm(l,m,df))
  selected = df[columns]
  return np.linalg.norm(selected, axis=1)



In [None]:
def read_spec_into_pd(file_path: Path):
    """Parse a text dump with two-index extents into a DataFrame.

    The file must contain a ``Time[0] = <number>`` entry, an
    ``Extents = (<int>,<int>)`` entry, and data rows of the form
    ``(:,<index>): v0,v1,...``.

    Args:
        file_path: Path of the text file to parse.

    Returns:
        DataFrame with one column per ``<index>`` (integer column labels).
        ``df.attrs["Time"]`` holds the parsed time as a float and
        ``df.attrs["Extents"]`` the extents as a tuple of ints.

    Raises:
        ValueError: If the time or extents entry cannot be found, or a data
            value cannot be converted to float.
    """
    file_content = file_path.read_text()

    # Accept an optional sign and exponent: a plain ``[\d.]+`` would read
    # "1.5e+03" as 1.5 and would not match negative times at all.
    number = r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"

    time_match = re.search(r"Time\[0\]\s*=\s*(" + number + r")", file_content)
    if time_match is None:
        raise ValueError(f"No 'Time[0] = <number>' entry found in {file_path}")
    time_value = float(time_match.group(1))

    extents_match = re.search(r"Extents\s*=\s*\((\d+),(\d+)\)", file_content)
    if extents_match is None:
        raise ValueError(f"No 'Extents = (a,b)' entry found in {file_path}")
    extents = tuple(map(int, extents_match.groups()))

    # Each match is (index, comma-separated values); the value group may span
    # trailing whitespace, which float() tolerates.
    data_rows = re.findall(r"\(:,(\d+)\):\s*([\d\s\.,eE+-]+)", file_content)

    # Map column index -> list of floats; empty tokens (e.g. from a trailing
    # comma) are skipped instead of crashing float("").
    data = {}
    for index_text, values_text in data_rows:
        data[int(index_text)] = [
            float(token) for token in values_text.split(",") if token.strip()
        ]

    df = pd.DataFrame(data)

    # Keep the metadata alongside the data.
    df.attrs["Time"] = time_value
    df.attrs["Extents"] = extents

    return df


def read_phy_into_pd(file_path: Path):
    """Parse a text dump with three-index extents into a DataFrame.

    The file must contain a ``Time[0] = <number>`` entry, an
    ``Extents = (<int>,<int>,<int>)`` entry, and data rows of the form
    ``(:,<i>,<j>): v0,v1,...``.

    Args:
        file_path: Path of the text file to parse.

    Returns:
        DataFrame with one row per data row of the file, labelled ``"i,j"``,
        and one column per value position. ``attrs["Time"]`` holds the parsed
        time as a float and ``attrs["Extents"]`` the extents as a tuple of ints.

    Raises:
        ValueError: If the time or extents entry cannot be found, or a data
            value cannot be converted to float.
    """
    file_content = file_path.read_text()

    # Accept an optional sign and exponent: a plain ``[\d.]+`` would read
    # "1.5e+03" as 1.5 and would not match negative times at all.
    number = r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"

    time_match = re.search(r"Time\[0\]\s*=\s*(" + number + r")", file_content)
    if time_match is None:
        raise ValueError(f"No 'Time[0] = <number>' entry found in {file_path}")
    time_value = float(time_match.group(1))

    extents_match = re.search(
        r"Extents\s*=\s*\((\d+),(\d+),(\d+)\)", file_content
    )
    if extents_match is None:
        raise ValueError(f"No 'Extents = (a,b,c)' entry found in {file_path}")
    extents = tuple(map(int, extents_match.groups()))

    # Each match is (i, j, rest-of-line with the comma-separated values).
    data_rows = re.findall(r"\(:,(\d+),(\d+)\):\s*(.*)", file_content)

    # Map "i,j" -> list of floats; empty tokens (e.g. from a trailing comma)
    # are skipped instead of crashing float("").
    data = {}
    for first_text, second_text, values_text in data_rows:
        first = int(first_text)
        second = int(second_text)
        data[f"{first},{second}"] = [
            float(token) for token in values_text.split(",") if token.strip()
        ]

    # Transpose so that every "i,j" key becomes a row.
    frame = pd.DataFrame(data).T

    # Attach the metadata to the frame that is actually returned.
    frame.attrs["Time"] = time_value
    frame.attrs["Extents"] = extents

    return frame

In [None]:
import numpy as np
from typing import Tuple, Optional, Union
from numpy.typing import ArrayLike


def estimate_noise_level(
    coefficients: ArrayLike, window_size: int = 5, derivative_threshold: float = 0.1
) -> Tuple[Optional[float], Optional[np.ndarray]]:
    """Estimate a noise floor from the longest flat stretch of the coefficients.

    The log10 magnitude of the coefficients is smoothed with a moving average
    of length `window_size`; positions where consecutive smoothed values differ
    by less than `derivative_threshold` are treated as a plateau, and the
    longest run of consecutive plateau positions is used.

    This single definition replaces two identical-named definitions, the
    second of which shadowed the first and skipped its input validation.

    Args:
        coefficients: 1-D sequence of coefficients (sign is ignored).
        window_size: Length of the moving-average window; must satisfy
            ``1 <= window_size <= len(coefficients)``.
        derivative_threshold: Maximum absolute step of the smoothed log10
            magnitude for a position to count as part of a plateau.

    Returns:
        ``(noise_level, noise_indices)`` where `noise_level` is 10 raised to
        the mean log10 magnitude over the plateau and `noise_indices` are the
        coefficient indices from the plateau start up to the plateau end
        extended by `window_size` (clipped to the array length). Returns
        ``(None, None)`` when no plateau position is found.

    Raises:
        ValueError: If `coefficients` is empty or `window_size` is out of range.
    """
    coefficients = np.asarray(coefficients)
    if coefficients.size == 0:
        raise ValueError("Coefficients array cannot be empty")
    # A window longer than the data makes np.convolve swap its arguments,
    # which yields indices that no longer refer to the coefficients.
    if window_size < 1 or window_size > len(coefficients):
        raise ValueError("Invalid window size")

    # Small offset keeps log10 finite for exact zeros.
    log_coeffs = np.log10(np.abs(coefficients) + 1e-16)
    moving_avg = np.convolve(
        log_coeffs, np.ones(window_size) / window_size, mode="valid"
    )

    # Positions where the smoothed curve is (nearly) flat.
    derivatives = np.diff(moving_avg)
    plateau_idx = np.where(np.abs(derivatives) < derivative_threshold)[0]
    if len(plateau_idx) == 0:
        return None, None

    # Split the plateau positions into runs of consecutive indices.
    breaks = np.where(np.diff(plateau_idx) > 1)[0]
    group_starts = np.concatenate([[0], breaks + 1])
    group_ends = np.concatenate([breaks, [len(plateau_idx) - 1]])

    # Keep the longest run (first one wins on ties, as np.argmax does).
    longest_group = np.argmax(group_ends - group_starts)
    start_idx = plateau_idx[group_starts[longest_group]]
    end_idx = plateau_idx[group_ends[longest_group]] + 1

    noise_level = 10 ** np.mean(log_coeffs[start_idx:end_idx])
    # Extend by the window length to account for the smoothing window.
    noise_indices = np.arange(
        start_idx, min(end_idx + window_size, len(coefficients))
    )
    return noise_level, noise_indices


In [None]:
def read_h5_file(
    file_path: str,
    group_names=("InitGridHi", "InitHhatt", "kappa", "psi"),
) -> Dict[str, Any]:
    """Read selected top-level groups of an HDF5 file into nested dictionaries.

    Args:
        file_path: Location of the HDF5 file (a ``str`` or ``Path``).
        group_names: Names of the top-level groups to read. Names that are not
            present in the file are skipped. Defaults to the four groups this
            function has always read.

    Returns:
        ``{group_name: contents}`` where `contents` mirrors the group: datasets
        become their values (read with ``item[()]``) and sub-groups become
        nested dictionaries. Attributes are not read.

    Raises:
        FileNotFoundError: If `file_path` does not exist.
        OSError: If the file cannot be read; chained to the original error.
    """

    def read_group(group) -> Dict[str, Any]:
        # Recursively convert one group; items that are neither a dataset nor
        # a group are ignored.
        result = {}
        for name, item in group.items():
            if isinstance(item, h5py.Dataset):
                result[name] = item[()]
            elif isinstance(item, h5py.Group):
                result[name] = read_group(item)
        return result

    if not Path(file_path).exists():
        raise FileNotFoundError(f"The file {file_path} does not exist")
    try:
        with h5py.File(file_path, "r") as f:
            return {
                group_name: read_group(f[group_name])
                for group_name in group_names
                if group_name in f
            }
    except OSError as e:
        # Chain explicitly so the underlying error stays visible.
        raise OSError(f"Error reading HDF5 file: {str(e)}") from e


def read_h5_file_dump_tensors(file_path: str) -> Dict[str, Any]:
    """Read every top-level item of an HDF5 file, including group attributes.

    Args:
        file_path: Location of the HDF5 file (a ``str`` or ``Path``).

    Returns:
        ``{name: contents}`` for each top-level item. A group becomes a nested
        dictionary holding its datasets (values read with ``item[()]``), its
        sub-groups (recursively) and its attributes; a top-level dataset
        becomes its value. Attributes are stored under their own names in the
        same dictionary as the children, so an attribute replaces a child of
        the same name.

    Raises:
        FileNotFoundError: If `file_path` does not exist.
        OSError: If the file cannot be read; chained to the original error.
    """

    def read_group(group) -> Dict[str, Any]:
        result = {}
        for name, item in group.items():
            if isinstance(item, h5py.Dataset):
                result[name] = item[()]
            elif isinstance(item, h5py.Group):
                result[name] = read_group(item)
            else:
                # Neither dataset nor group: report it and move on.
                print(name, item)
        for name, item in group.attrs.items():
            result[name] = item
        return result

    if not Path(file_path).exists():
        raise FileNotFoundError(f"The file {file_path} does not exist")
    try:
        with h5py.File(file_path, "r") as f:
            data = {}
            for name, item in f.items():
                if isinstance(item, h5py.Dataset):
                    # A dataset directly under the root has no .items();
                    # read its value instead of recursing into it.
                    data[name] = item[()]
                elif isinstance(item, h5py.Group):
                    data[name] = read_group(item)
                else:
                    print(name, item)
        return data
    except OSError as e:
        # Chain explicitly so the underlying error stays visible.
        raise OSError(f"Error reading HDF5 file: {str(e)}") from e



def are_dicts_equal(
    dict1: Dict[str, Any], dict2: Dict[str, Any], rtol: float = 1e-5
) -> bool:
    """
    Compare two nested dictionaries that may contain NumPy arrays.

    Numeric arrays (bool, integer, float, complex) are compared with
    ``np.allclose`` using `rtol` (and NumPy's default absolute tolerance).
    Arrays of any other dtype (strings, bytes, objects) are compared for exact
    equality, because ``np.allclose`` raises ``TypeError`` on them.

    Args:
        dict1: First dictionary to compare
        dict2: Second dictionary to compare
        rtol: Relative tolerance for NumPy array comparison (default: 1e-5)

    Returns:
        bool: True if dictionaries are equal, False otherwise

    Examples:
        >>> d1 = {'a': {'b': np.array([1, 2, 3])}}
        >>> d2 = {'a': {'b': np.array([1, 2, 3])}}
        >>> are_dicts_equal(d1, d2)
        True
    """
    # dtype kinds that np.allclose can handle.
    numeric_kinds = "biufc"

    if not isinstance(dict1, dict) or not isinstance(dict2, dict):
        return False

    if dict1.keys() != dict2.keys():
        return False

    for key, val1 in dict1.items():
        val2 = dict2[key]

        if isinstance(val1, np.ndarray) or isinstance(val2, np.ndarray):
            # An array only ever equals another array of the same shape.
            if not (isinstance(val1, np.ndarray) and isinstance(val2, np.ndarray)):
                return False
            if val1.shape != val2.shape:
                return False
            if val1.dtype.kind in numeric_kinds and val2.dtype.kind in numeric_kinds:
                if not np.allclose(val1, val2, rtol=rtol):
                    return False
            elif not np.array_equal(val1, val2):
                return False

        elif isinstance(val1, dict):
            # The recursive call returns False when val2 is not a dict.
            if not are_dicts_equal(val1, val2, rtol):
                return False

        elif val1 != val2:
            return False

    return True

def mean_diff_h5(file_1:Path, file_2:Path, step: str = "Step000000"):
    """Mean absolute difference between two HDF5 files, per variable component.

    Both files are loaded with `read_h5_file`. For every variable found in the
    first file and every component stored under `step`, the mean of the
    element-wise absolute difference to the second file is computed.

    Args:
        file_1: First HDF5 file; its variables and components drive the loop.
        file_2: Second HDF5 file; must contain the same variables/components.
        step: Name of the sub-group to compare. Defaults to ``"Step000000"``,
            the only step this function compared before it was a parameter.

    Returns:
        Dict mapping ``"<var>_<component>"`` to the mean absolute difference.

    Raises:
        KeyError: If `step`, a variable or a component is missing from a file.
    """
    data1 = read_h5_file(file_1)
    data2 = read_h5_file(file_2)

    diff_dict = {}
    for var, steps1 in data1.items():
        for component, values1 in steps1[step].items():
            values2 = data2[var][step][component]
            diff_dict[f"{var}_{component}"] = np.mean(np.abs(values1 - values2))

    return diff_dict

In [None]:
subdomain = 'SphereC0'
# Alternative input one directory up (14012_workdir itself), kept for quick switching:
# file_path = Path(f"/groups/sxs/hchaudha/spec_runs/19_filtered_checkpoint_runs/test_fake_kappa/checkpoints/14012_workdir/Vars2_{subdomain}.h5")
# Active input: the file under filtered_checkpoint_test. The name now follows
# `subdomain` instead of repeating the literal in a placeholder-less f-string.
file_path = Path(f"/groups/sxs/hchaudha/spec_runs/19_filtered_checkpoint_runs/test_fake_kappa/checkpoints/14012_workdir/filtered_checkpoint_test/Vars2_{subdomain}.h5")
data = read_h5_file_dump_tensors(file_path)

In [None]:
# Display the 'diff_kappa_psi' entry of the `data` dictionary read from the HDF5 file.
data['diff_kappa_psi']

In [None]:
subdomain = 'SphereC0'
# Checkpoint directory to compare against its "<name>_original" sibling.
# Other runs, kept for quick switching (only one assignment is active):
# check_pts = Path("/groups/sxs/hchaudha/spec_runs/19_filtered_checkpoint_runs/6_set1_L3_FK_14012_C_13/Ev/Lev3_AA/Run/Checkpoints/14012")
# check_pts = Path("/groups/sxs/hchaudha/spec_runs/19_filtered_checkpoint_runs/6_set1_L3_fil_buff0_14012/Ev/Lev3_AA/Run/Checkpoints/14012")
check_pts = Path("/groups/sxs/hchaudha/spec_runs/19_filtered_checkpoint_runs/6_set1_L3_FK_9443_All/Ev/Lev3_AA/Run/Checkpoints/9443")

checkpoint_name = f"Cp-VarsGr_{subdomain}.h5"
file_1 = check_pts / checkpoint_name
file_2 = check_pts.with_name(f"{check_pts.name}_original") / checkpoint_name

# Last expression of the cell: the per-component mean absolute differences.
mean_diff_h5(file_1,file_2)

In [None]:
# Compare the SphereC0 checkpoint in .../Checkpoints/5517 with the file of the
# same name in the sibling directory .../Checkpoints/5517_original.
file_1 = Path("/groups/sxs/hchaudha/spec_runs/19_filtered_checkpoint_runs/6_set1_L3_5517_6/Ev/Lev3_AA/Run/Checkpoints/5517/Cp-VarsGr_SphereC0.h5")
file_2 = file_1.parent.with_name(f"{file_1.parent.name}_original") / file_1.name

# Last expression of the cell: the per-component mean absolute differences.
mean_diff_h5(file_1,file_2)

In [None]:
def replace_data_from_h5(input_file, output_file):
    """Overwrite the datasets of `output_file` with those of `input_file`.

    Only dataset contents are written; attributes are deliberately left alone
    because changing them causes issues with the reader. `output_file` must
    have exactly the layout group / group / dataset, and `input_file` must
    provide a dataset at every such path.

    Args:
        input_file: HDF5 file supplying the data; opened read-only.
        output_file: Existing HDF5 file modified in place (opened with "r+").

    Raises:
        ValueError: If `output_file` contains anything other than groups on
            the first two levels or datasets on the third. Datasets visited
            before the offending item have already been overwritten.
        KeyError: If `input_file` lacks a path present in `output_file`.
    """
    # Open the input explicitly read-only: without a mode argument, h5py
    # releases before 3.0 do not default to read-only access.
    with h5py.File(output_file, "r+") as outfile, h5py.File(input_file, "r") as infile:
        for key1, item1 in outfile.items():
            if not isinstance(item1, h5py.Group):
                raise ValueError(f"Unexpected item type: {type(item1)}")
            for key2, item2 in item1.items():
                if not isinstance(item2, h5py.Group):
                    raise ValueError(f"Unexpected item type: {type(item2)}")
                for key3, item3 in item2.items():
                    if not isinstance(item3, h5py.Dataset):
                        raise ValueError(f"Unexpected item type: {type(item3)}")
                    # Write the whole dataset; the source dataset is handed to
                    # h5py as the value, exactly as before.
                    item3[()] = infile[key1][key2][key3]


# input_file = "/workspaces/spec/Tests/BlackBoxTests/GeneralizedHarmonicExamples/BBHLong/not_tracked/checkpoint_observer/data/Cp-VarsGr_SphereA0.h5"
# output_file = "/workspaces/spec/Tests/BlackBoxTests/GeneralizedHarmonicExamples/BBHLong/not_tracked/checkpoint_observer/data/Cp-VarsGr_SphereA0_copy.h5"
# copy_and_modify_h5file(input_file, output_file, data_dict['SphereA0'])

In [None]:
# File supplying the replacement data, and the checkpoint file to be patched.
input_path = Path("/groups/sxs/hchaudha/spec_runs/19_filtered_checkpoint_runs/del/1294/Cp-VarsGr_SphereC0.h5")
output_path_original = Path("/groups/sxs/hchaudha/spec_runs/19_filtered_checkpoint_runs/del/5517/Cp-VarsGr_SphereC0.h5")

# Patch a copy placed next to the original, so the original file is not modified.
output_path = output_path_original.with_name("Cp-VarsGr_SphereC0_changed.h5")
shutil.copy(output_path_original, output_path)

# Mean absolute differences to the input before and after the datasets are replaced.
before = mean_diff_h5(input_path, output_path)
replace_data_from_h5(input_path, output_path)
after = mean_diff_h5(input_path, output_path)

In [None]:
# Display the mean-difference dictionaries computed before and after the data replacement.
before,after