In [None]:
import h5py
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
import copy
from typing import List, Dict
import pandas as pd
plt.style.use('ggplot')
# plt.rcParams["figure.figsize"] = (12,10)
plt.rcParams["figure.figsize"] = (10,8)

# Code to read Horizons.h5

In [None]:
def make_Bh_pandas(h5_dir):
    """Collect every dataset below one HDF5 group into a single DataFrame.

    Each dataset is treated as a 2-D table whose column 0 is time and whose
    remaining columns are the components of the quantity named by the dataset.
    The time column is taken from the first dataset only and stored as 't';
    every dataset then contributes one column per component (see
    make_col_names for the naming).

    Parameters
    ----------
    h5_dir : h5py group-like object
        Must provide visit(callback) and item access by name.

    Returns
    -------
    pandas.DataFrame
        Columns 't', then the data columns in visit order.
    """
    df = pd.DataFrame()

    # Names of everything reachable below h5_dir.
    var_list = []
    h5_dir.visit(var_list.append)

    for var in var_list:
        data = h5_dir[var]
        # visit() also reports sub-groups; they have no table to read.
        if not hasattr(data, "shape"):
            continue

        # var[:-4] removes the trailing ".dat"; column 0 is time, so the
        # number of components is one less than the number of columns.
        col_names = make_col_names(var[:-4], data.shape[1] - 1)

        if df.shape == (0, 0):
            # First dataset: keep its time column. The names are prepended
            # with 't' (not reversed) so that component i keeps the name
            # "<var>_i" even when this dataset has several components.
            append_to_df(data[:], ['t'] + col_names, df)
        else:
            # Later datasets: drop their time column, 't' is already stored.
            append_to_df(data[:, 1:], col_names, df)

    return df

def append_to_df(data,col_names,df):
    """Write the leading columns of a 2-D array into df, in place.

    Column k of data is stored under col_names[k]; columns of data beyond
    len(col_names) are ignored. Nothing is returned.
    """
    for position in range(len(col_names)):
        df[col_names[position]] = data[:, position]
        
def make_col_names(val_name:str,val_size:int):
    """Build column names for a quantity with val_size components.

    A single component keeps the bare name; otherwise the names are
    "<val_name>_0", "<val_name>_1", ... (an empty list for val_size == 0).
    """
    if val_size == 1:
        return [val_name]
    return [val_name + "_" + str(component) for component in range(val_size)]


def horizon_to_pandas(horizon_path:Path):
    """Read one horizons HDF5 file into a dict of DataFrames.

    Every top-level entry of the file becomes one DataFrame (built by
    make_Bh_pandas), keyed by the entry name without its last four
    characters. Which entries exist depends on the file; not all horizon
    files may have AhC.
    """
    assert(horizon_path.exists())
    with h5py.File(horizon_path,'r') as hf:
        return {key[:-4]: make_Bh_pandas(hf[key]) for key in hf.keys()}

def read_horizon_across_Levs(path_list:List[Path]):
    """Read several horizon files and join them per horizon.

    Each file is read with horizon_to_pandas. DataFrames that belong to the
    same horizon name (e.g. "AhA", "AhB", "AhC") are concatenated in the
    order of path_list; the row index is not reset. Files may contain any
    subset of the horizons, so a run made of inspiral files ([AhA, AhB]) and
    ringdown files ([AhC]) yields all three keys with all of their data.

    Parameters
    ----------
    path_list : list of Path
        Horizon files, in the order in which their rows should appear.

    Returns
    -------
    dict
        Horizon name -> DataFrame.

    Raises
    ------
    ValueError
        If no horizon data was found (empty path_list or empty files).
    """
    frames_by_horizon = {}
    for path in path_list:
        for horizon_name, horizon_df in horizon_to_pandas(path).items():
            frames_by_horizon.setdefault(horizon_name, []).append(horizon_df)

    if not frames_by_horizon:
        raise ValueError("No horizon data found: path_list is empty or the files contain no horizons")

    final_dict = {}
    for horizon_name, frames in frames_by_horizon.items():
        # A single segment is returned as-is; several segments are stacked.
        final_dict[horizon_name] = frames[0] if len(frames) == 1 else pd.concat(frames)

    return final_dict

def moving_average(array,avg_len):
    """Box-filter array with a window of avg_len samples.

    Uses numpy's default ('full') convolution, so the result has
    len(array) + avg_len - 1 entries and the edge entries are sums over a
    partial window, still divided by avg_len.
    """
    window = np.ones(avg_len)
    summed = np.convolve(array, window)
    return summed / avg_len
    
def moving_average_valid(array,avg_len):
    """Box-filter array, keeping only positions where the window fits fully.

    Uses 'valid' convolution, so every entry is the mean of avg_len
    consecutive samples.
    """
    window = np.ones(avg_len)
    summed = np.convolve(array, window, 'valid')
    return summed / avg_len

def plot_graph_for_runs(runs_data_dict, x_axis, y_axis, minT, maxT, save_path=None, moving_avg_len=0, plot_fun = lambda x,y,label : plt.plot(x,y,label=label)):
  """Plot y_axis against x_axis for several runs on the current figure.

  Parameters
  ----------
  runs_data_dict : mapping
      Run label -> table that can be indexed by column name.
  x_axis, y_axis : str
      Column names; also used for the axis labels, the title and the file name.
  minT, maxT : number
      Window on the x column. The slice bounds are the number of samples
      with x < minT and x < maxT, which selects minT <= x < maxT provided x
      is sorted in increasing order.
  save_path : str or Path, optional
      If a string, the generated file name is appended to it verbatim (so a
      directory needs its trailing separator). If a Path, it is treated as
      the directory to save into. Nothing is saved when None.
  moving_avg_len : int
      0 plots the raw data; otherwise y is smoothed with
      moving_average_valid and x is trimmed by moving_avg_len-1 samples at
      the start so both have the same length.
  plot_fun : callable(x, y, label)
      Draws one curve; defaults to plt.plot.
  """
  for run_name, run_data in runs_data_dict.items():
    x_all = run_data[x_axis]
    start = len(x_all[x_all < minT])
    stop = len(x_all[x_all < maxT])

    if moving_avg_len == 0:
      x_data = x_all[start:stop]
      y_data = run_data[y_axis][start:stop]
    else:
      # 'valid' averaging drops moving_avg_len-1 samples; drop them from x too.
      x_data = x_all[start + moving_avg_len-1:stop]
      y_data = moving_average_valid(run_data[y_axis][start:stop], moving_avg_len)
    plot_fun(x_data, y_data, run_name)

  title = "\"" + y_axis + "\" vs \"" + x_axis + "\""
  if moving_avg_len != 0:
    title = title + "  " + f"avg_window_len={moving_avg_len}"

  plt.xlabel(x_axis)
  plt.ylabel(y_axis)
  plt.title(title)
  plt.legend()

  if save_path is not None:
    # Column names may contain "/" and ".", which are awkward in file names.
    fig_x_label = x_axis.replace("/","_").replace(".","_")
    fig_y_label = y_axis.replace("/","_").replace(".","_")
    save_file_name = f"{fig_y_label}_vs_{fig_x_label}_minT={minT}_maxT={maxT}_moving_avg_len={moving_avg_len}"
    for run_name in runs_data_dict.keys():
      save_file_name = save_file_name + "__" + run_name

    if isinstance(save_path, Path):
      plt.savefig(save_path/save_file_name)
    else:
      plt.savefig(save_path+save_file_name)

def load_data_from_levs(base_path:Path, runs_path:Dict[str,Path]):
  """Load the horizon data of several runs.

  Parameters
  ----------
  base_path : Path
      Directory the glob patterns are relative to.
  runs_path : dict
      Run label -> glob pattern (relative to base_path) matching the
      horizon files of that run.

  Returns
  -------
  dict
      Run label -> result of read_horizon_across_Levs for the matched files.
  """
  data_dict = {}
  for run_name, pattern in runs_path.items():
    # glob() makes no promise about ordering; sort so that the segments are
    # always concatenated in the same (lexicographic path) order.
    path_list = sorted(base_path.glob(pattern))
    print(path_list)
    data_dict[run_name] = read_horizon_across_Levs(path_list)
  return data_dict

def flatten_dict(horizon_data_dict:Dict[str,pd.DataFrame]) -> Dict[str,pd.DataFrame] :
  """Turn {run: {horizon: table}} into {"<run>_<horizon>": table}.

  The tables themselves are not copied.
  """
  return {
      run_name + "_" + horizon: per_horizon[horizon]
      for run_name, per_horizon in horizon_data_dict.items()
      for horizon in per_horizon
  }

In [None]:
runs_to_plot = {}
base_path = Path("/panfs/ds09/sxs/himanshu/gauge_stuff/gauge_driver_runs/runs")
runs_to_plot["76_ngd_master_mr1_50_3000"] =  "76_ngd_master_mr1_50_3000/Ev/Lev3_A?/Run/ApparentHorizons/Horizons.h5"
runs_to_plot["76_ngd_master_mr1_200_3000"] =  "76_ngd_master_mr1_200_3000/Ev/Lev3_A?/Run/ApparentHorizons/Horizons.h5"
# runs_to_plot["77_gd_Kerr_q1"] =  "77_gd_Kerr_q1/Ev/Lev1_A?/Run/ApparentHorizons/Horizons.h5"
# runs_to_plot["77_gd_Kerr_q3"] =  "77_gd_Kerr_q3/Ev/Lev1_A?/Run/ApparentHorizons/Horizons.h5"
# runs_to_plot["77_gd_Kerr_q1_Kerr"] =  "77_gd_Kerr_q1/Ev_Kerr/Lev1_A?/Run/ApparentHorizons/Horizons.h5"
# runs_to_plot["77_gd_Kerr_q3_Kerr"] =  "77_gd_Kerr_q3/Ev_Kerr/Lev1_A?/Run/ApparentHorizons/Horizons.h5"
# runs_to_plot["78_ngd_master_mr1"] =  "78_ngd_master_mr1/Ev/Lev3_A?/Run/ApparentHorizons/Horizons.h5"
runs_to_plot["79_ngd_master_mr1_1000_3000"] =  "79_ngd_master_mr1_1000_3000/Ev/Lev3_A?/Run/ApparentHorizons/Horizons.h5"
# runs_to_plot["79_ngd_master_mr1_200_3000"] =  "79_ngd_master_mr1_200_3000/Ev/Lev3_A?/Run/ApparentHorizons/Horizons.h5"
# runs_to_plot["80_ngd_master_mr1_100"] =  "80_ngd_master_mr1_100/Ev/Lev3_A?/Run/ApparentHorizons/Horizons.h5"
# runs_to_plot["80_ngd_master_mr1_50"] =  "80_ngd_master_mr1_50/Ev/Lev3_A?/Run/ApparentHorizons/Horizons.h5"
# runs_to_plot["80_ngd_master_mr1_10"] =  "80_ngd_master_mr1_10/Ev/Lev3_A?/Run/ApparentHorizons/Horizons.h5"
# runs_to_plot["80_ngd_master_mr1_5"] =  "80_ngd_master_mr1_5/Ev/Lev3_A?/Run/ApparentHorizons/Horizons.h5"
# runs_to_plot["80_ngd_master_mr1_300"] =  "80_ngd_master_mr1_300/Ev/Lev3_A?/Run/ApparentHorizons/Horizons.h5"
# runs_to_plot["81_gd_Kerr_q3_0_9_0__0_0_0"] =  "81_gd_Kerr_q3_0_9_0__0_0_0/Ev/Lev3_A?/Run/ApparentHorizons/Horizons.h5"
# runs_to_plot["81_gd_DH_q3_0_9_0__0_0_0"] =  "81_gd_DH_q3_0_9_0__0_0_0/Ev/Lev3_A?/Run/ApparentHorizons/Horizons.h5"
# runs_to_plot["82_ngd_master_mr1_50_3000_DH_to_DH"] =  "82_ngd_master_mr1_50_3000_DH_to_DH/Ev/Lev3_A?/Run/ApparentHorizons/Horizons.h5"
# runs_to_plot["83_ngd_master_mr1_200_3000_no_eps"] =  "83_ngd_master_mr1_200_3000_no_eps/Ev/Lev3_A?/Run/ApparentHorizons/Horizons.h5"
# runs_to_plot["83_ngd_master_mr1_200_3000_no_eps_no_lsr"] =  "83_ngd_master_mr1_200_3000_no_eps_no_lsr/Ev/Lev3_A?/Run/ApparentHorizons/Horizons.h5"
# runs_to_plot["84_gd_KerrI_3000_200"] =  "84_gd_KerrI_3000_200/Ev/Lev3_A?/Run/ApparentHorizons/Horizons.h5"
# runs_to_plot["84_gd_DH_3000_200"] =  "84_gd_DH_3000_200/Ev/Lev3_A?/Run/ApparentHorizons/Horizons.h5"
# runs_to_plot["83_no_eps_Ev_wrong_evolution"] =  "83_ngd_master_mr1_200_3000_no_eps/Ev_wrong_evolution/Lev3_A?/Run/ApparentHorizons/Horizons.h5"
runs_to_plot["83_no_eps_Ev_pow2"] =  "83_ngd_master_mr1_200_3000_no_eps/Ev_pow2/Lev3_A?/Run/ApparentHorizons/Horizons.h5"
runs_to_plot["83_no_eps_Ev_pow6"] =  "83_ngd_master_mr1_200_3000_no_eps/Ev_pow6/Lev3_A?/Run/ApparentHorizons/Horizons.h5"
# runs_to_plot["83_no_eps_Ev_tanh15"] =  "83_ngd_master_mr1_200_3000_no_eps/Ev_tanh15/Lev3_A?/Run/ApparentHorizons/Horizons.h5"
# runs_to_plot["83_no_eps_Ev_tanh7"] =  "83_ngd_master_mr1_200_3000_no_eps/Ev_tanh7/Lev3_A?/Run/ApparentHorizons/Horizons.h5"
# runs_to_plot["83_no_eps_Ev_tanh7_lsr_correct_evolution"] =  "83_ngd_master_mr1_200_3000_no_eps/Ev_tanh7_lsr_correct_evolution/Lev3_A?/Run/ApparentHorizons/Horizons.h5"

data_dict = load_data_from_levs(base_path, runs_to_plot)
data_dict = flatten_dict(data_dict)
data_dict[list(data_dict.keys())[0]].columns

In [None]:
moving_avg_len = 0
save_path = None

x_axis = 't'
# y_axis = 'ArealMass'
# y_axis = 'ChristodoulouMass'
# y_axis = 'CoordCenterInertial_0'
# y_axis = 'CoordCenterInertial_1'
# y_axis = 'CoordCenterInertial_2'
# y_axis = 'DimensionfulInertialSpin_0'
# y_axis = 'DimensionfulInertialSpin_1'
# y_axis = 'DimensionfulInertialSpin_2'
y_axis = 'DimensionfulInertialSpinMag'
# y_axis = 'SpinFromShape_0'
# y_axis = 'SpinFromShape_1'
# y_axis = 'SpinFromShape_2'
# y_axis = 'SpinFromShape_3'
# y_axis = 'chiInertial_0'
# y_axis = 'chiInertial_1'
# y_axis = 'chiInertial_2'
# y_axis = 'chiMagInertial'



# moving_avg_len=25
minT = 2500
maxT = 5000

plot_fun = lambda x,y,label : plt.plot(x,y,label=label)
# plot_fun = lambda x,y,label : plt.semilogy(x,y,label=label)
# plot_fun = lambda x,y,label : plt.loglog(x,y,label=label)
# plot_fun = lambda x,y,label : plt.scatter(x,y,label=label)
# save_path = "/panfs/ds09/sxs/himanshu/scripts/report/not_tracked/temp2/"

filtered_dict = {}
allowed_horizons = ["AhA"]
for horizons in allowed_horizons:
  for runs_keys in data_dict.keys():
    if horizons in runs_keys:
      filtered_dict[runs_keys] = data_dict[runs_keys]

with plt.style.context('default'):
  plt.rcParams["figure.figsize"] = (12,10)
  plt.rcParams["figure.autolayout"] = True
  plot_graph_for_runs(filtered_dict, x_axis, y_axis, minT, maxT, save_path=save_path, moving_avg_len=moving_avg_len, plot_fun=plot_fun)

In [None]:
base_path = Path("/panfs/ds09/sxs/himanshu/gauge_stuff/gauge_driver_runs/runs/83_ngd_master_mr1_200_3000_no_eps/Ev_pow2")
base_path = Path("/panfs/ds09/sxs/himanshu/gauge_stuff/gauge_driver_runs/runs/83_ngd_master_mr1_200_3000_no_eps/Ev_pow6")
# base_path = Path("/panfs/ds09/sxs/himanshu/gauge_stuff/gauge_driver_runs/runs/76_ngd_master_mr1_50_3000/Ev")
# base_path = Path("/panfs/ds09/sxs/himanshu/gauge_stuff/gauge_driver_runs/runs/76_ngd_master_mr1_200_3000/Ev")
# base_path = Path("/panfs/ds09/sxs/himanshu/gauge_stuff/gauge_driver_runs/runs/79_ngd_master_mr1_200_3000/Ev")
# base_path = Path("/panfs/ds09/sxs/himanshu/gauge_stuff/gauge_driver_runs/runs/79_ngd_master_mr1_1000_3000/Ev")
file_pattern = "Lev3_A?/Run/ApparentHorizons/Horizons.h5"
path_list = list(base_path.glob(file_pattern))
path_list

In [None]:
df = read_horizon_across_Levs(path_list)
print(df.keys())
df["AhA"].keys()

In [None]:
x = 't'
y = "SpinFromShape_2"
plt.semilogy(df['AhA'][x],df['AhA'][y],label="AhA")
plt.semilogy(df['AhB'][x],df['AhB'][y],label="AhB")
# plt.plot(df['AhC'][x],df['AhC'][y],label="AhC")
plt.xlabel(x)
plt.ylabel(y)
plt.legend()

In [None]:
plt.plot(df['AhC'][x],df['AhC'][y],label="AhC")
plt.xlabel(x)
plt.ylabel(y)
plt.legend()

In [None]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(df["AhA"].describe())

# Read profiler results

In [None]:
from pathlib import Path
from copy import deepcopy
from pprint import pprint

In [None]:
# file_path = Path("AdjustGridExtents.h5")
# file_path = Path("CachedH5Info.h5")
# file_path = Path("ControlNthDeriv.h5")
file_path = Path("FilterDiagnostics.h5")
# file_path = Path("IncProfiler.h5")
# file_path = Path("MemProfiler.h5")
# file_path = Path("OrbitDiagnostics.h5")
# file_path = Path("PowerDiagnostics.h5")
# file_path = Path("Profiler.h5")
# file_path = Path("ProjectedCon.h5")
# file_path = Path("RhsExpense.h5")
file_path = Path("/groups/sxs/hchaudha/spec_runs/high_accuracy_L35/cce_bondi/Lev3_R0200/Lev3_R0200.h5")
# file_path = Path("/groups/sxs/hchaudha/spec_runs/high_accuracy_L35/Ev/Lev3_AC/Run/GW2/BondiCceR0258.h5")

In [None]:
with h5py.File(file_path,'r') as f:
    names = []
    f.visit(names.append)
    f.visit(print)
    data = np.array(f['Beta.dat'])
    # print(np.array(data),np.array(data).shape)

print(names)

In [None]:
a = list(Path("/groups/sxs/hchaudha/spec_runs/high_accuracy_L35").glob(f"Ev/Lev4_??/Run/GW2/BondiCceR0100.h5"))
a = a+list(Path("/groups/sxs/hchaudha/spec_runs/high_accuracy_L35").glob(f"Ev/Lev4_Ringdown/Lev4_??/Run/GW2/BondiCceR0100.h5"))
a.sort()
a

In [None]:
plt.plot(data[:,0])

In [None]:
with h5py.File(file_path,'r') as f:
    steps = set()
    procs = set()
    names = []
    f.visit(names.append)
    for name in names:
      step = name.split('.')[0][4:]
      steps.add(step)
      if 'Proc' in name:
         procs.add(name.split('/')[-1][4:-4])

    dict_list = []
    for step in steps:
       for proc in procs:
          data = f[f'Step{step}.dir/Proc{proc}.txt'][0].decode()

          lines = data.split("\n")
          time = float((lines[0].split("=")[-1])[:-1])

          curr_dict = {
             "t(M)": time,
             "step": step,
             "proc": proc
          }
          # Find where the columns end
          a = lines[4]
          event_end = a.find("Event")+5
          cum_end = a.find("cum(%)")+6
          exc_end = a.find("exc(%)")+6
          inc_end = a.find("inc(%)")+6

          for line in lines[6:-2]:
            Event = line[:event_end].strip()
            cum = float(line[event_end:cum_end].strip())
            exc = float(line[cum_end:exc_end].strip())
            inc = float(line[exc_end:inc_end].strip())
            N = int(line[inc_end:].strip())
            # print(a)
            # a = line.split("  ")
            # Event,cum,exc,inc,N = [i.strip() for i in a if i!= '']
            curr_dict[f'{Event}_cum'] = cum
            curr_dict[f'{Event}_exc'] = exc
            curr_dict[f'{Event}_inc'] = inc
            curr_dict[f'{Event}_N'] = N

          dict_list.append(curr_dict)


print(steps,procs)

In [None]:
pd.DataFrame(dict_list)

In [None]:
lines = data.split("\n")
for line in lines[6:-2]:
  a = line.split("  ")
  Event,cum,exc,inc,N = [i.strip() for i in a if i!= '']
  print(Event,cum,exc,inc,N)

In [None]:
Event,cum,exc,inc,N

In [None]:
float(exc)

In [None]:
a = "                                                            Event    cum(%)    exc(%)    inc(%)         N"
a.find("cum(%)")

In [None]:
a[:75]

In [None]:
event_end = a.find("Event")+5
cum_end = a.find("cum(%)")+6
exc_end = a.find("exc(%)")+6
inc_end = a.find("inc(%)")+6
event = a[:event_end].strip()
cum = float(a[event_end:cum_end].strip())
exc = float(a[event_end:exc_end].strip())
inc = float(a[event_end:inc_end].strip())
N = int(a[inc_end:].strip())
print(event,cum,exc,inc,N)

In [None]:
a[event_end:]

## Parsing speed test

In [None]:
file_path = Path("/groups/sxs/hchaudha/scripts/dealing_with_h5_files/del/extracted-./PowerDiagnostics/SphereA0.dir/Bf0I1_ConvergenceFactor.dat")
file_path = Path("/groups/sxs/hchaudha/spec_runs/2_SpKS_q1_sA_0_0_9_sB_0_0_9_d15/Ev/Lev3_AB/Run/ConstraintNorms/GhCe_Linf.dat")

In [None]:
def parse_text_file_to_csv(input_file):
    """Parse a whitespace-separated numeric text file in pure Python.

    Lines starting with '#' and blank lines are skipped; every other line is
    split on whitespace and converted to floats.

    Returns
    -------
    list of list of float
        One inner list per data line. (Despite the name, no CSV is written.)

    Raises
    ------
    ValueError
        If a data line contains a token that is not a number.
    """
    data = []
    with open(input_file, 'r') as file:
        for line in file:
            stripped = line.strip()
            if not stripped or line.startswith('#'):
                continue
            data.append([float(token) for token in stripped.split()])
    return data

def parse_text_file_to_csv_np(input_file):
    """Parse a numeric text file with numpy and print its header names.

    The header names are taken from '#' lines containing '=' (the text after
    "= ") and are only printed. The data is read by np.genfromtxt with '#'
    as comment marker and four spaces as column delimiter, and returned as
    an array.
    """
    with open(input_file, 'r') as handle:
        all_lines = handle.readlines()

    header_names = []
    for raw in all_lines:
        if raw.startswith('#') and "=" in raw:
            header_names.append(raw.strip().split('= ')[1])
    print(header_names)

    return np.genfromtxt(input_file,comments="#",delimiter='    ')

In [None]:
file_path = Path("/groups/sxs/hchaudha/spec_runs/3_DH_q1_ns_d18_L6/Ev/Lev6_AD/Run/TStepperDiag.dat")

In [None]:
%%timeit
np.genfromtxt(file_path,comments="#",delimiter='  ',dtype=float)

In [None]:
%%timeit
parse_text_file_to_csv(file_path)

In [None]:
parse_text_file_to_csv_np(file_path)

In [None]:
parse_text_file_to_csv(file_path)

In [None]:
np.genfromtxt(file_path,comments="#",delimiter='  ',dtype=float)

In [None]:
%%timeit
# Raw string: "\s" is not a recognised escape in an ordinary string literal
# and triggers a warning on recent Python versions.
pd.read_csv(file_path,sep=r"\s+",comment="#")

In [None]:
def find_column_names_dat_file(file_path):
  """Extract the column names from the comment header of a .dat file.

  The header is the leading run of lines that contain '#'. Every header line
  that also contains '=' contributes one name: the text after the last '=',
  with surrounding whitespace removed. Reading stops at the first line
  without '#', so the data part of the file is never scanned.

  Returns
  -------
  list of str
      Column names in file order (empty if there is no such header).
  """
  cols_names = []
  with open(file_path,'r') as f:
    for line in f:
      if "#" not in line:
        break
      if "=" in line:
        # strip() instead of fixed [1:-1] slicing: works with or without a
        # space after '=' and with or without a trailing newline.
        cols_names.append(line.split('=')[-1].strip())
  return cols_names

def read_dat_file(file_path):
  """Read a whitespace-separated .dat file into a DataFrame.

  Column names come from find_column_names_dat_file; lines (or line tails)
  starting with '#' are ignored by the parser.
  """
  cols_names = find_column_names_dat_file(file_path)
  # Raw string so that "\s" reaches the regex engine without an
  # invalid-escape warning.
  return pd.read_csv(file_path,sep=r"\s+",comment="#",names=cols_names)

In [None]:
AdjustGridExtents_path = Path("/groups/sxs/hchaudha/scripts/dealing_with_h5_files/del/extracted-./AdjustGridExtents")
CachedH5Info_path = Path("/groups/sxs/hchaudha/scripts/dealing_with_h5_files/del/extracted-./CachedH5Info")
ControlNthDeriv_path = Path("/groups/sxs/hchaudha/scripts/dealing_with_h5_files/del/extracted-./ControlNthDeriv")
FilterDiagnostics_path = Path("/groups/sxs/hchaudha/scripts/dealing_with_h5_files/del/extracted-./FilterDiagnostics")
IncProfiler_path = Path("/groups/sxs/hchaudha/scripts/dealing_with_h5_files/del/extracted-./IncProfiler")
MemProfiler_path = Path("/groups/sxs/hchaudha/scripts/dealing_with_h5_files/del/extracted-./MemProfiler")
OrbitDiagnostics_path = Path("/groups/sxs/hchaudha/scripts/dealing_with_h5_files/del/extracted-./OrbitDiagnostics")
PowerDiagnostics_path = Path("/groups/sxs/hchaudha/scripts/dealing_with_h5_files/del/extracted-./PowerDiagnostics")
Profiler_path = Path("/groups/sxs/hchaudha/scripts/dealing_with_h5_files/del/extracted-./Profiler")
ProjectedCon_path = Path("/groups/sxs/hchaudha/scripts/dealing_with_h5_files/del/extracted-./ProjectedCon")
RhsExpense_path = Path("/groups/sxs/hchaudha/scripts/dealing_with_h5_files/del/extracted-./RhsExpense")

## Read various h5 files:

In [None]:
def list_all_dir_and_dat_files(main_folder:Path):
  """Split the direct children of main_folder by name.

  Returns (dirs, dat_files): children whose name contains ".dir", and
  children whose name contains ".dat" but not ".dir". The test is on the
  name only (substring match), not on the file type. Order follows
  main_folder.iterdir().
  """
  entries = list(main_folder.iterdir())
  dirs = [entry for entry in entries if ".dir" in entry.name]
  dat_files = [
      entry for entry in entries
      if ".dir" not in entry.name and ".dat" in entry.name
  ]
  return dirs,dat_files


def pad_tuple(var,required_size):
  """Pad a string or tuple with '' up to required_size entries.

  Examples:
    pad_tuple('a',4)          -> ('a', '', '', '')
    pad_tuple((1,2),4)        -> (1, 2, '', '')
    pad_tuple(('1','2'),4)    -> ('1', '2', '', '')

  Raises Exception if a tuple is longer than required_size and ValueError
  if var is neither a tuple nor a string.
  """
  if isinstance(var,tuple):
    if len(var) > required_size:
      raise Exception(f"Length of {var}={len(var)} is larger than the {required_size=}")
    entries = var
  elif isinstance(var, str):
    # A bare string counts as a single entry.
    entries = (var,)
  else:
    raise ValueError(f"{var} is of type {type(var)}. Only string and tuples are supported.")

  padded = [''] * required_size
  for slot, entry in enumerate(entries):
    padded[slot] = entry
  return tuple(padded)


#### Profiler.h5

In [None]:
def read_profiler_old(file_name):
  """Parse the per-process profiler tables of an HDF5 file into a flat DataFrame.

  The file is expected to hold entries named 'Step<step>.dir/Proc<proc>.txt'
  whose first element is a bytes object containing a text report. One row is
  produced per (step, proc) pair with the columns 't(M)', 'step', 'proc' and,
  for every event in the report, '<Event>_cum', '<Event>_exc', '<Event>_inc'
  and '<Event>_N'.

  NOTE(review): every (step, proc) combination is looked up, so a step that
  lacks one of the procs would fail on the dataset access - confirm the files
  are always complete.
  """
  with h5py.File(file_name,'r') as f:
    steps = set()
    procs = set()
    names = []
    f.visit(names.append)
    for name in names:
      # Text before the first '.', minus its first four characters
      # ("Step" for names of the form 'Step<step>.dir...').
      step = name.split('.')[0][4:]
      steps.add(step)
      if 'Proc' in name:
        # Last path component without its first four ("Proc") and last four
        # (".txt") characters.
        procs.add(name.split('/')[-1][4:-4])

    dict_list = []
    for step in steps:
      for proc in procs:
        data = f[f'Step{step}.dir/Proc{proc}.txt'][0].decode()

        lines = data.split("\n")
        # Time is the text after the last '=' on the first line, minus one
        # trailing character.
        time = float((lines[0].split("=")[-1])[:-1])

        curr_dict = {
            "t(M)": time,
            "step": step,
            "proc": proc
        }
        # The table is fixed-width: the right edge of each header label on
        # line 4 marks where that column ends in every data line.
        a = lines[4]
        event_end = a.find("Event")+5
        cum_end = a.find("cum(%)")+6
        exc_end = a.find("exc(%)")+6
        inc_end = a.find("inc(%)")+6

        # Data rows start at line 6; the last two lines are not data rows.
        for line in lines[6:-2]:
          Event = line[:event_end].strip()
          cum = float(line[event_end:cum_end].strip())
          exc = float(line[cum_end:exc_end].strip())
          inc = float(line[exc_end:inc_end].strip())
          N = int(line[inc_end:].strip())
          # Splitting on double spaces (kept below for reference) was replaced
          # by the fixed-width slicing above.
          # a = line.split("  ")
          # Event,cum,exc,inc,N = [i.strip() for i in a if i!= '']
          curr_dict[f'{Event}_cum'] = cum
          curr_dict[f'{Event}_exc'] = exc
          curr_dict[f'{Event}_inc'] = inc
          curr_dict[f'{Event}_N'] = N

        dict_list.append(curr_dict)
  return pd.DataFrame(dict_list)

In [None]:
def read_profiler_multiindex(folder_path:Path):
  """Parse extracted profiler text files into a DataFrame with a (proc, t(M)) row index.

  NOTE: a later cell of this notebook defines another function with the same
  name; once that cell has run, this definition is no longer reachable.

  folder_path is expected to contain 'Step<step>.dir' directories, each
  holding 'Proc<proc>.txt' reports. One row is produced per (step, proc)
  pair. Rows are indexed by the strings (proc, time); columns form a
  two-level MultiIndex ('metric', 'process') where metric is one of
  'cum', 'exc', 'inc', 'N' and process is the event name. The plain keys
  'time', 'step' and 'proc' become columns with an empty second level.
  """
  dir_paths,dat_paths = list_all_dir_and_dat_files(folder_path)
  steps = set()
  # Get step names
  for dir in dir_paths:
    step = dir.name.split('.')[0][4:]
    steps.add(step)

  procs = set()
  # Get the proc names (taken from the first step directory only)
  for txt in dir_paths[0].iterdir():
    if ".txt" in txt.name and "Summary" not in txt.name:
      procs.add(txt.name[4:-4])

  dict_list = []
  col_names = set()
  row_names = []
  for step in steps:
    for proc in procs:
      txt_file_path = folder_path/f'Step{step}.dir/Proc{proc}.txt'

      with txt_file_path.open("r") as f:
        lines = f.readlines()

      # Text after the last '=' on the first line, minus two trailing
      # characters (readlines keeps the line ending).
      time = float((lines[0].split("=")[-1])[:-2])

      curr_dict = {
          "time": time,
          "step": step,
          "proc": proc
      }

      # Fixed-width table: the right edge of each header label on line 4
      # marks where that column ends in every data line.
      a = lines[4]
      event_end = a.find("Event")+5
      cum_end = a.find("cum(%)")+6
      exc_end = a.find("exc(%)")+6
      inc_end = a.find("inc(%)")+6

      row_names.append((str(proc),str(time)))

      # Data rows start at line 6; the last two lines are not data rows.
      for line in lines[6:-2]:
        Event = line[:event_end].strip()
        cum = float(line[event_end:cum_end].strip())
        exc = float(line[cum_end:exc_end].strip())
        inc = float(line[exc_end:inc_end].strip())
        N = int(line[inc_end:].strip())
        # print(a)
        # a = line.split("  ")
        # Event,cum,exc,inc,N = [i.strip() for i in a if i!= '']
        col_names.add(Event)
        curr_dict[("cum",Event)] = cum
        curr_dict[("exc",Event)] = exc
        curr_dict[("inc",Event)] = inc
        curr_dict[("N",Event)] = N

      dict_list.append(curr_dict)

  # Multi index rows
  index = pd.MultiIndex.from_tuples(row_names, names=["proc","t(M)"])
  df = pd.DataFrame(dict_list,index=index)
  
  # Multi index cols: plain string keys get an empty second level
  multi_index_columns = [(k if isinstance(k, tuple) else (k, '')) for k in df.columns]
  df.columns = pd.MultiIndex.from_tuples(multi_index_columns)
  df.columns.names = ['metric', 'process']

  # Usage examples for the returned frame (called `data` here):
  # data.xs('24', level="proc")['N']
  # data.xs('0.511442', level="t(M)")['cum']
  # data.xs(('0','0.511442'),level=('proc','t(M)'))
  # data.xs('cum',level='metric',axis=1) = data['cum']
  # data.xs('MPI::MPreduceAdd(MV<double>)',level='process',axis=1)
  # data[data['time']<50]
  # data[data['time']<50]['cum'].xs('0',level='proc')['MPI::MPreduceAdd(MV<double>)']
  return df.sort_index()

In [None]:
def read_profiler_multiindex(folder_path:Path):
  """Parse extracted profiler text files into a DataFrame with 3-level columns.

  folder_path is expected to contain 'Step<step>.dir' directories, each
  holding 'Proc<proc>.txt' reports (files with "Summary" in the name are
  ignored). The set of procs is taken from the first step directory.

  One row is produced per (step, proc) pair, ordered by step and then by
  proc. Columns form a MultiIndex with levels ('metric', 'process', 'procs'):
  metric is 'cum', 'exc', 'inc' or 'N', process is the event name and procs
  is the proc label. The plain keys 'time', 'step' and 'proc' become columns
  whose lower levels are empty strings. A row only has values in the columns
  of its own proc.

  Example:
    data.xs(('exc','24'),level=('metric','procs'),axis=1)
  """
  dir_paths,_ = list_all_dir_and_dat_files(folder_path)

  def label_order(label):
    # Shorter labels first, then alphabetical: this orders digit strings
    # numerically ("2" before "10") without requiring them to be numeric.
    return (len(label), label)

  # Step label = directory name up to the first '.', minus the leading "Step".
  steps = sorted({step_dir.name.split('.')[0][4:] for step_dir in dir_paths}, key=label_order)

  # Proc label = file name minus the leading "Proc" and the trailing ".txt".
  procs = sorted(
      {txt.name[4:-4] for txt in dir_paths[0].iterdir()
       if ".txt" in txt.name and "Summary" not in txt.name},
      key=label_order)

  dict_list = []
  for step in steps:
    for proc in procs:
      txt_file_path = folder_path/f'Step{step}.dir/Proc{proc}.txt'

      with txt_file_path.open("r") as f:
        lines = f.readlines()

      # Text after the last '=' on the first line, minus two trailing
      # characters (readlines keeps the line ending).
      time = float((lines[0].split("=")[-1])[:-2])

      curr_dict = {
          "time": time,
          "step": step,
          "proc": proc
      }

      # Fixed-width table: the right edge of each header label on line 4
      # marks where that column ends in every data line.
      header = lines[4]
      event_end = header.find("Event")+5
      cum_end = header.find("cum(%)")+6
      exc_end = header.find("exc(%)")+6
      inc_end = header.find("inc(%)")+6

      # Data rows start at line 6; the last two lines are not data rows.
      for line in lines[6:-2]:
        Event = line[:event_end].strip()
        curr_dict[("cum",Event,str(proc))] = float(line[event_end:cum_end].strip())
        curr_dict[("exc",Event,str(proc))] = float(line[cum_end:exc_end].strip())
        curr_dict[("inc",Event,str(proc))] = float(line[exc_end:inc_end].strip())
        curr_dict[("N",Event,str(proc))] = int(line[inc_end:].strip())

      dict_list.append(curr_dict)

  df = pd.DataFrame(dict_list)

  # Multi index cols: plain string keys are padded with empty lower levels.
  multi_index_columns = [pad_tuple(k,3) for k in df.columns]
  df.columns = pd.MultiIndex.from_tuples(multi_index_columns)
  df.columns.names = ['metric', 'process', 'procs']

  return df.sort_index()

In [None]:
data = read_profiler_multiindex(Profiler_path)
data

In [None]:
data.xs(('exc','24'),level=('metric','procs'),axis=1)

#### AdjustGridExtents.h5

In [None]:
def read_AdjustGridExtent_multiindex(folder_path:Path):
  """Return the domain names found in an extracted AdjustGridExtents folder.

  A domain is any direct child of folder_path whose name contains ".dir";
  its name is reported without the last four characters.

  NOTE: work in progress - despite the name, only the set of domain names is
  returned; the per-domain .dat files are not read yet.
  """
  dir_paths,_ = list_all_dir_and_dat_files(folder_path)
  return {domain_dir.name[:-4] for domain_dir in dir_paths}

read_AdjustGridExtent_multiindex(AdjustGridExtents_path)
  # procs = set()
  # # Get the proc names
  # for txt in dir_paths[0].iterdir():
  #   if ".txt" in txt.name and "Summary" not in txt.name:
  #     procs.add(txt.name[4:-4])

  # dict_list = []
  # col_names = set()
  # row_names = []
  # for step in steps:
  #   for proc in procs:
  #     txt_file_path = folder_path/f'Step{step}.dir/Proc{proc}.txt'

  #     with txt_file_path.open("r") as f:
  #       lines = f.readlines()

  #     time = float((lines[0].split("=")[-1])[:-2])

  #     curr_dict = {
  #         "time": time,
  #         "step": step,
  #         "proc": proc
  #     }

  #     # Find where the columns end
  #     a = lines[4]
  #     event_end = a.find("Event")+5
  #     cum_end = a.find("cum(%)")+6
  #     exc_end = a.find("exc(%)")+6
  #     inc_end = a.find("inc(%)")+6

  #     row_names.append((str(proc),str(time)))

  #     for line in lines[6:-2]:
  #       Event = line[:event_end].strip()
  #       cum = float(line[event_end:cum_end].strip())
  #       exc = float(line[cum_end:exc_end].strip())
  #       inc = float(line[exc_end:inc_end].strip())
  #       N = int(line[inc_end:].strip())
  #       # print(a)
  #       # a = line.split("  ")
  #       # Event,cum,exc,inc,N = [i.strip() for i in a if i!= '']
  #       col_names.add(Event)
  #       curr_dict[("cum",Event)] = cum
  #       curr_dict[("exc",Event)] = exc
  #       curr_dict[("inc",Event)] = inc
  #       curr_dict[("N",Event)] = N

  #     dict_list.append(curr_dict)

  # # Multi index rows
  # index = pd.MultiIndex.from_tuples(row_names, names=["proc","t(M)"])
  # df = pd.DataFrame(dict_list,index=index)
  
  # # Multi index cols
  # multi_index_columns = [(k if isinstance(k, tuple) else (k, '')) for k in df.columns]
  # df.columns = pd.MultiIndex.from_tuples(multi_index_columns)
  # df.columns.names = ['metric', 'process']

  # data.xs('24', level="proc")['N']
  # data.xs('0.511442', level="t(M)")['cum']
  # data.xs(('0','0.511442'),level=('proc','t(M)'))
  # data.xs('cum',level='metric',axis=1) = data['cum']
  # data.xs('MPI::MPreduceAdd(MV<double>)',level='process',axis=1)
  # return df.sort_index()

# Checkpoint files

In [None]:
import h5py
import numpy as np
from typing import Dict, Any
from pathlib import Path

def read_h5_file(file_path: str, group_names=('InitGridHi', 'InitHhatt', 'kappa', 'psi')) -> Dict[str, Any]:
    """
    Read selected top-level entries of an HDF5 file into a nested dictionary.
    
    Parameters:
    -----------
    file_path : str
        Path to the HDF5 file (a pathlib.Path works as well)
    group_names : iterable of str, optional
        Names of the top-level entries to read. Names that are not present
        in the file are skipped silently. Defaults to the four groups
        'InitGridHi', 'InitHhatt', 'kappa' and 'psi'.
        
    Returns:
    --------
    dict
        One key per entry that was found. Groups become nested dictionaries,
        datasets become their fully loaded values.

    Raises:
    -------
    FileNotFoundError
        If file_path does not exist.
    OSError
        If the file cannot be opened or read as HDF5.
    """
    
    def read_item(item):
        """
        Load a dataset, or recursively load every member of a group.
        """
        if isinstance(item, h5py.Dataset):
            # item[()] reads the whole dataset into memory
            return item[()]
        if isinstance(item, h5py.Group):
            loaded = {}
            for name, member in item.items():
                # Members that are neither dataset nor group are left out
                if isinstance(member, (h5py.Dataset, h5py.Group)):
                    loaded[name] = read_item(member)
            return loaded
        return None

    # Check if file exists
    if not Path(file_path).exists():
        raise FileNotFoundError(f"The file {file_path} does not exist")

    try:
        with h5py.File(file_path, 'r') as f:
            data = {}
            for group_name in group_names:
                if group_name in f:
                    data[group_name] = read_item(f[group_name])
        return data
    
    except OSError as e:
        # Keep the original error attached as the explicit cause
        raise OSError(f"Error reading HDF5 file: {str(e)}") from e




In [None]:

file_path = Path("/groups/sxs/hchaudha/scripts/dealing_with_h5_files/del/4882/Cp-VarsGr_SphereA0.h5")
data = read_h5_file(file_path)

# Example of accessing data
print("Available groups:", list(data.keys()))

# Example: accessing coordinates if they exist
if 'InitGridHi' in data and 'Step000000' in data['InitGridHi']:
    coords = data['InitGridHi']['Step000000']
    if 'x' in coords:
        print("\nShape of x coordinates:", coords['x'].shape)
        
# Example: accessing kappa components
if 'kappa' in data and 'Step000000' in data['kappa']:
    kappa_data = data['kappa']['Step000000']
    print("\nAvailable kappa components:", list(kappa_data.keys()))


In [None]:
data['psi']

In [None]:
len(data['psi']['Step000000']['tt'])

In [None]:
plt.plot(data['psi']['Step000000']['xx'])

# CCE vol data

In [None]:
data_path = Path("/groups/sxs/hchaudha/spec_runs/single_bh_CCE/runs/obs_vol_data/rad_2500/red_cce.h5")

In [None]:
vol_vars = set()
max_val = {}
R_pts = 0
with h5py.File(data_path,'r') as f:
    names = []
    f.visit(names.append)
    # print(f['Cce/VolumeData/BondiBeta/CompactifiedRadius_0.dat'].shape)
    for name in names:
        # if 'VolumeData/' in name and 'CompactifiedRadius' not in name:
        #     vol_vars.add(name.split('/')[-1])
        #     print(name)
        if 'CompactifiedRadius' in name:
            # max_val[name] = np.max(np.abs(f[name][0,1:]),axis=1)
            max_val[name] = f[name][0,1:]
            print(name)

    max_val['Cce/VolumeData/InertialRetardedTime.dat'] = np.array(f['Cce/VolumeData/InertialRetardedTime.dat'])
    max_val['Cce/VolumeData/OneMinusY.dat'] = np.array(f['Cce/VolumeData/OneMinusY.dat'])

    # data = f['Cce/VolumeData/W/CompactifiedRadius_0.dat'][()]

# vol_vars

In [None]:
def get_info(h5path:Path):
    """List the volume-data names and the largest compactified-radius index.

    Every object name in the file that contains 'VolumeData/' is inspected.

    Returns
    -------
    (var_names, num_comp_rad)
        var_names: set of last path components of the names that do not
        mention 'CompactifiedRadius'.
        num_comp_rad: the largest integer N found in a
        'CompactifiedRadius_N.<ext>' name, or 0 if there is none. Note that
        this is the largest index, not the number of radii.
    """
    with h5py.File(h5path,'r') as f:
        names = []
        f.visit(names.append)

    volume_names = [name for name in names if 'VolumeData/' in name]

    var_names = {
        name.split('/')[-1]
        for name in volume_names
        if 'CompactifiedRadius' not in name
    }

    num_comp_rad = 0
    for name in volume_names:
        if 'CompactifiedRadius' in name:
            radius_index = int(name.split("CompactifiedRadius_")[-1].split('.')[0])
            num_comp_rad = max(num_comp_rad, radius_index)

    return var_names,num_comp_rad

var_names,num_comp_rad = get_info(data_path)

In [None]:
def get_data_all_comp_rad(h5_datapath:Path, var_name:str, comp_rad_list:list, time_slice:slice , red_func=np.linalg.norm):
    """Reduce one volume variable over each requested compactified radius.

    For every entry of comp_rad_list the rows selected by time_slice are read
    from 'Cce/VolumeData/<var_name>/CompactifiedRadius_<entry>.dat', column 0
    is dropped, and red_func(values, axis=1) collapses each row to one
    number.

    Returns
    -------
    (t, data)
        t: column 0 of the CompactifiedRadius_0 dataset for the same rows.
        data: dict mapping each entry of comp_rad_list to its reduced values.
    """
    with h5py.File(h5_datapath,'r') as f:
        data = {
            comp_rad: red_func(
                f[f'Cce/VolumeData/{var_name}/CompactifiedRadius_{comp_rad}.dat'][time_slice,1:],
                axis=1,
            )
            for comp_rad in comp_rad_list
        }
        t = f[f'Cce/VolumeData/{var_name}/CompactifiedRadius_0.dat'][time_slice,0]
    return t,data


In [None]:
data_path = Path("/groups/sxs/hchaudha/spec_runs/single_bh_CCE/runs/obs_vol_data/rad_0100/red_cce.h5")

In [None]:
var_name = 'Q'
t,var_data = get_data_all_comp_rad(data_path, var_name, range(num_comp_rad), slice(0,-1,100))

In [None]:
var_data[0]

for comp_rad in var_data:
    plt.plot(t,var_data[comp_rad],label=f'CompactifiedRadius_{comp_rad}')
plt.xlabel('t')
plt.ylabel(f'L2({var_name})')
plt.title("Extraction radius: " + str(data_path).split("/")[-2][4:] + "M")
plt.yscale('log')
plt.tight_layout()
plt.legend()

In [None]:
data_path = Path("/groups/sxs/hchaudha/spec_runs/single_bh_CCE/runs/obs_vol_data/rad_0100/red_cce.h5")
data_path = Path("/groups/sxs/hchaudha/spec_runs/single_bh_CCE/runs/obs_vol_data/rad_0100_IC/red_cce.h5")
data_path = Path("/groups/sxs/hchaudha/spec_runs/single_bh_CCE/runs/obs_vol_data/rad_0100_ZNS/red_cce.h5")
# data_path = Path("/groups/sxs/hchaudha/spec_runs/single_bh_CCE/runs/obs_vol_data/rad_0100_NIR/red_cce.h5")
with h5py.File(data_path,'r') as f:
    for var in ["BondiBeta", "Du(J)", "DuRDividedByR", "Dy(BondiBeta)", "Dy(Du(J))", "Dy(Dy(BondiBeta))", "Dy(Dy(Du(J)))", "Dy(Dy(J))", "Dy(Dy(Q))", "Dy(Dy(U))", "Dy(Dy(W))", "Dy(H)", "Dy(J)", "Dy(Q)", "Dy(U)", "Dy(W)", "EthRDividedByR", "H", "J", "Psi0", "Psi1", "Q", "R", "U", "W"]:
    # for var in ['J','Q','H','U','W','BondiBeta']:
        max_val = np.max(np.abs(f[f'Cce/VolumeData/{var}/CompactifiedRadius_0.dat'][0,1:]))
        # print(var,max_val)
        print(f"{var:<30} {max_val}")
    

In [None]:
Path("")

In [None]:
max_val['Cce/VolumeData/InertialRetardedTime.dat'][:,1]

In [None]:
2/max_val['Cce/VolumeData/OneMinusY.dat'][0,1:-1]

In [None]:
for i in max_val.keys():
    if "_0" in i:
        print(i,np.max(max_val[i]),np.min(max_val[i]))

In [None]:
plt.plot(max_val['Cce/VolumeData/W/CompactifiedRadius_0.dat'][:])

In [None]:
# import pickle
# with open(Path("/groups/sxs/hchaudha/scripts/dealing_with_h5_files/max_val_rad_0500.pkl"), 'wb') as f:
#     pickle.dump(max_val, f)

In [None]:
# Column 0 of `data` is used as the time axis; the remaining columns are
# reduced to their largest absolute value per row.
# NOTE(review): `data` has to be a 2-D array here. Reading the notebook top to
# bottom, the last assignment to `data` is the dict returned by read_h5_file,
# and the only statement that would load a suitable array (in the CCE cell) is
# commented out - confirm which dataset is meant before relying on this cell.
# NOTE(review): this also rebinds `max_val`, which earlier cells use as a dict.
t = data[:,0]
max_val = np.max(np.abs(data[:,1:]),axis=1)

In [None]:
plt.plot(t,max_val)