In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pathlib import Path
from random import choice as rc
import re
from typing import List


In [None]:
def limit_by_col_val(min_val,max_val,col_name,df):
  """Return the rows of `df` whose `col_name` value lies in [min_val, max_val].

  Both bounds are inclusive and the original index labels are kept.
  """
  in_range = df[col_name].between(min_val, max_val)
  return df[in_range]

def read_dat_file(file_name):
  """Load a whitespace-separated ``.dat`` file into a DataFrame.

  Column names are taken from the leading ``#`` comment block: every header
  line that contains ``=`` and at least one of ``[`` / ``]`` (for example
  ``# [1] = t``) contributes the text after its last ``=`` as one column
  name.  Header scanning stops at the first line that contains no ``#``.

  Args:
      file_name: Path (str or Path) of the file to read.

  Returns:
      DataFrame with one column per header entry.  Lines starting with ``#``
      are skipped by pandas.
  """
  cols_names = []
  with open(file_name,'r') as f:
    # Iterate lazily: only the header is scanned here, the data rows are
    # parsed by pandas below.
    for line in f:
      if "#" not in line:
        # From now onwards it will be all data
        break
      if "=" not in line:
        continue
      if ("[" not in line) and ("]" not in line):
        continue
      # strip() rather than cutting a fixed first/last character, so a header
      # line without a trailing newline (or without a space after "=") keeps
      # its full name.
      cols_names.append(line.split('=')[-1].strip())

  # Raw string: "\s" in a plain literal is an invalid escape sequence.
  return pd.read_csv(file_name,sep=r"\s+",comment="#",names=cols_names)

def find_subdomains(path:Path):
  """List the stems of all sub-directories directly inside `path`.

  A directory called ``SphereC0.dir`` is reported as ``SphereC0``.  Duplicate
  stems are collapsed; the order of the returned list is unspecified.
  """
  return list({entry.stem for entry in path.iterdir() if entry.is_dir()})

def find_topologies(path:Path):
  """List the topology names of the files directly inside `path`.

  The topology name is the part of the file stem before the first ``_``
  (``Bf0I1_TruncationError.dat`` -> ``Bf0I1``).  Directories are ignored,
  duplicates are collapsed and the order of the result is unspecified.
  """
  names = {entry.stem.partition("_")[0] for entry in path.iterdir() if entry.is_file()}
  return list(names)

def find_dat_file_names(path:Path):
  """List the quantity names of the ``<topology>_<quantity>.dat`` files in `path`.

  The quantity is everything in the file stem after the FIRST ``_``, so that
  ``f"{top}_{quantity}.dat"`` rebuilds the file name even when the quantity
  itself contains an underscore.  Directories and files whose stem has no
  ``_`` are skipped (they do not follow the naming scheme).

  Returns:
      List of unique quantity names; the order is unspecified.
  """
  file_name_set = set()
  for entry in path.iterdir():
    if not entry.is_file():
      continue
    _, separator, quantity = entry.stem.partition("_")
    # An empty separator means the stem has no "_" at all.
    if separator:
      file_name_set.add(quantity)

  return list(file_name_set)

def get_top_name_and_mode(name):
  """Split a power-file name into (topology name, number of modes).

  Example: ``"Bf0I1(12 modes).dat"`` -> ``("Bf0I1", 12)``.
  The topology name is the text before the first "("; the mode count is the
  text between the last "(" and the following space, converted to int.
  """
  top_name, _, _ = name.partition("(")
  after_paren = name.rpartition("(")[2]
  mode_text, _, _ = after_paren.partition(" ")
  return top_name, int(mode_text)

def find_highest_modes_for_topologies(path:Path):
  """Map each topology name to the largest mode count among the files in `path`.

  Every regular file in `path` is parsed with `get_top_name_and_mode`;
  directories are ignored.

  Returns:
      Dict ``{topology_name: highest_mode_count}``.
  """
  highest_mode_dict = {}
  for entry in path.iterdir():
    if not entry.is_file():
      continue
    top_name, mode = get_top_name_and_mode(entry.stem)
    # First sighting stores `mode`; later sightings keep the larger value.
    highest_mode_dict[top_name] = max(mode, highest_mode_dict.get(top_name, mode))

  return highest_mode_dict

def make_mode_dataframe(path:Path):
  """Build one time-sorted DataFrame per topology from the files in `path`.

  Files are expected to be named like ``Bf0I1(12 modes).dat``.  All files of
  the same topology are read with `read_dat_file`, concatenated row-wise and
  sorted by the ``t`` column.  Files with different column sets are combined
  by `pd.concat`, which fills the entries a file does not provide with NaN.

  Args:
      path: Directory holding the per-topology mode files.

  Returns:
      Dict ``{topology_name: DataFrame}``; each frame is sorted by ``t`` and
      has a fresh 0..n-1 index.
  """
  top_dataframe_list = {}
  for entry in path.iterdir():
    if not entry.is_file():
      continue
    # Group on the parsed topology name (exact match).  A substring test such
    # as `"Bf0I1(" in stem` would also claim a file called "XBf0I1(...)".
    top_name, _ = get_top_name_and_mode(entry.stem)
    top_dataframe_list.setdefault(top_name, []).append(read_dat_file(entry))

  top_mode_df_dict = {}
  for top_name, df_list in top_dataframe_list.items():
    result = pd.concat(df_list, ignore_index=True)

    # Rows with a repeated 't' are kept.  To keep only the first occurrence:
    # result = result.drop_duplicates(subset='t', keep='first')

    # Sort by 't' and reset index
    top_mode_df_dict[top_name] = result.sort_values('t').reset_index(drop=True)
  return top_mode_df_dict

def filter_columns(cols: List[str], include_patterns: List[str] = None,
                  exclude_patterns: List[str] = None) -> List[str]:
    """
    Select the column names that match ANY include pattern and NO exclude pattern.

    Patterns are regular expressions applied with ``re.search`` (a match
    anywhere in the name counts).

    Args:
        cols: List of column names to filter
        include_patterns: Regex patterns to include (None or empty: include all)
        exclude_patterns: Regex patterns to exclude (None or empty: exclude none)

    Returns:
        Alphabetically sorted list of the surviving column names, without duplicates

    Examples:
        >>> cols = ['age_2020', 'age_2021', 'height_2020', 'weight_2021']
        >>> filter_columns(cols, ['age_.*'], ['.*2021'])
        ['age_2020']
    """
    if not include_patterns:
        include_patterns = ['.*']
    if not exclude_patterns:
        exclude_patterns = []

    # Union of everything hit by at least one include pattern
    matched = set()
    for pattern in include_patterns:
        for name in cols:
            if re.search(pattern, name):
                matched.add(name)

    # Drop whatever an exclude pattern hits
    for pattern in exclude_patterns:
        matched = set(filter(lambda name: re.search(pattern, name) is None, matched))

    return sorted(matched)

def chain_filter_columns(cols: List[str], include_patterns: List[str] = None,
                        exclude_patterns: List[str] = None) -> List[str]:
    """
    Select the column names that match EVERY include pattern and NO exclude pattern.

    The patterns are applied one after another, each one narrowing the result
    of the previous one.  Matching uses ``re.search``.

    Args:
        cols: List of column names to filter
        include_patterns: Regex patterns to include (None or empty: include all)
        exclude_patterns: Regex patterns to exclude (None or empty: exclude none)

    Returns:
        Alphabetically sorted list of the surviving column names, without duplicates

    Examples:
        >>> cols = ['age_2020_q1', 'age_2020_q2', 'age_2021_q1', 'height_2020_q1']
        >>> chain_filter_columns(cols, ['age_.*', '.*q1'], ['.*2021.*'])
        ['age_2020_q1']
    """
    if not include_patterns:
        include_patterns = ['.*']
    if not exclude_patterns:
        exclude_patterns = []

    # One rule per pattern: (regex, whether a surviving name must match it).
    # Include rules run first, then exclude rules, each in the given order.
    rules = [(pattern, True) for pattern in include_patterns]
    rules += [(pattern, False) for pattern in exclude_patterns]

    survivors = set(cols)
    for pattern, must_match in rules:
        survivors = {
            name for name in survivors
            if bool(re.search(pattern, name)) == must_match
        }

    return sorted(survivors)

def sort_by_coefs_numbers(col_list:List[str]):
  """Order column names so that coefficient columns are sorted numerically.

  Names without the substring 'coef' come first, in their original order.
  Names containing 'coef' follow, ordered by the integer found after the first
  four characters of their last "_"-separated piece
  (``..._coef2`` before ``..._coef10``).
  """
  def coef_number(col):
    # "SphereA0_Bf0I1_psi_ps_coef12" -> "coef12" -> 12
    return int(col.split("_")[-1][4:])

  plain_cols = [col for col in col_list if 'coef' not in col]
  coef_cols = [col for col in col_list if 'coef' in col]
  coef_cols.sort(key=coef_number)
  return plain_cols + coef_cols


def load_power_diagonistics(PowDiag_path:Path):
  """Load every power-diagnostics table below `PowDiag_path` into nested dicts.

  Directory layout read by this function:
      PowDiag_path/<subdomain>.dir/Powerpsi.dir/    -> per-topology mode files
      PowDiag_path/<subdomain>.dir/Powerkappa.dir/  -> per-topology mode files
      PowDiag_path/<subdomain>.dir/<topology>_<quantity>.dat

  Every (topology, quantity) combination found in a subdomain directory is
  read, so each topology is expected to provide every quantity file.

  Args:
      PowDiag_path: Root directory of the extracted power diagnostics.

  Returns:
      ``result[subdomain][topology][key]`` -> DataFrame, where ``key`` is
      'psi_ps', 'kappa_ps' or a quantity name.  The 'psi_ps' / 'kappa_ps'
      frames have their coefficient columns in numerical order.
  """
  pow_diag_dict = {}
  for sd in find_subdomains(PowDiag_path):
    sd_path = PowDiag_path/f"{sd}.dir"

    psi_pd = make_mode_dataframe(sd_path/"Powerpsi.dir")
    kappa_pd = make_mode_dataframe(sd_path/"Powerkappa.dir")
    # The quantity files do not depend on the topology: list the directory
    # once per subdomain instead of once per topology.
    dat_file_names = find_dat_file_names(sd_path)

    pow_diag_dict[sd] = {}
    # For each subdomain save things by topology
    for top in find_topologies(sd_path):
      top_tables = {}

      psi_pd_sorted_cols = sort_by_coefs_numbers(psi_pd[top].columns.to_list())
      top_tables['psi_ps'] = psi_pd[top][psi_pd_sorted_cols]

      kappa_pd_sorted_cols = sort_by_coefs_numbers(kappa_pd[top].columns.to_list())
      top_tables['kappa_ps'] = kappa_pd[top][kappa_pd_sorted_cols]

      for dat_file in dat_file_names:
        top_tables[dat_file] = read_dat_file(sd_path/f"{top}_{dat_file}.dat")

      pow_diag_dict[sd][top] = top_tables

  return pow_diag_dict


def load_power_diagonistics_flat(PowDiag_path:Path, return_df:bool=True):
  """Load all power-diagnostics tables without nesting, which makes filtering easy.

  Reads the same files as `load_power_diagonistics` but stores every table
  under a single key ``<subdomain>_<topology>_<table>``, where ``<table>`` is
  'psi_ps', 'kappa_ps' or a quantity name.

  Args:
      PowDiag_path: Root directory of the extracted power diagnostics.
      return_df: If True (default) merge all tables into one wide DataFrame;
          if False return the flat dict of DataFrames.

  Returns:
      * return_df=False: dict ``{key: DataFrame}``.
      * return_df=True: one DataFrame with a single 't' column plus one
        column ``<key>_<column>`` for every non-'t' column of every table.
        The columns are aligned on the row index, not on the value of 't'
        -- NOTE(review): this assumes all tables are sampled at the same
        times; confirm for the runs being compared.
        If no table was found, an empty frame with only a 't' column is
        returned.
  """
  pow_diag_dict = {}
  for sd in find_subdomains(PowDiag_path):
    sd_path = PowDiag_path/f"{sd}.dir"

    psi_pd = make_mode_dataframe(sd_path/"Powerpsi.dir")
    kappa_pd = make_mode_dataframe(sd_path/"Powerkappa.dir")
    # The quantity files do not depend on the topology: list the directory
    # once per subdomain instead of once per topology.
    dat_file_names = find_dat_file_names(sd_path)

    # For each subdomain save things by topology
    for top in find_topologies(sd_path):
      psi_pd_sorted_cols = sort_by_coefs_numbers(psi_pd[top].columns.to_list())
      pow_diag_dict[f'{sd}_{top}_psi_ps'] = psi_pd[top][psi_pd_sorted_cols]

      kappa_pd_sorted_cols = sort_by_coefs_numbers(kappa_pd[top].columns.to_list())
      pow_diag_dict[f'{sd}_{top}_kappa_ps'] = kappa_pd[top][kappa_pd_sorted_cols]

      for dat_file in dat_file_names:
        pow_diag_dict[f'{sd}_{top}_{dat_file}'] = read_dat_file(sd_path/f"{top}_{dat_file}.dat")

  if not return_df:
    return pow_diag_dict

  if not pow_diag_dict:
    # Nothing to merge; keep the 't' column so downstream selections still work.
    return pd.DataFrame(columns=['t'])

  # Take 't' from the longest table.  This is deterministic (a random pick
  # gives a different frame on every run) and avoids NaN times in the rows
  # that a shorter table does not cover.
  longest_table = max(pow_diag_dict.values(), key=len)
  flat_dict = {'t': longest_table['t']}
  for key,item in pow_diag_dict.items():
    for col in item.columns:
      if col != 't':
        flat_dict[f"{key}_{col}"] = item[col]

  return pd.DataFrame(flat_dict)




In [None]:
# Example power-diagnostics file.  Only the uncommented assignment takes
# effect; the other paths are kept as ready-made alternatives.
# file = Path("/groups/sxs/hchaudha/spec_runs/del/filtering/6_set1_L3_Lev3/extracted-PowerDiagnostics/SphereC0.dir/Powerpsi.dir/Bf0I1(19 modes).dat")
# file = Path("/groups/sxs/hchaudha/spec_runs/del/filtering/16_set1_L3_Lev3/extracted-PowerDiagnostics/SphereC0.dir/Powerpsi.dir/Bf0I1(20 modes).dat")
# file = Path("/groups/sxs/hchaudha/spec_runs/del/filtering/16_set1_L3_Lev3/extracted-PowerDiagnostics/SphereC0.dir/Bf0I1_HighestThirdConvergenceFactor.dat")
# file = Path("/groups/sxs/hchaudha/spec_runs/del/filtering/6_set1_L3_Lev3/extracted-PowerDiagnostics/SphereC0.dir/Bf0I1_TruncationError.dat")
# file = Path("/groups/sxs/hchaudha/spec_runs/del/filtering/13_set1_L4_1500_Lev4/extracted-PowerDiagnostics/SphereC10.dir/Powerpsi.dir/Bf1S2(15 modes).dat")
file = Path("/groups/sxs/hchaudha/spec_runs/del/filtering/16_set1_L3_HP32_AF_Lev3/extracted-PowerDiagnostics/SphereC0.dir/Powerkappa.dir/Bf0I1(15 modes).dat")

In [None]:
# data = read_dat_file(file)

# Run to inspect.  Only the uncommented assignment takes effect; the other
# runs are kept as ready-made alternatives.
# h5_path = Path('/groups/sxs/hchaudha/spec_runs/del/filtering/13_set1_L4_1500_Lev4')
# h5_path = Path('/groups/sxs/hchaudha/spec_runs/6_segs/6_set1_L3/h5_files_Lev3')
# h5_path = Path('/groups/sxs/hchaudha/spec_runs/6_segs/6_set1_L6/h5_files_Lev6/')
# h5_path = Path('/groups/sxs/hchaudha/spec_runs/high_accuracy_L35_master/h5_files_Lev5')
h5_path = Path('/groups/sxs/hchaudha/spec_runs/high_accuracy_L35/h5_files_Lev5')
# h5_path = Path('/groups/sxs/hchaudha/spec_runs/6_segs/6_set1_L6/h5_files_Lev5/')
# h5_path = Path('/groups/sxs/hchaudha/spec_runs/6_segs/6_set1_L6/h5_files_Lev4/')
# h5_path = Path('/groups/sxs/hchaudha/spec_runs/16_set1_L3/h5_files_Lev3/')
# h5_path = Path('/groups/sxs/hchaudha/spec_runs/16_set1_L3_HP28/h5_files_Lev3')
# h5_path = Path('/groups/sxs/hchaudha/spec_runs/6_segs/6_set1_L6/h5_files_Lev6/')
# h5_path = Path('/groups/sxs/hchaudha/spec_runs/17_set_main_q3_18_L3/h5_files_Lev3')
# h5_path = Path('/groups/sxs/hchaudha/spec_runs/17_set1_q3_18_L3/h5_files_Lev3')
# h5_path = Path('/groups/sxs/hchaudha/spec_runs/17_set3_q3_18_L3/h5_files_Lev3')

# Subdomain to inspect
# domain = 'FilledCylinderCB0'
# domain = 'SphereC5'
domain = 'SphereA0'

# Which power spectrum to load
# psi_or_kappa = 'psi'
psi_or_kappa = 'kappa'

folder_path = h5_path / "extracted-PowerDiagnostics" / f"{domain}.dir" / f"Power{psi_or_kappa}.dir"
top_data = make_mode_dataframe(folder_path)
print(top_data.keys())

In [None]:
# Heat map of log10(power) per coefficient (x axis) over time (y axis) for one topology.
# top_name = 'Bf1S2'
top_name = 'Bf0I1'
# top_name = list(top_data.keys())[0]
t_min = 0
t_max = 4000
data = top_data[top_name]
data = limit_by_col_val(t_min,t_max,'t',data)
data = data.dropna(axis=1, how='all')  # Some columns will have just nans remove those
# Everything after the first column is plotted (assumes 't' is the first column -- TODO confirm)
column_names = data.columns[1:]
visual_data = data[column_names]

plt.figure(figsize=(15, 10))
imshow_plot = plt.imshow(
    # visual_data, 
    np.log10(visual_data), 
    aspect='auto', 
    cmap='RdYlGn_r', 
    origin='lower',interpolation='none',
)

plt.xticks(
    ticks=np.arange(len(visual_data.columns)), 
    labels=[i.split(" ")[-1] for i in column_names], 
    rotation=90
)

# Show about 10 ticks.  The step must never be 0 (fewer than 10 rows),
# otherwise both np.arange and the [::step] slice below raise.
ytick_step = max(1, len(visual_data) // 10)
plt.yticks(
    ticks=np.arange(0, len(visual_data), ytick_step), 
    labels=data['t'][::ytick_step].astype(int)
)
plt.colorbar(imshow_plot)
plt.ylabel('t(M)')
# [31:] drops the first 31 characters of the path, i.e. the
# '/groups/sxs/hchaudha/spec_runs/' prefix of the paths used in this notebook.
plt.title(f'{str(folder_path)[31:]}_{top_name}')
plt.tight_layout() 
plt.grid(False)
plt.show()

In [None]:
# Line plot of log10(power) of every coefficient column against time.
# Every column except the time column.  Compare with `!=`: a substring test
# ('t' not in name) would also drop any column whose name merely contains a "t".
cols_to_use = [i for i in data.columns if i != 't']
log_power = np.log10(data[cols_to_use])
# Row statistics are all computed from the coefficient columns only; adding
# them one by one to the same frame would feed earlier statistics into later ones.
df = log_power.assign(
    row_min=log_power.min(axis=1),
    row_max=log_power.max(axis=1),
    row_mean=log_power.mean(axis=1),
    row_std=log_power.std(axis=1),
)

# plt.plot(df['row_min'])
# plt.plot(df['row_mean'])
# plt.plot(df['row_max'])
# plt.plot(df['row_std'])
plt.figure(figsize=(12, 8))

for i in cols_to_use:
  # plt.plot(data['t'], df[f'{i}'])
  plt.plot(data['t'], df[f'{i}'], label = f'{i}')
plt.legend()
# [31:] drops the '/groups/sxs/hchaudha/spec_runs/' prefix of the paths used in this notebook.
plt.title(f'{str(folder_path)[31:]}_{top_name}')
plt.tight_layout() 
plt.grid(False)
plt.show()
# x = data['t']
# y = df['row_mean']
# y_err = df['row_std']
# plt.errorbar(x, y, yerr=y_err, fmt='-o', label='Data with Error Bars', ecolor='red', capsize=4)

# load all of power diagnostics

### df flat

In [None]:
# Load every power-diagnostics table of this run into one wide DataFrame
# (a single 't' column plus one column per table column).
PowDiag_path = Path("/groups/sxs/hchaudha/spec_runs/16_set1_L3/h5_files_Lev3/extracted-PowerDiagnostics")
flat_df = load_power_diagonistics_flat(PowDiag_path)

In [None]:
# Keep only the columns whose name matches every include pattern,
# with the coefficient columns in numerical order.
include_list = [r'SphereA0', r'Bf0I1', r'psi_ps', r'coef']
exclude_list = []
filtered_cols = sort_by_coefs_numbers(
    chain_filter_columns(flat_df.columns.tolist(), include_list, exclude_list)
)

# Restrict the flat frame to the selected columns
filtered_df = flat_df[filtered_cols]
filtered_df

In [None]:
# List the quantity names available for one subdomain (the cell output shows the list).
find_dat_file_names(Path("/groups/sxs/hchaudha/spec_runs/16_set1_L3/h5_files_Lev3/extracted-PowerDiagnostics/SphereC1.dir"))

In [None]:
# Keep 't' plus the non-coefficient columns whose name matches every include pattern.
include_list = [r'SphereA0', r'Bf0I1', 'Number']
exclude_list = [r'coef']
filtered_cols = ['t'] + sort_by_coefs_numbers(
    chain_filter_columns(flat_df.columns.tolist(), include_list, exclude_list)
)

# Restrict the flat frame to the selected columns
filtered_df = flat_df[filtered_cols]
filtered_df.columns

In [None]:
# Plot every selected column against time.
plt.figure(figsize=(12, 8))
value_cols = [name for name in filtered_df.columns if name != 't']
for col in value_cols:
  plt.plot(filtered_df['t'], filtered_df[col],label=col)
  # plt.plot(filtered_df['t'], np.log10(np.abs(filtered_df[col])),label=col)
plt.legend()
plt.xlabel('t')
# plt.ylabel('Convergence Factor')
plt.show()

### df dict

In [None]:
# Load the same run as nested dicts: dict_df[subdomain][topology][table] -> DataFrame.
PowDiag_path = Path("/groups/sxs/hchaudha/spec_runs/16_set1_L3/h5_files_Lev3/extracted-PowerDiagnostics")
dict_df = load_power_diagonistics(PowDiag_path)
dict_df.keys()

In [None]:
# filter subdomains
# include_list = [ r'SphereA[2-9]']
include_list = []
exclude_list = [r'Cylinder', r'SphereC']
filtered_cols = chain_filter_columns(dict_df.keys(), include_list, exclude_list)
filtered_dict = {sd_name: dict_df[sd_name] for sd_name in filtered_cols}

# Union of the topology names found in the selected subdomains
top_set = set().union(*(tops.keys() for tops in filtered_dict.values()))
print(filtered_dict.keys())
print(top_set)

In [None]:
# Filter topologies inside every subdomain.
# `dict_df` is keyed by subdomain first and by topology second, so topology
# patterns such as 'Bf0I1' have to be matched against the inner keys.
include_list = [ 'Bf0I1']
exclude_list = [ ]
filtered_dict = {}
for sd_name, sd_tables in dict_df.items():
  filtered_cols = chain_filter_columns(
      cols=sd_tables.keys(),
      include_patterns=include_list,
      exclude_patterns=exclude_list
  )
  # Subdomains without any matching topology are left out entirely
  if filtered_cols:
    filtered_dict[sd_name] = {top: sd_tables[top] for top in filtered_cols}
filtered_dict.keys()

## Exp damping 

In [None]:
def exp_damping(p,a,N):
  """Exponential damping factors ``exp(-a * (k/(N-1))**(2*p))`` for k = 0..N-1.

  Args:
      p: Half of the exponent applied to the normalised mode number.
      a: Damping strength; the last mode (k = N-1) is scaled by ``exp(-a)``.
      N: Number of modes.

  Returns:
      1-D numpy array of length N.
  """
  modes = np.arange(N)
  # With a single mode there is no k/(N-1) to form (it would be 0/0 -> NaN);
  # the only mode is k = 0, so use 0 as its normalised mode number.
  scaled = modes/(N-1) if N > 1 else np.zeros(N)
  return np.exp(-a*scaled**(2*p))

In [None]:
# 1 - damping factor for 15 modes with a = 36 and p = 36, 30, 28 (shown as the cell output)
1-exp_damping(36,36,15), 1-exp_damping(30,36,15), 1-exp_damping(28,36,15)

In [None]:
# 1 - damping factor per mode for several values of p (a = 36, 15 modes), on a log scale
for p_value in (28, 30, 32, 36):
  plt.plot(1-exp_damping(p_value,36,15),label=f'{p_value},36', marker='o')
plt.yscale('log')
plt.legend()