### Dual-Tree Complex Wavelet Packet Transforms (DT-CWPT)

- Basic understanding of Wavelet Packet Transforms:

    - [Understanding Wavelets, Part 1: What Are Wavelets](https://www.youtube.com/watch?v=QX1-xGVFqmw&list=PLn8PRpmsu08ojy02wi4QLVzELM545Xw3p&index=1)

    - [Understanding Wavelets, Part 2: Types of Wavelet Transforms](https://www.youtube.com/watch?v=F7Lg-nFYooU&list=PLn8PRpmsu08ojy02wi4QLVzELM545Xw3p&index=2)

- [Dual-Tree Complex Wavelet Packet Transform](https://web.itu.edu.tr/ibayram/dtcwpt/)

In [1]:
# DTCWPT

import numpy as np
from scipy.signal import convolve

def afb(x, lowpass, highpass):
    """
    Analysis Filter Bank (AFB)
    
    Parameters:
    x : input signal
    lowpass, highpass : analysis filters
    
    Returns:
    y : output -> [lowpass_channel, highpass_channel]
    """
    # Low-pass filter
    temp = convolve(x, lowpass, mode='full')
    temp[:len(temp) - len(x)] += temp[len(x):]
    y0 = temp[:len(x):2]
    
    # High-pass filter
    temp = convolve(x, highpass, mode='full')
    temp[:len(temp) - len(x)] += temp[len(x):]
    y1 = temp[:len(x):2]
    
    return np.vstack((y0, y1))

def DTCWPT(x, first, h, f, max_level):
    """
    Dual-Tree Complex Wavelet Packet Transform (DTCWPT)
    
    Parameters:
    x : input signal
    first: first stage filters ([first_lowpass, first_highpass]), also it is Daubechies filters with 5 and 6 vanishing moments
        *** Note: If need the first and the second DWPT, run seperately (2 times)
    h (g): Kingsbury's Q-shift filters ([h_lowpass, h_highpass]) or ([g_lowpass, g_highpass]) 
        *** Note: h is for the first DWPT, and g is for the second DWPT, run seperately (2 times)
    f : Daubechies filters with 5 and 6 vanishing moments ([f_lowpass, f_highpass])
    max_level : maximum level
    
    Returns:
    y : output list containing all of the branches
    """
    y = {}

    # First stage
    fil0 = first[0, :]
    fil1 = first[1, :]
    yy = afb(x, fil0, fil1) # Whenever need to seperate to lowpass and highpass, use afb function
    # print(yy)
    y[(1, 1)] = yy[0, :]
    # print(y[(1, 1)])
    y[(1, 2)] = yy[1, :]
    # print(y[(1, 2)])

    # Second stage
    fil0 = h[0, :]
    fil1 = h[1, :]
    yy = afb(y[(1, 1)], fil0, fil1)
    y[(2, 1)] = yy[0, :]
    # print(y[(2, 1)])
    y[(2, 2)] = yy[1, :]
    # print(y[(2, 2)])

    yy = afb(y[(1, 2)], fil0, fil1)
    y[(2, 3)] = yy[0, :]
    # print(y[(2, 3)])
    y[(2, 4)] = yy[1, :]
    # print(y[(2, 4)])

    for n in range(3, max_level + 1):
        for k in range(1, 2**(n - 1) + 1):
            if k % 2**(n - 2) == 1:
                fil0 = h[0, :]
                fil1 = h[1, :]
            else:
                fil0 = f[0, :]
                fil1 = f[1, :]
            yy = afb(y[(n - 1, k)], fil0, fil1)
            y[(n, 2 * k - 1)] = yy[0, :]
            y[(n, 2 * k)] = yy[1, :]

    return [y[(max_level, k)] for k in range(1, 2**max_level + 1)]

In [2]:
# IDTCWPT

import numpy as np
from scipy.signal import convolve

def sfb(y, lowpass, highpass):
    """
    Synthesis Filter Bank (SFB)
    
    Parameters:
    y : output from 'afb' function
    lowpass, highpass : synthesis filters
    
    Returns:
    x : reconstructed input
    """
    # Upsample the signals
    x0 = np.zeros(2 * len(y[0]), dtype=y.dtype)
    x1 = np.zeros(2 * len(y[0]), dtype=y.dtype)

    x0[::2] = y[0]
    x1[::2] = y[1]

    # Convolve with the synthesis filters
    temp = convolve(x0, lowpass, mode='full')
    temp[:len(temp) - len(x0)] += temp[len(x0):]
    take = temp[:len(x0)]
    shift = len(lowpass)
    take0 = take[np.mod(np.arange(len(x0)) + shift - 1, len(x0))]

    temp = convolve(x1, highpass, mode='full')
    temp[:len(temp) - len(x1)] += temp[len(x1):]
    take = temp[:len(x1)]
    shift = len(highpass)
    take1 = take[np.mod(np.arange(len(x1)) + shift - 1, len(x1))]

    return take0 + take1

def IDTCWPT(y, first, h, f):
    """
    Inverse Dual-Tree Complex Wavelet Packet Transform (IDTCWPT)
    
    Parameters:
    y : the cell array arranged as in DTCWPT
    first: first stage filters ([first_lowpass, first_highpass]), also it is Daubechies filters with 5 and 6 vanishing moments
    h (g): Kingsbury's Q-shift filters ([h_lowpass, h_highpass]) or ([g_lowpass, g_highpass]) 
    f : Daubechies filters with 5 and 6 vanishing moments ([f_lowpass, f_highpass])
    
    Returns:
    x : reconstructed input signal
    """
    max_level = int(np.log2(len(y)))

    xx = y[0, :].tolist()

    for n in range(max_level, 2, -1):
        x2 = [None] * (2**(n - 1))
        for k in range(1, 2**(n - 1) + 1):
            if k % 2**(n - 2) == 1:
                fil0 = h[0, :]
                fil1 = h[1, :]
            else:
                fil0 = f[0, :]
                fil1 = f[1, :]
            x2[k - 1] = sfb([xx[2 * k - 2], xx[2 * k - 1]], fil0, fil1)
        xx = x2

    # Second stage
    fil0 = h[0, :]
    fil1 = h[1, :]
    x2 = [None] * 2
    x2[0] = sfb([xx[0], xx[1]], fil0, fil1)
    x2[1] = sfb([xx[2], xx[3]], fil0, fil1)

    # First stage
    fil0 = first[0, :]
    fil1 = first[1, :]
    x = sfb([x2[0], x2[1]], fil0, fil1)

    return x

Filter banks:

In [44]:
# # Short FBs
# import scipy.io
# import numpy as np

# # Load the .mat file
# mat_file_path = 'DT-CWPT/dtcwpt_filters.mat'  # Replace with your .mat file path
# mat_contents = scipy.io.loadmat(mat_file_path)

# # Display the keys in the .mat file
# print("Keys in the .mat file:", mat_contents.keys())

# # List of variables to save
# variables_to_save = ['h', 'g', 'f', 'first_1', 'first_2']

# # Dictionary to store the variables as numpy arrays
# variables_dict = {}

# # Process each variable
# for var in variables_to_save:
#     if var in mat_contents:
#         variable_data = mat_contents[var]
#         # Ensure the variable is a 2-row matrix
#         if variable_data.shape[0] == 2:
#             variables_dict[var] = variable_data
#             print(f"Stored {var} in variables_dict")
#         else:
#             print(f"{var} is not a 2-row matrix, not stored")
#     else:
#         print(f"{var} not found in the .mat file")

# # Example usage: Accessing the variables from the dictionary
# for var in variables_dict:
#     print(f"{var}:")
#     print(variables_dict[var])

# # Now you can use the variables_dict in your further processing
# # Example: Accessing 'h' variable
# h_variable = variables_dict.get('h')
# print("h variable data:", h_variable)

In [3]:
# Long FBs
import scipy.io
import numpy as np

# Load the .mat file
mat_file_path = 'DT-CWPT/dtcwpt_filters_long.mat'  # Replace with your .mat file path
mat_contents = scipy.io.loadmat(mat_file_path)

# Display the keys in the .mat file
print("Keys in the .mat file:", mat_contents.keys())

# List of variables to save
variables_to_save = ['h', 'g', 'f', 'first_1', 'first_2']

# Dictionary to store the variables as numpy arrays
variables_dict = {}

# Process each variable
for var in variables_to_save:
    if var in mat_contents:
        variable_data = mat_contents[var]
        # Ensure the variable is a 2-row matrix
        if variable_data.shape[0] == 2:
            variables_dict[var] = variable_data
            print(f"Stored {var} in variables_dict")
        else:
            print(f"{var} is not a 2-row matrix, not stored")
    else:
        print(f"{var} not found in the .mat file")

# Example usage: Accessing the variables from the dictionary
for var in variables_dict:
    print(f"{var}:")
    print(variables_dict[var])

# Now you can use the variables_dict in your further processing
# Example: Accessing 'h' variable
h_variable = variables_dict.get('h')
print("h variable data:", h_variable)


Keys in the .mat file: dict_keys(['__header__', '__version__', '__globals__', 'h', 'g', 'f', 'first_1', 'first_2'])
Stored h in variables_dict
Stored g in variables_dict
Stored f in variables_dict
Stored first_1 in variables_dict
Stored first_2 in variables_dict
h:
[[ 0.00325314 -0.00388321  0.03466035 -0.0388728  -0.11720389  0.27529538
   0.75614564  0.56881042  0.01186609 -0.1067118   0.02382538  0.01702522
  -0.00543948 -0.0045569 ]
 [-0.0045569   0.00543948  0.01702522 -0.02382538 -0.1067118  -0.01186609
   0.56881042 -0.75614564  0.27529538  0.11720389 -0.0388728  -0.03466035
  -0.00388321 -0.00325314]]
g:
[[-0.0045569  -0.00543948  0.01702522  0.02382538 -0.1067118   0.01186609
   0.56881042  0.75614564  0.27529538 -0.11720389 -0.0388728   0.03466035
  -0.00388321  0.00325314]
 [ 0.00325314  0.00388321  0.03466035  0.0388728  -0.11720389 -0.27529538
   0.75614564 -0.56881042  0.01186609  0.1067118   0.02382538 -0.01702522
  -0.00543948  0.0045569 ]]
f:
[[ 6.46115344e-03  6.23647

- DTCWPT should be run two times, one for real part, the other one for imaginary part of the signal.
- Make Hilbert transform for the imaginary part before applying the second DTCWPT.
- Use Daubechies filters with 5 and 6 vanishing moments for first and f.
- Use Kingsbury's Q-shift filters for h (g)

In [4]:
# Apply DTCWPT on Flow:
## CDGR individual sample:

import pandas as pd

# Define the path to the CSV file
csv_file_path = '/nfs/turbo/med-kayvan-lab/Projects/PARDS/02-Data/02-EVENTS/Cleaned/CDGR/a_58_CDGR_39_1st (1).csv'

# Read the CSV file into a DataFrame
df = pd.read_csv(csv_file_path)

# Display the column names
print("Column names:", df.columns)


Column names: Index(['Relative Time (sec)', 'Time', 'CDGR - FiO₂', 'CDGR - PEEP',
       'CDGR - Set: VT in mL', 'CDGR - Paw', 'CDGR - Set: PEEP L',
       'CDGR - eVT', 'CDGR - PIP', 'CDGR - Pplat', 'CDGR - Flow',
       'CDGR - iPress Wave', 'GE - SpO2 1', 'OSI',
       'CDGR - iPress Wave (FTandBBF)', 'CDGR - Flow (FTandBBF)',
       'CDGR - eVT (FTandBBF)', 'Tumbling_window'],
      dtype='object')


In [5]:
# Hilbert transform:

import pandas as pd
import numpy as np
from scipy.signal import hilbert

# Perform Hilbert transform on the entire Flow column
df['CDGR - Flow (FTandBBF) - HT'] = np.abs(hilbert(df['CDGR - Flow (FTandBBF)'].values))

# Display the column names
print(df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 291508 entries, 0 to 291507
Data columns (total 19 columns):
 #   Column                         Non-Null Count   Dtype  
---  ------                         --------------   -----  
 0   Relative Time (sec)            291508 non-null  float64
 1   Time                           291508 non-null  object 
 2   CDGR - FiO₂                    291508 non-null  int64  
 3   CDGR - PEEP                    291508 non-null  float64
 4   CDGR - Set: VT in mL           291508 non-null  float64
 5   CDGR - Paw                     291508 non-null  int64  
 6   CDGR - Set: PEEP L             291508 non-null  float64
 7   CDGR - eVT                     291508 non-null  float64
 8   CDGR - PIP                     291508 non-null  float64
 9   CDGR - Pplat                   291508 non-null  float64
 10  CDGR - Flow                    291508 non-null  float64
 11  CDGR - iPress Wave             291508 non-null  float64
 12  GE - SpO2 1                   

In [7]:
# First DWPT (a set of real-valued filters)

# Extract the necessary columns
flow_column = "CDGR - Flow (FTandBBF)"
window_column = "Tumbling_window"
flow_data = df[flow_column]
# print(len(flow_data))
window_data = df[window_column]
# print(len(window_data))

# Unique tumbling windows
tumbling_windows = window_data.unique()
print(len(tumbling_windows))

# Define filter coefficients
first = variables_dict['first_1']
print(first.shape)
h = variables_dict['h']
print(h.shape)
f = variables_dict['f']
print(f.shape)
max_level = 4

# Process each tumbling window
results = {}

for window in tumbling_windows:
    # Extract data for the current tumbling window
    window_data = flow_data[df[window_column] == window].values
    
    # Apply DTCWPT
    dtcwpt_result = DTCWPT(window_data, first, h, f, max_level)
    
    # Store the result
    results[window] = dtcwpt_result

# Display results
for window, result in results.items():
    print(f"Window {window}:")
    print(f"Number of results: {len(result)}")
    print(f"Number of coefficients for each feature: {len(result[0])}")
    print(result)


5
(2, 28)
(2, 14)
(2, 28)
Window 1:
Number of results: 16
Number of coefficients for each feature: 3676
[array([ 3.4701896 , -2.45884194, 26.0558659 , ..., -6.37072583,
       -1.65763871,  1.30167019]), array([ 0.8030346 , -2.91897109,  0.72824284, ...,  0.64396935,
       -0.21203089, -0.27857949]), array([-1.17403397,  2.03520967,  2.13255267, ..., -0.01987263,
        0.03062331, -0.14938993]), array([ 0.14257033,  0.06990516, -0.1340063 , ...,  0.19009464,
        0.1897528 ,  0.38563935]), array([ 0.27117178,  0.67420105, -0.63360659, ..., -0.08994416,
        0.01606567, -0.17438955]), array([ 0.07809048,  0.03991902, -0.01717318, ..., -0.41132424,
       -0.1671577 ,  0.08527363]), array([ 0.0551106 ,  0.04035032,  0.02635115, ..., -0.03492499,
       -0.0078471 ,  0.09405296]), array([ 0.2617007 ,  0.03444783, -0.01603086, ...,  0.68102934,
       -1.27589487, -1.4456683 ]), array([ 0.04109274,  0.06044239, -0.0304511 , ...,  0.01325811,
       -0.04889528,  0.02259487]), arra

In [8]:
# # Inverse DTCWPT and display results
# inverse_results = {}

# for window, result in results.items():
#     idtcwpt_result = IDTCWPT(result, first, h, f)
#     inverse_results[window] = idtcwpt_result

# # Display inverse results
# for window, result in inverse_results.items():
#     print(f"Inverse Result for Window {window}:")
#     print(result)


In [8]:
# Second DWPT (a set of imaginary (or quadrature) filters)

# Extract the necessary columns
flow_column = "CDGR - Flow (FTandBBF) - HT"
window_column = "Tumbling_window"
flow_data = df[flow_column]
# print(len(flow_data))
window_data = df[window_column]
# print(len(window_data))

# Unique tumbling windows
tumbling_windows = window_data.unique()
print(len(tumbling_windows))

# Define filter coefficients
first = variables_dict['first_2']
print(first.shape)
g = variables_dict['g']
print(h.shape)
f = variables_dict['f']
print(f.shape)
max_level = 4

# Process each tumbling window
results = {}

for window in tumbling_windows:
    # Extract data for the current tumbling window
    window_data = flow_data[df[window_column] == window].values
    
    # Apply DTCWPT
    dtcwpt_result = DTCWPT(window_data, first, g, f, max_level)
    
    # Store the result
    results[window] = dtcwpt_result

# Display results
for window, result in results.items():
    print(f"Window {window}:")
    print(f"Number of results: {len(result)}")
    print(f"Number of coefficients for each feature: {len(result[0])}")
    print(result)


5
(2, 29)
(2, 14)
(2, 28)
Window 1:
Number of results: 16
Number of coefficients for each feature: 3676
[array([29.50451794, 35.74997386, 65.02128485, ..., 19.03364406,
       24.82516492, 26.64712054]), array([ 2.2086015 ,  1.24472092, -2.85382693, ...,  0.56879089,
       -0.00327241,  0.21217521]), array([ 0.58688761, -0.21944861, -1.88255928, ...,  0.01085553,
        0.104448  ,  0.56687973]), array([ 0.06515062,  0.07996508, -0.1185612 , ..., -0.05729362,
       -0.09731726,  0.18611552]), array([-0.06316731, -1.45472068,  0.62794313, ..., -0.06804275,
        0.11279485,  0.34785767]), array([ 0.06461431,  0.04067694, -0.04328055, ...,  0.0084094 ,
        0.13445079,  0.16785526]), array([ 0.06001139,  0.0196125 ,  0.0391962 , ..., -0.03455399,
        0.00357278,  0.15797102]), array([ 0.17337128,  0.24893871,  0.01501586, ..., -0.46302857,
        1.1580172 ,  1.09797925]), array([-0.02417282, -0.04111018, -0.11106069, ..., -0.06681201,
       -0.09918498,  0.00086486]), arra

In [9]:
# # Inverse DTCWPT and display results
# inverse_results = {}

# for window, result in results.items():
#     idtcwpt_result = IDTCWPT(result, first, h, f)
#     inverse_results[window] = idtcwpt_result

# # Display inverse results
# for window, result in inverse_results.items():
#     print(f"Inverse Result for Window {window}:")
#     print(result)

In [28]:
# Apply DTCWPT to CDGR (all):

import os
import pandas as pd
import numpy as np
from scipy.signal import hilbert

# Define the directory containing the CSV files
input_dir = "/nfs/turbo/med-kayvan-lab/Projects/PARDS/02-Data/02-EVENTS/Cleaned/CDGR/"
output_dir = "/nfs/turbo/med-kayvan-lab/Projects/PARDS/02-Data/02-EVENTS/Cleaned/CDGR/Flow_DTCWPT/"

# Define the necessary column names
flow_column = "CDGR - Flow (FTandBBF)"
window_column = "Tumbling_window"
hilbert_column = f"{flow_column} - HT"

# Define filter coefficients and other parameters (replace with your actual values)
first_real = variables_dict['first_1']
h = variables_dict['h']
f = variables_dict['f']
first_imaginary = variables_dict['first_2']
g = variables_dict['g']
max_level = 4

# Function to save features for a window
def save_features(window, real_result, imag_result, filename):
  """Saves real and imaginary features for a window to separate files.

  Args:
    window: Window identifier (e.g., tumbling window number).
    real_result: Real-valued DTCWPT coefficients for the window.
    imag_result: Imaginary-valued DTCWPT coefficients for the window.
    filename: Base filename for the file (without window and feature suffixes).
  """
  # Save real features
  real_filename = f"{output_dir}/{filename}_w{window}_real.csv"
  np.savetxt(real_filename, real_result, delimiter=',')

  # Save imaginary features
  imag_filename = f"{output_dir}/{filename}_w{window}_imag.csv"
  np.savetxt(imag_filename, imag_result, delimiter=',')

# Process each CSV file in the input directory
for filename in os.listdir(input_dir):
  if filename.endswith(".csv"):
    filepath = os.path.join(input_dir, filename)
    df = pd.read_csv(filepath)

    # Perform Hilbert transform on the entire Flow column
    df[hilbert_column] = np.abs(hilbert(df[flow_column].values))

    # Apply the first DWPT (real-valued filters)
    flow_data = df[flow_column]
    window_data = df[window_column]
    tumbling_windows = window_data.unique()
    results_real = {}

    for window in tumbling_windows:
      window_flow_data = flow_data[df[window_column] == window].values
      dtcwpt_result_real = DTCWPT(window_flow_data, first_real, h, f, max_level)
      results_real[window] = dtcwpt_result_real

      # Apply the second DWPT (imaginary filters)
      window_hilbert_data = df[hilbert_column][df[window_column] == window].values
      dtcwpt_result_imaginary = DTCWPT(window_hilbert_data, first_imaginary, g, f, max_level)
      results_imaginary[window] = dtcwpt_result_imaginary

      # Save features for the current window
      save_features(window, results_real[window], results_imaginary[window], filename)

print(f"Processed all files and saved features to {output_dir}")

Processed all files and saved features to /nfs/turbo/med-kayvan-lab/Projects/PARDS/02-Data/02-EVENTS/Cleaned/CDGR/Flow_DTCWPT/


In [10]:
import pandas as pd
import os

# Directory where the files are stored
directory_path = "/nfs/turbo/med-kayvan-lab/Projects/PARDS/02-Data/02-EVENTS/Cleaned/CDGR/Flow_DTCWPT/"

# List of base file names
base_file_names = [
    f"a_58_CDGR_39_1st ({i}).csv" for i in range(1, 40) if i not in [10, 36]
] + [
    f"b_151_CDGR_11_1st ({i}).csv" for i in range(1, 12)
]

# Function to process each group of files
def process_files(base_file_name):
    # List of file names for the current base file
    file_names = [
        f"{base_file_name}_w1_real.csv", f"{base_file_name}_w1_imag.csv",
        f"{base_file_name}_w2_real.csv", f"{base_file_name}_w2_imag.csv",
        f"{base_file_name}_w3_real.csv", f"{base_file_name}_w3_imag.csv",
        f"{base_file_name}_w4_real.csv", f"{base_file_name}_w4_imag.csv",
        f"{base_file_name}_w5_real.csv", f"{base_file_name}_w5_imag.csv"
    ]

    # Initialize an empty list to hold DataFrames
    dataframes = []

    # Loop through each file
    for file in file_names:
        # Construct the full file path
        file_path = os.path.join(directory_path, file)
        
        # Read the CSV file
        df = pd.read_csv(file_path, header=None)
        
        # Transpose the DataFrame
        df_transposed = df.T
        
        # Generate column names based on the file name without '.csv' and in the format f{i}
        file_base = os.path.basename(file).replace(".csv", "")
        df_transposed.columns = [f"{file_base}_f{i+1}" for i in range(df_transposed.shape[1])]
        
        # Append the transposed DataFrame to the list
        dataframes.append(df_transposed)

    # Concatenate all DataFrames
    final_df = pd.concat(dataframes, axis=1)

    # Define the output file name without ".csv"
    output_file_base = base_file_name.replace(".csv", "")
    output_file_name = f"{output_file_base}_Flow_DTCWPT.csv"
    output_file_path = os.path.join(directory_path, output_file_name)

    # Save the concatenated DataFrame to the new CSV file
    final_df.to_csv(output_file_path, index=False)

    print(f"CSV files for {base_file_name} have been concatenated and saved as {output_file_path}")

# Loop through each base file name
for base_file_name in base_file_names:
    process_files(base_file_name)


CSV files for a_58_CDGR_39_1st (1).csv have been concatenated and saved as /nfs/turbo/med-kayvan-lab/Projects/PARDS/02-Data/02-EVENTS/Cleaned/CDGR/Flow_DTCWPT/a_58_CDGR_39_1st (1)_Flow_DTCWPT.csv
CSV files for a_58_CDGR_39_1st (2).csv have been concatenated and saved as /nfs/turbo/med-kayvan-lab/Projects/PARDS/02-Data/02-EVENTS/Cleaned/CDGR/Flow_DTCWPT/a_58_CDGR_39_1st (2)_Flow_DTCWPT.csv
CSV files for a_58_CDGR_39_1st (3).csv have been concatenated and saved as /nfs/turbo/med-kayvan-lab/Projects/PARDS/02-Data/02-EVENTS/Cleaned/CDGR/Flow_DTCWPT/a_58_CDGR_39_1st (3)_Flow_DTCWPT.csv
CSV files for a_58_CDGR_39_1st (4).csv have been concatenated and saved as /nfs/turbo/med-kayvan-lab/Projects/PARDS/02-Data/02-EVENTS/Cleaned/CDGR/Flow_DTCWPT/a_58_CDGR_39_1st (4)_Flow_DTCWPT.csv
CSV files for a_58_CDGR_39_1st (5).csv have been concatenated and saved as /nfs/turbo/med-kayvan-lab/Projects/PARDS/02-Data/02-EVENTS/Cleaned/CDGR/Flow_DTCWPT/a_58_CDGR_39_1st (5)_Flow_DTCWPT.csv
CSV files for a_58_C

In [27]:
# Apply DTCWPT to AVEAA (all):

import os
import pandas as pd
import numpy as np
from scipy.signal import hilbert

# Define the directory containing the CSV files
input_dir = "/nfs/turbo/med-kayvan-lab/Projects/PARDS/02-Data/02-EVENTS/Cleaned/AVEAA/"
output_dir = "/nfs/turbo/med-kayvan-lab/Projects/PARDS/02-Data/02-EVENTS/Cleaned/AVEAA/Flow_DTCWPT/"

# Define the necessary column names
flow_column = "AVEA - Air Flow Wave (FTandBBF)"
window_column = "Tumbling_window"
hilbert_column = f"{flow_column} - HT"

# Define filter coefficients and other parameters (replace with your actual values)
first_real = variables_dict['first_1']
h = variables_dict['h']
f = variables_dict['f']
first_imaginary = variables_dict['first_2']
g = variables_dict['g']
max_level = 4

# Function to save features for a window
def save_features(window, real_result, imag_result, filename):
  """Saves real and imaginary features for a window to separate files.

  Args:
    window: Window identifier (e.g., tumbling window number).
    real_result: Real-valued DTCWPT coefficients for the window.
    imag_result: Imaginary-valued DTCWPT coefficients for the window.
    filename: Base filename for the file (without window and feature suffixes).
  """
  # Save real features
  real_filename = f"{output_dir}/{filename}_w{window}_real.csv"
  np.savetxt(real_filename, real_result, delimiter=',')

  # Save imaginary features
  imag_filename = f"{output_dir}/{filename}_w{window}_imag.csv"
  np.savetxt(imag_filename, imag_result, delimiter=',')

# Process each CSV file in the input directory
for filename in os.listdir(input_dir):
  if filename.endswith(".csv"):
    filepath = os.path.join(input_dir, filename)
    df = pd.read_csv(filepath)

    # Perform Hilbert transform on the entire Flow column
    df[hilbert_column] = np.abs(hilbert(df[flow_column].values))

    # Apply the first DWPT (real-valued filters)
    flow_data = df[flow_column]
    window_data = df[window_column]
    tumbling_windows = window_data.unique()
    results_real = {}

    for window in tumbling_windows:
      window_flow_data = flow_data[df[window_column] == window].values
      dtcwpt_result_real = DTCWPT(window_flow_data, first_real, h, f, max_level)
      results_real[window] = dtcwpt_result_real

      # Apply the second DWPT (imaginary filters)
      window_hilbert_data = df[hilbert_column][df[window_column] == window].values
      dtcwpt_result_imaginary = DTCWPT(window_hilbert_data, first_imaginary, g, f, max_level)
      results_imaginary[window] = dtcwpt_result_imaginary

      # Save features for the current window
      save_features(window, results_real[window], results_imaginary[window], filename)

print(f"Processed all files and saved features to {output_dir}")


Processed all files and saved features to /nfs/turbo/med-kayvan-lab/Projects/PARDS/02-Data/02-EVENTS/Cleaned/AVEAA/Flow_DTCWPT/


In [9]:
import pandas as pd
import os

# Directory where the files are stored
directory_path = "/nfs/turbo/med-kayvan-lab/Projects/PARDS/02-Data/02-EVENTS/Cleaned/AVEAA/Flow_DTCWPT/"

# List of base file names without the w1, w2, etc.
base_file_names = [
    "a_58_AVEAA_4_1st (1).csv",
    "a_58_AVEAA_4_1st (2).csv",
    "a_58_AVEAA_4_1st (3).csv",
    "a_58_AVEAA_4_1st (4).csv",
    "b_151_AVEAA_4_1st (1).csv",
    "b_151_AVEAA_4_1st (2).csv",
    "b_151_AVEAA_4_1st (3).csv",
    "b_151_AVEAA_4_1st (4).csv"
]

# Function to process each group of files
def process_files(base_file_name):
    # List of file names for the current base file
    file_names = [
        f"{base_file_name}_w1_real.csv", f"{base_file_name}_w1_imag.csv",
        f"{base_file_name}_w2_real.csv", f"{base_file_name}_w2_imag.csv",
        f"{base_file_name}_w3_real.csv", f"{base_file_name}_w3_imag.csv",
        f"{base_file_name}_w4_real.csv", f"{base_file_name}_w4_imag.csv",
        f"{base_file_name}_w5_real.csv", f"{base_file_name}_w5_imag.csv"
    ]

    # Initialize an empty list to hold DataFrames
    dataframes = []

    # Loop through each file
    for file in file_names:
        # Construct the full file path
        file_path = os.path.join(directory_path, file)
        
        # Read the CSV file
        df = pd.read_csv(file_path, header=None)
        
        # Transpose the DataFrame
        df_transposed = df.T
        
        # Generate column names based on the file name without '.csv' and in the format f{i}
        file_base = os.path.basename(file).replace(".csv", "")
        df_transposed.columns = [f"{file_base}_f{i+1}" for i in range(df_transposed.shape[1])]
        
        # Append the transposed DataFrame to the list
        dataframes.append(df_transposed)

    # Concatenate all DataFrames
    final_df = pd.concat(dataframes, axis=1)

    # Define the output file name without ".csv"
    output_file_base = base_file_name.replace(".csv", "")
    output_file_name = f"{output_file_base}_Flow_DTCWPT.csv"
    output_file_path = os.path.join(directory_path, output_file_name)

    # Save the concatenated DataFrame to the new CSV file
    final_df.to_csv(output_file_path, index=False)

    print(f"CSV files for {base_file_name} have been concatenated and saved as {output_file_path}")

# Loop through each base file name
for base_file_name in base_file_names:
    process_files(base_file_name)


CSV files for a_58_AVEAA_4_1st (1).csv have been concatenated and saved as /nfs/turbo/med-kayvan-lab/Projects/PARDS/02-Data/02-EVENTS/Cleaned/AVEAA/Flow_DTCWPT/a_58_AVEAA_4_1st (1)_Flow_DTCWPT.csv
CSV files for a_58_AVEAA_4_1st (2).csv have been concatenated and saved as /nfs/turbo/med-kayvan-lab/Projects/PARDS/02-Data/02-EVENTS/Cleaned/AVEAA/Flow_DTCWPT/a_58_AVEAA_4_1st (2)_Flow_DTCWPT.csv
CSV files for a_58_AVEAA_4_1st (3).csv have been concatenated and saved as /nfs/turbo/med-kayvan-lab/Projects/PARDS/02-Data/02-EVENTS/Cleaned/AVEAA/Flow_DTCWPT/a_58_AVEAA_4_1st (3)_Flow_DTCWPT.csv
CSV files for a_58_AVEAA_4_1st (4).csv have been concatenated and saved as /nfs/turbo/med-kayvan-lab/Projects/PARDS/02-Data/02-EVENTS/Cleaned/AVEAA/Flow_DTCWPT/a_58_AVEAA_4_1st (4)_Flow_DTCWPT.csv
CSV files for b_151_AVEAA_4_1st (1).csv have been concatenated and saved as /nfs/turbo/med-kayvan-lab/Projects/PARDS/02-Data/02-EVENTS/Cleaned/AVEAA/Flow_DTCWPT/b_151_AVEAA_4_1st (1)_Flow_DTCWPT.csv
CSV files for