In [21]:
import os

In [22]:
os.listdir("Data_Disc/217a")

['hr_20.csv',
 'hr_25.csv',
 'hr_30.csv',
 'hr_35.csv',
 'hr_40.csv',
 'merged_20.csv',
 'merged_25.csv',
 'merged_30.csv',
 'merged_35.csv',
 'merged_40.csv',
 'prt_20.csv',
 'prt_25.csv',
 'prt_30.csv',
 'prt_35.csv',
 'prt_40.csv',
 'rsp_20.csv',
 'rsp_25.csv',
 'rsp_30.csv',
 'rsp_35.csv',
 'rsp_40.csv',
 'spo_20.csv',
 'spo_25.csv',
 'spo_30.csv',
 'spo_35.csv',
 'spo_40.csv',
 'spv_20.csv',
 'spv_25.csv',
 'spv_30.csv',
 'spv_35.csv',
 'spv_40.csv']

In [23]:
import os
import pandas as pd

# Path to the main directory containing subdirectories
main_dir = 'Data_Disc'

# List of required prefixes and suffixes
prefixes = ['hr_', 'prt_', 'spv_']
suffixes = ['20', '25', '30', '35', '40']

# Function to merge CSV files based on timestamp
def merge_files(subdir_path, suffix):
    # Dictionary to store dataframes with the required prefixes
    dfs = {}
    
    for prefix in prefixes:
        file_name = f"{prefix}{suffix}.csv"
        file_path = os.path.join(subdir_path, file_name)
        
        # Read CSV file assuming no header, assign timestamp and data column names dynamically
        if os.path.exists(file_path):
            df = pd.read_csv(file_path, header=None)
            df.columns = ['timestamp', file_name.split('.')[0]]  # Assign 'timestamp' and file-specific header
            # df['timestamp'] = pd.to_datetime(df['timestamp'], format='%d-%m-%Y %H:%M:%S')  # Parse timestamp
            df['timestamp'] = pd.to_datetime(df['timestamp'], format='%Y-%m-%d %H:%M:%S')

            df.set_index('timestamp', inplace=True)  # Set timestamp as index
            dfs[prefix] = df
        else:
            print(f"{file_name} not found in {subdir_path}")
            return
    
    # Merge all the dataframes on the 'timestamp'
    merged_df = dfs['hr_'].join([dfs['prt_'], dfs['spv_']], how='inner')
    
    # Save the merged file in the subdirectory
    output_file = os.path.join(subdir_path, f'merged_{suffix}.csv')
    merged_df.to_csv(output_file)
    print(f"Saved merged file for suffix {suffix} in {subdir_path}")

# Iterate through each subdirectory
for subdir in os.listdir(main_dir):
    subdir_path = os.path.join(main_dir, subdir)
    
    if os.path.isdir(subdir_path):
        print(f"Processing directory: {subdir_path}")
        
        # Merge files for each suffix
        for suffix in suffixes:
            merge_files(subdir_path, suffix)


Processing directory: Data_Disc\217a
Saved merged file for suffix 20 in Data_Disc\217a
Saved merged file for suffix 25 in Data_Disc\217a
Saved merged file for suffix 30 in Data_Disc\217a
Saved merged file for suffix 35 in Data_Disc\217a
Saved merged file for suffix 40 in Data_Disc\217a
Processing directory: Data_Disc\218c
Saved merged file for suffix 20 in Data_Disc\218c
Saved merged file for suffix 25 in Data_Disc\218c
Saved merged file for suffix 30 in Data_Disc\218c
Saved merged file for suffix 35 in Data_Disc\218c
Saved merged file for suffix 40 in Data_Disc\218c
Processing directory: Data_Disc\219a
Saved merged file for suffix 20 in Data_Disc\219a
Saved merged file for suffix 25 in Data_Disc\219a
Saved merged file for suffix 30 in Data_Disc\219a
Saved merged file for suffix 35 in Data_Disc\219a
Saved merged file for suffix 40 in Data_Disc\219a
Processing directory: Data_Disc\223a
Saved merged file for suffix 20 in Data_Disc\223a
Saved merged file for suffix 25 in Data_Disc\223a
Sa

In [24]:
import os
import pandas as pd

# Path to the main directory containing subdirectories
main_dir = 'Data_Disc'

# List of suffixes to merge
suffixes = ['20', '25', '30', '35', '40']

# Function to merge all merged_<suffix>.csv files
def merge_all_files(suffix):
    all_dataframes = []
    
    # Iterate through each subdirectory
    for subdir in os.listdir(main_dir):
        subdir_path = os.path.join(main_dir, subdir)
        
        if os.path.isdir(subdir_path):
            file_name = f'merged_{suffix}.csv'
            file_path = os.path.join(subdir_path, file_name)
            
            # Read the merged file and add a new column for the subdirectory name
            if os.path.exists(file_path):
                df = pd.read_csv(file_path)
                df['timestamp'] = pd.to_datetime(df['timestamp'], format='%Y-%m-%d %H:%M:%S')
                df['participant'] = subdir  # Add subdirectory name as a new column
                all_dataframes.append(df)
            else:
                print(f"{file_name} not found in {subdir_path}")
    
    # Concatenate all dataframes row-wise
    if all_dataframes:
        combined_df = pd.concat(all_dataframes, ignore_index=True)
        
        # Save the combined dataframe
        output_file = os.path.join(main_dir, f'combined_merged_{suffix}.csv')
        combined_df.to_csv(output_file, index=False)
        print(f"Saved combined file for suffix {suffix} in {main_dir}")

# Iterate through each suffix to merge the files
for suffix in suffixes:
    merge_all_files(suffix)


Saved combined file for suffix 20 in Data_Disc
Saved combined file for suffix 25 in Data_Disc
Saved combined file for suffix 30 in Data_Disc
Saved combined file for suffix 35 in Data_Disc
merged_40.csv not found in Data_Disc\328a
Saved combined file for suffix 40 in Data_Disc


In [25]:
import pandas as pd

# Define file paths and corresponding altitudes
file_paths = [
    ('Data_Disc/combined_merged_20.csv', 2.0),   # 2.0 km altitude for combined_merged_20.csv
    ('Data_Disc//combined_merged_25.csv', 2.5),   # 2.5 km altitude for combined_merged_25.csv
    ('Data_Disc/combined_merged_30.csv', 3.0),   # 3.0 km altitude for combined_merged_30.csv
    ('Data_Disc/combined_merged_35.csv', 3.5),   # 3.5 km altitude for combined_merged_35.csv
    ('Data_Disc/combined_merged_40.csv', 4.0)    # 4.0 km altitude for combined_merged_40.csv
]

# Initialize an empty list to store dataframes
dataframes = []
# Function to rename columns by removing the altitude suffix (_20, _25, etc.)
def clean_column_names(df):
    df.columns = df.columns.str.replace(r'_\d+', '', regex=True)  # Remove suffixes like _20, _25
    return df

# Loop over each file and its corresponding altitude
for file_path, altitude in file_paths:
    df = pd.read_csv(file_path)    # Read the csv file
    df = clean_column_names(df)    # Clean column names by removing altitude-specific suffixes
    df['altitude'] = altitude      # Add a new column for altitude
    dataframes.append(df)          # Append dataframe to the list

# Concatenate all dataframes row-wise
merged_data = pd.concat(dataframes, ignore_index=True)

merged_data['hr'] = merged_data['hr']*250
merged_data['prt'] = merged_data['prt']*250
merged_data['spv'] = merged_data['spv']*100
df.head()

Unnamed: 0,timestamp,hr,prt,spv,participant,altitude
0,2023-02-17 09:51:30,0.653846,0.729167,0.0,217a,4.0
1,2023-02-17 09:51:31,0.653846,0.729167,0.0,217a,4.0
2,2023-02-17 09:51:32,0.653846,0.75,0.0,217a,4.0
3,2023-02-17 09:51:33,0.7,0.75,0.028571,217a,4.0
4,2023-02-17 09:51:34,0.830769,0.770833,0.085714,217a,4.0


FiO2 = baseline_FiO2 + α1 × (92−spv) + α2× altitude + α3 × (hr−70)+ α4 × (prt−70)

α1 : Altitude effect (e.g., 0.01 per 100 meters)

α2 : SpO2 effect (e.g., 0.5 per 1% decrease below 92%)

α3 : Heart rate effect (e.g., 0.2 per bpm above 70)

α4 : Pulse rate effect (e.g., 0.2 per bpm above 70)

In [26]:
import numpy as np

merged_data = merged_data[merged_data['spv'] != 0]
merged_data['fio2'] = 33.39 / (1013.25 * np.exp(-(merged_data['altitude'] * 1000) / 8400))
merged_data['inter'] = merged_data.apply(lambda row: row['fio2'] * (90 / row['spv']) if row['spv'] < 90 else row['fio2'], axis=1)
# merged_data['fio2'] = 1 / (1013.25 * np.exp(-(merged_data['altitude'] * 1000) / 8400))
# merged_data['inter'] = merged_data.apply(lambda row: row['fio2'] * (90 / row['spv']) if row['spv'] < 90 else row['fio2'], axis=1)

In [27]:
merged_data['target'] = merged_data['inter'] * (1 + (merged_data['hr'] - 72) / 100)
df = merged_data

In [28]:
import numpy as np

# Set baseline FiO2 (ambient air is 21%)
baseline_FiO2 = 21

# Define coefficients for each factor (you can adjust these values based on tuning)
alpha_1 = 0.5  # Weight for SpO2 deviation
alpha_2 = 0.01  # Weight for altitude (adjust for altitude unit, e.g., per 100m)
alpha_3 = 0.1  # Weight for heart rate deviation
alpha_4 = 0.1  # Weight for pulse rate deviation

# Calculate FiO2 based on the formula
df['FiO2'] = baseline_FiO2 + (
    alpha_1 * (92 - np.maximum(92,df['spv'])) +  # SpO2 adjustment
    alpha_2 * df['altitude'] * 1000 +  # Altitude adjustment (convert to meters if needed)
    alpha_3 * (df['hr'] - 70) +        # Heart rate adjustment
    alpha_4 * (df['prt'] - 70)         # Pulse rate adjustment
)
df['FiO2'] = np.clip(df['FiO2'], 0, 100)

# Display the result
# print(df[['hr', 'prt', 'spv', 'altitude', 'FiO2']])
df.head()

Unnamed: 0,timestamp,hr,prt,spv,participant,altitude,fio2,inter,target,FiO2
0,2023-02-17 09:16:12,159.615385,135.416667,85.714286,217a,2.0,0.041812,0.043903,0.082368,56.503205
1,2023-02-17 09:16:13,151.923077,130.208333,85.714286,217a,2.0,0.041812,0.043903,0.078991,55.213141
2,2023-02-17 09:16:14,151.923077,130.208333,85.714286,217a,2.0,0.041812,0.043903,0.078991,55.213141
3,2023-02-17 09:16:15,136.538462,130.208333,85.714286,217a,2.0,0.041812,0.043903,0.072237,53.674679
4,2023-02-17 09:16:16,125.0,130.208333,85.714286,217a,2.0,0.041812,0.043903,0.067171,52.520833


In [29]:
# Save the merged data to a new CSV file
output_file = 'merged_with_altitude_and_FiO2.csv'
df.to_csv(output_file, index=False)

print(f"Merged data with cleaned column names and altitudes saved to {output_file}")

Merged data with cleaned column names and altitudes saved to merged_with_altitude_and_FiO2.csv
