In [None]:
import pandas as pd
import numpy as np
import os
import warnings
import datetime
import ast
import statistics
import sys
from timeit import default_timer as timer
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from scipy.interpolate import griddata
from scipy.stats import mannwhitneyu
from scipy.stats import skew

In [None]:
# Checking the resolution of the examples

def excel_serial_to_datetime(serial):
    """Convert an Excel serial day count into a `datetime.datetime`.

    Serial 0 maps to 1899-12-30; the fractional part of `serial` becomes the
    time of day.
    """
    excel_epoch = datetime.datetime(1899, 12, 30)
    elapsed = datetime.timedelta(days=serial)
    return excel_epoch + elapsed

csv_file_directory = r"D:\Thesis Project\Thesis Project 2\Research Phase 4\Data\CAHR Data\CAHR Combined - Physiology Data"

# Only the first file in the directory is loaded (the loop exits after one pass);
# it is re-indexed on a Python datetime built from the Excel-serial 'DateTime' column.
for file in os.listdir(csv_file_directory):
    df_individual = pd.read_csv(os.path.join(csv_file_directory, file))
    df_individual = (
        df_individual
        .assign(datetime=df_individual['DateTime'].apply(excel_serial_to_datetime))
        .set_index('datetime')
        .drop(columns=['DateTime'])
    )
    break

In [None]:
df_individual.head()

# Optimal ARIMA Model for Individual Patient 

In [None]:
warnings.filterwarnings('ignore')

csv_file_directory = r"D:\Thesis Project\Thesis Project 2\Research Phase 4\Data\CAHR Data\CAHR Combined - Physiology Data"
files = os.listdir(csv_file_directory)
# The list of the parameters that need to be considered
parameter_list = ['RAP', 'ICP', 'AMP', 'MAP', 'CPP', 'RSO2_L', 'RSO2_R', 'PbtO2', 'PRx', \
                      'PAx', 'RAC', 'COx_L', 'COx_R', 'COxA_L', 'COxA_R']

parameter_list_modified = parameter_list + ['Patient']
# One row per file; each parameter cell receives [p, d, q, formatted AIC] of the best fit.
df_resolution = pd.DataFrame(index=range(len(files)), columns=parameter_list_modified)
d = 0
count = 0

# NOTE(review): ARIMA is not imported in the notebook's import cell; presumably it is
# statsmodels.tsa.arima.model.ARIMA -- confirm and add the import there.
for index, file in enumerate(files):
    # NOTE(review): this override makes every iteration re-read the same patient file,
    # so the rows written below are all for CAHR-126. Looks like a debugging leftover.
    file = "CAHR-126.csv"
    df_individual = pd.read_csv(os.path.join(csv_file_directory, file))

    df_individual = df_individual.reindex(columns = parameter_list)
    df_individual_cleaned = df_individual.dropna(axis=1, how='all')

    for parameter in parameter_list:
        # Parameters whose column was entirely empty were dropped above.
        if parameter not in df_individual_cleaned.columns:
            continue

        aic_old = float('inf')
        optimal_ARIMA = {'p': None, 'd': None, 'q': None}

        # NOTE(review): despite the name, the series is not differenced here and the
        # model is fitted with the module-level d (0). Confirm whether d = 1 was intended.
        first_ord_diff = df_individual_cleaned[parameter].dropna()

        # Grid search over (p, q); the ranges currently cover only p = q = 1.
        for p in range(1, 2):
            for q in range(1, 2):
                try:
                    model = ARIMA(first_ord_diff, order=(p, d, q))
                    model_fit = model.fit()
                    aic_new = model_fit.aic

                    if aic_new < aic_old:
                        aic_old = aic_new
                        # Record the order that was actually fitted (d, not a literal 1).
                        optimal_ARIMA['p'], optimal_ARIMA['d'], optimal_ARIMA['q'] = p, d, q

                except NameError:
                    # A missing import/name is a notebook error, not a failed fit.
                    raise
                except Exception as e:
                    print(f"An error occurred: {str(e)}")

        print("Combination done till p =", p, "and q =", q)
        print("Best combination for", parameter, "is", optimal_ARIMA, "Best AIC value:", aic_old)
        print("--------------------------------------------------")

        if optimal_ARIMA['p'] is None:
            # No model could be fitted: leave the cell empty instead of storing a bogus order.
            continue

        formatted_aic_old = "{:.3f}".format(aic_old)
        df_resolution.at[index, parameter] = [optimal_ARIMA['p'], optimal_ARIMA['d'],
                                              optimal_ARIMA['q'], formatted_aic_old]

    df_resolution.at[index, 'Patient'] = file[:-4]
    print(df_resolution.iloc[index])
    count+=1

    # Stop after three iterations.
    if count > 2:
        break

In [None]:
# Print, for each parameter present in one patient file, how many samples remain
# after first-order differencing. The loop exits after its first pass.
for index, file in enumerate(files):
    file = "CAHR-95.csv"
    df_individual = pd.read_csv(os.path.join(csv_file_directory, file)).reindex(columns=parameter_list)
    df_individual_cleaned = df_individual.dropna(axis=1, how='all')

    for parameter in parameter_list:
        if parameter not in df_individual_cleaned.columns:
            continue
        first_ord_diff = df_individual_cleaned[parameter].dropna().diff().dropna()
        print(first_ord_diff.shape[0])

    break

In [None]:
df_individual_cleaned

## RAP trichotomized by thresholds

## Scatterplots (piece-wise linear regression)

In [None]:
# Group the patient files by which set of measurements they contain.
# A file can land in more than one group; columns that are entirely empty do not count.

tic = timer()

csv_file_directory = r"D:\Thesis Project\Thesis Project 2\Research Phase 4\Data\CAHR Data\CAHR Combined - Physiology Data"
files = os.listdir(csv_file_directory)

parameter_list_ICP_ABP_measures = ['RAP', 'ICP', 'AMP', 'MAP', 'CPP', 'PRx', 'PAx', 'RAC']
parameter_list_NIRS_measures = ['RAP', 'ICP', 'AMP', 'MAP', 'CPP', 'RSO2_L', 'RSO2_R', 'PRx', 'PAx', \
                      'RAC', 'COx_L', 'COx_R', 'COxA_L', 'COxA_R']
parameter_list_PbtO2_measures = ['RAP', 'ICP', 'AMP', 'MAP', 'CPP', 'PRx', 'PAx', 'RAC', 'PbtO2']

files_ICP_ABP_measures, files_NIRS_measures, files_PbtO2_measures = [], [], []

for file in files:
    df_individual = pd.read_csv(os.path.join(csv_file_directory, file)).dropna(axis=1, how='all')

    if set(parameter_list_ICP_ABP_measures).issubset(df_individual.columns):
        files_ICP_ABP_measures.append(file)
    if set(parameter_list_NIRS_measures).issubset(df_individual.columns):
        files_NIRS_measures.append(file)
    if set(parameter_list_PbtO2_measures).issubset(df_individual.columns):
        files_PbtO2_measures.append(file)

tac = timer()
print("Execution time: ",round(tac-tic, 3), "seconds")

In [None]:
# Number of files in each measurement group, in the order ICP/ABP, NIRS, PbtO2.
for measure_files in (files_ICP_ABP_measures, files_NIRS_measures, files_PbtO2_measures):
    print(len(measure_files))

In [None]:
# Read every patient file, drop physiologically implausible rows and stack the
# result into one frame (df_all) with a fresh 0..n-1 index.

csv_file_directory = r"D:\Thesis Project\Thesis Project 2\Research Phase 4\Data\CAHR Data\CAHR Combined - Physiology Data"
files = os.listdir(csv_file_directory)

parameter_list = ['RAP', 'ICP', 'AMP', 'MAP', 'CPP', 'PRx',  'PAx', 'RAC', 'RSO2_L', 'RSO2_R', \
                'COx_L', 'COx_R', 'COxA_L', 'COxA_R', 'PbtO2', ]

# Collect the per-file frames and concatenate once: concatenating inside the loop
# re-copies the accumulated data on every iteration.
cleaned_frames = []

for file in files:
    df_individual= pd.read_csv(os.path.join(csv_file_directory, file))
    # reindex guarantees every expected column exists (missing ones become all-NaN).
    df_individual_cleaned = df_individual.reindex(columns = parameter_list)

    # Rows outside these ranges are removed. Comparisons with NaN are False,
    # so rows with missing ICP/MAP/AMP are kept.
    filter_condition = (df_individual_cleaned['ICP'] > 100) | (df_individual_cleaned['ICP'] < 0) | \
                       (df_individual_cleaned['MAP'] > 200) | (df_individual_cleaned['MAP'] < 0) | \
                       (df_individual_cleaned['AMP'] > 40)
    df_individual_cleaned = df_individual_cleaned[~filter_condition]

    cleaned_frames.append(df_individual_cleaned)

if cleaned_frames:
    df_all = pd.concat(cleaned_frames, axis=0, ignore_index=True)
else:
    # Empty directory: keep the expected columns so later cells can still index them.
    df_all = pd.DataFrame(columns=parameter_list)

In [None]:
# Reading all the files and concatenating
def concatenating_files(files, directory=None, parameters=None):
    """Read the given CSV files, drop out-of-range rows and return them stacked.

    Parameters
    ----------
    files : iterable of str
        File names, resolved relative to `directory`.
    directory : str, optional
        Folder containing the files. Defaults to the module-level
        `csv_file_directory`.
    parameters : list of str, optional
        Columns to keep, in order; columns missing from a file become all-NaN.
        Defaults to the module-level `parameter_list`. Must include 'ICP',
        'MAP' and 'AMP', which the range filter reads.

    Returns
    -------
    pandas.DataFrame
        All remaining rows with a fresh 0..n-1 index. Rows with ICP outside
        [0, 100], MAP outside [0, 200] or AMP above 40 are removed; rows where
        those values are missing are kept. Empty (but with the requested
        columns) when `files` is empty.
    """
    directory = csv_file_directory if directory is None else directory
    parameters = parameter_list if parameters is None else parameters

    cleaned_frames = []
    for file in files:
        df_individual = pd.read_csv(os.path.join(directory, file))
        df_individual_cleaned = df_individual.reindex(columns=parameters)

        filter_condition = (df_individual_cleaned['ICP'] > 100) | (df_individual_cleaned['ICP'] < 0) | \
                           (df_individual_cleaned['MAP'] > 200) | (df_individual_cleaned['MAP'] < 0) | \
                           (df_individual_cleaned['AMP'] > 40)
        cleaned_frames.append(df_individual_cleaned[~filter_condition])

    if not cleaned_frames:
        return pd.DataFrame(columns=parameters)

    # Single concat at the end; no placeholder rows are added to the result.
    return pd.concat(cleaned_frames, axis=0, ignore_index=True)

In [None]:
df_all.head()

In [None]:
def scatter_plot_with_piece_wise_linear_regression(segments, dependent_param):
    """Scatter RAP against one measure and overlay one linear fit per RAP segment.

    Parameters
    ----------
    segments : sequence of three DataFrames
        Each must contain 'RAP' and `dependent_param`. They are fitted in order
        and labelled 'RAP < 0', '0 ≤ RAP ≤ 0.4' and 'RAP > 0.4' on the plot.
    dependent_param : str
        Column regressed on RAP.

    Returns
    -------
    tuple
        Slopes of the three fits, each rounded to two decimals.

    Notes
    -----
    The scatter points are taken from the module-level `df_all`, not from
    `segments`. For the NIRS columns, rows where RAP or the measure equals 0
    are left out of both the fits and the scatter.
    """
    warnings.filterwarnings('ignore')

    NIRS_measures_list = ['RSO2_L', 'RSO2_R', 'COx_L', 'COx_R', 'COxA_L', 'COxA_R']
    drop_zero_rows = dependent_param in NIRS_measures_list
    pair = ['RAP', dependent_param]

    # Fit each segment and evaluate its line on 100 points across the segment's RAP range.
    models, fit_lines = [], []
    for source in (segments[0], segments[1], segments[2]):
        data = source[pair].dropna()
        if drop_zero_rows:
            data = data[(data != 0).all(axis=1)]
        model = LinearRegression().fit(data[['RAP']], data[dependent_param])
        grid = np.linspace(data['RAP'].min(), data['RAP'].max(), 100).reshape(-1, 1)
        models.append(model)
        fit_lines.append((grid, model.predict(grid)))
    model1, model2, model3 = models

    plt.figure(figsize=(10, 8))

    if drop_zero_rows:
        df_pair = df_all[pair]
        df_pair = df_pair[(df_pair != 0).all(axis=1)]
        plt.scatter(df_pair['RAP'], df_pair[dependent_param], alpha=0.6)
    else:
        plt.scatter(df_all['RAP'], df_all[dependent_param], alpha=0.6)

    # Fitted lines, one colour per segment.
    line_styles = [('darkred', 'RAP < 0 Fit'),
                   ('darkgreen', '0 ≤ RAP ≤ 0.4 Fit'),
                   ('#CCCC00', 'RAP > 0.4 Fit')]
    for (grid, predicted), (color, label) in zip(fit_lines, line_styles):
        plt.plot(grid, predicted, color=color, label=label)

    plt.title('Piece-wise Linear Regression with Two Thresholds Between RAP and ' + dependent_param)
    plt.xlabel('RAP')
    plt.ylabel(dependent_param)
    plt.legend(loc='upper left')

    # Saving is currently disabled; the target is kept for when it is re-enabled.
    save_image_directory = r"D:\Thesis Project\Thesis Project 2\Research Phase 4\Plots\Piece_wise_linear_regression\RAP_independent"
    save_image_name = 'RAP and ' + dependent_param
    #plt.savefig(os.path.join(save_image_directory, save_image_name))

    plt.show()

    print(f"RAP < 0 Fit: Slope = {model1.coef_[0]:.2f}, Intercept = {model1.intercept_:.2f}")
    print(f"0 ≤ RAP ≤ 0.4 Fit: Slope = {model2.coef_[0]:.2f}, Intercept = {model2.intercept_:.2f}")
    print(f"RAP > 0.4 Fit: Slope = {model3.coef_[0]:.2f}, Intercept = {model3.intercept_:.2f}")

    return round(model1.coef_[0], 2), round(model2.coef_[0], 2), round(model3.coef_[0], 2)

In [None]:
# Piece-wise regression for one dependent parameter, with RAP split at 0 and 0.4.
breakpoints = [0, 0.4]

segment1 = df_all.loc[df_all['RAP'].lt(breakpoints[0])]
segment2 = df_all.loc[df_all['RAP'].between(breakpoints[0], breakpoints[1])]  # both ends inclusive
segment3 = df_all.loc[df_all['RAP'].gt(breakpoints[1])]

segments = [segment1, segment2, segment3]

dependent_param = 'RSO2_L'
scatter_plot_with_piece_wise_linear_regression(segments, dependent_param)

In [None]:
# Fit the piece-wise regression for every parameter except RAP and collect the
# three slopes per parameter in df_slope (one row per RAP segment).
breakpoints = [0, 0.4]

segment1 = df_all[df_all['RAP'] < breakpoints[0]]
segment2 = df_all[(df_all['RAP'] >= breakpoints[0]) & (df_all['RAP'] <= breakpoints[1])]
segment3 = df_all[df_all['RAP'] > breakpoints[1]]

segments = [segment1, segment2, segment3]

labels = ['RAP < 0', '0 ≤ RAP ≤ 0.4', 'RAP > 0.4']
columns = df_all.columns.to_list()
columns.remove('RAP')
columns.insert(0, 'Labels')
df_slope = pd.DataFrame(index=range(len(labels)), columns=columns)

for parameter in df_all.columns:
    # RAP is the independent variable; skip it by name rather than by column position.
    if parameter == 'RAP':
        continue
    slopes = scatter_plot_with_piece_wise_linear_regression(segments, parameter)
    for row, slope in enumerate(slopes):
        df_slope.at[row, parameter] = slope

# The labels do not depend on the parameter, so they are assigned once after the loop.
df_slope['Labels'] = labels
    

In [None]:
df_slope

## Boxplots

In [None]:
def boxplots_with_thresholds(segments, dependent_param):
    """Draw one box per RAP segment for `dependent_param`, save the figure and show it.

    `segments` must hold three DataFrames containing 'RAP' and `dependent_param`;
    rows with a missing value in either column are ignored. The image is written
    to the hard-coded boxplot directory under the name 'RAP and <dependent_param>'.
    The elapsed time is printed at the end.
    """
    tic = timer()
    warnings.filterwarnings('ignore')

    pair = ['RAP', dependent_param]
    samples = [segments[position][pair].dropna()[dependent_param] for position in range(3)]

    plt.boxplot(samples, labels=['RAP < 0', '0 ≤ RAP ≤ 0.4', 'RAP > 0.4'])
    plt.title(dependent_param + ' Measures at Different RAP Thresholds')
    plt.tight_layout()

    save_image_directory = r"D:\Thesis Project\Thesis Project 2\Research Phase 4\Plots\Boxplots\RAP_independent"
    save_image_name = 'RAP and ' + dependent_param
    target = os.path.join(save_image_directory, save_image_name)
    plt.savefig(target)

    plt.show()

    tac = timer()
    print("Execution time:", round(tac-tic, 2), "seconds")

In [None]:
def boxplots_with_values(values, dependent_param, df_all):
    """Box-plot `dependent_param` within consecutive RAP bins.

    Parameters
    ----------
    values : sequence of float
        Ascending bin edges; bin i is the half-open interval
        [values[i], values[i+1]).
    dependent_param : str
        Column of `df_all` to summarise.
    df_all : pandas.DataFrame
        Must contain 'RAP' and `dependent_param`.

    Notes
    -----
    For the NIRS columns, rows where RAP or the measure itself equals 0 are
    excluded. Only those two columns are inspected, matching
    `scatter_plot_with_piece_wise_linear_regression`; a zero in an unrelated
    column no longer removes the row. Outliers are hidden (`showfliers=False`).
    The elapsed time is printed at the end.
    """
    tic = timer()
    warnings.filterwarnings('ignore')

    NIRS_measures_list = ['RSO2_L', 'RSO2_R', 'COx_L', 'COx_R', 'COxA_L', 'COxA_R']

    # Restrict to the two relevant columns BEFORE the zero filter.
    pair_columns = ['RAP'] if dependent_param == 'RAP' else ['RAP', dependent_param]
    pair = df_all[pair_columns]
    if dependent_param in NIRS_measures_list:
        pair = pair[(pair != 0).all(axis=1)]

    labels = []
    segments = []
    for lower, upper in zip(values[:-1], values[1:]):
        labels.append("[" + str(lower) + "," + str(upper) + ")")
        in_bin = (pair['RAP'] >= lower) & (pair['RAP'] < upper)
        segments.append(pair.loc[in_bin, dependent_param].dropna())

    plt.figure(figsize=(8, 6))
    plt.boxplot(segments, labels=labels, showfliers=False)
    title = dependent_param + ' Measures at Different RAP Values'
    plt.title(title)
    plt.xticks(fontsize=8)
    plt.xlabel("RAP")
    plt.ylabel(dependent_param)
    # Boxes sit at x = 1, 2, ...; with the 0.2-wide bins starting at -1 used by the
    # caller, x = 5.5 is the boundary at RAP = 0 and x = 7.5 the boundary at RAP = 0.4.
    # NOTE(review): these positions are hard-coded and only match that bin layout.
    plt.axvline(x=5.5, color='black', linestyle='--', linewidth=1)
    plt.axvline(x=7.5, color='black', linestyle='--', linewidth=1)

    plt.tight_layout()

    # Saving is currently disabled; the target is kept for when it is re-enabled.
    save_image_directory = r"D:\Thesis Project\Thesis Project 2\Research Phase 4\Plots\Boxplots_v2\RAP_independent"
    save_image_name = 'RAP and ' + dependent_param
    #plt.savefig(os.path.join(save_image_directory, save_image_name))

    plt.show()

    tac = timer()
    print("Execution time:", round(tac-tic, 2), "seconds")

In [None]:
# Threshold box plots for every column after the first one (RAP).
breakpoints = [0, 0.4]

segment1 = df_all.loc[df_all['RAP'].lt(breakpoints[0])]
segment2 = df_all.loc[df_all['RAP'].between(breakpoints[0], breakpoints[1])]  # both ends inclusive
segment3 = df_all.loc[df_all['RAP'].gt(breakpoints[1])]

segments = [segment1, segment2, segment3]

for parameter in df_all.columns[1:]:
    boxplots_with_thresholds(segments, parameter)

In [None]:
# Bin edges -1.0, -0.8, ..., 1.0; a rounded "-0.0" is rewritten as 0.0 so the label reads cleanly.
values = np.round(np.arange(-1, 1.1, 0.2), 1)
values[values == 0] = 0.0

# Every column after the first one (RAP).
for parameter in df_all.columns[1:]:
    boxplots_with_values(values, parameter, df_all)

In [None]:
# Show the RAP bin edges, with any negative zero replaced by 0.0.
values = np.round(np.arange(-1, 1.1, 0.2), 1)
values[values == 0] = 0.0
values

## Contour Plots

In [None]:
def contour_plot_across_full_spectrum(breakpoints, dependent_param):
    """Filled contour of `dependent_param` over the RAP (x) / ICP (y) plane.

    Rows of the module-level `df_all` with all three values present are linearly
    interpolated onto a 100 x 100 grid spanning the observed RAP and ICP ranges.
    Dashed vertical lines mark `breakpoints[0]` and `breakpoints[1]` on the RAP
    axis. The elapsed time is printed at the end.
    """
    tic = timer()
    warnings.filterwarnings('ignore')

    points = df_all[['RAP', 'ICP', dependent_param]].dropna()
    rap, icp, measure = points['RAP'], points['ICP'], points[dependent_param]

    # Regular grid over the observed ranges, then interpolate the measure onto it.
    rap_axis = np.linspace(rap.min(), rap.max(), 100)
    icp_axis = np.linspace(icp.min(), icp.max(), 100)
    grid_x, grid_y = np.meshgrid(rap_axis, icp_axis)
    grid_z = griddata((rap, icp), measure, (grid_x, grid_y), method='linear')

    plt.figure(figsize=(8, 6))
    filled = plt.contourf(grid_x, grid_y, grid_z, levels=20, cmap='viridis')
    plt.colorbar(filled, label=dependent_param)

    for threshold in (breakpoints[0], breakpoints[1]):
        plt.axvline(x=threshold, color='black', linestyle='--', linewidth=2)

    plt.title('Contour Plot of ' + dependent_param)
    plt.xlabel('RAP')
    plt.ylabel('ICP')

    # Saving is currently disabled; the target is kept for when it is re-enabled.
    save_image_directory = r"D:\Thesis Project\Thesis Project 2\Research Phase 4\Plots\Contour_plots\RAP_independent"
    save_image_name = 'RAP and ' + dependent_param
    #plt.savefig(os.path.join(save_image_directory, save_image_name))

    plt.show()

    tac = timer()
    print("Execution time:", round(tac-tic, 2), "seconds")

In [None]:
# Contour plots for every column after the first two (RAP and ICP are the axes).

breakpoints = [0, 0.4]

for parameter in df_all.columns[2:]:
    contour_plot_across_full_spectrum(breakpoints, parameter)

# Median Physiology Measures

In [None]:
# Three RAP segments: below 0, between 0 and 0.4 (both inclusive), above 0.4.

breakpoints = [0, 0.4]

segment1 = df_all.loc[df_all['RAP'].lt(breakpoints[0])]
segment2 = df_all.loc[df_all['RAP'].between(breakpoints[0], breakpoints[1])]
segment3 = df_all.loc[df_all['RAP'].gt(breakpoints[1])]

segments = [segment1, segment2, segment3]

labels = ['RAP < 0', '0 ≤ RAP ≤ 0.4', 'RAP > 0.4']

In [None]:
# Median of each physiology measure within each RAP segment (one row per segment).
num_rows = len(labels)
columns = df_all.columns.to_list()
columns.insert(0, 'Labels')

df_median = pd.DataFrame(index=range(num_rows), columns=columns)
NIRS_measures_list = ['RSO2_L', 'RSO2_R', 'COx_L', 'COx_R', 'COxA_L', 'COxA_R']

for index, segment in enumerate(segments):
    # Plain medians for every column first ...
    df_median.loc[index] = segment.median().round(3)

    # ... then overwrite the NIRS columns with the median of their non-zero values.
    for col in segment.columns:
        if col in NIRS_measures_list:
            individual_param = segment[col]
            individual_param = individual_param.loc[individual_param != 0]
            # Write to the column being processed (`col`), not to a leftover loop
            # variable from an earlier cell.
            df_median.at[index, col] = individual_param.median()

df_median['Labels'] = labels
df_median

In [None]:
# Print the median of the non-zero values for each listed column of `segment`.
# NOTE(review): `segment` is whatever an earlier loop left behind -- confirm which one is meant.
NIRS_measures_list = ['RSO2_L', 'RSO2_R', 'COx_L', 'COx_R', 'COxA_L', 'COxA_R', 'PbtO2']

for col in segment.columns:
    if col not in NIRS_measures_list:
        continue
    a_row = segment[col]
    a_row = a_row.loc[a_row != 0]
    print(a_row.median())

In [None]:
# Per-segment medians; the listed columns use the median of their non-zero values only.
num_rows = len(labels)
columns = df_all.columns.to_list()
columns.insert(0, 'Labels')

df_median = pd.DataFrame(index=range(num_rows), columns=columns)
NIRS_measures_list = ['RSO2_L', 'RSO2_R', 'COx_L', 'COx_R', 'COxA_L', 'COxA_R', 'PbtO2']

for index, segment in enumerate(segments):
    # Baseline: median of every column, rounded to 3 decimals.
    df_median.loc[index] = segment.median().round(3)

    for col in segment.columns:
        if col not in NIRS_measures_list:
            continue
        individual_param = segment.loc[segment[col] != 0, col]
        df_median.at[index, col] = individual_param.median()

df_median['Labels'] = labels
df_median

In [None]:
a_row.shape

## Jonckheere-Terpstra testing

In [None]:
# Write each current segment to segment1.csv, segment2.csv, ... for the follow-up analysis.
save_directory = r"D:\Thesis Project\Thesis Project 2\Research Phase 4\Data\Segment_csv\RAP_independent"

for index, segment in enumerate(segments):
    file_name = "".join(["segment", str(index + 1), ".csv"])
    segment.to_csv(os.path.join(save_directory, file_name), index=False)

In [None]:
# The rest is conducted in R

# Reverse with Other Physiological Thresholds

## Median

In [None]:
def median_with_other_physiologic_thresholds(segments):
    """Return the per-column median of each segment, one row per segment.

    Parameters
    ----------
    segments : sequence of pandas.DataFrame
        Segments sharing the same columns.

    Returns
    -------
    pandas.DataFrame
        Row i holds the medians of `segments[i]`, rounded to 3 decimals.
        Columns follow the first segment; when `segments` is empty they fall
        back to the module-level `df_all`.
    """
    num_rows = len(segments)
    # Take the columns from the data actually passed in instead of a hidden global.
    columns = segments[0].columns.to_list() if num_rows else df_all.columns.to_list()
    df_median = pd.DataFrame(index=range(num_rows), columns=columns)

    for index, segment in enumerate(segments):
        df_median.loc[index] = segment.median().round(3)

    return df_median

## ICP

In [None]:
# ICP split at 20: segment1 holds rows below the threshold, segment2 rows at or above it.
independent_param = "ICP"
breakpoints = [20]
all_data = df_all

segment1 = all_data.loc[all_data[independent_param].lt(breakpoints[0])]
segment2 = all_data.loc[all_data[independent_param].ge(breakpoints[0])]

segments = [segment1, segment2]

In [None]:
# Per-segment median of every column (rounded to 3 decimals) for the split currently held in `segments`.
df_median = median_with_other_physiologic_thresholds(segments)
print("Median based on threshold", breakpoints[0])
df_median

In [None]:
# ICP split at 22.5: segment1 holds rows below the threshold, segment2 rows at or above it.
independent_param = "ICP"
breakpoints = [22.5]
all_data = df_all

segment1 = all_data.loc[all_data[independent_param].lt(breakpoints[0])]
segment2 = all_data.loc[all_data[independent_param].ge(breakpoints[0])]

segments = [segment1, segment2]

In [None]:
# Per-segment median of every column (rounded to 3 decimals) for the split currently held in `segments`.
df_median = median_with_other_physiologic_thresholds(segments)
print("Median based on threshold", breakpoints[0])
df_median

## CPP

In [None]:
# CPP split: segment1 holds rows below 60, segment2 rows above 70 (values from 60 to 70 are in neither).
independent_param = "CPP"
breakpoints = [60, 70]
all_data = df_all

segment1 = all_data.loc[all_data[independent_param].lt(breakpoints[0])]
segment2 = all_data.loc[all_data[independent_param].gt(breakpoints[1])]

segments = [segment1, segment2]

In [None]:
# Per-segment median of every column (rounded to 3 decimals) for the split currently held in `segments`.
df_median = median_with_other_physiologic_thresholds(segments)
print("Median based on threshold", breakpoints[0], breakpoints[1])
df_median

## rSO2

In [None]:
independent_param = "RSO2_L"
breakpoints = [60, 70, 80, 90]
all_data = df_all

# One segment per threshold: rows strictly below it. The segments are nested, not disjoint.
segments = [all_data[all_data[independent_param] < limit] for limit in breakpoints]
segment1, segment2, segment3, segment4 = segments

In [None]:
# Per-segment median of every column (rounded to 3 decimals) for the split currently held in `segments`.
df_median = median_with_other_physiologic_thresholds(segments)
print("Median based on threshold", breakpoints[0], breakpoints[1], breakpoints[2], breakpoints[3])
df_median

In [None]:
independent_param = "RSO2_R"
breakpoints = [60, 70, 80, 90]
all_data = df_all

# One segment per threshold: rows strictly below it. The segments are nested, not disjoint.
segments = [all_data[all_data[independent_param] < limit] for limit in breakpoints]
segment1, segment2, segment3, segment4 = segments

In [None]:
# Per-segment median of every column (rounded to 3 decimals) for the split currently held in `segments`.
df_median = median_with_other_physiologic_thresholds(segments)
print("Median based on threshold", breakpoints[0], breakpoints[1], breakpoints[2], breakpoints[3])
df_median

## PbtO2

In [None]:
independent_param = "PbtO2"
breakpoints = [15, 20]
all_data = df_all

# One segment per threshold: rows strictly below it (segment1 is contained in segment2).
segments = [all_data[all_data[independent_param] < limit] for limit in breakpoints]
segment1, segment2 = segments

In [None]:
# Per-segment median of every column (rounded to 3 decimals) for the split currently held in `segments`.
df_median = median_with_other_physiologic_thresholds(segments)
print("Median based on threshold", breakpoints[0], breakpoints[1])
df_median

## PRx

In [None]:
independent_param = "PRx"
breakpoints = [0, 0.25, 0.35]
all_data = df_all

# One segment per threshold: rows strictly above it. The segments are nested, not disjoint.
segments = [all_data[all_data[independent_param] > limit] for limit in breakpoints]
segment1, segment2, segment3 = segments

In [None]:
# Per-segment median of every column (rounded to 3 decimals) for the split currently held in `segments`.
df_median = median_with_other_physiologic_thresholds(segments)
print("Median based on threshold", breakpoints[0], breakpoints[1], breakpoints[2])
df_median

## PAx

In [None]:
independent_param = "PAx"
breakpoints = [0, 0.25]
all_data = df_all

# One segment per threshold: rows strictly above it (segment2 is contained in segment1).
segments = [all_data[all_data[independent_param] > limit] for limit in breakpoints]
segment1, segment2 = segments

In [None]:
# Per-segment median of every column (rounded to 3 decimals) for the split currently held in `segments`.
df_median = median_with_other_physiologic_thresholds(segments)
print("Median based on threshold", breakpoints[0], breakpoints[1])
df_median

## RAC

In [None]:
# RAC split at 0: segment1 holds rows above the threshold, segment2 rows below it (exact zeros are in neither).
independent_param = "RAC"
breakpoints = [0]
all_data = df_all

segment1 = all_data.loc[all_data[independent_param].gt(breakpoints[0])]
segment2 = all_data.loc[all_data[independent_param].lt(breakpoints[0])]

segments = [segment1, segment2]

In [None]:
# Per-segment median of every column (rounded to 3 decimals) for the split currently held in `segments`.
df_median = median_with_other_physiologic_thresholds(segments)
print("Median based on threshold", breakpoints[0])
df_median

## COx

In [None]:
independent_param = "COx_L"
breakpoints = [0, 0.20]
all_data = df_all

# One segment per threshold: rows strictly above it (segment2 is contained in segment1).
segments = [all_data[all_data[independent_param] > limit] for limit in breakpoints]
segment1, segment2 = segments

In [None]:
# Per-segment median of every column (rounded to 3 decimals) for the split currently held in `segments`.
df_median = median_with_other_physiologic_thresholds(segments)
print("Median based on threshold", breakpoints[0], breakpoints[1])
df_median

In [None]:
independent_param = "COx_R"
breakpoints = [0, 0.20]
all_data = df_all

# One segment per threshold: rows strictly above it (segment2 is contained in segment1).
segments = [all_data[all_data[independent_param] > limit] for limit in breakpoints]
segment1, segment2 = segments

In [None]:
# Per-segment median of every column (rounded to 3 decimals) for the split currently held in `segments`.
df_median = median_with_other_physiologic_thresholds(segments)
print("Median based on threshold", breakpoints[0], breakpoints[1])
df_median

## COx-a

In [None]:
independent_param = "COxA_L"
breakpoints = [0, 0.20]
all_data = df_all

# One segment per threshold: rows strictly above it (segment2 is contained in segment1).
segments = [all_data[all_data[independent_param] > limit] for limit in breakpoints]
segment1, segment2 = segments

In [None]:
# Per-segment median of every column (rounded to 3 decimals) for the split currently held in `segments`.
df_median = median_with_other_physiologic_thresholds(segments)
print("Median based on threshold", breakpoints[0], breakpoints[1])
df_median

In [None]:
independent_param = "COxA_R"
breakpoints = [0, 0.20]
all_data = df_all

# One segment per threshold: rows strictly above it (segment2 is contained in segment1).
segments = [all_data[all_data[independent_param] > limit] for limit in breakpoints]
segment1, segment2 = segments

In [None]:
# Per-segment median of every column (rounded to 3 decimals) for the split currently held in `segments`.
df_median = median_with_other_physiologic_thresholds(segments)
print("Median based on threshold", breakpoints[0], breakpoints[1])
df_median

## Jonckheere-Terpstra testing

## rSO2

In [None]:
independent_param = "RSO2_L"
breakpoints = [60, 70, 80, 90]
all_data = df_all

# One segment per threshold: rows strictly below it. The segments are nested, not disjoint.
segments = [all_data[all_data[independent_param] < limit] for limit in breakpoints]
segment1, segment2, segment3, segment4 = segments

save_directory = r"D:\Thesis Project\Thesis Project 2\Research Phase 4\Segment_csv\Other_variables_independent"

# Export as <parameter>_segment<N>.csv, numbered from 1.
for index, segment in enumerate(segments):
    file_name = "".join([independent_param, "_segment", str(index + 1), ".csv"])
    segment.to_csv(os.path.join(save_directory, file_name), index=False)

In [None]:
# The Rest is done in R

In [None]:
independent_param = "RSO2_R"
breakpoints = [60, 70, 80, 90]
all_data = df_all

# One segment per threshold: rows strictly below it. The segments are nested, not disjoint.
segments = [all_data[all_data[independent_param] < limit] for limit in breakpoints]
segment1, segment2, segment3, segment4 = segments

save_directory = r"D:\Thesis Project\Thesis Project 2\Research Phase 4\Segment_csv\Other_variables_independent"

# Export as <parameter>_segment<N>.csv, numbered from 1.
for index, segment in enumerate(segments):
    file_name = "".join([independent_param, "_segment", str(index + 1), ".csv"])
    segment.to_csv(os.path.join(save_directory, file_name), index=False)

In [None]:
# The Rest is done in R

## PRx

In [None]:
independent_param = "PRx"
breakpoints = [0, 0.25, 0.35]
all_data = df_all

# One segment per threshold: rows strictly above it. The segments are nested, not disjoint.
segments = [all_data[all_data[independent_param] > limit] for limit in breakpoints]
segment1, segment2, segment3 = segments

save_directory = r"D:\Thesis Project\Thesis Project 2\Research Phase 4\Segment_csv\Other_variables_independent"

# Export as <parameter>_segment<N>.csv, numbered from 1.
for index, segment in enumerate(segments):
    file_name = "".join([independent_param, "_segment", str(index + 1), ".csv"])
    segment.to_csv(os.path.join(save_directory, file_name), index=False)

In [None]:
# The Rest is done in R

## Mann-Whitney U Test

In [None]:
def mann_whitney_U_test_result(segments):
    """Compare the first two segments column by column with the Mann-Whitney U test.

    Parameters
    ----------
    segments : sequence of pandas.DataFrame
        Only `segments[0]` and `segments[1]` are used; the columns of
        `segments[0]` must also exist in `segments[1]`.

    Returns
    -------
    pandas.DataFrame
        Two rows per column: row 0 is the U statistic, row 1 the p-value, both
        formatted as strings in scientific notation with two decimals. A column
        with no non-missing values in either segment yields 'nan' in both rows.
    """
    first, second = segments[0], segments[1]
    df_comparison = pd.DataFrame(index=range(2), columns=first.columns)

    for parameter in first.columns:
        sample_a = first[parameter].dropna()
        sample_b = second[parameter].dropna()
        if sample_a.empty or sample_b.empty:
            # Nothing to compare; report NaN instead of letting the test fail.
            stat, p_value = float('nan'), float('nan')
        else:
            stat, p_value = mannwhitneyu(sample_a, sample_b)
        df_comparison.at[0, parameter] = stat
        df_comparison.at[1, parameter] = p_value

    # Element-wise formatting via Series.map: works on every pandas version,
    # unlike DataFrame.applymap, which is deprecated in recent releases.
    return df_comparison.apply(lambda column: column.map("{:.2e}".format))

## ICP

In [None]:
# ICP split at 20: segment1 holds rows below the threshold, segment2 rows at or above it.
independent_param = "ICP"
breakpoints = [20]
all_data = df_all

segment1 = all_data.loc[all_data[independent_param].lt(breakpoints[0])]
segment2 = all_data.loc[all_data[independent_param].ge(breakpoints[0])]

segments = [segment1, segment2]

In [None]:
# Mann-Whitney U statistic (row 0) and p-value (row 1) per column, comparing segments[0] with segments[1].
df_comparison = mann_whitney_U_test_result(segments)
df_comparison

In [None]:
# ICP split at 22.5: segment1 holds rows below the threshold, segment2 rows at or above it.
independent_param = "ICP"
breakpoints = [22.5]
all_data = df_all

segment1 = all_data.loc[all_data[independent_param].lt(breakpoints[0])]
segment2 = all_data.loc[all_data[independent_param].ge(breakpoints[0])]

segments = [segment1, segment2]

In [None]:
# Mann-Whitney U statistic (row 0) and p-value (row 1) per column, comparing segments[0] with segments[1].
df_comparison = mann_whitney_U_test_result(segments)
df_comparison

## CPP

In [None]:
# CPP split: segment1 holds rows below 60, segment2 rows above 70 (values from 60 to 70 are in neither).
independent_param = "CPP"
breakpoints = [60, 70]
all_data = df_all

segment1 = all_data.loc[all_data[independent_param].lt(breakpoints[0])]
segment2 = all_data.loc[all_data[independent_param].gt(breakpoints[1])]

segments = [segment1, segment2]

In [None]:
# Mann-Whitney U statistic (row 0) and p-value (row 1) per column, comparing segments[0] with segments[1].
df_comparison = mann_whitney_U_test_result(segments)
df_comparison

## PbtO2

In [None]:
independent_param = "PbtO2"
breakpoints = [15, 20]
all_data = df_all

# One segment per threshold: rows strictly below it.
# NOTE(review): segment1 is contained in segment2, so the two samples overlap, while the
# Mann-Whitney U test assumes independent samples -- confirm this comparison is intended.
segments = [all_data[all_data[independent_param] < limit] for limit in breakpoints]
segment1, segment2 = segments

In [None]:
# Mann-Whitney U statistic (row 0) and p-value (row 1) per column, comparing segments[0] with segments[1].
df_comparison = mann_whitney_U_test_result(segments)
df_comparison

## PAx

In [None]:
independent_param = "PAx"
breakpoints = [0, 0.25]
all_data = df_all

# One segment per threshold: rows strictly above it.
# NOTE(review): segment2 is contained in segment1, so the two samples overlap, while the
# Mann-Whitney U test assumes independent samples -- confirm this comparison is intended.
segments = [all_data[all_data[independent_param] > limit] for limit in breakpoints]
segment1, segment2 = segments

In [None]:
# Mann-Whitney U statistic (row 0) and p-value (row 1) per column, comparing segments[0] with segments[1].
df_comparison = mann_whitney_U_test_result(segments)
df_comparison

## RAC

In [None]:
# RAC split at 0: segment1 holds rows above the threshold, segment2 rows below it (exact zeros are in neither).
independent_param = "RAC"
breakpoints = [0]
all_data = df_all

segment1 = all_data.loc[all_data[independent_param].gt(breakpoints[0])]
segment2 = all_data.loc[all_data[independent_param].lt(breakpoints[0])]

segments = [segment1, segment2]

In [None]:
# Mann-Whitney U statistic (row 0) and p-value (row 1) per column, comparing segments[0] with segments[1].
df_comparison = mann_whitney_U_test_result(segments)
df_comparison

## COx

In [None]:
independent_param = "COx_L"
breakpoints = [0, 0.20]
all_data = df_all

# One segment per threshold: rows strictly above it.
# NOTE(review): segment2 is contained in segment1, so the two samples overlap, while the
# Mann-Whitney U test assumes independent samples -- confirm this comparison is intended.
segments = [all_data[all_data[independent_param] > limit] for limit in breakpoints]
segment1, segment2 = segments

In [None]:
# Mann-Whitney U statistic (row 0) and p-value (row 1) per column, comparing segments[0] with segments[1].
df_comparison = mann_whitney_U_test_result(segments)
df_comparison

In [None]:
independent_param = "COx_R"
breakpoints = [0, 0.20]
all_data = df_all

# One segment per threshold: rows strictly above it.
# NOTE(review): segment2 is contained in segment1, so the two samples overlap, while the
# Mann-Whitney U test assumes independent samples -- confirm this comparison is intended.
segments = [all_data[all_data[independent_param] > limit] for limit in breakpoints]
segment1, segment2 = segments

In [None]:
# Mann-Whitney U statistic (row 0) and p-value (row 1) per column, comparing segments[0] with segments[1].
df_comparison = mann_whitney_U_test_result(segments)
df_comparison

## COx-a

In [None]:
independent_param = "COxA_L"
breakpoints = [0, 0.20]
all_data = df_all

# One segment per threshold: rows strictly above it.
# NOTE(review): segment2 is contained in segment1, so the two samples overlap, while the
# Mann-Whitney U test assumes independent samples -- confirm this comparison is intended.
segments = [all_data[all_data[independent_param] > limit] for limit in breakpoints]
segment1, segment2 = segments

In [None]:
# Mann-Whitney U statistic (row 0) and p-value (row 1) per column, comparing segments[0] with segments[1].
df_comparison = mann_whitney_U_test_result(segments)
df_comparison

In [None]:
independent_param = "COxA_R"
breakpoints = [0, 0.20]
all_data = df_all

# One segment per threshold: rows strictly above it.
# NOTE(review): segment2 is contained in segment1, so the two samples overlap, while the
# Mann-Whitney U test assumes independent samples -- confirm this comparison is intended.
segments = [all_data[all_data[independent_param] > limit] for limit in breakpoints]
segment1, segment2 = segments

In [None]:
# Mann-Whitney U statistic (row 0) and p-value (row 1) per column, comparing segments[0] with segments[1].
df_comparison = mann_whitney_U_test_result(segments)
df_comparison