In [None]:
# import libraries
import os
import pyreadr
import pandas as pd
import collections
import numpy as np
import re
import datetime
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy.stats import mannwhitneyu, chi2_contingency
import seaborn as sns
from matplotlib.patches import Patch
import pyreadr
from lifelines import KaplanMeierFitter

In [None]:
import pyreadr

# Location of the clinical dataset stored as an R .RDS file.
# Note: Replace 'path/to/your/file.RDS' with the actual path to your RDS file
rds_path = 'path/to/your/file.RDS'

# read_r returns a dict-like container of data frames; the frame read from
# this file is looked up under the key None.
result = pyreadr.read_r(rds_path)
df = result[None]

# Store the patient key as float so it has the same dtype as the key of the
# imputed dataset that is loaded in a later cell.
df['STUDY_NUMBER'] = df['STUDY_NUMBER'].astype('float64')

# Show the loaded frame as the cell output
df

In [None]:
# Location of the imputed dataset (CSV).
# Note: Replace 'path/to/your/imputed_data.csv' with the actual path to your imputed data file
imputed_csv_path = 'path/to/your/imputed_data.csv'

# Load the imputed values into their own frame
df_imp = pd.read_csv(imputed_csv_path)

# Cast the patient key to float so it has the same dtype as df['STUDY_NUMBER']
df_imp['STUDY_NUMBER'] = df_imp['STUDY_NUMBER'].astype('float64')

# Show the loaded frame as the cell output
df_imp

In [None]:
# Join the original and the imputed frame on the patient key (merge defaults to
# an inner join). Columns that exist in both frames keep their original name
# for the left-hand version and get a '_new' suffix for the imputed version.
merged_df = pd.merge(df, df_imp, on='STUDY_NUMBER', suffixes=('', '_new'))

# Names shared by both frames, the join key excluded
shared_columns = [
    name for name in df.columns
    if name != 'STUDY_NUMBER' and name in df_imp.columns
]

# Overwrite every shared column with its imputed counterpart. pop() returns the
# '<name>_new' column and removes it from the frame in the same step.
for name in shared_columns:
    merged_df[name] = merged_df.pop(name + '_new')

# From here on `df` refers to the frame that carries the imputed values
df = merged_df

# Show the updated frame as the cell output
df

In [None]:
# Location of the IPH data (CSV).
# Note: Replace 'path/to/your/iph_data.csv' with the actual path to your IPH data file
iph_csv_path = 'path/to/your/iph_data.csv'
df_iph = pd.read_csv(iph_csv_path)

# Derive 'AENR' from 'case_id': keep the last '/'-separated path component and,
# of that, everything in front of its first '.'.
aenr_values = []
for case_id in df_iph['case_id']:
    file_name = case_id.split('/')[-1]
    aenr_values.append(file_name.rsplit('.')[0])
df_iph['AENR'] = aenr_values

# Derive the integer 'STUDY_NUMBER' by removing 'AE' from the AENR
df_iph['STUDY_NUMBER'] = [int(aenr.replace('AE', '')) for aenr in aenr_values]

# Show the resulting frame as the cell output
df_iph

In [None]:
# Patient keys present in the main frame and in the IPH frame, respectively
study_numbers_main = set(df['STUDY_NUMBER'])
study_numbers_iph = set(df_iph['STUDY_NUMBER'])

# Keys that occur in both frames
intersection = study_numbers_main & study_numbers_iph

# Number of shared 'STUDY_NUMBER' entries (shown as the cell output)
len(intersection)

2220

In [None]:
# Keys shared by both frames, as a list for isin()
shared_ids = list(intersection)

# Keep only the IPH rows whose 'STUDY_NUMBER' is one of the shared keys
df_iph = df_iph.loc[df_iph['STUDY_NUMBER'].isin(shared_ids)]

# Keep only the main-frame rows whose 'STUDY_NUMBER' is one of the shared keys
df = df.loc[df['STUDY_NUMBER'].isin(shared_ids)]

In [None]:
# Inner-join the clinical frame with the IPH frame on the patient key
df_combined = pd.merge(df, df_iph, on='STUDY_NUMBER', how='inner')

# Show the combined frame as the cell output
df_combined

In [None]:
# Columns carried into the analysis: demographic information, medical history,
# medication usage and various clinical measurements.
# The selection is based on the updated requirements (26-08-2024).
analysis_columns = [
    'Age',                     # Age of the patient
    'Gender',                  # Gender of the patient
    "Symptoms.Update2G",       # Updated symptoms information
    'Med.Statin.LLD',          # Medication - Statin or Lipid-Lowering Drugs
    'MI_Dx',                   # History of myocardial infarction
    'Med.acetylsal',           # Medication - Acetylsalicylic acid (Aspirin)
    'Med.anticoagulants',      # Medication - Anticoagulants
    'Med.dipyridamole',        # Medication - Dipyridamole
    'Hypertension.composite',  # Composite measure of hypertension
    'stenosis_con_bin',        # Binary indicator of stenosis condition
    'CRP_avg',                 # Average C-reactive protein level
    'HDL_final',               # Final HDL cholesterol level
    'epcom.3years',            # Composite endpoint at 3 years
    "ep_com_t_3years",         # Time to composite endpoint at 3 years
    'IPH.bin',                 # Binary indicator of intraplaque hemorrhage (manual)
    'area',                    # Area measurement
    'prob',                    # Probability measurement
    'IPH',                     # Intraplaque hemorrhage indicator (model)
    'dateok',                  # Date of the observation
]

# Restrict the combined frame to exactly these columns, in this order
df_analysis = df_combined[analysis_columns]



In [None]:
# Cut-off date (year, month, day): only rows whose 'dateok' lies strictly
# before this date are kept.
date_y = 2008
date_m = 3
date_d = 11
cutoff_date = pd.Timestamp(year=date_y, month=date_m, day=date_d)

# Compare as pandas timestamps instead of against a plain datetime.date.
# The plain-date comparison only works when 'dateok' holds datetime.date
# objects; converting first also handles a datetime64 column. Missing dates
# become NaT and compare as False, so they are filtered out here.
# NOTE(review): if 'dateok' ever arrives as text, to_datetime will parse it;
# confirm the text is in an unambiguous (ISO, year-first) format.
date_filter = (pd.to_datetime(df_analysis['dateok']) < cutoff_date)

# Apply the date filter to the dataframe
df_analysis = df_analysis[date_filter]

In [None]:
# Number of missing values per column (isna is the same check as isnull)
df_analysis.isna().sum()

In [None]:
# Complete-case analysis: discard every row that has at least one missing value
df_analysis = df_analysis.dropna(axis=0, how='any')

In [None]:
# 'dateok' was only needed for the date filter; remove the column
df_analysis = df_analysis.drop(columns=['dateok'])

In [None]:
# Count the missing values per column again; rows with missing values were
# dropped in an earlier cell, so every count is expected to be 0.
df_analysis.isnull().sum()

In [None]:
# Number of rows per distinct 'Gender' value
gender_column = df_analysis.loc[:, 'Gender']
gender_counts = gender_column.value_counts()

# Show the counts as the cell output
gender_counts

In [None]:
# Mean of the 'ep_com_t_3years' column (time to composite endpoint at 3 years)
followup_times = df_analysis.loc[:, "ep_com_t_3years"]
mean_ep_com_t_3years = followup_times.mean()

# Show the mean as the cell output
mean_ep_com_t_3years

In [None]:
# Number of rows per distinct value of 'epcom.3years'
# (the composite endpoint at 3 years)
endpoint_column = df_analysis.loc[:, "epcom.3years"]
epcom_counts = endpoint_column.value_counts()

# Show the counts as the cell output
epcom_counts

In [None]:
# Composite endpoint at 3 years ('epcom.3years'), male patients only

# Rows whose 'Gender' equals "male"
is_male = df_analysis["Gender"].eq("male")
male_patients = df_analysis.loc[is_male]

# Number of male patients per distinct 'epcom.3years' value
epcom_counts_male = male_patients.loc[:, "epcom.3years"].value_counts()

# Show the counts as the cell output
epcom_counts_male

In [None]:
# Composite endpoint at 3 years ('epcom.3years'), female patients only

# Rows whose 'Gender' equals "female"
is_female = df_analysis["Gender"].eq("female")
female_patients = df_analysis.loc[is_female]

# Number of female patients per distinct 'epcom.3years' value
epcom_counts_female = female_patients.loc[:, "epcom.3years"].value_counts()

# Show the counts as the cell output
epcom_counts_female

In [None]:
# Inputs shared by all Kaplan-Meier fits below:
#   T - durations  ('ep_com_t_3years', time to composite endpoint at 3 years)
#   E - event flag ('epcom.3years', composite endpoint at 3 years)
duration_column, event_column = "ep_com_t_3years", "epcom.3years"
T = df_analysis[duration_column]
E = df_analysis[event_column]

In [None]:
# Kaplan-Meier estimate over all patients; fit() returns the fitter itself,
# so construction and fitting can be chained.
kmf = KaplanMeierFitter().fit(durations=T, event_observed=E)

# Plot the estimated survival function
kmf.plot_survival_function()

In [None]:
# Estimated survival function of the most recent fit of `kmf`
# (at this point: the Kaplan-Meier fit on all patients)
kmf.survival_function_

In [None]:
# Confidence interval of the estimated survival function for the most recent
# fit of `kmf`
kmf.confidence_interval_survival_function_

In [None]:
# Directory that receives the Kaplan-Meier figures
SAVE_DIR = "./KMF_plots/"

# Create the directory if it is missing. exist_ok=True makes the call a no-op
# when the directory already exists (no separate exists() check, hence no race
# between check and creation), and makedirs also creates missing parents.
os.makedirs(SAVE_DIR, exist_ok=True)

In [None]:
# Kaplan-Meier curves, all patients, split by manually scored IPH ('IPH.bin').

# Set the font size BEFORE any artist is created: matplotlib resolves the font
# size when a text object is created, so updating it after plotting would not
# affect the labels and legend of this figure.
plt.rcParams.update({'font.size': 14})

# Always draw on a fresh figure so curves from an earlier cell cannot leak in
fig, ax = plt.subplots()

# Boolean masks for the IPH and non-IPH groups
sub_iph = (df_analysis["IPH.bin"] == 'yes')
sub_no_iph = (df_analysis["IPH.bin"] == 'no')

# (mask, legend label, line colour) for every curve
km_groups = [
    (sub_iph, "IPH=True", "#ff7f0e"),
    (sub_no_iph, "IPH=False", "#1f77b4"),
]

# Re-fit the shared estimator per group and draw its survival function
for mask, label, color in km_groups:
    kmf.fit(durations=T[mask], event_observed=E[mask], label=label)
    kmf.plot_survival_function(ax=ax, ci_show=False, color=color)

# Axis limits and labels
ax.set_ylim(0.60, 1.0)
ax.set_xlabel("Time (years)")
ax.set_ylabel("Survival Probability")

# Remove the 'right' and 'top' spines for a cleaner look
ax.spines[['right', 'top']].set_visible(False)

# Save the figure as PDF
fig.savefig(SAVE_DIR + 'kaplan_meijer_plot_all_IPH_manual.pdf', format='pdf', dpi=300)

# Show the plot (interactive mode is switched on first, as before)
plt.ion()
plt.show()

In [None]:
# Kaplan-Meier curves, male patients, split by manually scored IPH ('IPH.bin').

# Set the font size BEFORE any artist is created: matplotlib resolves the font
# size when a text object is created, so updating it after plotting would not
# affect the labels and legend of this figure.
plt.rcParams.update({'font.size': 14})

# Always draw on a fresh figure so curves from an earlier cell cannot leak in
fig, ax = plt.subplots()

# Boolean masks for male patients with and without IPH
is_male = (df_analysis["Gender"] == 'male')
sub_iph = is_male & (df_analysis["IPH.bin"] == 'yes')
sub_no_iph = is_male & (df_analysis["IPH.bin"] == 'no')

# (mask, legend label, line colour) for every curve
km_groups = [
    (sub_iph, "IPH=True", "#ff7f0e"),
    (sub_no_iph, "IPH=False", "#1f77b4"),
]

# Re-fit the shared estimator per group and draw its survival function
for mask, label, color in km_groups:
    kmf.fit(durations=T[mask], event_observed=E[mask], label=label)
    kmf.plot_survival_function(ax=ax, ci_show=False, color=color)

# Axis limits and labels
ax.set_ylim(0.60, 1.0)
ax.set_xlabel("Time (years)")
ax.set_ylabel("Survival Probability")

# Remove the 'right' and 'top' spines for a cleaner look
ax.spines[['right', 'top']].set_visible(False)

# Save the figure as PDF
fig.savefig(SAVE_DIR + 'kaplan_meijer_plot_male_IPH_manual.pdf', format='pdf', dpi=300)

# Show the plot (interactive mode is switched on first, as before)
plt.ion()
plt.show()

In [None]:
# Kaplan-Meier curves, female patients, split by manually scored IPH ('IPH.bin').

# Set the font size BEFORE any artist is created: matplotlib resolves the font
# size when a text object is created, so updating it after plotting would not
# affect the labels and legend of this figure.
plt.rcParams.update({'font.size': 14})

# Always draw on a fresh figure so curves from an earlier cell cannot leak in
fig, ax = plt.subplots()

# Boolean masks for female patients with and without IPH
is_female = (df_analysis["Gender"] == 'female')
sub_iph = is_female & (df_analysis["IPH.bin"] == 'yes')
sub_no_iph = is_female & (df_analysis["IPH.bin"] == 'no')

# (mask, legend label, line colour) for every curve
km_groups = [
    (sub_iph, "IPH=True", "#ff7f0e"),
    (sub_no_iph, "IPH=False", "#1f77b4"),
]

# Re-fit the shared estimator per group and draw its survival function
for mask, label, color in km_groups:
    kmf.fit(durations=T[mask], event_observed=E[mask], label=label)
    kmf.plot_survival_function(ax=ax, ci_show=False, color=color)

# Axis limits and labels
ax.set_ylim(0.60, 1.0)
ax.set_xlabel("Time (years)")
ax.set_ylabel("Survival Probability")

# Remove the 'right' and 'top' spines for a cleaner look
ax.spines[['right', 'top']].set_visible(False)

# Save the figure as PDF
fig.savefig(SAVE_DIR + 'kaplan_meijer_plot_female_IPH_manual.pdf', format='pdf', dpi=300)

# Show the plot (interactive mode is switched on first, as before)
plt.ion()
plt.show()

In [None]:
# Kaplan-Meier curves, all patients, split by the model's IPH indicator ('IPH').

# Set the font size BEFORE any artist is created: matplotlib resolves the font
# size when a text object is created, so updating it after plotting would not
# affect the labels and legend of this figure.
plt.rcParams.update({'font.size': 14})

# Always draw on a fresh figure so curves from an earlier cell cannot leak in
fig, ax = plt.subplots()

# Boolean masks for patients with and without IPH
sub_iph = (df_analysis["IPH"] == True)
sub_no_iph = (df_analysis["IPH"] == False)

# (mask, legend label, line colour) for every curve
km_groups = [
    (sub_iph, "IPH=True", "#ff7f0e"),
    (sub_no_iph, "IPH=False", "#1f77b4"),
]

# Re-fit the shared estimator per group and draw its survival function
for mask, label, color in km_groups:
    kmf.fit(durations=T[mask], event_observed=E[mask], label=label)
    kmf.plot_survival_function(ax=ax, ci_show=False, color=color)

# Axis limits and labels
ax.set_ylim(0.60, 1.0)
ax.set_xlabel("Time (years)")
ax.set_ylabel("Survival Probability")

# Remove the 'right' and 'top' spines for a cleaner look
ax.spines[['right', 'top']].set_visible(False)

# Save the figure as PDF
fig.savefig(SAVE_DIR + 'kaplan_meijer_plot_all_IPH_model.pdf', format='pdf', dpi=300)

# Show the plot (interactive mode is switched on first, as before)
plt.ion()
plt.show()

In [None]:
# Kaplan-Meier curves, male patients, split by the model's IPH indicator ('IPH').

# Set the font size BEFORE any artist is created: matplotlib resolves the font
# size when a text object is created, so updating it after plotting would not
# affect the labels and legend of this figure.
plt.rcParams.update({'font.size': 14})

# Always draw on a fresh figure so curves from an earlier cell cannot leak in
fig, ax = plt.subplots()

# Boolean masks for male patients with and without IPH
is_male = (df_analysis["Gender"] == 'male')
sub_iph = is_male & (df_analysis["IPH"] == True)
sub_no_iph = is_male & (df_analysis["IPH"] == False)

# (mask, legend label, line colour) for every curve
km_groups = [
    (sub_iph, "IPH=True", "#ff7f0e"),
    (sub_no_iph, "IPH=False", "#1f77b4"),
]

# Re-fit the shared estimator per group and draw its survival function
for mask, label, color in km_groups:
    kmf.fit(durations=T[mask], event_observed=E[mask], label=label)
    kmf.plot_survival_function(ax=ax, ci_show=False, color=color)

# Axis limits and labels
ax.set_ylim(0.60, 1.0)
ax.set_xlabel("Time (years)")
ax.set_ylabel("Survival Probability")

# Remove the 'right' and 'top' spines for a cleaner look
ax.spines[['right', 'top']].set_visible(False)

# Save the figure as PDF
fig.savefig(SAVE_DIR + 'kaplan_meijer_plot_male_IPH_model.pdf', format='pdf', dpi=300)

# Show the plot (interactive mode is switched on first, as before)
plt.ion()
plt.show()

In [None]:
# Kaplan-Meier curves, female patients, split by the model's IPH indicator ('IPH').

# Set the font size BEFORE any artist is created: matplotlib resolves the font
# size when a text object is created, so updating it after plotting would not
# affect the labels and legend of this figure.
plt.rcParams.update({'font.size': 14})

# Always draw on a fresh figure so curves from an earlier cell cannot leak in
fig, ax = plt.subplots()

# Boolean masks for female patients with and without IPH
is_female = (df_analysis["Gender"] == 'female')
sub_iph = is_female & (df_analysis["IPH"] == True)
sub_no_iph = is_female & (df_analysis["IPH"] == False)

# (mask, legend label, line colour) for every curve
km_groups = [
    (sub_iph, "IPH=True", "#ff7f0e"),
    (sub_no_iph, "IPH=False", "#1f77b4"),
]

# Re-fit the shared estimator per group and draw its survival function
for mask, label, color in km_groups:
    kmf.fit(durations=T[mask], event_observed=E[mask], label=label)
    kmf.plot_survival_function(ax=ax, ci_show=False, color=color)

# Axis limits and labels
ax.set_ylim(0.60, 1.0)
ax.set_xlabel("Time (years)")
ax.set_ylabel("Survival Probability")

# Remove the 'right' and 'top' spines for a cleaner look
ax.spines[['right', 'top']].set_visible(False)

# Save the figure as PDF
fig.savefig(SAVE_DIR + 'kaplan_meijer_plot_female_IPH_model.pdf', format='pdf', dpi=300)

# Show the plot (interactive mode is switched on first, as before)
plt.ion()
plt.show()

In [None]:
# Value range that is mapped linearly onto the colormap
val_min, val_max = 0, 3

# 'coolwarm' colormap plus a normaliser that maps [val_min, val_max] to [0, 1]
norm = mpl.colors.Normalize(vmin=val_min, vmax=val_max)
cmap = mpl.cm.coolwarm

# Example usage of the colormap normalization (uncomment to print a color value)
# print(cmap(norm(5)))

In [None]:
# Quartile boundaries of 'area': minimum, 25 %, 50 %, 75 % quantile and maximum
quartile_probs = [0, 0.25, 0.5, 0.75, 1]
cutoffs = np.quantile(df_analysis['area'], quartile_probs)
print("Quartile Cutoffs:", cutoffs)

# Assign every 'area' value to its quartile (labels 1-4). include_lowest=True
# makes the first bin closed on the left so the minimum value is not lost.
# NOTE(review): pd.cut needs unique bin edges; if two quantiles coincide
# (e.g. many identical 'area' values) this call raises - confirm that cannot
# happen for this data.
area_quartile = pd.cut(
    df_analysis['area'],
    bins=cutoffs,
    labels=[1, 2, 3, 4],
    include_lowest=True,
)
df_analysis['area_quartile'] = area_quartile

# Number of rows per quartile, as a sanity check of the binning
print(df_analysis['area_quartile'].value_counts())

In [None]:
# Line colours for the multi-group Kaplan-Meier plots, indexed by group number.
# NOTE(review): the name says "colorblind", but these hex values look like
# matplotlib's default 'tab10' cycle with orange and green swapped - confirm
# that the palette really is colour-blind safe.
colorblind_palette = [
    '#1f77b4',
    '#2ca02c',
    '#ff7f0e',
    '#d62728',
    '#9467bd',
    '#8c564b',
    '#e377c2',
    '#7f7f7f',
    '#bcbd22',
    '#17becf',
]

In [None]:
# Kaplan-Meier curves, all patients, one curve per 'area' quartile.

# Set the font size BEFORE any artist is created: matplotlib resolves the font
# size when a text object is created, so updating it after plotting would not
# affect the labels and legend of this figure.
plt.rcParams.update({'font.size': 14})

# Always draw on a fresh figure so curves from an earlier cell cannot leak in
fig, ax = plt.subplots()

# One boolean mask per area quartile (quartile labels 1-4)
sub_0, sub_1, sub_2, sub_3 = [
    (df_analysis['area_quartile'] == quartile) for quartile in (1, 2, 3, 4)
]
quartile_masks = [sub_0, sub_1, sub_2, sub_3]

# Print the number of samples in each quartile; quartile idx spans
# cutoffs[idx] .. cutoffs[idx + 1]
print('Number of samples:')
for idx, mask in enumerate(quartile_masks):
    print(f'Area \t- {cutoffs[idx]} - {cutoffs[idx + 1]}: \t{len(T[mask])}')

# Re-fit the shared estimator per quartile and draw its survival function
for idx, mask in enumerate(quartile_masks):
    kmf.fit(durations=T[mask], event_observed=E[mask],
            label=f"IPH area={round(cutoffs[idx], 2)} - {round(cutoffs[idx + 1], 2)}")
    kmf.plot_survival_function(ax=ax, ci_show=False, color=colorblind_palette[idx], alpha=1.0)

# Axis limits and labels
ax.set_ylim(0.60, 1.0)
ax.set_xlabel("Time (years)")
ax.set_ylabel("Survival Probability")

# Remove the 'right' and 'top' spines for a cleaner look
ax.spines[['right', 'top']].set_visible(False)

# Save the figure as PDF
fig.savefig(SAVE_DIR + 'kaplan_meijer_plot_all_IPH_area.pdf', format='pdf', dpi=300)

# Show the plot (interactive mode is switched on first, as before)
plt.ion()
plt.show()



In [None]:
# Kaplan-Meier curves, male patients, one curve per 'area' quartile.

# Set the font size BEFORE any artist is created: matplotlib resolves the font
# size when a text object is created, so updating it after plotting would not
# affect the labels and legend of this figure.
plt.rcParams.update({'font.size': 14})

# Always draw on a fresh figure so curves from an earlier cell cannot leak in
fig, ax = plt.subplots()

# One boolean mask per area quartile (quartile labels 1-4), male patients only
is_male = (df_analysis["Gender"] == 'male')
sub_0, sub_1, sub_2, sub_3 = [
    is_male & (df_analysis['area_quartile'] == quartile) for quartile in (1, 2, 3, 4)
]
quartile_masks = [sub_0, sub_1, sub_2, sub_3]

# Print the number of samples in each quartile; quartile idx spans
# cutoffs[idx] .. cutoffs[idx + 1]
print('Number of samples:')
for idx, mask in enumerate(quartile_masks):
    print(f'Area \t- {cutoffs[idx]} - {cutoffs[idx + 1]}: \t{len(T[mask])}')

# Re-fit the shared estimator per quartile and draw its survival function
for idx, mask in enumerate(quartile_masks):
    kmf.fit(durations=T[mask], event_observed=E[mask],
            label=f"IPH area={round(cutoffs[idx], 2)} - {round(cutoffs[idx + 1], 2)}")
    kmf.plot_survival_function(ax=ax, ci_show=False, color=colorblind_palette[idx], alpha=1.0)

# Axis limits and labels
ax.set_ylim(0.60, 1.0)
ax.set_xlabel("Time (years)")
ax.set_ylabel("Survival Probability")

# Remove the 'right' and 'top' spines for a cleaner look
ax.spines[['right', 'top']].set_visible(False)

# Save the figure as PDF
fig.savefig(SAVE_DIR + 'kaplan_meijer_plot_male_IPH_area.pdf', format='pdf', dpi=300)

# Show the plot (interactive mode is switched on first, as before)
plt.ion()
plt.show()

In [None]:
# Kaplan-Meier curves, female patients, one curve per 'area' quartile.

# Set the font size BEFORE any artist is created: matplotlib resolves the font
# size when a text object is created, so updating it after plotting would not
# affect the labels and legend of this figure.
plt.rcParams.update({'font.size': 14})

# Always draw on a fresh figure so curves from an earlier cell cannot leak in
fig, ax = plt.subplots()

# One boolean mask per area quartile (quartile labels 1-4), female patients only
is_female = (df_analysis["Gender"] == 'female')
sub_0, sub_1, sub_2, sub_3 = [
    is_female & (df_analysis['area_quartile'] == quartile) for quartile in (1, 2, 3, 4)
]
quartile_masks = [sub_0, sub_1, sub_2, sub_3]

# Print the number of samples in each quartile; quartile idx spans
# cutoffs[idx] .. cutoffs[idx + 1]
print('Number of samples:')
for idx, mask in enumerate(quartile_masks):
    print(f'Area \t- {cutoffs[idx]} - {cutoffs[idx + 1]}: \t{len(T[mask])}')

# Re-fit the shared estimator per quartile and draw its survival function
for idx, mask in enumerate(quartile_masks):
    kmf.fit(durations=T[mask], event_observed=E[mask],
            label=f"IPH area={round(cutoffs[idx], 2)} - {round(cutoffs[idx + 1], 2)}")
    kmf.plot_survival_function(ax=ax, ci_show=False, color=colorblind_palette[idx], alpha=1.0)

# Axis limits and labels
ax.set_ylim(0.60, 1.0)
ax.set_xlabel("Time (years)")
ax.set_ylabel("Survival Probability")

# Remove the 'right' and 'top' spines for a cleaner look
ax.spines[['right', 'top']].set_visible(False)

# Save the figure as PDF
fig.savefig(SAVE_DIR + 'kaplan_meijer_plot_female_IPH_area.pdf', format='pdf', dpi=300)

# Show the plot (interactive mode is switched on first, as before)
plt.ion()
plt.show()



In [None]:
# Value range that is mapped linearly onto the colormap
val_min, val_max = 0, 3

# Normaliser that maps [val_min, val_max] to [0, 1]
norm = mpl.colors.Normalize(vmin=val_min, vmax=val_max)

# The 'coolwarm' colormap from matplotlib
cmap = mpl.cm.coolwarm

# Example usage of the colormap normalization (uncomment to print a color value)
# print(cmap(norm(1.5)))  # Example: Get the color for the normalized value 1.5

In [None]:
# Kaplan-Meier curves, all patients, one curve per 'prob' range.

# Set the font size BEFORE any artist is created: matplotlib resolves the font
# size when a text object is created, so updating it after plotting would not
# affect the labels and legend of this figure.
plt.rcParams.update({'font.size': 14})

# Always draw on a fresh figure so curves from an earlier cell cannot leak in
fig, ax = plt.subplots()

# Boolean masks for the four probability ranges. The first range is closed on
# the left (>= 0.0): with a strict '>' a probability of exactly 0.0 would
# silently belong to no group although the range is labelled "0.0 - 0.25".
prob_values = df_analysis['prob']
sub_0 = (prob_values >= 0.0) & (prob_values <= 0.25)
sub_1 = (prob_values > 0.25) & (prob_values <= 0.5)
sub_2 = (prob_values > 0.5) & (prob_values <= 0.75)
sub_3 = (prob_values > 0.75) & (prob_values <= 1.0)

# (mask, range text) per group; the list position selects the line colour
prob_groups = [
    (sub_0, "0.0 - 0.25"),
    (sub_1, "0.25 - 0.5"),
    (sub_2, "0.5 - 0.75"),
    (sub_3, "0.75 - 1.0"),
]

# Print the number of samples in each probability range
print('Number of samples:')
for mask, range_text in prob_groups:
    print(f'prob \t- {range_text}: \t{len(T[mask])}')

# Re-fit the shared estimator per range and draw its survival function
for idx, (mask, range_text) in enumerate(prob_groups):
    kmf.fit(durations=T[mask], event_observed=E[mask], label=f"IPH prob={range_text}")
    kmf.plot_survival_function(ax=ax, ci_show=False, color=colorblind_palette[idx], alpha=1.0)

# Axis limits and labels
ax.set_ylim(0.60, 1.0)
ax.set_xlabel("Time (years)")
ax.set_ylabel("Survival Probability")

# Remove the 'right' and 'top' spines for a cleaner look
ax.spines[['right', 'top']].set_visible(False)

# Save the figure as PDF
fig.savefig(SAVE_DIR + 'kaplan_meijer_plot_all_IPH_prob.pdf', format='pdf', dpi=300)

# Show the plot (interactive mode is switched on first, as before)
plt.ion()
plt.show()

In [None]:
# Kaplan-Meier curves, male patients, one curve per 'prob' range.

# Set the font size BEFORE any artist is created: matplotlib resolves the font
# size when a text object is created, so updating it after plotting would not
# affect the labels and legend of this figure.
plt.rcParams.update({'font.size': 14})

# Always draw on a fresh figure so curves from an earlier cell cannot leak in
fig, ax = plt.subplots()

# Boolean masks for the four probability ranges, male patients only. The first
# range is closed on the left (>= 0.0): with a strict '>' a probability of
# exactly 0.0 would silently belong to no group although the range is labelled
# "0.0 - 0.25".
is_male = (df_analysis["Gender"] == 'male')
prob_values = df_analysis['prob']
sub_0 = is_male & (prob_values >= 0.0) & (prob_values <= 0.25)
sub_1 = is_male & (prob_values > 0.25) & (prob_values <= 0.5)
sub_2 = is_male & (prob_values > 0.5) & (prob_values <= 0.75)
sub_3 = is_male & (prob_values > 0.75) & (prob_values <= 1.0)

# (mask, range text) per group; the list position selects the line colour
prob_groups = [
    (sub_0, "0.0 - 0.25"),
    (sub_1, "0.25 - 0.5"),
    (sub_2, "0.5 - 0.75"),
    (sub_3, "0.75 - 1.0"),
]

# Print the number of samples in each probability range
print('Number of samples:')
for mask, range_text in prob_groups:
    print(f'prob \t- {range_text}: \t{len(T[mask])}')

# Re-fit the shared estimator per range and draw its survival function
for idx, (mask, range_text) in enumerate(prob_groups):
    kmf.fit(durations=T[mask], event_observed=E[mask], label=f"IPH prob={range_text}")
    kmf.plot_survival_function(ax=ax, ci_show=False, color=colorblind_palette[idx], alpha=1.0)

# Axis limits and labels
ax.set_ylim(0.60, 1.0)
ax.set_xlabel("Time (years)")
ax.set_ylabel("Survival Probability")

# Remove the 'right' and 'top' spines for a cleaner look
ax.spines[['right', 'top']].set_visible(False)

# Save the figure as PDF
fig.savefig(SAVE_DIR + 'kaplan_meijer_plot_male_IPH_prob.pdf', format='pdf', dpi=300)

# Show the plot (interactive mode is switched on first, as before)
plt.ion()
plt.show()

In [None]:
# Kaplan-Meier curves, female patients, one curve per 'prob' range.

# Set the font size BEFORE any artist is created: matplotlib resolves the font
# size when a text object is created, so updating it after plotting would not
# affect the labels and legend of this figure.
plt.rcParams.update({'font.size': 14})

# Always draw on a fresh figure so curves from an earlier cell cannot leak in
fig, ax = plt.subplots()

# Boolean masks for the four probability ranges, female patients only. The
# first range is closed on the left (>= 0.0): with a strict '>' a probability
# of exactly 0.0 would silently belong to no group although the range is
# labelled "0.0 - 0.25".
is_female = (df_analysis["Gender"] == 'female')
prob_values = df_analysis['prob']
sub_0 = is_female & (prob_values >= 0.0) & (prob_values <= 0.25)
sub_1 = is_female & (prob_values > 0.25) & (prob_values <= 0.5)
sub_2 = is_female & (prob_values > 0.5) & (prob_values <= 0.75)
sub_3 = is_female & (prob_values > 0.75) & (prob_values <= 1.0)

# (mask, range text) per group; the list position selects the line colour
prob_groups = [
    (sub_0, "0.0 - 0.25"),
    (sub_1, "0.25 - 0.5"),
    (sub_2, "0.5 - 0.75"),
    (sub_3, "0.75 - 1.0"),
]

# Print the number of samples in each probability range
print('Number of samples:')
for mask, range_text in prob_groups:
    print(f'prob \t- {range_text}: \t{len(T[mask])}')

# Re-fit the shared estimator per range and draw its survival function
for idx, (mask, range_text) in enumerate(prob_groups):
    kmf.fit(durations=T[mask], event_observed=E[mask], label=f"IPH prob={range_text}")
    kmf.plot_survival_function(ax=ax, ci_show=False, color=colorblind_palette[idx], alpha=1.0)

# Axis limits and labels
ax.set_ylim(0.60, 1.0)
ax.set_xlabel("Time (years)")
ax.set_ylabel("Survival Probability")

# Remove the 'right' and 'top' spines for a cleaner look
ax.spines[['right', 'top']].set_visible(False)

# Save the figure as PDF
fig.savefig(SAVE_DIR + 'kaplan_meijer_plot_female_IPH_prob.pdf', format='pdf', dpi=300)

# Show the plot (interactive mode is switched on first, as before)
plt.ion()
plt.show()
