In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the integrated-gradients CSV exports for the cardiac and respiratory models.
cardiac_df, resp_df = (
    pd.read_csv("/home/workspace/files/MilanK/Model1/final_models/final_cardiac_models/combined_model_simpler_demographics/integrated_gradients_magnitude_norm.csv"),
    pd.read_csv("/home/workspace/files/MilanK/Model1/final_models/final_resp_models/combined_model_simpler_demographics/integrated_gradients_magnitude_norm.csv"),
)

# Bare expression: rendered as this cell's output.
cardiac_df

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# -------------------------------
# Read CSV files for both models
# -------------------------------
# Cardiac frame first, respiratory frame second (same order as the two paths below).
cardiac_df, resp_df = (
    pd.read_csv("/home/workspace/files/MilanK/Model1/final_models/final_cardiac_models/combined_model_simpler_demographics/integrated_gradients_magnitude_norm.csv"),
    pd.read_csv("/home/workspace/files/MilanK/Model1/final_models/final_resp_models/combined_model_simpler_demographics/integrated_gradients_magnitude_norm.csv"),
)

# -------------------------------
# Define column groups
# -------------------------------
co2_cols = ["nCoo", "nacooMin"]
nibp_cols = ["nnbpSys", "nnbpDia", "nnbpMean"]
nart_cols = ["nartSys", "nartDia", "nartMean"]
temp_cols = ["nTemp", "ntempCo", "ntempEs", "ntempSk"]

# -------------------------------
# Function to process a dataframe
# -------------------------------
def process_df(df):
    """Collapse related importance columns into grouped features and relabel columns.

    For each column group (``co2_cols``, ``nibp_cols``, ``nart_cols``,
    ``temp_cols``) the absolute values are summed row-wise. The CO2 total
    replaces ``nCoo`` at its existing position; the other totals are appended
    as ``nibp``, ``nart`` and ``ntemp``. The remaining per-signal columns are
    dropped and the columns are renamed to display labels.

    The input frame is not modified; a new frame is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing every column named in the four column groups.

    Returns
    -------
    pandas.DataFrame
        New frame with grouped, renamed columns. Both ``y_true_resp`` and
        ``y_true_cardiac`` are renamed to ``y_true``.

    Raises
    ------
    KeyError
        If any column named in the column groups is missing from ``df``.
    """
    # Row-wise sum of absolute importance within each group. All totals are
    # computed from the untouched input before anything is overwritten.
    grouped_totals = {
        "nCoo": df[co2_cols].abs().sum(axis=1),
        "nibp": df[nibp_cols].abs().sum(axis=1),
        "nart": df[nart_cols].abs().sum(axis=1),
        "ntemp": df[temp_cols].abs().sum(axis=1),
    }

    # Per-signal columns that are now represented by a grouped total.
    # 'nCoo' is kept because it now holds the CO2 group total.
    merged_away = ['nacooMin'] + nibp_cols + nart_cols + temp_cols

    rename_map = {
        'patient_id': 'Patient ID',
        'nCoo': 'EtCO₂',
        'nawRr': 'Respiratory rate (airway)',
        'nrespRate': 'Respiratory rate (plethysmograph)',
        'nPleth': 'SpO₂',
        'necgRate': 'Heart Rate (ECG)',
        'previous_resp_deteriorations': 'Previous respiratory deteriorations',
        'previous_cardiovascular_deteriorations': 'Previous cardiovascular deteriorations',
        'pim3': 'PIM3',
        'day_night': 'Time of Transfer (Day/Night)',
        'diagnosis_embedding': 'Diagnosis',
        'age': 'Age',
        'sex': 'Sex',
        'weight': 'Weight',
        'ventilation_support': 'Ventilation support',
        'vasoactive_agents': 'Vasoactive support',
        'destination_care_area': 'Destination Care Area',
        'medical_history': 'Medical History',
        'nibp': 'Blood Pressure (non-invasive)',
        'nart': 'Blood Pressure (arterial)',
        'ntemp': 'Temperature',
        # Keep these as-is
        'y_prob': 'y_prob',
        'y_pred': 'y_pred',
        'y_true_resp': 'y_true',
        'y_true_cardiac': 'y_true'
    }

    # assign() works on a copy, so the caller's frame is never mutated.
    return (
        df.assign(**grouped_totals)
        .drop(columns=merged_away)
        .rename(columns=rename_map)
    )

# Process both dataframes
# process_df is handed a copy of each freshly loaded frame; the names are then
# rebound to the processed results.
cardiac_df, resp_df = process_df(cardiac_df.copy()), process_df(resp_df.copy())

# Write the processed frames next to the model directories.
# NOTE(review): to_csv writes the DataFrame index as an unnamed leading column
# by default; pass index=False if that column is not wanted downstream.
cardiac_df.to_csv(
    "/home/workspace/files/MilanK/Model1/final_models/cardiac_integrated_gradients_normalised_test_set.csv"
)
resp_df.to_csv(
    "/home/workspace/files/MilanK/Model1/final_models/resp_integrated_gradients_normalised_test_set.csv"
)

# -------------------------------
# Function to create a horizontal bar plot on a given axis
# -------------------------------
def create_plot(df, title, ax):
    """Draw mean feature importance per predicted class as horizontal bars on ``ax``.

    Rows are split on ``y_pred`` (1 vs 0), each feature column is averaged
    within each split, and features are ordered by the sum of the two means
    (largest first).

    Parameters
    ----------
    df : pandas.DataFrame
        Processed frame; every column other than 'Patient ID', 'y_prob',
        'y_pred' and 'y_true' is treated as a feature.
    title : str
        Axes title.
    ax : matplotlib.axes.Axes
        Axes to draw on.
    """
    # Everything that is not an identifier / prediction / label column is a feature.
    non_feature_cols = ['Patient ID', 'y_prob', 'y_pred', 'y_true']
    feature_cols = [name for name in df.columns if name not in non_feature_cols]

    # Per-class mean importance, one row per feature.
    predicted_class = df['y_pred']
    comparison_df = pd.DataFrame({
        'Positive Predictions': df.loc[predicted_class == 1, feature_cols].mean(),
        'Negative Predictions': df.loc[predicted_class == 0, feature_cols].mean(),
    })

    # Rank features by the combined height of their two bars.
    ranked = comparison_df.assign(
        Total=comparison_df['Positive Predictions'] + comparison_df['Negative Predictions']
    ).sort_values('Total', ascending=False)
    features_sorted = ranked.index.tolist()

    # Long format for seaborn: one row per (feature, predicted class) pair.
    # 'Total' was only needed for ranking, so it is dropped first.
    df_melt = (
        ranked.drop(columns='Total')
        .reset_index()
        .melt(id_vars='index', var_name='Prediction', value_name='Importance')
        .rename(columns={'index': 'Feature'})
    )

    sns.barplot(
        data=df_melt,
        x='Importance',
        y='Feature',
        hue='Prediction',
        order=features_sorted,
        palette={'Positive Predictions': 'orange', 'Negative Predictions': 'blue'},
        ax=ax
    )

    # Labelling: large title, no y-axis label (feature names are the tick labels).
    ax.set_title(title, fontsize=24)
    ax.set_xlabel('Relative Importance', fontsize=18)
    ax.set_ylabel('')
    ax.legend(title='Predicted Class', loc='lower right', fontsize=16, title_fontsize=18)

# -------------------------------
# Plotting both graphs side by side
# -------------------------------
# Global seaborn look: white grid background, notebook context with 1.5x fonts.
sns.set(style='whitegrid')
sns.set_context("notebook", font_scale=1.5)

# One row, two panels; constrained_layout handles the spacing between them.
fig, axes = plt.subplots(1, 2, figsize=(24, 18), constrained_layout=True)

# Panel a) respiratory model, panel b) cardiac (cardiovascular) model.
create_plot(resp_df, 'a) Respiratory Model Relative Feature Importance', axes[0])
create_plot(cardiac_df, 'b) Cardiovascular Model Relative Feature Importance', axes[1])

# bbox_inches='tight' crops the saved image to the drawn content.
# Note: dpi=1000 on a 24x18 inch canvas is roughly 24000 x 18000 pixels before cropping.
fig.savefig(
    "/home/workspace/files/MilanK/Model1/final_models/figures/feature_importance.png",
    dpi=1000,
    bbox_inches='tight',
)
plt.show()



In [None]:
import pandas as pd

# `df` is not defined anywhere in this notebook, so bind it explicitly instead
# of relying on whatever happens to be left in the kernel.
# NOTE(review): cardiac_df is an assumption — switch to resp_df if that was the
# frame this cell was meant to summarise.
df = cardiac_df

# numeric_only=True restricts the averages to numeric columns; without it a
# non-numeric column makes DataFrame.mean() raise on pandas >= 2.0.
column_averages = df.mean(numeric_only=True)  # Series: one average per numeric column
averages_list = column_averages.tolist()      # Same values as a plain list

print("Column Averages:")
print(column_averages)

print("\nAverages List:")
print(averages_list)


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# -------------------------------
# Read CSV files for both models
# -------------------------------
# Re-read both exports so this cell starts from the unprocessed frames.
cardiac_df, resp_df = (
    pd.read_csv("/home/workspace/files/MilanK/Model1/final_models/final_cardiac_models/combined_model_simpler_demographics/integrated_gradients_magnitude_norm.csv"),
    pd.read_csv("/home/workspace/files/MilanK/Model1/final_models/final_resp_models/combined_model_simpler_demographics/integrated_gradients_magnitude_norm.csv"),
)

# -------------------------------
# Define column groups
# -------------------------------
co2_cols = ["nCoo", "nacooMin"]
nibp_cols = ["nnbpSys", "nnbpDia", "nnbpMean"]
nart_cols = ["nartSys", "nartDia", "nartMean"]
temp_cols = ["nTemp", "ntempCo", "ntempEs", "ntempSk"]

# -------------------------------
# Function to process a dataframe
# -------------------------------
def process_df(df):
    """Collapse related importance columns into grouped features and relabel columns.

    For each column group (``co2_cols``, ``nibp_cols``, ``nart_cols``,
    ``temp_cols``) the absolute values are summed row-wise. The CO2 total
    replaces ``nCoo`` at its existing position; the other totals are appended
    as ``nibp``, ``nart`` and ``ntemp``. The remaining per-signal columns are
    dropped and the columns are renamed to display labels.

    The input frame is not modified; a new frame is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing every column named in the four column groups.

    Returns
    -------
    pandas.DataFrame
        New frame with grouped, renamed columns. Both ``y_true_resp`` and
        ``y_true_cardiac`` are renamed to ``y_true``, so when the respiratory
        and cardiac frames are later merged on 'Patient ID' the shared column
        names are told apart by the merge suffixes.

    Raises
    ------
    KeyError
        If any column named in the column groups is missing from ``df``.
    """
    # Row-wise sum of absolute importance within each group. All totals are
    # computed from the untouched input before anything is overwritten.
    grouped_totals = {
        "nCoo": df[co2_cols].abs().sum(axis=1),
        "nibp": df[nibp_cols].abs().sum(axis=1),
        "nart": df[nart_cols].abs().sum(axis=1),
        "ntemp": df[temp_cols].abs().sum(axis=1),
    }

    # Per-signal columns that are now represented by a grouped total.
    # 'nCoo' is kept because it now holds the CO2 group total.
    merged_away = ['nacooMin'] + nibp_cols + nart_cols + temp_cols

    rename_map = {
        'patient_id': 'Patient ID',
        'nCoo': 'EtCO₂',
        'nawRr': 'Respiratory rate (airway)',
        'nrespRate': 'Respiratory rate (plethysmograph)',
        'nPleth': 'SpO₂',
        'necgRate': 'Heart Rate (ECG)',
        'previous_resp_deteriorations': 'Previous respiratory deteriorations',
        'previous_cardiovascular_deteriorations': 'Previous cardiovascular deteriorations',
        'pim3': 'PIM3',
        'day_night': 'Time of Transfer (Day/Night)',
        'diagnosis_embedding': 'Diagnosis',
        'age': 'Age',
        'sex': 'Sex',
        'weight': 'Weight',
        'ventilation_support': 'Ventilation support',
        'vasoactive_agents': 'Vasoactive support',
        'destination_care_area': 'Destination Care Area',
        'medical_history': 'Medical History',
        'nibp': 'Blood Pressure (non-invasive)',
        'nart': 'Blood Pressure (arterial)',
        'ntemp': 'Temperature',
        # Keep these as-is.
        'y_prob': 'y_prob',
        'y_pred': 'y_pred',
        'y_true_resp': 'y_true',      # for the respiratory model
        'y_true_cardiac': 'y_true'      # for the cardiac model
    }

    # assign() works on a copy, so the caller's frame is never mutated.
    return (
        df.assign(**grouped_totals)
        .drop(columns=merged_away)
        .rename(columns=rename_map)
    )

# Process both dataframes
# process_df is handed a copy of each freshly loaded frame; the names are then
# rebound to the processed results.
cardiac_df, resp_df = process_df(cardiac_df.copy()), process_df(resp_df.copy())

# -------------------------------
# Merge dataframes to find a patient meeting criteria
# -------------------------------
# Join the two frames on patient; every other column name they share gets a
# '_resp' / '_cardiac' suffix.
merged_df = resp_df.merge(cardiac_df, on="Patient ID", suffixes=('_resp', '_cardiac'))

# Keep patients where:
#   - Respiratory model: predicted 1 and true label 1
#   - Cardiac model: predicted 0 and true label 0
filtered = merged_df.loc[
    lambda m: m['y_pred_resp'].eq(1) & m['y_true_resp'].eq(1)
    & m['y_pred_cardiac'].eq(0) & m['y_true_cardiac'].eq(0)
]

def create_patient_plot(df_single, title, ax):
    """Plot one patient's feature importances as a sorted horizontal bar chart.

    Parameters
    ----------
    df_single : pandas.DataFrame
        Processed frame; only its first row is plotted. Must contain
        'y_prob' and 'y_true' columns for the annotation.
    title : str
        Axes title.
    ax : matplotlib.axes.Axes
        Axes to draw on.
    """
    feature_cols = [col for col in df_single.columns if col not in ['Patient ID', 'y_prob', 'y_pred', 'y_true']]
    # First row only, largest importance at the top of the chart.
    patient_features = df_single.iloc[0][feature_cols].sort_values(ascending=False)

    # NOTE(review): recent seaborn releases warn when `palette` is passed
    # without `hue` — confirm against the installed seaborn version.
    sns.barplot(
        x=patient_features.values,
        y=patient_features.index,
        ax=ax,
        palette="viridis"
    )
    ax.set_title(title, fontsize=24)
    ax.set_xlabel('Relative Importance', fontsize=18)
    ax.set_ylabel('')

    # Select the column before the row: taking the whole row first
    # (df.iloc[0][...]) coerces the values to a common dtype, which can turn
    # an integer label into a float such as 1.0 in the annotation text.
    predicted_prob = df_single['y_prob'].iloc[0]
    actual_outcome = df_single['y_true'].iloc[0]
    annotation = f"Predicted Probability: {predicted_prob:.2f}\nActual Outcome: {actual_outcome}"

    # The text box's top-right corner is anchored at (0.9, 0.1) in axes
    # coordinates, i.e. near the lower-right corner of the panel.
    ax.text(
        0.9, 0.1, annotation,
        transform=ax.transAxes,
        ha='right', va='top',
        fontsize=18,
        bbox=dict(boxstyle="round", facecolor="white", alpha=0.8)
    )


if filtered.empty:
    print("No patient found with positive respiratory and negative cardiovascular predictions and true labels.")
else:
    # Use the first patient that meets the criteria. Indexing the column first
    # keeps the ID's own dtype; filtered.iloc[0]["Patient ID"] can return an
    # integer ID as a float (e.g. 152.0) when the other columns are floats.
    patient_id = filtered["Patient ID"].iloc[0]
    print("Found patient with ID:", patient_id)

    patient_resp = resp_df[resp_df["Patient ID"] == patient_id]
    patient_cardiac = cardiac_df[cardiac_df["Patient ID"] == patient_id]

    sns.set(style='whitegrid')
    sns.set_context("notebook", font_scale=1.5)

    # One row, two panels; constrained_layout handles the spacing between them.
    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(24, 12), constrained_layout=True)

    create_patient_plot(patient_resp, "a) Respiratory Model", axes[0])
    create_patient_plot(patient_cardiac, "b) Cardiovascular Model", axes[1])

    # Optionally add manual spacing if needed
    # fig.subplots_adjust(wspace=0.4)

    # Save with bbox_inches='tight' to ensure all labels fit inside
    plt.savefig(
        "/home/workspace/files/MilanK/Model1/final_models/figures/individual_feature_importance.png",
        dpi=1000,
        bbox_inches='tight'
    )
    plt.show()


In [None]:
# Patient to inspect.
patient_id = 152

# Rows for that patient in each processed frame (relies on resp_df / cardiac_df
# already carrying the renamed "Patient ID" column).
patient_cardiac = cardiac_df.loc[cardiac_df["Patient ID"].eq(patient_id)]
patient_resp = resp_df.loc[resp_df["Patient ID"].eq(patient_id)]

# Respiratory rows first, then cardiac rows.
print("Respiratory Data for Patient", patient_id, ":\n", patient_resp)
print("\nCardiac Data for Patient", patient_id, ":\n", patient_cardiac)
