In [None]:
# Notebook setup: imports, making the local `orion` checkout importable,
# and notebook/pandas display options.
import csv
import importlib.util  # `import importlib` alone does not guarantee the `util` submodule is loaded
import os
import sys

import numpy as np
import pandas as pd
# import torch
from IPython.display import HTML, display  # public location; `IPython.core.display` is deprecated for these names

## CHANGE THIS
dir2 = os.path.abspath("/volume/Orion/orion")
dir1 = os.path.dirname(dir2)
if dir1 not in sys.path:
    sys.path.append(dir1)

# Report early when the package cannot be located instead of discarding the lookup result.
if importlib.util.find_spec("orion") is None:
    print(f"WARNING: package 'orion' was not found via {dir1}; update the path above.")

# Use the full browser width for the notebook and widen the pandas display limits.
display(HTML("<style>.container { width:100% !important; }</style>"))
pd.set_option("display.max_rows", 50)
pd.set_option("display.max_columns", 250)
pd.set_option("display.width", 1000)
pd.set_option("display.max_colwidth", 300)
pd.options.mode.chained_assignment = None  # default='warn'

def df_stats(df):
    """Print a per-column summary of ``df`` and return its first rows.

    The summary is printed as a ``psql``-formatted table (via ``tabulate``)
    with one row per column of ``df``: the column name, its null count
    (``df.isnull().sum()``), its unique-value count (``df.nunique()``) and
    its dtype. The shape of ``df`` is printed before the table.

    Args:
        df: The pandas DataFrame to summarise.

    Returns:
        ``df.head()``.
    """
    from tabulate import tabulate

    print("\n***** Shape: ", df.shape," *****\n")

    # One entry per column of `df`, kept in the original column order.
    summary = pd.DataFrame(
        {
            'Name': df.columns.values.tolist(),
            'Null': df.isnull().sum().values.tolist(),
            'Unique': df.nunique().values.tolist(),
            'Dtypes': df.dtypes.tolist(),
        },
        columns=['Name', 'Null', 'Unique', 'Dtypes'],
    )
    print(tabulate(summary, headers='keys', tablefmt='psql'))
    return df.head()

### CathEF

In [None]:
# Folder where the model checkpoints are stored.
checkpoints_folder = "./swin3d_s_5_32_2_AdamW_new_20240122-184127_7hfxk75z"
# CSV file passed to the inference run as `data_path`.
data_path = "../data/inference_val_set_LCA_REGRESSION_with_MHI_2021_data_NAS_path_mu.csv"


### Code to run inference on validation set and save results to csv

In [None]:
from orion.utils import video_run_inference

# Model configuration file and name of the checkpoint file to load.
config_file_path = "config/config_regression_swin3d.yaml"
model_file_name = "best.pt"

# `checkpoints_folder` and `data_path` are expected to be defined by an earlier cell.
df_predictions_inference = video_run_inference.run_inference_and_no_logging(
    config_path=config_file_path,
    model_file_name=model_file_name,
    data_path=data_path,
    checkpoints_folder=checkpoints_folder,
)

In [None]:
# Write the inference predictions into the checkpoint folder.
df_predictions_inference.to_csv(f"{checkpoints_folder}/df_predictions_inference_val.csv")

### Code to run inference on the test set, log it to Weights & Biases (W&B) and save the results to CSV

In [None]:
from orion.utils import video_run_inference

# Folder where the model checkpoints are stored, and the checkpoint file name.
checkpoints_folder = "./swin3d_s_5_32_2_AdamW_new_20240120-151714_4iivz6og"
model_file_name = "best.pt"

# Path to the model configuration file.
config_file_path = "config/config_regression_swin3d.yaml"

# W&B run id (taken from the Weights & Biases website) and whether that
# run is resumed (True) or a new one is started.
wandb_id = "4iivz6og"
resume = True

# Evaluation mode.
split = 'test'

df_predictions_inference = video_run_inference.run_inference_and_log_to_wandb(
    checkpoints_folder=checkpoints_folder,
    model_file_name=model_file_name,
    wandb_id=wandb_id,
    resume=resume,
    config_path=config_file_path,
    split=split,
)


### Explore predictions and AUC

In [None]:
checkpoints_folder = "./swin3d_s_5_32_2_AdamW_new_20240122-184127_7hfxk75z" # path to the folder where the model checkpoints are stored
data_path = '../data/inference_val_set_LCA_REGRESSION_with_MHI_2021_data_NAS_path_mu.csv'

# Load the model predictions stored in the checkpoint folder.
# NOTE(review): the earlier save cell writes "df_predictions_inference_val.csv",
# while this reads "val_predictions.csv" - confirm which file is intended.
df_predictions = pd.read_csv(checkpoints_folder + '/val_predictions.csv')

# Load the metadata CSV. It is delimited by "µ"; that separator is longer than
# one byte once encoded, which the C parser cannot handle, so pandas would fall
# back to the python engine with a ParserWarning. Request that engine explicitly.
df = pd.read_csv(
    data_path,
    sep="µ",
    engine="python",
)


In [None]:
# Join the metadata with the predictions on the video file name.
merged_df = df.merge(df_predictions, left_on='FileName', right_on='filename')
merged_df['y_true'] = merged_df['Value'].astype(float)
# Rows without a patient name are labelled 'MHI', all others 'UCSF'.
merged_df['dataset'] = merged_df['Patient_Name'].apply(lambda x: 'MHI' if pd.isnull(x) else 'UCSF')

# Dichotomize truth and prediction at the 40 cutoff. The float-cast 'y_true'
# is used (not the raw 'Value' column) so the comparison also works when
# 'Value' was not parsed as a numeric column.
merged_df['y_true_cat'] = np.where(merged_df['y_true'] <= 40, 'low ef', 'normal EF')
merged_df['y_hat_cat'] = np.where(merged_df['y_hat'] <= 40, 'low ef', 'normal EF')
# A row is discordant when truth and prediction fall on different sides of the cutoff.
merged_df['discordant_ef'] = np.where(merged_df['y_true_cat'] == merged_df['y_hat_cat'], 'no_discordant', 'yes_discordant')
# Suffix the true category with the dataset name (done after the comparison above).
merged_df['y_true_cat'] = merged_df['y_true_cat'] + '_' + merged_df['dataset']
merged_df['y_true_y_hat'] = merged_df['y_true'].astype(str) + '_Prediction:' + merged_df['y_hat'].astype(str)

display(merged_df.tail(n=10))
display(merged_df['dataset'].value_counts())
display(merged_df['y_true_cat'].value_counts())
display(merged_df['dicom_path'].nunique())


In [None]:
# `import importlib` alone does not guarantee that the `util` submodule is loaded.
import importlib.util

# Make the parent folder of the `downloadAvi` package importable.
dir2 = os.path.abspath("/volume/DicomVideoProcessing/downloadAvi/")
dir1 = os.path.dirname(dir2)
if dir1 not in sys.path:
    sys.path.append(dir1)

importlib.util.find_spec("downloadAvi")
from downloadAvi import plot_avi as plot_avi

# Keep only the rows where the true and the predicted EF category disagree.
merged_df_f = merged_df.loc[merged_df['discordant_ef'] == 'yes_discordant']
plot_avi.sample_and_plot_middle_frames(merged_df_f, N=15, label_column='y_true_cat', second_label_column='y_true_y_hat', path_column='FileName')


In [None]:
from sklearn.metrics import ConfusionMatrixDisplay
import numpy as np
import matplotlib.pyplot as plt

# For some analyses the continuous prediction is dichotomized into <=40% vs >40%,
# since LVEF <=40% defines significant left ventricular dysfunction and has been
# a common cutoff for HFrEF.
# In sensitivity analysis the <50% / >=50% cutoff, which defines mildly reduced
# LVEF, is used as well. More details in the Supplement.

# Average 'Value' and 'y_hat' within each (Patient_ID, dataset, y_true) group.
grouped_df = (
    merged_df
    .groupby(['Patient_ID', 'dataset', 'y_true'])
    .agg({'Value': 'mean', 'y_hat': 'mean'})
    .reset_index()
)

In [None]:
import pandas as pd
from sklearn.metrics import roc_auc_score, confusion_matrix, ConfusionMatrixDisplay
import numpy as np
import matplotlib.pyplot as plt

def calculate_performance_metrics(df, thresholds=(40, 50), stratify_by='dataset', stratify_values=('UCSF', 'MHI')):
    """Compute correlation and AUC metrics per stratum and for the whole frame.

    For every value in ``stratify_values`` (rows where ``df[stratify_by]``
    equals that value) and finally for the entire frame (reported as
    ``'Overall'``), the function records:

    * ``'Correlation'``: the correlation of ``y_hat`` with ``y_true``
      (``Series.corr``).
    * ``'AUC for y_true_<threshold>'`` for each threshold: the ROC AUC of
      ``y_hat`` against the binary label that is 0 where ``Value`` is below
      the threshold and 1 otherwise.

    Args:
        df: DataFrame with the columns ``y_hat``, ``y_true``, ``Value`` and
            ``stratify_by``.
        thresholds: Iterable of cutoffs applied to ``Value``.
        stratify_by: Name of the column used to select the subsets.
        stratify_values: Iterable (tuple, list, ...) of values of
            ``stratify_by`` to evaluate separately.

    Returns:
        DataFrame with the columns ``Dataset``, ``Metric`` and ``Value``,
        one row per computed metric. The AUC is NaN for a subset whose
        binary label contains fewer than two classes (including an empty
        subset), because the AUC is undefined there.
    """
    # Collect plain records and build the frame once; this avoids repeatedly
    # concatenating onto an initially empty DataFrame.
    records = []

    # tuple(...) lets callers pass a list (or any iterable) of strata.
    for dataset in tuple(stratify_values) + ('Overall',):
        # 'Overall' means the entire frame, otherwise the matching subset.
        subset_df = df if dataset == 'Overall' else df[df[stratify_by] == dataset]

        correlation = subset_df['y_hat'].corr(subset_df['y_true'])
        records.append({'Dataset': dataset, 'Metric': 'Correlation', 'Value': correlation})

        for threshold in thresholds:
            y_true_binary = np.where(subset_df['Value'] < threshold, 0, 1)
            if np.unique(y_true_binary).size < 2:
                # Only one class (or no rows) in this subset: AUC is undefined.
                auc = np.nan
            else:
                auc = roc_auc_score(y_true_binary, subset_df['y_hat'])
            records.append({'Dataset': dataset, 'Metric': f'AUC for y_true_{threshold}', 'Value': auc})

    return pd.DataFrame(records, columns=['Dataset', 'Metric', 'Value'])

# `merged_df` and `grouped_df` must already exist when this runs.

# Metrics on the individual (merged) rows.
individual_performance = calculate_performance_metrics(merged_df)

# Metrics on the grouped frame.
grouped_performance = calculate_performance_metrics(grouped_df)

# Stack both result tables (individual rows first) and show them.
combined_performance = pd.concat(
    [individual_performance, grouped_performance],
    ignore_index=True,
)
display(combined_performance)

### Object Pred

In [None]:
df_test = pd.read_csv('/volume/Orion/swin3d_s_5_32_2_RAdam_new_20240103-032243/test_predictions.csv')

# 'y_hat' is read as text such as "[0.1 0.9 ...]": drop the surrounding
# brackets and parse the space-separated numbers into a NumPy array.
df_test['y_hat'] = df_test['y_hat'].map(
    lambda raw_scores: np.fromstring(raw_scores.strip('[]'), sep=' ')
)

# Index of the largest score in each row, as an integer.
df_test['argmax_class'] = df_test['y_hat'].map(
    lambda scores: np.argmax(scores).astype(int)
)


In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.preprocessing import OneHotEncoder
import matplotlib.pyplot as plt

# Mapping from integer class id to a human-readable class name.
object_to_names = {
    0: "Aorta",
    1: "Catheter",
    2: "Femoral",
    3: "Graft",
    4: "LV",
    5: "Left Coronary",
    6: "Other",
    7: "Pigtail",
    8: "Radial",
    9: "Right Coronary",
    10: "Stenting",
}

# Assuming df_test is your dataframe with 'y_hat' and y_true is your actual labels
y_true = df_test['y_true'].astype(int)  # Ensure y_true is in integer format

# One-hot encode y_true as a dense array. Newer scikit-learn releases only
# accept `sparse_output`; older ones only know `sparse`, so try the new
# keyword first and fall back on TypeError.
try:
    one_hot_encoder = OneHotEncoder(sparse_output=False, categories='auto')
except TypeError:
    one_hot_encoder = OneHotEncoder(sparse=False, categories='auto')
y_true_encoded = one_hot_encoder.fit_transform(y_true.values.reshape(-1, 1))

# Column j of `y_true_encoded` belongs to the j-th class that actually occurs
# in y_true, which is NOT necessarily class id j (a class may be absent).
# Use the encoder's fitted categories to recover the real class id.
encoded_class_ids = one_hot_encoder.categories_[0]

# Plotting ROC curves for each class
plt.figure(figsize=(12, 8))
for column_idx, class_id in enumerate(encoded_class_ids):
    class_id = int(class_id)
    class_name = object_to_names.get(class_id, 'Unknown')
    # Score of this class for every row (computed once, used for both metrics).
    class_scores = df_test['y_hat'].apply(lambda x: x[class_id])
    try:
        fpr, tpr, _ = roc_curve(y_true_encoded[:, column_idx], class_scores)
        auc_score = roc_auc_score(y_true_encoded[:, column_idx], class_scores)
    except ValueError as err:
        # Skip the class if error (like no variation in y_true labels for a class),
        # but say so instead of dropping it silently.
        print(f'Skipping {class_name} (Class {class_id}): {err}')
        continue
    plt.plot(fpr, tpr, label=f'{class_name} (Class {class_id}) - AUC: {auc_score:.2f}')


plt.plot([0, 1], [0, 1], 'k--')  # Dashed diagonal line
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curves for Each Class')
plt.legend(loc='best')
plt.show()


In [None]:
## CHANGE THIS
# Location of the `downloadAvi` package; its parent folder is added to
# sys.path (once) so that the package can be imported below.
dir2 = os.path.abspath("/volume/DicomVideoProcessing/downloadAvi")
dir1 = os.path.dirname(dir2)
if dir1 not in sys.path:
    sys.path.append(dir1)

from downloadAvi import plot_avi


In [None]:
# Expects df_test['y_hat'] to hold one score array per row and
# `object_to_names` to map class ids to names.

# Predicted class = index of the largest score in each row.
df_test['predicted_class'] = df_test['y_hat'].map(lambda scores: np.argmax(scores))

# Translate the predicted and the true class ids into their names.
df_test['predicted_class_name'] = df_test['predicted_class'].map(object_to_names)
df_test['true_class_name'] = (
    df_test['y_true']
    .astype(float)
    .astype(int)
    .map(object_to_names)
)

# Show the first rows to check the new columns.
df_test.head()

In [None]:

# Keep only the discordant rows, i.e. where the true class differs from the
# predicted class. (The previous `==` selected the correctly classified rows.)
df_discordant = df_test[df_test['y_true'] != df_test['predicted_class']]

# Group the DataFrame by 'predicted_class' and sample up to 5 examples from each group
# (fewer when a group has less than 5 rows).
df_discordant = df_discordant.groupby('predicted_class').apply(lambda x: x.sample(min(5, len(x)), replace=False))

# Reset the index of the DataFrame
df_discordant = df_discordant.reset_index(drop=True)

# Plot the sampled misclassified videos, labelled with the true and the predicted class name.
plot_avi.sample_and_plot_middle_frames(df_discordant, 35, label_column='true_class_name', second_label_column='predicted_class_name', path_column='filename')
