In [None]:
import math
import matplotlib.pyplot as plt
import heartpy as hp
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed
from random import shuffle
from scipy.interpolate import CubicSpline
from numba import jit

import numpy as np
import pandas as pd

# pd.set_option('display.height', 1000)
# Raise pandas' display limits so long and wide frames are rendered without truncation.
pd.set_option("display.max_rows", 500)
pd.set_option("display.max_columns", 500)
pd.set_option("display.width", 1000)
pd.set_option("display.max_colwidth", 200)

# `display` is imported from the public IPython.display module: pulling it from
# IPython.core.display is deprecated and no longer works on newer IPython releases.
from IPython.display import HTML, display

# Inject CSS that lets elements with class `container` span the full page width.
display(HTML("<style>.container { width:100% !important; }</style>"))

def df_stats(df):
    """Print a per-column summary of ``df`` and return its first rows.

    The summary lists, for every column, its name, the number of null values,
    the number of distinct values and the dtype. It is rendered with
    ``tabulate`` when that package is installed; otherwise the same table is
    printed with pandas' plain-text formatting, so the helper keeps working
    without the optional dependency.

    Args:
        df: the DataFrame to summarise.

    Returns:
        ``df.head()``, so the call can be the last expression of a cell.
    """
    print("\n***** Shape: ", df.shape, " *****\n")

    # One summary row per column of `df`.
    df_stat_val = pd.DataFrame(
        {
            "Name": df.columns.values.tolist(),
            "Null": df.isnull().sum().values.tolist(),
            "Unique": df.nunique().values.tolist(),
            "Dtypes": df.dtypes.tolist(),
        }
    )

    try:
        from tabulate import tabulate
    except ImportError:
        # tabulate only affects the rendering; fall back to pandas' own formatter.
        print(df_stat_val.to_string())
    else:
        print(tabulate(df_stat_val, headers="keys", tablefmt="psql"))
    return df.head()


In [None]:
# Load the cardiologist-validated frame used by the cells below. With this line commented
# out, nothing in the notebook defines `df_validated_by_cardiologist`, so a fresh kernel
# ("Restart & Run All") fails with NameError at its first use.
df_validated_by_cardiologist = pd.read_csv('data/core_model_performance/df_validated_by_cardiologist.csv')

In [None]:
import json
def classify_severity(df, json_path):
    """Add per-row severity categories for the AI, cardiologist and MUSE labels.

    The JSON file at ``json_path`` maps a diagnosis column name to a severity
    (``'NORMAL'``, ``'BORDERLINE'`` or ``'PATHOLOGICAL'``). Columns of ``df``
    are matched against that mapping as follows:

    * a column whose name is a key of the mapping feeds the AI category;
    * ``<key>_CARDIOLOGIST`` feeds the cardiologist category;
    * ``<key>_MUSE`` feeds the MUSE category.

    A label is considered set on a row when its cell compares equal to ``1``.
    Each category is the most severe level among the labels set on the row,
    and ``'NORMAL'`` when none is set.

    Rows are processed by position, so the result is correct even when the
    index of ``df`` contains duplicate labels. Column labels that are not
    strings are ignored.

    Args:
        df: DataFrame holding the label columns. It is modified in place.
        json_path: path of the JSON file with the ``column -> severity`` mapping.

    Returns:
        The same ``df`` object with four columns added (or overwritten):
        ``diagnosis_category``, ``diagnosis_category_cardiologist``,
        ``diagnosis_category_muse`` and ``ai_diagnosis_string`` (the names of
        the AI label columns set on the row, joined with ``', '``).

    Raises:
        ValueError: if a label set on some row maps to a severity that is not
            one of the three known levels.
    """
    # Load the JSON file
    with open(json_path, 'r') as f:
        severity_dict = json.load(f)

    # Ordered from least to most severe; the list position is the ranking key.
    severity_order = ['NORMAL', 'BORDERLINE', 'PATHOLOGICAL']

    # Create (or reset) the output columns up front: they then hold their defaults
    # while the label columns are scanned, and they exist even for an empty frame.
    df['diagnosis_category'] = 'NORMAL'
    df['diagnosis_category_cardiologist'] = 'NORMAL'
    df['diagnosis_category_muse'] = 'NORMAL'
    df['ai_diagnosis_string'] = ''

    n_rows = len(df)
    if n_rows == 0:
        return df

    # Resolve every label column once (instead of once per row) into
    # (column name, severity, per-row boolean flags), grouped by output column.
    sources = {
        'diagnosis_category': [],
        'diagnosis_category_cardiologist': [],
        'diagnosis_category_muse': [],
    }
    for position, col in enumerate(df.columns):
        if not isinstance(col, str):
            continue
        if col in severity_dict:
            target, base_col = 'diagnosis_category', col
        elif col.endswith('_CARDIOLOGIST'):
            target, base_col = 'diagnosis_category_cardiologist', col[:-len('_CARDIOLOGIST')]
        elif col.endswith('_MUSE'):
            target, base_col = 'diagnosis_category_muse', col[:-len('_MUSE')]
        else:
            continue
        if base_col not in severity_dict:
            continue
        # Positional access keeps this working when column names are duplicated.
        flags = (df.iloc[:, position] == 1).to_numpy()
        sources[target].append((col, severity_dict[base_col], flags))

    results = {target: ['NORMAL'] * n_rows for target in sources}
    ai_strings = [''] * n_rows
    for i in range(n_rows):
        for target, columns in sources.items():
            hits = [severity for _, severity, flags in columns if flags[i]]
            if hits:
                # Keep the highest severity found on the row
                results[target][i] = max(hits, key=severity_order.index)
        ai_strings[i] = ', '.join(
            name for name, _, flags in sources['diagnosis_category'] if flags[i]
        )

    # Whole-column assignment of a list is positional, so rows that share an
    # index label can no longer overwrite each other.
    for target, values in results.items():
        df[target] = values
    df['ai_diagnosis_string'] = ai_strings

    return df

In [None]:
# Annotate the validated frame with the AI / cardiologist / MUSE severity categories
# defined by the mapping in utils/severity_classification.json.
df_validated_by_cardiologist = classify_severity(
    df_validated_by_cardiologist,
    'utils/severity_classification.json',
)

In [None]:
# Rows where the DeepECG severity category differs from the cardiologist's.
severity_mismatch = (
    df_validated_by_cardiologist.diagnosis_category
    != df_validated_by_cardiologist.diagnosis_category_cardiologist
)
mismatched_df = df_validated_by_cardiologist[severity_mismatch]

# Show up to 10 of them. `.sample(10)` raises ValueError when fewer than 10 rows
# disagree, so the request is capped at the number available; the fixed seed keeps
# the printed examples the same from run to run.
sample_df = mismatched_df.sample(n=min(10, len(mismatched_df)), random_state=0)

# Print the diagnosis field and each adjudication for the sampled examples
for _, row in sample_df.iterrows():
    print(f"Diagnosis: {row['diagnosis']}")
    print(f"DeepECG Severity: {row['diagnosis_category']}")
    print(f"Cardiologist Severity: {row['diagnosis_category_cardiologist']}")
    print(f"Muse Severity: {row['diagnosis_category_muse']}")
    print("---")


In [None]:
import seaborn as sns
# Severity levels from least to most severe. Every matrix is laid out on this same
# grid so the three panels are directly comparable.
SEVERITY_LEVELS = ['NORMAL', 'BORDERLINE', 'PATHOLOGICAL']


def _severity_crosstab(frame, row_column, col_column, row_name, col_name):
    """Cross-tabulate two severity columns of ``frame`` on the fixed severity grid.

    ``pd.crosstab`` alone sorts the levels alphabetically and omits a level that
    never occurs, which would give matrices of different shapes and orders.
    Reindexing onto ``SEVERITY_LEVELS`` (missing cells filled with 0) always
    yields a 3x3 integer table in severity order.
    """
    counts = pd.crosstab(frame[row_column], frame[col_column],
                         rownames=[row_name], colnames=[col_name])
    return counts.reindex(
        index=pd.Index(SEVERITY_LEVELS, name=row_name),
        columns=pd.Index(SEVERITY_LEVELS, name=col_name),
        fill_value=0,
    )


# Compute the confusion matrices separately
cm_deepecg_cardiologist = _severity_crosstab(
    df_validated_by_cardiologist,
    'diagnosis_category', 'diagnosis_category_cardiologist',
    'DeepECG', 'Cardiologist',
)
cm_deepecg_muse = _severity_crosstab(
    df_validated_by_cardiologist,
    'diagnosis_category', 'diagnosis_category_muse',
    'DeepECG', 'Muse',
)
cm_cardiologist_muse = _severity_crosstab(
    df_validated_by_cardiologist,
    'diagnosis_category_cardiologist', 'diagnosis_category_muse',
    'Cardiologist', 'Muse',
)

# One row of three panels, one confusion matrix per panel
fig, ax = plt.subplots(1, 3, figsize=(15, 5))

# Plot the confusion matrices
panels = [
    (cm_deepecg_cardiologist, 'DeepECG vs Cardiologist'),
    (cm_deepecg_muse, 'DeepECG vs Muse'),
    (cm_cardiologist_muse, 'Cardiologist vs Muse'),
]
for axis, (matrix, title) in zip(ax, panels):
    sns.heatmap(matrix, annot=True, fmt='d', cmap='Blues', ax=axis,
                cbar=False, square=True, linewidths=.5)
    axis.set_title(title)

plt.tight_layout()
plt.show()

In [None]:
# Title string for the AI side: "AI DIAGNOSIS: <severity> - <AI label names>"
df_validated_by_cardiologist['diagnosis_category_ai'] = (
    "AI DIAGNOSIS: "
    + df_validated_by_cardiologist['diagnosis_category']
    + ' - '
    + df_validated_by_cardiologist['ai_diagnosis_string']
)

# Title string for the cardiologist side: "CARDIO DIAGNOSIS: <severity> - <diagnosis text>".
# String concatenation propagates missing values, so a row without a `diagnosis`
# would end up with NaN as its whole title; missing text is treated as empty instead.
df_validated_by_cardiologist['diagnosis_category_cardiologist_full'] = (
    "CARDIO DIAGNOSIS: "
    + df_validated_by_cardiologist['diagnosis_category_cardiologist']
    + ' - '
    + df_validated_by_cardiologist['diagnosis'].fillna('').astype(str)
)


In [None]:
#df_validated_by_cardiologist.to_csv('/volume/DeepECG/data/core_model_performance/df_validated_by_cardiologist_with_diagnosis_category.csv')

In [None]:
# Read the ECG parquet export into df_ecg.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
df_ecg = pd.read_parquet(
    path='/media/data1/muse_ge/ECG_ad202207_1453937_cat_labels_v1.1_with_additional_columns.parquet'
)

In [None]:
# Spot check: acquisition date/time and two label columns for the rows of one patient.
# NOTE(review): a patient identifier is hard-coded here — confirm it may be shared.
display(
    df_ecg.loc[
        df_ecg['new_PatientID'] == '0241579',
        [
            'RestingECG_TestDemographics_AcquisitionDate',
            'RestingECG_TestDemographics_AcquisitionTime',
            'Afib',
            'Q wave (inferior - II, III, aVF)',
        ],
    ]
)

In [None]:
# Restrict df_ecg to the columns whose name contains 'RestingECG_' or 'npy_path'.
df_ecg = df_ecg.filter(regex='RestingECG_|npy_path', axis='columns')

# patient_id = the part of the npy_path file name (text after the last '/')
# that precedes its first '_'.
df_validated_by_cardiologist['patient_id'] = (
    df_validated_by_cardiologist['npy_path']
    .str.rsplit('/', n=1).str[-1]
    .str.split('_', n=1).str[0]
)

In [None]:
# Inner join on npy_path: only validated ECGs that also appear in df_ecg are kept.
df_merged = df_validated_by_cardiologist.merge(df_ecg, how='inner', on='npy_path')


In [None]:
# Preview the first five merged rows.
display(df_merged.head())

In [None]:
# Show every merged row recorded for one patient ID.
# NOTE(review): a patient identifier is hard-coded here — confirm it may be shared.
display(
    df_merged[df_merged['RestingECG_PatientDemographics_PatientID'] == '0248327']
)

In [None]:
from utils import plot_from_parquet
# Set the plot style to default to reset the background color to white
plt.style.use('default')

# Cases the cardiologist graded PATHOLOGICAL while DeepECG graded NORMAL
discrepancies_df = df_merged[
    (df_merged.diagnosis_category_cardiologist == 'PATHOLOGICAL')
    & (df_merged.diagnosis_category == 'NORMAL')
]

# Draw at most one case (sampling one row from an empty selection would raise), then
# renumber the rows from 0. The renumbering has to come after sampling: `index=idx`
# below counts from 0, and whether the helper looks rows up by label or by position
# is not visible here — with a 0..n-1 index both interpretations hit the sampled row.
discrepancies_df = (
    discrepancies_df
    .sample(n=min(1, len(discrepancies_df)))
    .reset_index(drop=True)
)

# Plot every sampled case
for idx in range(len(discrepancies_df)):
    image = plot_from_parquet.plot_from_parquet(
        discrepancies_df,
        patient_id_column="patient_id",
        index=idx,
        diagnosis_column="diagnosis_category_ai",
        subtitle_column="diagnosis_category_cardiologist_full",
        save=True,
        out_dir="ekg_JPEG",
        anonymize=False
    )


In [None]:
import json

# Load the severity classification JSON
with open('utils/severity_classification.json', 'r') as f:
    severity_dict = json.load(f)

# Get unique diagnoses from the JSON
diagnoses = list(severity_dict.keys())

# Row masks: DeepECG and the cardiologist assign the same / a different severity category
categories_agree = df_merged.diagnosis_category_cardiologist == df_merged.diagnosis_category
categories_differ = df_merged.diagnosis_category_cardiologist != df_merged.diagnosis_category

# Collect the sampled rows and concatenate once at the end
sampled_rows = []
for diagnosis in diagnoses:
    # Only a missing column is reported as such; any other error is left to surface
    # instead of being hidden behind a catch-all handler.
    if diagnosis not in df_merged.columns:
        print("Column not found", diagnosis)
        continue

    has_diagnosis = df_merged[diagnosis] == 1

    # Sample one example with agreement
    agreement_df = df_merged[has_diagnosis & categories_agree]
    if not agreement_df.empty:
        sampled_rows.append(agreement_df.sample(1))

    # Sample one example with disagreement
    disagreement_df = df_merged[has_diagnosis & categories_differ]
    if not disagreement_df.empty:
        sampled_rows.append(disagreement_df.sample(1))

# Rows are renumbered 0..n-1; with nothing sampled, keep an empty frame with the same columns
if sampled_rows:
    sampled_df = pd.concat(sampled_rows, ignore_index=True)
else:
    sampled_df = pd.DataFrame(columns=df_merged.columns)

# Reset the index of the sampled dataframe
#sampled_df = sampled_df.reset_index(drop=True)
#sampled_df.to_csv('data/sampled_discrepancy_and_concordance_DeepECG_Cardiologist_20240413.csv')

In [None]:
# Options shared by every call; `save`/`out_dir` presumably make the helper write
# each figure under ekg_JPEG — confirm against utils.plot_from_parquet.
plot_kwargs = dict(
    patient_id_column="patient_id",
    diagnosis_column="diagnosis_category_ai",
    subtitle_column="diagnosis_category_cardiologist_full",
    save=True,
    out_dir="ekg_JPEG",
    anonymize=False,
)

# One call per row of sampled_df
for idx in range(sampled_df.shape[0]):
    image = plot_from_parquet.plot_from_parquet(sampled_df, index=idx, **plot_kwargs)


In [None]:
import os
import boto3

# Local folder to scan and destination bucket
png_directory = 'ekg_JPEG'
bucket_name = 'sampled-deepecg-cardiologist-performance'

# S3 client bound to the ca-central-1 region
s3 = boto3.client('s3', region_name='ca-central-1')

# Upload every entry of the folder whose name ends with '.png', using the bare
# file name as the object key
for filename in filter(lambda name: name.endswith('.png'), os.listdir(png_directory)):
    file_path = os.path.join(png_directory, filename)
    s3.upload_file(file_path, bucket_name, filename)
    print(f'Uploaded {filename} to S3 bucket {bucket_name}')