In [1]:
import pandas as pd 

# Load the per-model results table. Its 'fi>dummy' column arrives as text and is
# parsed into lists of feature names by transform_to_list in a later cell.
result_df = pd.read_csv('./generated/aug_results/[all-lesions] [with-doc] [trx] [nopk] [result].csv')

In [2]:
# Columns removed from every annotation table before the radiologist scores are
# combined (presumably per-model values; confirm against the CSV headers).
MODEL_COLUMNS_TO_DROP = ["XGBoost", "RandomForest", "Decision Tree", "TabNet"]

# Load each annotation file and drop the model columns in the same expression,
# so no frame is mutated in place after it has been bound to a name and no
# loop variable is left behind in the notebook namespace.
# The file names are reproduced exactly as they exist on disk.
radiologists_noted_importance_atelectasis_df = pd.read_csv(
    './radiologist_annotations/atalectasis.csv'
).drop(columns=MODEL_COLUMNS_TO_DROP)
radiologists_noted_importance_enlarged_cardiac_silhouette_df = pd.read_csv(
    './radiologist_annotations/enlarged_card.csv'
).drop(columns=MODEL_COLUMNS_TO_DROP)
radiologists_noted_importance_consolidation_df = pd.read_csv(
    './radiologist_annotations/consolidation.csv'
).drop(columns=MODEL_COLUMNS_TO_DROP)

def combine_radiologist_scores(dfs):
    """
    Merge several radiologist-score tables into one row per feature.

    The inputs are stacked, every column other than "Feature" is coerced to
    numeric (values that cannot be parsed become NaN), and rows that share a
    "Feature" value are collapsed to their column-wise mean.

    Parameters:
        dfs (list of pd.DataFrame): Tables to merge; the stacked result is
            grouped on its "Feature" column.

    Returns:
        pd.DataFrame: One row per distinct feature with averaged scores.
    """
    stacked = pd.concat(dfs, ignore_index=True)

    # Everything except the feature label is treated as a score column.
    score_columns = [name for name in stacked.columns if name != "Feature"]
    for name in score_columns:
        stacked[name] = pd.to_numeric(stacked[name], errors='coerce')

    # Features that appear in more than one input table are averaged here.
    return stacked.groupby("Feature", as_index=False).mean()

# Merge the three per-finding annotation tables into a single frame with one
# row per feature; features present in several tables get their scores averaged.
# NOTE(review): the same call is repeated in a later cell (In [7]).
combined_df = combine_radiologist_scores([
    radiologists_noted_importance_atelectasis_df,
    radiologists_noted_importance_consolidation_df,
    radiologists_noted_importance_enlarged_cardiac_silhouette_df
])

In [3]:
# Single source of truth for the GraphRAG <-> DALL-M feature-name mapping.
# Each row is (GraphRAG feature name, DALL-M base name, associated disease).
# Both lookup tables below are derived from these rows so they cannot drift apart.
_FEATURE_NAME_ROWS = [
    ("Pleural Effusion", "pleural effusion", "atelectasis"),
    ("Pulmonary Tuberculosis", "pulmonary tuberculosis", "atelectasis"),
    ("Fatigue", "fatigue", "enlarged cardiac silhouette"),
    ("Fever", "fever", "atelectasis"),
    ("Chest pain", "chest pain", "atelectasis"),
    ("Palpitations", "palpitations", "enlarged cardiac silhouette"),
    ("Pericardial effusion", "pericardial effusion", "enlarged cardiac silhouette"),
    ("Shortness of breath", "shortness of breath", "enlarged cardiac silhouette"),
    ("Coughing up blood", "hemoptysis", "pulmonary edema"),
    ("Coughing up mucus", "frothy sputum", "pulmonary edema"),
    ("Wheezing", "wheezing", "pulmonary edema"),
    (
        "Chronic Obstructive Pulmonary Disease",
        "chronic obstructive pulmonary disease",
        "atelectasis",
    ),
    ("Smoking History", "smoking", "atelectasis"),
]


def _build_feature_map(include_lesion_suffix):
    """
    Build a mapping keyed by GraphRAG feature name from _FEATURE_NAME_ROWS.

    Parameters:
        include_lesion_suffix (bool): When True, "Name in DALL-M" is written as
            "<base name> [<disease>]"; when False it is the bare base name.

    Returns:
        dict: GraphRAG feature name -> dict with the keys "Name in DALL-M",
        "Name in GraphRAG for all", "type" and "disease". Every call returns
        freshly built entry dicts, so separate maps share no mutable state.
    """
    feature_map = {}
    for graphrag_name, dall_m_name, disease in _FEATURE_NAME_ROWS:
        if include_lesion_suffix:
            dall_m_label = f"{dall_m_name} [{disease}]"
        else:
            dall_m_label = dall_m_name
        feature_map[graphrag_name] = {
            "Name in DALL-M": dall_m_label,
            "Name in GraphRAG for all": graphrag_name,
            "type": "boolean",
            "disease": disease,
        }
    return feature_map


# DALL-M names carry the "[disease]" suffix; keys keep their original casing.
dall_m_to_graphrag = _build_feature_map(include_lesion_suffix=True)

# Suffix-free DALL-M names. Keys are lowercased because transform_to_list
# lowercases each feature name before looking it up in this mapping.
dall_m_to_graphrag_without_lesion = {
    k.lower(): v
    for k, v in _build_feature_map(include_lesion_suffix=False).items()
}

In [4]:
import json

def transform_to_list(x, mapping=None, verbose=True):
    """
    Parse one 'fi>dummy' cell into a list of normalized feature names.

    Each name has the "Augmented_" prefix removed and is lowercased; names
    found in ``mapping`` are then replaced by that entry's "Name in DALL-M".

    Parameters:
        x (str | list | tuple | None | float): The cell value. A string is
            parsed as a list literal, e.g. "['age', 'Augmented_Fever']". A list
            or tuple is used as-is, so re-running the cell on an already
            converted column is safe. None or NaN (an empty CSV cell) yields [].
        mapping (dict | None): Lowercased feature name -> entry dict holding a
            "Name in DALL-M" key. Defaults to the notebook-level
            ``dall_m_to_graphrag_without_lesion``.
        verbose (bool): Print one line per renamed feature (original behavior).

    Returns:
        list[str]: Normalized feature names in their original order.

    Raises:
        ValueError: If a string input cannot be parsed as a list.
    """
    import ast  # local import so the function does not rely on a new top-level import

    if mapping is None:
        mapping = dall_m_to_graphrag_without_lesion

    # Missing cell: there is nothing to parse.
    if x is None or (isinstance(x, float) and x != x):
        return []

    if isinstance(x, str):
        try:
            # Parses the Python-repr form directly, including names that contain
            # an apostrophe (which the quote-swapping approach corrupts).
            raw_names = ast.literal_eval(x)
        except (ValueError, SyntaxError):
            # Fall back to the previous parsing so input that used to load still loads.
            raw_names = json.loads(x.replace("'", "\""))
    else:
        raw_names = list(x)

    # remove Augmented_ prefix
    fi_list = [name.replace("Augmented_", "").lower() for name in raw_names]

    # transform to DALL-M feature name
    for i, fi in enumerate(fi_list):
        if fi in mapping:
            fi_list[i] = mapping[fi]["Name in DALL-M"]
            if verbose:
                print(f"Transformed {fi} to {fi_list[i]}")
    return fi_list

# Replace the text in 'fi>dummy' with parsed, renamed lists of feature names.
# NOTE(review): the column is overwritten in place, so re-running this cell
# feeds the already-parsed lists back into transform_to_list.
result_df['fi>dummy'] = result_df['fi>dummy'].apply(transform_to_list)

Transformed shortness of breath to shortness of breath
Transformed fatigue to fatigue
Transformed chest pain to chest pain
Transformed pericardial effusion to pericardial effusion
Transformed smoking history to smoking
Transformed fever to fever
Transformed coughing up mucus to frothy sputum
Transformed pleural effusion to pleural effusion
Transformed smoking history to smoking
Transformed palpitations to palpitations
Transformed smoking history to smoking
Transformed wheezing to wheezing
Transformed coughing up mucus to frothy sputum
Transformed smoking history to smoking
Transformed shortness of breath to shortness of breath
Transformed fatigue to fatigue
Transformed chest pain to chest pain
Transformed coughing up blood to hemoptysis
Transformed pericardial effusion to pericardial effusion
Transformed fever to fever
Transformed wheezing to wheezing
Transformed coughing up mucus to frothy sputum
Transformed pleural effusion to pleural effusion
Transformed shortness of breath to short

In [5]:
# Inspect the results table; 'fi>dummy' now holds lists of lowercased feature names.
result_df

Unnamed: 0,Feature Set,Model,Accuracy,AUC,Precision,F-1,Recall,p-value,#fi>dummy,#input_cols,fi>dummy
0,original,DecisionTree,0.921,0.5822,0.1041,0.1417,0.2224,0.0,1,9,[sbp]
1,original,RandomForest,0.9721,0.5485,0.6583,0.1711,0.0985,0.0,5,9,"[temperature, heartrate, sbp, dbp, age]"
2,original,XGBoost,0.9611,0.5168,0.1071,0.0632,0.0448,0.0001,8,9,"[temperature, heartrate, resprate, o2sat, sbp,..."
3,original,Neural Network,0.9685,0.5097,0.1988,0.0399,0.0224,0.0,8,9,"[temperature, heartrate, resprate, o2sat, sbp,..."
4,llama3_augmented_with_db,DecisionTree,0.9387,0.6225,0.1722,0.215,0.2866,0.0012,6,115,"[temperature, heartrate, sbp, dbp, age, pulmon..."
5,llama3_augmented_with_db,RandomForest,0.9728,0.5575,0.7198,0.2002,0.1164,0.0,5,115,"[temperature, heartrate, sbp, dbp, age]"
6,llama3_augmented_with_db,XGBoost,0.9698,0.5864,0.4615,0.2581,0.1791,0.0,57,115,"[heartrate, resprate, o2sat, sbp, dbp, age, sh..."
7,llama3_augmented_with_db,Neural Network,0.9655,0.569,0.3132,0.2002,0.1478,0.0009,24,115,"[heartrate, resprate, age, gender, palpitation..."
8,llama3_augmented_without_db,DecisionTree,0.9468,0.6006,0.1815,0.2038,0.2328,0.155,1,115,[fluid buildup in the lungs]
9,llama3_augmented_without_db,RandomForest,0.974,0.582,0.7522,0.2711,0.1657,0.0,5,115,"[temperature, heartrate, sbp, dbp, age]"


In [6]:
# NOTE(review): this re-defines combine_radiologist_scores, which is already
# defined in an earlier cell (In [2]); keep the two copies in sync or drop one.
def combine_radiologist_scores(dfs):
    """
    Stack radiologist-score tables and average the scores per feature.

    Every column except "Feature" is converted with pd.to_numeric using
    errors='coerce', so unparsable entries become NaN before the rows are
    grouped by "Feature" and reduced with the mean.

    Parameters:
        dfs (list of pd.DataFrame): Score tables to stack.

    Returns:
        pd.DataFrame: One row per distinct "Feature" value with mean scores.
    """
    merged = pd.concat(dfs, ignore_index=True)

    for column in merged.columns:
        if column == "Feature":
            # The label column is the grouping key, not a score.
            continue
        merged[column] = pd.to_numeric(merged[column], errors='coerce')

    averaged = merged.groupby("Feature", as_index=False).mean()
    return averaged

In [7]:
# Rebuild the combined radiologist table from the three annotation frames.
# NOTE(review): this repeats the call already made in an earlier cell (In [2])
# with the same arguments.
combined_df = combine_radiologist_scores([
    radiologists_noted_importance_atelectasis_df,
    radiologists_noted_importance_consolidation_df,
    radiologists_noted_importance_enlarged_cardiac_silhouette_df
])

In [8]:
# Inspect the per-feature mean radiologist scores.
combined_df

Unnamed: 0,Feature,Radiologist 1 Clinical Plausibility (Agreement),Radiologist 1 Clinical Relevance (Importance),Radiologist 1 Clinical Confidence (Certainty),Radiologist 2 Clinical Plausibility (Agreement),Radiologist 2 Clinical Relevance (Importance),Radiologist 2 Clinical Confidence (Certainty)
0,abdominal liquid,2.000000,2.500000,3.000000,2.500000,3.000000,3.000000
1,age,1.666667,2.000000,1.666667,1.666667,2.333333,1.666667
2,anesthesia,2.000000,2.000000,2.000000,1.500000,2.000000,2.000000
3,anxiety,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
4,asymptomatic,2.333333,2.333333,1.666667,2.333333,3.333333,2.333333
...,...,...,...,...,...,...,...
71,volume overload,3.000000,4.000000,3.000000,3.000000,4.000000,4.000000
72,weakness,3.000000,2.000000,2.000000,3.000000,4.000000,2.000000
73,wheezing,3.000000,2.000000,2.000000,3.000000,3.000000,2.000000
74,worsening of pain by inspiration,1.000000,1.666667,1.000000,1.000000,1.333333,1.000000


In [9]:
import pandas as pd

def compare_feature_sets(result_df, radiologist_df):
    """
    Compares the features each ML model flagged with the features scored by radiologists.

    One output row is produced per row of ``result_df`` (one feature set / model
    combination).

    Parameters:
        result_df (pd.DataFrame): ML results with the columns 'Feature Set',
            'Model' and 'fi>dummy'. 'fi>dummy' holds either a list of feature
            names or the string form of such a list.
        radiologist_df (pd.DataFrame): Combined radiologist scores; only its
            'Feature' column is read here.

    Returns:
        pd.DataFrame: Columns 'Feature Set', 'Model',
        'Total ML Important Features', 'Total Radiologist Features',
        'Aligned Features Count', 'Alignment Ratio' (aligned / ML-important,
        0.0 when the model flagged nothing) and 'Aligned Features' (sorted list
        of the shared names). The columns are present even when ``result_df``
        has no rows.

    Raises:
        ValueError, SyntaxError: If a string 'fi>dummy' value is not a valid
            Python literal.
    """
    import ast  # local import so the function does not rely on a new top-level import

    output_columns = [
        'Feature Set',
        'Model',
        'Total ML Important Features',
        'Total Radiologist Features',
        'Aligned Features Count',
        'Alignment Ratio',
        'Aligned Features',
    ]

    # The radiologist feature set does not depend on the row, so build it once.
    radiologist_feature_set = set(radiologist_df['Feature'])
    total_radiologist_features = len(radiologist_feature_set)

    comparison_results = []
    for _, row in result_df.iterrows():
        important_features = row['fi>dummy']

        # SECURITY: this value originates from a CSV file. It used to be passed
        # to eval(), which executes arbitrary code; literal_eval only accepts
        # plain Python literals such as a list of strings.
        if isinstance(important_features, str):
            important_features = ast.literal_eval(important_features)

        ml_feature_set = set(important_features)
        aligned_features = ml_feature_set & radiologist_feature_set

        total_ml_features = len(ml_feature_set)
        aligned_count = len(aligned_features)
        alignment_ratio = aligned_count / total_ml_features if total_ml_features > 0 else 0.0

        comparison_results.append({
            'Feature Set': row['Feature Set'],
            'Model': row['Model'],
            'Total ML Important Features': total_ml_features,
            'Total Radiologist Features': total_radiologist_features,
            'Aligned Features Count': aligned_count,
            'Alignment Ratio': alignment_ratio,
            # Sorted so the stored list is reproducible; set order is arbitrary.
            'Aligned Features': sorted(aligned_features, key=str),
        })

    # Passing the column list keeps the schema stable for an empty result.
    return pd.DataFrame(comparison_results, columns=output_columns)

In [10]:
# Build one alignment-summary row per (feature set, model) row of result_df.
comparison_df = compare_feature_sets(result_df, combined_df)


In [11]:
# Inspect the alignment summary.
comparison_df

Unnamed: 0,Feature Set,Model,Total ML Important Features,Total Radiologist Features,Aligned Features Count,Alignment Ratio,Aligned Features
0,original,DecisionTree,1,76,0,0.0,[]
1,original,RandomForest,5,76,3,0.6,"[age, temperature, heartrate]"
2,original,XGBoost,8,76,4,0.5,"[age, temperature, heartrate, gender]"
3,original,Neural Network,8,76,4,0.5,"[age, temperature, heartrate, gender]"
4,llama3_augmented_with_db,DecisionTree,6,76,3,0.5,"[age, temperature, heartrate]"
5,llama3_augmented_with_db,RandomForest,5,76,3,0.6,"[age, temperature, heartrate]"
6,llama3_augmented_with_db,XGBoost,56,76,7,0.125,"[age, heartrate, shortness of breath, fever, p..."
7,llama3_augmented_with_db,Neural Network,23,76,6,0.26087,"[age, heartrate, palpitations, wheezing, gende..."
8,llama3_augmented_without_db,DecisionTree,1,76,0,0.0,[]
9,llama3_augmented_without_db,RandomForest,5,76,3,0.6,"[age, temperature, heartrate]"


In [13]:
import matplotlib.pyplot as plt
import numpy as np

def visualize_feature_comparison(comparison_df, radiologist_df):
    """
    Visualizes the comparison between ML-important features and radiologists' scores.

    Three figures are shown:
      1. a bar chart of 'Alignment Ratio', one bar per row of ``comparison_df``;
      2. a stacked bar chart of aligned vs. non-aligned feature counts per row;
      3. a scatter plot of the mean radiologist score of every aligned feature
         (only when 'Aligned Features' exists and at least one feature is scored).

    Parameters:
        comparison_df (pd.DataFrame): Comparison results with the columns
            'Feature Set', 'Alignment Ratio', 'Aligned Features Count' and
            'Total ML Important Features'; 'Model' and 'Aligned Features' are
            used when present. The frame is not modified.
        radiologist_df (pd.DataFrame): Radiologist scores with a 'Feature'
            column; the remaining numeric columns are averaged per feature.

    Returns:
        None. The figures are displayed with plt.show().
    """
    from itertools import chain  # local import; flattens the per-row feature lists

    # Sort by alignment ratio for better readability
    ordered = comparison_df.sort_values(by="Alignment Ratio", ascending=False)

    # 'Feature Set' repeats once per model. Using it directly as the categorical
    # x value puts every model of a feature set at the same x position, so the
    # bars overdraw each other. Give each row its own slot and label instead.
    if "Model" in ordered.columns:
        bar_labels = (
            ordered["Feature Set"].astype(str) + " | " + ordered["Model"].astype(str)
        ).tolist()
        x_axis_label = "Feature Set | Model"
    else:
        bar_labels = ordered["Feature Set"].astype(str).tolist()
        x_axis_label = "Feature Set"
    positions = list(range(len(ordered)))

    # Bar Plot - Alignment Ratio per row
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.bar(positions, ordered["Alignment Ratio"].tolist(), color='skyblue')
    ax.set_xticks(positions)
    ax.set_xticklabels(bar_labels, rotation=45, ha="right")
    ax.set_xlabel(x_axis_label)
    ax.set_ylabel("Alignment Ratio")
    ax.set_title("Alignment Ratio of ML-important Features with Radiologists' Features")
    ax.set_ylim(0, 1)
    ax.grid(axis="y", linestyle="--", alpha=0.7)
    plt.show()

    # Stacked Bar Plot - Feature Alignment Count
    aligned = ordered["Aligned Features Count"]
    non_aligned = ordered["Total ML Important Features"] - aligned

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.bar(positions, aligned.tolist(), label="Aligned Features", color="green")
    ax.bar(
        positions,
        non_aligned.tolist(),
        bottom=aligned.tolist(),
        label="Non-Aligned Features",
        color="red",
    )
    ax.set_xticks(positions)
    ax.set_xticklabels(bar_labels, rotation=45, ha="right")
    ax.set_xlabel(x_axis_label)
    ax.set_ylabel("Number of Features")
    ax.set_title("Number of Aligned vs. Non-Aligned Features in ML Models")
    ax.legend()
    ax.grid(axis="y", linestyle="--", alpha=0.7)
    plt.show()

    # Scatter Plot - Per Feature Importance (if individual feature scores exist)
    if "Aligned Features" in ordered.columns:
        # Flatten the per-row lists and keep each feature once, in first-seen order.
        unique_features = list(
            dict.fromkeys(chain.from_iterable(ordered["Aligned Features"]))
        )

        # Collect features and scores together so both lists always have the
        # same length, even when a feature has no row in radiologist_df.
        plotted_features = []
        plotted_scores = []
        for feature in unique_features:
            score_row = radiologist_df[radiologist_df["Feature"] == feature]
            if score_row.empty:
                continue
            # Average across all radiologist score columns of the first match.
            avg_score = (
                score_row.drop(columns=["Feature"])
                .mean(axis=1, numeric_only=True)
                .iloc[0]
            )
            plotted_features.append(feature)
            plotted_scores.append(avg_score)

        if plotted_scores:
            fig, ax = plt.subplots(figsize=(12, 6))
            ax.scatter(plotted_features, plotted_scores, color="blue", alpha=0.6)
            ax.tick_params(axis="x", labelrotation=90)
            ax.set_xlabel("ML-identified Features")
            ax.set_ylabel("Average Radiologist Score")
            ax.set_title("ML-identified Features vs. Radiologist Score")
            ax.grid(axis="y", linestyle="--", alpha=0.7)
            plt.show()

# Render the alignment bar charts and the radiologist-score scatter plot for
# the comparison computed above.
visualize_feature_comparison(comparison_df, combined_df)


: 