In [None]:
import pandas as pd
import json
import pandas as pd
import matplotlib.pyplot as plt
import re
from sklearn.metrics import precision_recall_curve, auc
import ast

In [None]:
import json
import ast
import pandas as pd

def try_parse_json_or_literal(val):
    """
    Best-effort conversion of a CSV cell into a Python object.

    The following parsers are tried in order and the first success wins:

    1. ``json.loads`` -- e.g. ``'[0.1, 0.2]'``.
    2. ``ast.literal_eval`` -- e.g. ``"('a', 2)"``.
    3. A flat, bracketed run of whitespace- and/or comma-separated numbers,
       e.g. ``'[0.1 0.2 0.3]'`` (the way a 1-D NumPy array is printed).
       Neither parser above accepts that form, so it is handled by hand.
       Tokens that are valid ints stay ints; everything else must be a float.

    Parameters
    ----------
    val : Any
        Usually a string read from a CSV cell; non-strings are tolerated.

    Returns
    -------
    The parsed object, or ``val`` unchanged when no parser applies (this
    includes non-string inputs such as ``None``/NaN, nested or truncated
    array strings, and arbitrary text).
    """
    try:
        return json.loads(val)
    except (json.JSONDecodeError, TypeError):
        pass

    try:
        return ast.literal_eval(val)
    except Exception:
        # Deliberately broad: literal_eval raises several unrelated exception
        # types for bad input and this helper is best-effort by design.
        pass

    # Last resort: '[0.1 0.2 0.3]'-style strings.
    if not isinstance(val, str):
        return val
    text = val.strip()
    if not (text.startswith("[") and text.endswith("]")):
        return val

    tokens = text[1:-1].replace(",", " ").split()
    if not tokens:
        return val

    numbers = []
    for token in tokens:
        try:
            numbers.append(int(token))
        except ValueError:
            try:
                numbers.append(float(token))
            except ValueError:
                # Not a flat numeric list (nested brackets, '...', words):
                # give the original value back untouched.
                return val
    return numbers

def load_results_csv(filepath, mapping=None,
                     list_columns=("all_probs", "all_labels", "fpr_vals", "tpr_vals")):
    """
    Load a results CSV, decode its list-like columns, and optionally rename models.

    Parameters
    ----------
    filepath : str or path-like
        Passed straight to ``pandas.read_csv``.
    mapping : dict, optional
        If given, applied to the "Model" column with ``Series.replace``;
        values that are not keys of ``mapping`` are left unchanged.
    list_columns : iterable of str, optional
        Columns whose cells are run through ``try_parse_json_or_literal``.
        Names that are not present in the file are skipped silently.

    Returns
    -------
    pandas.DataFrame
        The loaded frame. Metric columns such as ``auc_roc`` / ``auc_pr`` are
        NOT converted here; they are returned exactly as ``read_csv`` produced
        them (e.g. the string "0.812 (0.790-0.834)").

    Raises
    ------
    KeyError
        If ``mapping`` is given but the file has no "Model" column.
    """
    df = pd.read_csv(filepath)

    # Decode stringified lists/arrays back into Python objects
    for col in list_columns:
        if col in df.columns:
            df[col] = df[col].apply(try_parse_json_or_literal)

    if mapping is not None:
        if "Model" not in df.columns:
            raise KeyError(
                f'Cannot apply model-name mapping: no "Model" column in {filepath!r}'
            )
        df["Model"] = df["Model"].replace(mapping)

    return df

# Raw "Model" values -> display names; load_results_csv applies this to the
# "Model" column via Series.replace.
mapping = dict([
    ("combined_model_full_demographics",
     "Combined Transformer (Vector Diagnosis, Full Baseline)"),
    ("combined_model_simpler_demographics",
     "Combined Transformer (Vector Diagnosis, Reduced Baseline)"),
    ("combined_model_simpler_demographics_simpler_embeddings",
     "Combined Transformer (One-Hot Diagnosis, Reduced Baseline)"),
    ("combined_model_without_transformer_simpler_demographics",
     "Combined FF"),
    ("demographics_only_full",
     "Baseline-Only FF"),
    ("dynamic_only",
     "Vitals-Only Transformer"),
])

# Read the cardiac table first, then the respiratory one, renaming models in both
cardiac_df, resp_df = (
    load_results_csv(csv_path, mapping)
    for csv_path in (
        "/home/workspace/files/MilanK/Model1/final_models/cardiac_results.csv",
        "/home/workspace/files/MilanK/Model1/final_models/resp_results.csv",
    )
)


In [None]:



# Peek at the "Model" value in the third row (position 2) of the respiratory results
resp_df["Model"].iloc[2]

In [None]:
import matplotlib.pyplot as plt
import re

# One ROC curve per row of cardiac_df, all drawn on a single 10x7 figure
plt.figure(figsize=(10, 7))

for row_idx, result in cardiac_df.iterrows():
    display_name = result["Model"]
    raw_auroc = result["auc_roc"]

    # Strings containing "(" are expected to look like "mean (lower-upper)"
    has_paren = isinstance(raw_auroc, str) and "(" in raw_auroc
    ci_match = (
        re.match(r"([\d.]+)\s+\(([\d.]+)-([\d.]+)\)", raw_auroc)
        if has_paren
        else None
    )

    if ci_match:
        # Point estimate plus both confidence bounds were recovered
        point, ci_low, ci_high = (float(group) for group in ci_match.groups())
        legend_text = f'{display_name} (AUROC = {point:.3f}, 95% CI [{ci_low:.3f}, {ci_high:.3f}])'
    else:
        # Either the interval could not be parsed (use the leading
        # whitespace-separated token) or the value is a plain number.
        point = float(raw_auroc.split()[0]) if has_paren else float(raw_auroc)
        legend_text = f'{display_name} (AUROC = {point:.3f})'

    # fpr_vals / tpr_vals are expected to be sequences already
    plt.plot(result["fpr_vals"], result["tpr_vals"], label=legend_text)

# Chance-level reference line
plt.plot([0, 1], [0, 1], color='gray', linestyle='--')

# Labels, limits, legend
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curves for All Cardiac Models')
plt.xlim(0, 1)
plt.ylim(0, 1)
plt.legend(loc='lower right', fontsize='small')
plt.grid(False)
plt.tight_layout()
plt.show()


In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import precision_recall_curve
import re

# Precision-recall curves for every row of resp_df on a single 10x7 figure
fig, ax = plt.subplots(figsize=(10, 7))

# Labels of the most recently plotted row. Tracked explicitly so the base-rate
# line below never relies on a loop variable leaking out of the loop: with an
# empty resp_df that would raise NameError, or silently reuse a stale `labels`
# left behind by another cell.
baseline_labels = None

for idx, row in resp_df.iterrows():
    model_name = row["Model"]
    auc_pr_value = row["auc_pr"]

    # Strings containing "(" are expected to look like "mean (lower-upper)"
    has_paren = isinstance(auc_pr_value, str) and "(" in auc_pr_value
    m = re.match(r"([\d.]+)\s+\(([\d.]+)-([\d.]+)\)", auc_pr_value) if has_paren else None

    if m:
        aucpr_mean = float(m.group(1))
        aucpr_lower = float(m.group(2))
        aucpr_upper = float(m.group(3))
        label_str = f'{model_name} (AUPRC = {aucpr_mean:.3f}, 95% CI [{aucpr_lower:.3f}, {aucpr_upper:.3f}])'
    else:
        # Interval not parseable -> use the leading token; otherwise the value
        # is assumed to be a single number.
        aucpr_mean = float(auc_pr_value.split()[0]) if has_paren else float(auc_pr_value)
        label_str = f'{model_name} (AUPRC = {aucpr_mean:.3f})'

    probs = row["all_probs"]
    labels = row["all_labels"]
    baseline_labels = labels

    # Thresholds are not needed; avoid assigning to `_`, which IPython uses
    # for the last cell output.
    precision, recall, _thresholds = precision_recall_curve(labels, probs)
    ax.plot(recall, precision, label=label_str)

# Baseline: positive rate of the last row's labels.
# NOTE(review): this assumes every row shares the same label vector -- confirm.
if baseline_labels is not None and len(baseline_labels) > 0:
    baseline = sum(baseline_labels) / len(baseline_labels)
    ax.axhline(y=baseline, color='gray', linestyle='--', label=f'Base rate = {baseline:.2f}')

# Axis labels and formatting
ax.set_xlabel('Recall')
ax.set_ylabel('Precision')
ax.set_title('Precision-Recall Curves for All Respiratory Models')
ax.set_xlim(0, 1)
ax.set_ylim(0, 1)
ax.grid(False)

# Framed legend anchored to the top-right corner of the axes
ax.legend(
    loc='upper right',
    bbox_to_anchor=(1.0, 1.0),
    fontsize='small',
    frameon=True,
    fancybox=True,
    shadow=True,
    borderpad=1
)

plt.tight_layout()
plt.show()


In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import precision_recall_curve
import numpy as np
import os
import re

# 2x2 grid of panels on one 16x14 figure
fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(16, 14))
fig.subplots_adjust(hspace=0.3, wspace=0.2)  # spacing between panels

# Text sizes shared by all panels
label_fontsize, title_fontsize, legend_fontsize = 16, 18, 7.5

# Panel tags and their anchor points. The positions are later passed with
# transform=ax.transAxes, so (-0.12, 1.02) sits just outside the top-left
# corner of each panel.
subplot_labels = [f'({letter})' for letter in 'abcd']
label_positions = [(-0.12, 1.02)] * len(subplot_labels)

def parse_metric(metric_value, metric_name):
    """
    Parse a metric given either as a number or as "mean (lower-upper)".

    Parameters
    ----------
    metric_value : str or number
        E.g. "0.812 (0.790-0.834)", "0.812", or 0.812. Surrounding whitespace
        and optional spaces around the parenthesis/hyphen are tolerated.
    metric_name : str
        Name used as the prefix of the label, e.g. "AUROC".

    Returns
    -------
    tuple
        ``(mean, lower, upper, label_str)``. ``lower`` and ``upper`` are
        ``None`` when no confidence interval could be read; in that case the
        number preceding "(" (or the whole value) is used as the mean.

    Raises
    ------
    ValueError
        If no leading number can be extracted from ``metric_value``.
    """
    if isinstance(metric_value, str):
        text = metric_value.strip()
        m = re.match(r"([\d.]+)\s*\(\s*([\d.]+)\s*-\s*([\d.]+)\s*\)", text)
        if m:
            mean, lower, upper = (float(g) for g in m.groups())
            label_str = f"{metric_name} = {mean:.3f}, 95% CI [{lower:.3f}, {upper:.3f}]"
            return mean, lower, upper, label_str
        # Interval missing or malformed: keep only what precedes "(" so a
        # value like "0.8 (n/a)" still yields its point estimate instead of
        # failing on float() of the whole string.
        mean = float(text.split("(", 1)[0])
    else:
        # Already numeric (or numeric-like): use it directly
        mean = float(metric_value)

    label_str = f"{metric_name} = {mean:.3f}"
    return mean, None, None, label_str

# The four panels share the same structure, so they are drawn by small helpers
# instead of four copy-pasted blocks. The helpers read the module-level
# label_fontsize / title_fontsize / legend_fontsize / subplot_labels /
# label_positions and call parse_metric, all defined earlier in this cell.

def _finish_panel(ax, xlabel, ylabel, title, legend_loc, panel_index):
    """Apply axis labels, title, unit limits, legend and the '(a)'-style tag to one panel."""
    ax.set_xlabel(xlabel, fontsize=label_fontsize)
    ax.set_ylabel(ylabel, fontsize=label_fontsize)
    ax.set_title(title, fontsize=title_fontsize)
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.grid(False)
    ax.legend(loc=legend_loc, fontsize=legend_fontsize, frameon=True)
    ax.text(*label_positions[panel_index], subplot_labels[panel_index], transform=ax.transAxes,
            fontsize=title_fontsize, fontweight='bold')


def _draw_roc_panel(ax, results_df, title, panel_index):
    """Plot one ROC curve per row of results_df plus the chance diagonal."""
    for _idx, row in results_df.iterrows():
        metric_label = parse_metric(row["auc_roc"], "AUROC")[3]
        label_str = f'{row["Model"]} ({metric_label})'
        # fpr_vals / tpr_vals are expected to be sequences already
        ax.plot(row["fpr_vals"], row["tpr_vals"], label=label_str, linewidth=1)

    ax.plot([0, 1], [0, 1], color='gray', linestyle='--', linewidth=1)
    _finish_panel(ax, 'False Positive Rate', 'True Positive Rate', title,
                  'lower right', panel_index)


def _draw_pr_panel(ax, results_df, title, panel_index):
    """Plot one precision-recall curve per row of results_df plus a base-rate line."""
    # Tracked explicitly so the baseline never depends on a variable leaking
    # out of the loop (NameError / stale value when results_df is empty).
    last_labels = None
    for _idx, row in results_df.iterrows():
        metric_label = parse_metric(row["auc_pr"], "AUPRC")[3]
        label_str = f'{row["Model"]} ({metric_label})'

        last_labels = row["all_labels"]
        precision, recall, _thresholds = precision_recall_curve(last_labels, row["all_probs"])
        ax.plot(recall, precision, label=label_str, linewidth=1)

    # Baseline = positive rate of the last row's labels.
    # NOTE(review): assumes every row shares the same label vector -- confirm.
    if last_labels is not None and len(last_labels) > 0:
        baseline = sum(last_labels) / len(last_labels)
        ax.axhline(y=baseline, color='gray', linestyle='--',
                   label=f'Baseline = {baseline:.2f}', linewidth=1)

    _finish_panel(ax, 'Recall', 'Precision', title, 'upper right', panel_index)


# ------------------- Respiratory AUROC (Top Left)
_draw_roc_panel(axs[0, 0], resp_df, 'Respiratory Model ROC Curves', 0)

# ------------------- Respiratory AUPRC (Top Right)
_draw_pr_panel(axs[0, 1], resp_df, 'Respiratory Model Precision-Recall Curves', 1)

# ------------------- Cardiac AUROC (Bottom Left)
_draw_roc_panel(axs[1, 0], cardiac_df, 'Cardiovascular Model ROC Curves', 2)

# ------------------- Cardiac AUPRC (Bottom Right)
_draw_pr_panel(axs[1, 1], cardiac_df, 'Cardiovascular Model Precision-Recall Curves', 3)

# Final adjustments and save
plt.tight_layout()

# The same image is written to two output directories
output_dir1 = '/home/workspace/files/MilanK/Model1/final_models/figures'
output_dir2 = '/home/workspace/files/MilanK/Model1/figures'

# Ensure both directories exist before anything is written
os.makedirs(output_dir1, exist_ok=True)
os.makedirs(output_dir2, exist_ok=True)

# Full file paths for each copy
save_path1 = os.path.join(output_dir1, 'auroc_auprc.png')
save_path2 = os.path.join(output_dir2, 'auroc_auprc.png')

# Save the figure to both locations, then display it
plt.savefig(save_path1, dpi=1000)
plt.savefig(save_path2, dpi=1000)
plt.show()
