# Correlation analysis between volume differences and DSC/HD95

In [None]:
import matplotlib.pyplot as plt 
import seaborn as sns
import pandas as pd
from scipy.stats import pearsonr, spearmanr
from sklearn.linear_model import LinearRegression

In [None]:
#Linear Regression and R^2 values
def annotate_r2(ax, x, y, color='black', xpos=0.05, ypos=0.95):
    """Annotate an axis with the R^2 of a simple linear fit of ``y`` on ``x``.

    A ``LinearRegression`` model is fitted on ``x`` and scored on the same
    data, and the resulting coefficient of determination is written onto
    ``ax`` as text.

    Args:
        ax: Matplotlib axis that receives the text annotation.
        x: One-dimensional predictor values (NumPy array, pandas Series or
            plain sequence).
        y: Response values, same length as ``x``.
        color: Text colour of the annotation.
        xpos: Horizontal text position in axes-fraction coordinates.
        ypos: Vertical text position in axes-fraction coordinates.
    """
    # Local import: numpy is not imported at file level in this notebook.
    import numpy as np

    # scikit-learn expects a 2-D feature matrix. np.asarray makes the reshape
    # work for Series/lists too, not only for arrays that expose .reshape.
    features = np.asarray(x).reshape(-1, 1)
    targets = np.asarray(y)

    r2 = LinearRegression().fit(features, targets).score(features, targets)

    # transform=ax.transAxes places the text relative to the axis box
    # (0..1 in each direction) rather than in data coordinates.
    ax.text(xpos, ypos, f'$R^2$ = {r2:.3f}', transform=ax.transAxes,
            ha='left', va='top', fontsize=16, color=color)


def analyze_segmentation_metrics(vol_ai, vol_gt, dice_scores, hd95_scores=None, plot=True):
    """Correlate relative volume difference with DICE and, optionally, HD95.

    Builds a per-case table of predicted and ground-truth volumes, derives the
    absolute and relative (percent) volume differences, prints Pearson and
    Spearman correlations of the relative difference against each metric and
    optionally draws regression plots.

    Args:
        vol_ai: Volumes produced by the AI model, one per case.
        vol_gt: Ground-truth volumes, one per case.
        dice_scores: DICE score per case.
        hd95_scores: Optional HD95 value per case. When ``None``, the HD95
            statistics and the HD95 plot are skipped.
        plot: If true, show the regression plots.

    Returns:
        pandas.DataFrame with columns ``V_AI``, ``V_GT``, ``DICE``,
        ``abs_vol_diff``, ``rel_vol_diff_percent`` and, when HD95 values were
        supplied, ``HD95``.
    """
    df = pd.DataFrame({
        'V_AI': vol_ai,  # Volume AI model
        'V_GT': vol_gt,  # Volume Ground Truth
        'DICE': dice_scores,
    })

    volume_delta = df['V_AI'] - df['V_GT']
    df['abs_vol_diff'] = volume_delta.abs()
    df['rel_vol_diff_percent'] = (volume_delta / df['V_GT']) * 100

    has_hd95 = hd95_scores is not None
    if has_hd95:
        df['HD95'] = hd95_scores

    # Pearson and Spearman - DICE
    pearson_r, pearson_p = pearsonr(df['rel_vol_diff_percent'], df['DICE'])
    spearman_rho, spearman_p = spearmanr(df['rel_vol_diff_percent'], df['DICE'])

    print(f"Pearson r = {pearson_r:.3f}, p-value = {pearson_p:.4f}")
    print(f"Spearman ρ = {spearman_rho:.3f}, p-value = {spearman_p:.4f}")

    # Pearson and Spearman - HD95.
    # Guarded: the 'HD95' column only exists when hd95_scores was supplied, so
    # running this unconditionally raised KeyError for the default argument.
    if has_hd95:
        pearson_r_hd, pearson_p_hd = pearsonr(df['rel_vol_diff_percent'], df['HD95'])
        spearman_rho_hd, spearman_p_hd = spearmanr(df['rel_vol_diff_percent'], df['HD95'])

        print(f"Pearson r = {pearson_r_hd:.3f}, p-value = {pearson_p_hd:.4f}")
        print(f"Spearman ρ = {spearman_rho_hd:.3f}, p-value = {spearman_p_hd:.4f}")

    # Plotting
    if plot:
        sns.set(style='whitegrid')
        fig, axes = plt.subplots(1, 2, figsize=(14, 5))
        dice_ax, hd95_ax = axes

        # DICE vs Relative Volume Difference
        sns.regplot(data=df, x='rel_vol_diff_percent', y='DICE',
                    scatter_kws={'s': 80}, line_kws={'color': 'blue'}, ax=dice_ax)
        annotate_r2(dice_ax,
                    df['rel_vol_diff_percent'].values,
                    df['DICE'].values,
                    color='blue')

        dice_ax.axhline(0, color='gray', linestyle='--', linewidth=1)
        dice_ax.axvline(0, color='gray', linestyle='--', linewidth=1)
        dice_ax.set_title('DICE vs. Relative Volume Difference')
        dice_ax.set_xlabel('Relative Volume Difference (%)')
        dice_ax.set_ylabel('DICE Score')

        # 95HD vs Relative Volume Difference
        if has_hd95:
            sns.regplot(data=df, x='rel_vol_diff_percent', y='HD95',
                        scatter_kws={'s': 80}, line_kws={'color': 'darkred'}, ax=hd95_ax)
            annotate_r2(hd95_ax,
                        df['rel_vol_diff_percent'].values,
                        df['HD95'].values,
                        color='darkred')
            hd95_ax.axhline(0, color='gray', linestyle='--', linewidth=1)
            hd95_ax.axvline(0, color='gray', linestyle='--', linewidth=1)
            hd95_ax.set_title('95HD vs. Relative Volume Difference')
            hd95_ax.set_xlabel('Relative Volume Difference (%)')
            hd95_ax.set_ylabel('95% Hausdorff Distance')
        else:
            # No HD95 data: hide the second panel instead of showing an empty grid.
            hd95_ax.set_visible(False)

        plt.tight_layout()
        plt.show()

    return df


### With filtered data:

In [None]:
def analyze_segmentation_metrics(vol_ai, vol_gt, dice_scores, hd95_scores=None, plot=True,
                                 max_rel_diff_percent=800):
    """Correlate relative volume difference with DICE/HD95 after outlier filtering.

    Cases whose absolute relative volume difference exceeds
    ``max_rel_diff_percent`` are removed before any statistic is computed.
    The remaining cases are used to print Pearson and Spearman correlations
    of the relative difference against DICE (and HD95 when supplied) and,
    optionally, to draw regression plots.

    Args:
        vol_ai: Volumes produced by the AI model, one per case.
        vol_gt: Ground-truth volumes, one per case.
        dice_scores: DICE score per case.
        hd95_scores: Optional HD95 value per case. When ``None``, the HD95
            statistics and the HD95 plot are skipped.
        plot: If true, show the regression plots.
        max_rel_diff_percent: Largest absolute relative volume difference
            (in percent) a case may have and still be kept. Defaults to 800.

    Returns:
        pandas.DataFrame of the kept cases, re-indexed from 0, with columns
        ``V_AI``, ``V_GT``, ``DICE``, ``rel_vol_diff_percent``,
        ``abs_vol_diff`` and, when HD95 values were supplied, ``HD95``.
    """
    has_hd95 = hd95_scores is not None

    # list(...) keeps the pairing positional, whatever sequence type is passed.
    df = pd.DataFrame({
        'V_AI': list(vol_ai),
        'V_GT': list(vol_gt),
        'DICE': list(dice_scores),
    })

    volume_delta = df['V_AI'] - df['V_GT']
    df['rel_vol_diff_percent'] = (volume_delta / df['V_GT']) * 100
    df['abs_vol_diff'] = volume_delta.abs()

    if has_hd95:
        df['HD95'] = list(hd95_scores)

    # Filtering.
    # A zero ground-truth volume gives an inf/NaN relative difference; the
    # comparison is False for those rows, so they are dropped along with the
    # outliers instead of raising ZeroDivisionError in a Python-level loop.
    keep = df['rel_vol_diff_percent'].abs() <= max_rel_diff_percent
    df = df.loc[keep].reset_index(drop=True)

    # Pearson and Spearman: DICE
    pearson_r, pearson_p = pearsonr(df['rel_vol_diff_percent'], df['DICE'])
    spearman_rho, spearman_p = spearmanr(df['rel_vol_diff_percent'], df['DICE'])

    print(f"Pearson r = {pearson_r:.3f}, p-value = {pearson_p:.4f}")
    print(f"Spearman ρ = {spearman_rho:.3f}, p-value = {spearman_p:.4f}")

    # Pearson and Spearman: HD95.
    # Guarded: the 'HD95' column only exists when hd95_scores was supplied, so
    # running this unconditionally raised KeyError for the default argument.
    if has_hd95:
        pearson_r_hd, pearson_p_hd = pearsonr(df['rel_vol_diff_percent'], df['HD95'])
        spearman_rho_hd, spearman_p_hd = spearmanr(df['rel_vol_diff_percent'], df['HD95'])

        print(f"Pearson r = {pearson_r_hd:.3f}, p-value = {pearson_p_hd:.4f}")
        print(f"Spearman ρ = {spearman_rho_hd:.3f}, p-value = {spearman_p_hd:.4f}")

    # Plotting
    if plot:
        sns.set(style='whitegrid')
        fig, axes = plt.subplots(1, 2, figsize=(14, 5))
        dice_ax, hd95_ax = axes

        # DICE vs Relative Volume Difference
        sns.regplot(data=df, x='rel_vol_diff_percent', y='DICE',
                    scatter_kws={'s': 80}, line_kws={'color': 'blue'}, ax=dice_ax)
        annotate_r2(dice_ax,
                    df['rel_vol_diff_percent'].values,
                    df['DICE'].values,
                    color='blue')

        dice_ax.axhline(0, color='gray', linestyle='--', linewidth=1)
        dice_ax.axvline(0, color='gray', linestyle='--', linewidth=1)
        dice_ax.set_title('DICE vs. Relative Volume Difference')
        dice_ax.set_xlabel('Relative Volume Difference (%)')
        dice_ax.set_ylabel('DICE Score')

        # 95HD vs Relative Volume Difference
        if has_hd95:
            sns.regplot(data=df, x='rel_vol_diff_percent', y='HD95',
                        scatter_kws={'s': 80}, line_kws={'color': 'darkred'}, ax=hd95_ax)
            # Annotation colour matches the dark-red regression line of this panel.
            annotate_r2(hd95_ax,
                        df['rel_vol_diff_percent'].values,
                        df['HD95'].values,
                        color='darkred')
            hd95_ax.axhline(0, color='gray', linestyle='--', linewidth=1)
            hd95_ax.axvline(0, color='gray', linestyle='--', linewidth=1)
            hd95_ax.set_title('95HD vs. Relative Volume Difference')
            hd95_ax.set_xlabel('Relative Volume Difference (%)')
            hd95_ax.set_ylabel('95% Hausdorff Distance')
        else:
            # No HD95 data: hide the second panel instead of showing an empty grid.
            hd95_ax.set_visible(False)

        plt.tight_layout()
        plt.show()

    return df