In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import ttest_ind
import os

# Read the Ktrans cluster table from the Excel workbook
excel_path = r"E:\iron man\Ktrans\Categorized_ktrans_2.xlsx"
df = pd.read_excel(excel_path, engine="openpyxl")

# Translate the numeric `label` column into display names (1 -> "TP", 0 -> "PsP");
# any other label value maps to NaN
df["group"] = df["label"].map({1: "TP", 0: "PsP"})

# Reshape to long form (one row per sample per cluster column) so seaborn can
# place the cluster name on the x axis and split the boxes by group
df_melted = pd.melt(
    df,
    id_vars=["group"],
    value_vars=["high", "low", "nec"],
    var_name="Feature",
    value_name="Value",
)

# Initialize the plot: one x position per cluster, one box per group.
# `order` and `hue_order` are pinned explicitly because seaborn otherwise lays
# categories out in order of first appearance in the data, while the mean
# annotations drawn further down assume TP is the left box and PsP the right
# box at each x position.
plt.figure(figsize=(7, 6))
sns.boxplot(
    x="Feature",
    y="Value",
    hue="group",
    data=df_melted,
    order=['high', 'low', 'nec'],
    hue_order=['TP', 'PsP'],
    palette={"TP": "#fffac8", "PsP": "#baffc9"},
    width=0.6
)

# Compute the mean of each (cluster, group) pair for annotation
grouped_means = df_melted.groupby(['Feature', 'group'])['Value'].mean().reset_index()
mean_lookup = grouped_means.set_index(['Feature', 'group'])['Value']

# Centered mean plotting with bold text.
# Layout assumption: the box plot draws clusters at x = 0, 1, 2 in
# `feature_order` and, within each cluster, the groups in `group_order`.
feature_order = ['high', 'low', 'nec']
group_order = ['TP', 'PsP']
box_width = 0.6  # must match the `width` passed to sns.boxplot
# Seaborn splits the total width evenly between hue levels, so neighbouring
# box centres are box_width / n_groups apart (0.3 here -> centres at i +/- 0.15).
width_offset = box_width / len(group_order)

for i, feature in enumerate(feature_order):
    for j, group in enumerate(group_order):
        mean_val = mean_lookup.get((feature, group))
        if mean_val is None or mean_val != mean_val:
            # No data (or only NaN) for this cluster/group: nothing to annotate
            continue

        # Centre of the j-th dodged box around the cluster position i
        x_pos = i + (j - (len(group_order) - 1) / 2) * width_offset

        # Plot red horizontal mean line
        plt.plot([x_pos - 0.08, x_pos + 0.08], [mean_val, mean_val],
                 color='red', lw=2.5, zorder=3)

        # Add bold mean value just above the line
        plt.text(x_pos, mean_val + 0.01, f'{mean_val:.2f}',
                 ha='center', va='bottom', fontsize=10, fontweight='bold', color='black')

# Perform Welch t-tests (unequal variances) between label 1 and label 0
# for each cluster column and display the p-values
p_values_text = []
for feature in feature_order:
    # Drop missing cells first: a single NaN would otherwise make the
    # statistic and the p-value NaN
    tp_vals = df.loc[df['label'] == 1, feature].dropna()
    psp_vals = df.loc[df['label'] == 0, feature].dropna()
    t_stat, p_val = ttest_ind(tp_vals, psp_vals, equal_var=False)
    if p_val < 0.001:
        # Avoid printing a misleading "p = 0.000" for very small p-values
        p_values_text.append(f"{feature}: p < 0.001")
    else:
        p_values_text.append(f"{feature}: p = {p_val:.3f}")

# Add p-values as a text box positioned in figure coordinates
p_text = "\n".join(p_values_text)
plt.gcf().text(0.95, 0.70, p_text, fontsize=10, va='top', ha='right',
               bbox=dict(boxstyle="round,pad=0.5", facecolor="white", edgecolor="gray"))

# Title, axis labels and legend on the current axes
ax = plt.gca()
ax.set_title("Cluster of Ktrans")
ax.set_xlabel("Cluster")
ax.set_ylabel("Percentage of Pixels")
ax.legend(title="Group", loc="upper right")
plt.tight_layout()

# Write the figure next to the source workbook, then display it
output_dir = os.path.dirname(excel_path)
output_path = os.path.join(output_dir, "Ktrans_boxplot_highres3.png")
plt.savefig(output_path, dpi=1000)

plt.show()
