# In [None]:
import pandas as pd
import numpy as np
from sklearn.manifold import MDS
from sklearn.metrics import pairwise_distances
from scipy.spatial.distance import pdist, squareform
from statsmodels.stats.anova import AnovaRM
from scipy.stats import f_oneway
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse
import matplotlib.transforms as transforms

# In [None]:
def _distance_matrix(profile, method):
    """Return the square sample-by-sample distance matrix of *profile*.

    Samples are the columns of *profile*. *method* is any metric accepted by
    ``scipy.spatial.distance.pdist``; the vegan-style name ``"bray"`` is
    accepted as an alias for ``"braycurtis"``.
    """
    aliases = {"bray": "braycurtis"}
    metric = aliases.get(method, method) if isinstance(method, str) else method
    values = profile.T.to_numpy(dtype=float)
    dist = squareform(pdist(values, metric=metric))
    if not np.all(np.isfinite(dist)):
        raise ValueError(
            "distance matrix contains non-finite values; check for empty "
            "(all-zero) samples or missing data"
        )
    return dist


def _pcoa(dist, n_axes=2):
    """Classical PCoA: return (coordinates, fraction of variance per axis).

    Negative eigenvalues (possible for non-Euclidean distances) are clipped
    to zero before computing coordinates and the explained fractions.
    """
    n = dist.shape[0]
    # Gower double-centring of the squared distances.
    centering = np.eye(n) - 1.0 / n
    gower = -0.5 * centering @ (dist ** 2) @ centering
    eigvals, eigvecs = np.linalg.eigh(gower)
    order = np.argsort(eigvals)[::-1]
    eigvals = np.clip(eigvals[order], 0.0, None)
    eigvecs = eigvecs[:, order]

    # Zero-pad so callers always get n_axes columns, even with < n_axes samples.
    coords = np.zeros((n, n_axes))
    explained = np.zeros(n_axes)
    kept = min(n_axes, n)
    coords[:, :kept] = eigvecs[:, :kept] * np.sqrt(eigvals[:kept])
    total = eigvals.sum()
    if total > 0:
        explained[:kept] = eigvals[:kept] / total
    return coords, explained


def _permanova(dist, labels, n_perm=999, seed=42):
    """One-way PERMANOVA on a distance matrix: return (R2, permutation p).

    Returns ``(nan, nan)`` when there are fewer than two groups or no
    residual degrees of freedom. The permutations are seeded so repeated
    calls give the same p-value.
    """
    codes, uniques = pd.factorize(labels)
    n, k = dist.shape[0], len(uniques)
    if k < 2 or n <= k:
        return float("nan"), float("nan")

    sq = dist ** 2
    # Every pair appears twice in the full matrix, hence the factor 2.
    ss_total = sq.sum() / (2.0 * n)

    def partition(assignment):
        ss_within = 0.0
        for g in range(k):
            members = np.flatnonzero(assignment == g)
            if members.size:
                ss_within += sq[np.ix_(members, members)].sum() / (2.0 * members.size)
        ss_between = ss_total - ss_within
        if ss_within <= 0:
            pseudo_f = np.inf if ss_between > 0 else 0.0
        else:
            pseudo_f = (ss_between / (k - 1)) / (ss_within / (n - k))
        return ss_between, pseudo_f

    ss_between_obs, f_obs = partition(codes)
    r2 = ss_between_obs / ss_total if ss_total > 0 else float("nan")

    rng = np.random.default_rng(seed)
    perm_f = np.array([partition(rng.permutation(codes))[1] for _ in range(n_perm)])
    # Count ties as "at least as extreme" so rounding noise cannot lower p.
    as_extreme = np.sum((perm_f >= f_obs) | np.isclose(perm_f, f_obs))
    p_value = (as_extreme + 1) / (n_perm + 1)
    return float(r2), float(p_value)


def zy_pcoa(
    dt=None, sample_map=None, group=None, ID=None, sample_color=None,
    ado_method="bray", pca_method="bray",
    levels=0.95, star_plot=False, ellipse_plot=True,
    title="PCoA", x="V1", y="V2"
):
    """Draw a PCoA scatter plot of samples, annotated with a PERMANOVA test.

    Parameters
    ----------
    dt : pandas.DataFrame
        Profile table whose columns are sample IDs (features in rows).
    sample_map : pandas.DataFrame
        Sample metadata containing the columns named by *ID* and *group*.
    group : str
        Column of *sample_map* holding the group of each sample.
    ID : str
        Column of *sample_map* holding the sample IDs; used to select and
        order the columns of *dt*.
    sample_color : None, str, sequence or dict, optional
        Colours for the groups: a seaborn palette name, a sequence of colours
        in order of first appearance of the groups, or a ``{group: colour}``
        mapping. Defaults to the "tab10" palette.
    ado_method : str
        Distance metric for the PERMANOVA (``"bray"`` means Bray-Curtis).
    pca_method : str
        Distance metric for the ordination (``"bray"`` means Bray-Curtis).
    levels : float
        Confidence level of the group ellipses, strictly between 0 and 1.
    star_plot : bool
        If true, connect every sample to its group centroid.
    ellipse_plot : bool
        If true, draw a confidence ellipse for every group with >= 3 samples.
    title, x, y : str
        Plot title and axis label prefixes.

    Returns
    -------
    dict
        ``'plot'``: the matplotlib Axes that was drawn on;
        ``'new_label'``: mapping ``group -> "group (n samples)"``.

    Raises
    ------
    ValueError
        If a required argument is missing, *levels* is out of range, too few
        colours are supplied, or the distance matrix is not finite.
    """
    if dt is None or sample_map is None or group is None or ID is None:
        raise ValueError("dt, sample_map, group and ID are all required")
    if ellipse_plot and not 0 < levels < 1:
        raise ValueError("levels must be a confidence level strictly between 0 and 1")

    # Align profile and grouping sample names
    dt = dt.loc[:, sample_map[ID]]

    # Resolve colours to an explicit group -> colour mapping so the points
    # and the ellipses/stars are guaranteed to use the same colour per group.
    group_values = sample_map[group].unique()
    n_groups = len(group_values)
    if sample_color is None:
        sample_color = sns.color_palette("tab10", n_colors=n_groups)
    elif isinstance(sample_color, str):
        sample_color = sns.color_palette(sample_color, n_colors=n_groups)
    if isinstance(sample_color, dict):
        color_map = dict(sample_color)
    else:
        colors = list(sample_color)
        if len(colors) < n_groups:
            raise ValueError(
                f"sample_color provides {len(colors)} colours for {n_groups} groups"
            )
        color_map = dict(zip(group_values, colors))

    # Summarize groups for labels
    group_summary = sample_map.groupby(group).size().reset_index(name='count')
    group_summary['new_label'] = group_summary.apply(
        lambda row: f"{row[group]} ({row['count']})", axis=1
    )
    new_label = dict(zip(group_summary[group], group_summary['new_label']))

    # PERMANOVA (adonis-style) on the ado_method distances
    ado_dist = _distance_matrix(dt, ado_method)
    ado_r2, ado_p = _permanova(ado_dist, sample_map[group].to_numpy())

    # PCoA on the pca_method distances; reuse the matrix when metrics match
    pcoa_dist = ado_dist if pca_method == ado_method else _distance_matrix(dt, pca_method)
    coords, explained = _pcoa(pcoa_dist, n_axes=2)

    # Plot
    df_coords = pd.DataFrame(coords, columns=['V1', 'V2'])
    df_coords[ID] = dt.columns
    df_coords = df_coords.merge(sample_map, on=ID)

    # Keep an explicit Axes handle: after plt.show() the "current" axes may
    # no longer be the one that was drawn on.
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.scatterplot(data=df_coords, x='V1', y='V2', hue=group,
                    palette=color_map, s=100, ax=ax)

    if star_plot:
        for g, data in df_coords.groupby(group):
            n_pts = len(data)
            # Each column of the 2 x n arrays is one centroid -> sample segment.
            seg_x = np.vstack([np.full(n_pts, data['V1'].mean()), data['V1'].to_numpy()])
            seg_y = np.vstack([np.full(n_pts, data['V2'].mean()), data['V2'].to_numpy()])
            ax.plot(seg_x, seg_y, color=color_map[g], linewidth=0.8, alpha=0.6)

    if ellipse_plot:
        # For a bivariate normal, the squared Mahalanobis radius enclosing a
        # probability p is the chi-square(2) quantile, i.e. -2 * ln(1 - p).
        n_std = np.sqrt(-2.0 * np.log(1.0 - levels))
        for g, data in df_coords.groupby(group):
            if len(data) < 3:
                # Too few points for a meaningful covariance ellipse.
                continue
            confidence_ellipse(data['V1'], data['V2'], ax=ax, n_std=n_std,
                               edgecolor=color_map[g], facecolor='none')

    ax.axhline(0, linestyle='--', color='grey', linewidth=0.7)
    ax.axvline(0, linestyle='--', color='grey', linewidth=0.7)
    ax.set_title(f"{title}\nR2={ado_r2:.4f}\np-value={ado_p:.4f}")
    ax.set_xlabel(f"{x} ({explained[0]*100:.2f}%)")
    ax.set_ylabel(f"{y} ({explained[1]*100:.2f}%)")
    ax.legend(title=group)
    fig.tight_layout()
    plt.show()

    return {'plot': ax, 'new_label': new_label}

def confidence_ellipse(x, y, ax, n_std=1.0, **kwargs):
    """
    Add the covariance ellipse of *x* and *y* to *ax*.

    Parameters
    ----------
    x, y : array-like
        Coordinates of the points; must contain the same number of values
        (at least two).
    ax : matplotlib.axes.Axes
        Axes the ellipse patch is added to.
    n_std : float
        Multiplier applied to the standard deviations of *x* and *y*; it
        sets the size of the ellipse.
    **kwargs
        Forwarded to ``matplotlib.patches.Ellipse``. ``facecolor`` defaults
        to ``'none'`` so the ellipse is an outline that does not hide points.

    Returns
    -------
    The *ax* that was passed in.

    Raises
    ------
    ValueError
        If *x* and *y* differ in size or hold fewer than two points.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    if x.size != y.size:
        raise ValueError("x and y must be the same size")
    if x.size < 2:
        raise ValueError("at least two points are required to estimate a covariance")

    kwargs.setdefault("facecolor", "none")

    cov = np.cov(x, y)
    var_x, var_y = cov[0, 0], cov[1, 1]
    denom = np.sqrt(var_x * var_y)
    # Zero variance on either axis makes the correlation undefined; treat it
    # as uncorrelated so the geometry stays finite.
    pearson = cov[0, 1] / denom if denom > 0 else 0.0
    # Guard against rounding pushing |pearson| marginally above 1.
    pearson = float(np.clip(pearson, -1.0, 1.0))

    # Ellipse of the standardized data: axes along the diagonals with radii
    # sqrt(1 +/- pearson); it is rotated and rescaled below.
    ellipse_radius_x = np.sqrt(1 + pearson)
    ellipse_radius_y = np.sqrt(1 - pearson)
    ellipse = Ellipse((0, 0), width=ellipse_radius_x * 2,
                      height=ellipse_radius_y * 2, **kwargs)

    scale_x = np.sqrt(var_x) * n_std
    scale_y = np.sqrt(var_y) * n_std
    mean_x = np.mean(x)
    mean_y = np.mean(y)

    # Rotate onto the diagonals, stretch by the per-axis spread, then move
    # the centre to the mean of the data.
    transf = transforms.Affine2D() \
        .rotate_deg(45) \
        .scale(scale_x, scale_y) \
        .translate(mean_x, mean_y)

    ellipse.set_transform(transf + ax.transData)
    ax.add_patch(ellipse)
    return ax
