In [None]:
import numpy as np
import pandas as pd
from scipy.spatial.distance import pdist, squareform
from skbio.stats.distance import permanova, DistanceMatrix
from sklearn.metrics import pairwise_distances
from multiprocessing import Pool, cpu_count

In [None]:
def adjusted_r2(r2, n_observations, df_model):
    """
    Calculate the adjusted R-squared value.

    Applies ``1 - (1 - r2) * (n_observations - 1) / (n_observations - df_model - 1)``.

    Parameters
    ----------
    r2 : float
        Unadjusted R-squared.
    n_observations : int
        Number of observations used in the fit.
    df_model : int
        Degrees of freedom used by the model terms.

    Returns
    -------
    float
        The adjusted R-squared, or NaN when ``n_observations - df_model - 1``
        is not positive (no residual degrees of freedom, so the adjustment
        is undefined).
    """
    numerator = (1 - r2) * (n_observations - 1)
    denominator = n_observations - df_model - 1
    # Without residual degrees of freedom the ratio is undefined. Dividing by
    # NaN propagates NaN instead of raising ZeroDivisionError (denominator 0)
    # or returning a meaningless value (negative denominator).
    if np.ndim(denominator) == 0 and denominator <= 0:
        denominator = float("nan")
    return 1 - (numerator / denominator)

In [None]:
def zy_adonis(query, target, method="bray"):
    """
    Perform PERMANOVA (adonis) for each column in `query` against the `target` data.

    Parameters
    ----------
    query : pandas.DataFrame
        One grouping variable per column. The row index holds the sample
        identifiers used to select columns of `target`.
    target : pandas.DataFrame
        Data whose columns are selected by the sample identifiers of `query`
        and then transposed before distances are computed.
    method : str, default "bray"
        Distance metric passed to ``pairwise_distances``. The short name
        ``"bray"`` is accepted as an alias for ``"braycurtis"``.

    Returns
    -------
    pandas.DataFrame
        Columns ``name``, ``r2``, ``pvalue`` and ``adjust.R2``, one row per
        tested column. Columns whose non-missing labels form a single group,
        or one group per sample, are skipped. Columns for which PERMANOVA
        raises are reported on stdout and omitted.
    """
    # scikit-learn names the Bray-Curtis metric "braycurtis"; translate the
    # vegan-style short name used as this function's default.
    metric = "braycurtis" if method == "bray" else method
    results = []

    for column, groups in query.items():
        print(f"Processing column: {column}")

        # Drop missing labels before the degeneracy check so that it is
        # evaluated on the samples that are actually tested.
        groups = groups.dropna()
        n_samples = len(groups)
        n_groups = groups.nunique()
        if n_groups <= 1 or n_groups == n_samples:
            continue  # Skip if all values are identical or unique

        # Keep only the samples that have a label for this column
        filtered_target = target[groups.index]

        # Compute distance matrix
        dist_matrix = pairwise_distances(filtered_target.T, metric=metric)
        dist_matrix = DistanceMatrix(dist_matrix, ids=groups.index)

        # Perform PERMANOVA
        try:
            perm_results = permanova(dist_matrix, groups.values)
            # The result Series stores the pseudo-F value under the
            # 'test statistic' label.
            pseudo_f = perm_results['test statistic']
            # With F = (SS_between / (k - 1)) / (SS_within / (n - k)),
            # R2 = SS_between / SS_total = F*(k-1) / (F*(k-1) + n - k).
            df_model = n_groups - 1
            explained = pseudo_f * df_model
            r2 = explained / (explained + n_samples - n_groups)
            adjusted_r2_val = adjusted_r2(r2, n_samples, df_model)
            results.append([column, r2, perm_results['p-value'], adjusted_r2_val])
        except Exception as e:
            # Best effort: report the failing column and continue with the rest.
            print(f"Error processing column {column}: {e}")

    # Convert results to DataFrame
    results_df = pd.DataFrame(results, columns=["name", "r2", "pvalue", "adjust.R2"])
    return results_df

In [None]:
def _zy_adonis_column(item, target, method):
    """Run PERMANOVA for one ``(column name, labels)`` pair; module-level so Pool can pickle it."""
    column, groups = item

    # Drop missing labels before the degeneracy check so that it is evaluated
    # on the samples that are actually tested.
    groups = groups.dropna()
    n_samples = len(groups)
    n_groups = groups.nunique()
    if n_groups <= 1 or n_groups == n_samples:
        return None  # Skip if all values are identical or unique

    # scikit-learn names the Bray-Curtis metric "braycurtis"; translate the
    # vegan-style short name used as the public default.
    metric = "braycurtis" if method == "bray" else method

    # Keep only the samples that have a label for this column
    filtered_target = target[groups.index]

    # Compute distance matrix
    dist_matrix = pairwise_distances(filtered_target.T, metric=metric)
    dist_matrix = DistanceMatrix(dist_matrix, ids=groups.index)

    # Perform PERMANOVA
    try:
        perm_results = permanova(dist_matrix, groups.values)
        # The result Series stores the pseudo-F value under the
        # 'test statistic' label.
        pseudo_f = perm_results['test statistic']
        # With F = (SS_between / (k - 1)) / (SS_within / (n - k)),
        # R2 = SS_between / SS_total = F*(k-1) / (F*(k-1) + n - k).
        df_model = n_groups - 1
        explained = pseudo_f * df_model
        r2 = explained / (explained + n_samples - n_groups)
        adjusted_r2_val = adjusted_r2(r2, n_samples, df_model)
        return [column, r2, perm_results['p-value'], adjusted_r2_val]
    except Exception as e:
        # Best effort: report the failing column and continue with the rest.
        print(f"Error processing column {column}: {e}")
        return None


def zy_parallel_adonis(query, target, method="bray", n_jobs=None):
    """
    Parallelized version of zy_adonis.

    Each column of `query` is tested in a worker process. The worker is a
    module-level function because ``Pool.map`` pickles the callable it is
    given, and a function nested inside this one cannot be pickled.

    Parameters
    ----------
    query : pandas.DataFrame
        One grouping variable per column. The row index holds the sample
        identifiers used to select columns of `target`.
    target : pandas.DataFrame
        Data whose columns are selected by the sample identifiers of `query`
        and then transposed before distances are computed.
    method : str, default "bray"
        Distance metric passed to ``pairwise_distances``. The short name
        ``"bray"`` is accepted as an alias for ``"braycurtis"``.
    n_jobs : int, optional
        Number of worker processes. Defaults to ``cpu_count()``. With one job
        or at most one column the work runs in the current process.

    Returns
    -------
    pandas.DataFrame
        Columns ``name``, ``r2``, ``pvalue`` and ``adjust.R2``, one row per
        tested column. Skipped or failed columns are omitted.

    Raises
    ------
    ValueError
        If `n_jobs` is smaller than 1.

    Notes
    -----
    NOTE(review): under the "spawn" start method, worker processes import the
    worker by module path, so this code needs to live in an importable module
    rather than an interactive session - confirm for your platform.
    """
    from functools import partial

    if n_jobs is None:
        n_jobs = cpu_count()  # Use all available CPUs if not specified
    if n_jobs < 1:
        raise ValueError("Number of processes must be at least 1")

    # Ship each task as (column name, label Series) so that only the labels
    # a worker needs travel with the task.
    tasks = list(query.items())
    worker = partial(_zy_adonis_column, target=target, method=method)

    if n_jobs == 1 or len(tasks) <= 1:
        # Nothing to parallelize; avoid the cost of starting a pool.
        results = [worker(task) for task in tasks]
    else:
        # Never start more processes than there are columns to test.
        with Pool(min(n_jobs, len(tasks))) as pool:
            results = pool.map(worker, tasks)

    # Remove None values and convert to DataFrame
    results = [res for res in results if res is not None]
    results_df = pd.DataFrame(results, columns=["name", "r2", "pvalue", "adjust.R2"])
    return results_df