In [None]:
import numpy as np
import pandas as pd
from scipy.stats import spearmanr, pearsonr
from tqdm import tqdm

In [None]:
def zy_corr(x, y=None, method="spearman", output_type="matrix", step=100, na_rm=True):
    """
    Calculate correlations between rows of a matrix, or between rows of two matrices.

    Rows are paired positionally (column i of one row against column i of the
    other); column labels are not used for alignment.

    Args:
        x (pd.DataFrame): Numeric dataframe where rows represent entities and
            columns represent observations.
        y (pd.DataFrame, optional): Optional second numeric dataframe for
            cross-correlation; must have the same number of columns as `x`.
            When None, rows of `x` are correlated with each other.
        method (str): Correlation method ("spearman" or "pearson").
        output_type (str): Output format ("matrix" or "long").
        step (int): Print a progress line every `step` rows of `x`
            (default 100). Zero, negative or None disables these prints.
        na_rm (bool): If True (default), observations where either row is NaN
            are dropped pairwise before computing the correlation, for both
            methods. If False, rows are passed to SciPy unchanged.

    Returns:
        dict or pd.DataFrame:
            - If `output_type="matrix"`, returns a dictionary with 'p' and 'c' matrices for p-values and correlations.
            - If `output_type="long"`, returns a long-form dataframe with columns ["name_a", "name_b", "corr", "pval"].
              Without `y`, only pairs with j >= i (diagonal included) are listed.

        With `na_rm=True`, a pair with fewer than two shared non-NaN
        observations gets NaN for both the correlation and the p-value.

    Raises:
        ValueError: If `method` or `output_type` is not supported, or if `x`
            and `y` have a different number of columns.
    """
    corr_funcs = {"spearman": spearmanr, "pearson": pearsonr}
    # Validate up front so a bad argument fails immediately (even for empty
    # input) instead of deep inside the loop or by silently returning None.
    if method not in corr_funcs:
        raise ValueError(f"Unsupported method: {method}. Use 'spearman' or 'pearson'.")
    if output_type not in ("matrix", "long"):
        raise ValueError(f"Unsupported output_type: {output_type}. Use 'matrix' or 'long'.")
    corr_func = corr_funcs[method]

    # The progress bar is cosmetic: import it locally and fall back to plain
    # iteration when tqdm is not installed.
    try:
        from tqdm import tqdm as progress_bar
    except ImportError:
        progress_bar = None

    def correlation(a, b):
        """Return (corr, pval) for two equally long 1-D float arrays."""
        if na_rm:
            # pearsonr has no NaN handling of its own, so drop incomplete
            # pairs here for both methods.
            keep = ~(np.isnan(a) | np.isnan(b))
            a, b = a[keep], b[keep]
            if a.size < 2:
                # Too few shared observations to correlate; report NaN rather
                # than aborting the whole run.
                return np.nan, np.nan
        corr, pval = corr_func(a, b)
        return float(corr), float(pval)

    def iter_rows(n):
        """Yield 0..n-1 while showing the progress bar and periodic prints."""
        rows = range(n)
        if progress_bar is not None:
            rows = progress_bar(rows, desc="Processing rows of x")
        for i in rows:
            if step and step > 0 and i % step == 0:
                print(f"Processing row {i+1}/{n}")
            yield i

    # Convert once to plain float arrays: the inputs are never modified (so no
    # defensive copy is needed) and row access is far cheaper than .iloc.
    x_values = x.to_numpy(dtype=float)
    name_x = x.index.tolist()

    symmetric = y is None
    if symmetric:
        y_values, name_y = x_values, name_x
    else:
        y_values = y.to_numpy(dtype=float)
        name_y = y.index.tolist()
        if x_values.shape[1] != y_values.shape[1]:
            raise ValueError(
                "x and y must have the same number of columns "
                f"(got {x_values.shape[1]} and {y_values.shape[1]})."
            )

    num_x, num_y = len(name_x), len(name_y)
    want_matrix = output_type == "matrix"
    if want_matrix:
        cor_result = np.full((num_x, num_y), np.nan)
        p_result = np.full((num_x, num_y), np.nan)
    else:
        records = []

    for i in iter_rows(num_x):
        # Within a single matrix the result is symmetric, so only the upper
        # triangle (including the diagonal) is computed.
        first_j = i if symmetric else 0
        for j in range(first_j, num_y):
            corr, pval = correlation(x_values[i], y_values[j])
            if want_matrix:
                cor_result[i, j] = corr
                p_result[i, j] = pval
                if symmetric:
                    cor_result[j, i] = corr
                    p_result[j, i] = pval
            else:
                records.append([name_x[i], name_y[j], corr, pval])

    if want_matrix:
        cor_df = pd.DataFrame(cor_result, index=name_x, columns=name_y)
        pval_df = pd.DataFrame(p_result, index=name_x, columns=name_y)
        return {"c": cor_df, "p": pval_df}

    return pd.DataFrame(records, columns=["name_a", "name_b", "corr", "pval"])
