In [None]:
from itertools import combinations

import numpy as np
import pandas as pd
from scipy.stats import mannwhitneyu, wilcoxon

In [None]:
# Function to convert sample map to groups
def taxo_sample_to_grps(sample_map, group, ID):
    """Build a mapping from each group label to the sample IDs in that group.

    Parameters
    ----------
    sample_map : pandas.DataFrame
        Table containing at least the columns named by ``group`` and ``ID``.
    group : column label
        Column of ``sample_map`` holding the group label of each row.
    ID : column label
        Column of ``sample_map`` holding the sample identifier of each row.

    Returns
    -------
    dict
        ``{group_label: [sample_id, ...]}``. Keys follow the order in which
        the labels first appear in ``sample_map[group]``; each list keeps the
        row order of ``sample_map``.
    """
    labels = sample_map[group]
    return {
        label: sample_map.loc[labels == label, ID].tolist()
        for label in labels.unique()
    }

In [None]:
# Function to calculate fold change and p-value for volcano plot
def zy_raw_profile_to_volcano(dt, sample_map, group, ID, cutoff=10):
    """Compute fold change and p-value per row for every pair of groups.

    For each row of ``dt`` and each unordered pair of group labels found in
    ``sample_map[group]``, this computes the two group means, which group has
    the larger mean, a symmetric fold change and a two-sided Mann-Whitney U
    (Wilcoxon rank-sum) p-value.

    Parameters
    ----------
    dt : pandas.DataFrame
        Profile table. Its columns must include every value of
        ``sample_map[ID]``; each row is tested separately and its index label
        is reported in the ``name`` column.
    sample_map : pandas.DataFrame
        Table with one row per sample, containing the ``group`` and ``ID``
        columns.
    group : column label
        Column of ``sample_map`` holding the group label of each sample.
    ID : column label
        Column of ``sample_map`` holding the sample identifiers (the column
        labels of ``dt``).
    cutoff : int, default 10
        Currently unused; accepted only so existing calls keep working.

    Returns
    -------
    pandas.DataFrame
        One row per (row of ``dt``, group pair) with columns ``name``, ``g1``,
        ``g2``, ``m1``, ``m2``, ``enriched``, ``fold_change`` and ``pvalue``.
        The four numeric columns are float. ``enriched`` is ``g1`` only when
        ``m1 > m2`` (ties go to ``g2``). ``fold_change`` is
        ``max(m1 / m2, m2 / m1)``, so it is ``inf`` when exactly one mean is
        zero and ``nan`` when both are.

    Raises
    ------
    KeyError
        If ``group``/``ID`` are not columns of ``sample_map`` or a sample ID
        is not a column of ``dt``.

    Notes
    -----
    The groups are independent sets of samples, usually of different sizes,
    so the unpaired rank-sum test is used. The paired signed-rank test
    (``scipy.stats.wilcoxon``) requires equal-length, matched observations
    and raises on unequal group sizes.
    """
    # Keep only the samples listed in the sample map, in sample-map order.
    dt = dt[sample_map[ID]]

    # Group label -> list of sample IDs, and every unordered pair of labels.
    grps = taxo_sample_to_grps(sample_map, group, ID)
    com = list(combinations(grps.keys(), 2))

    # Slice each group's columns once (rows x samples-in-group) instead of
    # re-indexing for every row and every pair inside the loop.
    group_values = {g: dt[ids].to_numpy(dtype=float) for g, ids in grps.items()}

    result = []
    for n, name in enumerate(dt.index):
        for g1, g2 in com:
            dt1 = group_values[g1][n]
            dt2 = group_values[g2][n]

            m1 = np.mean(dt1)
            m2 = np.mean(dt2)
            enrich = g1 if m1 > m2 else g2

            # Zero means legitimately yield inf/nan here; keep those values
            # but do not flood the notebook with RuntimeWarnings.
            with np.errstate(divide="ignore", invalid="ignore"):
                fold = max(m1 / m2, m2 / m1)

            # Unpaired two-sample rank test; `alternative` is spelled out so
            # the result does not depend on the scipy version's default.
            _, p = mannwhitneyu(dt1, dt2, alternative="two-sided")

            result.append([name, g1, g2, m1, m2, enrich, fold, p])

    result_df = pd.DataFrame(
        result,
        columns=["name", "g1", "g2", "m1", "m2", "enriched", "fold_change", "pvalue"],
    )

    # Force float dtype so an empty result has the same schema as a full one.
    return result_df.astype(
        {"m1": float, "m2": float, "fold_change": float, "pvalue": float}
    )