In [1]:
import numpy as np
import pandas as pd
import pickle
from scipy.stats import spearmanr
from statsmodels.stats.multitest import multipletests

# Pin NumPy's global random state so repeated runs behave the same
np.random.seed(531)


def _read_pickled(path):
    """Return the object deserialized from the pickle file at *path*."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Read both pickled inputs from the current working directory
p5_NPT_QMP_138_species = _read_pickled("p5_NPT.QMP_138_species")
p5_NPT_RMP_138_species = _read_pickled("p5_NPT.RMP_138_species")

# Function to perform correlation analysis
def perform_correlation_analysis(data, covariates, n_rows=5):
    """Spearman-correlate each covariate with every remaining column.

    Every column named in ``covariates`` is paired with every column of
    ``data`` that is *not* a covariate. Each pair is tested with
    ``scipy.stats.spearmanr`` (NaN observations omitted), and the resulting
    p-values are adjusted together with the Benjamini-Hochberg procedure.

    Parameters
    ----------
    data : pandas.DataFrame
        Table holding both the covariate columns and the columns to test
        against them.
    covariates : list of str
        Names of the covariate columns in ``data``.
    n_rows : int or None, default 5
        Number of leading rows of the result to return. The default keeps
        the historical behaviour of returning only the first five rows;
        pass ``None`` to get the complete table.

    Returns
    -------
    pandas.DataFrame
        Columns ``Covariate``, ``Species``, ``Correlation``, ``P`` and
        ``Adjusted P``, one row per (covariate, other column) pair, in
        covariate-major order.

    Notes
    -----
    Pairs whose p-value is not finite (``spearmanr`` yields NaN when one
    input is constant) are excluded from the multiple-testing correction and
    keep NaN in ``Adjusted P``. The adjustment is therefore computed over the
    testable pairs only.
    """
    covariate_df = data[covariates]
    species = data.drop(columns=covariates)

    # One record per (covariate, species) pair
    records = []
    for covariate in covariate_df.columns:
        covariate_values = covariate_df[covariate]
        for species_col in species.columns:
            corr, pval = spearmanr(covariate_values, species[species_col], nan_policy='omit')
            records.append((covariate, species_col, corr, pval))

    corr_df = pd.DataFrame(records, columns=["Covariate", "Species", "Correlation", "P"])

    # Benjamini-Hochberg adjustment restricted to finite p-values: a NaN fed
    # into the step-up procedure would otherwise contaminate the adjusted
    # values of the valid tests as well.
    p_values = corr_df["P"].to_numpy(dtype=float)
    adjusted = np.full(p_values.shape, np.nan)
    testable = np.isfinite(p_values)
    if testable.any():
        adjusted[testable] = multipletests(p_values[testable], method='fdr_bh')[1]
    corr_df["Adjusted P"] = adjusted

    return corr_df if n_rows is None else corr_df.head(n_rows)

# Covariate columns tested against every other column in both tables
covariate_names = ["Calprotectin", "Moisture", "BMI"]

# QMP table: run the analysis, then display the returned rows
QMP_species_corr_adjP = perform_correlation_analysis(
    p5_NPT_QMP_138_species,
    covariate_names,
)
print(QMP_species_corr_adjP)

# RMP table: same analysis and display
RMP_species_corr_adjP = perform_correlation_analysis(
    p5_NPT_RMP_138_species,
    covariate_names,
)
print(RMP_species_corr_adjP)


FileNotFoundError: [Errno 2] No such file or directory: 'p5_NPT.QMP_138_species'