In [2]:
import os
import pandas as pd
import itertools
import csv
import chime
from scipy import stats

import numpy as np

def metabolite(file_name, if_dss):
    #Read data
    filtered_data = pd.read_csv(file_name, header=None, na_values=["", "NaN", "nan", "NAN", "NA", "na"], keep_default_na=True)
    #print("test")
    #filtered_data = filtered_data.apply(pd.to_numeric, errors="coerce")

    filtered_data = filtered_data.replace("NaN", np.nan)
    
    # Extract metabolite names. Skip the first 2 rows (headers + Sample IDs)
    names = filtered_data.iloc[2:, 0].tolist()
        
    # Generate all 2-element combinations
    pairs = list(itertools.combinations(names, 2))
    results = []

    #temp = 0

  
    for met1, met2 in pairs:
        # Find the row where the first column = met1
        l1 = filtered_data[filtered_data.iloc[:, 0] == met1].iloc[:, 1:]
        #list1 = l1.iloc[:, 1:]
        #print(l1)
        
        # Find the row where the first column = met2
        l2 = filtered_data[filtered_data.iloc[:, 0] == met2].iloc[:, 1:]
        #print(l2)
        #list2 = l2.iloc[:, 1:]
                 
        # Converts to float
        row1 = l1.astype(float).values.flatten()
        row2 = l2.astype(float).values.flatten()
            
        #print(f" {met1} before: \n{row1}\n")
        #print(f"{met2} before:\n {row2}\n")
            
        #remove any comparisons with NaN values
        df = pd.DataFrame({'x': row1, 'y': row2}).dropna()
        row1, row2 = df['x'], df['y']
        
            #mask  = ~np.isnan(row1) & ~np.isnan(row2)
            #r1_masked = row1[mask]
            #r2_masked = row2[mask]

        n = len(row1)
            
            
        if len(row1) <3:
            corr, pval = np.nan, np.nan
            n= 0
            results.append((met1, met2, corr, pval, n))
            #print("CORR and PVAL: np.nan\n")
        
        else:
            #print(f"{met1}: \n{row1}\n")
            #print(f"{met2}:\n {row2}\n")
                
            # Calculate Spearman correlation
            corr, pval = stats.spearmanr(row1, row2)
        
            #print(f"CORR AFTER: {corr}\n")
            #print(f"PVAL AFTER: {pval}\n")
                    
            # Append metabolite names + correlation + p-value
            results.append((met1, met2, corr, pval, n))
            
        
    # Create output file name inside "Metabolite Pairs"
    parts = os.path.basename(file_name).split("_")
    prefix = "_".join(parts[:2])
    
    output_folder = "updated_metabolite_pairs"
    os.makedirs(output_folder, exist_ok=True)
    
    output_file = os.path.join(output_folder, f"COMBINATIONS_{prefix}.csv")
    
    with open(output_file, "w", newline='', encoding="utf-8") as outfile:
        writer = csv.writer(outfile)
        writer.writerow(["Metabolite 1", "Metabolite 2", "Spearman Coefficient", "p-value", "n"])
        writer.writerows(results)

    print(f"Saved {output_file} with {len(results)} metabolite pairs.")


metabolite("normalization/feci_DSS_post_norm.csv", False)
metabolite("normalization/feci_VECPAC_post_norm.csv", False)
metabolite("normalization/feci_LPS_post_norm.csv", False)

chime.success()


Saved updated_metabolite_pairs/COMBINATIONS_feci_DSS.csv with 88410 metabolite pairs.


  corr, pval = stats.spearmanr(row1, row2)


Saved updated_metabolite_pairs/COMBINATIONS_feci_VECPAC.csv with 88410 metabolite pairs.
Saved updated_metabolite_pairs/COMBINATIONS_feci_LPS.csv with 88410 metabolite pairs.
