In [1]:
import pandas as pd
import numpy as np

# Input table location and the cut-offs swept for BOTH the per-dataset
# p-value filter and the FDR filter (every pair of values is evaluated).
input_path = "./Meta-analysis + FDR Table.csv"
thresholds = [1.0, 0.3, 0.25, 0.2, 0.15, 0.1, 0.05, 0.01]

# Per-dataset p-value columns and correlation-coefficient ("r") columns.
P_VALUE_COLS = ["VECPAC p-values", "LPS p-values", "DSS p-values"]
R_COLS = ["VECPAC r", "LPS r", "DSS r"]


def summarize_network(matches):
    """Return the network summary statistics for a table of edges.

    Args:
        matches: DataFrame with one row per edge. Must contain the columns
            "Metabolite 1", "Metabolite 2" and the three columns in R_COLS.

    Returns:
        dict with keys Total_Nodes, Total_Edges, Positive_Edges,
        Negative_Edges, Pos_to_Neg, Density and Mean_Corr_Coeff.
        Pos_to_Neg is NaN when there are no negative edges; Density is NaN
        when fewer than two distinct nodes remain (no possible node pair).
    """
    total_edges = matches.shape[0]

    # Coerce once so the sign test and the mean see the same numeric values
    # (non-numeric entries become NaN).
    r_values = matches[R_COLS].apply(pd.to_numeric, errors="coerce")
    r_array = r_values.to_numpy(dtype=float)

    # An edge is positive (negative) when every available r is > 0 (< 0):
    # the sum of signs then equals +/- the number of non-NaN r values.
    non_nan_count = (~np.isnan(r_array)).sum(axis=1)
    sign_sum = np.nansum(np.sign(r_array), axis=1)
    # Rows without any r value would satisfy both 0 == 0 and 0 == -0, so
    # they are excluded explicitly instead of being counted in both groups.
    has_r = non_nan_count > 0
    pos_edges = int(np.count_nonzero(has_r & (sign_sum == non_nan_count)))
    neg_edges = int(np.count_nonzero(has_r & (sign_sum == -non_nan_count)))
    pos_neg_ratio = pos_edges / neg_edges if neg_edges > 0 else np.nan

    # Distinct node labels appearing at either end of an edge.
    unique_nodes = pd.concat(
        [matches["Metabolite 1"], matches["Metabolite 2"]]
    ).unique()
    unique_node_count = len(unique_nodes)

    # Density = observed edges / number of possible node pairs. With fewer
    # than two nodes there are no pairs; report NaN rather than dividing by 0.
    possible_edges = (unique_node_count * (unique_node_count - 1)) / 2
    density = total_edges / possible_edges if possible_edges > 0 else np.nan

    # Mean over edges of each edge's mean r (NaN values are skipped).
    mean_corr = r_values.mean(axis=1).mean()

    return {
        "Total_Nodes": unique_node_count,
        "Total_Edges": total_edges,
        "Positive_Edges": pos_edges,
        "Negative_Edges": neg_edges,
        "Pos_to_Neg": pos_neg_ratio,
        "Density": density,
        "Mean_Corr_Coeff": mean_corr,
    }


def compute_network_properties(df, thresholds):
    """Summarize the filtered network for every (p-value, FDR) cut-off pair.

    An edge passes when the largest of its available per-dataset p-values is
    <= the p-value cut-off and its "FDR" is <= the FDR cut-off. Edges with no
    p-value in any of the P_VALUE_COLS columns never pass.

    Args:
        df: DataFrame with the P_VALUE_COLS columns, "FDR", and every column
            required by summarize_network.
        thresholds: iterable of cut-offs, used for both filters.

    Returns:
        list of dicts, one per pair (outer loop: p-value cut-off, inner loop:
        FDR cut-off), each holding "Individual_Pval", "FDR" and the
        summarize_network statistics.
    """
    thresholds = list(thresholds)
    # The row-wise maximum does not depend on the cut-offs, so compute it once.
    # DataFrame.max skips NaN and yields NaN for all-NaN rows, which then fail
    # every "<=" comparison below.
    max_p_val = df[P_VALUE_COLS].max(axis=1)

    rows = []
    for p_thresh in thresholds:
        is_pval_pass = max_p_val <= p_thresh
        for fdr_thresh in thresholds:
            is_fdr_pass = df["FDR"] <= fdr_thresh
            matches = df[is_pval_pass & is_fdr_pass]

            row = {"Individual_Pval": p_thresh, "FDR": fdr_thresh}
            row.update(summarize_network(matches))
            rows.append(row)
    return rows


# File I/O is kept behind the usual entry-point guard so the functions above
# can be imported without touching the filesystem; `__name__` is "__main__"
# both when this runs as a script and inside a notebook cell.
if __name__ == "__main__":
    # Read files
    df = pd.read_csv(input_path)
    total_edges_all = df.shape[0]  # row count before NaN rows are dropped

    # Drop NaN Rows
    df = df.dropna(subset=["Meta-Analysis p-value", "FDR"])

    results = compute_network_properties(df, thresholds)

    # Output
    out_df = pd.DataFrame(results)

    # Save results
    out_df.to_csv("median_nan_network_properties.csv", index=False)

    print("Done with MED NAN")


Done with MED NAN
