In [1]:
import pandas as pd
import networkx as nx
import numpy as np
import glob
import natsort
import math
import copy
import os
import itertools

### Import files

In [2]:
# Set directories
# current_dir: working directory the notebook runs from (outputs are written below it)
# parent_dir:  the directory that contains current_dir
# input_path:  "_ModelsAndNetworks" folder inside parent_dir, read by the loading cell
current_dir = os.getcwd()
parent_dir = os.path.split(current_dir)[0]
input_path = os.path.join(parent_dir, "_ModelsAndNetworks")

In [3]:
# Collect every file matching "*Adjacency.csv" in the input folder, naturally sorted.
Adj_filenames = natsort.natsorted(glob.glob(input_path + "/*Adjacency.csv"))

# Species label = file name without its directory and without the last
# "_"-separated token (everything before the final "_").
# os.path.basename is used instead of splitting on '\\' so that the directory
# part is stripped on POSIX systems as well as on Windows; otherwise the full
# path would leak into the label and into every output file name built from it.
Species = [os.path.basename(path).rpartition('_')[0] for path in Adj_filenames]

# One adjacency DataFrame and one list of its column labels per file,
# stored in the same order as Species.
Met_adjs = []
Met_ids = []
for path in Adj_filenames:
    M_ad = pd.read_csv(path, index_col=0)  # first CSV column is used as the row index
    Met_adjs.append(M_ad)
    Met_ids.append(list(M_ad.columns))

Species

['Actinomyces_odontolyticus_ATCC_17982',
 'Alistipes_putredinis_DSM_17216',
 'Anaerococcus_hydrogenalis_DSM_7454',
 'Anaerofustis_stercorihominis_DSM_17244',
 'Anaerostipes_caccae_DSM_14662',
 'Anaerotruncus_colihominis_DSM_17241',
 'Bacteroides_caccae_ATCC_43185',
 'Bacteroides_cellulosilyticus_DSM_14838',
 'Bacteroides_coprophilus_DSM_18228',
 'Bacteroides_dorei_DSM_17855']

### Filter the networks

In [4]:
# Find and remove glycans
# For every adjacency table in Met_adjs, drop the nodes whose label contains
# the substring 'MGlcn' and store the resulting adjacency table together with
# its remaining column labels.
glc_Met_adjs = []
glc_Met_IDs = []

for adj in Met_adjs:
    # The directed graph is built fresh from the table and referenced nowhere
    # else, so it can be pruned in place; no deep copy is required.
    glc_N = nx.from_pandas_adjacency(adj, create_using=nx.DiGraph)
    glc_N.remove_nodes_from([met for met in adj.columns if 'MGlcn' in met])

    # Back to a DataFrame; the column labels are the nodes that survived.
    glc_adj = nx.to_pandas_adjacency(glc_N)
    glc_Met_adjs.append(glc_adj)
    glc_Met_IDs.append(list(glc_adj.columns))

# Number of networks processed (same final value as the former loop counter).
count = len(glc_Met_adjs)

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Find and remove currency metabolites
# From every glycan-filtered network, remove the ceil(perc * n_nodes) nodes
# with the highest degree. perc is also embedded in the index file names
# written by the last cell.
perc = 0.03

curr_Met_adjs = []
curr_Met_IDs = []
for adj0 in glc_Met_adjs:
    top_n_nodes = math.ceil(perc * len(adj0.columns))

    # Freshly built graph with no other references: prune it in place rather
    # than deep-copying it first.
    curr_N = nx.from_pandas_adjacency(adj0, create_using=nx.DiGraph)

    # Rank nodes by degree, highest first. sorted() is stable, so nodes with
    # equal degree keep their original graph order.
    degree_of = dict(curr_N.degree())
    SortedNodes = sorted(degree_of, key=degree_of.get, reverse=True)

    curr_N.remove_nodes_from(SortedNodes[:top_n_nodes])

    curr_adj = nx.to_pandas_adjacency(curr_N)
    curr_Met_adjs.append(curr_adj)
    curr_Met_IDs.append(list(curr_adj.columns))

### Export filtered adjacency networks

In [5]:
# Write every filtered adjacency table to
# <current_dir>/0_FilteredAdjacency/<species>_FilteredNetwork.csv.
# The folder is created on first use; curr_Met_adjs is indexed in the same
# order as Species.
os.makedirs(f'{current_dir}/0_FilteredAdjacency', exist_ok=True)
for m in range(len(curr_Met_adjs)):
    curr_Met_adjs[m].to_csv(f'{current_dir}/0_FilteredAdjacency/{Species[m]}_FilteredNetwork.csv')

### Find the [c], [e] layers of each network

In [6]:
# Per-species metabolite lists, keyed by species label:
#   Seed_dict            - IDs containing '[e]'
#   Product_dict         - all remaining IDs
#   Cmplmnt_Product_dict - the remaining IDs with '[c]' rewritten to '[e]'
Seed_dict = {}
Product_dict = {}
Cmplmnt_Product_dict = {}

for idx, species_name in enumerate(Species):
    met_ids = curr_Met_IDs[idx]

    # Split on the '[e]' tag directly; testing each ID against the e_subs
    # list would give the same partition at quadratic cost.
    e_subs = [met for met in met_ids if '[e]' in met]
    pr_subs = [met for met in met_ids if '[e]' not in met]
    # To find intersection between [c] and [e] layers, we convert [c] to [e] in cmplmnt_pr_subs
    cmplmnt_pr_subs = [met.replace('[c]', '[e]') for met in pr_subs]

    Seed_dict[species_name] = e_subs
    Product_dict[species_name] = pr_subs
    Cmplmnt_Product_dict[species_name] = cmplmnt_pr_subs

notation = ['E', 'C']
# To save layers: 'E' -> Seed_dict, 'C' -> Product_dict (original IDs)
save_regions_list = [Seed_dict, Product_dict]
save_regions_dict = dict(zip(notation, save_regions_list))

# To calculate indices: 'E' -> Seed_dict, 'C' -> Cmplmnt_Product_dict ('[c]' rewritten to '[e]')
regions_list = [Seed_dict, Cmplmnt_Product_dict]
regions_dict = dict(zip(notation, regions_list))

### Export the layers

In [7]:
# Write one CSV per layer key in save_regions_dict to
# <current_dir>/1_SpeciesMetaLayers/Set_<key>.csv.
# Each species becomes one column (rows are built per species, then transposed).
os.makedirs(f'{current_dir}/1_SpeciesMetaLayers', exist_ok=True)
for k_key, members_by_species in save_regions_dict.items():
    rows = [list(members) for members in members_by_species.values()]
    layer = pd.DataFrame(rows, index=Species).T
    layer.to_csv(f'{current_dir}/1_SpeciesMetaLayers/Set_{k_key}.csv')

### Calculate and export competitive and synergistic indices

In [8]:
def _overlap_fraction(reference, other):
    """Return the fraction of `reference` that is also present in `other`.

    Parameters
    ----------
    reference : set
        Set whose size is the denominator.
    other : set
        Set intersected with `reference`.

    Returns
    -------
    float
        len(reference & other) / len(reference), or NaN when `reference` is
        empty (the ratio is undefined; previously this raised
        ZeroDivisionError and aborted the whole export).
    """
    if not reference:
        return np.nan
    return len(reference & other) / len(reference)


# Layer pairs: CI compares a layer with itself, SI compares two different layers.
CI_s = [(i, i) for i in notation]
SI_s = list(itertools.permutations(notation, 2))

os.makedirs(f'{current_dir}/2_Indices', exist_ok=True)

# Convert every metabolite list to a set once, instead of once per species pair.
region_sets = {
    layer_key: {speci: set(mets) for speci, mets in by_species.items()}
    for layer_key, by_species in regions_dict.items()
}
# Row and column labels of every matrix (species labels, in insertion order).
matrix_labels = list(regions_list[0].keys())

####### CI
# CI[speci1, speci2] = |X1 & X2| / |X1|, where X1 and X2 are the sets of
# speci1 and speci2 in the same layer.
for notn1 in CI_s:
    row_sets = region_sets[notn1[0]]
    col_sets = region_sets[notn1[1]]

    CI_Matrix = pd.DataFrame(
        [[_overlap_fraction(row_sets[speci1], col_sets[speci2]) for speci2 in Species]
         for speci1 in Species],
        index=matrix_labels,
        columns=matrix_labels,
        dtype=float,
    )
    CI_Matrix.to_csv(f'{current_dir}/2_Indices/Set_CI_{notn1[0]}{notn1[1]}_Curr{perc}.csv', header=True, index=True)


####### SI
# SI[speci1, speci2] = |X2 & Y1| / |Y1|, where X2 is the set of speci2 in the
# first layer of the pair and Y1 is the set of speci1 in the second layer.
# The diagonal (speci1 == speci2) is left at 0.0.
for notn2 in SI_s:
    x_sets = region_sets[notn2[0]]
    y_sets = region_sets[notn2[1]]

    SI_Matrix = pd.DataFrame(
        [[0.0 if speci1 == speci2 else _overlap_fraction(y_sets[speci1], x_sets[speci2])
          for speci2 in Species]
         for speci1 in Species],
        index=matrix_labels,
        columns=matrix_labels,
        dtype=float,
    )
    SI_Matrix.to_csv(f'{current_dir}/2_Indices/Set_SI_{notn2[0]}{notn2[1]}_Curr{perc}.csv', header=True, index=True)