In [1]:
import pandas as pd
import networkx as nx
import numpy as np 
import glob
import natsort
import math
import copy
import os
import itertools

### Import files

In [2]:
# Resolve the folders used throughout the notebook: the current working
# directory, the directory one level above it, and the "_ModelsAndNetworks"
# input folder located inside that parent directory.
current_dir = os.getcwd()
parent_dir = os.path.split(current_dir)[0]
input_path = os.path.join(parent_dir, "_ModelsAndNetworks")

In [3]:
# Collect the per-species input files. Both lists are naturally sorted and are
# paired by position further down.
Adj_filenames = natsort.natsorted(glob.glob(input_path + "/*Adjacency.csv"))
Biomass_filenames = natsort.natsorted(glob.glob(input_path + "/*biomassReaction.txt"))

# Species name = adjacency file name without its directory and without the
# trailing "_<token>" part. os.path.basename strips the directory with the
# separator rules of the running OS; splitting on "\\" only worked on Windows
# and left the whole directory path inside the name everywhere else.
Species = ['_'.join(os.path.basename(path).split('_')[:-1]) for path in Adj_filenames]

Met_adjs = []      # one adjacency DataFrame per species
Met_IDs = []       # column labels (metabolite IDs) of each adjacency matrix
BiomassRxns = []   # parsed biomass-reaction lines per species
for idx, adj_file in enumerate(Adj_filenames):
    M_ad = pd.read_csv(adj_file, index_col=0)
    M_id = list(M_ad.columns)

    # Each line of the biomass file is read as one string; the first and last
    # character of the line are dropped, the remainder is split on ", " and the
    # first and last character of every piece are dropped as well
    # (presumably a printed list of quoted metabolite IDs - confirm against the files).
    B_rxn = pd.read_table(Biomass_filenames[idx], header=None)[0].to_list()
    B_rxn = [[piece[1:-1] for piece in line[1:-1].split(', ')] for line in B_rxn]

    BiomassRxns.append(B_rxn)
    Met_IDs.append(M_id)
    Met_adjs.append(M_ad)

Species

['Actinomyces_odontolyticus_ATCC_17982',
 'Alistipes_putredinis_DSM_17216',
 'Anaerococcus_hydrogenalis_DSM_7454',
 'Anaerofustis_stercorihominis_DSM_17244',
 'Anaerostipes_caccae_DSM_14662',
 'Anaerotruncus_colihominis_DSM_17241',
 'Bacteroides_caccae_ATCC_43185',
 'Bacteroides_cellulosilyticus_DSM_14838',
 'Bacteroides_coprophilus_DSM_18228',
 'Bacteroides_dorei_DSM_17855']

### Filter the networks

In [4]:
# Drop glycan metabolites (node IDs containing 'MGlcn') from every species network
glc_Met_adjs = []
glc_Met_IDs = []
for species_adj in Met_adjs:
    full_graph = nx.from_pandas_adjacency(species_adj, create_using=nx.DiGraph)

    # Prune a copy so the graph built from the input matrix stays untouched.
    pruned_graph = copy.deepcopy(full_graph)
    pruned_graph.remove_nodes_from(
        [node_id for node_id in list(species_adj.columns) if 'MGlcn' in node_id]
    )

    # Back to an adjacency table; its columns are the surviving metabolite IDs.
    pruned_adj = nx.to_pandas_adjacency(pruned_graph)
    glc_Met_adjs.append(pruned_adj)
    glc_Met_IDs.append(list(pruned_adj.columns))

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Remove currency metabolites: the top `perc` fraction of nodes ranked by
# total degree in each glycan-free network ('biomass[c]' is always kept).
perc = 0.03

curr_Met_adjs = []
curr_Met_IDs = []
for glycan_free_adj in glc_Met_adjs:
    # Number of highest-degree nodes to discard, rounded up.
    n_currency = math.ceil(perc * len(list(glycan_free_adj.columns)))

    glc_G = nx.from_pandas_adjacency(glycan_free_adj, create_using=nx.DiGraph)

    # Rank nodes by degree, highest first; the sort is stable, so nodes with
    # equal degree keep the order in which the graph lists them.
    degree_of = dict(glc_G.degree(glc_G.nodes()))
    ranked_nodes = sorted(degree_of, key=degree_of.get, reverse=True)

    # The biomass node is never treated as a currency metabolite.
    CurrMet = [node for node in ranked_nodes[:n_currency] if node != 'biomass[c]']

    curr_N = copy.deepcopy(glc_G)
    curr_N.remove_nodes_from(CurrMet)

    curr_adj = nx.to_pandas_adjacency(curr_N)
    curr_Met_adjs.append(curr_adj)
    curr_Met_IDs.append(list(curr_adj.columns))

### Export filtered adjacency networks

In [5]:
# Write one filtered adjacency CSV per species into 0_FilteredAdjacency/
# (the folder is created on first use).
os.makedirs(f'{current_dir}/0_FilteredAdjacency', exist_ok=True)
for idx in range(len(curr_Met_adjs)):
    curr_Met_adjs[idx].to_csv(f'{current_dir}/0_FilteredAdjacency/{Species[idx]}_FilteredNetwork.csv')

### Find the layers of each network based on distance from the core (the biomass products)

In [6]:
# Layer every species' filtered network by backward distance from its biomass
# products.
#   Neighbors[species]: '<k>_neigbours' -> metabolites first reached after k
#                       predecessor steps from the biomass products.
#   Rest[species]:      every metabolite of the filtered network that is in
#                       none of those layers (this includes the biomass
#                       products themselves, which are never put in a layer).
Rest = {}
Neighbors = {}
for number in range(len(Species)):
    network_ids = set(curr_Met_IDs[number])

    # Second entry of the parsed biomass reaction, restricted to metabolites
    # that survived filtering (taken to be the product side, per the name).
    bio_prods = [i for i in BiomassRxns[number][1] if i in network_ids]

    last_G = nx.from_pandas_adjacency(curr_Met_adjs[number], create_using=nx.DiGraph)

    slices = {}
    visited = set(bio_prods)   # everything already assigned a distance
    frontier = bio_prods       # nodes whose predecessors are explored next
    count = 0
    while True:
        # dict.fromkeys de-duplicates while keeping discovery order, so each
        # layer is listed in the same order on every run; iterating a plain
        # set of strings would give a hash-seed-dependent order.
        discovered = dict.fromkeys(
            pred for target in frontier for pred in last_G.predecessors(target)
        )
        frontier = [node for node in discovered if node not in visited]
        if len(frontier) == 0:
            break

        count += 1
        slices[f'{count}_neigbours'] = frontier
        visited.update(frontier)

    Neighbors[Species[number]] = slices
    layered = {item for layer in slices.values() for item in layer}
    Rest[Species[number]] = [q for q in curr_Met_IDs[number] if q not in layered]


# Bring all species onto a common set of layer labels.
# With m = the smallest number of layers found for any species:
#   D1 .. D(m-1)  -> that species' own layers 1 .. m-1
#   Dm            -> layers m and deeper, merged into one list
#   R             -> the metabolites in Rest[species]
if not Species:
    raise ValueError("No species were loaded, so there are no layers to compare.")

layer_counts = [len(Neighbors[sps]) for sps in Species]
unlayered = [sps for sps, n_layers in zip(Species, layer_counts) if n_layers == 0]
if unlayered:
    # Without this check the lookup below fails with KeyError: '0_neigbours'.
    raise ValueError(
        f"No predecessor layers were found for: {unlayered}. "
        "Check that their biomass products are present in the filtered network."
    )

comparableSlices = min(layer_counts) - 1
keyss = [f'D{i}' for i in range(1, comparableSlices+2)] + ['R']

Neighbors_layers = {}
for sps in Species:
    species_slices = Neighbors[sps]

    # Layers that every species has on its own.
    valuess = [species_slices[f'{i}_neigbours'] for i in range(1, comparableSlices+1)]

    # All remaining, deeper layers collapsed into the last D layer.
    valuess.append([
        item
        for j in range(comparableSlices+1, len(species_slices)+1)
        for item in species_slices[f'{j}_neigbours']
    ])

    valuess.append(Rest[sps])
    Neighbors_layers[sps] = dict(zip(keyss, valuess))

# Every species carries exactly these labels, so reuse them instead of
# rebuilding the list from whichever species the loop visited last.
notation = list(keyss)

### Export the layers

In [7]:
# Write one CSV per layer label: one column per species, one row per metabolite.
os.makedirs(f'{current_dir}/1_SpeciesMetaLayers', exist_ok=True)
for ll in notation:
    layer_l1 = [Neighbors_layers[spec][ll] for spec in Species]

    # Layers differ in size between species; building the frame row-wise and
    # transposing lets pandas pad the shorter columns with missing values.
    layer = pd.DataFrame(layer_l1, index=Species).T

    # Write into the folder created above (anchored at current_dir, like the
    # other export cells) rather than relying on the process working directory.
    layer.to_csv(f'{current_dir}/1_SpeciesMetaLayers/Networklayered_{ll}.csv')

### Calculate and export competitive and synergistic indices

In [8]:
def _layer_overlap_matrix(layers, species, row_layer, col_layer):
    """Return a species-by-species DataFrame of directed layer overlaps.

    Entry [s1, s2] is the number of distinct metabolites shared by
    ``layers[s1][row_layer]`` and ``layers[s2][col_layer]``, divided by the
    number of distinct metabolites in ``layers[s1][row_layer]``.

    Parameters
    ----------
    layers : dict
        Maps species name -> {layer label -> list of metabolite IDs}.
    species : list
        Species names; used for both the row and the column labels.
    row_layer, col_layer : str
        Layer label taken from the row species and the column species.

    Returns
    -------
    pandas.DataFrame
        Float matrix indexed and labelled by ``species``. A row whose
        reference layer is empty has an undefined ratio and is filled with NaN.
    """
    # Build each set once instead of once per species pair.
    row_sets = {sp: set(layers[sp][row_layer]) for sp in species}
    col_sets = {sp: set(layers[sp][col_layer]) for sp in species}

    values = [
        [
            len(row_sets[s1] & col_sets[s2]) / len(row_sets[s1]) if row_sets[s1] else np.nan
            for s2 in species
        ]
        for s1 in species
    ]
    return pd.DataFrame(values, index=species, columns=species, dtype=float)


# CI compares the same layer in both species; SI compares every ordered pair
# of different layers.
CI_s = [(i, i) for i in notation]
SI_s = list(itertools.permutations(notation, 2))

### CI ###
os.makedirs(f'{current_dir}/2_Indices/CI', exist_ok=True)
for notn1 in CI_s:
    # Row species contributes layer notn1[0], column species layer notn1[1].
    CI_Matrix = _layer_overlap_matrix(Neighbors_layers, Species, row_layer=notn1[0], col_layer=notn1[1])
    CI_Matrix.to_csv(f'{current_dir}/2_Indices/CI/Network_CI_{notn1[0]}{notn1[1]}_Curr{perc}.csv', header=True, index=True)


### SI ###
os.makedirs(f'{current_dir}/2_Indices/SI', exist_ok=True)
for notn2 in SI_s:
    # Row species contributes layer notn2[1] (the denominator); the column
    # species contributes layer notn2[0].
    SI_Matrix = _layer_overlap_matrix(Neighbors_layers, Species, row_layer=notn2[1], col_layer=notn2[0])
    SI_Matrix.to_csv(f'{current_dir}/2_Indices/SI/Network_SI_{notn2[0]}{notn2[1]}_Curr{perc}.csv', header=True, index=True)