In [1]:
import os

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import cm
from sklearn.preprocessing import LabelEncoder

# Directory that the figures below are saved into.
# pandas.read_csv expands a leading '~' on its own, but matplotlib's savefig
# hands the string to open() unchanged, so the home directory is resolved
# here once. The result is still a plain str, so the `+` concatenation used
# at the savefig call sites keeps working.
path_to_store_figures = os.path.expanduser('~/Microbiota/Cytometry_data/Figures_LDA_microbiota')

In [None]:
# --- Parameters --------------------------------------------------------------
# Both values are embedded in the input and output file names.
time_of_calculation = 40  # number of runs; also the normalisation divisor below
topic = 8                 # number of topics

# --- Load data ---------------------------------------------------------------
dataframe_for_tops = pd.read_csv(
    f'~/Microbiota/Cytometry_data/Data_frame_LDA_on_microbiota/Dataframe_for_network_{topic}_topic_{time_of_calculation}_run.csv',
    index_col=0,
)
information_frame = pd.read_csv('~/FlowRepository_FR-FCM-ZYVH_files/attachments/Metadata_DC.csv')

# --- Normalise and threshold the adjacency matrix ----------------------------
# Divide by the number of runs, then zero every entry below the threshold so
# that it does not become an edge in the graph.
threshold_links = 0.28
dataframe_for_tops_norm = dataframe_for_tops / time_of_calculation
dataframe_for_tops_norm[dataframe_for_tops_norm < threshold_links] = 0

# --- Build the network -------------------------------------------------------
# nx.from_numpy_matrix was removed in NetworkX 3.0; from_numpy_array is the
# drop-in replacement for a plain 2-D array and exists since NetworkX 2.0.
G = nx.from_numpy_array(dataframe_for_tops_norm.to_numpy())
# Nodes are created as 0..n-1; relabel them with the matrix column names.
G = nx.relabel_nodes(G, dict(enumerate(dataframe_for_tops_norm.columns)))
# Fixed seed so the layout is reproducible and can be reused by later figures.
my_pos = nx.spring_layout(G, seed=100)

# --- Colour nodes by health status -------------------------------------------
information_frame['Health status binary'] = LabelEncoder().fit_transform(information_frame['Health status'])
N_colors = 2
cm_dis = np.linspace(0, 0.8, N_colors)
colors = [cm.RdBu(x) for x in cm_dis]
color_edges = []  # one colour per node, in node order (passed as node_color)

fig, axes = plt.subplots(figsize=(30, 20))
for node in G:
    # Metadata row(s) of the individual this node stands for.
    temp = information_frame.loc[information_frame['Individual'] == node]
    if temp.empty:
        raise KeyError(f'Individual {node!r} not found in the metadata table')

    # Take the first matching row explicitly: int() on a Series is deprecated
    # in recent pandas and fails when an individual has several rows.
    color = colors[int(temp['Health status binary'].iloc[0])]
    if color not in color_edges:
        # Empty scatter: draws nothing, only registers a legend entry the
        # first time a colour is encountered.
        axes.scatter([], [], color=color, label=temp['Health status'].iloc[0])
    color_edges.append(color)

# Edge width is 20 * weight**4; the fourth power accentuates differences
# between edge weights.
weights = [20 * weight ** 4 for _, _, weight in G.edges(data='weight')]

# Node size is 100 * degree.
d = dict(G.degree)
nx.draw(
    G,
    pos=my_pos,
    ax=axes,
    with_labels=False,
    node_color=color_edges,
    node_size=[v * 100 for v in d.values()],
    width=weights,
)
axes.legend(loc='lower right', markerscale=4, scatterpoints=1, bbox_to_anchor=(1, 0), ncol=1, prop={'size': 30})
fig.tight_layout()
fig.savefig(path_to_store_figures + f'/Network_of_patient_healthy_stats_{topic}_topics_{time_of_calculation}_runs_thresholded.svg', format='svg', bbox_inches='tight')


In [None]:
# Colour the same network by enterotype.
# Relies on `G`, `my_pos` and `information_frame` created in the previous cell,
# so the node positions are identical across both figures.
information_frame['Enterotype binary'] = LabelEncoder().fit_transform(information_frame['Enterotype'])
N_colors = 4
cm_dis = np.linspace(0, 0.8, N_colors)
colors = [cm.tab20c(x) for x in cm_dis]
color_edges = []  # one colour per node, in node order (passed as node_color)

fig, axes = plt.subplots(figsize=(30, 20))
for node in G:
    # Metadata row(s) of the individual this node stands for.
    temp = information_frame.loc[information_frame['Individual'] == node]
    if temp.empty:
        raise KeyError(f'Individual {node!r} not found in the metadata table')

    # Take the first matching row explicitly: int() on a Series is deprecated
    # in recent pandas and fails when an individual has several rows.
    color = colors[int(temp['Enterotype binary'].iloc[0])]
    if color not in color_edges:
        # Empty scatter: draws nothing, only registers a legend entry the
        # first time a colour is encountered.
        axes.scatter([], [], color=color, label=temp['Enterotype'].iloc[0])
    color_edges.append(color)

# Edge width is 20 * weight**4; node size is 100 * degree.
weights = [20 * weight ** 4 for _, _, weight in G.edges(data='weight')]
d = dict(G.degree)
nx.draw(
    G,
    pos=my_pos,
    ax=axes,
    with_labels=False,
    node_color=color_edges,
    width=weights,
    node_size=[v * 100 for v in d.values()],
)
axes.legend(loc='lower right', markerscale=4, scatterpoints=1, bbox_to_anchor=(1, 0), ncol=1, prop={'size': 30})

fig.savefig(path_to_store_figures + f'/Network_of_patient_microbiota_stats_{topic}_topics_{time_of_calculation}_runs_thresholded.svg', format='svg', bbox_inches='tight')

