In [None]:
#| include: false
#| default_exp network
import warnings
warnings.filterwarnings("ignore")
from nbdev.showdoc import show_doc
from IPython.display import HTML
#%load_ext autoreload
#%autoreload 2

In [None]:
#| export
from glycowork.glycan_data.loader import glycomics_data_loader
human_skin_O_PMC5871710_BCC = glycomics_data_loader.human_skin_O_PMC5871710_BCC
from glycowork.network.biosynthesis import *
from glycowork.network.evolution import *

`network` contains functions to arrange and analyze glycans in the context of networks. In such a network, each node represents a glycan and edges represent, for instance, their connection via a biosynthetic step. Because `glycowork` treats glycans as molecular graphs, these networks are hierarchical graphs: the network itself is one graph, and each node within it is also a graph. `network` contains the following modules:

- `biosynthesis` contains functions to construct and analyze biosynthetic glycan networks
- `evolution` contains functions to compare (taxonomic) groups with respect to their glycan repertoires

## biosynthesis
>constructing and analyzing biosynthetic glycan networks

In [None]:
show_doc(construct_network)

---

### construct_network

```python

def construct_network(
    glycans:list, # List of glycans
    allowed_ptms:frozenset=frozenset({'3P', 'OAc', '6S', '4Ac', '1P', 'OS', '9Ac', 'OP', '6P', '3S'}), # Set of allowed PTMs
    edge_type:str='monolink', # Edge label type: monolink/monosaccharide/enzyme
    permitted_roots:frozenset[str] | None=None, # Allowed root nodes
    abundances:list=[], # Glycan abundances in the same order as glycans; default:empty
)->DiGraph: # Biosynthetic network


```

*Construct glycan biosynthetic network*

In [None]:
# Six example glycan sequences (all ending in Glc-ol) used as the observed
# input for the biosynthetic network.
glycans = ["Gal(b1-4)Glc-ol", "GlcNAc(b1-3)Gal(b1-4)Glc-ol",
           "GlcNAc6S(b1-3)Gal(b1-4)Glc-ol",
           "Gal(b1-4)GlcNAc(b1-3)Gal(b1-4)Glc-ol", "Fuc(a1-2)Gal(b1-4)Glc-ol",
          "Neu5Ac(a2-3)Gal(b1-4)GlcNAc(b1-3)[Gal(b1-3)GlcNAc(b1-6)]Gal(b1-4)Glc-ol"]
# Build the network with all default settings (edge_type='monolink', default PTM set).
network = construct_network(glycans)
# Show the nodes; the result below also contains glycans that were not in the
# input list (presumably the inferred "virtual" intermediates).
network.nodes()

NodeView(('Neu5Ac(a2-3)Gal(b1-4)GlcNAc(b1-3)[Gal(b1-3)GlcNAc(b1-6)]Gal(b1-4)Glc-ol', 'Gal(b1-4)GlcNAc(b1-3)Gal(b1-4)Glc-ol', 'GlcNAc6S(b1-3)Gal(b1-4)Glc-ol', 'GlcNAc(b1-3)Gal(b1-4)Glc-ol', 'Fuc(a1-2)Gal(b1-4)Glc-ol', 'Gal(b1-4)Glc-ol', 'Neu5Ac(a2-3)Gal(b1-4)GlcNAc(b1-3)[GlcNAc(b1-6)]Gal(b1-4)Glc-ol', 'Gal(b1-4)GlcNAc(b1-3)[Gal(b1-3)GlcNAc(b1-6)]Gal(b1-4)Glc-ol', 'Gal(b1-4)GlcNAc(b1-3)[GlcNAc(b1-6)]Gal(b1-4)Glc-ol', 'Neu5Ac(a2-3)Gal(b1-4)GlcNAc(b1-3)Gal(b1-4)Glc-ol', 'Gal(b1-3)GlcNAc(b1-6)[GlcNAc(b1-3)]Gal(b1-4)Glc-ol', 'GlcNAc(b1-3)[GlcNAc(b1-6)]Gal(b1-4)Glc-ol'))

In [None]:
show_doc(plot_network)

---

### plot_network

```python

def plot_network(
    network:DiGraph, # Biosynthetic network
    plot_format:str='spring', # Layout type: pydot2/kamada_kawai/spring
    edge_label_draw:bool=True, # Whether to draw edge labels
    lfc_dict:dict[str, float] | None=None, # Enzyme:log2FC mapping for edge width
)->None: # Displays plot


```

*Visualize biosynthetic network*

In [None]:
plot_network(network)

In [None]:
show_doc(infer_network)

---

### infer_network

```python

def infer_network(
    network:DiGraph, # Network to infer
    network_species:str, # Source species
    species_list:list, # Species to compare against
    network_dic:dict, # Species:network mapping
)->Graph: # Network with inferred nodes


```

*Replace virtual nodes observed in other species*

In [None]:
show_doc(retrieve_inferred_nodes)

---

### retrieve_inferred_nodes

```python

def retrieve_inferred_nodes(
    network:DiGraph, # Network with inferred nodes
    species:str | None=None, # Source species if multiple
)->list[str] | dict[str, list[str]]: # Inferred nodes list or dict


```

*Get inferred virtual nodes from network*

In [None]:
show_doc(update_network)

---

### update_network

```python

def update_network(
    network_in:DiGraph, # Input network
    edge_list:list, # List of edges to add
    edge_labels:list[str] | None=None, # Labels for new edges
    node_labels:dict[str, int] | None=None, # Node virtual status (0: observed, 1: virtual)
)->DiGraph: # Updated network


```

*Update network with new edges and labels*

In [None]:
show_doc(trace_diamonds)

---

### trace_diamonds

```python

def trace_diamonds(
    network:DiGraph, # Biosynthetic network
    species_list:list, # Species to compare against
    network_dic:dict, # Species:network mapping
    threshold:float=0.0, # Cutoff threshold
    nb_intermediates:int=2, # Number of intermediate nodes; has to be a multiple of 2
    mode:str='presence', # Analysis mode: presence/abundance
)->DataFrame: # Path analysis results, with proportion (0-1) of how often glycan has been experimentally observed in this path (or average abundance)


```

*Analyze diamond motif (A->B,A->C,B->D,C->D) path preferences using evolutionary data*

In [None]:
show_doc(evoprune_network)

---

### evoprune_network

```python

def evoprune_network(
    network:DiGraph, # Biosynthetic network
    network_dic:dict[str, networkx.classes.digraph.DiGraph] | None=None, # Species:network mapping
    species_list:list[str] | None=None, # Species to compare against
    node_attr:str='abundance', # Node attribute to use for pruning
    threshold:float=0.01, # Cutoff threshold
    nb_intermediates:int=2, # Number of intermediate nodes; has to be a multiple of 2
    mode:str='presence', # Analysis mode: presence/abundance
)->DiGraph: # Evolutionarily pruned network (with virtual node probability as a new node attribute)


```

*Prune network using evolutionary path preferences*

In [None]:
plot_network(evoprune_network(network))

In [None]:
show_doc(highlight_network)

---

### highlight_network

```python

def highlight_network(
    network:DiGraph, # Biosynthetic network
    highlight:str, # What to highlight: motif/species/abundance/conservation
    motif:str | None=None, # Motif to highlight; highlight=motif
    abundance_df:pandas.core.frame.DataFrame | None=None, # Glycan abundance data; highlight=abundance
    glycan_col:str='glycan', # Glycan column name; highlight=abundance
    intensity_col:str='rel_intensity', # Intensity column name; highlight=abundance
    conservation_df:pandas.core.frame.DataFrame | None=None, # Species-glycan data; highlight=conservation
    network_dic:dict[str, networkx.classes.digraph.DiGraph] | None=None, # Species:network mapping; highlight=conservation/species
    species:str | None=None, # Species to highlight; highlight=species
)->DiGraph: # Network with highlight attributes ('origin' (motif/species) or 'abundance' (abundance/conservation) node attribute)


```

*Add visual highlighting to network nodes, to be used in plot_network*

In [None]:
show_doc(export_network)

---

### export_network

```python

def export_network(
    network:DiGraph, # Biosynthetic network
    filepath:str, # Output path prefix; file description and file type are appended to it
    other_node_attributes:list[str] | None=None, # Additional attributes for extraction
)->None: # Saves network files (edge list/labels + node IDs and labels)


```

*Export network to Cytoscape/Gephi compatible files*

In [None]:
show_doc(get_maximum_flow)

---

### get_maximum_flow

```python

def get_maximum_flow(
    network:DiGraph, # Biosynthetic network
    source:str='Gal(b1-4)Glc-ol', # Source node
    sinks:list[str] | None=None, # Target nodes; default:all terminal nodes
)->dict: # Flow results; sink: {maximum flow value, flow path dictionary}


```

*Estimate maximum flow and flow paths between source and sinks*

In [None]:
show_doc(get_max_flow_path)

---

### get_max_flow_path

```python

def get_max_flow_path(
    network:DiGraph, # Biosynthetic network
    flow_dict:dict, # Flow dictionary as returned by get_maximum_flow
    sink:str, # Target node
    source:str='Gal(b1-4)Glc-ol', # Source node
)->list: # Path edge list


```

*Get path giving maximum flow value*

In [None]:
show_doc(get_reaction_flow)

---

### get_reaction_flow

```python

def get_reaction_flow(
    network:DiGraph, # Biosynthetic network
    res:dict, # Flow results as returned by get_maximum_flow
    aggregate:str | None=None, # Aggregation: sum/mean/None
)->dict[str, list[float]] | dict[str, float]: # Reaction flows (reaction: flow)


```

*Get aggregated flows by reaction type*

In [None]:
show_doc(get_differential_biosynthesis)

---

### get_differential_biosynthesis

```python

def get_differential_biosynthesis(
    df:pandas.core.frame.DataFrame | str, # Glycan abundance data (first column: glycan sequences)
    group1:list, # First group column indices/names (or time points in longitudinal analysis)
    group2:list[str | int] | None=None, # Second group column indices/names (or time points in longitudinal analysis)
    analysis:str='reaction', # Type: reaction/flow
    paired:bool=False, # Whether samples are paired
    longitudinal:bool=False, # Whether to perform longitudinal analysis
    id_column:str='ID', # Sample ID column for longitudinal analysis in the ID-style of participant_time_replicate
)->DataFrame: # Differential analysis results (differential flow features and statistics OR reaction changes over time)


```

*Compare biosynthetic patterns between conditions/timepoints*

In [None]:
# Paired comparison on the bundled human skin O-glycomics dataset:
# group1 = odd-numbered columns (1..39), group2 = even-numbered columns (2..40).
# analysis is left at its default ('reaction').
get_differential_biosynthesis(human_skin_O_PMC5871710_BCC, [1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,33,35,37,39],
                              [2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40], paired = True)

You're working with an alpha of 0.044390023979542614 that has been adjusted for your sample size of 40.


Unnamed: 0_level_0,Mean abundance,Log2FC,p-val,corr p-val,significant,Effect size
Feature,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Neu5Ac(a2-3),4.542537,-0.391611,0.001702,0.003147,True,-0.816234
Gal(b1-?),3.395793,-0.435594,0.002206,0.003147,True,-0.790748
Gal(b1-3),5.967389,-0.460385,0.003713,0.003147,True,-0.739291
Neu5Ac(a2-?),4.19329,-0.449044,0.004196,0.003147,True,-0.727147
Neu5Ac(a2-6),5.143191,-0.496132,0.007521,0.004513,True,-0.668654
Neu5Ac(a2-8),2.894141,-0.46414,0.013887,0.006944,True,-0.605967
OS,2.24905,-0.521844,0.019236,0.008244,True,-0.57195
6S,0.824198,-0.270074,0.050454,0.015136,True,-0.466992
Gal(b1-4),0.824198,-0.270074,0.050454,0.015136,True,-0.466992
GlcNAc(b1-6),0.824198,-0.270074,0.050454,0.015136,True,-0.466992


In [None]:
show_doc(extend_network)

---

### extend_network

```python

def extend_network(
    network:DiGraph, # Biosynthetic network
    steps:int=1, # Number of extension steps; default:1
    to_extend:str | dict[str, int] | list[str]='all', # Nodes to extend (all, specific leaf node, target composition)
    strict_context:bool=False, # Whether to use network only to derive allowed reaction products; default:False
)->tuple: # (Extended network, New glycans)


```

*Extend biosynthetic network physiologically*

In [None]:
# Extend the example network by one step (the default for `steps`) with
# strict_context enabled; the call returns the extended network and the new glycans.
new_network, new_glycans = extend_network(network, strict_context = True)
# Number of glycans added by the extension.
len(new_glycans)

20

## evolution
>investigating evolutionary relationships of glycans

In [None]:
show_doc(distance_from_embeddings)

---

### distance_from_embeddings

```python

def distance_from_embeddings(
    df:DataFrame, # DataFrame with glycans (rows) and taxonomic info (columns)
    embeddings:DataFrame, # DataFrame with glycans (rows) and embeddings (columns) (e.g., from glycans_to_emb)
    cut_off:int=10, # Minimum glycans per rank to be included; default:10
    rank:str='Species', # Taxonomic rank for grouping; default:Species
    averaging:str='median', # How to average embeddings: median/mean
)->DataFrame: # Rank x rank distance matrix


```

*Calculate cosine distance matrix from learned embeddings*

In [None]:
show_doc(distance_from_metric)

---

### distance_from_metric

```python

def distance_from_metric(
    df:DataFrame, # DataFrame with glycans (rows) and taxonomic info (columns)
    networks:list, # List of networkx networks
    metric:str='Jaccard', # Distance metric to use
    cut_off:int=10, # Minimum glycans per rank to be included; default:10
    rank:str='Species', # Taxonomic rank for grouping; default:Species
)->DataFrame: # Rank x rank distance matrix


```

*Calculate distance matrix between networks using provided metric*

In [None]:
show_doc(dendrogram_from_distance)

---

### dendrogram_from_distance

```python

def dendrogram_from_distance(
    dm:DataFrame, # Rank x rank distance matrix (e.g., from distance_from_embeddings)
    ylabel:str='Mammalia', # Y-axis label
    filepath:str='', # Path to save plot including filename
)->None: # Displays or saves dendrogram plot


```

*Plot dendrogram from distance matrix*

In [None]:
show_doc(check_conservation)

---

### check_conservation

```python

def check_conservation(
    glycan:str, # Glycan or motif in IUPAC-condensed format
    df:DataFrame, # DataFrame with glycans (rows) and taxonomic levels (columns)
    network_dic:dict[str, networkx.classes.graph.Graph] | None=None, # Species:biosynthetic network mapping
    rank:str='Order', # Taxonomic level to assess
    threshold:int=5, # Minimum glycans per species to be included
    motif:bool=False, # Whether glycan is a motif vs sequence
)->dict: # Taxonomic group-to-conservation mapping


```

*Estimate evolutionary conservation of glycans via biosynthetic networks*

In [None]:
show_doc(get_communities)

---

### get_communities

```python

def get_communities(
    network_list:list, # List of undirected biosynthetic networks
    label_list:list[str] | None=None, # Labels for community names, running_number + _ + label_list[k] for network_list[k]; default:range(len(network_list))
)->dict: # Community-to-glycan list mapping


```

*Find communities for each graph in list of graphs*

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()