In [None]:
#hide
#default_exp glycan_data
# Helpers used by the cells below: `show_doc` renders API docs, `HTML` displays
# the styled table previews, and `copy` is used to duplicate a dataframe before reshaping it.
from nbdev.showdoc import show_doc
from IPython.display import HTML
import copy
# IPython autoreload in mode 2: re-import all modules before running each cell,
# so edits to the library source are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# glycan_data

>Loading and handling glycan datasets

In [None]:
#export
from glycowork.glycan_data.loader import *
from glycowork.glycan_data.data_entry import *

`glycan_data` contains several helper functions for glycan data loading and data entry. Helper functions for data loading and data objects are in `loader` and include:
- `unwrap` flattens nested lists
- `find_nth` returns the n-th occurrence of a motif
- `df_species` loaded file for all glycans with species information, one row per glycan-species combination
- `df_glycan` loaded file containing all unique glycans with lots of meta-information
- `df_glysum` loaded glycan substitution matrix for glycan alignment
- `glycan_emb` glycan representations for all glycans in df_species from a trained species-level SweetNet model
- `influenza_binding` Z-score data for influenza virus binding to glycans; each row is one virus and each column is one glycan
- `lib` library of unique monosaccharides + linkages in stored datasets; generated with `get_lib`

# loader

In [None]:
# Render the API documentation entry for `unwrap`.
show_doc(unwrap)

In [None]:
# Example call of `unwrap` on a list containing two inner lists.
unwrap([[1,2], [3,4]])

In [None]:
# Render the API documentation entry for `find_nth`.
show_doc(find_nth)

In [None]:
# Example call of `find_nth`; the arguments are presumably (text, motif, n),
# i.e. the 2nd 'as' in the sentence -- confirm against the signature shown above.
find_nth('This is as good as it gets', 'as', 2)

`df_species`

In [None]:
#hide_input
# Preview of `df_species` with the `target` column moved into the row index.
# `set_index` returns a new frame, so the imported `df_species` is left untouched;
# `rename_axis(None)` clears the index name so the rendered table gets no extra header row.
df_species2 = df_species.set_index('target').rename_axis(None)
species_cell_css = {'font-size': '11pt', 'font-family': 'Helvetica','border-collapse': 'collapse','border': '1px solid black'}
species_styler = df_species2.head().style.set_properties(**species_cell_css)
# `Styler.render()` was deprecated in pandas 1.4 and removed in 2.0; use `to_html()`
# (available since pandas 1.3) and fall back to `render()` only on older versions.
HTML(species_styler.to_html() if hasattr(species_styler, 'to_html') else species_styler.render())

`df_glycan`

In [None]:
#hide_input
# Preview of `df_glycan` with the `glycan` column moved into the row index.
# The preview is built on a new frame rather than by re-indexing `df_glycan` in place:
# mutating the imported dataframe would change it for every later cell and make this
# cell fail on a second run (the `glycan` column would already be gone).
# `rename_axis(None)` clears the index name so the rendered table gets no extra header row.
df_glycan2 = df_glycan.set_index('glycan').rename_axis(None)
glycan_cell_css = {'font-size': '11pt', 'font-family': 'Helvetica','border-collapse': 'collapse','border': '1px solid black'}
glycan_styler = df_glycan2.head().style.set_properties(**glycan_cell_css)
# `Styler.render()` was deprecated in pandas 1.4 and removed in 2.0; use `to_html()`
# (available since pandas 1.3) and fall back to `render()` only on older versions.
HTML(glycan_styler.to_html() if hasattr(glycan_styler, 'to_html') else glycan_styler.render())

`influenza_binding`

In [None]:
#hide_input
# Styled preview of the first rows of `influenza_binding`.
influenza_cell_css = {'font-size': '11pt', 'font-family': 'Helvetica','border-collapse': 'collapse','border': '1px solid black'}
influenza_styler = influenza_binding.head().style.set_properties(**influenza_cell_css)
# `Styler.render()` was deprecated in pandas 1.4 and removed in 2.0; use `to_html()`
# (available since pandas 1.3) and fall back to `render()` only on older versions.
HTML(influenza_styler.to_html() if hasattr(influenza_styler, 'to_html') else influenza_styler.render())

# data_entry

Helper functions for entering data are contained in `data_entry` and include:

- `check_presence` tests whether the entered glycan is already in the database

In [None]:
# Render the API documentation entry for `check_presence`.
show_doc(check_presence)

In [None]:
# Three example look-ups against `df_species`: two different glycan strings,
# then the first glycan again with an additional species name.
first_glycan = 'Man(a1-3)[Man(a1-6)]Man(b1-4)GlcNAc(b1-4)GlcNAc'
second_glycan = 'Fuc(b1-3)[Man(a1-6)]Man(b1-4)GlcNAc(b1-4)GlcNAc'
species_name = 'Danielus Bojarum'

print(f"Check presence of {first_glycan}")
check_presence(first_glycan, df_species)

print(f"Check presence of {second_glycan}")
check_presence(second_glycan, df_species)

print(f"Check presence of {first_glycan} in the species {species_name}")
check_presence(first_glycan, df_species, name = species_name)

In [None]:
#hide
# Run nbdev's notebook-to-script conversion as the final step of this notebook.
from nbdev.export import notebook2script
notebook2script()