In [None]:
#hide
#default_exp alignment
from nbdev.showdoc import show_doc
from IPython.display import HTML
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# alignment
>aligning glycan sequences based on a substitution matrix

In [None]:
#export
from glycowork.alignment.glysum import *

`alignment` contains the codebase for aligning glycan sequences based on a substitution matrix, GLYSUM. It currently includes the following modules:
 - `glysum`, which contains the actual alignment functions
 
For future iterations of `glycowork`, we are planning to include additional alignment algorithms, such as the one described in https://pubmed.ncbi.nlm.nih.gov/15215393/

# glysum
>aligning glycan sequences based on a substitution matrix

In [None]:
# Render the signature and docstring of pairwiseAlign as formatted documentation.
show_doc(pairwiseAlign)

<h4 id="pairwiseAlign" class="doc_header"><code>pairwiseAlign</code><a href="https://github.com/BojarLab/glycowork/tree/master/glycowork/alignment/glysum.py#L14" class="source_link" style="float:right">[source]</a></h4>

> <code>pairwiseAlign</code>(**`query`**, **`corpus`**=*`None`*, **`n`**=*`5`*, **`vocab`**=*`None`*, **`submat`**=*`None`*, **`mismatch`**=*`-1`*, **`gap`**=*`-1`*, **`col`**=*`'glycan'`*)

aligns glycan sequence from database against rest of the database and returns the best n alignments

| Arguments:
| :-
| query (string): glycan string in IUPAC-condensed notation
| corpus (dataframe): database to align query against; default is SugarBase
| n (int): how many alignments to show; default shows top 5
| vocab (list): list of glycowords used for mapping to tokens
| submat (dataframe): GLYSUM substitution matrix
| mismatch (int): mismatch penalty; default: -1
| gap (int): gap penalty; default: -1
| col (string): column name where glycan sequences are; default: glycan

| Returns:
| :-
| The n best alignments of query against corpus in text form with scores etc

In [None]:
# Example run: align a single query glycan using the function's defaults
# (no corpus, vocab, or substitution matrix is passed explicitly).
print("Test Alignment")
pairwiseAlign('Man(a1-3)Man(a1-4)Glc(b1-4)Man(a1-5)Kdo')
# Alternative query, kept disabled:
#pairwiseAlign('Man(a1-3)[Man(a1-6)]Man(b1-4)GlcNAc(b1-4)GlcNAc')

Test Alignment
1                                               9
Man a1-3 Man a1-4 Glc b1-4 Man a1-5 Kdo
Man a1-3 Man a1-3 Glc b1-6 Man a1-5 Kdo
Alignment Score: 21
Percent Identity: 77.77777777777779
Percent Coverage: 100.0
Sequence Index: GalA1P*a1-4*Man*a1-3*Man*a1-3*Glc*b1-6*Man*a1-5*Kdo
Species: []

1                                               9
Man a1-3 Man a1-4 Glc b1-4 Man a1-5 Kdo
Man a1-3 Man a1-4 Glc b1-4 Man a1-5 Kdo
Alignment Score: 21
Percent Identity: 100.0
Percent Coverage: 100.0
Sequence Index: Man*a1-3*Man*a1-4*Glc*b1-4*Man*a1-5*Kdo
Species: ['Xanthomonas_oryzae']

1                                                         9
Man a1-3 Man a1-4 Glc b1-4 Man -    -      a1-5 Kdo
Man a1-3 Man a1-3 Glc b1-6 Man a1-5 GalAOP a1-4 Kdo
Alignment Score: 19
Percent Identity: 54.54545454545454
Percent Coverage: 100.0
Sequence Index: Man*a1-3*Man*a1-3*Glc*b1-6*Man*a1-5*GalAOP*a1-4*Kdo
Species: ['Xanthomonas_campestris']

1                                                         

In [None]:
#hide
# Run nbdev's notebook2script; it reports each notebook it converts.
from nbdev.export import notebook2script
notebook2script()

Converted 00_core.ipynb.
Converted 01_alignment.ipynb.
Converted 02_glycan_data.ipynb.
Converted 03_ml.ipynb.
Converted 04_motif.ipynb.
Converted 05_examples.ipynb.
Converted 06_network.ipynb.
Converted index.ipynb.
