# Sequence Normalization

```
Signature:
normalize(
    sequence,
    interval,
    alleles,
    mode=<Mode.EXPAND: 4>,
    bounds=None,
    anchor_length=0,
)
Docstring:
normalizes the given alleles, returning a new interval and alleles.

interval: location of alleles as (start, end) tuple, interbase coordinates
alleles[]: array of sequence strings; first element is ref sequence and may be None
bounds: maximal extent of normalization left and right; defaults to
  (0, len(sequence)); must be provided if sequence doesn't support __len__
sequence: the reference sequence
mode: shuffle left (à la vcf), shuffle right (hgvs), expand (voca)
anchor_length: # of flanking bases to add left and right; defaults to 0
```

In [1]:
from ga4gh.vr import normalize

In [2]:
sequence = "CCCCCCCCACACACACACTAGCAGCAGCA"

#                        1                   2                   3
#    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0
#     C C C C C C C C A C A C A C A C A C T A G C A G C A G C A
#               [3]^           ^      [4]^


In [3]:
normalize(sequence, interval=(13,13), alleles=(None, "CA"), mode="LEFTSHUFFLE", bounds=(0,len(sequence)))

((7, 7), ('', 'CA'))

In [4]:
normalize(sequence, interval=(13,13), alleles=(None, "CA"), mode="RIGHTSHUFFLE", bounds=(0,len(sequence)))

((18, 18), ('', 'AC'))

In [5]:
normalize(sequence, interval=(13,15), alleles=(None, "CACA"), mode="TRIMONLY", bounds=(0,len(sequence)))

((15, 15), ('', 'CA'))

In [6]:
normalize(sequence, interval=(13,13), alleles=(None, "CA"), mode="VCF", bounds=(0,len(sequence)))

((6, 7), ('C', 'CCA'))

In [7]:
normalize(sequence, interval=(13,13), alleles=(None, "CA"), mode="EXPAND", bounds=(0,len(sequence)))

((7, 18), ('CACACACACAC', 'CACACACACACAC'))

In [8]:
normalize(sequence, interval=(13,13), alleles=(None, "CA"), mode="EXPAND", bounds=(0,len(sequence)), anchor_length=3)

((4, 21), ('CCCCACACACACACTAG', 'CCCCACACACACACACTAG'))