`BpForms` is a toolkit for unambiguously describing the primary sequence of biopolymers such as DNA, RNA, and proteins, including modified DNA, RNA, and proteins. `BpForms` represents biopolymers as sequences of linked monomeric forms.
This tutorial illustrates how to use the `BpForms` Python library. Please see the second tutorial for more details and more examples. Please also see the [documentation](https://docs.karrlab.org/bpforms/) for more information about the `BpForms` grammar and more instructions for using the `BpForms` website, JSON REST API, and command line interface.

# Import library

In [1]:
import bpforms

# Create polymers from their string representations

## Form of a DNA

In [2]:
# Parse the sequence ACGT; the "| circular" attribute after the sequence
# marks the polymer as circular.
dna_1 = bpforms.DnaForm().from_str('ACGT | circular')

## Form of an RNA

In [3]:
# Braces enclose a monomer whose code is longer than one character
# (here the modified monomer 01A).
rna_1 = bpforms.RnaForm().from_str('C{01A}GU')

## Form of a protein

In [4]:
# Six residues plus a disulfide crosslink between residues 1 and 6
# (the two C's at either end of the sequence).
prot_1 = bpforms.ProteinForm().from_str(
    'CVYT{U}C | x-link: [type: "disulfide" | l: 1 | r: 6]')

# Create the same polymers programmatically

## Form of a DNA

In [5]:
# Start from an empty DNA form and append one monomer per residue,
# looking each monomer up by its code in the DNA alphabet.
dna_2 = bpforms.DnaForm()
for residue in ['A', 'C', 'G', 'T']:
    dna_2.seq.append(bpforms.dna_alphabet.monomers[residue])
# Counterpart of the "| circular" attribute in the string representation.
dna_2.circular = True

## Form of an RNA

In [6]:
# Build the RNA residue by residue; '01A' is the multi-character monomer
# code that is written as {01A} in the string representation.
rna_2 = bpforms.RnaForm()
for residue in ['C', '01A', 'G', 'U']:
    rna_2.seq.append(bpforms.rna_alphabet.monomers[residue])

## Form of a protein

In [7]:
# Build the protein residue by residue from the protein alphabet.
prot_2 = bpforms.ProteinForm()
for residue in ['C', 'V', 'Y', 'T', 'U', 'C']:
    prot_2.seq.append(bpforms.protein_alphabet.monomers[residue])
# Counterpart of 'x-link: [type: "disulfide" | l: 1 | r: 6]' in the string
# representation. l_monomer / r_monomer are the positions of the two linked
# residues (apparently 1-based, since 6 is the last of the six residues).
prot_2.crosslinks.add(bpforms.OntoBond(
    type=bpforms.xlink.crosslinks_onto['disulfide'],
    l_monomer=1, r_monomer=6))

# Get properties of polymers

## Circularity

In [8]:
# True here because dna_1 was parsed with the "| circular" attribute.
dna_1.circular

True

## Residue sequence

In [9]:
# Displays as a list with one Monomer object per residue.
rna_1.seq

[<bpforms.core.Monomer at 0x7f74e9252910>,
 <bpforms.core.Monomer at 0x7f752e3e7cd0>,
 <bpforms.core.Monomer at 0x7f74e92572d0>,
 <bpforms.core.Monomer at 0x7f74e92610d0>]

## Crosslinks

In [10]:
# Displays as a set of bond objects (a single OntoBond for this protein).
prot_1.crosslinks

{<bpforms.core.OntoBond at 0x7f74e6820910>}

## String representation of a polymer

In [11]:
# str() yields the same text that from_str() was given for dna_1.
str(dna_2)

'ACGT | circular'

# Check the equality of polymers

In [12]:
# Compare the programmatically built form with the one parsed from a string.
dna_2.is_equal(dna_1) 

True

# Calculate the properties of a polymer

## Atomic structure

In [13]:
# Returns a tuple: an Open Babel molecule (OBMol) and a nested dict keyed by
# residue position, then by part ('monomer' / 'backbone'), mapping integers
# to integers (presumably per-part atom indices to atom indices in the
# assembled molecule -- confirm against the bpforms documentation).
dna_1.get_structure()

(<openbabel.OBMol; proxy of <Swig Object of type 'OpenBabel::OBMol *' at 0x7f752ebcb540> >,
 {1: {'monomer': {1: 1,
    2: 2,
    3: 3,
    4: 4,
    5: 5,
    6: 6,
    7: 7,
    8: 8,
    9: 9,
    10: 10,
    11: 11,
    13: 12,
    14: 13,
    15: 14,
    16: 15,
    17: 16,
    18: 17,
    19: 18,
    20: 19,
    21: 20,
    22: 21},
   'backbone': {}},
  2: {'monomer': {1: 22,
    2: 23,
    3: 24,
    4: 25,
    5: 26,
    6: 27,
    7: 28,
    8: 29,
    9: 30,
    10: 31,
    11: 32,
    13: 33,
    14: 34,
    15: 35,
    16: 36,
    17: 37,
    18: 38,
    19: 39,
    20: 40},
   'backbone': {}},
  3: {'monomer': {1: 41,
    2: 42,
    3: 43,
    4: 44,
    5: 45,
    6: 46,
    7: 47,
    8: 48,
    9: 49,
    10: 50,
    11: 51,
    13: 52,
    14: 53,
    15: 54,
    16: 55,
    17: 56,
    18: 57,
    19: 58,
    20: 59,
    21: 60,
    22: 61,
    23: 62,
    24: 63},
   'backbone': {}},
  4: {'monomer': {1: 64,
    2: 65,
    3: 66,
    4: 67,
    5: 68,
    6: 69,
   

## SMILES representation of the structure

In [14]:
# Export the polymer's structure as a SMILES string.
dna_1.export('smiles') 

'O1C2CC(OC2COP(=O)([O-])OC2CC(OC2COP(=O)(OC2CC(OC2COP(=O)(OC2CC(OC2COP1(=O)[O-])n1ccc(nc1=O)N)[O-])n1cnc2c1nc(N)[nH]c2=O)[O-])n1cc(C)c(=O)[nH]c1=O)n1cnc2c1ncnc2N'

## Formula

In [15]:
# Returns a mapping from element symbol to the number of atoms of that element.
dna_1.get_formula()

AttrDefault(<class 'float'>, False, {'C': 39.0, 'H': 45.0, 'N': 15.0, 'O': 24.0, 'P': 4.0})

## Charge

In [16]:
# Returns the polymer's charge as an integer.
dna_1.get_charge()

-4

## Unmodified/canonical sequence

In [17]:
# Returns the sequence as a plain string; the modified monomer {01A}
# appears as A in the result.
rna_1.get_canonical_seq() 

'CAGU'