# Chapter 2: BioPython

In [1]:
import Bio

#### Basic Sequence Handling:

In [2]:
from Bio.Seq import Seq
my_seq = Seq("AATCCGGAGGACCGGTGTACTCAGCTCACCGGGGGCATTGCT")

In [3]:
my_seq

Seq('AATCCGGAGGACCGGTGTACTCAGCTCACCGGGGGCATTGCT')

In [4]:
my_seq.complement()

Seq('TTAGGCCTCCTGGCCACATGAGTCGAGTGGCCCCCGTAACGA')

In [5]:
my_seq.reverse_complement()

Seq('AGCAATGCCCCCGGTGAGCTGAGTACACCGGTCCTCCGGATT')

In [6]:
my_seq[2]

'T'

In [7]:
my_seq.count("AG")

2

####  Calculating GC Content:

In [8]:
print("GC Content (%): ", 100 * round(float((my_seq.count("G") + my_seq.count("C")) / len(my_seq) ),4 ))

GC Content (%):  61.9


In [9]:
from Bio.SeqUtils import GC
print("GC Content (%): ", round(GC(my_seq), 2))

GC Content (%):  61.9


#### Slicing Sequences:

In [10]:
my_seq

Seq('AATCCGGAGGACCGGTGTACTCAGCTCACCGGGGGCATTGCT')

In [11]:
len(my_seq)

42

In [12]:
my_seq_slice = my_seq[:3]
my_seq_slice

Seq('AAT')

In [13]:
my_seq_slice1 = my_seq[:3]
my_seq_slice2 = my_seq[3:]


In [14]:
concat_seq = my_seq_slice1 + my_seq_slice2
concat_seq

Seq('AATCCGGAGGACCGGTGTACTCAGCTCACCGGGGGCATTGCT')

In [15]:
len(concat_seq)

42

#### BLAST

In [27]:
from Bio.Blast import NCBIWWW
from Bio.Blast import NCBIXML

In [28]:
result_handle = NCBIWWW.qblast("blastn", "nt", "8332116")
blast_records = NCBIXML.read(result_handle) 

In [40]:
for alignment in blast_records.alignments[:3]:
    for hsp in alignment.hsps: 
        if hsp.expect < 0.01: 
            print ("| ----------------------------------------------- Alignment ---------------------------------------------- |")
            print ("  GI:", alignment.title.split("|")[1] )
            print ("  Ref:", alignment.title.split("|")[3] )
            print ("  Predicted:", alignment.title.split("|")[4].split(": ")[1] )
            print ("  Len:", alignment.length )
            print ("  E-val:", hsp.expect )

| ----------------------------------------------- Alignment ---------------------------------------------- |
  GI: 1219041180
  Ref: XM_021875076.1
  Predicted: Chenopodium quinoa cold-regulated 413 plasma membrane protein 2-like (LOC110697660), mRNA
  Len: 1173
  E-val: 3.74813e-117
| ----------------------------------------------- Alignment ---------------------------------------------- |
  GI: 1226796956
  Ref: XM_021992092.1
  Predicted: Spinacia oleracea cold-regulated 413 plasma membrane protein 2-like (LOC110787470), mRNA
  Len: 672
  E-val: 2.36533e-113
| ----------------------------------------------- Alignment ---------------------------------------------- |
  GI: 731339628
  Ref: XM_010682658.1
  Predicted: Beta vulgaris subsp. vulgaris cold-regulated 413 plasma membrane protein 2 (LOC104895996), mRNA
  Len: 847
  E-val: 6.34706e-108


### Handling PDB Structures:

In [41]:
from Bio.PDB.PDBParser import PDBParser

In [42]:
parser = PDBParser()

In [43]:
structure_id = "4g4k"
filename = "4g4k.pdb"
structure = parser.get_structure(structure_id, filename)



In [44]:
model = structure[0]

In [45]:
model.get_list()

[<Chain id=A>, <Chain id=B>]

In [46]:
chain = model["A"]

In [47]:
chain.get_list()[0]

<Residue GLU het=  resseq=141 icode= >

In [48]:
for model in structure:
    # Do something here with models
    for chain in model:
        # Do something here with chains
        for residue in chain:
            # Do something here with residues
            for atom in residue:
                # Do something here with atoms

IndentationError: expected an indented block (<ipython-input-48-ef5dad2f7d06>, line 8)