# Chapter 2

In [1]:
from Bio.Seq import Seq

In [2]:
# Build a Seq object from a plain DNA string.
my_seq = Seq("AGTACACTGGT")

In [6]:
# Show the sequence next to its complement and its reverse complement.
for label, value in (
    ('my_seq : ', my_seq),
    ('my_seq.complement : ', my_seq.complement()),
    ('my_seq.reverse_complement : ', my_seq.reverse_complement()),
):
    # Two arguments to print(), so label and value are separated by a space.
    print(label, value)

my_seq :  AGTACACTGGT
my_seq.complement :  TCATGTGACCA
my_seq.reverse_complement :  ACCAGTGTACT


## Reading a FASTA file

In [None]:
from Bio import SeqIO
# SeqIO.parse is given the file name and the format name; the loop below
# consumes what it returns one record at a time.
SequenceFast = SeqIO.parse('ls_orchid.fasta','fasta')
for record in SequenceFast:
    # For each record: its identifier, its sequence, the sequence's repr(),
    # and len(record), followed by extra newlines as a visual gap.
    print("Record id : ",record.id)
    print('Record seq : ',record.seq)
    print('Record repr seq : ',repr(record.seq))
    print('Record len : ',len(record),'\n\n')

In [14]:
# str() on a Seq gives a plain Python string, so this cell displays `str`.
# `record` is the loop variable left bound by the FASTA-reading loop, so that
# cell must have been run first.
type(str(record.seq))

str

## Reading a GenBank (gbk) file

In [None]:
from Bio import SeqIO
# Same pattern as for FASTA, but with a GenBank file and the 'genbank' format name.
SequenceGBK = SeqIO.parse('ls_orchid.gbk','genbank')
for record in SequenceGBK:
    # For each record: its identifier, its sequence, the sequence's repr(),
    # and len(record), followed by extra newlines as a visual gap.
    print("Record id : ",record.id)
    print('Record seq : ',record.seq)
    print('Record repr seq : ',repr(record.seq))
    print('Record len : ',len(record),'\n\n')

# Chapter 3

In [16]:
from Bio.Seq import Seq

In [19]:
my_seq = Seq("GATCG")

# Walk the sequence letter by letter, printing each zero-based position with its base.
for position, base in enumerate(my_seq):
    print('%i: %s' % (position, base))

0: G
1: A
2: T
3: C
4: G


In [20]:
# A Seq supports len(): the number of letters in the sequence.
len(my_seq)

5

In [21]:
# Slicing a Seq gives back another Seq (here, the first three letters).
my_seq[:3]

Seq('GAT')

In [26]:
my_seq = Seq("AAAAA")
# count() tallies non-overlapping matches, which is why 'AA' is found
# twice in 'AAAAA' rather than four times.
# NOTE(review): Biopython's Seq also offers count_overlap() for overlapping
# matches - confirm against the installed version.
my_seq.count('AA')

2

### GC%

In [31]:
my_seq = Seq("GATCGATGGGCCTATATAGGATCGAAAATCGC")

# GC content as a percentage: the share of letters that are G or C.
GC = 100 * (sum(my_seq.count(base) for base in ('G', 'C')) / len(my_seq))
print("GC% : ", GC)

GC% :  46.875


In [33]:
from Bio.SeqUtils import gc_fraction

# gc_fraction is scaled by 100 here so the figure is comparable with the
# hand-computed GC percentage.
print('gc_fraction: ', 100 * gc_fraction(my_seq))

gc_fraction:  46.875


## Transcription

In [34]:
coding_dna = Seq("ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG")
# transcribe() turns the coding-strand DNA into mRNA: every T becomes U
# (compare the input string with the displayed result).
mRNA=coding_dna.transcribe()
mRNA

Seq('AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG')

In [35]:
# back_transcribe() goes the other way (U -> T), recovering the DNA letters of coding_dna.
mRNA.back_transcribe()

Seq('ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG')

## Translation

### `translate()` converts the mRNA into the corresponding protein sequence

In [38]:
messenger_rna = Seq("AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG")
# translate() with no arguments translates the whole sequence; each '*' in
# the result marks a stop codon.
protein_seq_rna=messenger_rna.translate()
protein_seq_rna

Seq('MAIVMGR*KGAR*')

### You can also translate directly from the coding strand DNA sequence

In [39]:
coding_dna = Seq("ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG")
# translate() also accepts the coding-strand DNA directly; the result is the
# same protein string as for the mRNA.
protein_seq_dna = coding_dna.translate()
protein_seq_dna

Seq('MAIVMGR*KGAR*')

### If you pass `to_stop=True` to `translate()`, translation stops at the first stop codon (`*`), which is not included in the result.

In [40]:
# to_stop=True ends translation at the first stop codon; the '*' itself is
# not part of the result. This rebinds protein_seq_dna to the shorter protein.
protein_seq_dna = coding_dna.translate(to_stop=True)
protein_seq_dna

Seq('MAIVMGR')

## MutableSeq

### A `Seq` is normally immutable: its letters cannot be changed in place. To edit a sequence, convert it to a `MutableSeq`, which supports in-place changes such as item assignment.

In [41]:
from Bio.Seq import MutableSeq

In [43]:
my_seq = Seq('GCCATTGTAATGGGCCGCTGAAAGGGTGCCCGA')
mutable_seq=MutableSeq(my_seq)  # Build an editable MutableSeq from the letters of my_seq

In [44]:
# Item assignment works on a MutableSeq: the letter at zero-based index 5 is
# replaced in place.
mutable_seq[5]='X'
mutable_seq

MutableSeq('GCCATXGTAATGGGCCGCTGAAAGGGTGCCCGA')

In [None]:
immutable_seq=Seq(mutable_seq)  # Build a Seq from the MutableSeq to get an immutable sequence again

# Chapter 4 SeqRecord

In [1]:
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord

In [2]:
simple_seq = Seq('GATC')
# A SeqRecord built from only a Seq gets placeholder id/name/description
# values, as the displayed repr shows.
simple_seq_record = SeqRecord(simple_seq)
simple_seq_record

SeqRecord(seq=Seq('GATC'), id='<unknown id>', name='<unknown name>', description='<unknown description>', dbxrefs=[])

In [5]:
# Option 1: fill in the metadata by assigning to the existing record's attributes.
simple_seq_record.id='A487'
simple_seq_record.name = 'Saxy'
simple_seq_record.description = 'This seq has no description!'
print(simple_seq_record)

# Option 2: pass the metadata as keyword arguments when building the record.
# NOTE: this rebinds simple_seq_record to a new SeqRecord, and the name used
# here ('seq1') differs from the one set above ('Saxy'), so the two printouts
# differ on the Name line.

simple_seq_record = SeqRecord(simple_seq, id='A487', name = 'seq1', description = 'This seq has no description!')
print(simple_seq_record)

ID: A487
Name: Saxy
Description: This seq has no description!
Number of features: 0
Seq('GATC')
ID: A487
Name: seq1
Description: This seq has no description!
Number of features: 0
Seq('GATC')


In [17]:
from Bio import SeqIO

# SeqIO.read loads the file as a single SeqRecord, as the displayed repr shows.
record1 = SeqIO.read('NC_005816.fna','fasta')

record1

SeqRecord(seq=Seq('TGTAACGAACGGTGCAATAGTGATCCACACCCAACGCCTGAAATCAGATCCAGG...CTG'), id='gi|45478711|ref|NC_005816.1|', name='gi|45478711|ref|NC_005816.1|', description='gi|45478711|ref|NC_005816.1| Yersinia pestis biovar Microtus str. 91001 plasmid pPCP1, complete sequence', dbxrefs=[])

In [23]:
def printSeqDetails(record):
    """Print the main fields of a sequence record, one field per line.

    Each line is a fixed label followed by the field's value, in this order:
    seq, id, name, description, annotations, letter_annotations, dbxrefs,
    features.

    Args:
        record: Any object exposing those eight attributes (the notebook
            passes the records returned by ``SeqIO.read``).

    Returns:
        None. The report is written with ``print``.
    """
    labelled_fields = (
        ('record.seq : ', 'seq'),
        ('record.id : ', 'id'),
        ('record.name : ', 'name'),
        ('record.description : ', 'description'),
        ('record.annotations : ', 'annotations'),
        ('record.letter_annotations : ', 'letter_annotations'),
        ('record.dbxrefs : ', 'dbxrefs'),
        ('record.features : ', 'features'),
    )
    for label, attribute in labelled_fields:
        # Each attribute is looked up only when its line is printed, and the
        # two print() arguments are separated by a space.
        print(label, getattr(record, attribute))

In [24]:
# Print every field of the record that was read from the FASTA file.
printSeqDetails(record1)

record.seq :  TGTAACGAACGGTGCAATAGTGATCCACACCCAACGCCTGAAATCAGATCCAGGGGGTAATCTGCTCTCCTGATTCAGGAGAGTTTATGGTCACTTTTGAGACAGTTATGGAAATTAAAATCCTGCACAAGCAGGGAATGAGTAGCCGGGCGATTGCCAGAGAACTGGGGATCTCCCGCAATACCGTTAAACGTTATTTGCAGGCAAAATCTGAGCCGCCAAAATATACGCCGCGACCTGCTGTTGCTTCACTCCTGGATGAATACCGGGATTATATTCGTCAACGCATCGCCGATGCTCATCCTTACAAAATCCCGGCAACGGTAATCGCTCGCGAGATCAGAGACCAGGGATATCGTGGCGGAATGACCATTCTCAGGGCATTCATTCGTTCTCTCTCGGTTCCTCAGGAGCAGGAGCCTGCCGTTCGGTTCGAAACTGAACCCGGACGACAGATGCAGGTTGACTGGGGCACTATGCGTAATGGTCGCTCACCGCTTCACGTGTTCGTTGCTGTTCTCGGATACAGCCGAATGCTGTACATCGAATTCACTGACAATATGCGTTATGACACGCTGGAGACCTGCCATCGTAATGCGTTCCGCTTCTTTGGTGGTGTGCCGCGCGAAGTGTTGTATGACAATATGAAAACTGTGGTTCTGCAACGTGACGCATATCAGACCGGTCAGCACCGGTTCCATCCTTCGCTGTGGCAGTTCGGCAAGGAGATGGGCTTCTCTCCCCGACTGTGTCGCCCCTTCAGGGCACAGACTAAAGGTAAGGTGGAACGGATGGTGCAGTACACCCGTAACAGTTTTTACATCCCACTAATGACTCGCCTGCGCCCGATGGGGATCACTGTCGATGTTGAAACAGCCAACCGCCACGGTCTGCGCTGGCTGCACGATGTCGCTAACCAACGAAAGCATGAAACAATCCAGGCCCGTCCCTGCGATCGCTGGCTCGAAGAGCAGCAGTCCATGCTG

In [20]:
# Read the GenBank file for the same accession (NC_005816) with the 'genbank' format name.
record2 = SeqIO.read('NC_005816.gb','genbank')

In [25]:
# Print every field of the record that was read from the GenBank file.
printSeqDetails(record2)

record.seq :  TGTAACGAACGGTGCAATAGTGATCCACACCCAACGCCTGAAATCAGATCCAGGGGGTAATCTGCTCTCCTGATTCAGGAGAGTTTATGGTCACTTTTGAGACAGTTATGGAAATTAAAATCCTGCACAAGCAGGGAATGAGTAGCCGGGCGATTGCCAGAGAACTGGGGATCTCCCGCAATACCGTTAAACGTTATTTGCAGGCAAAATCTGAGCCGCCAAAATATACGCCGCGACCTGCTGTTGCTTCACTCCTGGATGAATACCGGGATTATATTCGTCAACGCATCGCCGATGCTCATCCTTACAAAATCCCGGCAACGGTAATCGCTCGCGAGATCAGAGACCAGGGATATCGTGGCGGAATGACCATTCTCAGGGCATTCATTCGTTCTCTCTCGGTTCCTCAGGAGCAGGAGCCTGCCGTTCGGTTCGAAACTGAACCCGGACGACAGATGCAGGTTGACTGGGGCACTATGCGTAATGGTCGCTCACCGCTTCACGTGTTCGTTGCTGTTCTCGGATACAGCCGAATGCTGTACATCGAATTCACTGACAATATGCGTTATGACACGCTGGAGACCTGCCATCGTAATGCGTTCCGCTTCTTTGGTGGTGTGCCGCGCGAAGTGTTGTATGACAATATGAAAACTGTGGTTCTGCAACGTGACGCATATCAGACCGGTCAGCACCGGTTCCATCCTTCGCTGTGGCAGTTCGGCAAGGAGATGGGCTTCTCTCCCCGACTGTGTCGCCCCTTCAGGGCACAGACTAAAGGTAAGGTGGAACGGATGGTGCAGTACACCCGTAACAGTTTTTACATCCCACTAATGACTCGCCTGCGCCCGATGGGGATCACTGTCGATGTTGAAACAGCCAACCGCCACGGTCTGCGCTGGCTGCACGATGTCGCTAACCAACGAAAGCATGAAACAATCCAGGCCCGTCCCTGCGATCGCTGGCTCGAAGAGCAGCAGTCCATGCTG

In [26]:
# Number of entries in the GenBank record's features list.
len(record2.features)

41

In [28]:
# print() on a feature gives a readable summary: its type, location and qualifiers.
print(record2.features[0])

type: source
location: [0:9609](+)
qualifiers:
    Key: biovar, Value: ['Microtus']
    Key: db_xref, Value: ['taxon:229193']
    Key: mol_type, Value: ['genomic DNA']
    Key: organism, Value: ['Yersinia pestis biovar Microtus str. 91001']
    Key: plasmid, Value: ['pPCP1']
    Key: strain, Value: ['91001']

