### Sequences act like strings

In [1]:
from Bio.Seq import Seq
my_seq = Seq("GATCG")
# Walk the sequence letter by letter, printing each position next to its letter
for position, base in enumerate(my_seq):
    row = "%i %s" % (position, base)
    print(row)

0 G
1 A
2 T
3 C
4 G


In [2]:
# len() works on a Seq the same way it does on a string: it counts the letters
print(len(my_seq))

5


In [3]:
# Index into a Seq like a string; positions are zero-based
print(my_seq[0])  # first letter

G


In [4]:
print(my_seq[2])  # third letter (index 2, counting from zero)

T


In [5]:
print(my_seq[-1])  # last letter (negative indices count back from the end)

G


### Slicing a sequence

In [6]:
from Bio.Seq import Seq
my_seq = Seq("GATCGATGGGCCTATATAGGATCGAAAATCGC")
# Slice from index 4 up to, but not including, index 12;
# the start is included, the stop is excluded, and the result is again a Seq
my_seq[4:12]

Seq('GATGGGCC')

In [7]:
# Slices take the form start:stop:stride. Here start=0, stop is omitted (so the
# slice runs to the end) and stride=3: every third letter, beginning with the first
my_seq[0::3]

Seq('GCTGTAGTAAG')

In [8]:
# Every third letter, beginning with the second one (index 1)
my_seq[1::3]

Seq('AGGCATGCATC')

In [9]:
# Every third letter, beginning with the third one (index 2)
my_seq[2::3]

Seq('TAGCTAAGAC')

In [10]:
# A stride of -1 with no start or stop walks the sequence backwards,
# giving the reversed sequence (the same trick used on Python strings)
my_seq[::-1]

Seq('CGCTAAAAGCTAGGATATATCCGGGTAGCTAG')

### Turning Seq objects into strings

In [11]:
# str() turns the Seq into a plain Python string holding the full sequence
str(my_seq)

'GATCGATGGGCCTATATAGGATCGAAAATCGC'

In [12]:
# print() shows the sequence text itself rather than the Seq('...') representation
print(my_seq)

GATCGATGGGCCTATATAGGATCGAAAATCGC


In [13]:
# Assemble a minimal FASTA record: a ">" header line followed by the sequence line
fasta_format_string = ">Name\n%s\n" % (my_seq,)
print(fasta_format_string)

>Name
GATCGATGGGCCTATATAGGATCGAAAATCGC



### Concatenating or adding sequences

In [14]:
# Adding two Seq objects concatenates them into a new Seq.
# Nothing stops a protein and a DNA sequence from being joined, as the output
# shows, so it is up to the caller to make sure the combination is meaningful.
from Bio.Seq import Seq
protein_seq = Seq("EVRNAK")
dna_seq = Seq("ACGT")
# The sum must be the last expression so the notebook displays the real result
protein_seq + dna_seq

Seq('EVRNAKACGT')

Use a loop in case there are a lot of sequences to concatenate:

In [15]:
from Bio.Seq import Seq
list_of_seqs = [Seq("ACGT"), Seq("AACC"), Seq("GGTT")]
# Begin with an empty Seq and append every entry in list order
concatenated = Seq("")
for fragment in list_of_seqs:
    concatenated += fragment

In [16]:
# Display the sequence accumulated by the loop in the previous cell
concatenated

Seq('ACGTAACCGGTT')

Another approach uses the `.join` method, which inserts a spacer sequence between the items being joined:

In [17]:
from Bio.Seq import Seq
contigs = [Seq("ATG"), Seq("ATCCCG"), Seq("TTGCA")]
# A run of ten N's to place between consecutive contigs
spacer = Seq("N"*10)
# As with str.join, the spacer goes between the items, not before or after them
spacer.join(contigs)

Seq('ATGNNNNNNNNNNATCCCGNNNNNNNNNNTTGCA')

### Changing case

In [18]:
# Seq offers the same upper()/lower() case methods as a Python string
from Bio.Seq import Seq
dna_seq = Seq("acgtACGT")  # deliberately mixed case, used by the cells below
dna_seq

Seq('acgtACGT')

In [19]:
# Returns an upper-case copy; dna_seq itself keeps its mixed case
dna_seq.upper()

Seq('ACGTACGT')

In [20]:
# Returns a lower-case copy of the sequence
dna_seq.lower()

Seq('acgtacgt')

In [21]:
# Membership tests are case-sensitive: the mixed-case sequence holds "gtAC", not "GTAC"
"GTAC" in dna_seq

False

In [22]:
# Normalise the case first so the search matches regardless of the original casing
"GTAC" in dna_seq.upper()

True

### Nucleotide sequences and (reverse) complements

In [23]:
from Bio.Seq import Seq
my_seq = Seq("GATCGATGGGCCTATATAGGATCGAAAATCGC")
# complement() swaps each base for its partner (A<->T, C<->G), keeping the order
my_seq.complement()

Seq('CTAGCTACCCGGATATATCCTAGCTTTTAGCG')

In [24]:
# reverse_complement() gives the complement read in the opposite direction
my_seq.reverse_complement()

Seq('GCGATTTTCGATCCTATATAGGCCCATCGATC')