In [2]:
pip install biopython

Collecting biopython
  Downloading biopython-1.83-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 3.1/3.1 MB 13.5 MB/s eta 0:00:00
Installing collected packages: biopython
Successfully installed biopython-1.83


In [None]:
# @title 3.1 Sequences act like strings


In [5]:
from Bio.Seq import Seq

# Short example sequence; the following cells handle it much like a Python string.
my_seq = Seq("GATCG")

In [6]:
# len() gives the number of letters in the sequence.
print(len(my_seq))

5


In [7]:
# Iterate over the sequence letter by letter, printing each index next to its letter.
for index, letter in enumerate(my_seq):
    print(f"{index} {letter}")

0 G
1 A
2 T
3 C
4 G


In [8]:
# Integer indexing works as on a string, including negative indices.
print(my_seq[0])  # first letter
print(my_seq[2])  # third letter
print(my_seq[-1])  # last letter

G
T
G


In [9]:
# Number of times the letter "G" appears in my_seq.
print(my_seq.count("G"))

2


In [10]:
from Bio.SeqUtils import gc_fraction

# GC content as a fraction between 0 and 1 (3 of the 5 letters in "GATCG" are G or C).
gc_content = gc_fraction(my_seq)
print(gc_content)

0.6


In [None]:
# @title 3.2 Slicing a sequence


In [11]:
 from Bio.Seq import Seq
my_seq = Seq("GATCGATGGGCCTATATAGGATCGAAAATCGC")
my_seq[4:12]


Seq('GATGGGCC')

In [12]:
# Every third letter, starting at index 0.
my_seq[0::3]

Seq('GCTGTAGTAAG')

In [13]:
# Every third letter, starting at index 1.
my_seq[1::3]


Seq('AGGCATGCATC')

In [14]:
# Every third letter, starting at index 2.
my_seq[2::3]

Seq('TAGCTAAGAC')

In [16]:
# A stride of -1 returns the sequence reversed.
my_seq[::-1]


Seq('CGCTAAAAGCTAGGATATATCCGGGTAGCTAG')

In [None]:
# @title 3.3 Turning Seq objects into strings


In [18]:
# Convert the Seq into a plain Python string.
str(my_seq)

'GATCGATGGGCCTATATAGGATCGAAAATCGC'

In [19]:
# Printing a Seq shows just its letters, without the Seq(...) wrapper.
print(my_seq)


GATCGATGGGCCTATATAGGATCGAAAATCGC


In [20]:
# Hand-built FASTA text: a ">" header line followed by the sequence letters.
# "!s" applies str() to the Seq, matching what "%s" formatting does.
fasta_format_string = f">Name\n{my_seq!s}\n"
print(fasta_format_string)

>Name
GATCGATGGGCCTATATAGGATCGAAAATCGC



In [None]:
# @title 3.4 Concatenating or adding sequences


In [21]:
 from Bio.Seq import Seq
>>> seq1 = Seq("ACGT")
>>> seq2 = Seq("AACCGG")
>>> seq1 + seq2

Seq('ACGTAACCGG')

In [22]:
from Bio.Seq import Seq

# Addition does not check what kind of letters are involved: a protein-like and
# a DNA-like sequence are simply concatenated.
protein_seq = Seq("EVRNAK")
dna_seq = Seq("ACGT")
protein_seq + dna_seq

Seq('EVRNAKACGT')

In [26]:
from Bio.Seq import Seq

# Accumulate several sequences into one, starting from an empty Seq.
list_of_seqs = [Seq("ACGT"), Seq("AACC"), Seq("GGTT")]
concatenated_seq = Seq("")
for seq in list_of_seqs:
    concatenated_seq = concatenated_seq + seq

print(concatenated_seq)

ACGTAACCGGTT


In [30]:
from Bio.Seq import Seq
# Pieces to be stitched together.
contigs = [Seq("ATG"), Seq("ATCCCG"), Seq("TTGCA")]
# Separator of ten "N" letters placed between consecutive pieces.
spacer = Seq("N" * 10)
# join() works like str.join: the spacer appears between items, not at the ends.
joined_sequence = spacer.join(contigs)
print(joined_sequence)

ATGNNNNNNNNNNATCCCGNNNNNNNNNNTTGCA


In [None]:
# @title 3.5 Changing case


In [31]:
from Bio.Seq import Seq

# Mixed-case example used by the upper()/lower() cells below.
dna_seq = Seq("acgtACGT")
dna_seq

Seq('acgtACGT')

In [32]:
# Upper-cased copy of the sequence.
dna_seq.upper()

Seq('ACGTACGT')

In [33]:
# Lower-cased copy of the sequence.
dna_seq.lower()

Seq('acgtacgt')

In [None]:
# @title 3.6 Nucleotide sequences and (reverse) complements


In [35]:
from Bio.Seq import Seq

# Nucleotide example used by the complement / reverse-complement cells below.
my_seq = Seq("GATCGATGGGCCTATATAGGATCGAAAATCGC")
my_seq


Seq('GATCGATGGGCCTATATAGGATCGAAAATCGC')

In [36]:
 my_seq.complement()


Seq('CTAGCTACCCGGATATATCCTAGCTTTTAGCG')

In [37]:
# Complement the bases and reverse their order in a single call.
my_seq.reverse_complement()

Seq('GCGATTTTCGATCCTATATAGGCCCATCGATC')

In [None]:
# Reversed only (no complementing), via a slice with stride -1.
my_seq[::-1]


In [38]:
from Bio.Seq import Seq

# complement() does not refuse a protein-like sequence: the call succeeds and
# returns letters, but complementing amino acids is not a meaningful operation.
protein_seq = Seq("EVRNAK")
protein_seq.complement()

Seq('EBYNTM')

In [None]:
# @title 3.7 Transcription


In [39]:
from Bio.Seq import Seq

# Coding-strand DNA used throughout the transcription examples.
coding_dna = Seq("ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG")
coding_dna


Seq('ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG')

In [40]:
# The template strand is the reverse complement of the coding strand.
template_dna = coding_dna.reverse_complement()
template_dna

Seq('CTATCGGGCACCCTTTCAGCGGCCCATTACAATGGCCAT')

In [41]:
# Show the coding strand again for comparison with the template strand above.
coding_dna

Seq('ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG')

In [42]:
# transcribe() works on the coding strand: every T becomes U, order unchanged.
messenger_rna = coding_dna.transcribe()
messenger_rna

Seq('AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG')

In [43]:
# Starting from the template strand: reverse-complement back to the coding
# strand first, then transcribe. The result matches messenger_rna above.
template_dna.reverse_complement().transcribe()

Seq('AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG')

In [44]:
from Bio.Seq import Seq

# The same mRNA written out literally, as input for back-transcription.
messenger_rna = Seq("AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG")
messenger_rna

Seq('AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG')

In [45]:
# Back-transcription: every U becomes T, recovering the coding-strand DNA.
messenger_rna.back_transcribe()


Seq('ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG')

In [None]:
# @title 3.8 Translation


In [46]:
from Bio.Seq import Seq

# mRNA input for the translation examples.
messenger_rna = Seq("AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG")
messenger_rna

Seq('AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG')

In [47]:
# Translate the mRNA with the default codon table; "*" in the result marks a stop codon.
messenger_rna.translate()

Seq('MAIVMGR*KGAR*')

In [48]:
from Bio.Seq import Seq

# Coding-strand DNA for the same gene; it can be translated directly.
coding_dna = Seq("ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG")
coding_dna

Seq('ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG')

In [49]:
# Translating the DNA directly gives the same protein as translating the mRNA.
coding_dna.translate()

Seq('MAIVMGR*KGAR*')

In [50]:
# Select a codon table by name; with this table the internal TGA codon is read
# as W instead of a stop.
coding_dna.translate(table="Vertebrate Mitochondrial")


Seq('MAIVMGRWKGAR*')

In [51]:
# The same codon table selected by its numeric id (2) instead of its name.
coding_dna.translate(table=2)

Seq('MAIVMGRWKGAR*')

In [52]:
# Default table again, as the baseline for the to_stop example in the next cell.
coding_dna.translate()

Seq('MAIVMGR*KGAR*')

In [53]:
 coding_dna.translate(to_stop=True)

Seq('MAIVMGR')

In [54]:
# Table 2 again, as the baseline for the table=2 variants in the next cells.
coding_dna.translate(table=2)

Seq('MAIVMGRWKGAR*')

In [55]:
# With table 2 the first stop is the final codon, so only the trailing "*" is dropped.
coding_dna.translate(table=2, to_stop=True)

Seq('MAIVMGRWKGAR')

In [56]:
# Mark stop codons with "@" instead of the default "*".
coding_dna.translate(table=2, stop_symbol="@")

Seq('MAIVMGRWKGAR@')

In [57]:
from Bio.Seq import Seq

# Example gene, written as adjacent string literals that Python joins into one string.
gene = Seq(
    "GTGAAAAAGATGCAATCTATCGTACTCGCACTTTCCCTGGTTCTGGTCGCTCCCATGGCA"
    "GCACAGGCTGCGGAAATTACGTTAGTCCCGTCAGTAAAATTACAGATAGGCGATCGTGAT"
    "AATCGTGGCTATTACTGGGATGGAGGTCACTGGCGCGACCACGGCTGGTGGAAACAACAT"
    "TATGAATGGCGAGGCAATCGCTGGCACCTACACGGACCGCCGCCACCGCCGCGCCACCAT"
    "AAGAAAGCTCCTCATGATCATCACGGCGGTCATGGTCCAGGCAAACATCACCGCTAA"
)
# Plain translation with the "Bacterial" table: the leading GTG comes out as V.
gene.translate(table="Bacterial")


Seq('VKKMQSIVLALSLVLVAPMAAQAAEITLVPSVKLQIGDRDNRGYYWDGGHWRDH...HR*')

In [58]:
# Same translation, cut at the first stop codon (no trailing "*").
gene.translate(table="Bacterial", to_stop=True)

Seq('VKKMQSIVLALSLVLVAPMAAQAAEITLVPSVKLQIGDRDNRGYYWDGGHWRDH...HHR')

In [59]:
# cds=True treats the input as a complete coding sequence: the GTG start codon
# is now translated as M and the final stop is dropped (compare the outputs above).
gene.translate(table="Bacterial", cds=True)

Seq('MKKMQSIVLALSLVLVAPMAAQAAEITLVPSVKLQIGDRDNRGYYWDGGHWRDH...HHR')

In [3]:
# @title 3.9 Translation Tables


In [4]:
from Bio.Data import CodonTable

# Look up codon tables by their names.
standard_table = CodonTable.unambiguous_dna_by_name["Standard"]
mito_table = CodonTable.unambiguous_dna_by_name["Vertebrate Mitochondrial"]

In [5]:
from Bio.Data import CodonTable

# Alternative lookup by numeric id: 1 for the standard table, 2 for the
# vertebrate mitochondrial table.
standard_table = CodonTable.unambiguous_dna_by_id[1]
mito_table = CodonTable.unambiguous_dna_by_id[2]


In [10]:
 print(standard_table)

Table 1 Standard, SGC0

  |  T      |  C      |  A      |  G      |
--+---------+---------+---------+---------+--
T | TTT F   | TCT S   | TAT Y   | TGT C   | T
T | TTC F   | TCC S   | TAC Y   | TGC C   | C
T | TTA L   | TCA S   | TAA Stop| TGA Stop| A
T | TTG L(s)| TCG S   | TAG Stop| TGG W   | G
--+---------+---------+---------+---------+--
C | CTT L   | CCT P   | CAT H   | CGT R   | T
C | CTC L   | CCC P   | CAC H   | CGC R   | C
C | CTA L   | CCA P   | CAA Q   | CGA R   | A
C | CTG L(s)| CCG P   | CAG Q   | CGG R   | G
--+---------+---------+---------+---------+--
A | ATT I   | ACT T   | AAT N   | AGT S   | T
A | ATC I   | ACC T   | AAC N   | AGC S   | C
A | ATA I   | ACA T   | AAA K   | AGA R   | A
A | ATG M(s)| ACG T   | AAG K   | AGG R   | G
--+---------+---------+---------+---------+--
G | GTT V   | GCT A   | GAT D   | GGT G   | T
G | GTC V   | GCC A   | GAC D   | GGC G   | C
G | GTA V   | GCA A   | GAA E   | GGA G   | A
G | GTG V   | GCG A   | GAG E   | GGG G   | G
--+---------+---------+---------+---------+--

In [11]:
# Text grid for the vertebrate mitochondrial table, for comparison with the standard one.
print(mito_table)

Table 2 Vertebrate Mitochondrial, SGC1

  |  T      |  C      |  A      |  G      |
--+---------+---------+---------+---------+--
T | TTT F   | TCT S   | TAT Y   | TGT C   | T
T | TTC F   | TCC S   | TAC Y   | TGC C   | C
T | TTA L   | TCA S   | TAA Stop| TGA W   | A
T | TTG L   | TCG S   | TAG Stop| TGG W   | G
--+---------+---------+---------+---------+--
C | CTT L   | CCT P   | CAT H   | CGT R   | T
C | CTC L   | CCC P   | CAC H   | CGC R   | C
C | CTA L   | CCA P   | CAA Q   | CGA R   | A
C | CTG L   | CCG P   | CAG Q   | CGG R   | G
--+---------+---------+---------+---------+--
A | ATT I(s)| ACT T   | AAT N   | AGT S   | T
A | ATC I(s)| ACC T   | AAC N   | AGC S   | C
A | ATA M(s)| ACA T   | AAA K   | AGA Stop| A
A | ATG M(s)| ACG T   | AAG K   | AGG Stop| G
--+---------+---------+---------+---------+--
G | GTT V   | GCT A   | GAT D   | GGT G   | T
G | GTC V   | GCC A   | GAC D   | GGC G   | C
G | GTA V   | GCA A   | GAA E   | GGA G   | A
G | GTG V(s)| GCG A   | GAG E   | GGG G   | G
--+---------+---------+---------+---------+--

In [12]:
 mito_table.stop_codons

['TAA', 'TAG', 'AGA', 'AGG']

In [13]:
# List of codons this table accepts as starts.
mito_table.start_codons

['ATT', 'ATC', 'ATA', 'ATG', 'GTG']

In [14]:
# forward_table maps a codon to its one-letter amino acid.
mito_table.forward_table["ACG"]


'T'

In [None]:
# @title 3.10 Comparing Seq objects


In [16]:
 from Bio.Seq import Seq
>>> seq1 = Seq("ACGT")
>>> "ACGT" == seq1

True

In [17]:
 seq1 == "ACGT"

True

In [None]:
# @title 3.11 Sequences with unknown sequence contents


In [22]:
from Bio.Seq import Seq
# Sequence whose letters are unknown but whose length (10) is known.
unknown_seq = Seq(None, length=10)


In [23]:
# The repr shows None for the contents together with the declared length.
unknown_seq

Seq(None, length=10)

In [24]:
# The length is available even though the letters are not.
len(unknown_seq)

10

In [None]:
# @title 3.12 Sequences with partially defined sequence contents


In [26]:
from Bio.Seq import Seq

# Partially defined sequence: the dict maps a start position to the known
# letters (36 letters starting at 117512683); everything else up to `length`
# is undefined.
seq = Seq({117512683: "TTGAAAACCTGAATGTGAGAGTCAGTCAAGGATAGT"}, length=159345973)

In [27]:
# A slice that lies entirely in the undefined region: contents None, length 20.
seq[1000:1020]

Seq(None, length=20)

In [28]:
 seq[117512690:117512700]

Seq('CCTGAATGTG')

In [29]:
# A slice that straddles the boundary is itself partially defined: the known
# letters begin at offset 13 of the 20-letter result.
seq[117512670:117512690]

Seq({13: 'TTGAAAA'}, length=20)

In [30]:
# Open-ended slice: the remaining known letters followed by the undefined tail.
seq[117512700:]

Seq({0: 'AGAGTCAGTCAAGGATAGT'}, length=41833273)

In [31]:
# Concatenating defined and undefined sequences gives a partially defined
# sequence, with the known pieces at offsets 0 and 14.
seq = Seq("ACGT")
undefined_seq = Seq(None, length=10)
seq + undefined_seq + seq

Seq({0: 'ACGT', 14: 'ACGT'}, length=18)

In [None]:
# @title 3.13 MutableSeq objects


In [32]:
from Bio.Seq import Seq

# Starting sequence for the MutableSeq examples.
my_seq = Seq("GCCATTGTAATGGGCCGCTGAAAGGGTGCCCGA")

In [34]:
from Bio.Seq import MutableSeq

# Build an editable MutableSeq from an existing Seq.
mutable_seq = MutableSeq(my_seq)
mutable_seq

MutableSeq('GCCATTGTAATGGGCCGCTGAAAGGGTGCCCGA')

In [35]:
from Bio.Seq import MutableSeq

# A MutableSeq can also be created directly from a string.
mutable_seq = MutableSeq("GCCATTGTAATGGGCCGCTGAAAGGGTGCCCGA")

In [36]:
# Display the MutableSeq before it is modified.
mutable_seq

MutableSeq('GCCATTGTAATGGGCCGCTGAAAGGGTGCCCGA')

In [37]:
# Item assignment edits the MutableSeq in place: the letter at index 5 becomes "C".
mutable_seq[5] = "C"
mutable_seq

MutableSeq('GCCATCGTAATGGGCCGCTGAAAGGGTGCCCGA')

In [38]:
# remove() deletes the first occurrence of "T", in place.
mutable_seq.remove("T")
mutable_seq

MutableSeq('GCCACGTAATGGGCCGCTGAAAGGGTGCCCGA')

In [39]:
# reverse() flips the order of the letters in place.
mutable_seq.reverse()
mutable_seq

MutableSeq('AGCCCGTGGGAAAGTCGCCGGGTAATGCACCG')

In [40]:
from Bio.Seq import Seq

# Convert the edited MutableSeq back into a regular Seq.
new_seq = Seq(mutable_seq)
new_seq

Seq('AGCCCGTGGGAAAGTCGCCGGGTAATGCACCG')

In [None]:
# @title 3.14 Finding subsequences


In [41]:
from Bio.Seq import Seq, MutableSeq

# index() returns the position where the subsequence first occurs.
seq = Seq("GCCATTGTAATGGGCCGCTGAAAGGGTGCCCGA")
seq.index("ATGGGCCGC")

9

In [43]:
# The subsequence may also be given as bytes.
seq.index(b"ATGGGCCGC")

9

In [44]:
# ... or as a bytearray.
seq.index(bytearray(b"ATGGGCCGC"))

9

In [45]:
# ... or as another Seq.
seq.index(Seq("ATGGGCCGC"))

9

In [46]:
# ... or as a MutableSeq.
seq.index(MutableSeq("ATGGGCCGC"))

9

In [47]:
# find() returns -1 when the subsequence is not present.
seq.find("ACTG")

-1

In [48]:
# Position of the first occurrence of "CC".
seq.find("CC")

1

In [49]:
# rfind() searches from the right: position of the last occurrence of "CC".
seq.rfind("CC")

29

In [None]:
# @title 3.15 Working with strings directly


In [53]:
from Bio.Seq import reverse_complement, transcribe, back_transcribe, translate

# The module-level functions accept a plain Python string and return a string.
my_string = "GCTGTTATGGGTCGTTGGAAGGGTGGTCGTGCTGCTGGTTAG"
reverse_complement(my_string)

'CTAACCAGCAGCACGACCACCCTTCCAACGACCCATAACAGC'

In [54]:
# Transcribe the DNA string: every T becomes U.
transcribe(my_string)

'GCUGUUAUGGGUCGUUGGAAGGGUGGUCGUGCUGCUGGUUAG'

In [55]:
# my_string is already DNA (it contains no U), so back-transcription returns it unchanged.
back_transcribe(my_string)

'GCTGTTATGGGTCGTTGGAAGGGTGGTCGTGCTGCTGGTTAG'

In [56]:
# Translate the DNA string with the default codon table; "*" marks the stop codon.
translate(my_string)

'AVMGRWKGGRAAG*'