In [56]:
!pip install biopython



The FASTA data for the P. graminis (f. sp. tritici) actin DNA was taken from NCBI.
The data was copied, pasted into Notepad, and saved as 'file.fasta'.

Saving the data as .fasta is important for retrieving and parsing the nucleotide sequence.

In [57]:
from Bio import SeqIO


# For every record in the FASTA file, print three lines: its identifier,
# the repr of its sequence, and the sequence length.
for seq_record in SeqIO.parse("file.fasta", "fasta"):
    print(seq_record.id, repr(seq_record.seq), len(seq_record), sep="\n")



X77857.1
Seq('TCAGATCAACCTTGATTTATGATGATCAAAAGTCTTTAAATAGATATTGTTGAT...TCT', SingleLetterAlphabet())
2627


In [58]:
from Bio import SeqIO
# `sequences` holds the records kept for the following cells; `count` tracks
# how many have been kept so far (at most six).
sequences = []
count = 0

for seq_record in SeqIO.parse("file.fasta", "fasta"):
    if count >= 6:
        # Past the sixth record nothing is stored or printed, but the loop
        # still walks to the end of the file.
        continue
    sequences.append(seq_record)
    # Length is shown with thousands separators, e.g. 2,627.
    print("Id: " + seq_record.id + " \t " + "Length: " + "{:,d}".format(len(seq_record)))
    print(repr(seq_record.seq) + "\n")
    count += 1

Id: X77857.1 	 Length: 2,627
Seq('TCAGATCAACCTTGATTTATGATGATCAAAAGTCTTTAAATAGATATTGTTGAT...TCT', SingleLetterAlphabet())



In [59]:
# Show the list of SeqRecord objects collected by the previous cell.
print(sequences)

[SeqRecord(seq=Seq('TCAGATCAACCTTGATTTATGATGATCAAAAGTCTTTAAATAGATATTGTTGAT...TCT', SingleLetterAlphabet()), id='X77857.1', name='X77857.1', description='X77857.1 P.graminis (f.sp. tritici) actin DNA', dbxrefs=[])]


In [60]:
# `seq_record` is not assigned in this cell: it is the loop variable left over
# from the earlier `for seq_record in SeqIO.parse(...)` loops, so this prints
# the last record those loops visited.
print(seq_record)

ID: X77857.1
Name: X77857.1
Description: X77857.1 P.graminis (f.sp. tritici) actin DNA
Number of features: 0
Seq('TCAGATCAACCTTGATTTATGATGATCAAAAGTCTTTAAATAGATATTGTTGAT...TCT', SingleLetterAlphabet())


## Reverse Complement

In [62]:
from Bio.Seq import Seq
from Bio.Alphabet import IUPAC
# Example DNA sequence tagged with the unambiguous DNA alphabet.
my_seq = Seq("GATCGATGGGCCTATATAGGATCGAAAATCGC", IUPAC.unambiguous_dna)

# Only the last expression of a cell is displayed automatically, so the
# intermediate values are printed explicitly. (The original cell contained
# pasted tutorial output lines that ran as throw-away Seq(...) statements.)
print(repr(my_seq))

# complement() swaps each base for its pairing partner and keeps the order.
print(repr(my_seq.complement()))

# reverse_complement() is the complement read in the opposite direction;
# as the last expression it becomes the cell's displayed result.
my_seq.reverse_complement()

Seq('GCGATTTTCGATCCTATATAGGCCCATCGATC', IUPACUnambiguousDNA())

In [63]:
# Slicing with a step of -1 gives the bases in reverse order only; unlike
# reverse_complement(), the bases themselves are not complemented.
my_seq[::-1]

Seq('CGCTAAAAGCTAGGATATATCCGGGTAGCTAG', IUPACUnambiguousDNA())

## **Transcription**

In [64]:
from Bio.Seq import Seq
from Bio.Alphabet import IUPAC
# Example DNA used as the coding strand in the transcription and translation
# cells below; the trailing bare name displays it as the cell result.
coding_dna = Seq("ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG", IUPAC.unambiguous_dna)
coding_dna

Seq('ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG', IUPACUnambiguousDNA())

In [65]:
# The template strand is obtained as the reverse complement of the coding strand.
template_dna = coding_dna.reverse_complement()

In [66]:
# Display the template strand computed in the previous cell.
template_dna

Seq('CTATCGGGCACCCTTTCAGCGGCCCATTACAATGGCCAT', IUPACUnambiguousDNA())

In [67]:
# transcribe() is called on the coding strand (not the template); the recorded
# output is the same sequence with every T replaced by U.
messenger_rna = coding_dna.transcribe()

In [68]:
# Display the transcribed mRNA.
messenger_rna

Seq('AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG', IUPACUnambiguousRNA())

## **Translation**

In [69]:
# Re-display the mRNA as the starting point for translation.
messenger_rna

Seq('AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG', IUPACUnambiguousRNA())

In [70]:
# Translate the mRNA into protein. In the recorded output stop codons appear
# as '*' and translation continues past the first one.
messenger_rna.translate()

Seq('MAIVMGR*KGAR*', HasStopCodon(IUPACProtein(), '*'))

In [71]:
# Translating the coding DNA directly gives the same protein as translating
# the mRNA (compare the two recorded outputs).
coding_dna.translate()

Seq('MAIVMGR*KGAR*', HasStopCodon(IUPACProtein(), '*'))

## Single Alignment

In [72]:
from Bio import SeqIO

# Re-save the FASTA records in Stockholm format so that AlignIO can load them
# as an alignment in the next cell.
records = SeqIO.parse("file.fasta", "fasta")
# SeqIO.write's return value is reported below as the number of records
# converted. Note that this rebinds `count`, which an earlier cell used as a
# loop counter.
count = SeqIO.write(records, "file.sth", "stockholm")
print("Converted %i records" % count)

Converted 1 records


In [73]:
from Bio import AlignIO
# Load the Stockholm file written by the previous cell as an alignment object.
alignment = AlignIO.read("file.sth", "stockholm")

In [74]:
# Summary view: a header with the alphabet and row/column counts, followed by
# one truncated line per sequence in the alignment.
print(alignment)

SingleLetterAlphabet() alignment with 1 rows and 2627 columns
TCAGATCAACCTTGATTTATGATGATCAAAAGTCTTTAAATAGA...TCT X77857.1


## EMBOSS Needle and Water

In [75]:
from Bio.Emboss.Applications import NeedleCommandline
# Build the EMBOSS needle command line from keyword options. Nothing is
# executed here; printing the object shows the command string it represents.
needle_cline = NeedleCommandline(
    asequence="alpha.faa",
    bsequence="beta.faa",
    gapopen=10,
    gapextend=0.5,
    outfile="needle.txt",
)
print(needle_cline)

needle -outfile=needle.txt -asequence=alpha.faa -bsequence=beta.faa -gapopen=10 -gapextend=0.5


In [76]:
from Bio.Emboss.Applications import NeedleCommandline

In [77]:
# Same needle command as in the earlier cell, but with the path to the needle
# executable given explicitly as the first argument.
# The original cell carried pasted doctest "..." continuation prompts inside
# the call, which made it a SyntaxError; they are removed here.
# NOTE(review): the Windows path is machine-specific -- adjust it to the local
# EMBOSS installation.
needle_cline = NeedleCommandline(
    r"C:\EMBOSS\needle.exe",
    asequence="alpha.faa",
    bsequence="beta.faa",
    gapopen=10,
    gapextend=0.5,
    outfile="needle.txt",
)