In [1]:
from Bio.Seq import Seq
from Bio.Alphabet import IUPAC

In [2]:
# Build a DNA sequence tagged with the IUPAC unambiguous-DNA alphabet.
# NOTE(review): Bio.Alphabet only exists in older Biopython releases — confirm
# the pinned Biopython version before re-running this notebook.
my_seq = Seq(
    "AGTACACTGGT",
    alphabet=IUPAC.unambiguous_dna,
)
my_seq

Seq('AGTACACTGGT', IUPACUnambiguousDNA())

In [4]:
# Same letters as the DNA example, but tagged with the IUPAC protein alphabet:
# the alphabet, not the string, tells Biopython how to interpret the data.
my_prot = Seq(
    "AGTACACTGGT",
    alphabet=IUPAC.protein,
)
my_prot

Seq('AGTACACTGGT', IUPACProtein())

In [6]:
from Bio.SeqRecord import SeqRecord
# A bare sequence with no alphabet argument ...
simple_seq = Seq("GATC")
# ... wrapped in a SeqRecord; id, name and description are left unset here.
simple_seq_r = SeqRecord(seq=simple_seq)

In [9]:
# Printing a SeqRecord gives a multi-line summary: ID, name, description,
# feature count and the sequence. Unset fields show as "<unknown ...>".
print(simple_seq_r)

ID: <unknown id>
Name: <unknown name>
Description: <unknown description>
Number of features: 0
Seq('GATC')


In [10]:
# Fill in two of the record's annotation fields after construction
# (name is deliberately left unset), then show the updated summary.
simple_seq_r.description = "This sequence is pretend."
simple_seq_r.id = "AC12345"
print(simple_seq_r)

ID: AC12345
Name: <unknown name>
Description: This sequence is pretend.
Number of features: 0
Seq('GATC')


In [1]:
from Bio import SeqIO
# Load the FASTA file into a single SeqRecord and display its repr.
record = SeqIO.read(
    "NC_005816.fna",
    format="fasta",
)
record

SeqRecord(seq=Seq('TGTAACGAACGGTGCAATAGTGATCCACACCCAACGCCTGAAATCAGATCCAGG...CTG', SingleLetterAlphabet()), id='gi|45478711|ref|NC_005816.1|', name='gi|45478711|ref|NC_005816.1|', description='gi|45478711|ref|NC_005816.1| Yersinia pestis biovar Microtus str. 91001 plasmid pPCP1, complete sequence', dbxrefs=[])

In [2]:
# Number of letters in the record's sequence.
len(record.seq)

9609

In [6]:
# Parse every record of the multi-record FASTA file into a list.
# The context manager closes the file even if parsing raises; the list is
# fully materialised inside the block, so nothing reads the file afterwards.
with open("example.fasta", "r") as handle:
    seq_list = list(SeqIO.parse(handle, "fasta"))
# Show the sequence of the first record.
print(seq_list[0].seq)

CCCTTCTTGTCTTCAGCGTTTCTCC


In [9]:
# Sequence of the second record in seq_list.
print(seq_list[1].seq)

TTGGCAGGCCAAGGCCGATGGATCA


In [10]:
from Bio import Entrez
# Contact address sent along with Entrez requests.
# NOTE(review): a personal email address is hard-coded here — consider reading
# it from configuration/environment before sharing this notebook.
Entrez.email = "rfortune@iastate.edu"

In [19]:
# Fetch GenBank entry DQ137224 from the NCBI nucleotide database as plain text
# and parse it into a single SeqRecord.
handle = Entrez.efetch(db="nucleotide", id="DQ137224", rettype="gb", retmode="text")
try:
    record = SeqIO.read(handle, "genbank")
finally:
    # Always release the network handle, even when parsing fails.
    handle.close()
print(record)

ID: DQ137224.1
Name: DQ137224
Description: Megadyptes antipodes voucher JD64A cytochrome b (cytb) gene, partial cds; mitochondrial
Number of features: 3
/molecule_type=DNA
/topology=linear
/data_file_division=VRT
/date=26-JUL-2016
/accessions=['DQ137224']
/sequence_version=1
/keywords=['']
/source=mitochondrion Megadyptes antipodes (Yellow-eyed penguin)
/organism=Megadyptes antipodes
/taxonomy=['Eukaryota', 'Metazoa', 'Chordata', 'Craniata', 'Vertebrata', 'Euteleostomi', 'Archelosauria', 'Archosauria', 'Dinosauria', 'Saurischia', 'Theropoda', 'Coelurosauria', 'Aves', 'Neognathae', 'Sphenisciformes', 'Spheniscidae', 'Megadyptes']
/references=[Reference(title='Multiple gene evidence for expansion of extant penguins out of Antarctica due to global cooling', ...), Reference(title='Direct Submission', ...)]
Seq('ACACAAATTCTAACTGGCCTCCTACTGGCCGCCCACTACACTGCAGACACAACC...AGC', IUPACAmbiguousDNA())


In [20]:
# Print the complete sequence string; print(record) only shows a shortened
# preview of the sequence.
print(record.seq)

ACACAAATTCTAACTGGCCTCCTACTGGCCGCCCACTACACTGCAGACACAACCCTAGCCTTCTCCTCCGTTGCCCACACATGCCGAAACGTACAGTACGGCTGACTAATCCGCAACCTACATGCAAACGGGGCCTCATTCTTCTTCATCTGCATCTACCTCCACATTGGCCGCGGACTCTACTACGGCTCCTACCTGTACAAAGAAACCTGAAACACAGGCATCATCCTCCTGCTTACCCTCATAGCAACCGCCTTCGTAGGCTATGTCCTGCCATGGGGCCAAATATCCTTCTGAGGGGCTACAGTCATCACCAACCTATTCTCAGCCATCCCCTATATCGGCCAAACTCTCGTAGAATGAGCCTGAGGCGGCTTCTCAGTAGACAACCCCACACTAACCCGATTCTTCACACTACACTTCCTTCTCCCCTTCATAATTGCAGGCCTCACCCTAATCCACCTCACCTTCCTTCACGAATCAGGCTCAAACAACCCACTAGGCATCGTAGCCAACTCTGACAAAATCCCATTCCACCCCTACTACTCCACAAAAGACATCCTAGGATTTATTCTCCTACTCCTTCCACTGGCAGCCCTCGCCCTATCCTCACCCAACCTGCTAGGGGACCCAGAAAATTTCACCCCAGCAAACCCCCTAGTCACACCCCCACATATCAAACCAGAATGATACTTCCTATTTGCATACGCCATCCTACGCTCAATCCCCAACAAGCTGGGGGGAGTCCTAGCCCTAGCAGCATCCGTACTAGTCCTATTCCTAATCCCCCTCCTCCACAAATCCAAGCAACGTACAATAACCTTCCGCCCCCTCTCCCAACTTCTATTCTGAACCCTAGTGGCCAACCTCACCATCCTAACATGAATCGGCAGCCAACCAGTAGAGCACCCCTTCATCATCATCGGCCAGCTAGCCTCCCTCACCTACTTCACCATCCTCCTAATCCTCTTCCCCCTTATCGGGGCCCTAGAAAACAAAA

In [24]:
from Bio.Blast import NCBIWWW

In [25]:
# Show qblast's signature and docstring to see which search parameters exist.
help(NCBIWWW.qblast)

Help on function qblast in module Bio.Blast.NCBIWWW:

qblast(program, database, sequence, url_base='https://blast.ncbi.nlm.nih.gov/Blast.cgi', auto_format=None, composition_based_statistics=None, db_genetic_code=None, endpoints=None, entrez_query='(none)', expect=10.0, filter=None, gapcosts=None, genetic_code=None, hitlist_size=50, i_thresh=None, layout=None, lcase_mask=None, matrix_name=None, nucl_penalty=None, nucl_reward=None, other_advanced=None, perc_ident=None, phi_pattern=None, query_file=None, query_believe_defline=None, query_from=None, query_to=None, searchsp_eff=None, service=None, threshold=None, ungapped_alignment=None, word_size=None, short_query=None, alignments=500, alignment_view=None, descriptions=500, entrez_links_new_window=None, expect_low=None, expect_high=None, format_entrez_query=None, format_object=None, format_type='XML', ncbi_gi=None, results_file=None, show_overview=None, megablast=None, template_type=None, template_length=None)
    BLAST search using NCBI's

In [26]:
# Load the record in test.fasta; it is used as the BLAST query in later cells.
query = SeqIO.read("test.fasta", format="fasta")

In [27]:
# Submit the query sequence to NCBI BLAST: blastn program against the "nt"
# database. All other options keep qblast's defaults (results come back as XML).
result_handle = NCBIWWW.qblast(
    program="blastn",
    database="nt",
    sequence=query.seq,
)

In [28]:
# Save the raw BLAST XML to disk so it can be re-parsed without re-running the
# search. The context manager closes the file even if read()/write() raises.
# blast_file stays bound (as a closed file), so a later blast_file.close() is a
# harmless no-op.
with open("my_blast.xml", "w") as blast_file:
    chars_written = blast_file.write(result_handle.read())
# Display the number of characters written, as the bare write() call did.
chars_written

143548

In [29]:
# Release the output file and the BLAST result handle.
blast_file.close()
result_handle.close()

In [30]:
from Bio.Blast import NCBIXML
# Parse the saved BLAST XML into a single BLAST record.
# The record is built inside the block, so the file can be closed right away
# instead of being left open for the rest of the session.
with open("my_blast.xml") as handle:
    blast_record = NCBIXML.read(handle)

In [31]:
# One-line summary per hit: its title followed by its e-value on the next line.
for desc in blast_record.descriptions:
    print(desc.title, desc.e, sep="\n")

gi|1105484513|ref|XM_002284686.3| PREDICTED: Vitis vinifera cold-regulated 413 plasma membrane protein 2 (LOC100248690), mRNA
0.0
gi|1420088022|gb|MG722853.1| Vitis vinifera cold-regulated 413 inner membrane protein 2 mRNA, complete cds
0.0
gi|123704572|emb|AM483681.1| Vitis vinifera, whole genome shotgun sequence, contig VV78X045699.9, clone ENTAV 115
0.0
gi|1217007653|ref|XM_021787586.1| PREDICTED: Hevea brasiliensis cold-regulated 413 plasma membrane protein 2-like (LOC110637472), transcript variant X3, mRNA
9.95421e-151
gi|1217007651|ref|XM_021787585.1| PREDICTED: Hevea brasiliensis cold-regulated 413 plasma membrane protein 2-like (LOC110637472), transcript variant X2, mRNA
9.95421e-151
gi|1217007649|ref|XM_021787584.1| PREDICTED: Hevea brasiliensis cold-regulated 413 plasma membrane protein 2-like (LOC110637472), transcript variant X1, mRNA
3.47436e-150
gi|1204934028|ref|XM_021444654.1| PREDICTED: Herrania umbratica cold-regulated 413 plasma membrane protein 1 (LOC110428735), mRN

In [32]:
# For every HSP of every alignment, print the hit title, the e-value and the
# first 75 columns of the query / match / subject rows, each followed by "...".
for alignment in blast_record.alignments:
    for hsp in alignment.hsps:
        print(alignment.title)
        print(hsp.expect)
        for row in (hsp.query, hsp.match, hsp.sbjct):
            print(row[:75] + '...')

gi|1105484513|ref|XM_002284686.3| PREDICTED: Vitis vinifera cold-regulated 413 plasma membrane protein 2 (LOC100248690), mRNA
0.0
TACTCTACAGTCTCTGACTTTGTAAGCTTCGCGCTTCTTCTCCTTTTTCTCTCTGGGGAAAGATTTTCCCTTTCT...
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||...
TACTCTACAGTCTCTGACTTTGTAAGCTTCGCGCTTCTTCTCCTTTTTCTCTCTGGGGAAAGATTTTCCCTTTCT...
gi|1420088022|gb|MG722853.1| Vitis vinifera cold-regulated 413 inner membrane protein 2 mRNA, complete cds
0.0
ATGGGGAAAAAGGGTTACTTGGCGATGAGGACTGACACTGATACTACTGATTTGATCAGTTCTGATCTCAGAGAC...
||||||||||||||||||||||||||||||||||||||| |||||||||||||||||||||||||||||||||||...
ATGGGGAAAAAGGGTTACTTGGCGATGAGGACTGACACTTATACTACTGATTTGATCAGTTCTGATCTCAGAGAC...
gi|123704572|emb|AM483681.1| Vitis vinifera, whole genome shotgun sequence, contig VV78X045699.9, clone ENTAV 115
0.0
CAGATTGGCTGGAAATGCCTGCAGCTTTGATTCTTGTTATAGTGGTGGCTCCCAGCTTGTTTTCGAGCACTATAA...
||||||||||||||||||| ||||||||||||||| |||||||||||||||||||||||||||||||||||||||...
CAGATTGGC

In [33]:
# Report only HSPs whose e-value is strictly below this cutoff.
E_VALUE_THRESH = 1e-150

# NCBIXML.parse yields BLAST records from the file, so the file must stay open
# for the whole loop; the context manager closes it afterwards (the original
# anonymous open() was never closed).
with open("my_blast.xml") as blast_xml:
    for record in NCBIXML.parse(blast_xml):
        for align in record.alignments:
            for hsp in align.hsps:
                if hsp.expect < E_VALUE_THRESH:
                    # Titles are long; show only the first 60 characters.
                    print("MATCH: %s " % align.title[:60])
                    print(hsp.expect)

MATCH: gi|1105484513|ref|XM_002284686.3| PREDICTED: Vitis vinifera  
0.0
MATCH: gi|1420088022|gb|MG722853.1| Vitis vinifera cold-regulated 4 
0.0
MATCH: gi|123704572|emb|AM483681.1| Vitis vinifera, whole genome sh 
0.0
MATCH: gi|1217007653|ref|XM_021787586.1| PREDICTED: Hevea brasilien 
9.95421e-151
MATCH: gi|1217007651|ref|XM_021787585.1| PREDICTED: Hevea brasilien 
9.95421e-151


In [34]:
# Show the query record's description, for comparison with the BLAST hit titles.
query.description

'XM_002284686.3 PREDICTED: Vitis vinifera cold-regulated 413 plasma membrane protein 2 (LOC100248690), mRNA'