#### Get the HBeAg sequence from an online database

In [1]:
from Bio import Entrez
from Bio import SeqIO

In [2]:
# Contact address sent along with Entrez requests.
# NOTE(review): this looks like a placeholder — replace with a real address before running.
Entrez.email = "fake_mail@gmail.com"

In [3]:
# Request protein O92920 from the Entrez "protein" database as FASTA text.
entrez_handle = Entrez.efetch(
    db="protein",
    id="O92920",
    rettype="fasta",
)

In [4]:
# Dump the raw FASTA text returned by Entrez. Reading consumes the handle, so it
# is closed right away; the record is fetched again before it is parsed.
print(entrez_handle.read())
entrez_handle.close()

>sp|O92920.1|HBEAG_HBVD7 RecName: Full=External core antigen; AltName: Full=HBeAg; AltName: Full=Precore protein; AltName: Full=p25; Flags: Precursor
MQLFHLCLIISCSCPTVQASKLCLGWLWDMDIDPYKEFGATVQLLSFLPHDFFPSVRDLLDTASALFRDA
LESPEHCSPHHTALRQAILCWGELMTLATWVGANLQDPASRELVVTYVNINMGLKFRQLLWFHISCLTFG
RETVIEYLVSFGVWIRTPQAYRPPNAPILSTLPETTVVRRRGRSPRRRTPSPRRRRSQSPRRRRSQSRES
QC




In [5]:
# Fetch the same entry again to get a fresh, unread handle for parsing.
entrez_handle = Entrez.efetch(
    db="protein",
    id="O92920",
    rettype="fasta",
)

In [6]:
# Parse the single FASTA entry into a SeqRecord, then release the network
# handle whether or not parsing succeeded.
try:
    record = SeqIO.read(entrez_handle, "fasta")
finally:
    entrez_handle.close()

In [7]:
# Show the parsed record's summary (ID, name, description, sequence preview).
print(record)

ID: sp|O92920.1|HBEAG_HBVD7
Name: sp|O92920.1|HBEAG_HBVD7
Description: sp|O92920.1|HBEAG_HBVD7 RecName: Full=External core antigen; AltName: Full=HBeAg; AltName: Full=Precore protein; AltName: Full=p25; Flags: Precursor
Number of features: 0
Seq('MQLFHLCLIISCSCPTVQASKLCLGWLWDMDIDPYKEFGATVQLLSFLPHDFFP...SQC', SingleLetterAlphabet())


In [8]:
# Save the sequence to a local FASTA file (this file name is reused as the
# BLAST query below). The call is left as the cell's last expression so its
# return value — the number of records written — is displayed.

SeqIO.write(record, "O92920.fasta", "fasta")

1

#### Using local BLAST databases

In [29]:
from Bio.Blast.Applications import NcbiblastpCommandline as BlastCL
from Bio.Blast import NCBIXML

In [24]:
# Settings for the local BLAST run.
db = "HBV"  # database name handed to blastp via -db
blast_file = "O92920.fasta"  # query file handed to blastp via -query
blast_out = "O92920_res.xml"  # file blastp writes its results to (-out)
# NOTE(review): machine-specific absolute Windows path — adjust for other installs.
path = r"C:\Program Files\NCBI\blast-2.10.0+\bin\blastp.exe"

In [25]:
# Assemble the blastp command line; the XML parser used later reads the
# outfmt=5 results written to blast_out.
cline = BlastCL(
    cmd=path,
    query=blast_file,
    db=db,
    outfmt=5,
    out=blast_out,
)

In [26]:
# Show the exact command line the wrapper will execute.
print(cline)

"C:\Program Files\NCBI\blast-2.10.0+\bin\blastp.exe" -out O92920_res.xml -outfmt 5 -query O92920.fasta -db HBV


In [51]:
# Run blastp. Calling the wrapper returns a (stdout, stderr) pair; the search
# results themselves go to the file named by blast_out.
stdout, stderr = cline()

In [52]:
# xml parsing

# NCBIXML.parse is a lazy generator, so the first record has to be pulled
# while the file is still open; the with-block then closes the file.
with open(blast_out, "r") as result:
    records = NCBIXML.parse(result) # generator
    item = next(records)

In [53]:
# first 3 search results

# Each HSP of each of the first three alignments is rendered as one text
# block and emitted with a single print call.
for alignment in item.alignments[:3]:
    for hsp in alignment.hsps:
        report_lines = [
            "**** ALIGNMENT ****",
            "Sequence: " + alignment.title[:30] + "...",
            "  Length: " + str(alignment.length),
            " E-value: " + str(hsp.expect),
            "   Score: " + str(hsp.score),
            "    Gaps: " + str(hsp.gaps),
            hsp.query[0:50] + "...",
            hsp.match[0:50] + "...",
            hsp.sbjct[0:50] + "...",
            "\n",  # trailing blank lines separating consecutive reports
        ]
        print("\n".join(report_lines))

**** ALIGNMENT ****
Sequence: sp|O92920|HBEAG_HBVD7 Unknown...
  Length: 0
 E-value: 5.12828e-158
   Score: 1119.0
    Gaps: (None, None)
...
...
...


**** ALIGNMENT ****
Sequence: tr|O92918|O92918_HBV Unknown...
  Length: 0
 E-value: 7.76187e-153
   Score: 1085.0
    Gaps: (None, None)
...
...
...


**** ALIGNMENT ****
Sequence: tr|I2DB80|I2DB80_HBV Unknown...
  Length: 0
 E-value: 1.79507e-139
   Score: 997.0
    Gaps: (None, None)
...
...
...




#### Using BLAST online

In [55]:
from Bio.Blast import NCBIWWW

In [56]:
# Submit a blastp search against the "nr" database through NCBI's online
# BLAST service, passing the accession O92920 as the query (network call).
result_handle = NCBIWWW.qblast("blastp", "nr", "O92920")

In [57]:
# Parse the returned XML and take the first record from the parser.
blast_records = NCBIXML.parse(result_handle)
blast_record = next(blast_records)

In [58]:
# first 3 search results

# Each HSP of each of the first three alignments is rendered as one text
# block and emitted with a single print call.
for alignment in blast_record.alignments[:3]:
    for hsp in alignment.hsps:
        report_lines = [
            "**** ALIGNMENT ****",
            "Sequence: " + alignment.title[:30] + "...",
            "  Length: " + str(alignment.length),
            " E-value: " + str(hsp.expect),
            "   Score: " + str(hsp.score),
            "    Gaps: " + str(hsp.gaps),
            hsp.query[0:50] + "...",
            hsp.match[0:50] + "...",
            hsp.sbjct[0:50] + "...",
            "\n",  # trailing blank lines separating consecutive reports
        ]
        print("\n".join(report_lines))

**** ALIGNMENT ****
Sequence: sp|O92920.1| RecName: Full=Ext...
  Length: 212
 E-value: 3.10459e-154
   Score: 1119.0
    Gaps: 0
MQLFHLCLIISCSCPTVQASKLCLGWLWDMDIDPYKEFGATVQLLSFLPH...
MQLFHLCLIISCSCPTVQASKLCLGWLWDMDIDPYKEFGATVQLLSFLPH...
MQLFHLCLIISCSCPTVQASKLCLGWLWDMDIDPYKEFGATVQLLSFLPH...


**** ALIGNMENT ****
Sequence: gb|QDO72733.1| precore/core pr...
  Length: 212
 E-value: 1.01553e-153
   Score: 1115.0
    Gaps: 0
MQLFHLCLIISCSCPTVQASKLCLGWLWDMDIDPYKEFGATVQLLSFLPH...
MQLFHLCLIISCSCPTVQASKLCLGWLWDMDIDPYKEFGATVQLLSFLPH...
MQLFHLCLIISCSCPTVQASKLCLGWLWDMDIDPYKEFGATVQLLSFLPH...


**** ALIGNMENT ****
Sequence: gb|QDO72728.1| precore/core pr...
  Length: 212
 E-value: 1.17331e-152
   Score: 1108.0
    Gaps: 0
MQLFHLCLIISCSCPTVQASKLCLGWLWDMDIDPYKEFGATVQLLSFLPH...
MQLFHLCLIISCSCPTVQASKLCLGWLWDMDIDPYKEFGATV+LLSFLPH...
MQLFHLCLIISCSCPTVQASKLCLGWLWDMDIDPYKEFGATVELLSFLPH...




In [71]:
# Write a text report for every HSP of the first five online-BLAST alignments
# to output.txt. The with-block flushes and closes the file when the loop
# finishes, or if a write fails part-way through.
with open("output.txt", "w+") as f:
    for alignment in blast_record.alignments[:5]:
        for hsp in alignment.hsps:
            f.write("**** ALIGNMENT **** \n")
            f.write("Sequence: " + alignment.title[:50] + "..." + " \n")
            f.write("  Length: " + str(alignment.length) + " \n")
            f.write(" E-value: " + str(hsp.expect) + " \n")
            f.write("   Score: " + str(hsp.score) + " \n")
            f.write("    Gaps: " + str(hsp.gaps) + " \n")
            # The three alignment rows, truncated to their first 75 characters.
            f.write(hsp.query[0:75] + "... \n")
            f.write(hsp.match[0:75] + "... \n")
            f.write(hsp.sbjct[0:75] + "... \n")
            f.write(" \n")