In [None]:
from Bio.Blast import NCBIWWW
from Bio.Blast import NCBIXML
from Bio import SeqIO 
import pandas as pd
from Bio import Entrez
import matplotlib.pyplot as plt

# Import the sequence(s)

In [32]:
# Load the single test query; SeqIO.read expects exactly one record in the file.
single_fasta_path = 'sequences/test/sequence_test.fa'
seq_record = SeqIO.read(single_fasta_path, 'fasta')
# Leave the sequence as the cell's last expression so the notebook displays it.
seq_record.seq

Seq('CCTCACATACCAGATGCGAATTCCCGAG', SingleLetterAlphabet())

## Multiple sequences

In [None]:
# Read every record of the multi-FASTA file into a list so that the records
# can be indexed and counted by the cells further down.
multi_fasta_records = SeqIO.parse('sequences/test/sequences.fa', 'fasta')
seq = list(multi_fasta_records)
# Display the first sequence as a quick sanity check.
seq[0].seq

# Call BLAST from the NCBI website

In [33]:
# Contact address set on Bio.Entrez before the remote searches start.
Entrez.email = 'ness.louafi@gmail.com'

# Submit one blastn search against the "nt" database per loaded sequence and
# keep the returned handles in submission order.
result_handle = []
for idx, query in enumerate(seq):
    print('Trying sequence number ' + str(idx)+'...')
    blast_handle = NCBIWWW.qblast("blastn", "nt", query.seq, hitlist_size = 10)
    result_handle.append(blast_handle)
    print('Done with sequence ' + str(idx) + ' !')
    

Trying sequence number 0
Done with sequence 0 !


# Display the results

In [36]:
# Saving step, kept disabled exactly as in the original cell (it used to be a
# bare string literal). Presumably it was switched off because a handle can be
# read only once: re-running it on consumed handles would overwrite the saved
# XML with empty files -- TODO confirm before re-enabling.
#
# for i in range(len(result_handle)):
#     with open('results/results_test_'+str(i)+'.xml', 'w') as save_file:
#         blast_results = result_handle[i].read()
#         save_file.write(blast_results)

# Report the hits stored in each saved XML file.
# NOTE(review): `result_handle` must still be the list built in the
# "Call blast" cell; a later cell rebinds the name to a single handle, which
# would make len() fail here.
for i in range(len(result_handle)):
    # The context manager closes the XML file once parsing is done (the
    # original left the handle open).
    with open("results/results_test_"+str(i)+".xml") as xml_file:
        for record in NCBIXML.parse(xml_file):
            if not record.alignments:
                continue
            print("\n")
            print("results"+str(i))
            # One "match" line per HSP, titled with the alignment it belongs to.
            for align in record.alignments:
                for hsp in align.hsps:
                    print("match: %s " % align.title[:100])
                    #print('e value:', hsp.expect)

            # Alignment details are printed for every record. The original ran
            # this loop after the record loop, so only the last record of each
            # file was reported (and an empty file raised NameError).
            for alignment in record.alignments:
                for hsp in alignment.hsps:
                    print('****Alignment****')
                    print('length:', alignment.length)
                    print('e value:', hsp.expect)
                    # Show at most the first 75 columns of the pairwise alignment.
                    print(hsp.query[0:75] + '   ')
                    print(hsp.match[0:75] + '   ')
                    print(hsp.sbjct[0:75] + '   ')




results0
match: gi|1825937619|ref|XM_033197112.1| PREDICTED: Trachypithecus francoisi ceramide synthase 2 (CERS2), m 
****Alignment****
length: 2479
e value: 3.81343
TATTCTGTGAGTTTGGCCCTAGCT   
|||||||||| |||||||||||||   
TATTCTGTGAATTTGGCCCTAGCT   


In [None]:
# Print the built-in documentation of qblast, the function used in this
# notebook with arguments such as hitlist_size and expect.
help(NCBIWWW.qblast)

In [29]:
from Bio import Entrez
from urllib.error import URLError
import time

# Contact address set on Bio.Entrez before the remote searches start.
Entrez.email = 'ness.louafi@gmail.com'
counter = 0  # number of records submitted so far
list_record_host = []  # one BLAST result handle per record, in file order
for record in SeqIO.parse("sequences/test/sequences_10.fa", format="fasta"):
    print(record.id)
    print(counter)
    counter+=1

    # Online request, retried once after a short pause if the network call fails.
    try:
        result_handle = NCBIWWW.qblast("blastn","nt",record.seq, hitlist_size = 10)
        print(result_handle)
    except URLError:
        # URLError is the imported base class and HTTPError derives from it,
        # so HTTP failures are covered as well. The original caught
        # `HTTPError`, a name that was never imported, so any failure surfaced
        # as a NameError instead of triggering the retry.
        time.sleep(5)
        result_handle = NCBIWWW.qblast("blastn","nt",record.seq, hitlist_size = 10)

    # result handle stored in a list
    list_record_host.append(result_handle)

0
0
<_io.StringIO object at 0x000002A56D3F01F0>
1
1
<_io.StringIO object at 0x000002A56D4020D0>
2
2
<_io.StringIO object at 0x000002A56D33F940>
3
3
<_io.StringIO object at 0x000002A56A1C4D30>
4
4
<_io.StringIO object at 0x000002A56A1F8670>
5
5
<_io.StringIO object at 0x000002A56D4021F0>
6
6
<_io.StringIO object at 0x000002A56D3BC3A0>
7
7
<_io.StringIO object at 0x000002A56D3BC1F0>
8
8
<_io.StringIO object at 0x000002A56D3BCCA0>
9
9
<_io.StringIO object at 0x000002A56D65D0D0>


In [30]:
# Write each stored BLAST result to its own XML file.
# The original read `result_handle` (only the last handle of the previous
# cell) on every iteration, so the first file received the wrong query's
# results and all the others were written empty.
for i, handle in enumerate(list_record_host):
    # The handles are in-memory StringIO objects (see the output of the cell
    # that created them); rewinding makes this cell safe to re-run after the
    # handle has already been read once.
    handle.seek(0)
    with open('results/results'+str(i)+'.xml', 'w') as save_file:
        blast_results = handle.read()
        save_file.write(blast_results)

# Save and open the results

In [None]:
# Write each stored BLAST result to 'results<i>.xml' in the working directory.
# The original looped over a hard-coded range(50) and read the single
# `result_handle` each time, so at most one file could receive data and the
# rest were written empty; iterate over the collected handles instead.
for i, handle in enumerate(list_record_host):
    # Rewind first: the handles are in-memory StringIO objects (see the output
    # of the cell that created them) and may already have been read by an
    # earlier save cell.
    handle.seek(0)
    with open('results'+str(i)+'.xml', 'w') as save_file:
        blast_results = handle.read()
        save_file.write(blast_results)

# Only HSPs with an expect value below this threshold are reported.
E_VALUE_THRESH = 1e-20
# NOTE(review): the save loop in this cell writes 'results<i>.xml', not
# 'results.xml' -- confirm which file is meant to be parsed here.
# The context manager closes the file after parsing (the original left the
# handle open).
with open("results.xml") as xml_file:
    for record in NCBIXML.parse(xml_file):
        if record.alignments:
            print("\n")
            print("query: %s" % record.query[:100])
            for align in record.alignments:
                for hsp in align.hsps:
                    if hsp.expect < E_VALUE_THRESH:
                        print("match: %s " % align.title[:100])

In [None]:
# Parameters of the remote search.
# NOTE(review): 'blastp' is a protein search, while the test record displayed
# earlier in the notebook looks like a nucleotide sequence -- confirm the
# intended program.
blast_params = {'program': 'blastp', 'database': 'nr', 'sequence': seq_record.seq, 'expect': 10.0}
# The database entry is replaced by a list so several databases can be queried in turn.
blast_params['database'] = ['nr']

# One score / e-value table per database; they are combined once after the
# loop instead of growing a DataFrame inside it.
frames = []
for database in blast_params['database']:
    result = NCBIWWW.qblast(blast_params['program'], database, blast_params['sequence'], expect=blast_params['expect'])
    file_name = "blast_output_" + database + ".xml"
    try:
        with open(file_name, "w") as output_xml:
            output_xml.write(result.read())
    finally:
        # Release the result handle even if writing the file fails.
        result.close()

    # Re-read the saved XML; the context manager closes the file (the original
    # left `result_input` open).
    with open(file_name) as result_input:
        blast_records = NCBIXML.read(result_input)

    df = pd.DataFrame({
        'score': [description.score for description in blast_records.descriptions],
        'e-value': [description.e for description in blast_records.descriptions],
    })
    df['database'] = database[0:6] # we simply limit the name to the first 6 characters for easier viewing
    frames.append(df)

# pd.concat rejects an empty list, so fall back to an empty frame in that case.
print_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    

In [None]:
# Only HSPs with an expect value below this threshold are reported.
E_VALUE_THRESH = 1e-20
# Parse the XML written by the previous cell for the 'nr' database. The
# context manager closes the file after parsing (the original left the handle
# open).
with open("blast_output_nr.xml") as xml_file:
    for record in NCBIXML.parse(xml_file):
        if record.alignments:
            print("\n")
            print("query: %s" % record.query[:100])
            for align in record.alignments:
                for hsp in align.hsps:
                    # Keep significant hits whose title mentions Arabidopsis thaliana.
                    if hsp.expect < E_VALUE_THRESH and "Arabidopsis thaliana" in align.title:
                        print("match: %s " % align.title[:100])

# Open when finished

In [None]:
import webbrowser

# Address of this notebook on the local Jupyter server; opening it in a new
# browser window signals that the run has reached the end.
notebook_url = 'http://localhost:8888/notebooks/Documents/iGEM/iGEM/Run%20BLAST.ipynb'
webbrowser.open_new(notebook_url)