In [None]:
import pandas as pd
import numpy as np
from Bio import SeqIO, SeqRecord, Seq, SearchIO
from Bio.Blast import NCBIWWW, NCBIXML

In [None]:
# Build the sequence record: index every FASTA entry by its id, then keep the
# first one. (SeqIO.read could replace SeqIO.parse for a single-record file.)
fasta_records = SeqIO.parse("human_mx1.fas", 'fasta')
seq_record_dictionary = SeqIO.to_dict(fasta_records)
record_ids = list(seq_record_dictionary)
sequence_record = seq_record_dictionary[record_ids[0]]
sequence_record

In [None]:
# Derive the protein: transcribe the record's sequence to RNA, then translate
# it, stopping at the first stop codon (to_stop=True).
rna = sequence_record.seq.transcribe()
protein = rna.translate(to_stop=True)
protein

In [None]:
# Run BLASTn against the 'nt' database and store the raw XML result on disk.
# This is a remote NCBI request; the next cell reads the saved file, so this
# cell only has to be re-run when the query sequence changes.
# An entrez_query='txid000000' argument can be added to the qblast call.
# The `with` block guarantees the result handle is closed once it has been read.
with NCBIWWW.qblast('blastn', 'nt', sequence_record.seq) as result_handler:
    result_storer = result_handler.read()

with open('search_from_parser2.xml', 'w') as savefile:
    savefile.write(result_storer)

In [None]:
# Load the saved BLAST XML with SearchIO; the top-level object is a QueryResult.
results_searchio = SearchIO.read('search_from_parser2.xml', 'blast-xml')

# Unpack the QueryResult, printing one value per line in this order:
#   seq_len - length of the DNA molecule submitted for analysis
#   program - BLAST program
#   version - BLAST version
#   target  - database
#   id      - query id (original note: "No" is the default)
# The parameters used for the BLAST alignments are reachable through
# results_searchio.param....
for query_attribute in ('seq_len', 'program', 'version', 'target', 'id'):
    print(getattr(results_searchio, query_attribute))

#for result in results_searchio:
#    print(result.id)

In [None]:
# Unpack the first result (a Hit object). Only one hit is shown to keep the
# output short. Query id, description and length are skipped here because they
# were already printed from the QueryResult.
result1 = results_searchio[0]

# Printed in order: hit id, description, sequence length, accession.
for hit_attribute in ('id', 'description', 'seq_len', 'accession'):
    print(getattr(result1, hit_attribute))


In [None]:
# List the id of every hit in the query result, one per line.
for hit in results_searchio:
    hit_id = hit.id
    print(hit_id)

In [None]:
# Unpack the first HSP (high scoring segment pair) of the same Hit object.
# Remember that BLAST uses local alignment and could be biased.
# Original note: .hsps returns a list of length 1 for this hit, hence index 0.
result1_hsp = result1.hsps[0]

# Attributes are printed one per line, in exactly this order.
hsp_attributes = (
    'bitscore',
    'bitscore_raw',
    'evalue',
    'query_start',
    'query_end',
    'hit_start',
    'hit_end',
    'query_frame',
    'hit_frame',
    'ident_num',
    'pos_num',
    'gap_num',
    'aln_span',  # !!! length of the alignment
    'query',     # query Seq object! important to run local alignments
)
for hsp_attribute in hsp_attributes:
    print(getattr(result1_hsp, hsp_attribute))

print('\nseparate\n')
# Original note: this object only stores the aligned sequence (1989 characters)
# rather than the full hit sequence of 2776.
print(result1_hsp.query.seq)

In [None]:
# Collect one row per hit into a column-oriented dictionary (column -> list).

# Output column -> attribute read from the Hit object.
hit_fields = {
    'id': 'id',
    'description': 'description',
    'seq_length': 'seq_len',
    'accession': 'accession',
}
# Columns read from the hit's first HSP; column name and attribute name match.
hsp_fields = (
    'bitscore', 'bitscore_raw', 'evalue', 'hit_start',
    'hit_end', 'query_frame', 'gap_num', 'aln_span',
)

blast_dictionary = {column: [] for column in (*hit_fields, *hsp_fields)}

for hit in results_searchio:
    for column, attribute in hit_fields.items():
        blast_dictionary[column].append(getattr(hit, attribute))
    # Only the first HSP is used; needs double-checking because a hit could
    # have more than one HSP (the first is likely the most significant).
    first_hsp = hit.hsps[0]
    for column in hsp_fields:
        blast_dictionary[column].append(getattr(first_hsp, column))


In [None]:
# Print the number of HSPs held by each hit, one count per line.
# Original observation for this data set: no hit object has 2 HSPs.
for hit in results_searchio:
    print(len(hit.hsps))

In [None]:
# Turn the column dictionary into a DataFrame (one column per key) and show it.
blast_dataframe = pd.DataFrame(data=blast_dictionary)
print(blast_dataframe)

In [None]:
# Save the table as .csv.
# pandas writes the file itself so it controls newline handling and encoding:
# serialising to a string first and writing it through a text-mode handle
# doubles the carriage returns on Windows (to_csv ends lines with os.linesep)
# and encodes with the platform's locale default.
blast_dataframe.to_csv('csv_blast_results.csv')


In [None]:
# Import a saved BLAST .xml file with NCBIXML.
# NOTE(review): this reads 'blastp_results.xml', not the
# 'search_from_parser2.xml' written earlier in this notebook -- confirm the
# file exists and is the intended input.
# The `with` block closes the file once the record has been parsed.
with open('blastp_results.xml') as xml_parser:
    results_xml = NCBIXML.read(xml_parser)

In [None]:
# results_xml is the BLAST record object returned by NCBIXML.read.
# It exposes all of its results as lists; here the score of every entry in
# .descriptions is printed, one per line.
for description in results_xml.descriptions:
    print(description.score)