# Run multiple CloudBLAST jobs

This notebook shows how to start and wait for multiple BLAST jobs using the NCBI CloudBLAST API library.

## Requirements

We will need to install the CloudBLAST API library and a few Python packages to parse the input sequences and display the results.

In [None]:
# Install the NCBI CloudBLAST API library
%pip install --quiet ncbi-cloudblast-api

In [None]:
# We will use biopython to extract sequences from a file in FASTA format.
%pip install --quiet biopython

In [None]:
# Install the ipywidgets visualization library, widgetsnbextension and pandas,
# which the display cells at the end of this notebook rely on.
# NOTE(review): pandas is pinned to 0.24.2 while every other package is
# unpinned; the reason is not stated here — confirm the pin is still needed.
%pip install --quiet ipywidgets widgetsnbextension pandas==0.24.2

## Before you start

To use this library, you must provide the address of a CloudBLAST API service endpoint:

In [None]:
# Address of the CloudBLAST API service; it is handed to APIClient in the next
# cell. If it is left empty, the next cell raises a ValueError.
API_ADDRESS = ""  # set the API service address, e.g. "35.245.159.177:5000"

In [None]:
from ncbi_cloudblast_api.api_client import APIClient

# The client is only built when an endpoint address has been provided;
# otherwise stop right away with a message pointing at the configuration cell.
if API_ADDRESS:
    api_client = APIClient(API_ADDRESS)
else:
    raise ValueError("Please set API_ADDRESS in the previous step.")

## Submit BLAST jobs

In [None]:
# Our input sequences, in FASTA format (embedded here as a string rather than read from a file)
FASTA = """
> NR_045216.1 Homo sapiens DSG2 antisense RNA 1 (DSG2-AS1), long non-coding RNA
GTTCCTGCGGGATCGGTTCCCATCATCCAATACCTCCCACCAGACTCCACTTCCATCATTGGAGGTCACATTTCAACTAA
GATTTGGAGGGGACCGTATCCTAACTATATTAACTTCCAATTTTCATCGAAAGCCCACCTGTCTAAGAAGAGCTCCTAGA
CATTCCCTTTCCCCTTCAACTTGGAGGAATTGGGAATGACAAAGAGAAATTAATCAAAAGAAGAATTAATCTCTCCAATC
AGCCTTGGCATTTTCTTTCTATCTTTGATATGTTGTTTTTTACAGTGTCTTGGTTTTCAGAATAAGCTGTTTGTAACTAC
TTCCTCCACTATATTGTGAGCCTCTGGAGGGCAAGGTCTGGGCCTTACTCATCTTTGAGTAATTTTAAAGCCACTAAATA
GGGCTCTGCACATAGTGAGAATTCAACATATGCTTAGTTGAATCTAATATTTCAGAAAAAGTCAACATCTATTCTTTAAG
ATAGCCCACAGCAGGCTGAGTGTGTTATTCGTAAATCCATTGGCCAGCGTCATTTCCTCCAAAGTGCAGCATTTTTGATA
GAACCAAAGGCTCATGACTGACCTTCAGAAAGACATTTTGTATTTCACCATCAAAGAGTGGTATGGGTTTTATTAGACTT
CCGTAATCCAGGTCTCGTTTCTCAGGCAACTCTGAAGCGTATCACACTTTATCTGAGAGGGTCACAATTAGTGAAGGAAG
GAATTGGCCTTTGCAAGTTTGTGTTGCTCACGAACTTATTAAATTGCTTGATTTTTTTTTTTCTTCCTAGCTTTGATAGA
TTGACTTTGCCTCTTTTACTGCGTGCATAGTGCCATCTTGTGGCCATAAAGCATATAACACTCATAGTTATGCCACAGTG
CTTCTGTTCCATCTTATTTCTGTAGTCCAGAGGATCTCTTAAGTGTAGAAACACATGGATTATTACTAGCCAAGGCCATC
ACTGGCCAAGCCCATCTAAAAGCCAGAGGCAGCAAGCAGTGCCCGTTGAAACATCCATAAAAGGTGATATTTGGTTAGCA
ACGTCCTACTGTAGACATTTTGATGTGGTGGTGATTGGAATATGGTCTGCAGAGATCAACCAGAATTAGCTCCATGTGCC
ACTGGGAGTGGGAACTGAAGTCACGCAGGAATAAACCAGTCTTCATGCTTTAGGATGTTCTGATGTGCCATTGTCGTGAA
ATCTTCATTCATGAGACATTCCAGTTTACCGGCAGTGTCGACTAGAATATGCTACAGCCCCCAAAATACGTTTTTAGTAT
AAGGATTATTTTGAGCTGATTATTTTGAGAAATAGCAGACGCAGGAGAAACTCTGAAGACAGAACGCAAACTTCAGCTAG
TGTCTTGAATTTAAGTCCCAAATCATCTAAGAAGCGGTCATCTAGCTCTCCTTCAATAAAACTGCAACAACCAATAGAAG
CATTCAGCGATTCAGTTTCTTCCTGAGAATAAACCAGAAGGCAATCTTTGGCTGTGTGATTTTCATCTTCCTCAGTGTTA
TATATCCTCTGCCTCTCCTACGGAGCTCATCCCTACCAAGCCTGCTCCTGCAGAGGGATGCAACTTTCCGACCCACTGAC
ATCTGTCTTGGTCATATGACTTGCTTTGGTCAATGAAATGTGGTGTAAAAAGGTGGAGGCTTTGAGAGCCATTGCCTGGT
TCTGCCTTGTTCTTTTCTTTGGGTCACAAGGACGGCAGCTTCCCAGGTACAATCTGCTCTCTCAGTGTGCATCTTGATAT
GAAGACATCATGAAGGAAAGCAGAGCTGACTGACTGGCAAAAACATAACTGTGGATAGAACCCAAAGAGAAAACAAAGCC
CAAACAGTCTCAGCTCCTTTACTCTATGCGACCAATGGCATTTCAACCTCCCTCTACAGTTTGATACCTTCTTCTATAGT
TACAGAATTCTTAACTTTGTTTTGACCTGGATCCATGGCCACCCAAAATAAAGACTATATTTCTAAAAAAAAAAAAAAAA

> JN544571.1 Homo sapiens MACC1 gene, promoter region
AGAAGGGATCCATAAATGCTGGAGCTGATAATCAAAAATCATAGATAGAGGTAGGGCCAGGTTTCCAGGACCATGGGACT
GAGGACTCTAGGGAGCCCAGTCCTTGGGGAGCTGTTAGAAAGGCAAGTGCACAGCAGGGATTGGAGCTGGTAAACAGTGA
GTGCTGACCCACTAGGGCTTTCATTTCGGGTACTGTCAAGGGTTCAATTGCTAGATTTAATGCTTTGCCTTTCTGCAAAT
AAAGGCTGTGGGAGCCACACCAGACCAGACTGCAGGGGGATTCTGATAACAGTACTCATGCATGGCATTTAGGTAGCTTT
TTAAATGTAAATGTTTTCCGGGAGACATAAAGATGGCATTAGGCATTAACACACACACACCTTAGAGTTAACATCTCAGT
TATTTTATGTGTTGATTGTTTATCACAAACATCTACTTGTACATTAATGAGTTAATGAAGTAGGGATACAATGAGGCCTT
GCCTTCAAATAGGAATGTAAGACTGCATAGGTACACACACACCCATGTAAACCAATGAAATGAGATATACAATGTGTTAT
GAGAGGAGCAGATGCAATATTATGAAATTGATTAGGAGGAAGAGAATAACTCAGATTAAGACAAACTGAGAAGGACATCT
TGAAGAGTGTTTTTCATTGAGCTTTGGAAAAAATGAAATCTCTGAACATTTCAAGATGAGCAAAAATGGTTTCAGGGAGG
AGAAATGCCATGCAGAATGCCCTCTGTAGGGTTTGGGGAAATTAAGAACCATTTGTTTTGGAAATAATATAATGACATAA
AGGAAAGCTAGGTTCTAGTAGAGAGTTGAGGAACCTGTGACAAAAAGAAACAGAACAGCTCAGTTTGGATTGTGGGATGA
GTCAAGGGGAAACTTCAAAGGGAATCTGGGTGGATTAGGTGATCTGGCAGCATGAAGCCAGGGATGCATCAGGGAGGAAG
GGGTGGTTAGAACAGTAGGAGAGAAAGTGCAGAAAAGGGAAAGGAAACTTTTTCTTTGTCACCATTCTGCCATCTACTTA
CATTAGATGAATCCTTCTATTATGTTTCTGAACCCAGACCCAGCCAGGACTTGTCTCTATTCATTTTCTGGGCTGTGTCT
AACAGGAGATAATAGGCTAGAGAGAGATGCTGTATGAACAAATAGAGAAACACATTTGTTTTAAACATTCTCTGTTGCTG
ATGTTGGAAAAAAATGTGAAACAATTATTGCACATTTCATTTCACTAAGTTTTACCTTTTTTCCCCTTTCCCCTAATTTT
CTCTTTCTTGAATTTTGAACAAAATACACAGAAGGAAAACACAAAACACAGAAATGGAAAGTAAAATGGAAGAAAATATC
AAGAAAACTTTATTCTTGCTTATATTTTAAAAGGCACATTTTAAAGTGTTATCTTAAAAATCCAGAGCATTTTAGAAGAT
GAAATGCCAAAAGGTCTCCATTATGTCTATATGTCTATGTCTTTGAGTGACAATCACAGTGCTGATGTAGAGGGAAAGGG
GGAACTAGTTAGACACTGTCACTCACCTGGGAAGGCTTTATTCACCTGTTCCACAGGGCAGTGAGGCACCTTCAGCTCTG
AATCACCGAAAGAGAATCTGGTGGGGCAAGTTCCAGCTGCATGAGGATTTGCTTGCATAAATATTTTTTACTTATTGCTA
ACACTGAGGGTGCCTTCTTACTCCCTGGCAAACATTAAACCACTTTTATTTCCTTTCATGGAAATAAGATTATATTTACA
GATGGTTCTTAGATATACTCACCTGATTTTTTTTTAATTGCTTTTCCACCTGCTTCCCCTTTCTTCTTAGGGTGAAACTC
TAGCCATACGCCCTCTTCTGGTTTCGGGTGAGGAGCCTGAATTGTGGGTATCGTAGTTCTCTTGCCTTTTGGGGTTTCTA
GTTGGGCAGCTTTGGAGCCACAGTGGTAGAACTTCAAGTCCCAGGTGGCTCAGGAAGCAGGGGTGCAGTTGCCTGCCTGC

> NR_109984.1 Homo sapiens IQCF5 antisense RNA 1 (IQCF5-AS1), long non-coding RNA
GAAACCAAGACAACCCCTGCTGATTCCTCACAGCTGACTTGTGACATGCCTAGTATTTCCCATCATTATCTCAAACAATG
ACCTCCTGAAAATACATCTGATGAAGAGAGCTGGGGACACAGATATAGCAGACCTGGTCATTCTTTTAATGGAAGGGTGG
CCCTCAATAAAGACACGGGAATGGCAGCTGTGCCAGCGCCAATAGACCTGGATGATGCGGACAGCGTTGAGCAAACGACA
GTAACGCTGGCGGACACACCACATGCGGACCCAGGACTGCAGCCTGACTGCTGCCCATTCCTGCTGCACATAGAACTCCA
ACACCATCCTCCGCCTCTTTGCCAGCAGCTTCTCCAGCACCTGCCTCCACCAGCACTGAATGATCCAAGCCCTGAGGGCT
GCATGCAGCAGTGTGCGTCGCACCAGCATGCCCCGCCACCAGGCCTGGATGAAAACAGCTGCAGACCTTTCTGTCATGAT
GGTCTTCTCTTCTGGGCCTTACAAGAGAAAACAGACACACTCAGGGTATGCTAGGAAGGGGCCCTGATCCTCTATCAATG
ATGGCTCCTTCCTCTAAAGTTCAGCACTGGGCAGGGCAACAGCTGAACCCAGGAGACCAGGTGTACATCTGCCCTAGCTA
CCTCATTCCATTTTCCATGGACCTTGCTGGCCAGTCTAGAGCAAGAACACTGTGACTTAGAGGTAAAAGAGCTGCCATCT
ACCTGGGATCCATGACTCTTCCCTTCAGAGCCCTCCCATCCGCTGAAGAAGTGGTGTCCTTTGCTCCTCCTGCCCTCTTT
ACTCTGACTTCAAGTCAAACCCACAAGCTGCTGTGTTCCTTCCTCCTCACCAAAGTGGTAACAGATTCCTCCACCTCACA
CACATTGTGAACAGCTTGAACTTCAGCTATGCATGCATCTGTACAAACATGCACACTCCACACACTAACACACATTTTGC
CCCTTGGTCCTGCATTCCCCACAGTCCCCACTTGCAGCCCCCAGCCCTATATGGTCCGGGATCACACTCCTCTGCCCCAC
TCCTGGCCACAGTCTCCACTCCGTATCTCAGCCAGACTCACATCTCTCCGCCTGACAAGAAAATGGGGGCAAGAGACAGC
CAGTCCCCATTCATTCCAGCTCTGCATACCCTGACGTTTCACCCCAGTTTTTCTGGAACACCCTGATTGATGTCAGGTAA
TGAGGACAGGTGCCTGTTTCCCTCACTGAAGCCACTCTTTGTACAATAAATTACAGGGTCATAGATCTTTGAGTCACTCT
GCTGGACCACAGGGGTCAGAGATGTCACTAGTTAAGGCTCTCTGGACCCAGCCACTCACGGAGTTGTGGGTAAGGGTGTG
GCCTGGTAGGGGTGAGGGTGTTGGGGGTGGAGTCGCTTCCCTCTTCTCCCCTTTGTTCCTTCCAGCTCTGTTCTATCGAT
GTGATTTCATCATTATGTTTAATGATCATTTAAAACAGCTTGCCATCCTTCTGGAATCAGAGTGTATGGGGGGAAATAGG
GAAGAAAGTGAAAGGTGCTCTCCCACTGCTAGGTGCCAAGTCAAACACCATTCTCACTGTTTCTAGACAGCTTCCTGGCT
TTCTCTAGGACTTCAAACATGGAGGGGGGTCCTGTCATGAGTAACTGTGGGGAACTTACTCTCCCCTCAGAAAGCTAGTG
TGGTTCTGACTGCCTCTTACAAAACATCTGACTGGGGACAGGTCGTAAAATTCGCACAGGTCTGGACTTTACTAAATTAC
CCATGGTTAGGGGCTAGATGGTCCCATCACCCTCCGGCCCGCACTCCTCCGATCAGGACTCCAACAGGAAGAGTGGAGGA
AGGGCGGGACCTGTGATGTCATAAGGGCACCTGTGATGCAGGCACCAGGACAGCTTCCAGCATATGAGCCTCTTCTAAGG
AAAACCTAGAAGACCCCAAGTACTGACCCATTGACACAAAGGTATATAATGCTGCTCATTAAAATCTGGCTGGTATTCAT

> NR_131180.1 Homo sapiens shugoshin 1 (SGO1), transcript variant 15, non-coding RNA
CAATGGAGGAGCGAGGCGGAAAATTTCGAATGTGGCGGCGGTAGTTCCAGGCGACGGCGGACGGTGGTACGGTCCTGGAG
GGCCCAGTGCGCGGGGCTAGCCGTGGCTGGAGAGCTTCGAAGAGCCTTGAAATGTGAGGAGGAGGAAGATAGCTGTTGCA
GAAGTAGTGGCCAAGGCAAAAGATGGCCAAGGAAAGATGCCTGAAAAAGTCCTTTCAAGATAGTCTTGAAGACATAAAGA
AGCGAATGAAAGAGAAAAGGAATAAAAACTTGGCAGAGATTGGCAAACGCAGGTCTTTTATAGCTGCACCATGCCAAATA
ATCAAACCAGGAAATATGTTCCTCTGGAATGGACCCCAATAGTGATGACAGCTCCAGAAATTTATTTGTGAAGGATTTAC
CGCAAATTCCTCTTGAAGAAACTGAACTTCCAGGACAAGGAGAATCATTTCAAATAGAAGCTACACCACCTGAAACTCAG
CAGTCACCTCATCTTAGCCTGAAGGATATCACCAATGTCTCCTTGTATCCTGTTGTGAAAATCAGAAGACTTTCTCTTTC
TCCAAAAAAGAATAAAGCAAGCCCAGCAGTGGCTCTGCCTAAACGTAGGTGCACAGCCAGCGTGAACTATAAGGAGCCCA
CCCTCGCTTCGAAACTGAGAAGAGGGGACCCTTTTACAGATTTGTGTTTTTTGAATTCTCCTATTTTCAAGCAGAAAAAG
GATTTGAGACGTTCTAAAAAAAGTATGAAACAAATACAATGAAGGTTTTGTTGGATATTGTTTAAATTCAATCTACACCT
CTTTCTGTTGCTTCAGGAGAGGATCTGTAAGAGTACACAAACAGAGTGAGCCATTGCCATAGAATATTCTCTTGACAGGA
CCCTAGCTCATAAACATTTCTTCAGAACTATACTTCAAATGGGATGCTTTGTATTAAAACTTTAATAAATTTAATTTATT
TTTTCTTTTGAATATAAATAACTGAGCTTAAGCATTATCATCATATCGATTTCTTATGCTGCCTAAACCTCTTAATTTTA
GTCGAAATATATCTTTTTTTTTTTTTGGAGGCGGAATTTCATTCTTGTTGCCCAGGCTGGAGTGCAATGGCACGATCTCA
GCTTACCGCAACCTCCGCCTCCCAGGTTCAAGCGATTCTCCTGCCTCAGACTCCCGAGTAGCTGGGACTACAGGCATGTG
CCACTACGCCCGGCTAATTTTGTATTTTTAGTAGAGACAGGGTTTCTCCATGTTGGTCAGGTTGCTCTCAAACTCCCAAC
CTCAGGTGATCTGCCCACCTCGGCCTCCCAAAGTGCTGGGATTACTGAGCCACTGTGCCCAGCCCAAAATGTATCTTATA
CAAACATTGTAGAAATGAATAATGATTACTCAAATAAGATCTTTTAATTATAAGCTTCTGGCATCTCTATTTTTCCCTTA
AGTAGGGGATACTAAAGTGAATGATTTTCTAAGAGGATCTTTTGAAACTCTTTAGTACAATATTTGAGTAAAATAAATTA
TTGTGGATCTTTTGAAACTCTGTGTAGTATTCGAGTAAAATAAATATTTGACATTTACAGTAGGGTACACCCTAAGTTGT
TTATGAATTCCTTATACCACTGTTATATTTAATTTTTCAGATAAAATATGTATGGGAAAATGACTAGAGAAATTGATTCT
TTTTTAGGCTCAGGGACAAGATAAGTGACTAATTGTGAACCCCAAGCTCTAATTGTATGTTCAGAGTGATGTCTCCATAA
AATTACAAATGTCATGTACTTTCAGGGTAGCTCCTTATTTAGTCAGAAATCTCAGAGATTAAATGTGTGAATATTTAGGG
TTTTTGCATATTTTTGACCAACCAATGAAAGCGAATTAGTTCAGTTTAGAAAGTTTGATGTCTATACTTGTATATACCTC
TTTAAATTTTAGAACTTTGTAGGTTATTCATTTTTAAAGAGCAATAAAGTTTGTATAAGATAGTGAAAAAAAAAAAAAAA
"""

In [None]:
from io import StringIO

from Bio import SeqIO

# Walk over the records of the FASTA text and submit one BLAST job per
# record. Each job ID is stored together with the record's FASTA
# description line so the results can be labelled later.
descriptions_and_job_ids = []
for record in SeqIO.parse(StringIO(FASTA), "fasta"):
    label = record.description
    submitted_id = api_client.submit(verbatim_seq=str(record.seq))
    descriptions_and_job_ids.append((label, submitted_id))

## Wait for the jobs to finish and retrieve the results

This will take (more than) a few seconds.

In [None]:
# Call wait() for every submitted job, one after another in submission
# order, and keep each result next to its FASTA description line.
descriptions_and_results = []
for label, submitted_id in descriptions_and_job_ids:
    outcome = api_client.wait(submitted_id)
    descriptions_and_results.append((label, outcome))

## Display the results

In [None]:
# Search result columns to show, in the order they will be displayed.
# NOTE(review): these look like BLAST tabular-output column names — confirm
# they match the columns produced by the result's as_dataframe().
fields = [
    "saccver",
    "pident",
    "length",
    "mismatch",
    "gapopen",
    "qstart",
    "qend",
    "sstart",
    "send",
    "evalue",
    "bitscore",
]


### Nice display (does not work in Google Colab)

In [None]:
import ipywidgets as widgets
from IPython.display import HTML

# Build one Output widget per result, each holding an HTML table with the
# selected columns and the first 20 rows of that result.
html_tables = []
for _, result in descriptions_and_results:
    first_rows = result.as_dataframe()[fields].head(20)
    table_markup = first_rows.to_html(index=False, notebook=True)

    panel = widgets.Output()
    panel.append_display_data(HTML(table_markup))
    html_tables.append(panel)

# Put the tables into an accordion, one section per job, and title each
# section with the FASTA description line of the corresponding sequence.
accordion = widgets.Accordion(children=html_tables)
for position, entry in enumerate(descriptions_and_results):
    accordion.set_title(position, entry[0])

accordion

### Simple Display (for Google Colab)

In [None]:
from html import escape

from IPython.display import HTML

# HTML is imported here as well so that this cell runs on its own: the only
# other import of it lives in the "Nice display" cell, which is the one to
# skip in Google Colab.

# Build one HTML fragment per result: the FASTA description as a heading,
# followed by a table with the selected columns and the first 20 rows.
html_list = []
for descr, result in descriptions_and_results:
    df = result.as_dataframe()[fields].head(20)
    # The description is free text taken from the FASTA input; escape it so
    # characters such as "<" or "&" are shown literally instead of being
    # interpreted as markup.
    html_list.append('<div><h1>' + escape(descr) + '</h1>'
                     + df.to_html(index=False, notebook=True) + '</div>')

# Last expression of the cell: rendered as rich HTML output.
HTML("".join(html_list))