In [None]:
from ncbi.datasets.openapi import ApiClient as DatasetsApiClient
from ncbi.datasets.openapi import ApiException as DatasetsApiException
from ncbi.datasets import VirusApi as DatasetsVirusApi

from ncbi.datasets.package import dataset

# Local path the downloaded virus data package (a zip archive) is written to.
zipfile_name = "sars_cov2_dataset.zip"

# Number of bytes pulled from the HTTP response per write. The archive for this
# filter set can be tens of gigabytes, so it is streamed to disk in chunks
# rather than being materialised in memory first.
download_chunk_bytes = 1024 * 1024

# Flipped to True only once the archive has been written in full, so the
# catalog step below never runs against a missing or stale file.
download_succeeded = False

with DatasetsApiClient() as api_client:
    virus_api = DatasetsVirusApi(api_client)
    try:
        print("Begin download of virus data package ...")
        virus_ds_download = virus_api.virus_genome_download(
            "2697049",  # taxon; the author noted "SARS2" as an alternative value
            complete_only=True,
            host="human",
            include_annotation_type=["PROT_FASTA", "CDS_FASTA"],
            # Ask for the raw HTTP response instead of a fully read body.
            _preload_content=False,
        )
    except DatasetsApiException as e:
        print(f"Exception when calling virus_genome_download: {e}\n")
    else:
        try:
            with open(zipfile_name, "wb") as f:
                # NOTE(review): relies on the raw response being a urllib3
                # HTTPResponse (stream / release_conn) -- confirm for the
                # installed client version.
                for chunk in virus_ds_download.stream(download_chunk_bytes):
                    f.write(chunk)
        finally:
            # Hand the connection back to the pool even if writing failed.
            virus_ds_download.release_conn()
        download_succeeded = True
        print(f"Download completed -- see {zipfile_name}")

if download_succeeded:
    # open the package zip archive so we can retrieve files from it
    package = dataset.VirusDataset(zipfile_name)
    # print the names and types of all files in the downloaded zip file
    print(package.get_catalog())
else:
    print(f"Skipping package catalog: {zipfile_name} was not downloaded")

Begin download of virus data package ...


In [3]:
import os
import time
from pprint import pprint

import ncbi.datasets.openapi
from ncbi.datasets.openapi import ApiClient as DatasetsApiClient
from ncbi.datasets.openapi.api import virus_api
from ncbi.datasets.openapi.model.rpc_status import RpcStatus
from ncbi.datasets.openapi.model.v1_annotation_for_virus_type import V1AnnotationForVirusType
from ncbi.datasets.openapi.model.v1_download_summary import V1DownloadSummary
# Defining the host is optional and defaults to https://api.ncbi.nlm.nih.gov/datasets/v1
# See configuration.py for a list of all supported configuration parameters.
configuration = ncbi.datasets.openapi.Configuration(
    host="https://api.ncbi.nlm.nih.gov/datasets/v1"
)

# API key authorization (ApiKeyAuthHeader).
# The key is read from the NCBI_API_KEY environment variable instead of being
# hardcoded in the notebook. When the variable is unset no key is configured
# and the requests are sent without one.
ncbi_api_key = os.environ.get("NCBI_API_KEY")
if ncbi_api_key:
    configuration.api_key['ApiKeyAuthHeader'] = ncbi_api_key

# Uncomment below to setup prefix (e.g. Bearer) for API key, if needed
# configuration.api_key_prefix['ApiKeyAuthHeader'] = 'Bearer'

# Enter a context with an instance of the API client. The configuration built
# above is passed in explicitly; otherwise the host and API key settings would
# have no effect on the requests.
with DatasetsApiClient(configuration) as api_client:
    # Create an instance of the API class
    api_instance = virus_api.VirusApi(api_client)

    # Required argument.
    taxon = "2697049" # str | NCBI Taxonomy ID or name (common or scientific) at any taxonomic rank

    # Optional filters used by the second request below.
    host = "human" # str | If set, limit results to genomes extracted from this host (Taxonomy ID or name) All hosts by default (optional)
    complete_only = True # bool | only include complete genomes. (optional) if omitted the server will use the default value of False
    include_annotation_type = ["CDS_FASTA","PROT_FASTA"] # | Select additional types of annotation to include in the data package.  If unset, no annotation is provided. (optional)

    # Further optional keyword arguments of virus_genome_summary that this
    # notebook leaves unset:
    #   refseq_only = True # bool | If true, limit results to RefSeq genomes. (optional) if omitted the server will use the default value of False
    #   annotated_only = True # bool | If true, limit results to annotated genomes. (optional) if omitted the server will use the default value of False
    #   released_since = dateutil_parser('2020-08-01') # datetime | If set, limit results to viral genomes that have been released after a specified date (and optionally, time). April 1, 2020 midnight UTC should be formatted as '2020-04-01T00:00:00.000Z' (optional)
    #   pangolin_classification = "pangolin_classification_example" # str | If set, limit results to genomes classified to this lineage by the PangoLearn tool. (optional)
    #   geo_location = "USA" # str | Assemblies from this location (country and state, or continent) (optional)
    #   exclude_sequence = True # bool | Set to true to omit the genomic sequence. (optional) if omitted the server will use the default value of False

    # Keyword arguments for each summary request, issued in this order:
    #   1. only the required taxon (no optional filters)
    #   2. the taxon plus the optional filters chosen above
    summary_requests = [
        {},
        {
            "host": host,
            "complete_only": complete_only,
            "include_annotation_type": include_annotation_type,
        },
    ]

    for optional_filters in summary_requests:
        try:
            # Get summary data for Coronaviridae genomes by taxon
            api_response = api_instance.virus_genome_summary(taxon, **optional_filters)
            pprint(api_response)
        except ncbi.datasets.openapi.ApiException as e:
            # Report the failure and continue with the next request, so one
            # failing call does not prevent the other from running.
            print("Exception when calling VirusApi->virus_genome_summary: %s\n" % e)

{'assembly_count': 3365053,
 'hydrated': {'cli_download_command_line': 'datasets download virus genome '
                                           'taxon 2697049',
              'estimated_file_size_mb': 370155,
              'url': 'https://api.ncbi.nlm.nih.gov/datasets/v1alpha/virus/taxon/2697049/genome/download'},
 'record_count': 3365053,
 'resource_updated_on': datetime.datetime(2022, 1, 14, 15, 19, 53, tzinfo=tzutc())}
{'assembly_count': 740729,
 'hydrated': {'cli_download_command_line': 'datasets download virus genome '
                                           'taxon 2697049 --host human '
                                           '--complete-only',
              'estimated_file_size_mb': 81480,
              'url': 'https://api.ncbi.nlm.nih.gov/datasets/v1alpha/virus/taxon/2697049/genome/download?complete_only=True&host=human&include_annotation_type=CDS_FASTA,PROT_FASTA'},
 'record_count': 740729,
 'resource_updated_on': datetime.datetime(2022, 1, 14, 15, 19, 53, tzinfo=tzu