In [None]:
# https://www.ncbi.nlm.nih.gov/biosample/SAMN36419051/
# https://www.ncbi.nlm.nih.gov/datasets/docs/v2/api/rest-api/
# https://www.ncbi.nlm.nih.gov/datasets/docs/v2/api/api-keys/#get-your-api-key

import os
import requests
from rich import inspect
from dotenv import load_dotenv

dotenv_path = "/home/chase/asp_workshop_keys"
load_dotenv(dotenv_path=dotenv_path)

NCBI_KEY = os.getenv("NCBI_KEY", "your_key_here")  

In [43]:
class Organism:
    def __init__(self, genome_id):
        self.id = genome_id
        self.data = self._get_data()
    def _get_data(self):
        url = f"https://api.ncbi.nlm.nih.gov/datasets/v2/genome/accession/{self.id}/dataset_report"
        try:
            response = requests.get(url, headers={'accept': 'application/json', "api-key": NCBI_KEY})
            response.raise_for_status()
            return response.json()
        except requests.RequestException as e:
            print(f"Error fetching data from NCBI: {e}")
    def what_am_i(self):
        print(f'I am a: "{self.data['reports'][0]['organism']['common_name']}"')


In [44]:
a=Organism(genome_id = "GCF_000001635")
a.what_am_i()

I am a: "house mouse"


In [45]:
a.data

{'reports': [{'accession': 'GCF_000001635.27',
   'current_accession': 'GCF_000001635.27',
   'paired_accession': 'GCA_000001635.9',
   'source_database': 'SOURCE_DATABASE_REFSEQ',
   'organism': {'tax_id': 10090,
    'organism_name': 'Mus musculus',
    'common_name': 'house mouse',
    'infraspecific_names': {'strain': 'C57BL/6J'}},
   'assembly_info': {'assembly_level': 'Chromosome',
    'assembly_status': 'current',
    'paired_assembly': {'accession': 'GCA_000001635.9', 'status': 'current'},
    'assembly_name': 'GRCm39',
    'assembly_type': 'haploid',
    'bioproject_lineage': [{'bioprojects': [{'accession': 'PRJNA20689',
        'title': 'Genome sequence finishing for Mus musculus, currently maintained by the Genome Reference Consortium (GRC)'}]}],
    'bioproject_accession': 'PRJNA20689',
    'release_date': '2020-06-24',
    'description': 'Genome Reference Consortium Mouse Build 39',
    'submitter': 'Genome Reference Consortium',
    'refseq_category': 'reference genome',
 

In [37]:
a=Organism("GCF_038242915")
a.what_am_i()

KeyError: 'common_name'

######## 2 ########


In [38]:
class Organism:
    def __init__(self, genome_id):
        self.id = genome_id
    def what_am_i(self):
        url = f"https://api.ncbi.nlm.nih.gov/datasets/v2/genome/accession/{self.id}/dataset_report"
        try:
            response = requests.get(url, headers={'accept': 'application/json', "api-key": NCBI_KEY})
            response.raise_for_status()
            data = response.json()
            if 'common_name' in data['reports'][0]['organism']:
                print(f"I am a: {data['reports'][0]['organism']['common_name']}")
            elif 'organism_name' in data['reports'][0]['organism']:
                print(f"🤓 I am a: {data['reports'][0]['organism']['organism_name']}")
            else:
                print("I don't know")
        except requests.RequestException as e:
            print(f"Error fetching data from NCBI: {e}")

In [39]:
a=Organism("GCF_000001635")
a.what_am_i()

I am a: house mouse


In [42]:
a=Organism("GCF_038242915")
a.what_am_i()

KeyError: 'reports'

######## 3 ########

In [None]:
class Organism:
    def __init__(self, genome_id):
        self.id = genome_id
    @staticmethod
    def _extract_taxid(data):
        try:
            return data['reports'][0]['organism']['tax_id']
        except (KeyError, IndexError):
            return None
    @staticmethod
    def _extract_assembly_name(data):
        try:
            return data['reports'][0]['assembly_info']['assembly_name']
        except (KeyError, IndexError):
            return None
    def set_info(self):
        url = f"https://api.ncbi.nlm.nih.gov/datasets/v2/genome/accession/{self.id}/dataset_report"
        try:
            response = requests.get(url, headers={'accept': 'application/json', "api-key": NCBI_KEY})
            response.raise_for_status()
        except requests.RequestException as e:
            print(f"Error fetching data from NCBI: {e}")
        # assign taxid
        self.taxid = self._extract_taxid(response.json())
        # assign assembly_name
        self.assembly_name = self._extract_assembly_name(response.json())

In [None]:
a = Organism("GCF_000001635.27")

In [None]:
a.taxid


AttributeError: 'Organism' object has no attribute 'taxid'

In [None]:
a.assembly_name

AttributeError: 'Organism' object has no attribute 'assembly_name'

In [None]:
a.set_info()
a.taxid
a.assembly_name

'GRCm39'

######## 4 ########

In [None]:
class Organism:
    __slots__ = ['id', 'BLAST_LINK', 'GDV_LINK', 'FTP_LINK', 'ASSEMBLY_PUBMED', 'ASSEMBLY_NUCCORE', 'ASSEMBLY_NUCCORE_REFSEQ', 'ASSEMBLY_NUCCORE_GENBANK', 'CGV_LINK']
    def __init__(self, genome_id):
        self.id = genome_id
        self.get_outlinks()
    def get_outlinks(self):
        """Fetches assembly links from NCBI and assigns them to the instance attributes."""
        url = f"https://api.ncbi.nlm.nih.gov/datasets/v2/genome/accession/{self.id}/links"
        try:
            response = requests.get(url, headers={'accept': 'application/json', "api-key": NCBI_KEY})
            response.raise_for_status()
            data = response.json()
            for i in data['assembly_links']:
                try:
                    link_type = i['assembly_link_type']
                    if link_type in self.__slots__:
                        setattr(self, link_type, i['resource_link'])
                except KeyError as e:
                    print(f"Missing key in assembly link data: {e}")
        except requests.RequestException as e:
            print(f"Error fetching data from NCBI: {e}")

In [None]:
a = Organism("GCF_000001635.27")

In [None]:
inspect(Organism, methods=True)

In [None]:
inspect(a, methods=True)

In [None]:
a.BLAST_LINK

'https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastSearch&PROG_DEF=blastn&BLAST_SPEC=GDH_GCF_000001635.27'