In [1]:
# gnomad_client.py

import nest_asyncio
import asyncio
from gql import gql, Client
from gql.transport.aiohttp import AIOHTTPTransport
import traceback
import pprint

# Allow nested asyncio loops
nest_asyncio.apply()

class GnomadClient:
    """Small async client for the gnomAD GraphQL API.

    Each ``fetch_*`` coroutine sends one GraphQL query through a ``gql``
    client backed by an aiohttp transport. Failures are not propagated:
    the traceback is printed and a ``{"msg": ...}`` dict is returned instead,
    so callers must check for the ``"msg"`` key to detect an error.
    """

    def __init__(self, url="https://gnomad.broadinstitute.org/api"):
        """Create the transport and the gql client.

        Args:
            url: GraphQL endpoint to talk to.
        """
        # Initialize transport and client
        self.transport = AIOHTTPTransport(url=url)
        self.client = Client(transport=self.transport, fetch_schema_from_transport=True)

    @staticmethod
    def _error_message(exc):
        """Return the value reported under ``"msg"`` for a caught exception.

        The first positional argument of the exception is used when there is
        one. Exceptions raised without arguments (``exc.args == ()``) fall
        back to ``repr(exc)`` so that building the error payload can never
        raise ``IndexError`` itself.
        """
        return exc.args[0] if exc.args else repr(exc)

    async def _run_query(self, query_text, params):
        """Parse and execute one GraphQL document with the given variables.

        Args:
            query_text: GraphQL document as a string.
            params: dict of GraphQL variable values.

        Returns:
            Whatever ``self.client.execute_async`` returns on success, or
            ``{"msg": <error message>}`` if parsing or execution raised.
        """
        try:
            # Parsing stays inside the try so a parse failure is reported the
            # same way as a transport/query failure.
            document = gql(query_text)
            return await self.client.execute_async(document, variable_values=params)
        except Exception as e:
            # Deliberate best-effort: show the traceback in the cell output
            # and hand back an error payload instead of raising.
            print(traceback.format_exc())
            return {"msg": self._error_message(e)}

    async def fetch_variants_in_variantId(self, variantId, dataset="gnomad_r4"):
        """Fetch exome and genome counts for a single variant.

        Args:
            variantId: variant identifier passed as ``$variantId``
                (e.g. ``"11-108227718-A-G"``).
            dataset: dataset identifier passed as ``$dataset``.

        Returns:
            The query result (a dict with a ``"variant"`` key on success), or
            ``{"msg": ...}`` if the request failed.
        """
        # Define the query
        query_text = """
            query VariantsInGene($variantId: String!, $dataset: DatasetId!) {
              variant(variantId: $variantId, dataset: $dataset) {
                variant_id
                pos
                exome {
                  ac
                  homozygote_count
                  hemizygote_count
                  an
                  af
                  populations { 
                    id
                    ac
                    an
                    homozygote_count
                    hemizygote_count
                  }
                },
                genome {
                  ac
                  homozygote_count
                  hemizygote_count
                  an
                  af
                  populations { 
                    id
                    ac
                    an
                    homozygote_count
                    hemizygote_count
                  }
                }
              }
            }
            """

        # Execute the query and return results
        params = {"variantId": variantId, "dataset": dataset}
        return await self._run_query(query_text, params)

    async def fetch_variants_in_genesymbol(self, gene_symbol, reference_genome="GRCh38", dataset="gnomad_r4"):
        """Fetch exome counts for every variant of a gene.

        Args:
            gene_symbol: gene symbol passed as ``$gene_symbol`` (e.g. ``"BRCA1"``).
            reference_genome: reference genome identifier passed as
                ``$reference_genome``.
            dataset: dataset identifier passed as ``$dataset``.

        Returns:
            The query result (a dict with a ``"gene"`` key on success), or
            ``{"msg": ...}`` if the request failed.
        """
        # Define the query
        query_text = """
            query VariantsInGene($gene_symbol: String!, $reference_genome: ReferenceGenomeId!, $dataset: DatasetId!) {
              gene(gene_symbol: $gene_symbol, reference_genome: $reference_genome) {
                variants(dataset: $dataset) {
                  variant_id
                  pos
                  exome {
                    ac
                    ac_hemi
                    ac_hom
                    an
                    af
                  }
                }
              }
            }
            """

        # Execute the query and return results
        params = {"gene_symbol": gene_symbol, "reference_genome": reference_genome, "dataset": dataset}
        return await self._run_query(query_text, params)
      

# Blocking convenience wrapper around the async gene-symbol lookup
def get_variants_in_genesymbol(gene_symbol, reference_genome="GRCh38", dataset="gnomad_r4"):
    """Run ``GnomadClient.fetch_variants_in_genesymbol`` to completion.

    A fresh ``GnomadClient`` is created for every call, and the coroutine is
    driven with ``asyncio.run``; the coroutine's result is returned as-is.
    """
    pending = GnomadClient().fetch_variants_in_genesymbol(
        gene_symbol, reference_genome, dataset
    )
    return asyncio.run(pending)
  
# Blocking convenience wrapper around the async variant-ID lookup
def get_variants_in_variantId(variantId, dataset="gnomad_r4"):
    """Run ``GnomadClient.fetch_variants_in_variantId`` to completion.

    A fresh ``GnomadClient`` is created for every call, and the coroutine is
    driven with ``asyncio.run``; the coroutine's result is returned as-is.
    """
    pending = GnomadClient().fetch_variants_in_variantId(variantId, dataset)
    return asyncio.run(pending)

In [2]:
# Fetch the variant data for the desired gene_symbol (here BRCA1, using the
# wrapper's default reference genome and dataset) and pretty-print the result.
result = get_variants_in_genesymbol(gene_symbol="BRCA1")
pprint.pprint(result)

{'gene': {'variants': [{'exome': {'ac': 1,
                                  'ac_hemi': 0,
                                  'ac_hom': 0,
                                  'af': 6.920970818418641e-07,
                                  'an': 1444884},
                        'pos': 43045606,
                        'variant_id': '17-43045606-C-G'},
                       {'exome': {'ac': 0,
                                  'ac_hemi': 0,
                                  'ac_hom': 0,
                                  'af': 0,
                                  'an': 1445274},
                        'pos': 43045607,
                        'variant_id': '17-43045607-A-T'},
                       {'exome': {'ac': 1,
                                  'ac_hemi': 0,
                                  'ac_hom': 0,
                                  'af': 6.902521214898954e-07,
                                  'an': 1448746},
                        'pos': 43045609,
                        'var

In [3]:
# Fetch the data for a single variant ID from the gnomad_r4 dataset and
# pretty-print only its 'variant' payload.
# Note: this rebinds `result`, replacing the gene-level result from In [2].
result = get_variants_in_variantId(variantId="11-108227718-A-G", dataset='gnomad_r4')
pprint.pprint(result['variant'])

{'exome': {'ac': 10,
           'af': 6.8444208387974625e-06,
           'an': 1461044,
           'hemizygote_count': 0,
           'homozygote_count': 0,
           'populations': [{'ac': 1,
                            'an': 60354,
                            'hemizygote_count': 0,
                            'homozygote_count': 0,
                            'id': 'remaining'},
                           {'ac': 1,
                            'an': 31774,
                            'hemizygote_count': 0,
                            'homozygote_count': 0,
                            'id': 'remaining_XX'},
                           {'ac': 0,
                            'an': 28580,
                            'hemizygote_count': 0,
                            'homozygote_count': 0,
                            'id': 'remaining_XY'},
                           {'ac': 0,
                            'an': 44720,
                            'hemizygote_count': 0,
                         

In [4]:
import pandas as pd
# Mapping from gnomAD API field names to human-readable labels.
# NOTE(review): not referenced by any cell visible here — presumably intended
# for renaming DataFrame columns later; confirm or remove.
gnomad_names = {"ac": 'Allele_Count', "an": "Allele_Number", "af": "Allele_Frequency",
                "hemizygote_count" : "Number_of_hemizygotes", "homozygote_count": "Number_of_homozygotes", 'id': "Genetic_Ancestry_Group"}
# Build a DataFrame from the 'variant' payload of the current `result`
# (the variant lookup from In [3] when the notebook is run top to bottom).
gnomad_value_df = pd.DataFrame.from_dict(result['variant'])

In [5]:
# Expand the 'populations' entry of the 'exome' column into its own table:
# one row per population id with its ac / an / homozygote / hemizygote counts.
population_exome = pd.DataFrame.from_dict(gnomad_value_df['exome']['populations'])
population_exome

Unnamed: 0,id,ac,an,homozygote_count,hemizygote_count
0,remaining,1,60354,0,0
1,remaining_XX,1,31774,0,0
2,remaining_XY,0,28580,0,0
3,amr,0,44720,0,0
4,amr_XX,0,25688,0,0
5,amr_XY,0,19032,0,0
6,fin,0,53408,0,0
7,fin_XX,0,27642,0,0
8,fin_XY,0,25766,0,0
9,mid,0,5762,0,0


In [6]:
# Same expansion as for the exome data, but for the 'genome' column:
# one row per population id with its ac / an / homozygote / hemizygote counts.
population_genome = pd.DataFrame.from_dict(gnomad_value_df['genome']['populations'])
population_genome

Unnamed: 0,id,ac,an,homozygote_count,hemizygote_count
0,remaining,0,1746,0,0
1,remaining_XX,0,862,0,0
2,remaining_XY,0,884,0,0
3,amr,0,11658,0,0
4,amr_XX,0,5310,0,0
5,amr_XY,0,6348,0,0
6,fin,0,9038,0,0
7,fin_XX,0,2190,0,0
8,fin_XY,0,6848,0,0
9,ami,0,842,0,0


In [7]:
# Call the async method directly (no sync wrapper) for a variant in gnomad_r3.
# The output below shows the failure path: the TransportQueryError
# ('Variant not found') is caught inside the method and its traceback printed.
# Note: this rebinds `result` to the returned error payload.
client = GnomadClient()
result = asyncio.run(client.fetch_variants_in_variantId(variantId="1-925952-G-A", dataset="gnomad_r3"))

Traceback (most recent call last):
  File "/var/folders/hs/b7y1nwdd4pdc_1pt89sgclgm0000gn/T/ipykernel_16723/2481135579.py", line 65, in fetch_variants_in_variantId
    result = await self.client.execute_async(query, variable_values=params)
  File "/Users/dowonkim/miniforge3/envs/portal/lib/python3.8/site-packages/gql/client.py", line 367, in execute_async
    return await session.execute(
  File "/Users/dowonkim/miniforge3/envs/portal/lib/python3.8/site-packages/gql/client.py", line 1639, in execute
    raise TransportQueryError(
gql.transport.exceptions.TransportQueryError: {'message': 'Variant not found'}



In [8]:
# Display the payload returned by the failed lookup in In [7]: a dict with a
# single 'msg' key rather than a 'variant' key.
result

{'msg': "{'message': 'Variant not found'}"}

In [9]:
# `Variants` comes from a local `variants` module that is not shown in this
# notebook. NOTE(review): judging only by the output below, matching_variants()
# appears to render the variant ID as a 'chr<chrom>:<pos>-<pos> <ref>><alt>'
# string — confirm against the module's implementation.
from variants import Variants
Variants("11-108227718-A-G").matching_variants()

'chr11:108227718-108227718 A>G'