In [None]:
# Alleles and haplotypes both need to be normalized.
# Alleles: the translate method already returns a normalized allele, so no extra step is needed.
# Haplotypes: these must be normalized explicitly with the normalize function (see below).


# Normalizing haplotypes

# from ga4gh.vrs.normalize import normalize
# norm = normalize(vo, dp)  # vo: an Allele or Haplotype; dp: the data proxy (sequence database) you have been using

# https://normalize.cancervariants.org/variation#/

# For more complex HGVS expressions, such as ones with uncertain (ranged) endpoints, you need to use the VICC Variation Normalizer.

# Example: 
    # NC_000023.10:g.(?_31645770)_(31792329_?)del

    # The endpoint used here is /variation/to_vrs


#     {
#   "search_term": "NC_000023.10:g.(?_31645770)_(31792329_?)del",
#   "warnings": [],
#   "variations": [
#     {
#       "_id": "ga4gh:CX.9ylsEgpF1LmAti0XbNLeTtovkfzZUP-n",
#       "type": "CopyNumberChange",
#       "subject": {
#         "_id": "ga4gh:VSL.njsf2Too-6iNCyDdDzPJHNSirD41Zbbs",
#         "type": "SequenceLocation",
#         "sequence_id": "ga4gh:SQ.v7noePfnNpK8ghYXEqZ9NukMXW7YeNsm",
#         "interval": {
#           "type": "SequenceInterval",
#           "start": {
#             "type": "IndefiniteRange",
#             "value": 31645769,
#             "comparator": "<="
#           },
#           "end": {
#             "type": "IndefiniteRange",
#             "value": 31792329,
#             "comparator": ">="
#           }
#         }
#       },
#       "copy_change": "efo:0030067"
#     }
#   ],
#   "service_meta_": {
#     "version": "0.5.5",
#     "response_datetime": "2023-05-19T17:48:52.435887",
#     "name": "variation-normalizer",
#     "url": "https://github.com/cancervariants/variation-normalization"
#   }
# }



In [1]:
import requests
import json

class translate_api_fxn:
    """Small REST client for two variation services.

    * NCBI Variation Services (``base_ncbi_url_api``): SPDI <-> HGVS conversion.
    * VICC Variation Normalizer (``base_varnorm_url_api``): free-text / HGVS
      expression -> VRS object.

    Every method performs a live HTTP GET through ``requests``.
    """

    def __init__(self, timeout=30):
        """
        Initialize class with the API URL

        Args:
            timeout (float | tuple | None, optional): Value forwarded to
                ``requests.get(..., timeout=...)`` for every call. Pass ``None``
                to wait indefinitely. Defaults to 30 seconds.
        """

        self.base_ncbi_url_api = 'https://api.ncbi.nlm.nih.gov/variation/v0/'
        self.base_varnorm_url_api = 'https://normalize.cancervariants.org'

        self.headers = {
            'Content-Type': 'application/json; charset=utf-8'
        }

        # Without a timeout a stalled server would block the notebook kernel forever.
        self.timeout = timeout

    def _get(self, base_url, endpoint, params=None):
        """Issue a GET request and return the response if it succeeded.

        Args:
            base_url (str): Service root, with or without a trailing slash.
            endpoint (str): Path below the root, with or without a leading slash.
            params (dict, optional): Query-string parameters.

        Raises:
            requests.HTTPError: If the status code is anything other than 200.

        Returns:
            The ``requests`` response object.
        """
        # Join with exactly one slash; the NCBI base URL already ends in '/',
        # so naive concatenation would produce '.../v0//spdi/...'.
        url = '{}/{}'.format(base_url.rstrip('/'), endpoint.lstrip('/'))

        response = requests.get(url, params=params, headers=self.headers, timeout=self.timeout)

        if response.status_code != 200:
            raise requests.HTTPError(f'Request failed with status code: {response.status_code}')
        return response

    def variation_to_vrs(self, q, untranslatable_returns_text='true'):
        """Translate a variation expression into a VRS object via the Variation Normalizer.

        Args:
            q (str): Variation expression, e.g. an HGVS string.
            untranslatable_returns_text (str, optional): Forwarded verbatim as the
                query parameter of the same name. Defaults to 'true'.

        Raises:
            requests.HTTPError: If the service does not answer with status 200.
            IndexError: If the response contains no variations.

        Returns:
            dict: The first entry of the response's ``variations`` list.
        """
        params = {
            'q': q,
            'untranslatable_returns_text': untranslatable_returns_text
        }

        response = self._get(self.base_varnorm_url_api, '/variation/to_vrs', params=params)
        payload = json.loads(response.text)

        variations = payload['variations']
        if not variations:
            # Same exception type as indexing an empty list, but with the
            # service's own warnings so the failure is diagnosable.
            raise IndexError('No variation returned for {!r}. Warnings: {}'.format(
                q, payload.get('warnings', [])))
        return variations[0]

    def spdi_attribute_concat(self, r):
        """
        Extract spdi attributes,and concatenating the attributes to create a spdi syntax allele.

        Args:
            r: Response-like object whose ``text`` attribute is a JSON document
                containing ``data.spdis``, a list of objects with the keys
                ``seq_id``, ``position``, ``deleted_sequence`` and
                ``inserted_sequence``.

        Returns:
            list[str]: One ``seq_id:position:deleted:inserted`` string per SPDI
            object, in response order.
        """
        spdiobjs = json.loads(r.text)['data']['spdis']
        return [
            ':'.join([
                spdiobj['seq_id'],
                str(spdiobj['position']),
                spdiobj['deleted_sequence'],
                spdiobj['inserted_sequence']])
            for spdiobj in spdiobjs
        ]

    def spdi_to_hgvs(self, spdi_id):
        """Convert an SPDI expression to HGVS using NCBI Variation Services.

        Args:
            spdi_id (str): SPDI expression, e.g. 'NC_000001.10:12345:1:A'.

        Raises:
            requests.HTTPError: If the service does not answer with status 200.

        Returns:
            The ``data.hgvs`` value of the JSON response.
        """
        response = self._get(self.base_ncbi_url_api, '/spdi/{}/hgvs'.format(spdi_id))
        return json.loads(response.text)['data']['hgvs']

    def hgvs_to_spdi(self, hgvs_id, assembly='GCF_000001405.38'):
        """Convert an HGVS expression to a contextual SPDI expression.

        Args:
            hgvs_id (str): HGVS expression, e.g. 'NC_000001.10:g.12346C>A'.
            assembly (str, optional): Assembly accession sent as the ``assembly``
                query parameter. Defaults to 'GCF_000001405.38'.

        Raises:
            requests.HTTPError: If the service does not answer with status 200.
            IndexError: If the response contains no SPDI objects.

        Returns:
            str: The first SPDI expression in the response.
        """
        # The assembly is a query parameter; appending it to the path (as in
        # '.../contextualsGCF_000001405.38') addresses a non-existent resource.
        response = self._get(
            self.base_ncbi_url_api,
            '/hgvs/{}/contextuals'.format(hgvs_id),
            params={'assembly': assembly})

        spdi_expressions = self.spdi_attribute_concat(response)
        if not spdi_expressions:
            raise IndexError('No SPDI expression returned for {!r}'.format(hgvs_id))
        return spdi_expressions[0]  # only the first SPDI expression is wanted

In [None]:
# Testing functions above and class
# Each call below issues a live HTTP request through translate_api_fxn, so this
# cell needs network access. `api` stays in the notebook namespace afterwards.

api = translate_api_fxn()
# Ranged (uncertain-endpoint) deletion sent to the Variation Normalizer.
result = api.variation_to_vrs('NC_000023.10:g.(?_31645770)_(31792329_?)del') #'NM_002111.8:c.60_110dup'
print(result)
# SPDI -> HGVS through NCBI Variation Services.
result2 = api.spdi_to_hgvs('NC_000001.10:12345:1:A')
print(result2)
# HGVS -> SPDI through NCBI Variation Services (default assembly).
result3 = api.hgvs_to_spdi('NC_000001.10:g.12346C>A')
print(result3)


In [2]:
from ga4gh.vrs.extras.variation_normalizer_rest_dp import VariationNormalizerRESTDataProxy
from ga4gh.vrs.dataproxy import SeqRepoRESTDataProxy
from ga4gh.vrs.extras.translator import Translator
# Notebook-level objects shared by the helper functions defined below:
# from_vrs_to_normalize_hgvs reads the globals `tlr` and `vnorm`.
seqrepo_rest_service_url = "https://services.genomicmedlab.org/seqrepo"
# Data proxy bound to the SeqRepo REST service at the URL above.
dp = SeqRepoRESTDataProxy(base_url=seqrepo_rest_service_url)
# Translator backed by that data proxy.
tlr = Translator(data_proxy=dp)
# Variation Normalizer REST proxy; its to_hgvs method is used further down.
vnorm = VariationNormalizerRESTDataProxy()


def from_spid_to_rightshift_hgvs(expression):
    """Convert an SPDI allele expression to HGVS via ``translate_api_fxn.spdi_to_hgvs``.

    Args:
        expression (str): Allele in SPDI syntax.

    Returns:
        The value returned by ``spdi_to_hgvs`` on success; otherwise a string of
        the form ``'<error>. Expression Error: <expression>'``. Failures are
        reported through the return value, never raised.
    """
    client = translate_api_fxn()
    try:
        # SPDI -> right-shifted HGVS notation, delegated to the NCBI service.
        hgvs_result = client.spdi_to_hgvs(expression)
    except Exception as err:
        # Surface whatever the API call raised together with the offending input.
        return '{}. Expression Error: {}'.format(err, expression)
    return hgvs_result
    

# Example call (live HTTP request). It is not the last expression of the cell,
# so the returned value is discarded rather than displayed.
from_spid_to_rightshift_hgvs('NC_000001.10:12345:1:A')
    
def from_hgvs_to_spdi(expression):
    """Convert an HGVS expression to SPDI via ``translate_api_fxn.hgvs_to_spdi``.

    Args:
        expression (str): Allele in HGVS syntax.

    Returns:
        The value returned by ``hgvs_to_spdi`` on success; otherwise a string of
        the form ``'<error>. Expression Error: <expression>'``. Failures are
        reported through the return value, never raised.
    """
    client = translate_api_fxn()
    try:
        # HGVS -> SPDI, delegated to the NCBI service (default assembly).
        spdi_result = client.hgvs_to_spdi(expression)
    except Exception as err:
        # Surface whatever the API call raised together with the offending input.
        return '{}. Expression Error: {}'.format(err, expression)
    return spdi_result
    
# Example call (live HTTP request). It is not the last expression of the cell,
# so the returned value is discarded rather than displayed.
from_hgvs_to_spdi('NC_000001.10:g.12346C>A')

def to_vrs_object(expression):
    """Translate a variation expression into a VRS object via ``translate_api_fxn.variation_to_vrs``.

    Args:
        expression (str): Variation expression to translate.

    Returns:
        The value returned by ``variation_to_vrs`` on success; otherwise a
        string of the form ``'<error>. Expression Error: <expression>'``.
        Failures are reported through the return value, never raised.
    """
    client = translate_api_fxn()
    try:
        vrs_result = client.variation_to_vrs(expression)
    except Exception as err:
        # Surface whatever the API call raised together with the offending input.
        return '{}. Expression Error: {}'.format(err, expression)
    return vrs_result

# Example call with a bracketed multi-variant HGVS expression (live HTTP request).
# It is not the last expression of the cell, so the returned value is discarded.
to_vrs_object('NC_000006.12:g.[18130687T>C;18138997C>T]')

def from_vrs_to_normalize_hgvs(vrs_object):
    """Turn a VRS object into an HGVS expression using the notebook-level ``tlr`` and ``vnorm``.

    Args:
        vrs_object: VRS representation accepted by ``tlr.translate_from(..., "vrs")``.

    Returns:
        The first element of ``vnorm.to_hgvs(...)`` on success; otherwise a
        string of the form ``'<error> Expression Error: <translated object>'``.

    Note:
        Only the ``to_hgvs`` step is guarded. An exception raised while
        translating ``vrs_object`` propagates to the caller.
    """
    # Translation happens outside the guard so the translated object is
    # available for the error message below.
    vrs_model = tlr.translate_from(vrs_object, "vrs")

    try:
        hgvs_candidates = vnorm.to_hgvs(vrs_model)
        first_hgvs = hgvs_candidates[0]
    except Exception as err:
        return '{} Expression Error: {}'.format(err, vrs_model)
    return first_hgvs



In [None]:
#TODO: Future implementation

import re

#'NC_000023.10:g.(?_31645770)_(31792329_?)del','NM_002111.8:c.60_110dup',
# Sample inputs for check_variation: two HGVS expressions (ranged deletion, dup),
# one SPDI expression, one HGVS substitution, and an SPDI-shaped string whose
# accession is 'jo' (presumably an edge case for the pattern check).
test = ['NC_000023.10:g.(?_31645770)_(31792329_?)del','NM_002111.8:c.60_110dup','NC_000001.10:12345:1:A','NC_000001.10:g.12346C>A','jo:12345:1:A']
# Client instance; check_variation below does not use it.
api = translate_api_fxn()

def check_variation(input_string):
    """Classify a variation expression as HGVS or SPDI by its syntax.

    The check is purely syntactic: HGVS is recognised by its
    ``<accession>:<type>.`` prefix, SPDI by its four colon-separated fields
    (``sequence:position:deletion:insertion``). Nothing is validated against a
    reference sequence.

    Args:
        input_string (str): Expression to classify.

    Returns:
        str | None: ``'hgvs'`` or ``'spdi'`` for a recognised expression,
        ``None`` otherwise (including non-string input). The same verdict is
        also printed.
    """
    # Define regular hgvs and spdi expressions
    # HGVS coordinate types: c, g, m, n, o, p, r.
    hgvs_re = r'[^:]+:[cgmnopr]\.'
    spdi_re = r'(?P<ac>[^:]+):(?P<pos>\d+):(?P<del_len_or_seq>\w*):(?P<ins_seq>\w*)'

    if not isinstance(input_string, str):
        # re.match would raise TypeError; treat non-strings as "no match".
        print("String does not match any expected pattern.")
        return None

    # Check hgvs regular expression (prefix only; the remainder is free-form)
    if re.match(hgvs_re, input_string):
        print('this is a correct hgvs expression {}'.format(input_string))
        return 'hgvs'

    # Check spdi regular expression. fullmatch, not match: SPDI has exactly four
    # fields, so trailing text such as a fifth ':field' must be rejected.
    if re.fullmatch(spdi_re, input_string):
        print('this is a correct spdi expression {}'.format(input_string))
        return 'spdi'

    print("String does not match any expected pattern.")
    return None
        
# Test the function with different input strings
# check_variation prints its verdict for each sample in `test`.
for i in test: 
    check_variation(i)

In [None]:
# NOTE: old way of translating functions

class extra:
    """Legacy ("old way") SPDI translation helpers kept for reference.

    Each instance builds its own SeqRepo data proxy and Translator.
    ``to_fullynorm_hgvs`` additionally relies on the notebook-level ``vnorm``
    object (a VariationNormalizerRESTDataProxy) being defined.
    """

    def __init__(self):
        """Initialize class with the seqrepo rest api 
        """
        self.seqrepo_rest_service_url = "https://services.genomicmedlab.org/seqrepo"
        self.dp = SeqRepoRESTDataProxy(base_url=self.seqrepo_rest_service_url)
        self.tlr = Translator(self.dp)

    def to_rightshift_hgvs(self, expression):
        """Converting SPDI allele expression into right shift normalized HGVS expressions.

        Args:
            expression (string): SPDI allele expression 

        Returns:
            string: Right shift normalized HGVS expressions, or
            ``'Error in expression <error>'`` if the conversion failed.
        """
        try:
            # Converting a allele in SPDI syntax to the right-shifted HGVS notation.
            # Uses the notebook's NCBI client; the name `vs` previously used here
            # is not defined anywhere in this notebook, so the call could only fail.
            return translate_api_fxn().spdi_to_hgvs(expression)
        except Exception as e:
            # returns error produce by NCBI API 
            return 'Error in expression {}'.format(e)

    def to_fullynorm_hgvs(self, expression):
        """Converting SPDI allele expression into fully normalized HGVS expressions.

        Args:
            expression (string): SPDI allele expressions

        Returns:
            list: Fully normalized HGVS expressions as returned by
            ``vnorm.to_hgvs(allele, 'refseq')``, or the string
            ``'Error in expression <error>'`` if translation failed.
        """
        try:
            allele = self.tlr.translate_from(expression, 'spdi')
        except Exception as e:
            # returns error produce by translate_from method
            return 'Error in expression {}'.format(e)

        # Errors from the normalizer itself are not caught and propagate.
        return vnorm.to_hgvs(allele, 'refseq')

    def to_vrs_allele(self, expression):
        """Convert SPDI allele expression into VRS Allele Object


        Args:
            expression (string): SPDI allele expressions

        Returns:
            dictionary: (Key = ga4gh identifier, Value = VRS allele Object).
            On failure the dictionary is
            ``{"Error in expression": "<error message>"}``.
        """
        # Imported here because the notebook only imports ga4gh_identify in a
        # later cell; without this the lookup fails on a fresh top-to-bottom run.
        from ga4gh.core import ga4gh_identify

        vrs_alleles = {}

        try:
            trans = self.tlr.translate_from(expression)
            vrs_alleles[ga4gh_identify(trans)] = trans.as_dict() #json.dumps(trans.as_dict())
        except Exception as e:
            # returns error produce by translate_from method
            vrs_alleles["Error in expression"] = '{}'.format(e)

        return vrs_alleles

    # Need to put this function in a different class. Also, don't think functions like this are necessary:
    # they constantly need to be reconfigured based on the dictionary that is passed in.
    def create_spdi_expression(self, expression):
        """Takes a spdi dictionary and creates a SPDI allele expression with the following format: RefSeq:Position:Deletion:Insertion

        Args:
            expression (dictionary): Parsed response containing ``data.spdis``, a
                sequence of objects with the keys ``seq_id``, ``position``,
                ``deleted_sequence`` and ``inserted_sequence``.

        Returns:
            string: SPDI allele expression built from the first SPDI object, or
            ``None`` when ``data.spdis`` is empty.
        """
        # This would change based off of the structure of the dictionary
        spdiobjs = expression['data']['spdis']

        # Only the first SPDI object is ever used.
        spdiobj = next(iter(spdiobjs), None)
        if spdiobj is None:
            return None

        return ':'.join([
            spdiobj['seq_id'],
            str(spdiobj['position']),
            spdiobj['deleted_sequence'],
            spdiobj['inserted_sequence']])

In [None]:
# NOTE: Understanding how to get normalized hgvs expression from vrs-python notebook

import tabulate
from ga4gh.core import ga4gh_identify
from ga4gh.vrs.normalize import normalize
from ga4gh.vrs.extras.variation_normalizer_rest_dp import VariationNormalizerRESTDataProxy
from IPython.display import HTML, display
# Rebinds the notebook-level `vnorm` (also created in an earlier cell).
vnorm = VariationNormalizerRESTDataProxy()


# The default Postgres port (5432) is blocked for outbound traffic by Binder and potentially by other institutions.
# So that users do not have to install UTA themselves, a REST data proxy was created for the Variation Normalizer's to_hgvs endpoint.

from ga4gh.vrs.dataproxy import SeqRepoRESTDataProxy
from ga4gh.vrs.extras.translator import Translator
# Rebinds the notebook-level `dp` and `tlr` as well; cells run after this one see these objects.
seqrepo_rest_service_url = "https://services.genomicmedlab.org/seqrepo"
dp = SeqRepoRESTDataProxy(base_url=seqrepo_rest_service_url)
tlr = Translator(data_proxy=dp)

# todo: this example should get changed to use normalized hgvs_g as input.
# NOTE(review): this flag stays set on the shared `tlr` for every later cell — confirm that is intended.
tlr.normalize = False

# Round-trip test: HGVS → VR Allele → HGVS[]
# One table row per input expression; the first row is the header.
header = "check hgvs_orig sequence_id sequence_id_normalized hgvs_normalized".split()
table = [header]
for hgvs_expr in (
    "NC_000013.11:g.32936732_32936733del",
    "NC_000013.11:g.32936732_32936737del",
    "NC_000013.11:g.32936732_32936733insC",
    "NC_000013.11:g.32936732_32936733delinsC",
    "NC_000013.11:g.32936732_32936735delinsC",
    "NC_000013.11:g.32936732C>G",
    "NM_015102.3:n.2802C>T",
    "NC_000013.10:g.32331094_32331095dup",
    "NC_000013.10:g.32331092_32331093insTA"
):
    # HGVS string -> VRS object (normalization switched off above).
    a = tlr.translate_from(hgvs_expr, "hgvs")
    # VRS object -> HGVS expression(s) from the Variation Normalizer.
    he = vnorm.to_hgvs(a)
    # Check mark when the input expression is among the returned expressions.
    chk = "✔" if hgvs_expr in he else "✘"
    #print(f"{chk} {hgvs_expr}\n  → {ga4gh_identify(a)}\n  → {he}")
    # Explicitly normalized copy, used only for its identifier column.
    a_norm = normalize(a, dp)
    # he[0] assumes at least one expression came back — TODO confirm to_hgvs never returns an empty result.
    row = [chk, hgvs_expr, ga4gh_identify(a), ga4gh_identify(a_norm), he[0] ]
    table += [row]
display(HTML(tabulate.tabulate(table, tablefmt='html')))

In [None]:
# NOTE: brainstorming
# Relies on `api` and `tlr` already existing in the kernel from earlier cells.
# Ranged deletion -> VRS dict from the Variation Normalizer (live HTTP request).
result = api.variation_to_vrs('NC_000023.10:g.(?_31645770)_(31792329_?)del') #'NM_002111.8:c.60_110dup'
print(result)
# VRS dict -> VRS model object via the translator's "vrs" format.
pjo = tlr.translate_from(result,"vrs")
print(pjo)
# print(vnorm.to_hgvs(pjo)[0])

# # test1 = tlr.translate_from('NC_000023.10:g.(?_31645770)_(31792329_?)del', "hgvs")
# # test1
# anorm = []
# for x in 'NC_000023.10:g.(?_31645770)_(31792329_?)del':
#     result = api.variation_to_vrs(x) #'NM_002111.8:c.60_110dup'
#     pjo = tlr.translate_from(result)
#     a_norm.append(vnorm.to_hgvs(pjo))


# mylist = []
# for x in result:
#     mylist.append(tlr.translate_from(x))

# mylist



In [8]:
# Batch run: expression -> VRS object -> normalized HGVS, one live request per step.
api = translate_api_fxn()

# `vrs_list` is read again by the next cell (vrs_list[1]).
vrs_list = []
var_list =['NC_000023.10:g.(?_31645770)_(31792329_?)del',"NC_000013.11:g.32936732_32936733del","NC_000013.11:g.32936732_32936737del","NC_000013.11:g.32936732_32936733insC","NC_000013.11:g.32936732_32936733delinsC"]

# Not guarded: an HTTP error from variation_to_vrs stops the loop, leaving vrs_list partially filled.
for var in var_list:
   vrs_list.append(api.variation_to_vrs(var))

# from_vrs_to_normalize_hgvs returns an error string instead of raising when to_hgvs fails,
# so norm_hgvs can mix HGVS strings with error messages (see the recorded output below).
norm_hgvs = []
for vrs_obj in vrs_list:
   norm_hgvs.append(from_vrs_to_normalize_hgvs(vrs_obj))

print(norm_hgvs)

['Variation normalizer returned the status code: 422. Expression Error: <CopyNumberChange attributes: _id, copy_change, subject, type>', 'NC_000013.11:g.32936732_32936733del', 'NC_000013.11:g.32936732_32936737del', 'NC_000013.11:g.32936733dup', 'NC_000013.11:g.32936733del']


In [16]:
# Inspect the class produced by translating a VRS dict (second entry of vrs_list).
# NOTE: rebinds `test`, which an earlier cell uses for the list of sample expressions.
test = tlr.translate_from(vrs_list[1],"vrs")
type(test)

abc.Allele