# This notebook walks through converting HGVS expressions into VRS Allele objects

In [1]:
from ga4gh.core import sha512t24u
from ga4gh.core import ga4gh_digest, ga4gh_identify, ga4gh_serialize
from ga4gh.vrs import __version__, models
from ga4gh.vrs.dataproxy import SeqRepoRESTDataProxy
import json
from ga4gh.vrs.extras.translator import Translator



In [2]:
def ppo(o, indent=2):
    """Print ``o.as_dict()`` to stdout as JSON with sorted keys.

    ``o`` must provide an ``as_dict()`` method; ``indent`` is forwarded to
    ``json.dumps``. Returns ``None``.
    """
    rendered = json.dumps(o.as_dict(), indent=indent, sort_keys=True)
    print(rendered)
def pjson(o, indent=2):
    """Return ``o.as_dict()`` serialized as a JSON string with sorted keys.

    ``o`` must provide an ``as_dict()`` method; ``indent`` is forwarded to
    ``json.dumps``. Nothing is printed -- the formatted text is handed back
    to the caller.
    """
    payload = o.as_dict()
    return json.dumps(payload, indent=indent, sort_keys=True)

In [3]:
# Create the data proxy that is later handed to the Translator as its `data_proxy`.
# Requires that a SeqRepo REST interface be running at this URL (e.g., using the
# docker image); point `seqrepo_rest_service_url` at your own instance if needed.
seqrepo_rest_service_url = "https://services.genomicmedlab.org/seqrepo"
dp = SeqRepoRESTDataProxy(base_url=seqrepo_rest_service_url)

In [4]:
# Build the translator used by the loop further down to turn each HGVS expression
# into a VRS object. The "# default" labels are the author's annotations and cannot
# be checked from this notebook alone.
tlr = Translator(data_proxy=dp,
                 translate_sequence_identifiers=True,  # default
                 normalize=True,                       # default
                 identify=False)                        # ids are computed explicitly with ga4gh_identify; NOTE(review): originally labelled "default" -- confirm against the installed Translator signature



In [9]:
# HGVS genomic (g.) expressions to translate, one per kind of change.
hgvs_exprs = [
    "NC_000013.11:g.32936732C=",                # "=": base unchanged from the reference
    "NC_000007.14:g.55181320A>T",               # ">": single-base substitution
    "NC_000007.14:g.55181220del",               # "del": deletion
    "NC_000007.14:g.55181230_55181231insGGCT",  # "ins": insertion between two positions
]

In [10]:
# Translate every HGVS expression, compute the identifier of the resulting
# object, and print the identifier followed by the object rendered as JSON.
for expr in hgvs_exprs:
    trans = tlr.translate_from(expr)
    vrs_id = ga4gh_identify(trans)
    print(f"{vrs_id}\n{pjson(trans)}")


ga4gh:VA.DkZLLMnwoH6zIncSRh2c05nzCNLdTqHl
{
  "location": {
    "interval": {
      "end": {
        "type": "Number",
        "value": 32936732
      },
      "start": {
        "type": "Number",
        "value": 32936731
      },
      "type": "SequenceInterval"
    },
    "sequence_id": "ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT",
    "type": "SequenceLocation"
  },
  "state": {
    "sequence": "C",
    "type": "LiteralSequenceExpression"
  },
  "type": "Allele"
}
ga4gh:VA.5Z7gWQGUuGAPe4Pw2_kJvnkhS2Q5jRhY
{
  "location": {
    "interval": {
      "end": {
        "type": "Number",
        "value": 55181320
      },
      "start": {
        "type": "Number",
        "value": 55181319
      },
      "type": "SequenceInterval"
    },
    "sequence_id": "ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul",
    "type": "SequenceLocation"
  },
  "state": {
    "sequence": "T",
    "type": "LiteralSequenceExpression"
  },
  "type": "Allele"
}
ga4gh:VA.h6WuolTwZJYZh86qP2a8YVA1WXpHuY_X
{
  "location"