## This notebook shows how to convert variant expressions in mixed formats into VRS Allele objects

In [5]:
from ga4gh.core import sha512t24u
from ga4gh.core import ga4gh_digest, ga4gh_identify, ga4gh_serialize
from ga4gh.vrs import __version__, models
from ga4gh.vrs.dataproxy import SeqRepoRESTDataProxy
import json
from ga4gh.vrs.extras.translator import Translator

In [6]:
def pjson(o, indent=2):
    """Return ``o`` serialized as a pretty-printed JSON string.

    Args:
        o: object exposing an ``as_dict()`` method whose result is
            JSON-serializable.
        indent: indentation width handed to ``json.dumps``.

    Returns:
        The JSON text with keys sorted. Nothing is printed.
    """
    return json.dumps(o.as_dict(), sort_keys=True, indent=indent)


def ppo(o, indent=2):
    """Pretty-print ``o`` as JSON on stdout.

    Prints exactly the string ``pjson(o, indent)`` returns, so the two helpers
    cannot drift apart.

    Args:
        o: object exposing an ``as_dict()`` method whose result is
            JSON-serializable.
        indent: indentation width handed to ``json.dumps``.
    """
    print(pjson(o, indent=indent))

In [7]:
# Requires a SeqRepo REST interface to be running at this URL. To use another
# instance (e.g., one started locally from the docker image), change the URL.
seqrepo_rest_service_url = "https://services.genomicmedlab.org/seqrepo"
# Data proxy built on that URL; it is handed to the Translator as `data_proxy`.
dp = SeqRepoRESTDataProxy(base_url=seqrepo_rest_service_url)

In [8]:
# Translator used below (via `tlr.translate_from`) to parse each expression.
# NOTE(review): `identify=False` was labelled "default", but Translator's
# default for `identify` appears to be True (vrs-python 0.8.x) -- confirm.
# Identifiers are instead computed explicitly with ga4gh_identify() in the loop.
tlr = Translator(data_proxy=dp,
                 translate_sequence_identifiers=True,  # default
                 normalize=True,                       # default
                 identify=False)                        # see NOTE(review) above



In [10]:
# Variant expressions in mixed notations (not only HGVS); each one is passed
# to `tlr.translate_from` in the loop that follows.
mixed_exprs = [
    # HGVS-style genomic expressions
    "NC_000007.14:g.55181320A>T",
    "NC_000007.14:g.55181220del",
    # SPDI-style expressions (sequence:position:deleted:inserted)
    "NC_000019.10:44908821:C:T",
    "NC_000019.10:41354681:C:T",
    # gnomAD-style expressions (chromosome-position-ref-alt)
    "7-140453136-A-A",
    "16-2138199-GTGAG-G",
]

In [11]:
# Translate every expression, then show its computed identifier followed by
# the translated object rendered as JSON.
# NOTE(review): the saved output under this cell does not seem to match the
# current `mixed_exprs` (e.g. positions 7674796-7674797) -- re-run to refresh.
for expr in mixed_exprs:
    trans = tlr.translate_from(expr)   # format is not specified here
    vrs_id = ga4gh_identify(trans)     # needed since tlr has identify=False
    print(vrs_id)
    print(pjson(trans))


ga4gh:VA.FIddvX-n3hQL-Sk5IfnzelQFU-eEr046
{
  "location": {
    "interval": {
      "end": {
        "type": "Number",
        "value": 7674797
      },
      "start": {
        "type": "Number",
        "value": 7674796
      },
      "type": "SequenceInterval"
    },
    "sequence_id": "ga4gh:SQ.dLZ15tNO1Ur0IcGjwc3Sdi_0A6Yf4zm7",
    "type": "SequenceLocation"
  },
  "state": {
    "sequence": "A",
    "type": "LiteralSequenceExpression"
  },
  "type": "Allele"
}
ga4gh:VA.zzIpoHntS5efItp-Nse1SPqzmHlM4QcI
{
  "location": {
    "interval": {
      "end": {
        "type": "Number",
        "value": 31574705
      },
      "start": {
        "type": "Number",
        "value": 31574704
      },
      "type": "SequenceInterval"
    },
    "sequence_id": "ga4gh:SQ.0iKlIQk2oZLoeOG9P1riRU6hvL5Ux8TV",
    "type": "SequenceLocation"
  },
  "state": {
    "sequence": "T",
    "type": "LiteralSequenceExpression"
  },
  "type": "Allele"
}
ga4gh:VA.CxiA_hvYbkD8Vqwjhx5AYuyul4mtlkpD
{
  "location": 