# Example for Team meeting 

In [1]:
from ga4gh.core import sha512t24u
from ga4gh.core import ga4gh_digest, ga4gh_identify, ga4gh_serialize
from ga4gh.vrs import __version__, models
from ga4gh.vrs.dataproxy import SeqRepoRESTDataProxy
import json

Removing allOf attribute from CopyNumber to avoid python-jsonschema-objects error.
Removing allOf attribute from SequenceInterval to avoid python-jsonschema-objects error.
Removing allOf attribute from RepeatedSequenceExpression to avoid python-jsonschema-objects error.


In [2]:
# Requires seqrepo REST interface is running on this URL (e.g., using docker image)
seqrepo_rest_service_url = "https://services.genomicmedlab.org/seqrepo"
dp = SeqRepoRESTDataProxy(base_url=seqrepo_rest_service_url)

In [3]:
def ppo(o, indent=2):
    """pretty print object as json"""
    print(json.dumps(o.as_dict(), sort_keys=True, indent=indent))

#### Example variant is NC_000019.10:g.44908822C>T.
#### This will be represented as a molecular varation
* Allele class of molecular varation
    * includes a Location and State
#### Information that we have:
* substitution C --> T
* position @ 44908822. 

In [4]:
variant = 'NC_000019.10:g.44908822C>T'

ref_id, info = variant.split(":")
print('First the {comp_id} into a VRS computed identifier, Then use {body} to fill out the rest of the class object allele!'.format(comp_id = ref_id,body = info))

First the NC_000019.10 into a VRS computed identifier, Then use g.44908822C>T to fill out the rest of the class object allele!


#### Step 1: Translate NC_000019.10 into a VRS computed Identifier which will represent out CURIE.
* This CURIE will be used in SequenceLocation, in the sequence_id attribute.

In [5]:
comp_id = dp.translate_sequence_identifier(ref_id, "ga4gh")
print("CURIE id: {value}".format(value= comp_id))

CURIE id: ['ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl']


#### Step 2: We are representing this variant as an Allele object. 
* Allele Objects have 2 attributes: Location and Sequence
* SequenceLocation has 2 attributes: SequenceInterval and sequence_id
    * SequenceInterval will represent the location of the substitution that is occuring using inter-base coordinates
        * The start and end can be represented using 3 various classes: (Number, DefiniteRange, and IndefiniteRange)
    * sequence_id is the CURIE that was created above.

In [6]:
# To create our Sequence locaiton object we need to construct our SequenceInterval object.

myinterval =  models.SequenceInterval(start=models.Number(value=44908821, type="Number"), 
                                   end=models.Number(value=44908822, type="Number"), 
                                   type="SequenceInterval")
print(ppo(myinterval))

{
  "end": {
    "type": "Number",
    "value": 44908822
  },
  "start": {
    "type": "Number",
    "value": 44908821
  },
  "type": "SequenceInterval"
}
None


#### Step 3: Combine our SequenceInterval and sequence_id to create the SequenceLocation Object.

In [7]:
mylocation =  models.SequenceLocation( 
        sequence_id = 'ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl',
        interval = myinterval)
print(ppo(mylocation))

{
  "interval": {
    "end": {
      "type": "Number",
      "value": 44908822
    },
    "start": {
      "type": "Number",
      "value": 44908821
    },
    "type": "SequenceInterval"
  },
  "sequence_id": "ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl",
  "type": "SequenceLocation"
}
None


#### Step 4: Represent the State by using literalSequenceExpression

In [8]:
mystate = models.LiteralSequenceExpression(sequence="T", type="LiteralSequenceExpression")

#### Step 5: Combine the location and state objects to create the allele object. 

In [9]:
myallele = models.Allele(location = mylocation, state = mystate, type = "Allele")
print(ppo(myallele))

{
  "location": {
    "interval": {
      "end": {
        "type": "Number",
        "value": 44908822
      },
      "start": {
        "type": "Number",
        "value": 44908821
      },
      "type": "SequenceInterval"
    },
    "sequence_id": "ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl",
    "type": "SequenceLocation"
  },
  "state": {
    "sequence": "T",
    "type": "LiteralSequenceExpression"
  },
  "type": "Allele"
}
None


In [10]:
a1 = models.Allele(
    location = models.SequenceLocation( 
        sequence_id = 'ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl',
        interval = models.SequenceInterval(start=models.Number(value=44908821, type="Number"), 
                                   end=models.Number(value=44908822, type="Number"), 
                                   type="SequenceInterval")
    ),
    state=models.LiteralSequenceExpression(sequence="T", type="LiteralSequenceExpression"),
    type="Allele"
)
ppo(a1)

{
  "location": {
    "interval": {
      "end": {
        "type": "Number",
        "value": 44908822
      },
      "start": {
        "type": "Number",
        "value": 44908821
      },
      "type": "SequenceInterval"
    },
    "sequence_id": "ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl",
    "type": "SequenceLocation"
  },
  "state": {
    "sequence": "T",
    "type": "LiteralSequenceExpression"
  },
  "type": "Allele"
}


In [11]:
#Testing if they are both the same objects
print(myallele == a1)

True
