#### Notebook Goal: Translate AlleleProfile into VRS object. 

- This notebook demonstrates how to unpack an example data point into the AlleleProfile class.
- Once the AlleleProfile object is created, the example is converted into a VRS Allele object (version 1.3).  
- The notebook includes two modified FHIR AlleleProfile example datasets: a substitution derived from the HGVS expression "NC_000002.12:g.27453449C>T" and an insertion derived from the HGVS expression "NC_000001.11:g.113901365_113901366insATA". As illustrated below, the examples are translated without the GA4GH Identifier, which demonstrates that the translation of a FHIR AlleleProfile can be performed even without the GA4GH VRS Allele Identifier. Additionally, the notebook showcases the ability to produce VRS Alleles in either a normalized or unnormalized form.

In [15]:
from profiles.alleleprofile import AlleleProfile

In [16]:
# Example 1 - substitution, derived from "NC_000002.12:g.27453449C>T".
# example_substitition is a modified data point taken from the
# Intro_vrs_to_moldef_translation.ipynb notebook.
# This example excludes the GA4GH Identifier.value and Identifier.assigner.
from decimal import Decimal

example_substitition = {
    "resourceType": "MolecularDefinition",
    "id": "demo-example-hgvs-substitition",
    "meta": {
        "profile": ["http://hl7.org/fhir/StructureDefinition/allelesliced"],
    },
    "moleculeType": {
        "coding": [
            {
                "system": "http://hl7.org/fhir/sequence-type",
                "code": "dna",
                "display": "DNA Sequence",
            }
        ],
    },
    "location": [
        {
            "sequenceLocation": {
                "sequenceContext": {
                    "reference": "MolecularDefinition/example-sequence-nc000002-url",
                    "type": "MolecularDefinition",
                    # Required: the reference sequence accession.
                    "display": "NC_000002.12",
                },
                "coordinateInterval": {
                    "coordinateSystem": {
                        "system": {
                            "coding": [
                                {
                                    "system": "http://loinc.org",
                                    "code": "LA30100-4",
                                    # Required: the coordinate system in use.
                                    "display": "0-based interbase",
                                }
                            ],
                            "text": "0-based interbase",
                        },
                    },
                    # Required: start of the interval (startQuantity).
                    "startQuantity": {"value": Decimal(27453448)},
                    # Required: end of the interval (endQuantity).
                    "endQuantity": {"value": Decimal(27453449)},
                },
            },
        }
    ],
    "representation": [
        {
            "focus": {
                "coding": [
                    {
                        "system": "http://hl7.org/fhir/moleculardefinition-focus",
                        "code": "allele-state",
                        "display": "Allele State",
                    }
                ],
            },
            # The alternate base of the substitution.
            "literal": {"value": "T"},
        }
    ],
}

In [17]:
# Unpack the example dict as keyword arguments to build the AlleleProfile object.
example_allele_substitition = AlleleProfile(**example_substitition)

In [18]:
# Show the AlleleProfile contents as a plain dict so they can be compared with the input example.
example_allele_substitition.model_dump()

{'resourceType': 'MolecularDefinition',
 'id': 'demo-example-hgvs-substitition',
 'meta': {'profile': ['http://hl7.org/fhir/StructureDefinition/allelesliced']},
 'moleculeType': {'coding': [{'system': 'http://hl7.org/fhir/sequence-type',
    'code': 'dna',
    'display': 'DNA Sequence'}]},
 'location': [{'sequenceLocation': {'sequenceContext': {'reference': 'MolecularDefinition/example-sequence-nc000002-url',
     'type': 'MolecularDefinition',
     'display': 'NC_000002.12'},
    'coordinateInterval': {'coordinateSystem': {'system': {'coding': [{'system': 'http://loinc.org',
         'code': 'LA30100-4',
         'display': '0-based interbase'}],
       'text': '0-based interbase'}},
     'startQuantity': {'value': Decimal('27453448')},
     'endQuantity': {'value': Decimal('27453449')}}}}],
 'representation': [{'focus': {'coding': [{'system': 'http://hl7.org/fhir/moleculardefinition-focus',
      'code': 'allele-state',
      'display': 'Allele State'}]},
   'literal': {'value': 'T'}}]}

In [19]:
from moldeftranslator.allele_translator import VrsFhirAlleleTranslation

# Single translator instance; the cells below reuse it for every AlleleProfile -> VRS Allele conversion.
alleleTrans = VrsFhirAlleleTranslation()

In [20]:
# Translate the substitution with normalize=False. In the output below the
# location keeps the 'refseq:NC_000002.12' sequence_id and no '_id' fields appear.
vrs_example_allele_substitition_unnorm = alleleTrans.translate_allele_profile_to_vrs_allele(example_allele_substitition,normalize=False)
# Render the resulting VRS Allele as a plain dict for display.
vrs_example_allele_substitition_unnorm.as_dict()

{'type': 'Allele',
 'location': {'type': 'SequenceLocation',
  'sequence_id': 'refseq:NC_000002.12',
  'interval': {'type': 'SequenceInterval',
   'start': {'type': 'Number', 'value': 27453448},
   'end': {'type': 'Number', 'value': 27453449}}},
 'state': {'type': 'LiteralSequenceExpression', 'sequence': 'T'}}

In [21]:
# Same translation, leaving `normalize` at its default (presumably True - confirm
# in allele_translator). In the output below the Allele and its location carry
# 'ga4gh:' '_id' values and sequence_id is a 'ga4gh:SQ.' identifier.
vrs_example_allele_substitition_norm = alleleTrans.translate_allele_profile_to_vrs_allele(example_allele_substitition)
# Render the resulting VRS Allele as a plain dict for display.
vrs_example_allele_substitition_norm.as_dict()

{'_id': 'ga4gh:VA.fXvhngewkkyVwzEeSJRr5tro8Jcol6Q-',
 'type': 'Allele',
 'location': {'_id': 'ga4gh:VSL.nLMbYalHO4OEI2axqkyTMCQxrH98UpDN',
  'type': 'SequenceLocation',
  'sequence_id': 'ga4gh:SQ.pnAqCRBrTsUoBghSD1yp_jXWSmlbdh4g',
  'interval': {'type': 'SequenceInterval',
   'start': {'type': 'Number', 'value': 27453448},
   'end': {'type': 'Number', 'value': 27453449}}},
 'state': {'type': 'LiteralSequenceExpression', 'sequence': 'T'}}

In [22]:
# Example 2 - insertion, derived from "NC_000001.11:g.113901365_113901366insATA".
# example_insertion_2 is taken, unmodified, from the
# Intro_vrs_to_moldef_translation.ipynb notebook.
from decimal import Decimal

example_insertion_2 = {
    "resourceType": "MolecularDefinition",
    "id": "demo-example-hgvs-insertion",
    "meta": {
        "profile": ["http://hl7.org/fhir/StructureDefinition/allelesliced"],
    },
    "moleculeType": {
        "coding": [
            {
                "system": "http://hl7.org/fhir/sequence-type",
                "code": "dna",
                "display": "DNA Sequence",
            }
        ],
    },
    "location": [
        {
            "sequenceLocation": {
                "sequenceContext": {
                    "reference": "MolecularDefinition/example-sequence-nc000001-url",
                    "type": "MolecularDefinition",
                    # Must only contain the reference sequence.
                    "display": "NC_000001.11",
                },
                "coordinateInterval": {
                    "coordinateSystem": {
                        "system": {
                            "coding": [
                                {
                                    "system": "http://loinc.org",
                                    "code": "LA30100-4",
                                    # Must only contain the coordinate system.
                                    "display": "0-based interbase",
                                }
                            ],
                            "text": "0-based interbase",
                        },
                    },
                    # Required: start of the interval (startQuantity).
                    "startQuantity": {"value": Decimal(113901365)},
                    # Required: end of the interval (endQuantity); equal to the
                    # start here, so the interval is zero-width.
                    "endQuantity": {"value": Decimal(113901365)},
                },
            },
        }
    ],
    "representation": [
        {
            "focus": {
                "coding": [
                    {
                        "system": "http://hl7.org/fhir/moleculardefinition-focus",
                        "code": "allele-state",
                        "display": "Allele State",
                    }
                ],
            },
            # The inserted sequence.
            "literal": {"value": "ATA"},
        }
    ],
}


In [23]:
# Unpack the insertion example dict as keyword arguments to build the AlleleProfile object.
example_allele_insertion_2 = AlleleProfile(**example_insertion_2)
# Show the AlleleProfile contents as a plain dict so they can be compared with the input example.
example_allele_insertion_2.model_dump()

{'resourceType': 'MolecularDefinition',
 'id': 'demo-example-hgvs-insertion',
 'meta': {'profile': ['http://hl7.org/fhir/StructureDefinition/allelesliced']},
 'moleculeType': {'coding': [{'system': 'http://hl7.org/fhir/sequence-type',
    'code': 'dna',
    'display': 'DNA Sequence'}]},
 'location': [{'sequenceLocation': {'sequenceContext': {'reference': 'MolecularDefinition/example-sequence-nc000001-url',
     'type': 'MolecularDefinition',
     'display': 'NC_000001.11'},
    'coordinateInterval': {'coordinateSystem': {'system': {'coding': [{'system': 'http://loinc.org',
         'code': 'LA30100-4',
         'display': '0-based interbase'}],
       'text': '0-based interbase'}},
     'startQuantity': {'value': Decimal('113901365')},
     'endQuantity': {'value': Decimal('113901365')}}}}],
 'representation': [{'focus': {'coding': [{'system': 'http://hl7.org/fhir/moleculardefinition-focus',
      'code': 'allele-state',
      'display': 'Allele State'}]},
   'literal': {'value': 'ATA'}}]}

In [24]:
# Translate the insertion, leaving `normalize` at its default (presumably True -
# confirm in allele_translator). In the output below the Allele and its location
# carry 'ga4gh:' '_id' values and sequence_id is a 'ga4gh:SQ.' identifier.
vrs_example_allele_insertion_2 = alleleTrans.translate_allele_profile_to_vrs_allele(example_allele_insertion_2)
# Render the resulting VRS Allele as a plain dict for display.
vrs_example_allele_insertion_2.as_dict()

{'_id': 'ga4gh:VA.J9BMdktHGGjE843oD0T_bwUV6WxojkCW',
 'type': 'Allele',
 'location': {'_id': 'ga4gh:VSL.TMxdXtmi4ctcTRipHMD6py1Nv1kLMyJd',
  'type': 'SequenceLocation',
  'sequence_id': 'ga4gh:SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO',
  'interval': {'type': 'SequenceInterval',
   'start': {'type': 'Number', 'value': 113901365},
   'end': {'type': 'Number', 'value': 113901365}}},
 'state': {'type': 'LiteralSequenceExpression', 'sequence': 'ATA'}}