#### Notebook Goal: Translate VRS (version 1.3) Objects into AlleleProfiles

- This notebook demonstrates the use of the vrs-python package to create VRS Allele objects.
    - For more information about VRS, refer to the official documentation: [VRS Documentation (version 1.3)](https://vrs.ga4gh.org/en/1.3/).  
- Once the VRS objects are created, they can be translated into FHIR AlleleProfiles using the `vrs_allele_to_allele_profile` method from the `VrsFhirAlleleTranslation` class. This method performs a one-directional conversion from VRS to FHIR AlleleProfiles.
- The notebook includes examples of various types of genetic variations, such as deletions, insertions, substitutions, identities, and duplications.


In [20]:
# Import from vrs-python in order to create a vrs object
from ga4gh.vrs import models
from normalize.allele_normalizer import AlleleNormalizer
norm = AlleleNormalizer()

In [21]:
#transalte normalized vrs objects into Allele Profiles
from moldeftranslator.allele_translator import VrsFhirAlleleTranslation
alleleTrans = VrsFhirAlleleTranslation()

In [22]:
# Creating the VRS object and then normalizing it.
# Example 1 - Deletion origin: "NC_000001.11:g.1014265del"
start = 1014263
end = 1014265
refseq = "NC_000001.11"
alt_seq = "C"

interval = models.SequenceInterval(
    start=models.Number(value=start),
    end=models.Number(value=end)
)
location = models.SequenceLocation(
    sequence_id=f"refseq:{refseq}",
    interval=interval
)

state = models.LiteralSequenceExpression(sequence=alt_seq)
del_example_1 = models.Allele(location=location, state=state)
norm_del_example_1 = norm.post_normalize_allele(del_example_1)
norm_del_example_1.as_dict()


{'_id': 'ga4gh:VA.BmF3zr2l6XLpLaK8GInM6Q3Emc3JyPD3',
 'type': 'Allele',
 'location': {'_id': 'ga4gh:VSL.i6Of9s2jVDuJ4vwU6sCeG-jT7ygmlfx6',
  'type': 'SequenceLocation',
  'sequence_id': 'ga4gh:SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO',
  'interval': {'type': 'SequenceInterval',
   'start': {'type': 'Number', 'value': 1014263},
   'end': {'type': 'Number', 'value': 1014265}}},
 'state': {'type': 'LiteralSequenceExpression', 'sequence': 'C'}}

In [23]:
# Translating the normalized allele into an allele profile
allele_profile_del_example = alleleTrans.vrs_allele_to_allele_profile(norm_del_example_1)

print(type(allele_profile_del_example))
allele_profile_del_example.model_dump()

<class 'profiles.alleleprofile.AlleleProfile'>


{'resourceType': 'MolecularDefinition',
 'identifier': [{'value': 'ga4gh:VA.BmF3zr2l6XLpLaK8GInM6Q3Emc3JyPD3',
   'assigner': {'display': 'Global Alliance for Genomics and Health'}}],
 'moleculeType': {'coding': [{'system': 'http://hl7.org/fhir/sequence-type',
    'code': 'dna',
    'display': 'DNA Sequence'}]},
 'location': [{'sequenceLocation': {'sequenceContext': {'display': 'NC_000001.11'},
    'coordinateInterval': {'coordinateSystem': {'system': {'coding': [{'system': 'http://loinc.org',
         'code': 'LA30100-4',
         'display': '0-based interbase'}]}},
     'startQuantity': {'value': Decimal('1014263')},
     'endQuantity': {'value': Decimal('1014265')}}}}],
 'representation': [{'literal': {'value': 'C'}}]}

In [24]:
# Creating the VRS object and then normalizing it.
# Example 2 - Deletion origin: "NC_000016.10:g.1510946_1510947del"
start = 1510945
end = 1510947
refseq = "NC_000001.11"
alt_seq = ""

interval = models.SequenceInterval(
    start=models.Number(value=start),
    end=models.Number(value=end)
)
location = models.SequenceLocation(
    sequence_id=f"refseq:{refseq}",
    interval=interval
)

state = models.LiteralSequenceExpression(sequence=alt_seq)
del_example_2 = models.Allele(location=location, state=state)
norm_del_example_2 = norm.post_normalize_allele(del_example_2)


In [25]:
# Translating the normalized allele into an allele profile
allele_profile_del_example_2 = alleleTrans.vrs_allele_to_allele_profile(norm_del_example_2)

print(type(allele_profile_del_example_2))
allele_profile_del_example_2.model_dump()

<class 'profiles.alleleprofile.AlleleProfile'>


{'resourceType': 'MolecularDefinition',
 'identifier': [{'value': 'ga4gh:VA.qwFzYgeGLgdWCqP37Or25T8CLCsy-Z1e',
   'assigner': {'display': 'Global Alliance for Genomics and Health'}}],
 'moleculeType': {'coding': [{'system': 'http://hl7.org/fhir/sequence-type',
    'code': 'dna',
    'display': 'DNA Sequence'}]},
 'location': [{'sequenceLocation': {'sequenceContext': {'display': 'NC_000001.11'},
    'coordinateInterval': {'coordinateSystem': {'system': {'coding': [{'system': 'http://loinc.org',
         'code': 'LA30100-4',
         'display': '0-based interbase'}]}},
     'startQuantity': {'value': Decimal('1510945')},
     'endQuantity': {'value': Decimal('1510947')}}}}],
 'representation': [{'literal': {'value': ' '}}]}

In [26]:
# Creating the VRS object and then normalizing it.
# Example 3 - Insertion origin: "NC_000001.11:g.113901365_113901366insATA"
start = 113901365
end = 113901365
refseq = "NC_000001.11"
alt_seq = "ATA"

interval = models.SequenceInterval(
    start=models.Number(value=start),
    end=models.Number(value=end)
)
location = models.SequenceLocation(
    sequence_id=f"refseq:{refseq}",
    interval=interval
)

state = models.LiteralSequenceExpression(sequence=alt_seq)
insertion_example= models.Allele(location=location, state=state)
norm_insertion_example= norm.post_normalize_allele(insertion_example)


In [27]:
# Translating the normalized allele into an allele profile
allele_profile_insertion = alleleTrans.vrs_allele_to_allele_profile(norm_insertion_example)

print(type(allele_profile_insertion))
allele_profile_insertion.model_dump()

<class 'profiles.alleleprofile.AlleleProfile'>


{'resourceType': 'MolecularDefinition',
 'identifier': [{'value': 'ga4gh:VA.J9BMdktHGGjE843oD0T_bwUV6WxojkCW',
   'assigner': {'display': 'Global Alliance for Genomics and Health'}}],
 'moleculeType': {'coding': [{'system': 'http://hl7.org/fhir/sequence-type',
    'code': 'dna',
    'display': 'DNA Sequence'}]},
 'location': [{'sequenceLocation': {'sequenceContext': {'display': 'NC_000001.11'},
    'coordinateInterval': {'coordinateSystem': {'system': {'coding': [{'system': 'http://loinc.org',
         'code': 'LA30100-4',
         'display': '0-based interbase'}]}},
     'startQuantity': {'value': Decimal('113901365')},
     'endQuantity': {'value': Decimal('113901365')}}}}],
 'representation': [{'literal': {'value': 'ATA'}}]}

In [28]:
# Creating the VRS object and then normalizing it.
# Example 4 - Substituion origin: "NC_000002.12:g.27453449C>T"
start = 27453448
end = 27453449
refseq = "NC_000002.12"
alt_seq = "T"

interval = models.SequenceInterval(
    start=models.Number(value=start),
    end=models.Number(value=end)
)
location = models.SequenceLocation(
    sequence_id=f"refseq:{refseq}",
    interval=interval
)

state = models.LiteralSequenceExpression(sequence=alt_seq)
sub_example= models.Allele(location=location, state=state)
norm_sub_example= norm.post_normalize_allele(sub_example)


In [29]:
# Translating the normalized allele into an allele profile
allele_profile_sub_example = alleleTrans.vrs_allele_to_allele_profile(norm_sub_example)

print(type(allele_profile_sub_example))
allele_profile_sub_example.model_dump()

<class 'profiles.alleleprofile.AlleleProfile'>


{'resourceType': 'MolecularDefinition',
 'identifier': [{'value': 'ga4gh:VA.fXvhngewkkyVwzEeSJRr5tro8Jcol6Q-',
   'assigner': {'display': 'Global Alliance for Genomics and Health'}}],
 'moleculeType': {'coding': [{'system': 'http://hl7.org/fhir/sequence-type',
    'code': 'dna',
    'display': 'DNA Sequence'}]},
 'location': [{'sequenceLocation': {'sequenceContext': {'display': 'NC_000002.12'},
    'coordinateInterval': {'coordinateSystem': {'system': {'coding': [{'system': 'http://loinc.org',
         'code': 'LA30100-4',
         'display': '0-based interbase'}]}},
     'startQuantity': {'value': Decimal('27453448')},
     'endQuantity': {'value': Decimal('27453449')}}}}],
 'representation': [{'literal': {'value': 'T'}}]}

In [30]:
# Creating the VRS object and then normalizing it.
# Example 5 - Identity origin: "NC_000004.12:g.88007816="
start = 88007815
end = 88007816
refseq = "NC_000004.12"
alt_seq = "G"

interval = models.SequenceInterval(
    start=models.Number(value=start),
    end=models.Number(value=end)
)
location = models.SequenceLocation(
    sequence_id=f"refseq:{refseq}",
    interval=interval
)

state = models.LiteralSequenceExpression(sequence=alt_seq)
identity_example= models.Allele(location=location, state=state)
norm_identity_example= norm.post_normalize_allele(identity_example)


In [31]:
# Translating the normalized allele into an allele profile
allele_profile_identity_example = alleleTrans.vrs_allele_to_allele_profile(norm_identity_example)

print(type(allele_profile_identity_example))
allele_profile_identity_example.model_dump()

<class 'profiles.alleleprofile.AlleleProfile'>


{'resourceType': 'MolecularDefinition',
 'identifier': [{'value': 'ga4gh:VA.eT7IpRxd5CiyJEq8sE6AgobdwgY62NxG',
   'assigner': {'display': 'Global Alliance for Genomics and Health'}}],
 'moleculeType': {'coding': [{'system': 'http://hl7.org/fhir/sequence-type',
    'code': 'dna',
    'display': 'DNA Sequence'}]},
 'location': [{'sequenceLocation': {'sequenceContext': {'display': 'NC_000004.12'},
    'coordinateInterval': {'coordinateSystem': {'system': {'coding': [{'system': 'http://loinc.org',
         'code': 'LA30100-4',
         'display': '0-based interbase'}]}},
     'startQuantity': {'value': Decimal('88007815')},
     'endQuantity': {'value': Decimal('88007816')}}}}],
 'representation': [{'literal': {'value': 'G'}}]}

In [32]:
# Creating the VRS object and then normalizing it.
# Example 6 - Duplication origin: "NC_000001.11:g.5880118_5880127dup"
start = 5880117
end = 5880127
refseq = "NC_000001.11"
alt_seq = "TGAGCTTCCATGAGCTTCCA"

interval = models.SequenceInterval(
    start=models.Number(value=start),
    end=models.Number(value=end)
)
location = models.SequenceLocation(
    sequence_id=f"refseq:{refseq}",
    interval=interval
)

state = models.LiteralSequenceExpression(sequence=alt_seq)
dup_example= models.Allele(location=location, state=state)
norm_dup_example= norm.post_normalize_allele(dup_example)


In [33]:
# Translating the normalized allele into an allele profile
allele_profile_dup_example = alleleTrans.vrs_allele_to_allele_profile(norm_dup_example)

print(type(allele_profile_dup_example))
allele_profile_dup_example.model_dump()

<class 'profiles.alleleprofile.AlleleProfile'>


{'resourceType': 'MolecularDefinition',
 'identifier': [{'value': 'ga4gh:VA.OpO3jwlmnhvpmEs2v9orWvMIa7UPb1To',
   'assigner': {'display': 'Global Alliance for Genomics and Health'}}],
 'moleculeType': {'coding': [{'system': 'http://hl7.org/fhir/sequence-type',
    'code': 'dna',
    'display': 'DNA Sequence'}]},
 'location': [{'sequenceLocation': {'sequenceContext': {'display': 'NC_000001.11'},
    'coordinateInterval': {'coordinateSystem': {'system': {'coding': [{'system': 'http://loinc.org',
         'code': 'LA30100-4',
         'display': '0-based interbase'}]}},
     'startQuantity': {'value': Decimal('5880117')},
     'endQuantity': {'value': Decimal('5880127')}}}}],
 'representation': [{'literal': {'value': 'TGAGCTTCCATGAGCTTCCA'}}]}