In [1]:
# Run this cell when working in GitHub Codespaces.
import sys
import warnings

# Add the parent directory to the module search path; the `src.*` imports in
# the next cell presumably rely on this when the notebook sits one level
# below the repository root -- TODO confirm.
sys.path.append('..')
# Ignore every warning for the rest of the kernel session (presumably to keep
# the example output uncluttered).
warnings.filterwarnings('ignore')

In [2]:
import json
from src.spdi.spdi_class import SPDI
from src.spdi.spdi_utils import SPDITranslate
from src.variant_to_cvc_translate import CVCTranslatorTo

In [3]:
# Translator instances shared by the cells below: `spdi_translator` is used for
# the SPDI -> HGVS and SPDI -> VRS conversions, `cvc_translator_to` for SPDI -> CVC.
spdi_translator = SPDITranslate()
cvc_translator_to = CVCTranslatorTo()

In [4]:
# Example SPDI records. Each dict holds the keyword arguments that are later
# unpacked into the SPDI constructor (sequence, position, deletion, insertion);
# every value, including the position, is given as a string.
spdi_example_data = [
    # Deletion: 'CC' is replaced by 'C', i.e. one C is removed
    {'sequence': 'NC_000001.11', 'position': '1014263', 'deletion': 'CC', 'insertion': 'C'},
    # Insertion: nothing is deleted and 'ATA' is inserted
    {'sequence': 'NC_000001.11', 'position': '113901365', 'deletion': '', 'insertion': 'ATA'},
    # Duplication: the inserted sequence is the deleted sequence repeated twice
    {'sequence': 'NC_000001.11', 'position': '5880117', 'deletion': 'TGAGCTTCCA', 'insertion': 'TGAGCTTCCATGAGCTTCCA'}
    ]

### SPDI Module Overview

The `SPDI` module provides functionality for creating, validating, and representing SPDI expressions.


#### Features
- **Validation of SPDI Expressions**: SPDI expressions undergo validation to ensure adherence to the SPDI format rules.
- **Representation Conversion**: SPDI objects can be converted to strings or dictionaries, enhancing their usability and interoperability.

#### Dependencies
The SPDI module does not have external dependencies and operates solely on Python's built-in functionality.


In [5]:
# Build one SPDI object per example record; each record's keys are passed to
# the SPDI constructor as keyword arguments.
print("SPDI objects created:")
# A comprehension keeps the temporary record name out of the notebook
# namespace, so `spdi` never refers to a plain dict in one cell and to an
# SPDI object in the next.
spdi_objects = [SPDI(**spdi_fields) for spdi_fields in spdi_example_data]
spdi_objects

SPDI objects created:


[<src.spdi.spdi_class.SPDI at 0x10c9e3fd0>,
 <src.spdi.spdi_class.SPDI at 0x112c6b9d0>,
 <src.spdi.spdi_class.SPDI at 0x1109ea150>]

### spdi_utils Module Overview

The `spdi_utils` module provides the `SPDITranslate` class, which translates SPDI expressions to HGVS and VRS formats. It relies on external APIs for translation.

#### Features

- **Translation to Right-Shift HGVS**: Converts SPDI expressions to right-shifted HGVS expressions using the NCBI Variation Services API.
- **Translation to VRS**: Translates SPDI expressions to VRS using the VRS python translator module.

#### Dependencies
- **External APIs**:
  - Biocommons SeqRepo API
  - NCBI Variation Services API


In [6]:
# Show every SPDI object next to its right-shifted HGVS translation.
for spdi in spdi_objects:
    spdi_text = spdi.to_string()
    print(f'SPDI Expression: {spdi_text}')
    # Translate only after the SPDI line is printed, so a failed translation
    # still shows which expression was being processed.
    hgvs_text = spdi_translator.from_spdi_to_rightshift_hgvs(spdi)
    print(f'Translated to HGVS: {hgvs_text}', end='\n\n')


SPDI Expression: NC_000001.11:1014263:CC:C
Translated to HGVS: NC_000001.11:g.1014265del

SPDI Expression: NC_000001.11:113901365::ATA
Translated to HGVS: NC_000001.11:g.113901365_113901366insATA

SPDI Expression: NC_000001.11:5880117:TGAGCTTCCA:TGAGCTTCCATGAGCTTCCA
Translated to HGVS: NC_000001.11:g.5880118_5880127dup



In [7]:
# Show every SPDI object next to its VRS translation, pretty-printed as JSON.
for spdi in spdi_objects:
    print(f'SPDI Expression: {spdi.to_string()}')
    vrs_result = spdi_translator.from_spdi_to_vrs(spdi)
    # The result exposes as_dict(); dump that dict with a 2-space indent.
    vrs_json = json.dumps(vrs_result.as_dict(), indent=2)
    print(f'Translated to VRS:\n{vrs_json}\n')

SPDI Expression: NC_000001.11:1014263:CC:C
Translated to VRS:
{
  "_id": "ga4gh:VA.BmF3zr2l6XLpLaK8GInM6Q3Emc3JyPD3",
  "type": "Allele",
  "location": {
    "_id": "ga4gh:VSL.i6Of9s2jVDuJ4vwU6sCeG-jT7ygmlfx6",
    "type": "SequenceLocation",
    "sequence_id": "ga4gh:SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
    "interval": {
      "type": "SequenceInterval",
      "start": {
        "type": "Number",
        "value": 1014263
      },
      "end": {
        "type": "Number",
        "value": 1014265
      }
    }
  },
  "state": {
    "type": "LiteralSequenceExpression",
    "sequence": "C"
  }
}

SPDI Expression: NC_000001.11:113901365::ATA
Translated to VRS:
{
  "_id": "ga4gh:VA.J9BMdktHGGjE843oD0T_bwUV6WxojkCW",
  "type": "Allele",
  "location": {
    "_id": "ga4gh:VSL.TMxdXtmi4ctcTRipHMD6py1Nv1kLMyJd",
    "type": "SequenceLocation",
    "sequence_id": "ga4gh:SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
    "interval": {
      "type": "SequenceInterval",
      "start": {
        "type": "Num

In [8]:
# Translation from SPDI to CVC is done using the variant_to_cvc_translate.py module.
for spdi in spdi_objects:
    print(f'SPDI Expression: {spdi.to_string()}')
    # spdi_to_cvc receives the SPDI string form, not the SPDI object itself.
    core_variant = cvc_translator_to.spdi_to_cvc(spdi.to_string())
    print(f'Translated to CVC:\n{core_variant}\n')

SPDI Expression: NC_000001.11:1014263:CC:C
Translated to CVC:
CoreVariantClass(0-based interbase,DNA,CC,C,1014263,1014265,None,None,None,None,None,NC_000001.11,{})

SPDI Expression: NC_000001.11:113901365::ATA
Translated to CVC:
CoreVariantClass(0-based interbase,DNA,,ATA,113901365,113901365,None,None,None,None,None,NC_000001.11,{})

SPDI Expression: NC_000001.11:5880117:TGAGCTTCCA:TGAGCTTCCATGAGCTTCCA
Translated to CVC:
CoreVariantClass(0-based interbase,DNA,TGAGCTTCCA,TGAGCTTCCATGAGCTTCCA,5880117,5880127,None,None,None,None,None,NC_000001.11,{})

