In [1]:
#RUN FOR GITHUB CODESPACE
# Notebook setup: put the parent directory on the import path and silence
# warnings for the rest of the session.
import sys
import warnings

# Later cells import the `src` and `database` packages, which are presumably
# located in the parent directory. The membership check keeps this cell
# idempotent: re-running it no longer piles up duplicate '..' entries.
if '..' not in sys.path:
    sys.path.append('..')

# Blanket filter: every warning category is ignored from here on.
warnings.filterwarnings('ignore')

In [2]:
import json  # NOTE(review): not referenced in the cells visible here; confirm it is still needed
# HGVSTranslate lives in the project's hgvs_utils module
from src.hgvsExtra.hgvs_utils import HGVSTranslate

# Single translator instance shared by the cells below, which call its
# from_hgvs_to_spdi and hgvs_to_vrs_trans methods.
hgvs_translator = HGVSTranslate()


### hgvs_utils Module Overview

The `hgvs_utils` module facilitates the translation of HGVS expressions to SPDI and VRS formats. It utilizes external APIs for translation and validation.

#### Features
- **Translation to VRS**: Translates HGVS expressions to VRS using the VRS Python translator module.
- **Translation to SPDI**: Converts HGVS expressions to SPDI using the NCBI Variation Services API.
- **HGVS Expression Validation**: HGVS expressions are validated using the biocommons hgvs package.

#### Dependencies
- **External APIs**:
  - Biocommons SeqRepo API
  - NCBI Variation Services API
  <!-- VICC Variation Normalizer API -->
- **Python Packages**:
  - biocommons-hgvs


#### Example Data from Database

In [3]:
from database.db_operation import DbOperation

# Open the gold standard database
db = DbOperation('../database/gsdb_v2.db')

# Load the combined database contents as a pandas dataframe
df = db.get_combined_in_df()

# Pull the 'hgvs' values out of the dataframe and keep the first three
# as the example expressions used by the cells below
hgvs_values = db.extract_values(df, 'hgvs')
hgvs_data = hgvs_values[:3]

#### HGVS Methods

In [4]:
# Show each example HGVS expression followed by its SPDI translation
for hgvs in hgvs_data:
    print(f'HGVS Expression: {hgvs}')
    # Translate only after the expression has been echoed, so the output
    # order matches a direct call inside the f-string
    spdi_expression = hgvs_translator.from_hgvs_to_spdi(hgvs)
    print(f'Translated to SPDI: {spdi_expression}\n')

HGVS Expression: NC_000001.11:g.1014265del
Translated to SPDI: NC_000001.11:1014263:CC:C

HGVS Expression: NC_000001.11:g.15445656_15445660del
Translated to SPDI: NC_000001.11:15445654:GCATCG:G

HGVS Expression: NC_000016.10:g.1510946_1510947del
Translated to SPDI: NC_000016.10:1510945:AG:



In [5]:
# Show each example HGVS expression followed by its VRS translation,
# rendered through the result's as_dict() method
for hgvs in hgvs_data:
    print(f'HGVS Expression: {hgvs}')
    vrs_object = hgvs_translator.hgvs_to_vrs_trans(hgvs)
    print(f'Translated to VRS:\n{vrs_object.as_dict()}\n')

HGVS Expression: NC_000001.11:g.1014265del
Translated to VRS:
{'_id': 'ga4gh:VA.BmF3zr2l6XLpLaK8GInM6Q3Emc3JyPD3', 'type': 'Allele', 'location': {'_id': 'ga4gh:VSL.i6Of9s2jVDuJ4vwU6sCeG-jT7ygmlfx6', 'type': 'SequenceLocation', 'sequence_id': 'ga4gh:SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO', 'interval': {'type': 'SequenceInterval', 'start': {'type': 'Number', 'value': 1014263}, 'end': {'type': 'Number', 'value': 1014265}}}, 'state': {'type': 'LiteralSequenceExpression', 'sequence': 'C'}}

HGVS Expression: NC_000001.11:g.15445656_15445660del
Translated to VRS:
{'_id': 'ga4gh:VA.5CfKpT5tErBj7PNtqdub7VOliwHEQLvs', 'type': 'Allele', 'location': {'_id': 'ga4gh:VSL.z-j8mH9v1lJf-MsQosxg_8gtRA1zKhuE', 'type': 'SequenceLocation', 'sequence_id': 'ga4gh:SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO', 'interval': {'type': 'SequenceInterval', 'start': {'type': 'Number', 'value': 15445654}, 'end': {'type': 'Number', 'value': 15445660}}}, 'state': {'type': 'LiteralSequenceExpression', 'sequence': 'G'}}

HGVS Express

### variant_to_cvc_translate Module Overview

The `variant_to_cvc_translate` module offers functionality for translating variations from HGVS, SPDI, or VRS formats into the `CoreVariantClass`.

#### Features
- **SPDI to CoreVariantClass Translation**: Translates SPDI expressions into CoreVariantClass objects.

- **HGVS to CoreVariantClass Translation**: Translates HGVS expressions into CoreVariantClass objects.

- **VRS to CoreVariantClass Translation**: Translates VRS expressions into CoreVariantClass objects.

#### Dependencies
- **External APIs**:
  - Biocommons SeqRepo API
  - NCBI Variation Services API

- **Python Packages**:
  - bioutils.normalize
  - hgvs

In [6]:
# CVCTranslator is provided by the project's variant_to_cvc_translate module
from src.variant_to_cvc_translate import CVCTranslator
# Single translator instance reused by the cells that follow
cvc_translator = CVCTranslator()

In [7]:
# Show each example HGVS expression followed by its CoreVariantClass translation
for hgvs in hgvs_data:
    print(f'HGVS Expression: {hgvs}')
    cvc_object = cvc_translator.hgvs_to_cvc(hgvs)
    print(f'Translated to CVC:\n{cvc_object}\n')

HGVS Expression: NC_000001.11:g.1014265del
Translated to CVC:
CoreVariantClass(0-based interbase,DNA,CC,C,1014263,1014265,None,None,None,None,None,NC_000001.11,{})

HGVS Expression: NC_000001.11:g.15445656_15445660del
Translated to CVC:
CoreVariantClass(0-based interbase,DNA,GCATCG,G,15445654,15445660,None,None,None,None,None,NC_000001.11,{})

HGVS Expression: NC_000016.10:g.1510946_1510947del
Translated to CVC:
CoreVariantClass(0-based interbase,DNA,AG,,1510945,1510947,None,None,None,None,None,NC_000016.10,{})



In [8]:

# Example on an NM_ reference with n. coordinates
# (alternative input to try: 'NM_001331029.1:n.872A>G')
example = 'NM_181798.1:n.1263G>T'
# NOTE(review): validate=False presumably skips the HGVS validation step; confirm in HGVSTranslate
example_spdi = hgvs_translator.from_hgvs_to_spdi(example, validate=False)
print(example_spdi)


NM_181798.1:1262:G:T


In [9]:
# VRS translation of `example`, printed via the result's as_dict() method
example_vrs = hgvs_translator.hgvs_to_vrs_trans(example, validate=False)
print(example_vrs.as_dict())

{'_id': 'ga4gh:VA.2U3VttSZjpAOkf5BDKuSvaSbjnY9wUqg', 'type': 'Allele', 'location': {'_id': 'ga4gh:VSL.1nuRIkFWraP9ob21j9n-IC4RRlnrbQzk', 'type': 'SequenceLocation', 'sequence_id': 'ga4gh:SQ.KN07u-RFqd1dTyOWOG98HnOq87Nq-ZIg', 'interval': {'type': 'SequenceInterval', 'start': {'type': 'Number', 'value': 1262}, 'end': {'type': 'Number', 'value': 1263}}}, 'state': {'type': 'LiteralSequenceExpression', 'sequence': 'T'}}
