This package, hgvs, is an easy-to-use Python library for parsing, representing, formatting, and mapping variants between genome, transcript, and protein sequences.

In [None]:
import pandas as pd

import hgvs.parser
import hgvs.dataproviders.uta
# Variant validation is provided by two classes: hgvs.validator.IntrinsicValidator and hgvs.validator.ExtrinsicValidator.
# Intrinsic validation evaluates a given variant for internal consistency, such as requiring that insertions specify adjacent positions.
# Extrinsic validation evaluates a variant using external data, such as ensuring that the reference nucleotide in the variant matches that implied by the reference sequence and position.
import hgvs.validator
# Normalization is always 3’ with respect to the reference sequence. 
import hgvs.normalizer
from hgvs.exceptions import HGVSError


# Shared helpers used by the cells below.
# hp: parser that turns HGVS strings into variant objects.
hp = hgvs.parser.Parser()
# hdp: data provider obtained from a UTA connection opened with no arguments.
hdp = hgvs.dataproviders.uta.connect()
# hn / vr: normalizer and validator, both built on the data provider above.
hn = hgvs.normalizer.Normalizer(hdp)
vr = hgvs.validator.Validator(hdp=hdp)

In [None]:
def validate_hgvs_variants(expression: str) -> str:
    """Validate a single HGVS expression and return it unchanged.

    The expression is parsed with the module-level parser ``hp`` and the
    parsed variant is checked with the module-level validator ``vr``.

    Args:
        expression (str): The HGVS expression to validate.

    Raises:
        HGVSError: If parsing or validation fails. The message names the
            offending expression, and the original error is chained as
            ``__cause__``.

    Returns:
        str: The HGVS expression exactly as it was passed in.
    """
    try:
        parsed_variant = hp.parse_hgvs_variant(expression)
        vr.validate(parsed_variant)
    except HGVSError as e:
        # Chain explicitly so the traceback shows the underlying hgvs error
        # as the direct cause of this one.
        raise HGVSError(
            f"Validation failed for HGVS expression '{expression}': {e}"
        ) from e
    return expression
        
def to_vrs_tranmod(expression):
    """Translate an HGVS expression into a VRS variation.

    The expression is first passed through ``validate_hgvs_variants`` and the
    result is handed to the module-level translator ``tlr`` with the format
    fixed to ``'hgvs'``. Other input formats are not handled here.

    Args:
        expression (str): HGVS expression to translate.

    Raises:
        Whatever ``validate_hgvs_variants`` raises: validation runs outside
        the ``try`` block, so its errors are not turned into the error string
        described below.

    Returns:
        The object returned by ``tlr.translate_from`` on success. If the
        translation itself raises, a ``str`` of the form
        ``"<error>. Expression Error: <expression>"`` is returned instead.
    """
    hgvs_expression = validate_hgvs_variants(expression)
    try:
        return tlr.translate_from(str(hgvs_expression), 'hgvs')
    except Exception as e:
        # Deliberately best-effort: report the failure as text so a batch of
        # expressions can keep going after one bad translation.
        return f'{e}. Expression Error: {expression}'
        

# Push one example of each listed edit type through to_vrs_tranmod and print
# whatever it returns (a translated object or an error string).
for x in [
    # Insertion
    "NC_000007.14:g.55181230_55181231insGGCT",
    # Substitution
    "NC_000019.10:g.44908822C>T",
    # Deletion
    "NC_000007.14:g.55181220del",
    # Deletion Insertion
    "NC_000023.11:g.32386323delinsGA",
    # Identity
    "NC_000013.11:g.32936732=",
    # Duplication
    "NC_000013.11:g.19993838_19993839dup",
]:
    print(to_vrs_tranmod(x))


In [None]:
from ga4gh.vrs.extras.variation_normalizer_rest_dp import VariationNormalizerRESTDataProxy
from ga4gh.vrs.dataproxy import SeqRepoRESTDataProxy
from ga4gh.vrs.extras.translator import Translator

In [None]:
# Base URL of the SeqRepo REST service; handed to the data proxy below.
seqrepo_rest_service_url = "https://services.genomicmedlab.org/seqrepo"
# dp: sequence data proxy that the Translator is later constructed with.
dp = SeqRepoRESTDataProxy(base_url=seqrepo_rest_service_url)


In [None]:
# tlr: translator bound to the SeqRepo data proxy.
tlr = Translator(data_proxy=dp)
hgvs_expr1 = "NC_000001.11:g.943043C>T"  # alternative example: "NM_000097.7:c.814A>C"
# Translate the HGVS string and show the result as a plain dict.
allele1 = tlr.translate_from(hgvs_expr1,'hgvs')
allele1.as_dict()

In [None]:
def validate_hgvs_variants(expression):
    """Check one HGVS expression without raising.

    Args:
        expression: HGVS expression to parse with ``hp`` and validate with ``vr``.

    Returns:
        The expression itself when parsing and validation succeed; otherwise
        the ``HGVSError`` instance that was caught.
    """
    try:
        vr.validate(hp.parse_hgvs_variant(expression))
    except HGVSError as problem:
        # Hand the error object back to the caller instead of propagating it.
        return problem
    else:
        return expression
# Example: an insertion written with a count ("ins5") rather than inserted bases.
# NOTE(review): presumably expected to come back as an error object — confirm.
validate_hgvs_variants('NM_004343.4:c.1154_1155ins5')

In [None]:
hgvs_expr1 = "NC_000013.11:g.32936732="  # alternative example: "NM_000097.7:c.814A>C"
parsed_variant = hp.parse_hgvs_variant(hgvs_expr1)
# Guard against a falsy validation result.
# NOTE(review): the hgvs validator appears to raise on invalid input rather
# than return False, in which case this branch is never taken — confirm.
if not vr.validate(parsed_variant):
    raise ValueError(f"Invalid HGVS expression: {hgvs_expr1}")
# The reference side of the edit is available as parsed_variant.posedit.edit.ref.
# Show the alternate side of the edit for this identity ("=") variant.
parsed_variant.posedit.edit.alt

In [None]:
# Spreadsheet with the lab's HGVS expressions (path is relative to the notebook).
excel_file = '../data/LabHgvsExpression.xlsx'

# Open the workbook in a context manager so the file handle is closed after reading.
with pd.ExcelFile(excel_file) as hgvs_path:
    input_data = pd.read_excel(hgvs_path)

# Strip surrounding whitespace from both expression columns.
# NOTE(review): the column names are spelled "oringal"/"expample"; presumably
# they mirror the spreadsheet headers — verify before renaming.
input_data['oringal_hgvs_expression_example'] = input_data['oringal_hgvs_expression_example'].str.strip()
input_data['edited_hgvs_expression_expample'] = input_data['edited_hgvs_expression_expample'].str.strip()

# data: DataFrame built from the cleaned input; used by the following cells.
data = pd.DataFrame(input_data)



# NOTE: Boolean values were obtained from: https://mutalyzer.nl/
# Description: The Normalizer takes a variant description as input and checks whether it is correct.


In [None]:
# Display the cleaned input table as the cell output.
data

In [None]:
# Pull the two expression columns out of the table for validation.
original_expression = data['oringal_hgvs_expression_example']
edited_expression = data['edited_hgvs_expression_expample']

In [None]:
def validate_hgvs_variants(hgvs_list):
    """Validate every HGVS expression in *hgvs_list*.

    Each expression is parsed with the module-level parser ``hp`` and checked
    with the module-level validator ``vr``.

    Args:
        hgvs_list: Iterable of HGVS expressions.

    Returns:
        tuple[list, list]: ``(boolean_checker, error_messages)``, two parallel
        lists with one entry per input. ``boolean_checker`` holds ``True`` for
        an expression that passed and ``False`` for one that raised
        ``HGVSError``; ``error_messages`` holds ``'Passed'`` or the caught
        ``HGVSError`` instance respectively.
    """
    # Both lists must exist before the loop: the function returns both.
    boolean_checker = []
    error_messages = []

    # The loop variable is not called `hgvs`, to avoid confusion with the package.
    for expression in hgvs_list:
        try:
            parsed_variant = hp.parse_hgvs_variant(expression)
            vr.validate(parsed_variant)
        except HGVSError as e:
            boolean_checker.append(False)
            error_messages.append(e)
        else:
            boolean_checker.append(True)
            error_messages.append('Passed')

    return boolean_checker, error_messages

In [None]:
# Validate both expression columns; each call yields a (checker, messages) pair.
original_boolean_checker, original_error_messages = validate_hgvs_variants(original_expression)
edited_boolean_checker, edited_error_messages = validate_hgvs_variants(edited_expression)

# Only the message columns are reported; the boolean checker columns are
# currently left out of the results table.
hgvs_results = {
    # 'biocommons_hgvs_original_hgvs_checker':original_boolean_checker,
    'biocommons_hgvs_original_hgvs_error_messages':original_error_messages,
    # 'biocommons_hgvs_edited_hgvs_checker':edited_boolean_checker,
    'biocommons_hgvs_edited_hgvs_error_messages':edited_error_messages
}

# One row per input expression, one column per message list.
biocommons_hgvs_test = pd.DataFrame(hgvs_results)

In [None]:
# Display the validation results table as the cell output.
biocommons_hgvs_test

In [None]:
# Put the validation columns next to the input columns (column-wise concat,
# keeping the existing index labels).
result = pd.concat([data, biocommons_hgvs_test], axis=1, ignore_index=False)
result

In [None]:
# Write the combined table to CSV without the index column.
result.to_csv('../data/results.csv', index=False)

In [None]:
import pandas as pd

import hgvs.parser
import hgvs.dataproviders.uta
import hgvs.validator
from hgvs.exceptions import HGVSError

# Parser, UTA-backed data provider, and a validator built on that provider.
hp = hgvs.parser.Parser()
hdp = hgvs.dataproviders.uta.connect()
vr = hgvs.validator.Validator(hdp=hdp)

# TODO: open different types of files. If the file format doesn't work, then throw an error.
excel_file = '../data/LabHgvsExpression.xlsx'
# Read the workbook inside a context manager so the file handle is closed afterwards.
with pd.ExcelFile(excel_file) as hgvs_path:
    input_data = pd.read_excel(hgvs_path)


def validate_hgvs_variants(hgvs_list):
    """Validate each HGVS expression and collect one outcome per input.

    Args:
        hgvs_list: Iterable of HGVS expressions; each is parsed with ``hp``
            and validated with ``vr``.

    Returns:
        list: For every input, in order, either ``True`` (parsed and
        validated without an ``HGVSError``) or the ``HGVSError`` instance
        that was caught.
    """
    outcomes = []
    for expression in hgvs_list:
        try:
            vr.validate(hp.parse_hgvs_variant(expression))
        except HGVSError as failure:
            # Keep the error object itself so the caller can inspect it.
            outcomes.append(failure)
        else:
            outcomes.append(True)
    return outcomes

# TODO: let the user supply the name of the column that holds their HGVS expressions.
input_data['oringal_hgvs_expression_example'] = input_data['oringal_hgvs_expression_example'].str.strip()

data = pd.DataFrame(input_data)
# Validate the same column that is stripped above and reported below, so every
# validation result sits next to the expression it was computed from.
hgvs_expression = data['oringal_hgvs_expression_example']

original_error_messages = validate_hgvs_variants(hgvs_expression)

# TODO: create output file.
# One row per expression: the expression itself and its validation outcome.
hgvs_results = {'HGVS':data['oringal_hgvs_expression_example'],
                'biocommons_validator':original_error_messages}
biocommons_hgvs_test = pd.DataFrame(hgvs_results)
biocommons_hgvs_test

In [None]:
# Parse a coding substitution and show the `base` attribute of the end
# position of its interval.
hp.parse_hgvs_variant('NM_001256850.1:c.1141G>A').posedit.pos.end.base

In [None]:
import hgvs.validator
import hgvs.exceptions
# Rebuild the validator on the existing data provider.
vr = hgvs.validator.Validator(hdp=hdp)
try:
    # Parse and validate one transcript-level substitution.
    vr.validate( hp.parse_hgvs_variant('NM_000097.7:c.814A>C') ) 
    print("it worked")
except hgvs.exceptions.HGVSError as e:
    # Show the reason instead of letting the error propagate.
    print(e)


#NM_000371.4:c.220G>C



In [None]:
# Two sample expressions; the second uses "." where ":" normally separates the
# accession from the variant description.
# NOTE(review): presumably a deliberate negative example — confirm.
hgvs_list = ['NC_000017.11:g.43091687delC', 'NC_000007.13.g.21726874G>A']

try:
    parser = hgvs.parser.Parser()
    validator = hgvs.validator.IntrinsicValidator()

    # The loop variable must not be named `hgvs`: at module level that would
    # rebind the imported `hgvs` package to a string and break any later
    # `hgvs.<module>` lookup.
    for hgvs_string in hgvs_list:
        variant = parser.parse_hgvs_variant(hgvs_string)
        validator.validate(variant)
except Exception as e:
    # The first failure ends the loop; only its message is printed.
    print(e)

import hgvs.validator
import hgvs.exceptions
# Rebuild the validator on the existing data provider.
vr = hgvs.validator.Validator(hdp=hdp)
try:
    # Expression whose accession is followed by a gene symbol in parentheses.
    vr.validate( hp.parse_hgvs_variant('NM_001267550.2(TTN):c.80006G>A') )
except hgvs.exceptions.HGVSError as e:
    # Print the hgvs error rather than letting it propagate.
    print(e)


from hgvs.exceptions import HGVSError
import hgvs.parser
import hgvs.validator

# Two sample expressions; the second uses "." where ":" normally separates the
# accession from the variant description.
# NOTE(review): presumably a deliberate negative example — confirm.
hgvs_list = ('NC_000017.11:g.43091687delC', 'NC_000007.13.g.21726874G>A')

# Create a validator instance
vr = hgvs.validator.Validator(hdp=hdp)

# Create a parser instance
hp = hgvs.parser.Parser()

try:
    # The loop variable must not be named `hgvs`: at module level that would
    # rebind the imported `hgvs` package to a string and break any later
    # `hgvs.<module>` lookup.
    for hgvs_string in hgvs_list:
        vr.validate(hp.parse_hgvs_variant(hgvs_string))
        print('expression passed: {}'.format(hgvs_string))
except HGVSError as e:
    # The first failing expression ends the loop; its error is printed.
    print(e)


In [None]:
import pandas as pd
import hgvs.parser
import hgvs.dataproviders.uta
import hgvs.validator
from hgvs.exceptions import HGVSError

# import sys
# sys.path.append('..')

# Parser, UTA-backed data provider, and a validator built on that provider.
hp = hgvs.parser.Parser()
hdp = hgvs.dataproviders.uta.connect()
vr = hgvs.validator.Validator(hdp=hdp)

# NOTE(review): absolute path inside one user's home directory; it will not
# resolve on other machines.
excel_file = '/Users/M278428/Documents/rf_lab_projects/DraftCoreDataModel/data/finaltestdata.xlsx'

In [None]:
# Load the spreadsheet at `excel_file` into a DataFrame.
input_data = pd.read_excel(excel_file)

In [None]:
# Display the loaded table as the cell output.
input_data

In [None]:
# Remove leading/trailing whitespace from every expression before validation.
input_data['hgvs_expression'] = input_data['hgvs_expression'].str.strip()

In [None]:
# Column of cleaned HGVS expressions to validate; shown as the cell output.
hgvsExamples = input_data['hgvs_expression']
hgvsExamples

In [None]:
def validate_hgvs_variants(hgvs_list):
    """Run the parser and validator over a collection of HGVS expressions.

    Args:
        hgvs_list: Iterable of HGVS expressions; each one is parsed with
            ``hp`` and the parsed variant is validated with ``vr``.

    Returns:
        list: One entry per input, in input order: ``True`` when no
        ``HGVSError`` was raised, otherwise the caught ``HGVSError`` instance.
    """
    verdicts = []
    for candidate in hgvs_list:
        try:
            variant = hp.parse_hgvs_variant(candidate)
            vr.validate(variant)
        except HGVSError as caught:
            # Record the error object in place of True for this input.
            verdicts.append(caught)
            continue
        verdicts.append(True)
    return verdicts

In [None]:
# Validate every expression; each entry is True or the HGVSError that was caught.
results = validate_hgvs_variants(hgvsExamples)
results

In [None]:
# Pair each expression with its validation outcome, one row per expression.
hgvs_results = {
    'HGVS': hgvsExamples,
    'Validator': results
}
result = pd.DataFrame(hgvs_results) 
# Display the table as the cell output.
result