In [1]:
from src.core_variant import CoreVariantClass



#### Creating a CoreVariantClass Object
* The goal of the CoreVariantClass object is to be able to contain metadata that can be translated into various expressions such as VRS, HGVS, SPDI, and FHIR.
* Required attributes: origCoordSystem, seqType, refAllele, altAllele, start, end, and either sequenceId or both chrom and genomeBuild.
* Optional attributes: allelicState, geneSymbol, hgncId
* For more information regarding the validation steps that are implemented in the CoreVariantClass, refer to the core_variant.py module.

In [2]:
# Build a CoreVariantClass object.
# Example variant: 'NC_000007.14:55181230::GGCT'
# The reference allele is empty and start == end, so the alternate bases
# are placed at a single 0-based interbase position.
cvc = CoreVariantClass(
    # where the variant lives
    sequenceId='NC_000007.14',
    chrom=None,
    genomeBuild=None,
    # how the coordinates are expressed
    origCoordSystem='0-based interbase',
    seqType='DNA',
    start=55181230,
    end=55181230,
    # what changes
    refAllele='',
    altAllele='GGCT',
    # optional annotations
    allelicState=None,
    geneSymbol=None,
    hgncId=None,
)
# NOTE(review): only the last expression of a cell is echoed, so this bare
# name produces no output here; the recorded output shows cvc2 only.
cvc

# Second example.
# HGVS (1-based counting) form of the variant: NC_000019.10:g.44908822C>T
# It is supplied below in 0-based interbase coordinates instead, so the
# single base at 1-based position 44908822 spans start=44908821 to
# end=44908822.
cvc2 = CoreVariantClass(
    # where the variant lives
    sequenceId='NC_000019.10',
    chrom=None,
    genomeBuild=None,
    # how the coordinates are expressed
    origCoordSystem='0-based interbase',
    seqType='DNA',
    start=44908821,
    end=44908822,
    # what changes
    refAllele='C',
    altAllele='T',
    # optional annotations
    allelicState=None,
    geneSymbol=None,
    hgncId=None,
)
cvc2

CoreVariantClass(0-based interbase,DNA,C,T,44908821,44908822,None,None,None,None,None,NC_000019.10,{})

#### CoreVariantClass Methods
* Methods include 
    * Return the initial parameters that were passed in
    * Return object as string, dictionary, or json
    * Normalize CoreVariantClass

In [3]:
# Return the initial parameters the object was created with.
# The recorded output below shows them as a dict keyed by parameter name,
# with an extra 'kwargs' entry.
cvc.init_params()

{'origCoordSystem': '0-based interbase',
 'seqType': 'DNA',
 'refAllele': '',
 'altAllele': 'GGCT',
 'start': 55181230,
 'end': 55181230,
 'allelicState': None,
 'geneSymbol': None,
 'hgncId': None,
 'chrom': None,
 'genomeBuild': None,
 'sequenceId': 'NC_000007.14',
 'kwargs': {}}

In [4]:
# String form: the object's repr()
print(f'String format: {cvc!r}')

# Dictionary form, via as_dict()
print(f'Dictionary format: {cvc.as_dict()}')

# JSON form, via as_json()
print(f'JSON format: {cvc.as_json()}')

String format: CoreVariantClass(0-based interbase,DNA,,GGCT,55181230,55181230,None,None,None,None,None,NC_000007.14,{})
Dictionary format: {'origCoordSystem': '0-based interbase', 'seqType': 'DNA', 'refAllele': '', 'altAllele': 'GGCT', 'start': 55181230, 'end': 55181230, 'allelicState': None, 'geneSymbol': None, 'hgncId': None, 'chrom': None, 'genomeBuild': None, 'sequenceId': 'NC_000007.14', 'kwargs': {}}
JSON format: {
  "origCoordSystem": "0-based interbase",
  "seqType": "DNA",
  "refAllele": "",
  "altAllele": "GGCT",
  "start": 55181230,
  "end": 55181230,
  "allelicState": null,
  "geneSymbol": null,
  "hgncId": null,
  "chrom": null,
  "genomeBuild": null,
  "sequenceId": "NC_000007.14",
  "kwargs": {}
}


In [5]:
# TODO: this idea of normalizing needs to be revised

# Build a CoreVariantClass object from 1-based counting coordinates.
# Example variant: 'NM_012345.6:c.22A>T'
cvc3 = CoreVariantClass(
    # where the variant lives
    sequenceId='NM_012345.6',
    chrom=None,
    genomeBuild=None,
    # how the coordinates are expressed
    origCoordSystem='1-based counting',
    seqType='DNA',
    start=22,  # TODO: refer to core_variant.py because I changed this to allow start == end.
    end=22,
    # what changes
    refAllele='A',
    altAllele='T',
    # optional annotations
    allelicState=None,
    geneSymbol=None,
    hgncId=None,
)
cvc3

CoreVariantClass(1-based counting,DNA,A,T,22,22,None,None,None,None,None,NM_012345.6,{})

In [6]:
# Normalizing from 1-based counting to 0-based interbase.
# print() only supplies the label; the bare call on the last line is what the
# notebook echoes as the cell's result. In the recorded output, start moves
# from 22 to 21 while end stays 22.
print('Normalized Object:')
cvc3.normalized_data()

Normalized Object:


{'origCoordSystem': '0-based interbase',
 'seqType': 'DNA',
 'allelicState': None,
 'associatedGene': {'geneSymbol': None, 'hgncId': None},
 'refAllele': 'A',
 'altAllele': 'T',
 'position': {'chrom': None,
  'genomeBuild': None,
  'start': 21,
  'end': 22,
  'sequenceId': 'NM_012345.6'}}