In [1]:
import os, sys
sys.path.append(os.path.join("../../../lib/python")) 
from navs import Variation

In [33]:
# Round-trip each input through every supported notation (dbSNP rs, SPDI, HGVS, and VCF)
# and show the beginning of its dbSNP JSON object.
test_cases = [
    'rs328',   # dbSNP RefSNP (rs): as string 'rs328' or integer (328)
    "NC_000007.14\t8644051\t.\tC\tG,T\t.\t.\tINFO",  # VCF record (tab-separated fields)
    'NC_000007.14:g.8644051C>G',  # HGVS
    'NC_000008.10:19813528:1:G',  # SPDI
]


def _print_section(label, entries):
    """Print ``label:`` on its own line, then one entry per line, then a blank line."""
    print(label + ":\n" + "\n".join(entries))
    print()


for tc in test_cases:
    print()
    print('Input: ' + str(tc))
    print('-------------------------------------------')

    v = Variation(tc)
    # Each accessor is called immediately before its section is printed, in this order.
    _print_section("RSID", map(str, v.asRsidList()))  # RS ids are stringified before joining
    _print_section("SPDI", v.asSpdiList())
    _print_section("HGVS", v.asHgvsList())
    _print_section("VCF", v.asVcfList())

    rsAsJson = v.asJson()
    if not rsAsJson:
        print('<No JSON>')
    else:
        # Show only the first 400 characters; the '...' suffix is appended unconditionally.
        print("JSON:\n", rsAsJson[:400] + '...')
    print()


Input: rs328
-------------------------------------------
RSID:
328

SPDI:
NC_000008.11:19962212:C:G

HGVS:
NC_000008.11:g.19962213C>G

VCF:
NC_000008.11	19962213	rs328	C	G	.	.	.

JSON:
 {"refsnp_id":"328","create_date":"2000-09-19T17:02Z","last_update_date":"2018-10-12T12:02Z","last_update_build_id":"152","dbsnp1_merges":[{"merged_rsid":"3735962","revision":"108","merge_date":"2002-10-9T00:18Z"},{"merged_rsid":"17482566","revision":"123","merge_date":"2004-10-8T05:17Z"},{"merged_rsid":"52834251","revision":"128","merge_date":"2007-09-21T16:13Z"}],"citations":[1731801,1907278,2216...


Input: NC_000007.14	8644051	.	C	G,T	.	.	INFO
-------------------------------------------
RSID:
338

SPDI:
NC_000007.14:8644050:C:G
NC_000007.14:8644050:C:T

HGVS:
NC_000007.14:g.8644051C>G
NC_000007.14:g.8644051C>T

VCF:
NC_000007.14	8644051	rs338	C	G,T	.	.	INFO

JSON:
 {"refsnp_id":"338","create_date":"2000-09-19T17:02Z","last_update_date":"2018-10-12T10:27Z","last_update_build_id":"152","dbsnp1_merges"

In [22]:
# Look up the RS id(s) for HGVS expressions reported on different sequence types (NC, NG, and NM).
test_cases = [
    'NC_000008.10:g.19813529A>G',  # genomic, GRCh37
    'NC_000008.11:g.19956018A>G',  # genomic, GRCh38
    'NG_008855.1:g.21948A>G',      # RefSeqGene (NG)
    'NM_000237.2:c.953A>G',        # NM transcript
    'NC_000008.11:g.19956024T>G',  # genomic, GRCh38; example of a novel variant not in dbSNP
]

for hgvs in test_cases:
    v = Variation(hgvs)
    # Comma-separated RS ids; an empty string means no RS id was returned for this input.
    rslist = ",".join(str(rsid) for rsid in v.asRsidList())
    # One tab-separated line per input, with a placeholder when there is no RS id.
    print(hgvs, rslist or "Not in dbSNP", sep="\t")

NC_000008.10:g.19813529A>G	268
NC_000008.11:g.19956018A>G	268
NG_008855.1:g.21948A>G	268
NM_000237.2:c.953A>G	268
NC_000008.11:g.19956024T>G	Not in dbSNP


In [28]:
# Normalize novel variants using SPDI notation:
# print one table row per SPDI that Variation reports for each HGVS input.

test_cases = [
    'NM_000237.2:c.959T>G',        # NM transcript
    'NC_000008.11:g.19956024T>G',  # genomic, GRCh38; example of a novel variant not in dbSNP
    'NC_000017.11:g.43054117delA',  # single-base deletion
    'NC_000017.11:g.43054118delA',  # same kind of deletion, one position further along
]

# Table header: every column is left-justified to the same width.
colwidth = 35
print("\t".join(title.ljust(colwidth) for title in ("HGVS", "SPDI", "RS")))
print("\t".join(
    rule.ljust(colwidth)
    for rule in ("=========================", "=========================", "===========")
))

for hgvs in test_cases:
    v = Variation(hgvs)
    # RS ids are fetched once per input and repeated on each of its SPDI rows.
    rslist = ",".join(map(str, v.asRsidList()))
    for spdi in v.asSpdiList():
        print("\t".join(cell.ljust(colwidth) for cell in (hgvs, spdi, rslist)))


HGVS                               	SPDI                               	RS                                 
NM_000237.2:c.959T>G               	NM_000237.2:1328:T:G               	                                   
NM_000237.2:c.959T>G               	NC_000008.11:19956023:T:G          	                                   
NC_000008.11:g.19956024T>G         	NC_000008.11:19956023:T:G          	                                   
NC_000017.11:g.43054117delA        	NC_000017.11:43054112:AAAAAA:AAAAA 	1200087052                         
NC_000017.11:g.43054118delA        	NC_000017.11:43054112:AAAAAA:AAAAA 	1200087052                         


In [2]:
# For each HGVS input, list every SPDI that Variation reports together with the RS id(s).

test_cases = [
    'NC_000007.14:g.117548629T[9]',              # Microsatellite T[9] allele
    'NC_000007.14:g.117548629T[5]',              # Microsatellite T[5] allele
    'NC_000007.14:g.117548635delT',              # Deletion
    # Disabled examples:
    #'NC_000007.14:g.117548625_117548628dup',     #Dup
    #'NM_000492.3:c.350G>A',                      #SNV
]

# Column widths for the HGVS, SPDI and RS columns respectively.
col1, col2, col3 = 35, 50, 20


def _format_row(cells):
    """Left-justify each of the three cells to its column width and join them with tabs."""
    return "\t".join(text.ljust(width) for text, width in zip(cells, (col1, col2, col3)))


# The same divider row is used under the header and after each input's group of rows.
divider = _format_row(["=========================", "==============================", "==========="])

# Table header.
print(_format_row(["HGVS Input", "SPDI (1st row) and equivalents", "RS"]))
print(divider)

for hgvs in test_cases:
    v = Variation(hgvs)
    # RS ids are fetched once per input and repeated on each of its SPDI rows.
    rslist = ",".join(map(str, v.asRsidList()))
    for spdi in v.asSpdiList():
        print(_format_row([hgvs, spdi, rslist]))

    print(divider)

# OUTPUT: for each input, the first SPDI row is the notation for the input itself; the rows
# that follow are equivalent SPDIs from other dbSNP variants.

HGVS Input                         	SPDI (1st row) and equivalents                    	RS                  
NC_000007.14:g.117548629T[9]       	NC_000007.14:117548628:TTTTTTT:TTTTTTTTT          	1805177             
NC_000007.14:g.117548629T[9]       	NC_000007.14:117548628:TTTTTTT:T                  	1805177             
NC_000007.14:g.117548629T[9]       	NC_000007.14:117548628:TTTTTTT:TTTTT              	1805177             
NC_000007.14:g.117548629T[9]       	NC_000007.14:117548628:TTTTTTT:TTTTTT             	1805177             
NC_000007.14:g.117548629T[9]       	NC_000007.14:117548628:TTTTTTT:TTTTTTTT           	1805177             
NC_000007.14:g.117548629T[9]       	NC_000007.14:117548628:TTTTTTT:TTTTTTTTTT         	1805177             
NC_000007.14:g.117548629T[9]       	NC_000007.14:117548628:TTTTTTT:TTTTTTTTTTTT       	1805177             
NC_000007.14:g.117548629T[9]       	NC_000007.14:117548628:TTTTTTT:TTTTTTTTTTTTTT     	1805177             
NC_000007.14:g.117548629T[5]