"""Rebuild a minimal VCF from the JSON printed by 'scout export variants --json'."""
import json

# Scout sample field -> VCF FORMAT ID. Insertion order defines the order of
# the sub-fields in the FORMAT column and in every genotype column.
SCOUT_TO_FORMAT = {
    'genotype_call': 'GT',
    'allele_depths': 'AD',
    'read_depth': 'DP',
    'genotype_quality': 'GQ',
}

# Scout variant field -> VCF INFO ID.
SCOUT_TO_INFO = {
    'end': 'END',
    'rank_score': 'RankScore',
}

# Meta-information lines written at the top of every reconstructed VCF.
# NOTE(review): the INFO/FORMAT lines carry no '<ID=...,Number=...>' payload,
# which looks like truncation -- confirm against the intended header before
# relying on strict VCF parsers.
HEADER = (
    '##fileformat=VCFv4.2',
    '##INFO=',
    '##INFO=',
    '##INFO=',
    '##INFO=',
    '##FORMAT=',
    '##FORMAT=',
    '##FORMAT=',
    '##FORMAT=',
)

NEWLINE = '\n'

TAB = '\t'

# Fixed VCF columns; sample ids are appended after FORMAT.
COLUMN_NAMES = [
    '#CHROM',
    'POS',
    'ID',
    'REF',
    'ALT',
    'QUAL',
    'FILTER',
    'INFO',
    'FORMAT',
]

# Scout variant fields, in the order of the VCF columns CHROM..FILTER.
SCOUT_TO_COLUMNS = (
    'chromosome',   # CHROM
    'position',     # POS
    'dbsnp_id',     # ID
    'reference',    # REF
    'alternative',  # ALT
    'quality',      # QUAL
    'filters',      # FILTER
)

# VCF placeholder for a missing value.
_MISSING = '.'

# List-valued columns are comma separated, except FILTER which the VCF
# specification defines as a semicolon-separated list.
_LIST_SEPARATORS = {'filters': ';'}


def _format_value(value, separator=','):
    """Render one scout value as VCF text, using '.' for missing data."""
    if isinstance(value, (list, tuple)):
        if not value:
            return _MISSING
        return separator.join(
            _MISSING if element is None else str(element) for element in value
        )
    if value is None or value == '':
        return _MISSING
    return str(value)


def get_vcf_from_json(scout_vcf_output):
    """
        Reconstructs vcf from scout variant objects

        Sample columns are taken from the first variant. When there are no
        variants, or the first variant has no samples, no genotype data can
        be written, so the FORMAT column is left out and only the eight
        fixed columns are emitted.

        Args:
            scout_vcf_output (str): string returned by command
                                    'scout export variants --json'

        Returns:
            vcf_string (str): string with vcf content, every line
                              (including the last) terminated by a newline
    """
    variants = json.loads(scout_vcf_output)

    # Collect lines and join once instead of growing a string with '+='
    lines = list(HEADER)

    sample_ids = []
    if variants:
        sample_ids = [sample['sample_id'] for sample in variants[0]['samples']]

    if sample_ids:
        column_names = COLUMN_NAMES + sample_ids
    else:
        column_names = [name for name in COLUMN_NAMES if name != 'FORMAT']
    lines.append(TAB.join(column_names))

    # The FORMAT column is identical for every record, so build it once
    format_column = ':'.join(SCOUT_TO_FORMAT.values())

    for variant in variants:
        record = get_columns(variant)
        record.append(get_info(variant))
        if sample_ids:
            record.append(format_column)
            record.append(get_genotypes(variant))
        lines.append(TAB.join(record))

    return NEWLINE.join(lines) + NEWLINE


def get_columns(variant):
    """
        Given a variant object from scout, collect the values of the
        columns CHROM - FILTER

        None, empty strings and empty lists are written as '.'. List values
        are joined with ',' (';' for the FILTER column).

        Args:
            variant (dict): dictionary of scout variant object
        Returns:
            record (list(str)): one string per column, CHROM - FILTER
        Raises:
            KeyError: if the variant lacks one of the SCOUT_TO_COLUMNS fields
    """
    return [
        _format_value(variant[column], _LIST_SEPARATORS.get(column, ','))
        for column in SCOUT_TO_COLUMNS
    ]


def get_info(variant):
    """
        Given a variant object from scout, write the INFO column
        for a variant.

        Fields from SCOUT_TO_INFO that are absent or None are left out of
        the INFO string. The variant type is written as TYPE for snv
        variants and as SVTYPE for everything else.

        Args:
            variant (dict): dictionary of scout variant object
        Returns:
            info (str): INFO string, entries separated by ';'
        Raises:
            KeyError: if 'category' or 'sub_category' is missing
    """
    info = []
    for scout_key, info_id in SCOUT_TO_INFO.items():
        value = variant.get(scout_key)
        if value is None:
            # int(None) would raise; an omitted INFO key is valid VCF
            continue
        info.append(f"{info_id}={int(value)}")

    type_id = 'TYPE' if variant['category'].lower() == 'snv' else 'SVTYPE'
    info.append(f"{type_id}={variant['sub_category']}")

    return ';'.join(info)


def get_genotypes(variant):
    """
        Given a variant object from scout, write the genotype column for
        each sample.

        Sub-fields follow the order of SCOUT_TO_FORMAT and are separated by
        ':'; list values are comma separated; None is written as '.'.

        Args:
            variant (dict): dictionary of scout variant object
        Returns:
            samples (str): genotype columns of all samples, tab separated
        Raises:
            KeyError: if 'samples' or a SCOUT_TO_FORMAT field is missing
    """
    return TAB.join(
        ':'.join(_format_value(sample[field]) for field in SCOUT_TO_FORMAT)
        for sample in variant['samples']
    )
+) diff --git a/mutacc_auto/recipes/input_recipe.py b/mutacc_auto/recipes/input_recipe.py index 27afb20..fea77ab 100644 --- a/mutacc_auto/recipes/input_recipe.py +++ b/mutacc_auto/recipes/input_recipe.py @@ -5,7 +5,7 @@ from mutacc_auto.utils.tmp_dir import TemporaryDirectory from mutacc_auto.commands.scout_command import ScoutExportCases, ScoutExportCausativeVariants from mutacc_auto.commands.housekeeper_command import HousekeeperCommand -from mutacc_auto.parse.parse_scout import get_cases_from_scout +from mutacc_auto.parse.parse_scout import get_cases_from_scout, get_vcf_from_json from mutacc_auto.parse.parse_housekeeper import get_bams_from_housekeeper from mutacc_auto.build_input.input_assemble import get_case @@ -66,7 +66,8 @@ def write_vcf(case_id, directory): ) as vcf_handle: vcf_command = ScoutExportCausativeVariants(case_id) - vcf_content = vcf_command.check_output() + vcf_scout_output = vcf_command.check_output() + vcf_content = get_vcf_from_json(vcf_scout_output) vcf_handle.write(vcf_content) vcf_path = vcf_handle.name diff --git a/tests/fixtures/scout_variant_output.json b/tests/fixtures/scout_variant_output.json new file mode 100644 index 0000000..1432d3d --- /dev/null +++ b/tests/fixtures/scout_variant_output.json @@ -0,0 +1 @@ +[{"_id": "a3ec99657a128d14419563d77e1381bd", "document_id": "a3ec99657a128d14419563d77e1381bd", "variant_id": "cd7f04771a30e2807ca92cc531bd9227", "display_name": "4_861261_CGGCCAGCACCAGGGTCCCCACGGCGCGTCCCTTCAGGGCCTCCTCGGCCCAGGGCCTTGGTGAACACACGT_C_clinical", "variant_type": "clinical", "case_id": "643594", "chromosome": "4", "reference": "CGGCCAGCACCAGGGTCCCCACGGCGCGTCCCTTCAGGGCCTCCTCGGCCCAGGGCCTTGGTGAACACACGT", "alternative": "C", "institute": "cust000", "missing_data": false, "position": 861261, "rank_score": 12.0, "end": 861332, "length": 71, "simple_id": "4_861261_CGGCCAGCACCAGGGTCCCCACGGCGCGTCCCTTCAGGGCCTCCTCGGCCCAGGGCCTTGGTGAACACACGT_C", "quality": 46.0, "filters": ["PASS"], "dbsnp_id": "MantaDEL:10:0:0:0:0:0", 
"cosmic_ids": null, "category": "sv", "sub_category": "del", "mate_id": null, "cytoband_start": "p16.3", "cytoband_end": "p16.3", "end_chrom": "4", "samples": [{"sample_id": "ADM1059A2", "display_name": "NA12882", "genotype_call": "0/1", "allele_depths": [2, 6], "read_depth": 12, "genotype_quality": 46}, {"sample_id": "ADM1059A1", "display_name": "NA12877", "genotype_call": "0/0", "allele_depths": [4, 1], "read_depth": 2, "genotype_quality": 61}, {"sample_id": "ADM1059A3", "display_name": "NA12878", "genotype_call": "0/0", "allele_depths": [3, 2], "read_depth": 4, "genotype_quality": 15}], "genetic_models": ["AD_dn"], "genes": [{"hgnc_id": 4113, "hgnc_symbol": "GAK", "ensembl_id": "ENSG00000178950", "description": "cyclin G associated kinase", "inheritance": [], "transcripts": [{"transcript_id": "ENST00000314167", "hgnc_id": 4113, "protein_id": "ENSP00000314499", "sift_prediction": "unknown", "polyphen_prediction": "unknown", "swiss_prot": "O14976", "biotype": "protein_coding", "functional_annotations": ["intron_variant", "feature_truncation"], "region_annotations": ["intronic", "genomic_feature"], "intron": "20/27", "strand": "-", "is_canonical": true, "refseq_id": "NM_005255", "refseq_identifiers": [], "is_primary": true}, {"transcript_id": "ENST00000509566", "hgnc_id": 4113, "sift_prediction": "unknown", "polyphen_prediction": "unknown", "swiss_prot": "unknown", "biotype": "processed_transcript", "functional_annotations": ["intron_variant", "non_coding_transcript_variant", "feature_truncation"], "region_annotations": ["intronic", "ncRNA_exonic", "genomic_feature"], "intron": "3/10", "strand": "-", "is_canonical": false}, {"transcript_id": "ENST00000510799", "hgnc_id": 4113, "protein_id": "ENSP00000426062", "sift_prediction": "unknown", "polyphen_prediction": "unknown", "swiss_prot": "unknown", "biotype": "protein_coding", "functional_annotations": ["upstream_gene_variant"], "region_annotations": ["upstream"], "strand": "-", "is_canonical": false}, 
{"transcript_id": "ENST00000511163", "hgnc_id": 4113, "protein_id": "ENSP00000421361", "sift_prediction": "unknown", "polyphen_prediction": "unknown", "swiss_prot": "unknown", "biotype": "protein_coding", "functional_annotations": ["intron_variant", "feature_truncation"], "region_annotations": ["intronic", "genomic_feature"], "intron": "17/24", "strand": "-", "is_canonical": false, "refseq_id": "XM_005272271", "refseq_identifiers": []}, {"transcript_id": "ENST00000511980", "hgnc_id": 4113, "protein_id": "ENSP00000421049", "sift_prediction": "unknown", "polyphen_prediction": "unknown", "swiss_prot": "unknown", "biotype": "protein_coding", "functional_annotations": ["upstream_gene_variant"], "region_annotations": ["upstream"], "strand": "-", "is_canonical": false}, {"transcript_id": "ENST00000515868", "hgnc_id": 4113, "sift_prediction": "unknown", "polyphen_prediction": "unknown", "swiss_prot": "unknown", "biotype": "retained_intron", "functional_annotations": ["intron_variant", "non_coding_transcript_variant", "feature_truncation"], "region_annotations": ["intronic", "ncRNA_exonic", "genomic_feature"], "intron": "1/6", "strand": "-", "is_canonical": false}], "functional_annotation": "non_coding_transcript_variant", "region_annotation": "ncRNA_exonic", "sift_prediction": "unknown", "polyphen_prediction": "unknown", "disease_associated_transcripts": [], "manual_penetrance": false, "mosaicism": false, "manual_inheritance": [], "common": {"_id": {"$oid": "5b927db8290c540e950f1930"}, "hgnc_id": 4113, "hgnc_symbol": "GAK", "ensembl_id": "ENSG00000178950", "chromosome": "4", "start": 843064, "end": 926161, "length": 83097, "description": "cyclin G associated kinase", "aliases": ["DNAJC26", "GAK"], "primary_transcripts": ["NM_005255"], "entrez_id": 2580, "omim_id": 602052, "ucsc_id": "uc003gbm.6", "uniprot_ids": ["O14976"], "vega_id": "OTTHUMG00000088301", "pli_score": 0.973134951387495, "incomplete_penetrance": false, "build": "37", "transcripts": [{"_id": {"$oid": 
"5b927e29290c540e9511561c"}, "ensembl_transcript_id": "ENST00000511163", "hgnc_id": 4113, "chrom": "4", "start": 843066, "end": 926161, "is_primary": false, "refseq_id": "XM_005272271", "build": "37", "length": 83095}, {"_id": {"$oid": "5b927e29290c540e9511561b"}, "ensembl_transcript_id": "ENST00000314167", "hgnc_id": 4113, "chrom": "4", "start": 843066, "end": 926086, "is_primary": true, "refseq_id": "NM_005255", "build": "37", "length": 83020}, {"_id": {"$oid": "5b927e29290c540e95115621"}, "ensembl_transcript_id": "ENST00000505819", "hgnc_id": 4113, "chrom": "4", "start": 887207, "end": 926153, "is_primary": false, "build": "37", "length": 38946}, {"_id": {"$oid": "5b927e29290c540e95115622"}, "ensembl_transcript_id": "ENST00000507580", "hgnc_id": 4113, "chrom": "4", "start": 887731, "end": 926105, "is_primary": false, "build": "37", "length": 38374}, {"_id": {"$oid": "5b927e29290c540e95115623"}, "ensembl_transcript_id": "ENST00000502656", "hgnc_id": 4113, "chrom": "4", "start": 890257, "end": 926115, "is_primary": false, "build": "37", "length": 35858}, {"_id": {"$oid": "5b927e29290c540e95115625"}, "ensembl_transcript_id": "ENST00000512325", "hgnc_id": 4113, "chrom": "4", "start": 891850, "end": 926105, "is_primary": false, "build": "37", "length": 34255}, {"_id": {"$oid": "5b927e29290c540e9511561a"}, "ensembl_transcript_id": "ENST00000509566", "hgnc_id": 4113, "chrom": "4", "start": 843066, "end": 866561, "is_primary": false, "build": "37", "length": 23495}, {"_id": {"$oid": "5b927e29290c540e95115619"}, "ensembl_transcript_id": "ENST00000515868", "hgnc_id": 4113, "chrom": "4", "start": 843066, "end": 863745, "is_primary": false, "build": "37", "length": 20679}, {"_id": {"$oid": "5b927e29290c540e9511562a"}, "ensembl_transcript_id": "ENST00000513935", "hgnc_id": 4113, "chrom": "4", "start": 906250, "end": 926008, "is_primary": false, "build": "37", "length": 19758}, {"_id": {"$oid": "5b927e29290c540e9511561d"}, "ensembl_transcript_id": "ENST00000511980", 
"hgnc_id": 4113, "chrom": "4", "start": 843096, "end": 860950, "is_primary": false, "build": "37", "length": 17854}, {"_id": {"$oid": "5b927e29290c540e9511561e"}, "ensembl_transcript_id": "ENST00000510799", "hgnc_id": 4113, "chrom": "4", "start": 845641, "end": 861006, "is_primary": false, "build": "37", "length": 15365}, {"_id": {"$oid": "5b927e29290c540e95115626"}, "ensembl_transcript_id": "ENST00000510022", "hgnc_id": 4113, "chrom": "4", "start": 898441, "end": 909544, "is_primary": false, "build": "37", "length": 11103}, {"_id": {"$oid": "5b927e29290c540e95115627"}, "ensembl_transcript_id": "ENST00000511229", "hgnc_id": 4113, "chrom": "4", "start": 898475, "end": 909523, "is_primary": false, "build": "37", "length": 11048}, {"_id": {"$oid": "5b927e29290c540e95115620"}, "ensembl_transcript_id": "ENST00000504435", "hgnc_id": 4113, "chrom": "4", "start": 879571, "end": 888615, "is_primary": false, "build": "37", "length": 9044}, {"_id": {"$oid": "5b927e29290c540e95115624"}, "ensembl_transcript_id": "ENST00000507991", "hgnc_id": 4113, "chrom": "4", "start": 891764, "end": 898569, "is_primary": false, "build": "37", "length": 6805}, {"_id": {"$oid": "5b927e29290c540e95115618"}, "ensembl_transcript_id": "ENST00000511345", "hgnc_id": 4113, "chrom": "4", "start": 843066, "end": 847616, "is_primary": false, "build": "37", "length": 4550}, {"_id": {"$oid": "5b927e29290c540e95115628"}, "ensembl_transcript_id": "ENST00000511983", "hgnc_id": 4113, "chrom": "4", "start": 905461, "end": 909466, "is_primary": false, "build": "37", "length": 4005}, {"_id": {"$oid": "5b927e29290c540e95115617"}, "ensembl_transcript_id": "ENST00000504668", "hgnc_id": 4113, "chrom": "4", "start": 843065, "end": 845709, "is_primary": false, "build": "37", "length": 2644}, {"_id": {"$oid": "5b927e29290c540e95115616"}, "ensembl_transcript_id": "ENST00000502799", "hgnc_id": 4113, "chrom": "4", "start": 843064, "end": 845461, "is_primary": false, "build": "37", "length": 2397}, {"_id": {"$oid": 
"5b927e29290c540e95115629"}, "ensembl_transcript_id": "ENST00000507124", "hgnc_id": 4113, "chrom": "4", "start": 905507, "end": 907817, "is_primary": false, "build": "37", "length": 2310}, {"_id": {"$oid": "5b927e29290c540e9511561f"}, "ensembl_transcript_id": "ENST00000504947", "hgnc_id": 4113, "chrom": "4", "start": 876556, "end": 878178, "is_primary": false, "build": "37", "length": 1622}], "transcripts_dict": {"ENST00000511163": {"_id": {"$oid": "5b927e29290c540e9511561c"}, "ensembl_transcript_id": "ENST00000511163", "hgnc_id": 4113, "chrom": "4", "start": 843066, "end": 926161, "is_primary": false, "refseq_id": "XM_005272271", "build": "37", "length": 83095}, "ENST00000314167": {"_id": {"$oid": "5b927e29290c540e9511561b"}, "ensembl_transcript_id": "ENST00000314167", "hgnc_id": 4113, "chrom": "4", "start": 843066, "end": 926086, "is_primary": true, "refseq_id": "NM_005255", "build": "37", "length": 83020}, "ENST00000505819": {"_id": {"$oid": "5b927e29290c540e95115621"}, "ensembl_transcript_id": "ENST00000505819", "hgnc_id": 4113, "chrom": "4", "start": 887207, "end": 926153, "is_primary": false, "build": "37", "length": 38946}, "ENST00000507580": {"_id": {"$oid": "5b927e29290c540e95115622"}, "ensembl_transcript_id": "ENST00000507580", "hgnc_id": 4113, "chrom": "4", "start": 887731, "end": 926105, "is_primary": false, "build": "37", "length": 38374}, "ENST00000502656": {"_id": {"$oid": "5b927e29290c540e95115623"}, "ensembl_transcript_id": "ENST00000502656", "hgnc_id": 4113, "chrom": "4", "start": 890257, "end": 926115, "is_primary": false, "build": "37", "length": 35858}, "ENST00000512325": {"_id": {"$oid": "5b927e29290c540e95115625"}, "ensembl_transcript_id": "ENST00000512325", "hgnc_id": 4113, "chrom": "4", "start": 891850, "end": 926105, "is_primary": false, "build": "37", "length": 34255}, "ENST00000509566": {"_id": {"$oid": "5b927e29290c540e9511561a"}, "ensembl_transcript_id": "ENST00000509566", "hgnc_id": 4113, "chrom": "4", "start": 843066, "end": 866561, 
"is_primary": false, "build": "37", "length": 23495}, "ENST00000515868": {"_id": {"$oid": "5b927e29290c540e95115619"}, "ensembl_transcript_id": "ENST00000515868", "hgnc_id": 4113, "chrom": "4", "start": 843066, "end": 863745, "is_primary": false, "build": "37", "length": 20679}, "ENST00000513935": {"_id": {"$oid": "5b927e29290c540e9511562a"}, "ensembl_transcript_id": "ENST00000513935", "hgnc_id": 4113, "chrom": "4", "start": 906250, "end": 926008, "is_primary": false, "build": "37", "length": 19758}, "ENST00000511980": {"_id": {"$oid": "5b927e29290c540e9511561d"}, "ensembl_transcript_id": "ENST00000511980", "hgnc_id": 4113, "chrom": "4", "start": 843096, "end": 860950, "is_primary": false, "build": "37", "length": 17854}, "ENST00000510799": {"_id": {"$oid": "5b927e29290c540e9511561e"}, "ensembl_transcript_id": "ENST00000510799", "hgnc_id": 4113, "chrom": "4", "start": 845641, "end": 861006, "is_primary": false, "build": "37", "length": 15365}, "ENST00000510022": {"_id": {"$oid": "5b927e29290c540e95115626"}, "ensembl_transcript_id": "ENST00000510022", "hgnc_id": 4113, "chrom": "4", "start": 898441, "end": 909544, "is_primary": false, "build": "37", "length": 11103}, "ENST00000511229": {"_id": {"$oid": "5b927e29290c540e95115627"}, "ensembl_transcript_id": "ENST00000511229", "hgnc_id": 4113, "chrom": "4", "start": 898475, "end": 909523, "is_primary": false, "build": "37", "length": 11048}, "ENST00000504435": {"_id": {"$oid": "5b927e29290c540e95115620"}, "ensembl_transcript_id": "ENST00000504435", "hgnc_id": 4113, "chrom": "4", "start": 879571, "end": 888615, "is_primary": false, "build": "37", "length": 9044}, "ENST00000507991": {"_id": {"$oid": "5b927e29290c540e95115624"}, "ensembl_transcript_id": "ENST00000507991", "hgnc_id": 4113, "chrom": "4", "start": 891764, "end": 898569, "is_primary": false, "build": "37", "length": 6805}, "ENST00000511345": {"_id": {"$oid": "5b927e29290c540e95115618"}, "ensembl_transcript_id": "ENST00000511345", "hgnc_id": 4113, "chrom": "4", 
"start": 843066, "end": 847616, "is_primary": false, "build": "37", "length": 4550}, "ENST00000511983": {"_id": {"$oid": "5b927e29290c540e95115628"}, "ensembl_transcript_id": "ENST00000511983", "hgnc_id": 4113, "chrom": "4", "start": 905461, "end": 909466, "is_primary": false, "build": "37", "length": 4005}, "ENST00000504668": {"_id": {"$oid": "5b927e29290c540e95115617"}, "ensembl_transcript_id": "ENST00000504668", "hgnc_id": 4113, "chrom": "4", "start": 843065, "end": 845709, "is_primary": false, "build": "37", "length": 2644}, "ENST00000502799": {"_id": {"$oid": "5b927e29290c540e95115616"}, "ensembl_transcript_id": "ENST00000502799", "hgnc_id": 4113, "chrom": "4", "start": 843064, "end": 845461, "is_primary": false, "build": "37", "length": 2397}, "ENST00000507124": {"_id": {"$oid": "5b927e29290c540e95115629"}, "ensembl_transcript_id": "ENST00000507124", "hgnc_id": 4113, "chrom": "4", "start": 905507, "end": 907817, "is_primary": false, "build": "37", "length": 2310}, "ENST00000504947": {"_id": {"$oid": "5b927e29290c540e9511561f"}, "ensembl_transcript_id": "ENST00000504947", "hgnc_id": 4113, "chrom": "4", "start": 876556, "end": 878178, "is_primary": false, "build": "37", "length": 1622}}}, "disease_terms": []}], "hgnc_ids": [4113], "hgnc_symbols": ["GAK"], "rank_score_results": [{"category": "variant_length", "score": 8}, {"category": "Variant_call_quality_filter", "score": -5}, {"category": "Inheritance_Models", "score": 1}, {"category": "Consequence", "score": 1}, {"category": "variant_type", "score": 3}, {"category": "Gene_intolerance_prediction", "score": 0}, {"category": "allele_frequency", "score": 4}], "variant_rank": 1}, {"_id": "3eecfca5efea445eec6c19a53299043b", "document_id": "3eecfca5efea445eec6c19a53299043b", "variant_id": "5e015a22c50c8555f04898cf774d5a74", "display_name": "7_124491972_C_A_clinical", "variant_type": "clinical", "case_id": "643594", "chromosome": "7", "reference": "C", "alternative": "A", "institute": "cust000", "missing_data": 
false, "position": 124491972, "rank_score": 11.0, "end": 124491972, "length": 1, "simple_id": "7_124491972_C_A", "quality": 1040.6700439453125, "filters": ["PASS"], "dbsnp_id": "rs116916706", "cosmic_ids": null, "category": "snv", "sub_category": "snv", "mate_id": null, "cytoband_start": "q31.33", "cytoband_end": "q31.33", "end_chrom": "7", "samples": [{"sample_id": "ADM1059A2", "display_name": "NA12882", "genotype_call": "0/1", "allele_depths": [24, 15], "read_depth": 39, "genotype_quality": 99}, {"sample_id": "ADM1059A1", "display_name": "NA12877", "genotype_call": "0/1", "allele_depths": [31, 18], "read_depth": 49, "genotype_quality": 99}, {"sample_id": "ADM1059A3", "display_name": "NA12878", "genotype_call": "0/0", "allele_depths": [37, 0], "read_depth": 37, "genotype_quality": 99}], "genetic_models": ["AR_comp_dn"], "compounds": [{"variant": "e711cff07907dde1a0b2893eae7d006e", "display_name": "7_124537031_G_A", "combined_score": 17.0, "rank_score": 6.0, "genes": [{"hgnc_id": 17284, "hgnc_symbol": "POT1", "region_annotation": "ncRNA_exonic", "functional_annotation": "non_coding_transcript_variant"}], "not_loaded": false}], "genes": [{"hgnc_id": 17284, "hgnc_symbol": "POT1", "ensembl_id": "ENSG00000128513", "description": "protection of telomeres 1", "inheritance": [], "transcripts": [{"transcript_id": "ENST00000357628", "hgnc_id": 17284, "protein_id": "ENSP00000350249", "sift_prediction": "tolerated", "polyphen_prediction": "possibly_damaging", "swiss_prot": "Q9NUX5", "biotype": "protein_coding", "functional_annotations": ["missense_variant"], "region_annotations": ["exonic"], "exon": "11/19", "strand": "-", "coding_sequence_name": "c.903G>T", "protein_sequence_name": "p.Gln301His", "is_canonical": true, "refseq_id": "NM_015450", "refseq_identifiers": []}, {"transcript_id": "ENST00000393329", "hgnc_id": 17284, "protein_id": "ENSP00000377002", "sift_prediction": "tolerated", "polyphen_prediction": "possibly_damaging", "swiss_prot": "unknown", "biotype": 
"protein_coding", "functional_annotations": ["missense_variant"], "region_annotations": ["exonic"], "exon": "10/18", "strand": "-", "coding_sequence_name": "c.510G>T", "protein_sequence_name": "p.Gln170His", "is_canonical": false, "refseq_id": "NM_001042594", "refseq_identifiers": [], "is_primary": true}, {"transcript_id": "ENST00000607932", "hgnc_id": 17284, "protein_id": "ENSP00000476506", "sift_prediction": "tolerated", "polyphen_prediction": "benign", "swiss_prot": "unknown", "biotype": "nonsense_mediated_decay", "functional_annotations": ["missense_variant", "NMD_transcript_variant"], "region_annotations": ["exonic", "ncRNA"], "exon": "7/14", "strand": "-", "coding_sequence_name": "c.903G>T", "protein_sequence_name": "p.Gln301His", "is_canonical": false}, {"transcript_id": "ENST00000608057", "hgnc_id": 17284, "protein_id": "ENSP00000476371", "sift_prediction": "deleterious", "polyphen_prediction": "benign", "swiss_prot": "unknown", "biotype": "nonsense_mediated_decay", "functional_annotations": ["missense_variant", "NMD_transcript_variant"], "region_annotations": ["exonic", "ncRNA"], "exon": "7/16", "strand": "-", "coding_sequence_name": "c.903G>T", "protein_sequence_name": "p.Gln301His", "is_canonical": false}, {"transcript_id": "ENST00000608126", "hgnc_id": 17284, "sift_prediction": "unknown", "polyphen_prediction": "unknown", "swiss_prot": "unknown", "biotype": "retained_intron", "functional_annotations": ["downstream_gene_variant"], "region_annotations": ["downstream"], "strand": "-", "is_canonical": false}, {"transcript_id": "ENST00000609106", "hgnc_id": 17284, "protein_id": "ENSP00000476981", "sift_prediction": "tolerated", "polyphen_prediction": "possibly_damaging", "swiss_prot": "unknown", "biotype": "nonsense_mediated_decay", "functional_annotations": ["missense_variant", "NMD_transcript_variant"], "region_annotations": ["exonic", "ncRNA"], "exon": "11/19", "strand": "-", "coding_sequence_name": "c.903G>T", "protein_sequence_name": "p.Gln301His", 
"is_canonical": false}, {"transcript_id": "ENST00000610141", "hgnc_id": 17284, "protein_id": "ENSP00000476673", "sift_prediction": "deleterious", "polyphen_prediction": "probably_damaging", "swiss_prot": "unknown", "biotype": "protein_coding", "functional_annotations": ["missense_variant"], "region_annotations": ["exonic"], "exon": "3/3", "strand": "-", "coding_sequence_name": "c.295G>T", "protein_sequence_name": "p.Gln99His", "is_canonical": false}], "functional_annotation": "missense_variant", "region_annotation": "exonic", "sift_prediction": "tolerated", "polyphen_prediction": "possibly_damaging", "disease_associated_transcripts": [], "manual_penetrance": false, "mosaicism": false, "manual_inheritance": [], "common": {"_id": {"$oid": "5b927db8290c540e950f46ce"}, "hgnc_id": 17284, "hgnc_symbol": "POT1", "ensembl_id": "ENSG00000128513", "chromosome": "7", "start": 124462440, "end": 124570037, "length": 107597, "description": "protection of telomeres 1", "aliases": ["POT1", "hPot1", "DKFZp586D211"], "primary_transcripts": ["NM_001042594"], "inheritance_models": ["AD"], "phenotypes": [{"mim_number": 616568, "description": "Glioma susceptibility 9", "inheritance_models": ["AD"], "status": "susceptibility"}, {"mim_number": 615848, "description": "Melanoma cutaneous malignant susceptibility to 10", "inheritance_models": ["AD"], "status": "susceptibility"}], "entrez_id": 25913, "omim_id": 606478, "ucsc_id": "uc003vlm.4", "uniprot_ids": ["Q9NUX5"], "vega_id": "OTTHUMG00000157194", "pli_score": 0.522727216703268, "incomplete_penetrance": false, "build": "37", "transcripts": [{"_id": {"$oid": "5b927e29290c540e9511ad5f"}, "ensembl_transcript_id": "ENST00000393329", "hgnc_id": 17284, "chrom": "7", "start": 124462441, "end": 124570037, "is_primary": true, "refseq_id": "NM_001042594", "build": "37", "length": 107596}, {"_id": {"$oid": "5b927e29290c540e9511ad4e"}, "ensembl_transcript_id": "ENST00000357628", "hgnc_id": 17284, "chrom": "7", "start": 124462440, "end": 124570035, 
"is_primary": false, "refseq_id": "NM_015450", "build": "37", "length": 107595}, {"_id": {"$oid": "5b927e29290c540e9511ad50"}, "ensembl_transcript_id": "ENST00000609106", "hgnc_id": 17284, "chrom": "7", "start": 124463910, "end": 124569856, "is_primary": false, "build": "37", "length": 105946}, {"_id": {"$oid": "5b927e29290c540e9511ad51"}, "ensembl_transcript_id": "ENST00000607932", "hgnc_id": 17284, "chrom": "7", "start": 124464016, "end": 124537238, "is_primary": false, "build": "37", "length": 73222}, {"_id": {"$oid": "5b927e29290c540e9511ad52"}, "ensembl_transcript_id": "ENST00000608057", "hgnc_id": 17284, "chrom": "7", "start": 124464016, "end": 124537238, "is_primary": false, "build": "37", "length": 73222}, {"_id": {"$oid": "5b927e29290c540e9511ad59"}, "ensembl_transcript_id": "ENST00000446993", "hgnc_id": 17284, "chrom": "7", "start": 124510973, "end": 124569998, "is_primary": false, "build": "37", "length": 59025}, {"_id": {"$oid": "5b927e29290c540e9511ad5a"}, "ensembl_transcript_id": "ENST00000609702", "hgnc_id": 17284, "chrom": "7", "start": 124510999, "end": 124569881, "is_primary": false, "build": "37", "length": 58882}, {"_id": {"$oid": "5b927e29290c540e9511ad58"}, "ensembl_transcript_id": "ENST00000429326", "hgnc_id": 17284, "chrom": "7", "start": 124499032, "end": 124537256, "is_primary": false, "build": "37", "length": 38224}, {"_id": {"$oid": "5b927e29290c540e9511ad5b"}, "ensembl_transcript_id": "ENST00000608261", "hgnc_id": 17284, "chrom": "7", "start": 124532320, "end": 124569879, "is_primary": false, "build": "37", "length": 37559}, {"_id": {"$oid": "5b927e29290c540e9511ad5c"}, "ensembl_transcript_id": "ENST00000608437", "hgnc_id": 17284, "chrom": "7", "start": 124532756, "end": 124569879, "is_primary": false, "build": "37", "length": 37123}, {"_id": {"$oid": "5b927e29290c540e9511ad5d"}, "ensembl_transcript_id": "ENST00000461288", "hgnc_id": 17284, "chrom": "7", "start": 124538315, "end": 124569856, "is_primary": false, "build": "37", "length": 
31541}, {"_id": {"$oid": "5b927e29290c540e9511ad55"}, "ensembl_transcript_id": "ENST00000610141", "hgnc_id": 17284, "chrom": "7", "start": 124491862, "end": 124499104, "is_primary": false, "build": "37", "length": 7242}, {"_id": {"$oid": "5b927e29290c540e9511ad4f"}, "ensembl_transcript_id": "ENST00000436534", "hgnc_id": 17284, "chrom": "7", "start": 124462455, "end": 124469396, "is_primary": false, "build": "37", "length": 6941}, {"_id": {"$oid": "5b927e29290c540e9511ad4d"}, "ensembl_transcript_id": "ENST00000430927", "hgnc_id": 17284, "chrom": "7", "start": 124462440, "end": 124467304, "is_primary": false, "build": "37", "length": 4864}, {"_id": {"$oid": "5b927e29290c540e9511ad57"}, "ensembl_transcript_id": "ENST00000487564", "hgnc_id": 17284, "chrom": "7", "start": 124498835, "end": 124503439, "is_primary": false, "build": "37", "length": 4604}, {"_id": {"$oid": "5b927e29290c540e9511ad54"}, "ensembl_transcript_id": "ENST00000466483", "hgnc_id": 17284, "chrom": "7", "start": 124481035, "end": 124483303, "is_primary": false, "build": "37", "length": 2268}, {"_id": {"$oid": "5b927e29290c540e9511ad53"}, "ensembl_transcript_id": "ENST00000608200", "hgnc_id": 17284, "chrom": "7", "start": 124480710, "end": 124482886, "is_primary": false, "build": "37", "length": 2176}, {"_id": {"$oid": "5b927e29290c540e9511ad56"}, "ensembl_transcript_id": "ENST00000608126", "hgnc_id": 17284, "chrom": "7", "start": 124491980, "end": 124493581, "is_primary": false, "build": "37", "length": 1601}, {"_id": {"$oid": "5b927e29290c540e9511ad5e"}, "ensembl_transcript_id": "ENST00000464453", "hgnc_id": 17284, "chrom": "7", "start": 124568975, "end": 124569840, "is_primary": false, "build": "37", "length": 865}], "transcripts_dict": {"ENST00000393329": {"_id": {"$oid": "5b927e29290c540e9511ad5f"}, "ensembl_transcript_id": "ENST00000393329", "hgnc_id": 17284, "chrom": "7", "start": 124462441, "end": 124570037, "is_primary": true, "refseq_id": "NM_001042594", "build": "37", "length": 107596}, 
"ENST00000357628": {"_id": {"$oid": "5b927e29290c540e9511ad4e"}, "ensembl_transcript_id": "ENST00000357628", "hgnc_id": 17284, "chrom": "7", "start": 124462440, "end": 124570035, "is_primary": false, "refseq_id": "NM_015450", "build": "37", "length": 107595}, "ENST00000609106": {"_id": {"$oid": "5b927e29290c540e9511ad50"}, "ensembl_transcript_id": "ENST00000609106", "hgnc_id": 17284, "chrom": "7", "start": 124463910, "end": 124569856, "is_primary": false, "build": "37", "length": 105946}, "ENST00000607932": {"_id": {"$oid": "5b927e29290c540e9511ad51"}, "ensembl_transcript_id": "ENST00000607932", "hgnc_id": 17284, "chrom": "7", "start": 124464016, "end": 124537238, "is_primary": false, "build": "37", "length": 73222}, "ENST00000608057": {"_id": {"$oid": "5b927e29290c540e9511ad52"}, "ensembl_transcript_id": "ENST00000608057", "hgnc_id": 17284, "chrom": "7", "start": 124464016, "end": 124537238, "is_primary": false, "build": "37", "length": 73222}, "ENST00000446993": {"_id": {"$oid": "5b927e29290c540e9511ad59"}, "ensembl_transcript_id": "ENST00000446993", "hgnc_id": 17284, "chrom": "7", "start": 124510973, "end": 124569998, "is_primary": false, "build": "37", "length": 59025}, "ENST00000609702": {"_id": {"$oid": "5b927e29290c540e9511ad5a"}, "ensembl_transcript_id": "ENST00000609702", "hgnc_id": 17284, "chrom": "7", "start": 124510999, "end": 124569881, "is_primary": false, "build": "37", "length": 58882}, "ENST00000429326": {"_id": {"$oid": "5b927e29290c540e9511ad58"}, "ensembl_transcript_id": "ENST00000429326", "hgnc_id": 17284, "chrom": "7", "start": 124499032, "end": 124537256, "is_primary": false, "build": "37", "length": 38224}, "ENST00000608261": {"_id": {"$oid": "5b927e29290c540e9511ad5b"}, "ensembl_transcript_id": "ENST00000608261", "hgnc_id": 17284, "chrom": "7", "start": 124532320, "end": 124569879, "is_primary": false, "build": "37", "length": 37559}, "ENST00000608437": {"_id": {"$oid": "5b927e29290c540e9511ad5c"}, "ensembl_transcript_id": 
"ENST00000608437", "hgnc_id": 17284, "chrom": "7", "start": 124532756, "end": 124569879, "is_primary": false, "build": "37", "length": 37123}, "ENST00000461288": {"_id": {"$oid": "5b927e29290c540e9511ad5d"}, "ensembl_transcript_id": "ENST00000461288", "hgnc_id": 17284, "chrom": "7", "start": 124538315, "end": 124569856, "is_primary": false, "build": "37", "length": 31541}, "ENST00000610141": {"_id": {"$oid": "5b927e29290c540e9511ad55"}, "ensembl_transcript_id": "ENST00000610141", "hgnc_id": 17284, "chrom": "7", "start": 124491862, "end": 124499104, "is_primary": false, "build": "37", "length": 7242}, "ENST00000436534": {"_id": {"$oid": "5b927e29290c540e9511ad4f"}, "ensembl_transcript_id": "ENST00000436534", "hgnc_id": 17284, "chrom": "7", "start": 124462455, "end": 124469396, "is_primary": false, "build": "37", "length": 6941}, "ENST00000430927": {"_id": {"$oid": "5b927e29290c540e9511ad4d"}, "ensembl_transcript_id": "ENST00000430927", "hgnc_id": 17284, "chrom": "7", "start": 124462440, "end": 124467304, "is_primary": false, "build": "37", "length": 4864}, "ENST00000487564": {"_id": {"$oid": "5b927e29290c540e9511ad57"}, "ensembl_transcript_id": "ENST00000487564", "hgnc_id": 17284, "chrom": "7", "start": 124498835, "end": 124503439, "is_primary": false, "build": "37", "length": 4604}, "ENST00000466483": {"_id": {"$oid": "5b927e29290c540e9511ad54"}, "ensembl_transcript_id": "ENST00000466483", "hgnc_id": 17284, "chrom": "7", "start": 124481035, "end": 124483303, "is_primary": false, "build": "37", "length": 2268}, "ENST00000608200": {"_id": {"$oid": "5b927e29290c540e9511ad53"}, "ensembl_transcript_id": "ENST00000608200", "hgnc_id": 17284, "chrom": "7", "start": 124480710, "end": 124482886, "is_primary": false, "build": "37", "length": 2176}, "ENST00000608126": {"_id": {"$oid": "5b927e29290c540e9511ad56"}, "ensembl_transcript_id": "ENST00000608126", "hgnc_id": 17284, "chrom": "7", "start": 124491980, "end": 124493581, "is_primary": false, "build": "37", "length": 1601}, 
"ENST00000464453": {"_id": {"$oid": "5b927e29290c540e9511ad5e"}, "ensembl_transcript_id": "ENST00000464453", "hgnc_id": 17284, "chrom": "7", "start": 124568975, "end": 124569840, "is_primary": false, "build": "37", "length": 865}}}, "disease_terms": [{"_id": "OMIM:616568", "disease_id": "OMIM:616568", "disease_nr": 616568, "description": "Glioma susceptibility 9", "source": "OMIM", "genes": [17284], "inheritance": ["AD"], "hpo_terms": []}, {"_id": "OMIM:615848", "disease_id": "OMIM:615848", "disease_nr": 615848, "description": "Melanoma cutaneous malignant susceptibility to 10", "source": "OMIM", "genes": [17284], "inheritance": ["AD"], "hpo_terms": []}]}], "hgnc_ids": [17284], "hgnc_symbols": ["POT1"], "panels": ["panel1"], "gatk": "Filtered", "freebayes": "Pass", "samtools": "Filtered", "phast_conservation": ["NotConserved"], "phylop_conservation": ["NotConserved"], "thousand_genomes_frequency": 0.001597439986653626, "max_thousand_genomes_frequency": 0.00800000037997961, "exac_frequency": 0.0025949999690055847, "local_obs_old": 1, "cadd_score": 15.0, "spidex": -1.0010000467300415, "rank_score_results": [{"category": "Splicing", "score": 3}, {"category": "Variant_call_quality_filter", "score": 3}, {"category": "Inheritance_Models", "score": 1}, {"category": "Clinical_significance", "score": 0}, {"category": "Consequence", "score": 5}, {"category": "Gene_intolerance_prediction", "score": 0}, {"category": "Conservation", "score": 0}, {"category": "allele_frequency", "score": 1}, {"category": "Deleteriousness", "score": 2}, {"category": "Protein_prediction", "score": 2}], "variant_rank": 2}, {"_id": "595c7f513f185d2dbcd4803888b8bc73", "document_id": "595c7f513f185d2dbcd4803888b8bc73", "variant_id": "75533206f9fa3c3ac9e108326da82deb", "display_name": "13_21732265_C__clinical", "variant_type": "clinical", "case_id": "643594", "chromosome": "13", "reference": "C", "alternative": "", "institute": "cust000", "missing_data": false, "position": 21732265, "rank_score": 4.0, 
"end": 21735928, "length": 3663, "simple_id": "13_21732265_C_", "quality": 272.0, "filters": ["PASS"], "dbsnp_id": "MantaDEL:39:3:6:0:0:0", "cosmic_ids": null, "category": "sv", "sub_category": "del", "mate_id": null, "cytoband_start": "q12.11", "cytoband_end": "q12.11", "end_chrom": "13", "samples": [{"sample_id": "ADM1059A2", "display_name": "NA12882", "genotype_call": "0/0", "allele_depths": [75, 0], "read_depth": 0, "genotype_quality": 235}, {"sample_id": "ADM1059A1", "display_name": "NA12877", "genotype_call": "0/0", "allele_depths": [69, 0], "read_depth": 0, "genotype_quality": 211}, {"sample_id": "ADM1059A3", "display_name": "NA12878", "genotype_call": "0/1", "allele_depths": [74, 19], "read_depth": 38, "genotype_quality": 272}], "genes": [{"hgnc_id": 20262, "hgnc_symbol": "SKA3", "ensembl_id": "ENSG00000165480", "description": "spindle and kinetochore associated complex subunit 3", "inheritance": [], "transcripts": [{"transcript_id": "ENST00000298260", "hgnc_id": 20262, "protein_id": "ENSP00000298260", "sift_prediction": "unknown", "polyphen_prediction": "unknown", "swiss_prot": "Q8IX90", "biotype": "nonsense_mediated_decay", "functional_annotations": ["3_prime_UTR_variant", "intron_variant", "NMD_transcript_variant", "feature_truncation"], "region_annotations": ["3UTR", "intronic", "ncRNA", "genomic_feature"], "exon": "5/8", "intron": "4-5/7", "strand": "-", "is_canonical": false, "refseq_id": "XM_005266288", "refseq_identifiers": []}, {"transcript_id": "ENST00000314759", "hgnc_id": 20262, "protein_id": "ENSP00000319417", "sift_prediction": "unknown", "polyphen_prediction": "unknown", "swiss_prot": "Q8IX90", "biotype": "protein_coding", "functional_annotations": ["coding_sequence_variant", "intron_variant", "feature_truncation"], "region_annotations": ["exonic", "intronic", "genomic_feature"], "exon": "6/9", "intron": "5-6/8", "strand": "-", "is_canonical": true, "refseq_id": "NM_145061", "refseq_identifiers": [], "is_primary": true}, {"transcript_id": 
"ENST00000400018", "hgnc_id": 20262, "protein_id": "ENSP00000382896", "sift_prediction": "unknown", "polyphen_prediction": "unknown", "swiss_prot": "Q8IX90", "biotype": "protein_coding", "functional_annotations": ["coding_sequence_variant", "intron_variant", "feature_truncation"], "region_annotations": ["exonic", "intronic", "genomic_feature"], "exon": "6/8", "intron": "5-6/7", "strand": "-", "is_canonical": false, "refseq_id": "NM_001166017", "refseq_identifiers": []}, {"transcript_id": "ENST00000462482", "hgnc_id": 20262, "protein_id": "ENSP00000443543", "sift_prediction": "unknown", "polyphen_prediction": "unknown", "swiss_prot": "unknown", "biotype": "nonsense_mediated_decay", "functional_annotations": ["intron_variant", "NMD_transcript_variant", "feature_truncation"], "region_annotations": ["intronic", "ncRNA", "genomic_feature"], "intron": "1/2", "strand": "-", "is_canonical": false}, {"transcript_id": "ENST00000465471", "hgnc_id": 20262, "protein_id": "ENSP00000441632", "sift_prediction": "unknown", "polyphen_prediction": "unknown", "swiss_prot": "unknown", "biotype": "nonsense_mediated_decay", "functional_annotations": ["downstream_gene_variant"], "region_annotations": ["downstream"], "strand": "-", "is_canonical": false}], "functional_annotation": "coding_sequence_variant", "region_annotation": "exonic", "sift_prediction": "unknown", "polyphen_prediction": "unknown", "disease_associated_transcripts": [], "manual_penetrance": false, "mosaicism": false, "manual_inheritance": [], "common": {"_id": {"$oid": "5b927db8290c540e950f62b8"}, "hgnc_id": 20262, "hgnc_symbol": "SKA3", "ensembl_id": "ENSG00000165480", "chromosome": "13", "start": 21727734, "end": 21750741, "length": 23007, "description": "spindle and kinetochore associated complex subunit 3", "aliases": ["C13orf3", "RAMA1", "MGC4832", "SKA3"], "primary_transcripts": ["NM_145061"], "entrez_id": 221150, "ucsc_id": "uc001unt.3", "uniprot_ids": ["Q8IX90"], "vega_id": "OTTHUMG00000016539", "pli_score": 
2.99578704451869e-07, "incomplete_penetrance": false, "build": "37", "transcripts": [{"_id": {"$oid": "5b927e28290c540e95101c3c"}, "ensembl_transcript_id": "ENST00000314759", "hgnc_id": 20262, "chrom": "13", "start": 21727734, "end": 21750741, "is_primary": true, "refseq_id": "NM_145061", "build": "37", "length": 23007}, {"_id": {"$oid": "5b927e28290c540e95101c3b"}, "ensembl_transcript_id": "ENST00000298260", "hgnc_id": 20262, "chrom": "13", "start": 21727734, "end": 21750687, "is_primary": false, "refseq_id": "XM_005266288", "build": "37", "length": 22953}, {"_id": {"$oid": "5b927e28290c540e95101c3a"}, "ensembl_transcript_id": "ENST00000400018", "hgnc_id": 20262, "chrom": "13", "start": 21727734, "end": 21750686, "is_primary": false, "refseq_id": "NM_001166017", "build": "37", "length": 22952}, {"_id": {"$oid": "5b927e28290c540e95101c3d"}, "ensembl_transcript_id": "ENST00000462482", "hgnc_id": 20262, "chrom": "13", "start": 21728836, "end": 21750690, "is_primary": false, "build": "37", "length": 21854}, {"_id": {"$oid": "5b927e28290c540e95101c3e"}, "ensembl_transcript_id": "ENST00000465471", "hgnc_id": 20262, "chrom": "13", "start": 21735929, "end": 21750690, "is_primary": false, "build": "37", "length": 14761}, {"_id": {"$oid": "5b927e28290c540e95101c3f"}, "ensembl_transcript_id": "ENST00000536239", "hgnc_id": 20262, "chrom": "13", "start": 21742361, "end": 21746852, "is_primary": false, "build": "37", "length": 4491}, {"_id": {"$oid": "5b927e28290c540e95101c40"}, "ensembl_transcript_id": "ENST00000475251", "hgnc_id": 20262, "chrom": "13", "start": 21746408, "end": 21750612, "is_primary": false, "build": "37", "length": 4204}], "transcripts_dict": {"ENST00000314759": {"_id": {"$oid": "5b927e28290c540e95101c3c"}, "ensembl_transcript_id": "ENST00000314759", "hgnc_id": 20262, "chrom": "13", "start": 21727734, "end": 21750741, "is_primary": true, "refseq_id": "NM_145061", "build": "37", "length": 23007}, "ENST00000298260": {"_id": {"$oid": 
"5b927e28290c540e95101c3b"}, "ensembl_transcript_id": "ENST00000298260", "hgnc_id": 20262, "chrom": "13", "start": 21727734, "end": 21750687, "is_primary": false, "refseq_id": "XM_005266288", "build": "37", "length": 22953}, "ENST00000400018": {"_id": {"$oid": "5b927e28290c540e95101c3a"}, "ensembl_transcript_id": "ENST00000400018", "hgnc_id": 20262, "chrom": "13", "start": 21727734, "end": 21750686, "is_primary": false, "refseq_id": "NM_001166017", "build": "37", "length": 22952}, "ENST00000462482": {"_id": {"$oid": "5b927e28290c540e95101c3d"}, "ensembl_transcript_id": "ENST00000462482", "hgnc_id": 20262, "chrom": "13", "start": 21728836, "end": 21750690, "is_primary": false, "build": "37", "length": 21854}, "ENST00000465471": {"_id": {"$oid": "5b927e28290c540e95101c3e"}, "ensembl_transcript_id": "ENST00000465471", "hgnc_id": 20262, "chrom": "13", "start": 21735929, "end": 21750690, "is_primary": false, "build": "37", "length": 14761}, "ENST00000536239": {"_id": {"$oid": "5b927e28290c540e95101c3f"}, "ensembl_transcript_id": "ENST00000536239", "hgnc_id": 20262, "chrom": "13", "start": 21742361, "end": 21746852, "is_primary": false, "build": "37", "length": 4491}, "ENST00000475251": {"_id": {"$oid": "5b927e28290c540e95101c40"}, "ensembl_transcript_id": "ENST00000475251", "hgnc_id": 20262, "chrom": "13", "start": 21746408, "end": 21750612, "is_primary": false, "build": "37", "length": 4204}}}, "disease_terms": []}], "hgnc_ids": [20262], "hgnc_symbols": ["SKA3"], "rank_score_results": [{"category": "variant_length", "score": 8}, {"category": "Variant_call_quality_filter", "score": 0}, {"category": "Inheritance_Models", "score": -12}, {"category": "Consequence", "score": 1}, {"category": "variant_type", "score": 3}, {"category": "Gene_intolerance_prediction", "score": 0}, {"category": "allele_frequency", "score": 4}], "variant_rank": 10}, {"_id": "7f2248f287fcfb078f0fb8dc4c1e6757", "document_id": "7f2248f287fcfb078f0fb8dc4c1e6757", "variant_id": 
"167ac42d297e31de6784613b86a304e8", "display_name": "13_21746642_T__clinical", "variant_type": "clinical", "case_id": "643594", "chromosome": "13", "reference": "T", "alternative": "", "institute": "cust000", "missing_data": false, "position": 21746642, "rank_score": 11.0, "end": 21750512, "length": 3870, "simple_id": "13_21746642_T_", "quality": 185.0, "filters": ["PASS"], "dbsnp_id": "MantaDEL:39:10:11:0:0:0", "cosmic_ids": null, "category": "sv", "sub_category": "del", "mate_id": null, "cytoband_start": "q12.11", "cytoband_end": "q12.11", "end_chrom": "13", "samples": [{"sample_id": "ADM1059A2", "display_name": "NA12882", "genotype_call": "0/0", "allele_depths": [69, 0], "read_depth": 0, "genotype_quality": 209}, {"sample_id": "ADM1059A1", "display_name": "NA12877", "genotype_call": "0/0", "allele_depths": [89, 0], "read_depth": 0, "genotype_quality": 268}, {"sample_id": "ADM1059A3", "display_name": "NA12878", "genotype_call": "0/1", "allele_depths": [88, 12], "read_depth": 24, "genotype_quality": 185}], "genes": [{"hgnc_id": 20262, "hgnc_symbol": "SKA3", "ensembl_id": "ENSG00000165480", "description": "spindle and kinetochore associated complex subunit 3", "inheritance": [], "transcripts": [{"transcript_id": "ENST00000298260", "hgnc_id": 20262, "protein_id": "ENSP00000298260", "sift_prediction": "unknown", "polyphen_prediction": "unknown", "swiss_prot": "Q8IX90", "biotype": "nonsense_mediated_decay", "functional_annotations": ["coding_sequence_variant", "intron_variant", "NMD_transcript_variant", "feature_truncation"], "region_annotations": ["exonic", "intronic", "ncRNA", "genomic_feature"], "exon": "2/8", "intron": "1/7", "strand": "-", "is_canonical": false, "refseq_id": "XM_005266288", "refseq_identifiers": []}, {"transcript_id": "ENST00000314759", "hgnc_id": 20262, "protein_id": "ENSP00000319417", "sift_prediction": "unknown", "polyphen_prediction": "unknown", "swiss_prot": "Q8IX90", "biotype": "protein_coding", "functional_annotations": 
["coding_sequence_variant", "intron_variant", "feature_truncation"], "region_annotations": ["exonic", "intronic", "genomic_feature"], "exon": "2-3/9", "intron": "1-2/8", "strand": "-", "is_canonical": true, "refseq_id": "NM_145061", "refseq_identifiers": [], "is_primary": true}, {"transcript_id": "ENST00000400018", "hgnc_id": 20262, "protein_id": "ENSP00000382896", "sift_prediction": "unknown", "polyphen_prediction": "unknown", "swiss_prot": "Q8IX90", "biotype": "protein_coding", "functional_annotations": ["coding_sequence_variant", "intron_variant", "feature_truncation"], "region_annotations": ["exonic", "intronic", "genomic_feature"], "exon": "2-3/8", "intron": "1-2/7", "strand": "-", "is_canonical": false, "refseq_id": "NM_001166017", "refseq_identifiers": []}, {"transcript_id": "ENST00000462482", "hgnc_id": 20262, "protein_id": "ENSP00000443543", "sift_prediction": "unknown", "polyphen_prediction": "unknown", "swiss_prot": "unknown", "biotype": "nonsense_mediated_decay", "functional_annotations": ["intron_variant", "NMD_transcript_variant", "feature_truncation"], "region_annotations": ["intronic", "ncRNA", "genomic_feature"], "intron": "1/2", "strand": "-", "is_canonical": false}, {"transcript_id": "ENST00000465471", "hgnc_id": 20262, "protein_id": "ENSP00000441632", "sift_prediction": "unknown", "polyphen_prediction": "unknown", "swiss_prot": "unknown", "biotype": "nonsense_mediated_decay", "functional_annotations": ["stop_lost", "coding_sequence_variant", "3_prime_UTR_variant", "intron_variant", "NMD_transcript_variant", "feature_truncation"], "region_annotations": ["exonic", "exonic", "3UTR", "intronic", "ncRNA", "genomic_feature"], "exon": "1-3/5", "intron": "1-2/4", "strand": "-", "is_canonical": false}, {"transcript_id": "ENST00000475251", "hgnc_id": 20262, "sift_prediction": "unknown", "polyphen_prediction": "unknown", "swiss_prot": "unknown", "biotype": "retained_intron", "functional_annotations": ["non_coding_transcript_exon_variant", "intron_variant", 
"non_coding_transcript_variant", "feature_truncation"], "region_annotations": ["ncRNA_exonic", "intronic", "ncRNA_exonic", "genomic_feature"], "exon": "2-3/3", "intron": "1-2/2", "strand": "-", "is_canonical": false}, {"transcript_id": "ENST00000536239", "hgnc_id": 20262, "sift_prediction": "unknown", "polyphen_prediction": "unknown", "swiss_prot": "unknown", "biotype": "retained_intron", "functional_annotations": ["non_coding_transcript_exon_variant", "intron_variant", "non_coding_transcript_variant", "feature_truncation"], "region_annotations": ["ncRNA_exonic", "intronic", "ncRNA_exonic", "genomic_feature"], "exon": "1-2/3", "intron": "1/2", "strand": "-", "is_canonical": false}], "functional_annotation": "stop_lost", "region_annotation": "exonic", "sift_prediction": "unknown", "polyphen_prediction": "unknown", "disease_associated_transcripts": [], "manual_penetrance": false, "mosaicism": false, "manual_inheritance": [], "common": {"_id": {"$oid": "5b927db8290c540e950f62b8"}, "hgnc_id": 20262, "hgnc_symbol": "SKA3", "ensembl_id": "ENSG00000165480", "chromosome": "13", "start": 21727734, "end": 21750741, "length": 23007, "description": "spindle and kinetochore associated complex subunit 3", "aliases": ["C13orf3", "RAMA1", "MGC4832", "SKA3"], "primary_transcripts": ["NM_145061"], "entrez_id": 221150, "ucsc_id": "uc001unt.3", "uniprot_ids": ["Q8IX90"], "vega_id": "OTTHUMG00000016539", "pli_score": 2.99578704451869e-07, "incomplete_penetrance": false, "build": "37", "transcripts": [{"_id": {"$oid": "5b927e28290c540e95101c3c"}, "ensembl_transcript_id": "ENST00000314759", "hgnc_id": 20262, "chrom": "13", "start": 21727734, "end": 21750741, "is_primary": true, "refseq_id": "NM_145061", "build": "37", "length": 23007}, {"_id": {"$oid": "5b927e28290c540e95101c3b"}, "ensembl_transcript_id": "ENST00000298260", "hgnc_id": 20262, "chrom": "13", "start": 21727734, "end": 21750687, "is_primary": false, "refseq_id": "XM_005266288", "build": "37", "length": 22953}, {"_id": 
{"$oid": "5b927e28290c540e95101c3a"}, "ensembl_transcript_id": "ENST00000400018", "hgnc_id": 20262, "chrom": "13", "start": 21727734, "end": 21750686, "is_primary": false, "refseq_id": "NM_001166017", "build": "37", "length": 22952}, {"_id": {"$oid": "5b927e28290c540e95101c3d"}, "ensembl_transcript_id": "ENST00000462482", "hgnc_id": 20262, "chrom": "13", "start": 21728836, "end": 21750690, "is_primary": false, "build": "37", "length": 21854}, {"_id": {"$oid": "5b927e28290c540e95101c3e"}, "ensembl_transcript_id": "ENST00000465471", "hgnc_id": 20262, "chrom": "13", "start": 21735929, "end": 21750690, "is_primary": false, "build": "37", "length": 14761}, {"_id": {"$oid": "5b927e28290c540e95101c3f"}, "ensembl_transcript_id": "ENST00000536239", "hgnc_id": 20262, "chrom": "13", "start": 21742361, "end": 21746852, "is_primary": false, "build": "37", "length": 4491}, {"_id": {"$oid": "5b927e28290c540e95101c40"}, "ensembl_transcript_id": "ENST00000475251", "hgnc_id": 20262, "chrom": "13", "start": 21746408, "end": 21750612, "is_primary": false, "build": "37", "length": 4204}], "transcripts_dict": {"ENST00000314759": {"_id": {"$oid": "5b927e28290c540e95101c3c"}, "ensembl_transcript_id": "ENST00000314759", "hgnc_id": 20262, "chrom": "13", "start": 21727734, "end": 21750741, "is_primary": true, "refseq_id": "NM_145061", "build": "37", "length": 23007}, "ENST00000298260": {"_id": {"$oid": "5b927e28290c540e95101c3b"}, "ensembl_transcript_id": "ENST00000298260", "hgnc_id": 20262, "chrom": "13", "start": 21727734, "end": 21750687, "is_primary": false, "refseq_id": "XM_005266288", "build": "37", "length": 22953}, "ENST00000400018": {"_id": {"$oid": "5b927e28290c540e95101c3a"}, "ensembl_transcript_id": "ENST00000400018", "hgnc_id": 20262, "chrom": "13", "start": 21727734, "end": 21750686, "is_primary": false, "refseq_id": "NM_001166017", "build": "37", "length": 22952}, "ENST00000462482": {"_id": {"$oid": "5b927e28290c540e95101c3d"}, "ensembl_transcript_id": "ENST00000462482", 
"hgnc_id": 20262, "chrom": "13", "start": 21728836, "end": 21750690, "is_primary": false, "build": "37", "length": 21854}, "ENST00000465471": {"_id": {"$oid": "5b927e28290c540e95101c3e"}, "ensembl_transcript_id": "ENST00000465471", "hgnc_id": 20262, "chrom": "13", "start": 21735929, "end": 21750690, "is_primary": false, "build": "37", "length": 14761}, "ENST00000536239": {"_id": {"$oid": "5b927e28290c540e95101c3f"}, "ensembl_transcript_id": "ENST00000536239", "hgnc_id": 20262, "chrom": "13", "start": 21742361, "end": 21746852, "is_primary": false, "build": "37", "length": 4491}, "ENST00000475251": {"_id": {"$oid": "5b927e28290c540e95101c40"}, "ensembl_transcript_id": "ENST00000475251", "hgnc_id": 20262, "chrom": "13", "start": 21746408, "end": 21750612, "is_primary": false, "build": "37", "length": 4204}}}, "disease_terms": []}], "hgnc_ids": [20262], "hgnc_symbols": ["SKA3"], "thousand_genomes_frequency_left": 0.00019967999833170325, "thousand_genomes_frequency_right": 0.00019967999833170325, "rank_score_results": [{"category": "variant_length", "score": 8}, {"category": "Variant_call_quality_filter", "score": 0}, {"category": "Inheritance_Models", "score": -12}, {"category": "Consequence", "score": 8}, {"category": "variant_type", "score": 3}, {"category": "Gene_intolerance_prediction", "score": 0}, {"category": "allele_frequency", "score": 4}], "variant_rank": 3}, {"_id": "19141dfc59beacd89aab2deaf8293f8c", "document_id": "19141dfc59beacd89aab2deaf8293f8c", "variant_id": "ccd977a55f53f4a12066f6f0d9ef5260", "display_name": "20_54963149_C__clinical", "variant_type": "clinical", "case_id": "643594", "chromosome": "20", "reference": "C", "alternative": "", "institute": "cust000", "missing_data": false, "position": 54963149, "rank_score": 4.0, "end": 54963210, "length": 61, "simple_id": "20_54963149_C_", "quality": 340.0, "filters": ["PASS"], "dbsnp_id": "MantaDUP:TANDEM:59:0:1:0:0:0", "cosmic_ids": null, "category": "sv", "sub_category": "dup", "mate_id": null, 
"cytoband_start": "q13.2", "cytoband_end": "q13.2", "end_chrom": "20", "samples": [{"sample_id": "ADM1059A2", "display_name": "NA12882", "genotype_call": "0/1", "allele_depths": [37, 3], "read_depth": 6, "genotype_quality": 15}, {"sample_id": "ADM1059A1", "display_name": "NA12877", "genotype_call": "0/1", "allele_depths": [31, 7], "read_depth": 14, "genotype_quality": 104}, {"sample_id": "ADM1059A3", "display_name": "NA12878", "genotype_call": "0/1", "allele_depths": [34, 11], "read_depth": 22, "genotype_quality": 221}], "genes": [{"hgnc_id": 11393, "hgnc_symbol": "AURKA", "ensembl_id": "ENSG00000087586", "description": "aurora kinase A", "inheritance": [], "transcripts": [{"transcript_id": "ENST00000312783", "hgnc_id": 11393, "protein_id": "ENSP00000321591", "sift_prediction": "unknown", "polyphen_prediction": "unknown", "swiss_prot": "O14965", "biotype": "protein_coding", "functional_annotations": ["intron_variant", "feature_elongation"], "region_annotations": ["intronic", "genomic_feature"], "intron": "3/9", "strand": "-", "is_canonical": false, "refseq_id": "NM_198436", "refseq_identifiers": []}, {"transcript_id": "ENST00000347343", "hgnc_id": 11393, "protein_id": "ENSP00000216911", "sift_prediction": "unknown", "polyphen_prediction": "unknown", "swiss_prot": "O14965", "biotype": "protein_coding", "functional_annotations": ["intron_variant", "feature_elongation"], "region_annotations": ["intronic", "genomic_feature"], "intron": "2/8", "strand": "-", "is_canonical": false, "refseq_id": "NM_003600", "refseq_identifiers": [], "is_primary": true}, {"transcript_id": "ENST00000371356", "hgnc_id": 11393, "protein_id": "ENSP00000360407", "sift_prediction": "unknown", "polyphen_prediction": "unknown", "swiss_prot": "O14965", "biotype": "protein_coding", "functional_annotations": ["intron_variant", "feature_elongation"], "region_annotations": ["intronic", "genomic_feature"], "intron": "3/9", "strand": "-", "is_canonical": false}, {"transcript_id": "ENST00000395907", 
"hgnc_id": 11393, "protein_id": "ENSP00000379243", "sift_prediction": "unknown", "polyphen_prediction": "unknown", "swiss_prot": "unknown", "biotype": "protein_coding", "functional_annotations": ["intron_variant", "feature_elongation"], "region_annotations": ["intronic", "genomic_feature"], "intron": "2/7", "strand": "-", "is_canonical": false}, {"transcript_id": "ENST00000395909", "hgnc_id": 11393, "protein_id": "ENSP00000379245", "sift_prediction": "unknown", "polyphen_prediction": "unknown", "swiss_prot": "O14965", "biotype": "protein_coding", "functional_annotations": ["intron_variant", "feature_elongation"], "region_annotations": ["intronic", "genomic_feature"], "intron": "4/10", "strand": "-", "is_canonical": true, "refseq_id": "NM_198433", "refseq_identifiers": []}, {"transcript_id": "ENST00000395911", "hgnc_id": 11393, "protein_id": "ENSP00000379247", "sift_prediction": "unknown", "polyphen_prediction": "unknown", "swiss_prot": "O14965", "biotype": "protein_coding", "functional_annotations": ["intron_variant", "feature_elongation"], "region_annotations": ["intronic", "genomic_feature"], "intron": "3/9", "strand": "-", "is_canonical": false, "refseq_id": "XM_005260535", "refseq_identifiers": []}, {"transcript_id": "ENST00000395913", "hgnc_id": 11393, "protein_id": "ENSP00000379249", "sift_prediction": "unknown", "polyphen_prediction": "unknown", "swiss_prot": "O14965", "biotype": "protein_coding", "functional_annotations": ["intron_variant", "feature_elongation"], "region_annotations": ["intronic", "genomic_feature"], "intron": "2/8", "strand": "-", "is_canonical": false}, {"transcript_id": "ENST00000395914", "hgnc_id": 11393, "protein_id": "ENSP00000379250", "sift_prediction": "unknown", "polyphen_prediction": "unknown", "swiss_prot": "O14965", "biotype": "protein_coding", "functional_annotations": ["intron_variant", "feature_elongation"], "region_annotations": ["intronic", "genomic_feature"], "intron": "3/9", "strand": "-", "is_canonical": false, 
"refseq_id": "NM_198434", "refseq_identifiers": []}, {"transcript_id": "ENST00000395915", "hgnc_id": 11393, "protein_id": "ENSP00000379251", "sift_prediction": "unknown", "polyphen_prediction": "unknown", "swiss_prot": "O14965", "biotype": "protein_coding", "functional_annotations": ["intron_variant", "feature_elongation"], "region_annotations": ["intronic", "genomic_feature"], "intron": "2/8", "strand": "-", "is_canonical": false, "refseq_id": "NM_198437", "refseq_identifiers": []}, {"transcript_id": "ENST00000420474", "hgnc_id": 11393, "protein_id": "ENSP00000388073", "sift_prediction": "unknown", "polyphen_prediction": "unknown", "swiss_prot": "unknown", "biotype": "protein_coding", "functional_annotations": ["intron_variant", "feature_elongation"], "region_annotations": ["intronic", "genomic_feature"], "intron": "4/6", "strand": "-", "is_canonical": false}, {"transcript_id": "ENST00000422322", "hgnc_id": 11393, "protein_id": "ENSP00000405042", "sift_prediction": "unknown", "polyphen_prediction": "unknown", "swiss_prot": "unknown", "biotype": "protein_coding", "functional_annotations": ["intron_variant", "feature_elongation"], "region_annotations": ["intronic", "genomic_feature"], "intron": "3/5", "strand": "-", "is_canonical": false}, {"transcript_id": "ENST00000441357", "hgnc_id": 11393, "protein_id": "ENSP00000393452", "sift_prediction": "unknown", "polyphen_prediction": "unknown", "swiss_prot": "unknown", "biotype": "protein_coding", "functional_annotations": ["intron_variant", "feature_elongation"], "region_annotations": ["intronic", "genomic_feature"], "intron": "3/7", "strand": "-", "is_canonical": false}, {"transcript_id": "ENST00000451915", "hgnc_id": 11393, "protein_id": "ENSP00000401358", "sift_prediction": "unknown", "polyphen_prediction": "unknown", "swiss_prot": "unknown", "biotype": "protein_coding", "functional_annotations": ["intron_variant", "feature_elongation"], "region_annotations": ["intronic", "genomic_feature"], "intron": "4/4", "strand": 
"-", "is_canonical": false}, {"transcript_id": "ENST00000456249", "hgnc_id": 11393, "protein_id": "ENSP00000405170", "sift_prediction": "unknown", "polyphen_prediction": "unknown", "swiss_prot": "unknown", "biotype": "protein_coding", "functional_annotations": ["intron_variant", "feature_elongation"], "region_annotations": ["intronic", "genomic_feature"], "intron": "4/4", "strand": "-", "is_canonical": false}], "functional_annotation": "intron_variant", "region_annotation": "intronic", "sift_prediction": "unknown", "polyphen_prediction": "unknown", "disease_associated_transcripts": [], "manual_penetrance": false, "mosaicism": false, "manual_inheritance": [], "common": {"_id": {"$oid": "5b927db8290c540e950f008f"}, "hgnc_id": 11393, "hgnc_symbol": "AURKA", "ensembl_id": "ENSG00000087586", "chromosome": "20", "start": 54944445, "end": 54967393, "length": 22948, "description": "aurora kinase A", "aliases": ["AURKA", "AurA", "BTAK", "STK15", "AIK", "STK6", "STK7", "PPP1R47", "ARK1"], "primary_transcripts": ["NM_003600"], "inheritance_models": ["AD"], "phenotypes": [{"mim_number": 114500, "description": "Colon cancer susceptibility to", "inheritance_models": ["AD"], "status": "susceptibility"}], "entrez_id": 6790, "omim_id": 603072, "ucsc_id": "uc002xxg.2", "uniprot_ids": ["O14965"], "vega_id": "OTTHUMG00000032796", "pli_score": 0.435603931095833, "incomplete_penetrance": false, "build": "37", "transcripts": [{"_id": {"$oid": "5b927e29290c540e95110893"}, "ensembl_transcript_id": "ENST00000395913", "hgnc_id": 11393, "chrom": "20", "start": 54944446, "end": 54967393, "is_primary": false, "build": "37", "length": 22947}, {"_id": {"$oid": "5b927e29290c540e95110892"}, "ensembl_transcript_id": "ENST00000395915", "hgnc_id": 11393, "chrom": "20", "start": 54944446, "end": 54967352, "is_primary": false, "refseq_id": "NM_198437", "build": "37", "length": 22906}, {"_id": {"$oid": "5b927e29290c540e9511089a"}, "ensembl_transcript_id": "ENST00000395909", "hgnc_id": 11393, "chrom": 
"20", "start": 54944445, "end": 54967351, "is_primary": false, "refseq_id": "NM_198433", "build": "37", "length": 22906}, {"_id": {"$oid": "5b927e29290c540e9511089b"}, "ensembl_transcript_id": "ENST00000395914", "hgnc_id": 11393, "chrom": "20", "start": 54944445, "end": 54967351, "is_primary": false, "refseq_id": "NM_198434", "build": "37", "length": 22906}, {"_id": {"$oid": "5b927e29290c540e95110891"}, "ensembl_transcript_id": "ENST00000312783", "hgnc_id": 11393, "chrom": "20", "start": 54944446, "end": 54967351, "is_primary": false, "refseq_id": "NM_198436", "build": "37", "length": 22905}, {"_id": {"$oid": "5b927e29290c540e95110890"}, "ensembl_transcript_id": "ENST00000395911", "hgnc_id": 11393, "chrom": "20", "start": 54944446, "end": 54967271, "is_primary": false, "refseq_id": "XM_005260535", "build": "37", "length": 22825}, {"_id": {"$oid": "5b927e29290c540e9511088f"}, "ensembl_transcript_id": "ENST00000347343", "hgnc_id": 11393, "chrom": "20", "start": 54944446, "end": 54967261, "is_primary": true, "refseq_id": "NM_003600", "build": "37", "length": 22815}, {"_id": {"$oid": "5b927e29290c540e9511088e"}, "ensembl_transcript_id": "ENST00000371356", "hgnc_id": 11393, "chrom": "20", "start": 54944446, "end": 54967252, "is_primary": false, "build": "37", "length": 22806}, {"_id": {"$oid": "5b927e29290c540e95110894"}, "ensembl_transcript_id": "ENST00000395907", "hgnc_id": 11393, "chrom": "20", "start": 54944721, "end": 54967271, "is_primary": false, "build": "37", "length": 22550}, {"_id": {"$oid": "5b927e29290c540e95110895"}, "ensembl_transcript_id": "ENST00000441357", "hgnc_id": 11393, "chrom": "20", "start": 54948504, "end": 54967271, "is_primary": false, "build": "37", "length": 18767}, {"_id": {"$oid": "5b927e29290c540e95110896"}, "ensembl_transcript_id": "ENST00000420474", "hgnc_id": 11393, "chrom": "20", "start": 54958041, "end": 54967323, "is_primary": false, "build": "37", "length": 9282}, {"_id": {"$oid": "5b927e29290c540e95110897"}, 
"ensembl_transcript_id": "ENST00000422322", "hgnc_id": 11393, "chrom": "20", "start": 54958167, "end": 54967187, "is_primary": false, "build": "37", "length": 9020}, {"_id": {"$oid": "5b927e29290c540e95110898"}, "ensembl_transcript_id": "ENST00000456249", "hgnc_id": 11393, "chrom": "20", "start": 54961313, "end": 54967276, "is_primary": false, "build": "37", "length": 5963}, {"_id": {"$oid": "5b927e29290c540e95110899"}, "ensembl_transcript_id": "ENST00000451915", "hgnc_id": 11393, "chrom": "20", "start": 54961393, "end": 54967258, "is_primary": false, "build": "37", "length": 5865}], "transcripts_dict": {"ENST00000395913": {"_id": {"$oid": "5b927e29290c540e95110893"}, "ensembl_transcript_id": "ENST00000395913", "hgnc_id": 11393, "chrom": "20", "start": 54944446, "end": 54967393, "is_primary": false, "build": "37", "length": 22947}, "ENST00000395915": {"_id": {"$oid": "5b927e29290c540e95110892"}, "ensembl_transcript_id": "ENST00000395915", "hgnc_id": 11393, "chrom": "20", "start": 54944446, "end": 54967352, "is_primary": false, "refseq_id": "NM_198437", "build": "37", "length": 22906}, "ENST00000395909": {"_id": {"$oid": "5b927e29290c540e9511089a"}, "ensembl_transcript_id": "ENST00000395909", "hgnc_id": 11393, "chrom": "20", "start": 54944445, "end": 54967351, "is_primary": false, "refseq_id": "NM_198433", "build": "37", "length": 22906}, "ENST00000395914": {"_id": {"$oid": "5b927e29290c540e9511089b"}, "ensembl_transcript_id": "ENST00000395914", "hgnc_id": 11393, "chrom": "20", "start": 54944445, "end": 54967351, "is_primary": false, "refseq_id": "NM_198434", "build": "37", "length": 22906}, "ENST00000312783": {"_id": {"$oid": "5b927e29290c540e95110891"}, "ensembl_transcript_id": "ENST00000312783", "hgnc_id": 11393, "chrom": "20", "start": 54944446, "end": 54967351, "is_primary": false, "refseq_id": "NM_198436", "build": "37", "length": 22905}, "ENST00000395911": {"_id": {"$oid": "5b927e29290c540e95110890"}, "ensembl_transcript_id": "ENST00000395911", "hgnc_id": 
11393, "chrom": "20", "start": 54944446, "end": 54967271, "is_primary": false, "refseq_id": "XM_005260535", "build": "37", "length": 22825}, "ENST00000347343": {"_id": {"$oid": "5b927e29290c540e9511088f"}, "ensembl_transcript_id": "ENST00000347343", "hgnc_id": 11393, "chrom": "20", "start": 54944446, "end": 54967261, "is_primary": true, "refseq_id": "NM_003600", "build": "37", "length": 22815}, "ENST00000371356": {"_id": {"$oid": "5b927e29290c540e9511088e"}, "ensembl_transcript_id": "ENST00000371356", "hgnc_id": 11393, "chrom": "20", "start": 54944446, "end": 54967252, "is_primary": false, "build": "37", "length": 22806}, "ENST00000395907": {"_id": {"$oid": "5b927e29290c540e95110894"}, "ensembl_transcript_id": "ENST00000395907", "hgnc_id": 11393, "chrom": "20", "start": 54944721, "end": 54967271, "is_primary": false, "build": "37", "length": 22550}, "ENST00000441357": {"_id": {"$oid": "5b927e29290c540e95110895"}, "ensembl_transcript_id": "ENST00000441357", "hgnc_id": 11393, "chrom": "20", "start": 54948504, "end": 54967271, "is_primary": false, "build": "37", "length": 18767}, "ENST00000420474": {"_id": {"$oid": "5b927e29290c540e95110896"}, "ensembl_transcript_id": "ENST00000420474", "hgnc_id": 11393, "chrom": "20", "start": 54958041, "end": 54967323, "is_primary": false, "build": "37", "length": 9282}, "ENST00000422322": {"_id": {"$oid": "5b927e29290c540e95110897"}, "ensembl_transcript_id": "ENST00000422322", "hgnc_id": 11393, "chrom": "20", "start": 54958167, "end": 54967187, "is_primary": false, "build": "37", "length": 9020}, "ENST00000456249": {"_id": {"$oid": "5b927e29290c540e95110898"}, "ensembl_transcript_id": "ENST00000456249", "hgnc_id": 11393, "chrom": "20", "start": 54961313, "end": 54967276, "is_primary": false, "build": "37", "length": 5963}, "ENST00000451915": {"_id": {"$oid": "5b927e29290c540e95110899"}, "ensembl_transcript_id": "ENST00000451915", "hgnc_id": 11393, "chrom": "20", "start": 54961393, "end": 54967258, "is_primary": false, "build": 
"37", "length": 5865}}}, "disease_terms": [{"_id": "OMIM:114500", "disease_id": "OMIM:114500", "disease_nr": 114500, "description": "Colorectal cancer", "source": "OMIM", "genes": [11393, 391, 904, 2701, 8975, 11283, 6935, 3373, 8109, 1582, 9645, 27310, 7989, 959, 583, 11848, 9031, 9673, 17228, 2897, 2514, 7128, 11998, 8805, 3690, 1149], "inheritance": ["AD"], "hpo_terms": ["HP:0005584", "HP:0006740", "HP:0006753", "HP:0006716", "HP:0002891"]}]}], "hgnc_ids": [11393], "hgnc_symbols": ["AURKA"], "rank_score_results": [{"category": "variant_length", "score": 8}, {"category": "Variant_call_quality_filter", "score": 0}, {"category": "Inheritance_Models", "score": -12}, {"category": "Consequence", "score": 1}, {"category": "variant_type", "score": 3}, {"category": "Gene_intolerance_prediction", "score": 0}, {"category": "allele_frequency", "score": 4}], "variant_rank": 13}] diff --git a/tests/recipes/test_input_recipe.py b/tests/recipes/test_input_recipe.py index a750833..5ae1a5d 100644 --- a/tests/recipes/test_input_recipe.py +++ b/tests/recipes/test_input_recipe.py @@ -11,6 +11,7 @@ HK_OUT_FILE = "tests/fixtures/HK_output_test.txt" SCOUT_OUT_FILE = "tests/fixtures/scout_output.json" TEST_VCF = "tests/fixtures/test_vcf.vcf" +TEST_SCOUT_VARIANT = "tests/fixtures/scout_variant_output.json" def mock_hk_output(case_id): @@ -28,9 +29,9 @@ def mock_scout_output(case_id): return scout_out -def mock_vcf(case_id): +def mock_scout_variant(case_id): - with open(TEST_VCF) as vcf_handle: + with open(TEST_SCOUT_VARIANT) as vcf_handle: vcf_out = vcf_handle.read() @@ -49,7 +50,7 @@ def test_get_bams(): assert len(bams) == 3 -@patch.object(Command, 'check_output', mock_vcf) +@patch.object(Command, 'check_output', mock_scout_variant) def test_write_vcf(tmpdir): tmp_dir = Path(tmpdir.mkdir('test_write_vcf')) vcf_path = write_vcf('case_id', tmp_dir)