From fb7a50ad1accbfba9fd1d40dce526fc8d783033e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Wed, 17 Apr 2019 13:31:00 +0200 Subject: [PATCH] fix docstrings as required by pydocstyle --- backend/modules/browser/browser_handlers.py | 97 +++++++++++++-------- backend/modules/browser/lookups.py | 83 ++++++++++++------ backend/modules/browser/utils.py | 69 ++++++++++----- 3 files changed, 158 insertions(+), 91 deletions(-) diff --git a/backend/modules/browser/browser_handlers.py b/backend/modules/browser/browser_handlers.py index 7ed103532..bcb867f7f 100644 --- a/backend/modules/browser/browser_handlers.py +++ b/backend/modules/browser/browser_handlers.py @@ -1,6 +1,4 @@ -""" -Request handlers for the browser -""" +"""Request handlers for the variant browser.""" import logging @@ -12,7 +10,17 @@ class Autocomplete(handlers.UnsafeHandler): + """Provide autocompletion for protein names based on current query.""" + def get(self, dataset:str, query:str, ds_version:str=None): + """ + Provide autocompletion for protein names based on current query. + + Args: + dataset (str): dataset short name + query (str): query + ds_version (str): dataset version + """ dataset, ds_version = utils.parse_dataset(dataset, ds_version) ret = {} @@ -23,9 +31,13 @@ def get(self, dataset:str, query:str, ds_version:str=None): class Download(handlers.UnsafeHandler): + """Download variants in CSV format.""" + def get(self, dataset:str, datatype:str, item:str, ds_version:str=None, filter_type:str=None): """ - Download variants as csv + Download variants in CSV format. + + Will filter the variants if filter_type is provided. Args: dataset (str): dataset short name @@ -62,10 +74,18 @@ def get(self, dataset:str, datatype:str, item:str, ds_version:str=None, filter_t class GetCoverage(handlers.UnsafeHandler): - """ - Retrieve coverage - """ + """Retrieve coverage.""" + def get(self, dataset:str, datatype:str, item:str, ds_version:str=None): + """ + Retrieve coverage. 
+ + Args: + dataset (str): dataset short name + datatype (str): type of data + item (str): query item + ds_version (str): dataset version + """ dataset, ds_version = utils.parse_dataset(dataset, ds_version) ret = utils.get_coverage(dataset, datatype, item, ds_version) if 'region_too_large' in ret: @@ -78,26 +98,34 @@ def get(self, dataset:str, datatype:str, item:str, ds_version:str=None): class GetCoveragePos(handlers.UnsafeHandler): - """ - Retrieve coverage range - """ + """Retrieve coverage range.""" + def get(self, dataset:str, datatype:str, item:str, ds_version:str=None): + """ + Retrieve coverage range. + + Args: + dataset (str): dataset short name + datatype (str): type of data + item (str): query item + ds_version (str): dataset version + """ dataset, ds_version = utils.parse_dataset(dataset, ds_version) ret = utils.get_coverage_pos(dataset, datatype, item, ds_version) self.finish(ret) class GetGene(handlers.UnsafeHandler): - """ - Request information about a gene - """ + """Request information about a gene.""" + def get(self, dataset:str, gene:str, ds_version:str=None): """ - Request information about a gene + Request information about a gene. 
Args: dataset (str): short name of the dataset gene (str): the gene id + ds_version (str): dataset version """ dataset, ds_version = utils.parse_dataset(dataset, ds_version) gene_id = gene @@ -123,7 +151,6 @@ def get(self, dataset:str, gene:str, ds_version:str=None): for transcript in transcripts_in_gene: ret['transcripts'] += [{'transcript_id':transcript['transcript_id']}] - # temporary fix for names gene['gene_name'] = gene['name'] gene['full_gene_name'] = gene['full_name'] @@ -132,19 +159,16 @@ def get(self, dataset:str, gene:str, ds_version:str=None): class GetRegion(handlers.UnsafeHandler): - """ - Request information about genes in a region - """ + """Request information about genes in a region.""" + def get(self, dataset:str, region:str, ds_version:str=None): """ - Request information about genes in a region + Request information about genes in a region. Args: dataset (str): short name of the dataset region (str): the region in the format chr-startpos-endpos - - Returns: - dict: information about the region and the genes found there + ds_version (str): dataset version """ dataset, ds_version = utils.parse_dataset(dataset, ds_version) try: @@ -178,12 +202,11 @@ def get(self, dataset:str, region:str, ds_version:str=None): class GetTranscript(handlers.UnsafeHandler): - """ - Request information about a transcript - """ + """Request information about a transcript.""" + def get(self, dataset:str, transcript:str, ds_version:str=None): """ - Request information about a transcript + Request information about a transcript. 
Args: dataset (str): short name of the dataset @@ -191,6 +214,7 @@ def get(self, dataset:str, transcript:str, ds_version:str=None): Returns: dict: transcript (transcript and exons), gene (gene information) + """ dataset, ds_version = utils.parse_dataset(dataset, ds_version) transcript_id = transcript @@ -225,12 +249,11 @@ def get(self, dataset:str, transcript:str, ds_version:str=None): class GetVariant(handlers.UnsafeHandler): - """ - Request information about a gene - """ + """Request information about a variant.""" + def get(self, dataset:str, variant:str, ds_version:str=None): """ - Request information about a gene + Request information about a variant. Args: dataset (str): short name of the dataset @@ -338,12 +361,11 @@ def get(self, dataset:str, variant:str, ds_version:str=None): class GetVariants(handlers.UnsafeHandler): - """ - Retrieve variants - """ + """Retrieve variants.""" + def get(self, dataset:str, datatype:str, item:str, ds_version:str=None): """ - Retrieve variants + Retrieve variants. Args: dataset (str): short name of the dataset @@ -369,12 +391,11 @@ def get(self, dataset:str, datatype:str, item:str, ds_version:str=None): class Search(handlers.UnsafeHandler): - """ - Perform a search for the wanted object - """ + """Perform a search for the wanted object.""" + def get(self, dataset:str, query:str, ds_version:str=None): """ - Perform a search for the wanted object + Perform a search for the wanted object. Args: dataset (str): short name of the dataset diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index 2539c824f..23cbaec19 100644 --- a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ -1,3 +1,6 @@ +"""Lookup functions for the variant browser.""" + + import logging import re @@ -10,7 +13,7 @@ def get_autocomplete(dataset:str, query:str, ds_version:str=None): """ - Provide autocomplete suggestions based on the query + Provide autocomplete suggestions based on the query. 
Args: dataset (str): short name of dataset @@ -19,6 +22,7 @@ def get_autocomplete(dataset:str, query:str, ds_version:str=None): Returns: list: A list of genes names whose beginning matches the query + """ try: ref_set = db.get_dataset_version(dataset, ds_version).reference_set @@ -33,24 +37,24 @@ def get_autocomplete(dataset:str, query:str, ds_version:str=None): def get_awesomebar_result(dataset:str, query:str, ds_version:str=None): """ - Parse the search input + Parse the search input. Datatype is one of: - - 'gene' - - 'transcript' - - 'variant' - - 'dbsnp_variant_set' - - 'region' + * `gene` + * `transcript` + * `variant` + * `dbsnp_variant_set` + * `region` Identifier is one of: - - ensembl ID for gene - - variant ID string for variant (eg. 1-1000-A-T) - - region ID string for region (eg. 1-1000-2000) + * ensembl ID for gene + * variant ID string for variant (eg. 1-1000-A-T) + * region ID string for region (eg. 1-1000-2000) Follow these steps: - - if query is an ensembl ID, return it - - if a gene symbol, return that gene's ensembl ID - - if an RSID, return that variant's string + * if query is an ensembl ID, return it + * if a gene symbol, return that gene's ensembl ID + * if an RSID, return that variant's string Args: dataset (str): short name of dataset @@ -59,6 +63,7 @@ def get_awesomebar_result(dataset:str, query:str, ds_version:str=None): Returns: tuple: (datatype, identifier) + """ query = query.strip() @@ -110,7 +115,7 @@ def get_awesomebar_result(dataset:str, query:str, ds_version:str=None): def get_coverage_for_bases(dataset:str, chrom:str, start_pos:int, end_pos:int=None, ds_version:str=None): """ - Get the coverage for the list of bases given by start_pos->end_pos, inclusive + Get the coverage for the list of bases given by start_pos->end_pos, inclusive. 
Args: dataset (str): short name for the dataset @@ -121,6 +126,7 @@ def get_coverage_for_bases(dataset:str, chrom:str, start_pos:int, end_pos:int=No Returns: list: coverage dicts for the region of interest. None if failed + """ dataset_version = db.get_dataset_version(dataset, ds_version) if not dataset_version: @@ -139,7 +145,7 @@ def get_coverage_for_bases(dataset:str, chrom:str, start_pos:int, end_pos:int=No def get_coverage_for_transcript(dataset:str, chrom:str, start_pos:int, end_pos:int=None, ds_version:str=None): """ - Get the coverage for the list of bases given by start_pos->end_pos, inclusive + Get the coverage for the list of bases given by start_pos->end_pos, inclusive. Args: dataset (str): short name for the dataset @@ -150,6 +156,7 @@ def get_coverage_for_transcript(dataset:str, chrom:str, start_pos:int, end_pos:i Returns: list: coverage dicts for the region of interest + """ # Is this function still relevant with postgres? # Only entries with reported cov are in database @@ -164,7 +171,7 @@ def get_coverage_for_transcript(dataset:str, chrom:str, start_pos:int, end_pos:i def get_exons_in_transcript(dataset:str, transcript_id:str, ds_version=None): """ - Retrieve exons associated with the given transcript id + Retrieve exons associated with the given transcript id. Args: dataset (str): short name of the dataset @@ -173,6 +180,7 @@ def get_exons_in_transcript(dataset:str, transcript_id:str, ds_version=None): Returns: list: dicts with values for each exon sorted by start position + """ try: ref_set = db.get_dataset_version(dataset, ds_version).reference_set @@ -197,7 +205,7 @@ def get_exons_in_transcript(dataset:str, transcript_id:str, ds_version=None): def get_gene(dataset:str, gene_id:str, ds_version:str=None): """ - Retrieve gene by gene id + Retrieve gene by gene id. 
Args: dataset (str): short name of the dataset @@ -206,6 +214,7 @@ def get_gene(dataset:str, gene_id:str, ds_version:str=None): Returns: dict: values for the gene; None if not found + """ try: ref_set = db.get_dataset_version(dataset, ds_version).reference_set @@ -220,13 +229,14 @@ def get_gene(dataset:str, gene_id:str, ds_version:str=None): def get_gene_by_dbid(gene_dbid:str): """ - Retrieve gene by gene database id + Retrieve gene by gene database id. Args: gene_dbid (str): the database id of the gene Returns: dict: values for the gene; empty if not found + """ try: return db.Gene.select().where(db.Gene.id == gene_dbid).dicts().get() @@ -239,7 +249,6 @@ def get_gene_by_dbid(gene_dbid:str): def get_gene_by_name(dataset:str, gene_name:str, ds_version=None): """ Retrieve gene by gene_name. - First checks gene_name, then other_names. Args: dataset (str): short name of the dataset @@ -248,6 +257,7 @@ def get_gene_by_name(dataset:str, gene_name:str, ds_version=None): Returns: dict: values for the gene; empty if not found + """ try: ref_set = db.get_dataset_version(dataset, ds_version).reference_set @@ -274,7 +284,7 @@ def get_gene_by_name(dataset:str, gene_name:str, ds_version=None): def get_genes_in_region(dataset:str, chrom:str, start_pos:int, stop_pos:int, ds_version:str=None): """ - Retrieve genes located within a region + Retrieve genes located within a region. Args: dataset (str): short name of the dataset @@ -285,6 +295,7 @@ def get_genes_in_region(dataset:str, chrom:str, start_pos:int, stop_pos:int, ds_ Returns: dict: values for the gene; empty if not found + """ try: ref_set = db.get_dataset_version(dataset, ds_version).reference_set @@ -300,7 +311,7 @@ def get_genes_in_region(dataset:str, chrom:str, start_pos:int, stop_pos:int, ds_ def get_raw_variant(dataset:str, pos:int, chrom:str, ref:str, alt:str, ds_version:str=None): """ - Retrieve variant by position and change + Retrieve variant by position and change. 
Args: dataset (str): short name of the reference set @@ -312,6 +323,7 @@ def get_raw_variant(dataset:str, pos:int, chrom:str, ref:str, alt:str, ds_versio Returns: dict: values for the variant; None if not found + """ dataset_version = db.get_dataset_version(dataset, ds_version) if not dataset_version: @@ -346,7 +358,8 @@ def get_raw_variant(dataset:str, pos:int, chrom:str, ref:str, alt:str, ds_versio def get_transcript(dataset:str, transcript_id:str, ds_version:str=None): """ - Retrieve transcript by transcript id + Retrieve transcript by transcript id. + Also includes exons as ['exons'] Args: @@ -356,6 +369,7 @@ def get_transcript(dataset:str, transcript_id:str, ds_version:str=None): Returns: dict: values for the transcript, including exons; None if not found + """ try: ref_set = db.get_dataset_version(dataset, ds_version).reference_set @@ -377,13 +391,16 @@ def get_transcript(dataset:str, transcript_id:str, ds_version:str=None): def get_transcripts_in_gene(dataset:str, gene_id:str, ds_version:str=None): """ - Get the transcripts associated with a gene + Get the transcripts associated with a gene. + Args: dataset (str): short name of the reference set gene_id (str): id of the gene ds_version (str): dataset version + Returns: list: transcripts (dict) associated with the gene; empty if no hits + """ try: ref_set = db.get_dataset_version(dataset, ds_version).reference_set @@ -402,18 +419,21 @@ def get_transcripts_in_gene(dataset:str, gene_id:str, ds_version:str=None): def get_transcripts_in_gene_by_dbid(gene_dbid:int): """ - Get the transcripts associated with a gene + Get the transcripts associated with a gene. 
+ Args: gene_dbid (int): database id of the gene + Returns: list: transcripts (dict) associated with the gene; empty if no hits + """ return [transcript for transcript in db.Transcript.select().where(db.Transcript.gene == gene_dbid).dicts()] def get_variant(dataset:str, pos:int, chrom:str, ref:str, alt:str, ds_version:str=None): """ - Retrieve variant by position and change + Retrieve variant by position and change. Args: dataset (str): short name of the dataset @@ -425,6 +445,7 @@ def get_variant(dataset:str, pos:int, chrom:str, ref:str, alt:str, ds_version:st Returns: dict: values for the variant; None if not found + """ variant = get_raw_variant(dataset, pos, chrom, ref, alt, ds_version) variant = get_raw_variant(dataset, pos, chrom, ref, alt, ds_version) @@ -435,7 +456,7 @@ def get_variant(dataset:str, pos:int, chrom:str, ref:str, alt:str, ds_version:st def get_variants_by_rsid(dataset:str, rsid:str, ds_version:str=None): """ - Retrieve variants by their associated rsid + Retrieve variants by their associated rsid. Args: dataset (str): short name of dataset @@ -444,6 +465,7 @@ def get_variants_by_rsid(dataset:str, rsid:str, ds_version:str=None): Returns: list: variants as dict; no hits returns None + """ dataset_version = db.get_dataset_version(dataset, ds_version) if not dataset_version: @@ -470,7 +492,7 @@ def get_variants_by_rsid(dataset:str, rsid:str, ds_version:str=None): def get_variants_in_gene(dataset:str, gene_id:str, ds_version:str=None): """ - Retrieve variants present inside a gene + Retrieve variants present inside a gene. 
Args: dataset (str): short name of the dataset @@ -479,6 +501,7 @@ def get_variants_in_gene(dataset:str, gene_id:str, ds_version:str=None): Returns: list: values for the variants + """ dataset_version = db.get_dataset_version(dataset, ds_version) if not dataset_version: @@ -504,7 +527,7 @@ def get_variants_in_gene(dataset:str, gene_id:str, ds_version:str=None): def get_variants_in_region(dataset:str, chrom:str, start_pos:int, end_pos:int, ds_version:str=None): """ - Variants that overlap a region + Variants that overlap a region. Args: dataset (str): short name of the dataset @@ -515,6 +538,7 @@ def get_variants_in_region(dataset:str, chrom:str, start_pos:int, end_pos:int, d Returns: list: variant dicts, None if no hits + """ dataset_version = db.get_dataset_version(dataset, ds_version) if not dataset_version: @@ -544,7 +568,7 @@ def get_variants_in_region(dataset:str, chrom:str, start_pos:int, end_pos:int, d def get_variants_in_transcript(dataset:str, transcript_id:str, ds_version:str=None): """ - Retrieve variants inside a transcript + Retrieve variants inside a transcript. Args: dataset (str): short name of the dataset @@ -553,6 +577,7 @@ def get_variants_in_transcript(dataset:str, transcript_id:str, ds_version:str=No Returns: dict: values for the variant; None if not found + """ dataset_version = db.get_dataset_version(dataset, ds_version) diff --git a/backend/modules/browser/utils.py b/backend/modules/browser/utils.py index c0104e8c2..072637f87 100644 --- a/backend/modules/browser/utils.py +++ b/backend/modules/browser/utils.py @@ -1,3 +1,5 @@ +"""Utility functions for lookups and browser_handlers.""" + import logging from . import lookups @@ -80,7 +82,7 @@ def add_consequence_to_variants(variant_list:list): """ - Add information about variant consequence to multiple variants + Add information about variant consequence to multiple variants. 
Args: variant_list (list): list of variants @@ -93,7 +95,7 @@ def add_consequence_to_variants(variant_list:list): def add_consequence_to_variant(variant:dict): """ - Add information about variant consequence to a variant + Add information about variant consequence to a variant. Args: variant (dict): variant information @@ -137,6 +139,7 @@ def annotation_severity(annotation:dict): Returns: float: severity score + """ rv = -CSQ_ORDER_DICT[worst_csq_from_csq(annotation['Consequence'])] if annotation['CANONICAL'] == 'YES': @@ -146,7 +149,7 @@ def annotation_severity(annotation:dict): def get_coverage(dataset:str, datatype:str, item:str, ds_version:str=None): """ - Retrieve coverage for a gene/region/transcript + Retrieve coverage for a gene/region/transcript. Args: dataset (str): short name of the dataset @@ -156,6 +159,7 @@ def get_coverage(dataset:str, datatype:str, item:str, ds_version:str=None): Returns: dict: start, stop, coverage list + """ ret = {'coverage':[]} @@ -188,7 +192,7 @@ def get_coverage(dataset:str, datatype:str, item:str, ds_version:str=None): def get_coverage_pos(dataset:str, datatype:str, item:str, ds_version:str=None): """ - Retrieve coverage range + Retrieve coverage range. Args: dataset (str): short name of the dataset @@ -197,6 +201,7 @@ def get_coverage_pos(dataset:str, datatype:str, item:str, ds_version:str=None): Returns: dict: start, stop, chromosome + """ ret = {'start':None, 'stop':None, 'chrom':None} @@ -223,15 +228,17 @@ def get_coverage_pos(dataset:str, datatype:str, item:str, ds_version:str=None): def get_flags_from_variant(variant:dict): """ Get flags from variant. 
+ Checks for: - - MNP (identical length of reference and variant) - - LoF (loss of function) + * MNP + * LoF (loss of function) Args: variant (dict): a variant Returns: list: flags for the variant + """ flags = [] if 'mnps' in variant: @@ -257,6 +264,7 @@ def get_proper_hgvs(annotation:dict): Returns: str: variant effect at aa level in HGVS format (p.), None if parsing fails + """ # Needs major_consequence try: @@ -278,6 +286,7 @@ def get_protein_hgvs(annotation): Returns: str: variant effect at aa level in HGVS format (p.), None if parsing fails + """ try: if '%3D' in annotation['HGVSp']: # "%3D" is "=" @@ -298,6 +307,7 @@ def get_transcript_hgvs(annotation:dict): Returns: str: variant effect at nucleotide level in HGVS format (c.), None if parsing fails + """ try: return annotation['HGVSc'].split(':')[-1] @@ -307,7 +317,7 @@ def get_transcript_hgvs(annotation:dict): def get_variant_list(dataset:str, datatype:str, item:str, ds_version:str=None): """ - Retrieve variants for a datatype + Retrieve variants for a datatype. Args: dataset (str): dataset short name @@ -317,6 +327,7 @@ def get_variant_list(dataset:str, datatype:str, item:str, ds_version:str=None): Returns: dict: {variants:list, headers:list} + """ headers = [['variant_id','Variant'], ['chrom','Chrom'], ['pos','Position'], ['HGVS','Consequence'], ['filter','Filter'], ['major_consequence','Annotation'], @@ -378,13 +389,14 @@ def format_variant(variant): def order_vep_by_csq(annotation_list:list): """ - Adds "major_consequence" to each annotation, orders by severity. + Will add "major_consequence" to each annotation and order by severity. 
Args: annotation_list (list): VEP annotations (as dict) Returns: list: annotations ordered by major consequence severity + """ for ann in annotation_list: try: @@ -395,23 +407,24 @@ def order_vep_by_csq(annotation_list:list): def is_region_too_large(start:int, stop:int): - ''' - Evaluates whether the size of a region is larger than maximum query + """ + Evaluate whether the size of a region is larger than maximum query. + Args: start (int): Start position of the region stop (int): End position of the region Returns: bool: True if too large - ''' + + """ region_limit = 100000 return int(stop)-int(start) > region_limit def parse_dataset(dataset, ds_version=None): """ - Check/parse if the dataset name is in the beacon form: - ``reference:dataset:version`` + Check/parse if the dataset name is in the beacon form (``reference:dataset:version``). Args: dataset (str): short name of the dataset @@ -419,21 +432,22 @@ def parse_dataset(dataset, ds_version=None): Returns: tuple: (dataset, version) + """ beacon_style = dataset.split(':') if len(beacon_style) == 3: return (beacon_style[1], beacon_style[2]) - else: - return (dataset, ds_version) + return (dataset, ds_version) def remove_extraneous_information(variant:dict): - ''' - Remove information that is not used in the frontend from a variant + """ + Remove information that is not used in the frontend from a variant. Args: variant (dict): variant data from database - ''' + + """ del variant['id'] del variant['dataset_version'] del variant['orig_alt_alleles'] @@ -443,13 +457,14 @@ def remove_extraneous_information(variant:dict): def remove_extraneous_vep_annotations(annotation_list:list): """ - Remove annotations with low-impact consequences (less than intron variant) + Remove annotations with low-impact consequences (less than intron variant). 
Args: annotation_list (list): VEP annotations (as dict) Returns: list: VEP annotations with higher impact + """ return [ann for ann in annotation_list if worst_csq_index(ann['Consequence'].split('&')) <= CSQ_ORDER_DICT['intron_variant']] @@ -457,26 +472,28 @@ def remove_extraneous_vep_annotations(annotation_list:list): def worst_csq_from_list(csq_list:list): """ - Choose the worst consequence + Choose the worst consequence. Args: csq_list (list): list of consequences Returns: str: the worst consequence + """ return REV_CSQ_ORDER_DICT[worst_csq_index(csq_list)] def worst_csq_from_csq(csq:str): """ - Find worst consequence in a possibly &-filled consequence string + Find worst consequence in a possibly &-filled consequence string. Args: csq (str): string of consequences, seperated with & (if multiple) Returns: str: the worst consequence + """ return REV_CSQ_ORDER_DICT[worst_csq_index(csq.split('&'))] @@ -484,27 +501,31 @@ def worst_csq_from_csq(csq:str): def worst_csq_index(csq_list:list): """ Find the index of the worst consequence. - Corresponds to the lowest value (index) from CSQ_ORDER_DICT + + Corresponds to the lowest value (index) from CSQ_ORDER_DICT. Args: csq_list (list): consequences Returns: int: index in CSQ_ODER_DICT of the worst consequence + """ return min([CSQ_ORDER_DICT[csq] for csq in csq_list]) def worst_csq_with_vep(annotation_list:list): """ - Choose the vep annotation with the most severe consequence - Adds a"major_consequence" field for that annotation + Choose the vep annotation with the most severe consequence. + + Add a "major_consequence" field for that annotation. Args: annotation_list (list): VEP annotations Returns: dict: the annotation with the most severe consequence + """ if not annotation_list: return None