From 1ff67c239a8f35568aced7cd2f986d7470dea6ff Mon Sep 17 00:00:00 2001 From: Tyler Biggs Date: Fri, 24 Apr 2020 11:01:51 -0700 Subject: [PATCH] minor bug fixes, attempted nbsite version change. --- GSForge/models/_AnnotatedGEM.py | 2 +- GSForge/models/_GeneSetCollection.py | 84 +++++++++++++++++++++++++--- GSForge/models/_Interface.py | 2 + GSForge/panels/_umap_panel.py | 12 ++-- GSForge/plots/results/_volcano.py | 8 +-- GSForge/plots/utils.py | 10 +++- setup.py | 2 +- 7 files changed, 98 insertions(+), 22 deletions(-) diff --git a/GSForge/models/_AnnotatedGEM.py b/GSForge/models/_AnnotatedGEM.py index 11d5487..b644366 100644 --- a/GSForge/models/_AnnotatedGEM.py +++ b/GSForge/models/_AnnotatedGEM.py @@ -371,5 +371,5 @@ def save(self, path: Union[str, Path, IO[AnyStr]]) -> str: if isinstance(value, str)} params_str = json.dumps(params_to_save) self.data.attrs.update({"__GSForge.AnnotatedGEM.params": params_str}) - self.data.to_netcdf(path, mode="w") + self.data.to_netcdf(path) return path diff --git a/GSForge/models/_GeneSetCollection.py b/GSForge/models/_GeneSetCollection.py index c055519..8ae3e0f 100644 --- a/GSForge/models/_GeneSetCollection.py +++ b/GSForge/models/_GeneSetCollection.py @@ -7,6 +7,7 @@ from functools import reduce from textwrap import dedent from typing import Dict, Tuple, List, Union, Callable, IO, AnyStr, FrozenSet +from collections import defaultdict # import methodtools import numpy as np @@ -205,17 +206,15 @@ def gene_sets_to_excel_sheet(self, name: str = None, keys: List[str] = None, onl def _as_dict(self, keys: Tuple[AnyStr]) -> Dict[str, np.ndarray]: return copy.deepcopy({key: self.gene_sets[key].gene_support() for key in keys}) - def _parse_keys(self, keys: List[str] = None, exclude: List[str] = None, - empty_supports: bool = False) -> Tuple[AnyStr]: + def _parse_keys(self, keys: List[str] = None, exclude: List[str] = None) -> Tuple[AnyStr]: # Use all keys in the collection if none are provided. - keys = self.gene_sets.keys() if keys is None else keys + keys = list(self.gene_sets.keys()) if keys is None else list(keys) + exclude = [] if exclude is None else exclude # Ensure all keys provided are actually within the collection. - if not all(key in self.gene_sets.keys() for key in keys): - raise ValueError(f"Not all keys given were found in the available keys: {list(self.gene_sets.keys())}") - - if empty_supports is False: - keys = [key for key in keys if self.gene_sets[key].support_exists] + for key in keys + exclude: + if key not in self.gene_sets.keys(): + raise ValueError(f"Key {key} not found in available keys:\n{list(self.gene_sets.keys())}") if exclude is not None: keys = [key for key in keys if key not in exclude] @@ -246,7 +245,7 @@ def as_dict(self, keys: List[str] = None, exclude: List[str] = None, ------- dict : Dictionary of {name: supported_genes} for each GeneSet. """ - sorted_keys = self._parse_keys(keys, exclude, empty_supports) + sorted_keys = self._parse_keys(keys, exclude) return self._as_dict(sorted_keys) # @methodtools.lru_cache() @@ -445,6 +444,73 @@ def pairwise_percent_intersection(self, keys=None, exclude=None) -> List[Tuple[s for (ak, av), (bk, bv) in itertools.permutations(zero_filtered_dict.items(), 2) if ak != bk] + def construct_standard_specification(self, include: List[str] = None, exclude=None) -> dict: + """ + Construct a standard specification that can be used to view unions, intersections and + differences (unique genes) of the sets within this collection. + + Parameters + ---------- + include : List[str] + An optional list of gene_set keys to return, by default all keys are selected. + + exclude : List[str] + An optional list of `GeneSet` keys to exclude from the returned dictionary. + + Returns + ------- + dict: A specification dictionary. + """ + + include = self._parse_keys(include, exclude) + standard_spec = defaultdict(list) + + standard_spec['union'].append({'name': f'{self.name}__standard_union', + 'keys': include}) + + standard_spec['intersection'].append({'name': f'{self.name}__standard_intersection', + 'keys': include}) + + for primary_key in include: + other_keys = [key for key in include if primary_key != key] + standard_spec['difference'].append({'name': f'{self.name}__{primary_key}__unique', + 'primary_key': primary_key, + 'other_keys': other_keys}) + + return standard_spec + + def process_set_operation_specification(self, specification: dict = None) -> dict: + """ + Calls and stores the results from a specification. The specification must declare + set operation functions and their arguments. + + Parameters + ---------- + specification : Dict + """ + # TODO: Add input validation for the specification. + + function_map = { + 'intersection': self.intersection, + 'union': self.union, + 'difference': self.difference, + 'joint_difference': self.joint_difference, + 'pairwise_unions': self.pairwise_unions, + 'pairwise_intersection': self.pairwise_intersection, + 'pairwise_percent_intersection': self.pairwise_percent_intersection, + } + + processed_spec = dict() + + for key, function in function_map.items(): + if specification.get(key): + for entry in specification.get(key): + name = entry.pop('name') + # print(entry) + processed_spec[name] = function(**entry) + + return processed_spec + ############################################################################################### # CONSTRUCTOR FUNCTIONS ############################################################################################### diff --git a/GSForge/models/_Interface.py b/GSForge/models/_Interface.py index 89e4507..00c7511 100644 --- a/GSForge/models/_Interface.py +++ b/GSForge/models/_Interface.py @@ -176,6 +176,8 @@ def __init__(self, *args, **params): if self.count_variable is None: self.set_param(**{"count_variable": self.gem.count_array_name}) + self.param["count_variable"].objects = self.gem.count_array_names + [None] + if self.gene_set_collection is not None: avail_mappings = list(self.gene_set_collection.gene_sets.keys()) self.param["selected_gene_sets"].objects = avail_mappings + [None] diff --git a/GSForge/panels/_umap_panel.py b/GSForge/panels/_umap_panel.py index 6a2c8ba..4fda387 100644 --- a/GSForge/panels/_umap_panel.py +++ b/GSForge/panels/_umap_panel.py @@ -14,9 +14,7 @@ #TODO: Allow size selection of the points drawn. class UMAP_Panel(param.Parameterized): - """A UMAP Panel Exploration Tool. - - """ + """A UMAP Panel Exploration Tool.""" interface = param.Parameter( precedence=-1.0, @@ -158,6 +156,8 @@ def view(self): df = self.interface.gem.data[self.data_var_cats["all_labels"]].to_dataframe().reset_index() # TODO: Consider how a more robust hash could be created. gene_set = frozenset(self.interface.get_gene_index()) + if len(gene_set) == 0: + return pn.pane.Markdown('No genes in selected set.') transform_state = frozenset(self.get_transform_kwargs().items()) count_array_state = frozenset(self.interface.count_variable) @@ -172,8 +172,10 @@ def view(self): points = hv.Points(df, kdims=["x", "y"]) if self.hue is not None: - points = hv.NdOverlay({key: points.select(**{self.hue: key}) - for key in points.data[self.hue].unique()}) + df[self.hue] = df[self.hue].astype(str) + points = points.opts(color=self.hue) + # points = hv.NdOverlay({key: points.select(**{self.hue: key}) + # for key in points.data[self.hue].unique()}) return points.opts(self.bokeh_opts()).opts(tools=[hover]) diff --git a/GSForge/plots/results/_volcano.py b/GSForge/plots/results/_volcano.py index 95b750a..62907c3 100644 --- a/GSForge/plots/results/_volcano.py +++ b/GSForge/plots/results/_volcano.py @@ -61,9 +61,9 @@ def bokeh_opts(): hv.opts.VLine(backend='bokeh', color="black", line_width=0.75, line_dash='dashed'), hv.opts.Labels(backend='bokeh', xoffset=0.6, yoffset=3, text_font_size='7pt'), hv.opts.Points("No Signal", color="black"), - hv.opts.Points("LFC within pval", color="red"), + hv.opts.Points("LFC within pval", color="red", tools=['hover']), hv.opts.Points("LFC outside pval", color="green"), - hv.opts.Points("No LFC within pval", color="blue"), + hv.opts.Points("No LFC within pval", color="blue", tools=['hover']), ] @staticmethod @@ -112,7 +112,7 @@ def volcano(source: xr.Dataset, for key, selection in gene_groups.items(): df.loc[selection.values, "Gene_group"] = key - kdims = [("lfc", "$log_2$ fold change"), ("p-values", "$-log_{10}$ p-values")] + kdims = [("lfc", "log2 fold change"), ("p-values", "$-log10 p-values")] vdims = ["Gene_group", "Gene"] groups = df.groupby("Gene_group").groups @@ -131,7 +131,7 @@ def volcano(source: xr.Dataset, * hv.VLine(log_fold_change_cutoff) \ * hv.VLine(-log_fold_change_cutoff) - return layout + return layout.opts(title=f'Volcano with p-value: {p_value_cutoff}, LFC: {log_fold_change_cutoff}') def process(self): kwargs = {**self.infer_kwarg_defaults_from_data(self.source, self.volcano), diff --git a/GSForge/plots/utils.py b/GSForge/plots/utils.py index 6a31e9c..2635e84 100644 --- a/GSForge/plots/utils.py +++ b/GSForge/plots/utils.py @@ -13,8 +13,14 @@ "logFC", # from EdgeR. "log2FoldChange", # from DESeq2. ], - mean_value_var=["baseMean", "logCPM"], - p_value_var=["pvalue", "PValue"] + mean_value_var=[ + "baseMean", + "logCPM", + ], + p_value_var=[ + "pvalue", + "PValue", + ] ) diff --git a/setup.py b/setup.py index 7aea9d7..4fa27ef 100644 --- a/setup.py +++ b/setup.py @@ -63,7 +63,7 @@ install_requires=requirements, extras_require={ 'docs': [ - 'nbsite', + 'nbsite==0.6.7', 'nbsphinx', 'selenium', 'sphinx_ioam_theme',