From 09b747dd559863f3d7a41c66bfe09a29ce527070 Mon Sep 17 00:00:00 2001 From: tiesmeys Date: Tue, 22 Oct 2024 17:01:35 +0200 Subject: [PATCH 1/2] Some additional refactorings, removed unused vars in ovrlpy/_utils.py, resolved double localmax search. --- ovrlpy/_ovrlp.py | 13 +++++----- ovrlpy/_utils.py | 63 +++++++++++++++-------------------------------- 2 files changed, 25 insertions(+), 51 deletions(-) diff --git a/ovrlpy/_ovrlp.py b/ovrlpy/_ovrlp.py index 9901821..fa80874 100644 --- a/ovrlpy/_ovrlp.py +++ b/ovrlpy/_ovrlp.py @@ -20,7 +20,7 @@ _compute_divergence_patched, _create_histogram, _create_knn_graph, - _determine_localmax, + _determine_localmax_and_sample, _fill_color_axes, _get_knn_expression, _get_spatial_subsample_mask, @@ -274,7 +274,7 @@ def get_pseudocell_locations( df, genes=genes, min_expression=min_expression, KDE_bandwidth=KDE_bandwidth ) - pseudocell_locations_x, pseudocells_y, _ = _determine_localmax( + pseudocell_locations_x, pseudocells_y, _ = _determine_localmax_and_sample( hist, min_distance=min_distance, min_expression=min_expression ) @@ -443,7 +443,7 @@ def detect_doublets( if integrity_sigma is not None: integrity_map = gaussian_filter(integrity_map, integrity_sigma) - dist_x, dist_y, dist_t = _determine_localmax( + dist_x, dist_y, dist_t = _determine_localmax_and_sample( (1 - integrity_map) * (signal_map > minimum_signal_strength), min_distance=min_distance, min_expression=integrity_threshold, @@ -787,7 +787,6 @@ def transform(self, coordinate_df: pd.DataFrame): self.pca_2d, embedder_2d=self.embedder_2d, embedder_3d=self.embedder_3d, - colors_min_max=self.colors_min_max, ) subsample_embedding_color, _ = _fill_color_axes( subsample_embedding_color, self.pca_3d @@ -825,8 +824,8 @@ def pseudocell_df(self) -> pd.DataFrame: def plot_region_of_interest( self, - subsample, - subsample_embedding_color, + subsample: pd.DataFrame, + subsample_embedding_color: np.ndarray, x: float = None, y: float = None, window_size: int = None, @@ -839,7
+838,7 @@ def plot_region_of_interest( ---------- subsample : pandas.DataFrame A dataframe of molecule coordinates and gene assignments. - subsample_embedding_color : Optional[pandas.DataFrame] + subsample_embedding_color : pandas.DataFrame A list of rgb values for each molecule. x : float Center x-coordinate for the region-of-interest. diff --git a/ovrlpy/_utils.py b/ovrlpy/_utils.py index cbcdb4e..db5c940 100644 --- a/ovrlpy/_utils.py +++ b/ovrlpy/_utils.py @@ -1,20 +1,22 @@ from concurrent.futures import ThreadPoolExecutor, as_completed +import matplotlib.patheffects as PathEffects + # create circular kernel: # draw outlines around artist: -import matplotlib.patheffects as PathEffects import matplotlib.pyplot as plt import numpy as np import pandas as pd import tqdm -from scipy.ndimage import gaussian_filter, maximum_filter +from scipy.ndimage import gaussian_filter from sklearn.decomposition import PCA from sklearn.neighbors import NearestNeighbors -from ._ssam2 import kde_2d +from ._ssam2 import find_local_maxima, kde_2d -def _draw_outline(ax, artist, lw=2, color="black"): +def _draw_outline(artist, lw=2, color="black"): + "Draws outlines around the (text) artists for better legibility." _ = artist.set_path_effects( [PathEffects.withStroke(linewidth=lw, foreground=color), PathEffects.Normal()] ) @@ -43,33 +45,12 @@ def _plot_scalebar( ) if edge_color is not None: - _draw_outline(ax, plot_artist[0], lw=5, color=edge_color) - _draw_outline(ax, text_artist, lw=5, color=edge_color) + _draw_outline(plot_artist[0], lw=5, color=edge_color) + _draw_outline(text_artist, lw=5, color=edge_color) return plot_artist, text_artist -def _create_circular_kernel(r): - """ - Creates a circular kernel of radius r. - - Parameters - ---------- - r : int - The radius of the kernel. - - Returns - ------- - kernel : np.array - A 2d array of the circular kernel. 
- - """ - - span = np.linspace(-1, 1, r * 2) - X, Y = np.meshgrid(span, span) - return (X**2 + Y**2) ** 0.5 <= 1 - - def _get_kl_divergence(p, q): # mask = (p!=0) * (q!=0) output = np.zeros(p.shape) @@ -78,7 +59,7 @@ def _get_kl_divergence(p, q): return output -def _determine_localmax(distribution, min_distance=3, min_expression=5): +def _determine_localmax_and_sample(distribution, min_distance=3, min_expression=5): """ Returns a list of local maxima in a kde of the data frame. @@ -99,12 +80,8 @@ def _determine_localmax(distribution, min_distance=3, min_expression=5): A list of y coordinates of local maxima. """ - localmax_kernel = _create_circular_kernel(min_distance) - localmax_projection = distribution == maximum_filter( - distribution, footprint=localmax_kernel - ) - rois_x, rois_y = np.where((distribution > min_expression) & localmax_projection) + rois_x, rois_y = find_local_maxima(distribution, min_distance, min_expression) return rois_x, rois_y, distribution[rois_x, rois_y] @@ -148,15 +125,15 @@ def _min_to_max(arr, arr_min=None, arr_max=None): # define a function that fits expression data to into the umap embeddings: def _transform_embeddings( - expression, pca, embedder_2d, embedder_3d, colors_min_max=[None, None] + expression, + pca, + embedder_2d, + embedder_3d, ): factors = pca.transform(expression) embedding = embedder_2d.transform(factors) embedding_color = embedder_3d.transform(factors) - # embedding_color = embedder_3d.transform(embedding) - - # embedding_color = _min_to_max(embedding_color,colors_min_max[0],colors_min_max[1]) return embedding, embedding_color @@ -192,12 +169,12 @@ def _plot_embeddings( ) text_artists = [] - for i in range(len(celltypes)): + for i, celltype in enumerate(celltypes): if not np.isnan(celltype_centers[i, 0]): t = ax.text( np.nan_to_num((celltype_centers[i, 0])), np.nan_to_num(celltype_centers[i, 1]), - celltypes[i], + celltype, color="k", fontsize=12, ) @@ -364,6 +341,9 @@ def _compute_divergence_embedded( 
metric="cosine_similarity", pca_divergence=0.8, ): + """This is a legacy function, replaced by _compute_divergence_patched. It contains other similarity measures than cosine similarity. + To be integrated into the patch-based divergence computation later. + """ signal = _create_histogram( df, genes, @@ -381,9 +361,6 @@ def _compute_divergence_embedded( df_top = df[df.z_delim < df.z] df_bot = df[df.z_delim > df.z] - # dr_bottom = np.zeros((df_bottom.shape[0],df_bottom.shape[1], pca.components_.shape[0])) - # dr_top = np.zeros((df_bottom.shape[0],df_bottom.shape[1], pca.components_.shape[0])) - hists_top = np.zeros((mask.sum(), pca.components_.shape[0])) hists_bot = np.zeros((mask.sum(), pca.components_.shape[0])) @@ -481,8 +458,6 @@ def pearson_cross_correlation(a, b): def _compute_embedding_vectors(subset_df, signal_mask, factor): - # for i,g in tqdm.tqdm(enumerate(genes),total=len(genes)): - if len(subset_df) < 2: return None, None From b95239b689d4641f6e2a7e10c7f8d730aa4e912d Mon Sep 17 00:00:00 2001 From: tiesmeys Date: Tue, 22 Oct 2024 17:02:28 +0200 Subject: [PATCH 2/2] Some additional refactorings, removed unused vars in ovrlpy/_utils.py, resolved double localmax search. --- ovrlpy/_utils.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/ovrlpy/_utils.py b/ovrlpy/_utils.py index db5c940..51ca6da 100644 --- a/ovrlpy/_utils.py +++ b/ovrlpy/_utils.py @@ -1,9 +1,6 @@ from concurrent.futures import ThreadPoolExecutor, as_completed import matplotlib.patheffects as PathEffects - -# create circular kernel: -# draw outlines around artist: import matplotlib.pyplot as plt import numpy as np import pandas as pd