Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 6 additions & 7 deletions ovrlpy/_ovrlp.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
_compute_divergence_patched,
_create_histogram,
_create_knn_graph,
_determine_localmax,
_determine_localmax_and_sample,
_fill_color_axes,
_get_knn_expression,
_get_spatial_subsample_mask,
Expand Down Expand Up @@ -274,7 +274,7 @@ def get_pseudocell_locations(
df, genes=genes, min_expression=min_expression, KDE_bandwidth=KDE_bandwidth
)

pseudocell_locations_x, pseudocells_y, _ = _determine_localmax(
pseudocell_locations_x, pseudocells_y, _ = _determine_localmax_and_sample(
hist, min_distance=min_distance, min_expression=min_expression
)

Expand Down Expand Up @@ -443,7 +443,7 @@ def detect_doublets(
if integrity_sigma is not None:
integrity_map = gaussian_filter(integrity_map, integrity_sigma)

dist_x, dist_y, dist_t = _determine_localmax(
dist_x, dist_y, dist_t = _determine_localmax_and_sample(
(1 - integrity_map) * (signal_map > minimum_signal_strength),
min_distance=min_distance,
min_expression=integrity_threshold,
Expand Down Expand Up @@ -787,7 +787,6 @@ def transform(self, coordinate_df: pd.DataFrame):
self.pca_2d,
embedder_2d=self.embedder_2d,
embedder_3d=self.embedder_3d,
colors_min_max=self.colors_min_max,
)
subsample_embedding_color, _ = _fill_color_axes(
subsample_embedding_color, self.pca_3d
Expand Down Expand Up @@ -825,8 +824,8 @@ def pseudocell_df(self) -> pd.DataFrame:

def plot_region_of_interest(
self,
subsample,
subsample_embedding_color,
subsample: pd.DataFrame,
subsample_embedding_color: np.ndarray,
x: float = None,
y: float = None,
window_size: int = None,
Expand All @@ -839,7 +838,7 @@ def plot_region_of_interest(
----------
subsample : pandas.DataFrame
A dataframe of molecule coordinates and gene assignments.
subsample_embedding_color : Optional[pandas.DataFrame]
subsample_embedding_color : numpy.ndarray
A list of rgb values for each molecule.
x : float
Center x-coordinate for the region-of-interest.
Expand Down
62 changes: 17 additions & 45 deletions ovrlpy/_utils.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,19 @@
from concurrent.futures import ThreadPoolExecutor, as_completed

# create circular kernel:
# draw outlines around artist:
import matplotlib.patheffects as PathEffects
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tqdm
from scipy.ndimage import gaussian_filter, maximum_filter
from scipy.ndimage import gaussian_filter
from sklearn.decomposition import PCA
from sklearn.neighbors import NearestNeighbors

from ._ssam2 import kde_2d
from ._ssam2 import find_local_maxima, kde_2d


def _draw_outline(ax, artist, lw=2, color="black"):
def _draw_outline(artist, lw=2, color="black"):
"Draws outlines around the (text) artists for better legibility."
_ = artist.set_path_effects(
[PathEffects.withStroke(linewidth=lw, foreground=color), PathEffects.Normal()]
)
Expand Down Expand Up @@ -43,33 +42,12 @@ def _plot_scalebar(
)

if edge_color is not None:
_draw_outline(ax, plot_artist[0], lw=5, color=edge_color)
_draw_outline(ax, text_artist, lw=5, color=edge_color)
_draw_outline(plot_artist[0], lw=5, color=edge_color)
_draw_outline(text_artist, lw=5, color=edge_color)

return plot_artist, text_artist


def _create_circular_kernel(r):
"""
Creates a circular kernel of radius r.

Parameters
----------
r : int
The radius of the kernel.

Returns
-------
kernel : np.array
A 2d array of the circular kernel.

"""

span = np.linspace(-1, 1, r * 2)
X, Y = np.meshgrid(span, span)
return (X**2 + Y**2) ** 0.5 <= 1


def _get_kl_divergence(p, q):
# mask = (p!=0) * (q!=0)
output = np.zeros(p.shape)
Expand All @@ -78,7 +56,7 @@ def _get_kl_divergence(p, q):
return output


def _determine_localmax(distribution, min_distance=3, min_expression=5):
def _determine_localmax_and_sample(distribution, min_distance=3, min_expression=5):
"""
Returns the coordinates of local maxima in a distribution, together with the distribution's values sampled at those maxima.

Expand All @@ -99,12 +77,8 @@ def _determine_localmax(distribution, min_distance=3, min_expression=5):
A list of y coordinates of local maxima.

"""
localmax_kernel = _create_circular_kernel(min_distance)
localmax_projection = distribution == maximum_filter(
distribution, footprint=localmax_kernel
)

rois_x, rois_y = np.where((distribution > min_expression) & localmax_projection)
rois_x, rois_y = find_local_maxima(distribution, min_distance, min_expression)

return rois_x, rois_y, distribution[rois_x, rois_y]

Expand Down Expand Up @@ -148,15 +122,15 @@ def _min_to_max(arr, arr_min=None, arr_max=None):

# define a function that fits expression data into the umap embeddings:
def _transform_embeddings(
expression, pca, embedder_2d, embedder_3d, colors_min_max=[None, None]
expression,
pca,
embedder_2d,
embedder_3d,
):
factors = pca.transform(expression)

embedding = embedder_2d.transform(factors)
embedding_color = embedder_3d.transform(factors)
# embedding_color = embedder_3d.transform(embedding)

# embedding_color = _min_to_max(embedding_color,colors_min_max[0],colors_min_max[1])

return embedding, embedding_color

Expand Down Expand Up @@ -192,12 +166,12 @@ def _plot_embeddings(
)

text_artists = []
for i in range(len(celltypes)):
for i, celltype in enumerate(celltypes):
if not np.isnan(celltype_centers[i, 0]):
t = ax.text(
np.nan_to_num((celltype_centers[i, 0])),
np.nan_to_num(celltype_centers[i, 1]),
celltypes[i],
celltype,
color="k",
fontsize=12,
)
Expand Down Expand Up @@ -364,6 +338,9 @@ def _compute_divergence_embedded(
metric="cosine_similarity",
pca_divergence=0.8,
):
"""This is a legacy function that has been replaced by _compute_divergence_patched. It supports similarity measures other than cosine similarity,
which are to be integrated into the patch-based divergence computation later.
"""
signal = _create_histogram(
df,
genes,
Expand All @@ -381,9 +358,6 @@ def _compute_divergence_embedded(
df_top = df[df.z_delim < df.z]
df_bot = df[df.z_delim > df.z]

# dr_bottom = np.zeros((df_bottom.shape[0],df_bottom.shape[1], pca.components_.shape[0]))
# dr_top = np.zeros((df_bottom.shape[0],df_bottom.shape[1], pca.components_.shape[0]))

hists_top = np.zeros((mask.sum(), pca.components_.shape[0]))
hists_bot = np.zeros((mask.sum(), pca.components_.shape[0]))

Expand Down Expand Up @@ -481,8 +455,6 @@ def pearson_cross_correlation(a, b):


def _compute_embedding_vectors(subset_df, signal_mask, factor):
# for i,g in tqdm.tqdm(enumerate(genes),total=len(genes)):

if len(subset_df) < 2:
return None, None

Expand Down