Skip to content

Commit

Permalink
Merge pull request #138 from PixelgenTechnologies/feature/exe-1716-Ad…
Browse files Browse the repository at this point in the history
…ding-rate-diff-transformation

Adding the rate_diff_transformation function.
  • Loading branch information
ptajvar committed May 15, 2024
2 parents 12c9bbc + 00be3d6 commit 04dbc28
Show file tree
Hide file tree
Showing 6 changed files with 87 additions and 2 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added

* Add `rate_diff_transformation` function with `rate-diff` alias as an alternative option for transforming marker counts before colocalization calculation.
* Add `local_g` function to compute spatial autocorrelation of marker counts per node.
* Add `compute_transition_probabilities` function to compute transition probabilities for k-step random walks for node pairs in a graph.
* Add QC plot showing UMIs per UPIA vs Tau.
Expand Down
8 changes: 7 additions & 1 deletion src/pixelator/analysis/colocalization/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,11 @@
from pixelator.analysis.permute import permutations
from pixelator.graph.utils import Graph
from pixelator.pixeldataset import PixelDataset
from pixelator.statistics import correct_pvalues, log1p_transformation
from pixelator.statistics import (
correct_pvalues,
log1p_transformation,
rate_diff_transformation,
)

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -91,6 +95,8 @@ def _transform_data(
return data
if transform == "log1p":
return log1p_transformation(data)
if transform == "rate-diff":
return rate_diff_transformation(data)
raise ValueError(
f"`transform`must be one of: {'/'.join(get_args(TransformationTypes))}"
)
Expand Down
2 changes: 1 addition & 1 deletion src/pixelator/analysis/colocalization/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

MarkerColocalizationResults = pd.DataFrame

TransformationTypes = Literal["raw", "log1p"]
TransformationTypes = Literal["raw", "log1p", "rate-diff"]


@dataclass
Expand Down
21 changes: 21 additions & 0 deletions src/pixelator/statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,27 @@ def log1p_transformation(df: pd.DataFrame) -> pd.DataFrame:
return log1p_df


def rate_diff_transformation(df: pd.DataFrame) -> pd.DataFrame:
    """Transform antibody counts as deviation from an expected baseline distribution.

    In this function we refer to baseline distribution as a fixed ratio of
    different antibody types in each node. For example, if in total 10% of
    antibodies are HLA-ABC, in a node with 120 antibodies we expect to see
    12 HLA-ABC counts. If we actually see 8 counts in this node, the
    rate_diff_transformation for HLA-ABC in this node will be -4.

    If the dataframe contains no counts at all (the grand total is zero) the
    expected count is zero everywhere, so the input is returned as floats
    instead of a frame of NaN values produced by a 0/0 division.

    :param df: the dataframe of raw antibody counts (antibodies as columns)
    :returns: a dataframe with the counts difference from expected values
    :rtype: pd.DataFrame
    """
    antibody_counts_per_node = df.sum(axis=1)
    antibody_totals = df.sum(axis=0)
    total_count = antibody_totals.sum()

    if total_count == 0:
        # Raw counts are non-negative, so a zero grand total means every entry
        # is zero: the expected counts are zero too and the deviation equals
        # the input. Returning early avoids the 0/0 -> NaN rates below.
        return df.astype(float)

    # Overall fraction of all counts that belongs to each antibody.
    antibody_rates = antibody_totals / total_count

    # Outer product (nodes x 1) @ (1 x antibodies): each node's total count
    # distributed across the antibodies according to the overall rates.
    expected_counts = antibody_counts_per_node.to_frame() @ antibody_rates.to_frame().T
    return df - expected_counts


def rel_normalization(df: pd.DataFrame, axis: Literal[0, 1] = 0) -> pd.DataFrame:
"""Normalize antibody counts to the relative amount per marker or component.
Expand Down
40 changes: 40 additions & 0 deletions tests/analysis/colocalization/test_colocalization.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,46 @@ def test_colocalization_scores_log1p(enable_backend, full_graph_edgelist: pd.Dat
assert_frame_equal(result, expected)


@pytest.mark.parametrize("enable_backend", ["networkx"], indirect=True)
def test_colocalization_scores_ratediff(
    enable_backend, full_graph_edgelist: pd.DataFrame
):
    """Check the colocalization scores obtained with the "rate-diff" transformation.

    The scores computed on the `full_graph_edgelist` fixture are compared
    against a single expected row for the marker pair A/B.
    """
    # The one row expected in the result, keyed by output column name.
    expected_row = {
        "marker_1": "A",
        "marker_2": "B",
        "pearson": -1.0,
        "pearson_mean": -1.0,
        "pearson_stdev": 0.0,
        "pearson_z": np.nan,
        "pearson_p_value": np.nan,
        "pearson_p_value_adjusted": np.nan,
        "jaccard": 1.0,
        "jaccard_mean": 1.0,
        "jaccard_stdev": 0.0,
        "jaccard_z": np.nan,
        "jaccard_p_value": np.nan,
        "jaccard_p_value_adjusted": np.nan,
        "component": "PXLCMP0000000",
    }
    expected = pd.DataFrame.from_dict({0: expected_row}, orient="index")

    scores = colocalization_scores(
        edgelist=full_graph_edgelist,
        use_full_bipartite=True,
        transformation="rate-diff",
        neighbourhood_size=1,
        n_permutations=50,
        min_region_count=0,
        random_seed=1477,
    )

    assert_frame_equal(scores, expected)


@pytest.mark.parametrize("enable_backend", ["networkx"], indirect=True)
def test_colocalization_scores_should_not_fail_when_one_component_has_single_node(
enable_backend,
Expand Down
17 changes: 17 additions & 0 deletions tests/test_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
clr_transformation,
correct_pvalues,
log1p_transformation,
rate_diff_transformation,
rel_normalization,
)

Expand Down Expand Up @@ -120,6 +121,22 @@ def test_clr_standard_transformation_axis_1():
assert_frame_equal(norm_counts, expected)


def test_rate_diff_transformation():
    """Check `rate_diff_transformation` on a two-node, three-marker count table."""
    node_ids = ["0000000", "0000001"]
    markers = ["A", "B", "C"]

    raw_counts = pd.DataFrame(
        [[7.0, 3.0, 10.0], [10.0, 2.0, 5.0]],
        index=node_ids,
        columns=markers,
    )
    # Observed counts minus the counts expected from the overall marker rates.
    expected_deviation = pd.DataFrame(
        [[-2.189189, 0.2972973, 1.89189189], [2.189189, -0.2972973, -1.89189189]],
        index=node_ids,
        columns=markers,
    )

    assert_frame_equal(rate_diff_transformation(raw_counts), expected_deviation)


def test_rel_normalization():
antibody_counts = pd.DataFrame(
[[7.0, 3.0, 10.0], [10.0, 2.0, 5.0]],
Expand Down

0 comments on commit 04dbc28

Please sign in to comment.