Merge pull request #138 from PixelgenTechnologies/feature/exe-1716-Ad…

…ding-rate-diff-transformation Adding the rate_diff_transformation function.
PixelgenTechnologies · May 15, 2024 · 04dbc28 · 04dbc28
2 parents 12c9bbc + 00be3d6
commit 04dbc28
Show file tree

Hide file tree

Showing 6 changed files with 87 additions and 2 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 
+* Add `rate_diff_transformation` function with `rate-diff` alias as an alternative option for transforming marker counts before colocalization calculation.
 * Add `local_g` function to compute spatial autocorrelation of marker counts per node.
 * Add `compute_transition_probabilities` function to compute transition probabilities for k-step random walks for node pairs in a graph.
 * Add QC plot showing UMIs per UPIA vs Tau.

diff --git a/src/pixelator/analysis/colocalization/__init__.py b/src/pixelator/analysis/colocalization/__init__.py
@@ -34,7 +34,11 @@
 from pixelator.analysis.permute import permutations
 from pixelator.graph.utils import Graph
 from pixelator.pixeldataset import PixelDataset
-from pixelator.statistics import correct_pvalues, log1p_transformation
+from pixelator.statistics import (
+    correct_pvalues,
+    log1p_transformation,
+    rate_diff_transformation,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -91,6 +95,8 @@ def _transform_data(
         return data
     if transform == "log1p":
         return log1p_transformation(data)
+    if transform == "rate-diff":
+        return rate_diff_transformation(data)
     raise ValueError(
         f"`transform`must be one of: {'/'.join(get_args(TransformationTypes))}"
     )

diff --git a/src/pixelator/analysis/colocalization/types.py b/src/pixelator/analysis/colocalization/types.py
@@ -12,7 +12,7 @@
 
 MarkerColocalizationResults = pd.DataFrame
 
-TransformationTypes = Literal["raw", "log1p"]
+TransformationTypes = Literal["raw", "log1p", "rate-diff"]
 
 
 @dataclass

diff --git a/src/pixelator/statistics.py b/src/pixelator/statistics.py
@@ -131,6 +131,27 @@ def log1p_transformation(df: pd.DataFrame) -> pd.DataFrame:
     return log1p_df
 
 
+def rate_diff_transformation(df: pd.DataFrame) -> pd.DataFrame:
+    """Transform antibody counts as deviation from an expected baseline distribution.
+
+    In this function we refer to baseline distribution as fixed ratio of different
+    antibody types in each node. For example, if in total 10% of antibodies are
+    HLA-ABC, in a node with 120 antibodies we expect to see 12 HLA-ABC counts.
+    If we actually see 8 counts in this node, the rate_diff_transformation for
+    HLA-ABC in this node will be -4.
+
+    :param df: the dataframe of raw antibody counts (antibodies as columns)
+    :returns: a dataframe with the counts difference from expected values
+    :rtype: pd.DataFrame
+    """
+    antibody_counts_per_node = df.sum(axis=1)
+    antibody_rates = df.sum(axis=0)
+    antibody_rates = antibody_rates / antibody_rates.sum()
+
+    expected_counts = antibody_counts_per_node.to_frame() @ antibody_rates.to_frame().T
+    return df - expected_counts
+
+
 def rel_normalization(df: pd.DataFrame, axis: Literal[0, 1] = 0) -> pd.DataFrame:
     """Normalize antibody counts to the relative amount per marker or component.
 

diff --git a/tests/analysis/colocalization/test_colocalization.py b/tests/analysis/colocalization/test_colocalization.py
@@ -160,6 +160,46 @@ def test_colocalization_scores_log1p(enable_backend, full_graph_edgelist: pd.Dat
     assert_frame_equal(result, expected)
 
 
+@pytest.mark.parametrize("enable_backend", ["networkx"], indirect=True)
+def test_colocalization_scores_ratediff(
+    enable_backend, full_graph_edgelist: pd.DataFrame
+):
+    result = colocalization_scores(
+        edgelist=full_graph_edgelist,
+        use_full_bipartite=True,
+        transformation="rate-diff",
+        neighbourhood_size=1,
+        n_permutations=50,
+        min_region_count=0,
+        random_seed=1477,
+    )
+
+    expected = pd.DataFrame.from_dict(
+        {
+            0: {
+                "marker_1": "A",
+                "marker_2": "B",
+                "pearson": -1.0,
+                "pearson_mean": -1.0,
+                "pearson_stdev": 0.0,
+                "pearson_z": np.nan,
+                "pearson_p_value": np.nan,
+                "pearson_p_value_adjusted": np.nan,
+                "jaccard": 1.0,
+                "jaccard_mean": 1.0,
+                "jaccard_stdev": 0.0,
+                "jaccard_z": np.nan,
+                "jaccard_p_value": np.nan,
+                "jaccard_p_value_adjusted": np.nan,
+                "component": "PXLCMP0000000",
+            }
+        },
+        orient="index",
+    )
+
+    assert_frame_equal(result, expected)
+
+
 @pytest.mark.parametrize("enable_backend", ["networkx"], indirect=True)
 def test_colocalization_scores_should_not_fail_when_one_component_has_single_node(
     enable_backend,

diff --git a/tests/test_statistics.py b/tests/test_statistics.py
@@ -10,6 +10,7 @@
     clr_transformation,
     correct_pvalues,
     log1p_transformation,
+    rate_diff_transformation,
     rel_normalization,
 )
 
@@ -120,6 +121,22 @@ def test_clr_standard_transformation_axis_1():
     assert_frame_equal(norm_counts, expected)
 
 
+def test_rate_diff_transformation():
+    antibody_counts = pd.DataFrame(
+        [[7.0, 3.0, 10.0], [10.0, 2.0, 5.0]],
+        columns=["A", "B", "C"],
+        index=["0000000", "0000001"],
+    )
+
+    norm_counts = rate_diff_transformation(antibody_counts)
+    expected = pd.DataFrame(
+        [[-2.189189, 0.2972973, 1.89189189], [2.189189, -0.2972973, -1.89189189]],
+        columns=["A", "B", "C"],
+        index=["0000000", "0000001"],
+    )
+    assert_frame_equal(norm_counts, expected)
+
+
 def test_rel_normalization():
     antibody_counts = pd.DataFrame(
         [[7.0, 3.0, 10.0], [10.0, 2.0, 5.0]],