Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding the rate_diff_transformation function. #138

Merged
merged 5 commits into from
May 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added

* Add `rate_diff_transformation` function with `rate-diff` alias as an alternative option for transforming marker counts before colocalization calculation.
* Add `local_g` function to compute spatial autocorrelation of marker counts per node.
* Add `compute_transition_probabilities` function to compute transition probabilities for k-step random walks for node pairs in a graph.
* Add QC plot showing UMIs per UPIA vs Tau.
Expand Down
8 changes: 7 additions & 1 deletion src/pixelator/analysis/colocalization/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,11 @@
from pixelator.analysis.permute import permutations
from pixelator.graph.utils import Graph
from pixelator.pixeldataset import PixelDataset
from pixelator.statistics import correct_pvalues, log1p_transformation
from pixelator.statistics import (
correct_pvalues,
log1p_transformation,
rate_diff_transformation,
)

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -91,6 +95,8 @@ def _transform_data(
return data
if transform == "log1p":
return log1p_transformation(data)
if transform == "rate-diff":
return rate_diff_transformation(data)
raise ValueError(
f"`transform`must be one of: {'/'.join(get_args(TransformationTypes))}"
)
Expand Down
2 changes: 1 addition & 1 deletion src/pixelator/analysis/colocalization/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

MarkerColocalizationResults = pd.DataFrame

TransformationTypes = Literal["raw", "log1p"]
TransformationTypes = Literal["raw", "log1p", "rate-diff"]


@dataclass
Expand Down
21 changes: 21 additions & 0 deletions src/pixelator/statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,27 @@ def log1p_transformation(df: pd.DataFrame) -> pd.DataFrame:
return log1p_df


def rate_diff_transformation(df: pd.DataFrame) -> pd.DataFrame:
    """Transform antibody counts as deviation from an expected baseline distribution.

    The baseline distribution assumes that every node carries the different
    antibody types in the same fixed ratio, namely the ratio observed over the
    whole dataframe. For example, if in total 10% of antibodies are HLA-ABC,
    in a node with 120 antibodies we expect to see 12 HLA-ABC counts. If we
    actually see 8 counts in this node, the rate_diff_transformation for
    HLA-ABC in this node will be -4.

    If the dataframe holds no counts at all (the grand total is zero), the
    expected counts are taken to be zero everywhere, so the result is all
    zeros rather than NaN values produced by a 0/0 division.

    :param df: the dataframe of raw antibody counts (antibodies as columns)
    :returns: a dataframe with the counts difference from expected values
    :rtype: pd.DataFrame
    """
    counts_per_node = df.sum(axis=1)
    counts_per_antibody = df.sum(axis=0)
    total_count = counts_per_antibody.sum()

    if total_count == 0:
        # No counts anywhere: avoid 0/0 and fall back to an all-zero baseline.
        antibody_rates = counts_per_antibody * 0.0
    else:
        antibody_rates = counts_per_antibody / total_count

    # Outer product (nodes x antibodies) of node totals and global antibody
    # rates; the labels are taken from `df` so the subtraction below needs no
    # re-alignment.
    expected_counts = pd.DataFrame(
        counts_per_node.to_numpy()[:, None] * antibody_rates.to_numpy()[None, :],
        index=df.index,
        columns=df.columns,
    )
    return df - expected_counts


def rel_normalization(df: pd.DataFrame, axis: Literal[0, 1] = 0) -> pd.DataFrame:
"""Normalize antibody counts to the relative amount per marker or component.

Expand Down
40 changes: 40 additions & 0 deletions tests/analysis/colocalization/test_colocalization.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,46 @@ def test_colocalization_scores_log1p(enable_backend, full_graph_edgelist: pd.Dat
assert_frame_equal(result, expected)


@pytest.mark.parametrize("enable_backend", ["networkx"], indirect=True)
def test_colocalization_scores_ratediff(
    enable_backend, full_graph_edgelist: pd.DataFrame
):
    """Check the colocalization scores obtained with the `rate-diff` transform."""
    # The single expected result row: one marker pair in one component.
    expected_row = {
        "marker_1": "A",
        "marker_2": "B",
        "pearson": -1.0,
        "pearson_mean": -1.0,
        "pearson_stdev": 0.0,
        "pearson_z": np.nan,
        "pearson_p_value": np.nan,
        "pearson_p_value_adjusted": np.nan,
        "jaccard": 1.0,
        "jaccard_mean": 1.0,
        "jaccard_stdev": 0.0,
        "jaccard_z": np.nan,
        "jaccard_p_value": np.nan,
        "jaccard_p_value_adjusted": np.nan,
        "component": "PXLCMP0000000",
    }
    expected = pd.DataFrame.from_dict({0: expected_row}, orient="index")

    scoring_options = {
        "use_full_bipartite": True,
        "transformation": "rate-diff",
        "neighbourhood_size": 1,
        "n_permutations": 50,
        "min_region_count": 0,
        "random_seed": 1477,
    }
    result = colocalization_scores(edgelist=full_graph_edgelist, **scoring_options)

    assert_frame_equal(result, expected)


@pytest.mark.parametrize("enable_backend", ["networkx"], indirect=True)
def test_colocalization_scores_should_not_fail_when_one_component_has_single_node(
enable_backend,
Expand Down
17 changes: 17 additions & 0 deletions tests/test_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
clr_transformation,
correct_pvalues,
log1p_transformation,
rate_diff_transformation,
rel_normalization,
)

Expand Down Expand Up @@ -120,6 +121,22 @@ def test_clr_standard_transformation_axis_1():
assert_frame_equal(norm_counts, expected)


def test_rate_diff_transformation():
    """Check rate-diff values for a small two-node, three-marker count table."""
    markers = ["A", "B", "C"]
    nodes = ["0000000", "0000001"]

    raw_rows = [[7.0, 3.0, 10.0], [10.0, 2.0, 5.0]]
    expected_rows = [
        [-2.189189, 0.2972973, 1.89189189],
        [2.189189, -0.2972973, -1.89189189],
    ]

    antibody_counts = pd.DataFrame(raw_rows, columns=markers, index=nodes)
    expected = pd.DataFrame(expected_rows, columns=markers, index=nodes)

    norm_counts = rate_diff_transformation(antibody_counts)

    assert_frame_equal(norm_counts, expected)


def test_rel_normalization():
antibody_counts = pd.DataFrame(
[[7.0, 3.0, 10.0], [10.0, 2.0, 5.0]],
Expand Down
Loading