# Spellmatch parameter sensitivity analysis

In [1]:
from functools import partial
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.model_selection import ParameterGrid
from tqdm.auto import tqdm

from spellmatch.assignment import assign
from spellmatch.benchmark import AlgorithmConfig, Benchmark
from spellmatch.benchmark.metrics import default_metrics
from spellmatch.io import write_scores
from spellmatch.matching.algorithms.spellmatch import Spellmatch

# Seeded generator so repeated runs of this notebook draw the same random stream.
rng = np.random.default_rng(123)

## Similarity/prior weights

- Fixed simutome parameters
- Fixed adjacency radius of $15 \mu m$
- Relative to an intensity weight of $1.0$

In [2]:
# Input directories; the benchmark cell globs "*.csv" inside each of them.
source_points_dir = "source_points"
source_intensities_dir = "source_intensities"
source_clusters_dir = "source_clusters"

# Simutome settings are held fixed in this section: every value lives in
# `simutome_kwargs`, and the simutome parameter grid is left empty.
simutome_kwargs = {
    # assume minor mis-alignment (rotation of 2 degrees, converted to radians)
    "image_rotation": 2.0 * np.pi / 180,
    "image_translation": (1.0, 3.0),
    # cell exclusion: see ../kuett_catena_2022/parameters.ipynb
    "exclude_cells": True,
    "section_thickness": 2.0,
    "cell_diameter_mean": 7.931,
    "cell_diameter_std": 1.768,
    # cell displacement: see ../kuett_catena_2022/parameters.ipynb
    "displace_cells": True,
    "cell_displacement_mean": 0.067,
    "cell_displacement_var": 1.010,
}
simutome_param_grid = ParameterGrid({})
num_sections = 1

# Spellmatch is the only algorithm under test. The fixed keyword arguments pin
# the adjacency radius (15) and the reference intensity weight (1.0); the
# parameter grid then varies the prior, degree and distance settings
# (4 x 4 x 4 = 64 grid points).
algorithm_configs = {
    "spellmatch": AlgorithmConfig(
        Spellmatch,
        algorithm_kwargs={
            "intensity_transform": np.arcsinh,
            "scores_tol": 1e-6,
            "require_convergence": True,
            "adj_radius": 15,
            "intensity_weight": 1.0,
            "intensity_interp_lmd": 1.0,
            "intensity_shared_pca_n_components": 10,
            "max_spatial_cdist": 50.0,
            "require_opt_convergence": True,
        },
        algorithm_param_grid=ParameterGrid(
            {
                "prior": [
                    *(
                        {"alpha": alpha, "spatial_cdist_prior_thres": 25.0}
                        for alpha in (0.9, 0.99, 0.999)
                    ),
                    # alpha = 1.0 is listed without a prior threshold
                    {"alpha": 1.0},
                ],
                "degrees": [
                    # a zero weight is listed without a threshold
                    {"degree_weight": 0.0},
                    *(
                        {"degree_weight": weight, "degree_cdiff_thres": 3}
                        for weight in (0.01, 0.1, 1.0)
                    ),
                ],
                "distances": [
                    # a zero weight is listed without a threshold
                    {"distance_weight": 0.0},
                    *(
                        {"distance_weight": weight, "distance_cdiff_thres": 5.0}
                        for weight in (0.01, 0.1, 1.0)
                    ),
                ],
            }
        ),
        # Three assignment strategies are evaluated for every set of scores.
        assignment_functions={
            "min_score_q25_union": partial(
                assign,
                min_score_quantile=0.25,
                direction="union",
                as_matrix=True,
            ),
            "max_only_intersect": partial(
                assign,
                max_only=True,
                direction="intersect",
                as_matrix=True,
            ),
            "linear_sum_forward": partial(
                assign,
                linear_sum=True,
                direction="forward",
                as_matrix=True,
            ),
        },
    )
}

metric_functions = default_metrics

In [3]:
# Build the benchmark from every CSV file in the three source directories;
# the file lists are sorted so their order is deterministic.
benchmark = Benchmark(
    source_points_files=sorted(Path(source_points_dir).glob("*.csv")),
    source_intensities_files=sorted(Path(source_intensities_dir).glob("*.csv")),
    source_clusters_files=sorted(Path(source_clusters_dir).glob("*.csv")),
    simutome_kwargs=simutome_kwargs,
    simutome_param_grid=simutome_param_grid,
    num_sections=num_sections,
    algorithm_configs=algorithm_configs,
    metric_functions=metric_functions,
    seed=rng,
)

# Output layout: spellmatch_psa/{infos.csv, results.csv, scores/scoresNNNNNN.nc}
benchmark_dir = Path("spellmatch_psa")
scores_dir = benchmark_dir / "scores"
scores_dir.mkdir(exist_ok=True, parents=True)

infos = []
all_results = []
for i, (info, scores, results) in enumerate(tqdm(benchmark)):
    print(i, info)
    infos.append(info)
    if scores is not None:
        # One scores file per run, numbered by the run index.
        scores_path = scores_dir / f"scores{i:06d}.nc"
        write_scores(scores_path, scores)
    if results is not None:
        # Tag every result row with the run's info before collecting it.
        for result in results:
            result.update(info)
        all_results.extend(results)

# Persist the per-run info and the collected result rows as CSV tables.
infos = pd.DataFrame(infos)
infos.to_csv(benchmark_dir / "infos.csv", index=False)
all_results = pd.DataFrame(all_results)
all_results.to_csv(benchmark_dir / "results.csv", index=False)

  0%|          | 0/256 [00:00<?, ?it/s]

## Adjacency radius

- Fixed simutome parameters
- Fixed similarity/prior weights