# Assignment analysis

In [None]:
#!python -m pip install numpy pandas simutome spellmatch tqdm

In [None]:
from pathlib import Path

import numpy as np
import pandas as pd
from simutome import Simutome
from spellmatch.matching.algorithms.spellmatch import Spellmatch
from tqdm.auto import tqdm

# Fixed seed for the shared random generator (handed to Simutome as its seed below).
RNG_SEED = 123
rng = np.random.default_rng(RNG_SEED)

In [None]:
# Input directories; each one is globbed for "*.csv" files by the analysis cell.
source_points_dir, source_clusters_dir, source_intensities_dir = (
    "source_points",
    "source_clusters",
    "source_intensities",
)

# Keyword arguments forwarded to the Simutome constructor.
# see ../kuett_catena_2022/parameters.ipynb
simutome_kwargs = dict(
    exclude_cells=True,
    cell_diameter_mean=7.931,
    cell_diameter_std=1.768,
)

# Section thicknesses to simulate: 2.0, 4.0, ..., 10.0 (kept as floats).
section_thicknesses = [float(thickness) for thickness in range(2, 11, 2)]

# Keyword arguments forwarded to the Spellmatch constructor.
spellmatch_kwargs = dict(
    intensity_transform=np.arcsinh,
    scores_tol=1e-6,
    filter_outliers=False,
    adj_radius=15,
    alpha=0.8,
    intensity_weight=1.0,
    intensity_interp_lmd=1.0,
    shared_intensity_pca_n_components=10,
    spatial_cdist_prior_thres=25,
)

In [None]:
# Collect the input tables. Files from the three directories are paired purely
# by their sorted order, so each directory must hold the same number of CSVs.
source_points_files = sorted(Path(source_points_dir).glob("*.csv"))
source_clusters_files = sorted(Path(source_clusters_dir).glob("*.csv"))
source_intensities_files = sorted(Path(source_intensities_dir).glob("*.csv"))
if not (
    len(source_points_files)
    == len(source_clusters_files)
    == len(source_intensities_files)
):
    # Explicit exception instead of `assert`, which is stripped under `python -O`.
    raise ValueError(
        "Mismatched number of input files: "
        f"{len(source_points_files)} points, "
        f"{len(source_clusters_files)} clusters, "
        f"{len(source_intensities_files)} intensities"
    )

simutome = Simutome(**simutome_kwargs, seed=rng)
spellmatch = Spellmatch(**spellmatch_kwargs)

# Output layout: assignment_analysis/results.csv and assignment_analysis/scores/*.nc
assignment_analysis_dir = Path("assignment_analysis")
scores_dir = assignment_analysis_dir / "scores"
scores_dir.mkdir(parents=True, exist_ok=True)  # also creates assignment_analysis_dir

results = []
# The context manager closes the progress bar even if an iteration raises.
with tqdm(total=len(section_thicknesses) * len(source_points_files)) as pbar:
    for section_thickness in section_thicknesses:
        for source_points_file, source_clusters_file, source_intensities_file in zip(
            source_points_files, source_clusters_files, source_intensities_files
        ):
            source_points = pd.read_csv(source_points_file, index_col="cell")
            source_clusters = pd.read_csv(source_clusters_file, index_col="cell")
            source_intensities = pd.read_csv(source_intensities_file, index_col="cell")

            # Intensities and clusters are re-ordered to the row order of
            # source_points before being handed to Simutome as plain arrays.
            cell_indices, cell_coords, cell_intensities = simutome.generate_section(
                source_points.to_numpy(),
                section_thickness,
                cell_intensities=source_intensities.loc[
                    source_points.index, :
                ].to_numpy(),
                cell_clusters=source_clusters.loc[source_points.index, :]
                .iloc[:, 0]
                .to_numpy(),
            )

            # cell_indices are positions into the arrays passed above, i.e. into
            # source_points.index. Both target tables must therefore be labelled
            # via source_points.index; using source_intensities.index would
            # mislabel cells whenever that file has a different row order or
            # additional rows.
            target_index = source_points.index[cell_indices]
            target_points = pd.DataFrame(
                cell_coords,
                index=target_index,
                columns=source_points.columns,
            )
            target_intensities = pd.DataFrame(
                cell_intensities,
                index=target_index,
                columns=source_intensities.columns,
            )

            scores = spellmatch.match_points(
                source_points_file.name,
                "simutome",
                source_points,
                target_points,
                source_intensities=source_intensities,
                target_intensities=target_intensities,
            )
            scores_file_name = (
                f"scores_{section_thickness:04.1f}_{source_points_file.stem}.nc"
            )
            scores.to_netcdf(path=scores_dir / scores_file_name)

            # One bookkeeping row per (section thickness, input file) combination.
            results.append(
                {
                    "section_thickness": section_thickness,
                    "source_points_file": source_points_file.name,
                    "source_clusters_file": source_clusters_file.name,
                    "source_intensities_file": source_intensities_file.name,
                    "scores_file": scores_file_name,
                },
            )
            pbar.update()

results = pd.DataFrame(data=results)
results.to_csv(assignment_analysis_dir / "results.csv", index=False)