# Pixie: pixel clustering notebook

In [1]:
# import required packages
import json
import os
from datetime import datetime as dt

import matplotlib.pyplot as plt
from alpineer import io_utils, load_utils
from matplotlib import rc_file_defaults

from ark.phenotyping import (pixel_cluster_utils, pixel_meta_clustering,
                             pixel_som_clustering, pixie_preprocessing)
from ark.utils import data_utils, example_dataset, plot_utils
from ark.utils.metacluster_remap_gui import (MetaClusterGui,
                                             colormap_helper,
                                             metaclusterdata_from_files)
import pandas as pd

This notebook performs pixel-level clustering on the Harmony-corrected glycan expression images.

The clustering follows the Pixie method as described at https://github.com/angelolab/pixie

In [19]:
# ---- input locations ----
# base folder of the run; the image folder (tiff_dir) currently points at the same place
# (a run-specific sub-folder of base_dir, e.g. "2022-12-10T18-30-32-T3D5-5pt-Fine-2ms-5ul-Test",
#  was used as tiff_dir previously and is kept here only as a reminder)
base_dir = "../extracted/forHarm/"
tiff_dir = base_dir
segmentation_dir = None
img_sub_folder = None

# ---- FOVs to process ----
# explicit selection; io_utils.list_folders(tiff_dir) can be used instead to take every FOV folder
fovs = [
    'dsst1c1-total_ion_count',
    'dsst2c2-total_ion_count',
    'dsst3c3-total_ion_count',
    'dsst4c4-total_ion_count',
]

# ---- run options ----
# prefix that names this run's pixel output folder
pixel_cluster_prefix = "Run4_oct11_k20"
# multiprocessing switch; batch_size is the number of FOVs handled in parallel and is
# ignored while multiprocess is False
multiprocess = False
batch_size = 5

In [20]:
# define the base output pixel folder using the specified pixel cluster prefix
# (kept relative: base_dir is joined in wherever an on-disk path is needed)
pixel_output_dir = os.path.join("pixie", "%s_pixel_output_dir" % pixel_cluster_prefix)

# create the folder, including missing parents; exist_ok=True makes re-running the cell
# safe without a separate check-then-create step
os.makedirs(os.path.join(base_dir, pixel_output_dir), exist_ok=True)

# define the preprocessed pixel data folders and the normalization file,
# all located inside pixel_output_dir
pixel_data_dir = os.path.join(pixel_output_dir, 'pixel_mat_data')
pixel_subset_dir = os.path.join(pixel_output_dir, 'pixel_mat_subset')
norm_vals_name = os.path.join(pixel_output_dir, 'channel_norm_post_rowsum.feather')

In [21]:
# glycan compositions that are not measured in all samples; rows with these are dropped
notinlist = [
    'Gc_28+S', 'Gc_28+S+Na*2', 'Gc_31+Fuc+Na*2', 'Gc_37+Sx2+Na', 'Gc_48+Sx2+Na',
    'Gc_49+Fucx3', 'Gc_54+Fucx4+Na', '46', '38+Fucx2', 'Gc_8+Fuc',
]

# load the evaluation table and keep only rows whose composition is not excluded
# (rows with missing values are intentionally kept; no dropna is applied)
evalu = (
    pd.read_csv('../extracted/EvaluationV2.csv')
    .loc[lambda table: ~table['composition'].isin(notinlist)]
)

# the remaining standard names are the channels used for clustering
channels = evalu['Standard_name'].tolist()
channels

['Hex3HexNAc2',
 'Hex3dHex1HexNAc2',
 'Hex4HexNAc2',
 'Hex5HexNAc2',
 'Hex3HexNAc3',
 'Hex3dHex1HexNAc3',
 'Hex4HexNAc3',
 'Hex3dHex1HexNAc4',
 'Hex5HexNAc3',
 'Hex5dHex1HexNAc3',
 'Hex4HexNAc4',
 'Hex4dHex1HexNAc4',
 'Hex5HexNAc4',
 'Hex5dHex1HexNAc4',
 'Hex4dHex1HexNAc5',
 'Hex5HexNAc5',
 'Hex5dHex1HexNAc5',
 'Hex6HexNAc5',
 'Hex6dHex1HexNAc5',
 'Hex6HexNAc6',
 'Hex6dHex1HexNAc6',
 'Hex7dHex1HexNAc6',
 'Hex4dHex1HexNAc3',
 'Hex4dHex2HexNAc4',
 'Hex5dHex3HexNAc4',
 'Hex5dHex2HexNAc5',
 'Hex5dHex3HexNAc5',
 'Hex6dHex2HexNAc6',
 'Hex6dHex3HexNAc6',
 'Hex5HexNAc4NeuAc1',
 'Hex5dHex1HexNAc4NeuAc1',
 'Hex5dHex1HexNAc6NeuAc1',
 'Hex7HexNAc6',
 'Hex5dHex2HexNAc4',
 'Hex4dHex2HexNAc5',
 'Hex6dHex2HexNAc5',
 'Hex5HexNAc6NeuAc1',
 'Hex5HexNAc5NeuAc1']

In [22]:
# preprocessing parameters handed to create_pixel_matrix

# value passed as subset_proportion
subset_proportion = 0.4
# blurring disabled (0 instead of 1): no blurring is suggested for bacteria pixels
blur_factor = 0

In [23]:
# run pixel data preprocessing
# seg_dir is taken from segmentation_dir (None in the configuration cell) rather than
# being hard-coded, so changing the configuration actually takes effect here
pixie_preprocessing.create_pixel_matrix(
    fovs,
    channels,
    base_dir,
    tiff_dir,
    seg_dir=segmentation_dir,
    img_sub_folder=img_sub_folder,
    seg_suffix=None,
    pixel_output_dir=pixel_output_dir,
    data_dir=pixel_data_dir,
    subset_dir=pixel_subset_dir,
    norm_vals_name_post_rownorm=norm_vals_name,
    blur_factor=blur_factor,
    subset_proportion=subset_proportion,
    multiprocess=multiprocess,
    batch_size=batch_size
)


Processed 4 fovs


In [24]:
# names of the SOM weights file, the SOM / meta cluster average tables and the
# meta cluster remapping table, all placed inside pixel_output_dir
(
    pixel_som_weights_name,
    pc_chan_avg_som_cluster_name,
    pc_chan_avg_meta_cluster_name,
    pixel_meta_cluster_remap_name,
) = (
    os.path.join(pixel_output_dir, file_name)
    for file_name in (
        'pixel_som_weights.feather',
        'pixel_channel_avg_som_cluster.csv',
        'pixel_channel_avg_meta_cluster.csv',
        'pixel_meta_cluster_mapping.csv',
    )
)

In [25]:
# train the pixel SOM (single pass, fixed seed of 42) and keep the returned object,
# which the cluster-assignment cell needs
pixel_pysom = pixel_som_clustering.train_pixel_som(
    fovs, channels, base_dir,
    som_weights_name=pixel_som_weights_name,
    norm_vals_name=norm_vals_name,
    subset_dir=pixel_subset_dir,
    seed=42,
    num_passes=1,
)

Training SOM


In [26]:
# assign every pixel to a SOM cluster using the trained pixel SOM
pixel_som_clustering.cluster_pixels(
    fovs, channels, base_dir, pixel_pysom,
    batch_size=batch_size,
    multiprocess=multiprocess,
    data_dir=pixel_data_dir,
)

# write the per-SOM-cluster summary (average channel expression) file
pixel_som_clustering.generate_som_avg_files(
    fovs, channels, base_dir, pixel_pysom,
    pc_chan_avg_som_cluster_name=pc_chan_avg_som_cluster_name,
    data_dir=pixel_data_dir,
)

Mapping pixel data to SOM cluster labels
Processed 4 fovs
Computing average channel expression across pixel SOM clusters




In [27]:
# consensus clustering parameters, passed below as cap and max_k
cap = 2.5
max_k = 20

# hierarchical (consensus) clustering on the average pixel SOM cluster expression
pixel_cc = pixel_meta_clustering.pixel_consensus_cluster(
    fovs, channels, base_dir,
    cap=cap,
    max_k=max_k,
    pc_chan_avg_som_cluster_name=pc_chan_avg_som_cluster_name,
    data_dir=pixel_data_dir,
    batch_size=batch_size,
    multiprocess=multiprocess,
)

# write the meta cluster summary files from the consensus clustering result
pixel_meta_clustering.generate_meta_avg_files(
    fovs, channels, base_dir, pixel_cc,
    pc_chan_avg_meta_cluster_name=pc_chan_avg_meta_cluster_name,
    pc_chan_avg_som_cluster_name=pc_chan_avg_som_cluster_name,
    data_dir=pixel_data_dir,
)

z-score scaling and capping data
Running consensus clustering
Mapping pixel data to consensus cluster labels
Processed 4 fovs
Computing average channel expression across pixel meta clusters




Mapping meta cluster values onto average channel expression across pixel SOM clusters


In [28]:
# Interactive meta cluster remapping GUI.
# Switch matplotlib to the interactive widget backend, restore the default rc
# settings and turn interactive mode on before the GUI is built.
%matplotlib widget
rc_file_defaults()
plt.ion()

# load the pixel SOM cluster average table (stored under base_dir) as the GUI's data
pixel_mcd = metaclusterdata_from_files(
    os.path.join(base_dir, pc_chan_avg_som_cluster_name),
    cluster_type='pixel'
)
# file used for the meta cluster mapping; the same path is read back by the
# remapping and colormap cells further down
pixel_mcd.output_mapping_filename = os.path.join(base_dir, pixel_meta_cluster_remap_name)
# build the GUI; pixel_mcg.im_cl.cmap is read later to derive the colormap
pixel_mcg = MetaClusterGui(pixel_mcd, width=10)

VBox(children=(Output(), HBox(children=(HBox(children=(FloatSlider(value=3.0, description='Max Zscore:', max=1…

In [29]:
# apply the remapping file to the pixel dataset so the meta cluster values are renamed
pixel_meta_clustering.apply_pixel_meta_cluster_remapping(
    fovs, channels, base_dir,
    pixel_data_dir, pixel_meta_cluster_remap_name,
    batch_size=batch_size,
    multiprocess=multiprocess,
)

# recompute the per-meta-cluster mean channel expression and carry the new names
# over to the SOM cluster average data
pixel_meta_clustering.generate_remap_avg_files(
    fovs, channels, base_dir,
    pixel_data_dir, pixel_meta_cluster_remap_name,
    pc_chan_avg_som_cluster_name, pc_chan_avg_meta_cluster_name,
)

Using re-mapping scheme to re-label pixel meta clusters
Processed 4 fovs
Re-computing average channel expression across pixel meta clusters




Re-assigning meta cluster column in pixel SOM cluster average channel expression table


In [30]:
# build the meta cluster colormap from the saved mapping file and the GUI's colormap;
# only the first returned item is kept, the second is discarded
raw_cmap, _ = colormap_helper.generate_meta_cluster_colormap_dict(
    pixel_mcd.output_mapping_filename, pixel_mcg.im_cl.cmap
)

In [31]:
# FOVs for which pixel cluster masks are generated and displayed
subset_pixel_fovs = [
    'dsst1c1-total_ion_count',
    'dsst2c2-total_ion_count',
    'dsst3c3-total_ion_count',
    'dsst4c4-total_ion_count',
]


In [32]:
# channel file: the first .tiff listed for the first FOV, prefixed with
# img_sub_folder when the images sit in a sub folder of the FOV folder
if img_sub_folder is not None:
    chan_file = os.path.join(
        img_sub_folder,
        io_utils.list_files(os.path.join(tiff_dir, fovs[0], img_sub_folder), substrs=['.tiff'])[0],
    )
else:
    # images sit directly in the FOV folder, so the bare file name is used
    chan_file = io_utils.list_files(os.path.join(tiff_dir, fovs[0]), substrs=['.tiff'])[0]

# build one pixel cluster mask per FOV in subset_pixel_fovs, using the
# pixel_meta_cluster column, and save it to the pixel_masks sub folder of the output folder
data_utils.generate_and_save_pixel_cluster_masks(
    fovs=subset_pixel_fovs,
    base_dir=base_dir,
    tiff_dir=tiff_dir,
    chan_file=chan_file,
    pixel_data_dir=pixel_data_dir,
    pixel_cluster_col='pixel_meta_cluster',
    save_dir=os.path.join(base_dir, pixel_output_dir),
    sub_dir='pixel_masks',
    name_suffix='_pixel_mask',
)

Pixel Cluster Mask Generation:   0%|          | 0/4 [00:00<?, ?FOVs/s]

In [33]:
# colour the saved pixel masks with the meta cluster colormap; input is read from
# pixel_masks and the result is written to pixel_mask_colored, both inside the output folder
plot_utils.save_colored_masks(
    fovs=subset_pixel_fovs,
    cluster_type="pixel",
    metacluster_colors=raw_cmap,
    cluster_id_to_name_path=os.path.join(base_dir, pixel_meta_cluster_remap_name),
    mask_dir=os.path.join(base_dir, pixel_output_dir, "pixel_masks"),
    save_dir=os.path.join(base_dir, pixel_output_dir, "pixel_mask_colored"),
)

Saving colored masks: 100%|█| 4/4 [00:01<00:00,  2.53FOVs/s, FOV=dsst
