In [1]:
from pathlib import Path
import tifffile
from MibiAnalysis.mibi_analysis import MibiProcessor                                       
import pandas as pd
import numpy as np
import os
import tifffile

def get_folder_paths(base_dir):
    base_path = Path(base_dir)
    folder_dict = {folder.name: folder.resolve() for folder in base_path.iterdir() if folder.is_dir()}
    return folder_dict

# -------------------------------
# Example usage:
base_dir = "/home/simon_g/isilon_images_mnt/10_MetaSystems/MetaSystemsData/_simon/data/TNBC/TNBCShareData"
folders = get_folder_paths(base_dir)

out_dir = Path('/home/simon_g/isilon_images_mnt/10_MetaSystems/MetaSystemsData/_simon/data/TNBC/TNBC_nobackground')

In [None]:
                                                                    
for folder, path in folders.items():
    
    if (out_dir / f'{folder}_nobg.tif').exists():
        print(f'{folder} already exists!')
        continue
    
    processor = MibiProcessor()                                                   
    processor.load_tiffs(path)                              
    processor.remove_background(bg_channel='Background', t=0.1, remove_val=2, cap=10); 
    
    tifffile.imwrite(out_dir / f'{folder}_nobg.tif', processor.counts_no_bg)
    processor.panel.Label.to_csv(out_dir / 'panel.csv')

In [None]:
pd.read_csv(out_dir / 'panel.csv', index_col=0)

In [None]:
import numpy as np
import tifffile
from pathlib import Path
import matplotlib.pyplot as plt
import json
from tqdm import tqdm

in_dir  = Path('/home/simon_g/isilon_images_mnt/10_MetaSystems/MetaSystemsData/_simon/data/TNBC/TNBC_nobackground')
out_dir = Path('/home/simon_g/isilon_images_mnt/10_MetaSystems/MetaSystemsData/_simon/data/TNBC/TNBC_normed')
out_dir.mkdir(exist_ok=True)

files      = list(Path(in_dir).glob("*.tif"))
files_dict = {f.name.split("_")[0][5:]: f for f in files}

# ============================================================
# Define batches
# ============================================================
batch1_ids  = list(range(1, 30))
batch2_ids  = [int(k) for k in files_dict.keys() if int(k) not in batch1_ids]

b1 = sum(int(k) in batch1_ids for k in files_dict.keys())
b2 = len(files) - b1
print(f"Batch 1: {b1} files, Batch 2: {b2} files")

# ============================================================
# Step 1: Compute reference quantiles from batch2 (per channel)
# ============================================================
n_quantiles = 1000
quantile_levels = np.linspace(0, 1, n_quantiles)

# First pass: determine number of channels from first image
first_img = tifffile.imread(next(iter(files_dict.values())))
n_channels = first_img.shape[-1] if first_img.ndim == 3 else 1
print(f"Detected {n_channels} channels")

# Collect NON-ZERO pixel values from batch2 to compute reference distribution
print("\nComputing reference quantiles from batch2 (excluding zeros)...")
batch2_pixels = [[] for _ in range(n_channels)]

for file_key, images_path in tqdm(files_dict.items()):
    point_id = int(file_key)
    if point_id not in batch2_ids:
        continue
    
    image_stack = tifffile.imread(images_path)
    if image_stack.ndim == 2:
        image_stack = image_stack[..., np.newaxis]
    
    # Process each channel separately
    for c in range(n_channels):
        channel_data = image_stack[..., c].ravel()
        # Only keep non-zero pixels
        nonzero_pixels = channel_data[channel_data > 0]
        
        if len(nonzero_pixels) > 0:
            # Sample to avoid memory issues
            n_sample = min(len(nonzero_pixels), max(1000, len(nonzero_pixels) // 10))
            sample_idx = np.random.choice(len(nonzero_pixels), size=n_sample, replace=False)
            batch2_pixels[c].append(nonzero_pixels[sample_idx])

# Compute reference quantiles per channel
reference_quantiles = []
channels_with_no_batch2_data = []

for c in range(n_channels):
    if len(batch2_pixels[c]) == 0:
        # No non-zero pixels in this channel for batch2
        print(f"  Channel {c}: WARNING - no non-zero pixels in batch2, will skip normalization")
        reference_quantiles.append(None)
        channels_with_no_batch2_data.append(c)
    else:
        all_pixels = np.concatenate(batch2_pixels[c])
        ref_q = np.quantile(all_pixels, quantile_levels)
        reference_quantiles.append(ref_q)
        print(f"  Channel {c}: n_pixels={len(all_pixels)}, min={all_pixels.min():.2f}, max={all_pixels.max():.2f}, median={np.median(all_pixels):.2f}")

if channels_with_no_batch2_data:
    print(f"\n  >>> {len(channels_with_no_batch2_data)} channels have no data in batch2: {channels_with_no_batch2_data}")

del batch2_pixels

# ============================================================
# Step 2: Compute batch1 quantiles (for transformation) - per channel
# ============================================================
print("\nComputing batch1 quantiles (excluding zeros)...")
batch1_pixels = [[] for _ in range(n_channels)]

for file_key, images_path in tqdm(files_dict.items()):
    point_id = int(file_key)
    if point_id not in batch1_ids:
        continue
    
    image_stack = tifffile.imread(images_path)
    if image_stack.ndim == 2:
        image_stack = image_stack[..., np.newaxis]
    
    # Process each channel separately
    for c in range(n_channels):
        channel_data = image_stack[..., c].ravel()
        # Only keep non-zero pixels
        nonzero_pixels = channel_data[channel_data > 0]
        
        if len(nonzero_pixels) > 0:
            n_sample = min(len(nonzero_pixels), max(1000, len(nonzero_pixels) // 10))
            sample_idx = np.random.choice(len(nonzero_pixels), size=n_sample, replace=False)
            batch1_pixels[c].append(nonzero_pixels[sample_idx])

# Compute batch1 quantiles per channel
batch1_quantiles = []
channels_with_no_batch1_data = []

for c in range(n_channels):
    if len(batch1_pixels[c]) == 0:
        print(f"  Channel {c}: WARNING - no non-zero pixels in batch1, will skip normalization")
        batch1_quantiles.append(None)
        channels_with_no_batch1_data.append(c)
    else:
        all_pixels = np.concatenate(batch1_pixels[c])
        b1_q = np.quantile(all_pixels, quantile_levels)
        batch1_quantiles.append(b1_q)
        print(f"  Channel {c}: n_pixels={len(all_pixels)}, min={all_pixels.min():.2f}, max={all_pixels.max():.2f}, median={np.median(all_pixels):.2f}")

if channels_with_no_batch1_data:
    print(f"\n  >>> {len(channels_with_no_batch1_data)} channels have no data in batch1: {channels_with_no_batch1_data}")

del batch1_pixels

# Summary of channels that will be skipped
channels_skipped = set(channels_with_no_batch1_data) | set(channels_with_no_batch2_data)
channels_normalized = [c for c in range(n_channels) if c not in channels_skipped]
print(f"\n{'='*60}")
print(f"Summary:")
print(f"  - Channels to normalize: {len(channels_normalized)}")
print(f"  - Channels to skip (no data): {len(channels_skipped)}")
if channels_skipped:
    print(f"  - Skipped channel indices: {sorted(channels_skipped)}")
print(f"{'='*60}\n")

# ============================================================
# Step 3: Apply quantile normalization (with zero preservation)
# ============================================================
def quantile_normalize_channel(channel, source_quantiles, target_quantiles):
    """
    Transform a single channel from source distribution to target distribution.
    Preserves zero values. If quantiles are None, returns channel unchanged.
    
    Args:
        channel: 2D array (H, W) - single channel
        source_quantiles: 1D array of source distribution quantiles (or None)
        target_quantiles: 1D array of target distribution quantiles (or None)
    
    Returns:
        Normalized channel as float32
    """
    # If either quantile is None, skip normalization for this channel
    if source_quantiles is None or target_quantiles is None:
        return channel.astype(np.float32)
    
    result = np.zeros_like(channel, dtype=np.float32)
    
    # Create mask for non-zero pixels
    nonzero_mask = channel > 0
    
    if np.any(nonzero_mask):
        # Only transform non-zero pixels
        nonzero_values = channel[nonzero_mask].astype(np.float32)
        
        # Map each pixel value to its quantile in source, then to target value
        normalized_values = np.interp(
            nonzero_values,
            source_quantiles,
            target_quantiles
        )
        
        # Ensure normalized values don't become zero (minimum is smallest positive target value)
        positive_targets = target_quantiles[target_quantiles > 0]
        if len(positive_targets) > 0:
            min_positive = positive_targets.min()
        else:
            min_positive = 1.0
        normalized_values = np.maximum(normalized_values, min_positive)
        
        result[nonzero_mask] = normalized_values
    
    # Zero pixels remain zero (already initialized to zero)
    return result


def quantile_normalize_image(image, source_quantiles, target_quantiles):
    """
    Transform all channels of an image from source to target distribution.
    Each channel is normalized independently. Zeros are preserved.
    Channels with None quantiles are returned unchanged.
    
    Args:
        image: 3D array (H, W, C)
        source_quantiles: list of 1D arrays (or None) per channel
        target_quantiles: list of 1D arrays (or None) per channel
    
    Returns:
        Normalized image as float32
    """
    n_channels = image.shape[-1]
    normalized = np.zeros_like(image, dtype=np.float32)
    
    for c in range(n_channels):
        normalized[..., c] = quantile_normalize_channel(
            image[..., c],
            source_quantiles[c],
            target_quantiles[c]
        )
    
    return normalized


print("Applying quantile normalization and saving...")
print("  - Each channel normalized independently")
print("  - Zero values are preserved")
print("  - Channels with missing data are kept unchanged\n")

for file_key, images_path in tqdm(files_dict.items()):
    point_id = int(file_key)
    batch = 'batch1' if point_id in batch1_ids else 'batch2'
    
    image_stack = tifffile.imread(images_path)
    original_dtype = image_stack.dtype
    
    if image_stack.ndim == 2:
        image_stack = image_stack[..., np.newaxis]
    
    if batch == 'batch1':
        # Normalize batch1 to match batch2 distribution
        normalized = quantile_normalize_image(image_stack, batch1_quantiles, reference_quantiles)
    else:
        # Keep batch2 as-is
        normalized = image_stack.astype(np.float32)
    
    # Verify zeros are preserved - extra safety
    original_zeros = (image_stack == 0)
    normalized[original_zeros] = 0
    
    # Clip to valid range and convert back to original dtype
    if np.issubdtype(original_dtype, np.integer):
        info = np.iinfo(original_dtype)
        normalized = np.clip(normalized, info.min, info.max).astype(original_dtype)
    else:
        normalized = normalized.astype(original_dtype)
    
    # Save
    out_path = out_dir / images_path.name
    tifffile.imwrite(out_path, normalized)

print(f"\nDone! Normalized images saved to {out_dir}")

# ============================================================
# Verification: Check that zeros are preserved
# ============================================================
print("\nVerifying zero preservation on sample images...")

# Find a batch1 sample
batch1_sample = None
for key in files_dict.keys():
    if int(key) in batch1_ids:
        batch1_sample = key
        break

if batch1_sample:
    original = tifffile.imread(files_dict[batch1_sample])
    normalized = tifffile.imread(out_dir / files_dict[batch1_sample].name)
    
    orig_zeros = (original == 0).sum()
    norm_zeros = (normalized == 0).sum()
    print(f"  Batch1 sample (Point {batch1_sample}):")
    print(f"    Original zeros: {orig_zeros}")
    print(f"    Normalized zeros: {norm_zeros}")
    print(f"    Zero preservation: {'✓ PASSED' if orig_zeros == norm_zeros else '✗ FAILED'}")

# ============================================================
# Save normalization parameters for reference
# ============================================================
params = {
    'batch1_ids': batch1_ids,
    'batch2_ids': batch2_ids,
    'n_quantiles': n_quantiles,
    'n_channels': n_channels,
    'zero_preserved': True,
    'per_channel_normalization': True,
    'channels_skipped': sorted(list(channels_skipped)),
    'channels_normalized': channels_normalized,
}
with open(out_dir / 'normalization_params.json', 'w') as f:
    json.dump(params, f, indent=2)

# Save quantiles (convert None to np.nan arrays for saving)
batch1_quantiles_save = np.array([
    q if q is not None else np.full(n_quantiles, np.nan) 
    for q in batch1_quantiles
])
reference_quantiles_save = np.array([
    q if q is not None else np.full(n_quantiles, np.nan) 
    for q in reference_quantiles
])

np.savez(out_dir / 'quantiles.npz', 
         batch1_quantiles=batch1_quantiles_save,
         reference_quantiles=reference_quantiles_save,
         quantile_levels=quantile_levels,
         channels_skipped=np.array(sorted(list(channels_skipped))))

print(f"\nParameters saved to {out_dir / 'normalization_params.json'}")
print(f"Quantiles saved to {out_dir / 'quantiles.npz'}")

# ============================================================
# Visualization of the normalization effect (per channel)
# ============================================================
# Only plot channels that were actually normalized
plot_channels = [c for c in range(min(8, n_channels)) if c in channels_normalized]
n_plot_channels = len(plot_channels)

if n_plot_channels > 0:
    fig, axes = plt.subplots(2, n_plot_channels, figsize=(4*n_plot_channels, 8))
    if n_plot_channels == 1:
        axes = axes.reshape(-1, 1)
    
    for i, c in enumerate(plot_channels):
        # Plot quantile distributions
        axes[0, i].plot(quantile_levels, batch1_quantiles[c], label='Batch1 (original)', alpha=0.7)
        axes[0, i].plot(quantile_levels, reference_quantiles[c], label='Batch2 (reference)', alpha=0.7)
        axes[0, i].set_xlabel('Quantile')
        axes[0, i].set_ylabel('Pixel Value (non-zero only)')
        axes[0, i].set_title(f'Channel {c} - Quantile Distributions')
        axes[0, i].legend()
        
        # Plot transfer function
        axes[1, i].plot(batch1_quantiles[c], reference_quantiles[c])
        axes[1, i].plot([batch1_quantiles[c].min(), batch1_quantiles[c].max()],
                        [batch1_quantiles[c].min(), batch1_quantiles[c].max()], 
                        'k--', alpha=0.5, label='Identity')
        axes[1, i].set_xlabel('Batch1 Value')
        axes[1, i].set_ylabel('Normalized Value')
        axes[1, i].set_title(f'Channel {c} - Transfer Function')
        axes[1, i].legend()
    
    plt.tight_layout()
    plt.savefig(out_dir / 'normalization_visualization.png', dpi=150)
    plt.show()
    print("\nVisualization saved!")
else:
    print("\nNo channels to visualize (all were skipped).")

print("\n" + "="*60)
print("COMPLETE!")
print("="*60)

In [None]:
batch1 = tifffile.imread('/home/simon_g/isilon_images_mnt/10_MetaSystems/MetaSystemsData/_simon/data/TNBC/TNBC_normed/Point39_nobg.tif')
batch2 = tifffile.imread('/home/simon_g/isilon_images_mnt/10_MetaSystems/MetaSystemsData/_simon/data/TNBC/TNBC_normed/Point11_nobg.tif')

In [None]:
import pandas as pd
marker_names = pd.read_csv('/home/simon_g/isilon_images_mnt/10_MetaSystems/MetaSystemsData/_simon/data/TNBC/TNBC_nobackground/panel.csv', index_col=0)

In [None]:
idx = marker_names.Label.to_list().index('CD3')

image_batch1 = batch1[..., idx]
image_batch2 = batch2[..., idx]

In [None]:
fig, ax = plt.subplots(1,2)
ax[0].imshow(image_batch1, vmin=0, vmax=5)
ax[1].imshow(image_batch2, vmin=0, vmax=5)

image_batch1.min(), image_batch1.max(), image_batch2.min(), image_batch2.max(), 

In [None]:
import numpy as np
import tifffile
from pathlib import Path
import matplotlib.pyplot as plt
from tqdm import tqdm

# ============================================================
# Configuration
# ============================================================
in_dir  = Path('/home/simon_g/isilon_images_mnt/10_MetaSystems/MetaSystemsData/_simon/data/TNBC/TNBC_normed')
out_dir = Path('/home/simon_g/isilon_images_mnt/10_MetaSystems/MetaSystemsData/_simon/data/TNBC/TNBC_01scaled')
out_dir.mkdir(exist_ok=True)

PERCENTILE_HIGH = 99.9

files = list(in_dir.glob("*.tif"))
print(f"Found {len(files)} images")

# Get number of channels
first_img = tifffile.imread(files[0])
n_channels = first_img.shape[-1] if first_img.ndim == 3 else 1
print(f"Detected {n_channels} channels")

# ============================================================
# Step 1: Compute p99.9 per channel (sample from all images)
# ============================================================
print(f"\nComputing p{PERCENTILE_HIGH} per channel...")
channel_pixels = [[] for _ in range(n_channels)]

for filepath in tqdm(files):
    img = tifffile.imread(filepath)
    if img.ndim == 2:
        img = img[..., np.newaxis]
    
    for c in range(n_channels):
        nonzero = img[..., c].ravel()
        nonzero = nonzero[nonzero > 0]
        if len(nonzero) > 0:
            # Sample ~10% to save memory
            n_sample = min(len(nonzero), max(1000, len(nonzero) // 10))
            idx = np.random.choice(len(nonzero), size=n_sample, replace=False)
            channel_pixels[c].append(nonzero[idx])

# Compute percentiles
p_high_values = []
for c in range(n_channels):
    if len(channel_pixels[c]) > 0:
        all_px = np.concatenate(channel_pixels[c])
        p_high_values.append(np.percentile(all_px, PERCENTILE_HIGH))
    else:
        p_high_values.append(None)

del channel_pixels

# ============================================================
# Step 2: Apply normalization
# ============================================================
print("\nApplying 0-1 normalization...")

# Store one example for before/after plot
example_before = None
example_after = None
example_name = None

for i, filepath in enumerate(tqdm(files)):
    img = tifffile.imread(filepath)
    original_shape = img.shape
    
    if img.ndim == 2:
        img = img[..., np.newaxis]
    
    # Save example before normalization
    if i == 0:
        example_before = img.copy()
        example_name = filepath.name
    
    # Normalize each channel
    result = np.zeros_like(img, dtype=np.float32)
    
    for c in range(n_channels):
        p_high = p_high_values[c]
        if p_high is None or p_high == 0:
            continue
        
        channel = img[..., c].astype(np.float32)
        nonzero_mask = channel > 0
        
        if np.any(nonzero_mask):
            # Clip and scale
            clipped = np.clip(channel[nonzero_mask], 0, p_high)
            scaled = clipped / p_high
            # Keep small minimum so non-zero stays non-zero
            result[..., c][nonzero_mask] = np.maximum(scaled, 1e-6)
    
    # Save example after normalization
    if i == 0:
        example_after = result.copy()
    
    # Save
    tifffile.imwrite(out_dir / filepath.name, result)

print(f"\nDone! Saved to {out_dir}")

# ============================================================
# Before/After Visualization
# ============================================================
print("\nGenerating before/after comparison...")

# Select channels to show (first 8 with data)
show_channels = [c for c in range(n_channels) if p_high_values[c] is not None][:8]
n_show = len(show_channels)

fig, axes = plt.subplots(3, n_show, figsize=(2.5*n_show, 8))

for i, c in enumerate(show_channels):
    # Before
    before_ch = example_before[..., c]
    after_ch = example_after[..., c]
    
    # Show images
    vmax_before = np.percentile(before_ch[before_ch > 0], 99) if np.any(before_ch > 0) else 1
    
    axes[0, i].imshow(before_ch, cmap='viridis', vmin=0, vmax=vmax_before)
    axes[0, i].set_title(f'Ch {c} Before', fontsize=9)
    axes[0, i].axis('off')
    
    axes[1, i].imshow(after_ch, cmap='viridis', vmin=0, vmax=1)
    axes[1, i].set_title(f'Ch {c} After', fontsize=9)
    axes[1, i].axis('off')
    
    # Histogram comparison
    before_nonzero = before_ch[before_ch > 0].ravel()
    after_nonzero = after_ch[after_ch > 0].ravel()
    
    if len(before_nonzero) > 0:
        axes[2, i].hist(before_nonzero, bins=50, alpha=0.5, label='Before', density=True)
    if len(after_nonzero) > 0:
        axes[2, i].hist(after_nonzero, bins=50, alpha=0.5, label='After', density=True)
    axes[2, i].set_xlabel('Value', fontsize=8)
    axes[2, i].set_title(f'Ch {c} Histograms', fontsize=9)
    axes[2, i].legend(fontsize=7)

plt.suptitle(f'Before/After Normalization (p{PERCENTILE_HIGH})\n{example_name}', fontsize=11)
plt.tight_layout()
plt.savefig(out_dir / 'before_after_comparison.png', dpi=150)
plt.show()

print(f"Plot saved to {out_dir / 'before_after_comparison.png'}")

In [None]:
import tifffile

images = tifffile.imread('/home/simon_g/isilon_images_mnt/10_MetaSystems/MetaSystemsData/_simon/data/TNBC/TNBCcellTypes/P10_labeledImage.tiff')

In [None]:
import matplotlib.pyplot as plt

plt.imshow(images)

In [None]:
images = tifffile.imread('/home/simon_g/isilon_images_mnt/10_MetaSystems/MetaSystemsData/_simon/data/TNBC/TNBCShareCellData/p10_labeledcellData.tiff')
plt.imshow(images)

In [None]:
import pandas as pd
info = pd.read_csv('/home/simon_g/isilon_images_mnt/10_MetaSystems/MetaSystemsData/_simon/data/TNBC/TNBCShareCellData/cellData.csv')

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from pathlib import Path
import numpy as np
import tifffile
from copy import deepcopy
import matplotlib.pyplot as plt
from scipy import ndimage
from BioUtensils.extraction import get_cell_expression_fast

for patient_id in info.SampleID.unique():
    filename = Path(f'/home/simon_g/isilon_images_mnt/10_MetaSystems/MetaSystemsData/_simon/data/TNBC/TNBCShareCellData/p{patient_id}_labeledcellData.tiff')
    if not filename.exists():
        print(f'{filename.name} is missing!')
        continue
    masks = tifffile.imread(filename)
    tmp_info = info[info.SampleID == patient_id].sort_values(by='cellLabelInImage')
    print(len(np.unique(masks)), len(tmp_info))
    print(tmp_info)
    
    present_masks = set(np.unique(masks))
    annotated_masks = set(tmp_info.cellLabelInImage.unique())
    not_annotated_masks = present_masks - annotated_masks
    
    lut = np.zeros(len(present_masks))
    for v in present_masks:
        if v in annotated_masks:
            lut[v] = v
        else:
            lut[v] = 0
            
    lut[0] = 0
    lut[1] = 0
    
    plt.imshow(masks)
    plt.show()
    
    tmp_masks = deepcopy(masks)
    new = lut[masks]
    
    print(len(np.unique(new)), len(tmp_info))
    
    cell_labels = np.unique(new)
    cell_labels = cell_labels[cell_labels != 0]  # exclude background
    
    centroids = np.array(ndimage.center_of_mass(new, labels=new, index=cell_labels)).round().astype(int)
    print(centroids.shape)
    plt.imshow(new)
    plt.scatter(centroids[:, 1], centroids[:, 0], s=1, color='red')
    
    # sizes = get_cell_expression_fast(new != 0, new, get_size=True)
    
    print(tmp_info.tumorCluster.unique())
    
    # Primary classes
    group_mapping = {
        1: "Unidentified",
        2: "Immune",
        3: "Endothelial",
        4: "Mesenchymal-like",
        5: "Tumor",
        6: "Keratin-positive tumor"
    }

    # Immune subtypes
    immune_mapping = {
        1: "Tregs",
        2: "CD4 T",
        3: "CD8 T",
        4: "CD3 T",
        5: "NK",
        6: "B",
        7: "Neutrophils",
        8: "Macrophages",
        9: "DC",
        10: "DC/Mono",
        11: "Mono/Neu",
        12: "Other immune"
    }
    
    break

In [2]:
from pathlib import Path
imaging_data_folder = Path('/home/simon_g/isilon_images_mnt/10_MetaSystems/MetaSystemsData/_simon/data/MCI_data')
dataset_name = 'MIBI_TNBC'

dataset_folder = imaging_data_folder / dataset_name
dataset_folder.mkdir(exist_ok=True)

images_folder = Path('/home/simon_g/isilon_images_mnt/10_MetaSystems/MetaSystemsData/_simon/data/TNBC/TNBC_01scaled')
target_images_folder = dataset_folder / 'images'
target_images_folder.mkdir(exist_ok=True)

masks_folder = Path('/home/simon_g/isilon_images_mnt/10_MetaSystems/MetaSystemsData/_simon/data/TNBC/TNBCShareCellData')
target_masks_folder = dataset_folder / 'masks'
target_masks_folder.mkdir(exist_ok=True)

In [3]:
group_mapping = {
    1: "Unidentified",
    2: "Immune",
    3: "Endothelial",
    4: "Mesenchymal-like",
    5: "Tumor",
    6: "Keratin-positive tumor"
}

immune_mapping = {
    1: "Tregs",
    2: "CD4 T",
    3: "CD8 T",
    4: "CD3 T",
    5: "NK",
    6: "B",
    7: "Neutrophils",
    8: "Macrophages",
    9: "DC",
    10: "DC/Mono",
    11: "Mono/Neu",
    12: "Other immune"
}

In [4]:
import pandas as pd
annotations = pd.read_csv('/home/simon_g/isilon_images_mnt/10_MetaSystems/MetaSystemsData/_simon/data/TNBC/TNBCShareCellData/cellData.csv')

In [7]:
import tifffile
import re
import shutil
import numpy as np
import matplotlib.pyplot as plt
from copy import deepcopy
from scipy import ndimage
from tqdm import tqdm

all_annotations = []
for image_file in tqdm(list(images_folder.glob('*.tif'))):
    m = re.search(r'Point(\d+)_', str(image_file))
    point_id = int(m.group(1)) if m else None
    print(point_id, image_file.name)

    # if (target_masks_folder / f'{point_id}_masks.tif').exists():
    #     print(f'Data for Point{point_id} already exists!')
    #     continue
    
    masks_file = masks_folder / f'p{point_id}_labeledcellData.tiff'
    
    masks = tifffile.imread(masks_file)
    _annotations = annotations[annotations.SampleID == point_id].sort_values(by='cellLabelInImage')
    
    present_masks = set(np.unique(masks))
    annotated_masks = set(_annotations.cellLabelInImage.unique())
    not_annotated_masks = present_masks - annotated_masks
    
    #print(len(present_masks), len(annotated_masks), len(not_annotated_masks))
    
    lut = np.zeros(len(present_masks))
    for v in present_masks:
        if v in annotated_masks:
            lut[v] = v
        else:
            lut[v] = 0
            
    lut[0] = 0
    lut[1] = 0
    
    masks = lut[masks]
    assert (len(np.unique(masks)) - 1) == len(annotated_masks)
     
    unique_labels = np.unique(masks)
    unique_labels = unique_labels[unique_labels != 0]  # exclude background
    
    centroids = np.array(ndimage.center_of_mass(masks, labels=masks, index=unique_labels)).round()

    if len(centroids) == 0:
        print('NO CELLS')
        continue


    image_annotations = _annotations[['cellLabelInImage', 'cellSize', 'Group', 'immuneGroup']]
    image_annotations['DIM1'] = centroids[:, 0].astype(int)
    image_annotations['DIM2'] = centroids[:, 1].astype(int)
    image_annotations = image_annotations.reset_index(drop=True)   
     
    image_annotations['Group'] = image_annotations.Group.map(group_mapping)
    image_annotations['immuneGroup'] = image_annotations.immuneGroup.map(immune_mapping)

    image_annotations['annotation'] = (
        image_annotations['immuneGroup']
        .combine_first(image_annotations['Group'])
    )
    
    image_annotations = image_annotations.rename(columns={
        'cellLabelInImage': 'mask_value',
        'immuneGroup': 'immune_annotation',
        'Group': 'group_annotation',
    })
    
    image_annotations['SampleID'] = point_id
    all_annotations.append(image_annotations)
    print(len(all_annotations))

    #tifffile.imwrite(target_masks_folder / f'{point_id}_masks.tif', masks)
    #shutil.copy2(image_file, target_images_folder / f'{point_id}_image.tif')
    
combined_annotations = pd.concat(all_annotations, ignore_index=True)
combined_annotations.to_csv(dataset_folder / 'annotations.csv', index=False)

  0%|          | 0/41 [00:00<?, ?it/s]

12 Point12_nobg.tif


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM1'] = centroids[:, 0].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM2'] = centroids[:, 1].astype(int)
  2%|▏         | 1/41 [00:02<01:39,  2.49s/it]

1
11 Point11_nobg.tif


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM1'] = centroids[:, 0].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM2'] = centroids[:, 1].astype(int)
  5%|▍         | 2/41 [00:04<01:36,  2.46s/it]

2
5 Point5_nobg.tif


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM1'] = centroids[:, 0].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM2'] = centroids[:, 1].astype(int)
  7%|▋         | 3/41 [00:07<01:28,  2.33s/it]

3
6 Point6_nobg.tif


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM1'] = centroids[:, 0].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM2'] = centroids[:, 1].astype(int)
 10%|▉         | 4/41 [00:08<01:14,  2.01s/it]

4
41 Point41_nobg.tif


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM1'] = centroids[:, 0].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM2'] = centroids[:, 1].astype(int)
 12%|█▏        | 5/41 [00:10<01:04,  1.79s/it]

5
20 Point20_nobg.tif


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM1'] = centroids[:, 0].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM2'] = centroids[:, 1].astype(int)
 15%|█▍        | 6/41 [00:11<00:59,  1.70s/it]

6
38 Point38_nobg.tif


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM1'] = centroids[:, 0].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM2'] = centroids[:, 1].astype(int)
 17%|█▋        | 7/41 [00:12<00:54,  1.61s/it]

7
3 Point3_nobg.tif


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM1'] = centroids[:, 0].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM2'] = centroids[:, 1].astype(int)
 20%|█▉        | 8/41 [00:14<00:53,  1.62s/it]

8
22 Point22_nobg.tif


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM1'] = centroids[:, 0].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM2'] = centroids[:, 1].astype(int)
 22%|██▏       | 9/41 [00:15<00:48,  1.52s/it]

9
39 Point39_nobg.tif


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM1'] = centroids[:, 0].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM2'] = centroids[:, 1].astype(int)
 24%|██▍       | 10/41 [00:17<00:46,  1.50s/it]

10
33 Point33_nobg.tif


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM1'] = centroids[:, 0].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM2'] = centroids[:, 1].astype(int)
 27%|██▋       | 11/41 [00:18<00:42,  1.41s/it]

11
37 Point37_nobg.tif


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM1'] = centroids[:, 0].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM2'] = centroids[:, 1].astype(int)
 29%|██▉       | 12/41 [00:20<00:41,  1.45s/it]

12
18 Point18_nobg.tif


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM1'] = centroids[:, 0].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM2'] = centroids[:, 1].astype(int)
 32%|███▏      | 13/41 [00:21<00:41,  1.49s/it]

13
7 Point7_nobg.tif


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM1'] = centroids[:, 0].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM2'] = centroids[:, 1].astype(int)
 34%|███▍      | 14/41 [00:23<00:39,  1.45s/it]

14
34 Point34_nobg.tif


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM1'] = centroids[:, 0].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM2'] = centroids[:, 1].astype(int)
 37%|███▋      | 15/41 [00:24<00:36,  1.40s/it]

15
24 Point24_nobg.tif


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM1'] = centroids[:, 0].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM2'] = centroids[:, 1].astype(int)
 39%|███▉      | 16/41 [00:25<00:34,  1.40s/it]

16
16 Point16_nobg.tif


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM1'] = centroids[:, 0].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM2'] = centroids[:, 1].astype(int)
 41%|████▏     | 17/41 [00:27<00:35,  1.47s/it]

17
14 Point14_nobg.tif


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM1'] = centroids[:, 0].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM2'] = centroids[:, 1].astype(int)
 44%|████▍     | 18/41 [00:28<00:34,  1.49s/it]

18
31 Point31_nobg.tif


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM1'] = centroids[:, 0].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM2'] = centroids[:, 1].astype(int)
 46%|████▋     | 19/41 [00:30<00:31,  1.45s/it]

19
15 Point15_nobg.tif


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM1'] = centroids[:, 0].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM2'] = centroids[:, 1].astype(int)
 49%|████▉     | 20/41 [00:31<00:29,  1.41s/it]

20
35 Point35_nobg.tif


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM1'] = centroids[:, 0].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM2'] = centroids[:, 1].astype(int)
 51%|█████     | 21/41 [00:33<00:29,  1.47s/it]

21
10 Point10_nobg.tif


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM1'] = centroids[:, 0].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM2'] = centroids[:, 1].astype(int)
 54%|█████▎    | 22/41 [00:34<00:28,  1.49s/it]

22
1 Point1_nobg.tif


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM1'] = centroids[:, 0].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM2'] = centroids[:, 1].astype(int)
 56%|█████▌    | 23/41 [00:36<00:28,  1.56s/it]

23
28 Point28_nobg.tif


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM1'] = centroids[:, 0].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM2'] = centroids[:, 1].astype(int)
 59%|█████▊    | 24/41 [00:38<00:30,  1.77s/it]

24
13 Point13_nobg.tif


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM1'] = centroids[:, 0].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM2'] = centroids[:, 1].astype(int)
 61%|██████    | 25/41 [00:41<00:31,  1.99s/it]

25
4 Point4_nobg.tif


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM1'] = centroids[:, 0].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM2'] = centroids[:, 1].astype(int)
 63%|██████▎   | 26/41 [00:43<00:31,  2.11s/it]

26
27 Point27_nobg.tif


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM1'] = centroids[:, 0].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM2'] = centroids[:, 1].astype(int)
 66%|██████▌   | 27/41 [00:45<00:28,  2.07s/it]

27
29 Point29_nobg.tif


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM1'] = centroids[:, 0].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM2'] = centroids[:, 1].astype(int)
 68%|██████▊   | 28/41 [00:47<00:27,  2.15s/it]

28
40 Point40_nobg.tif


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM1'] = centroids[:, 0].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM2'] = centroids[:, 1].astype(int)
 71%|███████   | 29/41 [00:49<00:23,  1.93s/it]

29
9 Point9_nobg.tif


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM1'] = centroids[:, 0].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM2'] = centroids[:, 1].astype(int)
 73%|███████▎  | 30/41 [00:50<00:19,  1.80s/it]

30
25 Point25_nobg.tif


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM1'] = centroids[:, 0].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM2'] = centroids[:, 1].astype(int)
 76%|███████▌  | 31/41 [00:52<00:16,  1.64s/it]

31
32 Point32_nobg.tif


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM1'] = centroids[:, 0].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM2'] = centroids[:, 1].astype(int)
 78%|███████▊  | 32/41 [00:53<00:14,  1.59s/it]

32
36 Point36_nobg.tif


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM1'] = centroids[:, 0].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM2'] = centroids[:, 1].astype(int)
 80%|████████  | 33/41 [00:54<00:11,  1.49s/it]

33
19 Point19_nobg.tif


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM1'] = centroids[:, 0].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM2'] = centroids[:, 1].astype(int)
 83%|████████▎ | 34/41 [00:56<00:10,  1.47s/it]

34
26 Point26_nobg.tif


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM1'] = centroids[:, 0].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM2'] = centroids[:, 1].astype(int)
 85%|████████▌ | 35/41 [00:57<00:08,  1.46s/it]

35
23 Point23_nobg.tif


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM1'] = centroids[:, 0].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM2'] = centroids[:, 1].astype(int)
 88%|████████▊ | 36/41 [00:59<00:07,  1.45s/it]

36
2 Point2_nobg.tif


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM1'] = centroids[:, 0].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM2'] = centroids[:, 1].astype(int)
 90%|█████████ | 37/41 [01:00<00:05,  1.42s/it]

37
21 Point21_nobg.tif


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM1'] = centroids[:, 0].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM2'] = centroids[:, 1].astype(int)
 93%|█████████▎| 38/41 [01:01<00:04,  1.45s/it]

38
30 Point30_nobg.tif


 95%|█████████▌| 39/41 [01:03<00:02,  1.35s/it]

NO CELLS
17 Point17_nobg.tif


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM1'] = centroids[:, 0].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM2'] = centroids[:, 1].astype(int)
 98%|█████████▊| 40/41 [01:04<00:01,  1.42s/it]

39
8 Point8_nobg.tif


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM1'] = centroids[:, 0].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annotations['DIM2'] = centroids[:, 1].astype(int)
100%|██████████| 41/41 [01:05<00:00,  1.61s/it]


40


In [63]:
import pandas as pd
marker_names = shutil.copy2('/home/simon_g/isilon_images_mnt/10_MetaSystems/MetaSystemsData/_simon/data/TNBC/TNBC_nobackground/panel.csv', dataset_folder / 'marker_names.csv')

In [None]:
from pathlib import Path
import numpy as np
import tifffile
from copy import deepcopy
import matplotlib.pyplot as plt
from scipy import ndimage
from BioUtensils.extraction import get_cell_expression_fast

for patient_id in info.SampleID.unique():
    filename = Path(f'/home/simon_g/isilon_images_mnt/10_MetaSystems/MetaSystemsData/_simon/data/TNBC/TNBCShareCellData/p{patient_id}_labeledcellData.tiff')
    if not filename.exists():
        print(f'{filename.name} is missing!')
        continue
    masks = tifffile.imread(filename)
    tmp_info = info[info.SampleID == patient_id].sort_values(by='cellLabelInImage')
    print(len(np.unique(masks)), len(tmp_info))
    print(tmp_info)
    
    present_masks = set(np.unique(masks))
    annotated_masks = set(tmp_info.cellLabelInImage.unique())
    not_annotated_masks = present_masks - annotated_masks
    
    lut = np.zeros(len(present_masks))
    for v in present_masks:
        if v in annotated_masks:
            lut[v] = v
        else:
            lut[v] = 0
            
    lut[0] = 0
    lut[1] = 0
    
    plt.imshow(masks)
    plt.show()
    
    tmp_masks = deepcopy(masks)
    new = lut[masks]
    
    print(len(np.unique(new)), len(tmp_info))
    
    cell_labels = np.unique(new)
    cell_labels = cell_labels[cell_labels != 0]  # exclude background
    
    centroids = np.array(ndimage.center_of_mass(new, labels=new, index=cell_labels)).round().astype(int)
    print(centroids.shape)
    plt.imshow(new)
    plt.scatter(centroids[:, 1], centroids[:, 0], s=1, color='red')
    
    # sizes = get_cell_expression_fast(new != 0, new, get_size=True)
    
    print(tmp_info.tumorCluster.unique())
    
    # Primary classes

    
    break