In [None]:
# Associate manual annotations of cell types (x, y positions)
# with single cells
# Run training using CellSighter
# authors: Pacome Prompsy
# contact: pacome.prompsy@chuv.ch
# Guenova Lab
# CHUV (Centre Hospitalier Universitaire Vaudois), Lausanne, Suisse


In [None]:
import sys
sys.path.append(".")
import os
import argparse
import numpy as np
import json
import os
import pandas as pd
import numpy as np
from tifffile import imread
import random
import tifffile
from scipy.spatial.distance import cdist


In [None]:
# Input and output locations, all relative to the notebook's working directory.
# Root folder for everything this notebook writes for CellSighter.
output_dir = "../output/CellSighter"
# CSV table read below for its `cell_type` and `marker` columns.
cell_marker_file = "../annotation/cell_markers.csv"
# One sub-folder per sample, holding one "<marker>.tiff" image per marker.
tiff_dir = "../output/input/"
# Holds "<sample>_whole_cell.tiff".
segmentation_dir = "../output/segmentation/"
# Holds "<sample>_cell_table_size_normalized.csv.gz".
cell_table_dir = "../output/cell_table/"

In [None]:
# Sample (ROI) to process and the annotator whose annotations are used.
sample = "ROI-15"
annotator = "Annotator1"

# The dataset name is always "<sample>-<annotator>"; with an empty annotator
# this leaves the trailing dash ("<sample>-").
name = "-".join((sample, annotator))

# Folder holding the manual annotation CSV files of this dataset.
annotation_dir = "../output/CellSighter/manual_annotation_celltype/" + name

In [None]:
# Marker table; only its `cell_type` and `marker` columns are used here.
cell_type = pd.read_csv(cell_marker_file)

# Distinct cell types in reverse order of first appearance in the table:
# np.unique(..., return_index=True)[1] gives the first row of every distinct value,
# sorting those rows restores the table order, and np.flip reverses it.
first_rows_of_cell_types = np.unique(cell_type.cell_type, return_index=True)[1]
cell_types_unique = np.flip(cell_type.cell_type[np.sort(first_rows_of_cell_types)])

# Distinct markers, built the same way (reverse order of first appearance).
first_rows_of_markers = np.unique(cell_type.marker, return_index=True)[1]
marker_unique = np.flip(cell_type.marker[np.sort(first_rows_of_markers)])

In [None]:
# Read the DAPI image of the selected sample.
dapi_path = os.path.join(tiff_dir, sample, "DAPI.tiff")
image = imread(dapi_path)


In [None]:
# Folder layout written by this notebook, rooted at `output_dir`:
#   cell_classification/CellTypes/cells
#   cell_classification/CellTypes/cells2labels
#   cell_classification/CellTypes/data/images
base_dir = os.path.join(output_dir, "cell_classification")
cellType_dir = os.path.join(base_dir, "CellTypes")
cells_dir = os.path.join(cellType_dir, "cells")
cells2labels_dir = os.path.join(cellType_dir, "cells2labels")
data_dir = os.path.join(cellType_dir, "data")
images_dir = os.path.join(data_dir, "images")

# os.makedirs also creates every missing parent, so creating the three leaf
# folders covers output_dir, base_dir, cellType_dir and data_dir as well.
# exist_ok=True keeps the cell safe to re-run and avoids the check-then-create
# race of `if not os.path.exists(...)`.
for directory in (cells_dir, cells2labels_dir, images_dir):
    os.makedirs(directory, exist_ok=True)


In [None]:
# Read the "<sample>_whole_cell.tiff" segmentation image of the sample.
segmentation_path = os.path.join(segmentation_dir, sample + "_whole_cell.tiff")
whole_cell = imread(segmentation_path)

# Store it under the key "data" in the "cells" folder, named after the dataset.
cells_npz_path = os.path.join(cells_dir, name + ".npz")
np.savez(cells_npz_path, data=whole_cell)

In [None]:
# Load every manual annotation CSV found in `annotation_dir`.
# The files are sorted because os.listdir returns them in arbitrary order and the
# concatenation order decides which annotation is applied last when several
# annotated points end up on the same cell.
csv_files = sorted(f for f in os.listdir(annotation_dir) if f.endswith('.csv'))
if not csv_files:
    # pd.concat would raise a ValueError too, but without saying which folder is empty.
    raise ValueError("No annotation CSV files found in " + annotation_dir)

dfs = []
for csv_file in csv_files:
    # One dataframe of annotated points per file
    df = pd.read_csv(os.path.join(annotation_dir, csv_file))

    # The cell type is the part of the file name before the first "-".
    # It is stored under its own name so that the marker table `cell_type`
    # loaded earlier in the notebook is not overwritten by a string.
    annotated_cell_type = os.path.basename(csv_file).split('-')[0]
    df['cell_type'] = annotated_cell_type

    dfs.append(df)

# Single table of all annotated points, with a fresh 0..n-1 index.
result_df = pd.concat(dfs, ignore_index=True)


In [None]:
# Per-cell table of the sample; later cells read its `fov`, `label`,
# `centroid-0` and `centroid-1` columns.
cell_table_path = os.path.join(cell_table_dir, sample + "_cell_table_size_normalized.csv.gz")
cells = pd.read_csv(cell_table_path)


In [None]:
# Distinct cell types present in the manual annotations (array, in order of first appearance)
annotated_cell_type_column = result_df['cell_type']
cell_types = annotated_cell_type_column.unique()


In [None]:
# Show the annotated cell types as the cell output.
cell_types

In [None]:
# Start with an empty cell type for every cell; cells that are never matched
# to a manual annotation keep "" (encoded as -1 further down).
cells["cell_type"] = ""

In [None]:
# Assign every manually annotated point to the nearest segmented cell.
# Distances are expressed in the units of the centroid coordinates.
SEARCH_HALF_WINDOW = 200  # half-width of the square pre-filter around an annotated point
MAX_MATCH_DISTANCE = 150  # a point labels a cell only if its centroid is closer than this

# Centroids and labels are not modified by the loop (only `cell_type` is),
# so the array is extracted once instead of once per annotated point.
cell_coords_and_labels = cells[['centroid-0', 'centroid-1', 'label']].values

annotated_points = zip(result_df["axis-0"], result_df["axis-1"], result_df["cell_type"])
for point_x, point_y, cell_class in annotated_points:
    # Cheap square pre-filter: keep only cells whose centroid lies strictly
    # inside the window centred on the annotated point.
    in_window = (
        (cell_coords_and_labels[:, 0] > point_x - SEARCH_HALF_WINDOW)
        & (cell_coords_and_labels[:, 0] < point_x + SEARCH_HALF_WINDOW)
        & (cell_coords_and_labels[:, 1] > point_y - SEARCH_HALF_WINDOW)
        & (cell_coords_and_labels[:, 1] < point_y + SEARCH_HALF_WINDOW)
    )
    candidates = cell_coords_and_labels[in_window]
    if candidates.shape[0] == 0:
        # No cell near this annotation: nothing to label.
        continue

    # Euclidean distance from the annotated point to every candidate centroid.
    cell_distances = cdist([[point_x, point_y]], candidates[:, 0:2]).flatten()

    # argmin returns the first minimum, i.e. the same cell the original
    # `np.where(d == d.min())[0][0]` lookup selected.
    nearest = np.argmin(cell_distances)

    # Label the nearest cell only when it is close enough; a later annotation
    # hitting the same cell overwrites an earlier one.
    if cell_distances[nearest] < MAX_MATCH_DISTANCE:
        matched_label = candidates[nearest, 2]
        cells.loc[cells.label == matched_label, "cell_type"] = cell_class


In [None]:
# Export the manual label of every cell (fov, label, cell_type) for this dataset.
predictions_dir = "../output/CellSighter/celltype/Predictions/"

# DataFrame.to_csv does not create missing folders and no other cell creates
# this one, so make sure it exists before writing.
os.makedirs(predictions_dir, exist_ok=True)

cells_df = cells[["fov","label", "cell_type"]]
cells_df.to_csv(predictions_dir + name + "_true_labels.csv")

In [None]:
# Encode the cell-type names as integers; cells without annotation ("") get -1.
unique_strings = set(cells["cell_type"])
string_to_int = {
 "":-1,
 "Macrophages":0,
 "APC":1,
 "B_cell":2,
 "T_regulatory":3,
 "Monocytes":4,
 "Neutrophils":5,
 "NKT":6,
 "Keratinocyte":7,
 "Leukocyte":8,
 "Endothelial":9,
 "T_cytotoxic":10,
 "pDC":11,
 "T_helper":12,
 "Fibroblast":13,
 "Lymphatic":14,
 "Basophil":15,
 "Monocytic_Lineage":16
                }

# The cell types come from the annotation file names, so a new or misspelled
# file name yields a type without an integer code. Report all of them at once
# (same KeyError type as the plain dictionary lookup would raise).
unknown_cell_types = unique_strings - set(string_to_int)
if unknown_cell_types:
    raise KeyError(
        "Cell types without an integer code in string_to_int: "
        + ", ".join(sorted(map(str, unknown_cell_types)))
    )

# Every value is known at this point, so the mapping cannot produce missing values.
cell_type_int = cells["cell_type"].map(string_to_int).tolist()
cells["cell_type_int"] = cell_type_int


In [None]:
# Lookup array indexed by cell label: labels[label] holds the integer cell type
# of that cell, and -1 for every label value that has no row in `cells`.
n_label_slots = int(max(cells.label)) + 1
labels = np.full(n_label_slots, -1.0)

# Cell labels as plain Python ints so they can be used as array positions.
idx = list(map(int, cells["label"].to_list()))
labels[idx] = cells["cell_type_int"]

# Store the lookup array under the key "data" in the "cells2labels" folder.
labels_npz_path = os.path.join(cells2labels_dir, name + ".npz")
np.savez(labels_npz_path, data=labels)

In [None]:
# NOTE(review): this rebinds `cells2labels_dir` to a path under
# "../output/CellSighter/celltype/", which is not the folder derived from
# `output_dir` that the labels were saved to earlier in the notebook.
# Re-running the label export after this cell would write to this other
# location; confirm the rebinding is intended.
cells2labels_dir = "../output/CellSighter/celltype/cell_classification/CellTypes/cells2labels"

In [None]:
# Build the multi-channel image of the sample: one "<marker>.tiff" image per
# entry of `marker_unique`, kept in that order.
marker_images = [
    imread(os.path.join(tiff_dir, sample, marker + ".tiff"))
    for marker in marker_unique
]

# Stack the images with the marker axis first, then move it last.
# The (1, 2, 0) transpose requires a 3-D stack, i.e. 2-D images of identical shape.
all_markers = np.transpose(np.array(marker_images), (1, 2, 0))

# Store the stack under the key "data" in the "images" folder, named after the dataset.
images_npz_path = os.path.join(images_dir, name + ".npz")
np.savez(images_npz_path, data=all_markers)