In [None]:
import imageio as io
import pandas as pd
import os
import matplotlib.pyplot as plt
import numpy as np
import tifffile
from tqdm.notebook import tqdm
import pathlib
from cellpose import models, core
import json
import glob
import PIL
import scanpy as sc

# `import PIL` on its own does not guarantee that the `PIL.Image` submodule is
# loaded, so import it explicitly before touching its settings.
import PIL.Image

# Disable Pillow's pixel-count guard so very large images can be opened.
# `None` switches the check off outright (no arbitrary huge number needed).
# Only appropriate for trusted, locally produced image files.
PIL.Image.MAX_IMAGE_PIXELS = None

# Root data folder and the experiment sub-folder read by the cells below
# (files are looked up under <image_path>/<experiment_name>/).
image_path = 'data'
experiment_name = 'DH_r1'

# Load DAPI

In [None]:
# Build a maximum-intensity projection over all DAPI images of the region.
dapi_images = sorted(glob.glob('output/region_1/images/*DAPI*'))
if not dapi_images:
    # Fail here with a clear message instead of an IndexError further down.
    raise FileNotFoundError("No DAPI images found matching 'output/region_1/images/*DAPI*'")

# Keep only a running element-wise maximum: at most two images are held in
# memory at a time and no list -> array -> list round trip is needed.
running_max = None
for position, path in enumerate(dapi_images, start=1):
    img = np.asarray(io.imread(pathlib.Path(path)))
    if running_max is None:
        running_max = img
    else:
        if img.shape != running_max.shape:
            # np.maximum would otherwise broadcast (or fail) in confusing ways.
            raise ValueError(
                f'{path} has shape {img.shape}, expected {running_max.shape}'
            )
        running_max = np.maximum(running_max, img)
    print(f'{position}/{len(dapi_images)} projected: {path}')

# Downstream cells read the projection from `image_array_list[0]`.
image_array_list = [running_max]


In [None]:
# Take the single projected plane left in the list and materialise it as its own array.
projected_plane = image_array_list[0]
maxed_image = np.array(projected_plane)

In [None]:

def read_dapi_image(path: str, downscale_factor: int = 2) -> np.ndarray:
    """Read the image at ``path`` and pass it through ``downscale_image``.

    Args:
        path: Location of the image file on disk.
        downscale_factor: Forwarded unchanged to ``downscale_image``.

    Returns:
        Whatever ``downscale_image`` returns for the loaded image.
    """
    raw_image = io.imread(pathlib.Path(path))
    processed = downscale_image(raw_image, downscale_factor=downscale_factor)
    return processed

def downscale_image(img: np.ndarray, downscale_factor: int = 2) -> np.ndarray:
    """Zero-pad ``img`` so its first two axes are multiples of ``downscale_factor``.

    NOTE(review): despite its name, this function performs no resampling; it
    only pads the bottom and right edges. The output therefore has the same
    resolution as the input. Confirm whether an actual downscaling step is
    still intended before relying on the name.

    Args:
        img: Array with at least two axes; axis 0 is treated as height and
            axis 1 as width. Any further axes (e.g. channels) are left unpadded.
        downscale_factor: Positive integer the padded height and width must be
            divisible by.

    Returns:
        A padded copy of ``img`` (zeros appended at the end of axes 0 and 1).

    Raises:
        ValueError: If ``downscale_factor`` is smaller than 1.
    """
    if downscale_factor < 1:
        raise ValueError(f'downscale_factor must be >= 1, got {downscale_factor}')

    # Number of rows/columns missing to reach the next multiple of the factor
    # (0 when the size is already a multiple).
    pad_height = (-img.shape[0]) % downscale_factor
    pad_width = (-img.shape[1]) % downscale_factor

    # Pad only the two spatial axes; trailing axes get a (0, 0) entry so that
    # multi-channel images are accepted as well.
    pad_spec = [(0, pad_height), (0, pad_width)] + [(0, 0)] * (img.ndim - 2)
    return np.pad(img, pad_spec, mode='constant')


# Run Cellpose

In [None]:
def run_cellpose(img: np.ndarray, model_path: str) -> (np.ndarray, np.ndarray, np.ndarray):
    """Segment ``img`` with the Cellpose model loaded from ``model_path``.

    The image is wrapped in a one-element list before evaluation, so callers
    index the returned masks with ``[0]`` to get the result for ``img``.

    Args:
        img: Image to segment.
        model_path: Passed to ``CellposeModel`` as ``pretrained_model``.

    Returns:
        The ``(masks, flows, styles)`` triple produced by ``model.eval``.
    """
    # Use the GPU only when cellpose reports one as usable.
    segmenter = models.CellposeModel(gpu=core.use_gpu(), pretrained_model=model_path)

    eval_options = dict(
        channels=[0, 0],
        # Object diameter is taken from the loaded model itself.
        diameter=segmenter.diam_labels,
        flow_threshold=0,
        cellprob_threshold=0,
    )
    masks, flows, styles = segmenter.eval([img], **eval_options)
    return masks, flows, styles


In [None]:
# Segment the DAPI max projection with the model stored under models/DAPI.
dapi_model_path = r'models/DAPI'
masks, flows, styles = run_cellpose(maxed_image, model_path=dapi_model_path)

Plot and save segmentation

In [None]:
# Show the label image for the first (and only) image passed to Cellpose.
segmentation_labels = masks[0]
plt.imshow(segmentation_labels)

## Add the new segmentation to the transcripts.csv

In [None]:
# Load the transcript table of the experiment; the first CSV column becomes the index.
transcripts_csv = os.path.join(image_path, experiment_name, 'detected_transcripts.csv')
detected_transcripts = pd.read_csv(transcripts_csv, index_col=0)
detected_transcripts

Get the micron (µm) to mosaic-pixel conversion

In [None]:
def get_pixel_size(path: str, experiment: "str | None" = None) -> pd.DataFrame:
    """Read ``micron_to_mosaic_pixel_transform.csv`` for one experiment.

    The file is read with pandas' default comma separator and without a header
    row, so the result is the raw table exactly as pandas parses it; callers
    are responsible for splitting/converting the cell contents.

    Args:
        path: Root folder that contains the experiment sub-folder.
        experiment: Name of the experiment sub-folder. Defaults to the
            notebook-level ``experiment_name`` when omitted.

    Returns:
        The parsed file as a ``pandas.DataFrame`` (not a float, despite the
        function's name).
    """
    if experiment is None:
        # Backward-compatible default: fall back to the notebook-wide setting.
        experiment = experiment_name
    transform_csv = os.path.join(path, experiment, 'micron_to_mosaic_pixel_transform.csv')
    return pd.read_csv(transform_csv, index_col=None, header=None)

# Read the transform from the same data root used for the transcript table.
# (`image_path` is the only data-root variable defined in this notebook.)
pixel_size = get_pixel_size(image_path).values
pixel_size

In [None]:
# Each row of `pixel_size` holds one whitespace-separated line of the transform file.
# x: scale is the 1st entry of row 0, offset is its 3rd entry.
x_terms = pixel_size[0][0].split()
x_scale, x_offset = float(x_terms[0]), float(x_terms[2])
detected_transcripts['global_x_pixels'] = detected_transcripts.global_x.values * x_scale + x_offset

# y: scale is the 2nd entry of row 1, offset is its 3rd entry.
y_terms = pixel_size[1][0].split()
y_scale, y_offset = float(y_terms[1]), float(y_terms[2])
detected_transcripts['global_y_pixels'] = detected_transcripts.global_y.values * y_scale + y_offset

In [None]:
# Look up the mask label under every transcript (rows = y, columns = x).
# NOTE(review): astype(int) truncates, and negative indices would wrap around
# silently - confirm all transcripts fall inside the segmented image.
transcript_rows = detected_transcripts.global_y_pixels.values.astype(int)
transcript_cols = detected_transcripts.global_x_pixels.values.astype(int)
detected_cells = masks[0][transcript_rows, transcript_cols]

detected_transcripts['cell_id'] = detected_cells
# A label above 0 means the transcript lies inside a segmented object.
inside_object = detected_cells > 0
detected_transcripts['overlaps_nucleus'] = inside_object.astype(int)
detected_transcripts

In [None]:
# Overlay transcript positions on the segmentation to check the alignment.
fig, ax = plt.subplots()
ax.imshow(masks[0])
# No `cmap` here: without per-point values in `c` matplotlib ignores it and warns.
ax.scatter(
    detected_transcripts.global_x_pixels,
    detected_transcripts.global_y_pixels,
    s=1,
    alpha=0.02,
)
ax.set(
    title='Transcripts over segmentation masks',
    xlabel='x (pixels)',
    ylabel='y (pixels)',
);

In [None]:
# Same per-transcript mask lookup that produced the `cell_id` column (rows = y, columns = x).
lookup_rows = detected_transcripts.global_y_pixels.values.astype(int)
lookup_cols = detected_transcripts.global_x_pixels.values.astype(int)
detected_cells = masks[0][lookup_rows, lookup_cols]

In [None]:
# Count transcripts per (mask label, gene) pair: rows are labels, columns are genes.
gene_per_transcript = detected_transcripts['gene'].values
cross_tab = pd.crosstab(index=detected_cells, columns=gene_per_transcript)

In [None]:
# Drop the background row only. Label 0 marks "no object" (see the
# `detected_cells > 0` test used for `overlaps_nucleus`); every other label is
# a segmented object. A `% 10000` test would also discard labels 10000,
# 20000, ... which are ordinary cells in a single segmentation run.
cross_tab = cross_tab[cross_tab.index != 0]

In [None]:
# Keep only cells that collected more than one transcript in total.
transcripts_per_cell = cross_tab.values.sum(axis=1)
cross_tab = cross_tab[transcripts_per_cell > 1]

In [None]:

# Wrap the cell x gene count table in an AnnData object:
# genes become `var` rows, mask labels become `obs` rows.
gene_table = pd.DataFrame(index=cross_tab.columns)
cell_table = pd.DataFrame(index=cross_tab.index.tolist())
adata = sc.AnnData(X=cross_tab.values, var=gene_table, obs=cell_table)

In [None]:
# Embedding pipeline, applied in order with scanpy defaults to the count
# matrix as-is: PCA -> neighbour graph -> UMAP.
for embedding_step in (sc.tl.pca, sc.pp.neighbors, sc.tl.umap):
    embedding_step(adata)

In [None]:
# Colour the UMAP by the counts of one gene, capping the colour scale at 4.
gene_to_show = 'Tmem119'
sc.pl.umap(adata, color=gene_to_show, vmax=4)

In [None]:
# Create the per-experiment output folder (and any missing parents).
# `exist_ok=True` makes re-running the cell harmless, while real problems such
# as missing permissions still surface instead of being silently swallowed.
os.makedirs(os.path.join('output', f'{experiment_name}'), exist_ok=True)

In [None]:
# Save the unprocessed AnnData object into the experiment's output folder.
adata_file = os.path.join('output', f'{experiment_name}', f'{experiment_name}_adata.h5ad')
adata.write(adata_file)

In [None]:
# Attach a pixel position to every cell kept in `adata`, compute QC metrics,
# plot the cells in space and save the result.
image_compare = masks[0].astype(int)

# Convert the obs names back to integer mask labels (the float step also
# accepts names that were stored as strings like "12" or "12.0").
unique_values = adata.obs.index.astype(float).astype(int)

# For each label, np.unique reports the flat (row-major) index of its first pixel.
# NOTE: this is the first pixel encountered for the cell, not its centroid.
cells, first_flat_index = np.unique(image_compare, return_index=True)

# Convert flat index -> (row, column). unravel_index uses the full image shape,
# so this is also correct for non-square images.
row_coord, col_coord = np.unravel_index(first_flat_index, image_compare.shape)

# x is the column and y is the row, consistent with how `global_x_pixels` /
# `global_y_pixels` index the mask (masks[0][y, x]).
data = pd.DataFrame({'x': col_coord, 'y': row_coord}, index=cells)
xy_adata = data.loc[unique_values.tolist()].values

adata.obs['x'] = xy_adata[:, 0]
adata.obs['y'] = xy_adata[:, 1]

adata.obsm['X_spatial'] = xy_adata

sc.pp.calculate_qc_metrics(adata, inplace=True)

sc.pl.embedding(adata, basis='spatial', color='total_counts', size=20, vmax=3000)

adata.write(os.path.join('output', f'{experiment_name}', f'{experiment_name}_adata_processed.h5ad'))

In [None]:
# Export the transcript table, now carrying the Cellpose cell assignment columns.
transcripts_out = os.path.join('output', f'{experiment_name}', "transcripts_cellpose.csv")
detected_transcripts.to_csv(transcripts_out)