# <span style="color:darkblue">04-Spot_Assignment</span>

In this notebook we will assign the detected RNAs and transcription sites (Txs) to each uniquely labeled cell in our Cellpose-produced cell and nuclear masks.

## 4.0 - Load libraries

In [1]:
import os
import sys
from glob import glob

import napari
import numpy as np
import pandas as pd
from skimage import io
from skimage.measure import regionprops_table, regionprops

sys.path.append('../')

from src.misc import group_experiments, load_data

***

## 4.1 - Load spot & cell/nuclei mask data and check image

In [2]:
# choose FISH channel and the field of view to inspect
channel='CY35'
Strain= 'CET111'
mRNA='HWP1CAL610CLB2Q670'
condition='SPIDER37'
Fov='02'


def _first_match(pattern):
    """Return the alphabetically first path matching ``pattern``.

    The spot file names contain the detection threshold (``thr*``), so they
    have to be looked up with a wildcard. ``glob`` returns matches in
    arbitrary order; sorting makes the choice reproducible when several
    files match.

    Raises
    ------
    FileNotFoundError
        If no file matches ``pattern`` (instead of an uninformative
        ``IndexError`` from indexing an empty list).
    """
    matches = sorted(glob(pattern))
    if not matches:
        raise FileNotFoundError(f'no file found matching {pattern}')
    return matches[0]


# load image data
RNAs = io.imread(f'../data/restructured_data/replicate1/{Strain}_{mRNA}_{condition}_{channel}_{Fov}.tif')
DAPI = io.imread(f'../data/restructured_data/replicate1//{Strain}_{mRNA}_{condition}_DAPI_{Fov}.tif')
DIC = io.imread(f'../data/restructured_data/replicate1//{Strain}_{mRNA}_{condition}_DIC_{Fov}.tif')
mask = io.imread(f'../data/restructured_data/replicate1//Masks/{Strain}_{mRNA}_{condition}_DIC_{Fov}_seg.tif')
nuclear_mask = io.imread(f'../data/restructured_data/replicate1/Masks/{Strain}_{mRNA}_{condition}_DAPI_{Fov}_seg.tif')

# load spot data (detected spots and decomposed dense regions)
spot_data = np.load(_first_match(f'../data/restructured_data/replicate1/Spots/{Strain}_{mRNA}_{condition}_{channel}_{Fov}_spots_thr*.npy'))
dense_data = np.load(_first_match(f'../data/restructured_data/replicate1/Spots decomposition/{Strain}_{mRNA}_{condition}_{channel}_{Fov}_spots_thr*_dd_regions.npy'))

In [3]:
def preprocess_spot_data(spot_data, dense_data):
    """Merge single spots and dense regions into one ``(n, 4)`` table.

    Parameters
    ----------
    spot_data : np.ndarray
        One row per detected spot with the columns ``z, y, x``.
    dense_data : np.ndarray
        One row per dense region with the columns ``z, y, x, mRNA count``
        followed by further columns that are not needed here.

    Returns
    -------
    np.ndarray
        Rows of ``z, y, x, mRNA count``: first all spots (each with a count
        of 1), then all dense regions.
    """
    # every plain spot stands for exactly one mRNA
    n_spots = spot_data.shape[0]
    unit_counts = np.ones((n_spots, 1), dtype=spot_data.dtype)
    single_rows = np.hstack((spot_data, unit_counts))

    # keep only coordinates and mRNA count of the dense regions
    dense_rows = dense_data[:, :4]

    return np.vstack((single_rows, dense_rows))


def count_spots(mask, nuclear_mask, spot_data, cells):
    """Add every spot / dense region to the tally of the cell it lies in.

    Parameters
    ----------
    mask : np.ndarray
        Cell label image, indexed as ``mask[y, x]``.
    nuclear_mask : np.ndarray
        Nuclear label image, indexed as ``nuclear_mask[y, x]``.
    spot_data : iterable of (z, y, x, number)
        Rows as produced by ``preprocess_spot_data``; the z coordinate is
        not used for the lookup.
    cells : dict
        Maps each cell label to its dict of counters. Updated in place.

    Returns
    -------
    dict
        The same ``cells`` object that was passed in.
    """
    for _, row, col, n_rna in spot_data:
        owner = mask[row, col]
        nuclear_label = nuclear_mask[row, col]
        tally = cells[owner]

        # a count of exactly 1 marks a plain spot
        if n_rna == 1:
            tally['spots_per_cell'] += n_rna
            continue

        # everything else is a dense region holding several RNAs
        tally['dense_regions_per_cell'] += 1
        tally['decomposed_RNAs'] += n_rna

        # a dense region inside a nucleus additionally counts as a
        # transcription site and its RNAs as nascent RNAs
        if nuclear_label > 0:
            tally['tx_per_cell'] += 1
            tally['nascent_RNAs'] += n_rna
    return cells

def count_nuclei(mask, nuclear_mask, cells):
    """Count, for every cell, the nuclei whose centroid lies inside it.

    Each labelled region of ``nuclear_mask`` is assigned to the cell label
    found in ``mask`` at the region's centroid (coordinates truncated to
    integers). ``cells`` is updated in place and returned.
    """
    # a single cell can collect several nuclei - hyphae may have multiple ones!
    centroids = (region.centroid for region in regionprops(nuclear_mask))
    for centroid_y, centroid_x in centroids:
        owner = mask[int(centroid_y), int(centroid_x)]
        cells[owner]['nuclei'] += 1
    return cells

def spot_assignment(mask, nuclear_mask, spot_data, dense_data):
    """Build a per-cell table of spot, dense-region and nucleus counts.

    Parameters
    ----------
    mask : np.ndarray
        Cell label image; label 0 is treated as background.
    nuclear_mask : np.ndarray
        Nuclear label image.
    spot_data : np.ndarray
        Detected spots with the columns ``z, y, x``.
    dense_data : np.ndarray
        Dense regions with the columns ``z, y, x, mRNA count, ...``.

    Returns
    -------
    pandas.DataFrame
        One row per cell label: the requested region properties of the cell
        (``label``, ``bbox``, ``area``, ``eccentricity``) merged on ``label``
        with the counters ``spots_per_cell``, ``dense_regions_per_cell``,
        ``decomposed_RNAs``, ``tx_per_cell``, ``nascent_RNAs``, ``nuclei``
        and the derived ``total_RNAs_per_cell``.
    """
    # one zeroed tally per label present in the cell mask (this includes the
    # background label 0 whenever the mask contains background pixels)
    cells = {}

    for cell_id in np.unique(mask):
        cells[cell_id] = {
            'spots_per_cell': 0,
            'dense_regions_per_cell': 0,
            'decomposed_RNAs': 0,
            'tx_per_cell': 0,
            'nascent_RNAs': 0,
            'nuclei': 0
        }

    spot_data_combined = preprocess_spot_data(spot_data, dense_data)

    cells = count_spots(mask, nuclear_mask, spot_data_combined, cells)
    cells = count_nuclei(mask, nuclear_mask, cells)

    # remove spots on background; a mask without any background pixel has no
    # entry for label 0, so a missing key must not abort the assignment
    cells.pop(0, None)

    # convert to dataframe, collect object information and merge
    df = pd.DataFrame(cells).T.reset_index().rename(columns={'index': 'label'})
    # NOTE(review): the dense-region count is subtracted, presumably because
    # each dense region is also contained once in the detected spots - confirm
    df['total_RNAs_per_cell'] = df['spots_per_cell'] + df['decomposed_RNAs'] - df['dense_regions_per_cell']

    props = pd.DataFrame(regionprops_table(mask, properties=['label', 'bbox', 'area', 'eccentricity']))
    df = props.merge(df, on='label')

    return df

In [4]:
# assign the spots and dense regions of the loaded field of view to its cells
df = spot_assignment(
    mask=mask,
    nuclear_mask=nuclear_mask,
    spot_data=spot_data,
    dense_data=dense_data,
)

In [5]:
# output file for the per-cell table of the field of view selected above;
# the name is built from the strain, mRNA, condition, channel and FOV settings
root_dir = '../data/restructured_data/replicate1'
savename = f"{root_dir}/Results/{Strain}_{mRNA}_{condition}_{channel}_{Fov}.csv"

In [6]:
# to_csv does not create missing folders, so make sure the target directory
# exists before writing the per-cell table
os.makedirs(os.path.dirname(savename), exist_ok=True)
df.to_csv(savename)

***

## 4.2 - Batch assignment

In [None]:
# dataset folder that is searched for experiments and that receives the
# batch results (written to its Results subfolder)
root_dir = '../data/restructured_data/replicate1'

In [None]:
# collect the experiments available under root_dir
experiments = group_experiments(root_dir)

# show what was found so the experiments to process can be chosen below
print(
    'I found the following experiments:',
    experiments.keys(),
    'select applicable experiments',
    sep='\n',
)

In [None]:
# Process every experiment that was found; replace this with a list of
# selected keys to run only a subset.
experiments_to_process = experiments.keys()

# to_csv does not create missing folders, so make sure the output directory
# exists before the first result is written
results_dir = f"{root_dir}/Results"
os.makedirs(results_dir, exist_ok=True)

# inputs that must be available for a field of view to be processed
required_entries = ['spots', 'dense', 'cell_mask', 'nuclear_mask']

for identifier in experiments_to_process:
    fovs = experiments[identifier]

    for fov, paths in fovs.items():
        print(f'processing {identifier=}, {fov=}')
        data = load_data(paths)
        # NOTE(review): `channel` is not defined in this section - it is the
        # value set in the single-image cell of section 4.1, so that cell has
        # to be run first and all batch outputs carry that channel name
        savename = f"{results_dir}/{identifier}_{channel}_{fov}.csv"

        # check if all files required for this step have been loaded
        missing = [entry for entry in required_entries if data.get(entry) is None]
        for entry in missing:
            print(f'{identifier=}, {fov=}, {entry=} could not be found')
            print(f'skipping {identifier=}, {fov=}!')

        if not missing:
            df = spot_assignment(
                data.get('cell_mask'),
                data.get('nuclear_mask'),
                data.get('spots'),
                data.get('dense')
            )

            print(f'saving data to {savename}')
            df.to_csv(savename)
            print('done.')

        print(10*'-')