In [None]:
from collections import defaultdict
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor

import pandas as pd
import numpy as np
from skimage.util import invert
from skimage.io import imread, imsave
from scipy.ndimage import gaussian_filter
import seaborn as sns
from matplotlib import pyplot as plt

from chromemt_analysis import segment_like_paper, continuous_erosion_edt, linear_fit_to_residual_volume, parse_simulation_zip

## Analyze both simulated patches and ChromEMT data and compare the results

## 1) load data

In [None]:
## 1. load simulated data

# root folder of the data set; locations used on other machines are kept for reference
# basedir = Path('E:/chromemt_data/')
# basedir = Path('/Volumes/davidh-ssd/chromemt_data/')
basedir = Path('/Users/david/Desktop/chromemt_data/')

# old, separate files -- entries are (id, zipfile, file in zipfile)
# simulation_data = [
#     ('sim_irregular_390', basedir / 'simulations/VoxelTest.zip', None ),
#     ('sim_regular_390', basedir / 'simulations/VoxelTestReg.zip', None ),
#     ('sim_inactive_cd_415', basedir / 'simulations/voxelVersion415.zip', 'voxeltestInactiveCD.dat' ),
#     ('sim_irregular_415', basedir / 'simulations/voxelVersion415.zip', 'voxeltestIrr.dat' ),
#     ('sim_regular_415', basedir / 'simulations/voxelVersion415.zip', 'voxeltestReg.dat' )
# ]

# new all-in-one zip: every simulation is a separate file inside the same archive
all_cvc_zip = basedir / 'simulations/AllCVC.zip'
subfile_by_id = {
    'sim_random_390': 'Irr390.txt',
    'sim_random_415': 'Irr415.txt',
    'sim_random_440': 'Irr440.txt',
    'sim_equidistant_320': 'Reg320.txt',
    'sim_equidistant_390': 'Reg390.txt',
    'sim_equidistant_415': 'Reg415.txt',
    'sim_equidistant_440': 'Reg440.txt',
    'sim_k562_region_cd_415': 'InactiveCD.txt',
}

# id, zipfile, file in zipfile
simulation_data = [(id_, all_cvc_zip, subfile) for id_, subfile in subfile_by_id.items()]

# load all simulations: take the second item returned by the parser,
# cast it to float32 and invert it
simulated_images = {}
for id_, zip_, subfile in simulation_data:
    volume = parse_simulation_zip(zip_, subfile)[1].astype(np.float32)
    simulated_images[id_] = invert(volume)

In [None]:
## 2. load ChromEMT data

# pixel size and tile edge length; tile_size_nm / pixel_size gives the tile edge
# in pixels, so pixel_size is expressed in nm per pixel
pixel_size = 1.28
tile_size_nm = 120.0
# tile edge length in pixels, rounded up to a whole pixel
tile_size = int( np.ceil( tile_size_nm / pixel_size ))


def cut_tile_grid(img, tile_size, n_tiles, tile_offsets):
    """Cut a regular grid of cubic tiles out of ``img``.

    Parameters
    ----------
    img : array
        Volume to cut from; must have one axis per entry of ``n_tiles``.
    tile_size : int
        Edge length of each tile in pixels (same along every axis).
    n_tiles : sequence of int
        Number of tiles along each axis.
    tile_offsets : sequence of int
        Start index of the first tile along each axis.

    Returns
    -------
    list of arrays
        ``prod(n_tiles)`` tiles, ordered with the last axis varying fastest.

    Notes
    -----
    NumPy slicing clips at the array border, so tiles that would extend past
    ``img`` come back smaller than ``tile_size`` instead of raising.
    """
    # start indices of the tiles along each axis
    axis_starts = [
        np.arange(offset, offset + n * tile_size, tile_size)
        for offset, n in zip(tile_offsets, n_tiles)
    ]
    # all combinations of per-axis starts, one row per tile
    starts = np.stack(np.meshgrid(*axis_starts, indexing='ij'), -1)
    starts = starts.reshape((np.prod(n_tiles), -1))
    return [img[tuple(slice(c, c + tile_size) for c in start)] for start in starts]


patches_chromemt = {}

# first volume: a single tile layer centred along axis 0, 8 x 8 tiles in-plane
chromemt_img = imread(basedir / '49801.tif')
n_tiles = (1, 8, 8)
tile_offsets = ((chromemt_img.shape[0] - tile_size) // 2 , 150, 50)
patches_chromemt['real_chromemt_interphase'] = cut_tile_grid(chromemt_img, tile_size, n_tiles, tile_offsets)

# second volume: a single tile layer centred along axis 0, 6 x 6 tiles in-plane
chromemt_img = imread(basedir / '49803.tif')
n_tiles = (1, 6, 6)
tile_offsets = ((chromemt_img.shape[0] - tile_size) // 2 , 310, 130)
patches_chromemt['real_chromemt_mitotic'] = cut_tile_grid(chromemt_img, tile_size, n_tiles, tile_offsets)

## 2) simulate experimental variation

In [None]:
# expected molecules per pixel inside structure
expected_molecules = 0.5

# blur before sampling molecule pos
sigma_diffusion = 1.0

# Seeded generator so the sampled volumes (and everything derived from them)
# are identical on every Restart & Run All.
random_seed = 0
rng = np.random.default_rng(random_seed)

simulated_images_exp_var = {}

for id_, img in simulated_images.items():
    # the volumes were inverted when they were loaded; invert again before sampling
    img = invert(img)
    # expected molecule count per pixel, smoothed with a Gaussian
    img_expected_molecules = gaussian_filter(img * expected_molecules, sigma_diffusion)
    # draw the actual molecule count per pixel from a Poisson distribution
    img_sampled_molecules = rng.poisson(img_expected_molecules).astype(np.float32)
    # invert once more so the result follows the same convention as simulated_images
    img_simulation_final = invert(img_sampled_molecules)
    simulated_images_exp_var[id_] = img_simulation_final

In [None]:
def cut_central_patches(img, patch_size=94):
    """Cut ``img`` into a centred grid of non-overlapping patches.

    As many whole patches as fit are taken along every axis; the leftover
    pixels are split between both borders so the grid sits in the centre.
    Works for arrays of any dimensionality >= 1 (the grid has one axis per
    array axis).

    Parameters
    ----------
    img : array
        Array to cut up.
    patch_size : int, optional
        Edge length of each patch in pixels, identical along every axis.

    Returns
    -------
    list of arrays
        The patches, ordered with the last axis varying fastest. Empty if the
        array is smaller than ``patch_size`` along any axis.
    """
    n_dims = len(img.shape)

    n_patches = [s // patch_size for s in img.shape]
    # half of the pixels that do not fit into a whole patch go before the grid
    offset = [s % patch_size // 2 for s in img.shape]

    axis_starts = [np.arange(off, off + n * patch_size, patch_size) for off, n in zip(offset, n_patches)]
    # one row of start coordinates per patch; the row length follows the input
    # dimensionality instead of being fixed to 3
    cut_starts = np.stack(np.meshgrid(*axis_starts, indexing='ij'), -1).reshape(-1, n_dims)
    cuts = [img[tuple(slice(csi, csi + patch_size) for csi in cs)] for cs in cut_starts]

    return cuts

# cut the clean simulated volumes into central patches (default patch size)
simulated_patches = {}
for id_, img in simulated_images.items():
    simulated_patches[id_] = cut_central_patches(img)

# same for the volumes with simulated experimental variation
simulated_patches_exp_var = {}
for id_, img in simulated_images_exp_var.items():
    simulated_patches_exp_var[id_] = cut_central_patches(img)

### Optional: save volumes with experimental effects applied

In [None]:
# Gaussian sigma in zyx order, roughly estimated via line profiles through
# isolated densities in the EM data (minimal FWHMs ~2.5px xy, ~4.5px z)
sigma = (2.0, 1.0, 1.0)

# write the simulated volumes, blurred to mimic experimental resolution, as tiff
for id_, volume in simulated_images.items():
    blurred = gaussian_filter(volume, sigma)
    # a singleton axis is inserted at position 1 before saving
    imsave(basedir / f'simulations/{id_}.tif', np.expand_dims(blurred, 1))

# same for the volumes with simulated experimental variation
for id_, volume in simulated_images_exp_var.items():
    blurred = gaussian_filter(volume, sigma)
    imsave(basedir / f'simulations/{id_}_sparse.tif', np.expand_dims(blurred, 1))

In [None]:
# Gaussian blur applied to the simulated patches in this cell, given in zyx order.
# sigma zyx roughly estimated via line profiles through isolated densities in EM data
# minimal FWHMs ~2.5px xy, ~4.5px z
sigma = (2.0, 1.0, 1.0)

# column name -> list of per-patch values; filled by the result-collection
# loops further down in this cell
res = defaultdict(list)

def get_cvc_diam(mask):
    """Compute the volume fraction and an estimated diameter for a mask.

    Parameters
    ----------
    mask : array
        Segmentation mask; its sum is taken as the foreground volume.

    Returns
    -------
    (cvc, est_diam)
        ``cvc`` is ``mask.sum() / mask.size``; ``est_diam`` is the first value
        returned by ``linear_fit_to_residual_volume`` for the erosion trace of
        the mask. Both are NaN if the mask sums to zero.
    """
    n_foreground = mask.sum()

    # nothing segmented: neither metric is defined
    if n_foreground == 0:
        return np.nan, np.nan

    volume_fraction = n_foreground / mask.size

    # erosion radii 0..10 handed to the linear fit together with the trace
    radii = np.arange(0, 11)
    residual_trace = continuous_erosion_edt(mask)
    diameter, _ = linear_fit_to_residual_volume(residual_trace, radii)

    return volume_fraction, diameter

def blur_segment_get_metrics(patch, sigma=None):
    """Optionally blur a patch, segment it and return its (cvc, diameter).

    Parameters
    ----------
    patch : array
        Intensity patch to analyse.
    sigma : scalar or sequence, optional
        Gaussian sigma passed to ``gaussian_filter``; ``None`` skips the blur.

    Returns
    -------
    The result of ``get_cvc_diam`` on the mask from ``segment_like_paper``.
    """
    blurred = patch if sigma is None else gaussian_filter(patch, sigma)
    return get_cvc_diam(segment_like_paper(blurred))


# One entry per analysis stage:
# (sim_type label, patches per id, builder of the submit() arguments for one patch, progress message)
stages = [
    # raw simulated masks
    ('raw', simulated_patches,
     lambda patch: (get_cvc_diam, invert(patch.astype(bool))),
     '(1/4) simulated data raw masks done.'),
    # data with blur to match microscope resolution
    ('with_blur', simulated_patches,
     lambda patch: (blur_segment_get_metrics, patch, sigma),
     '(2/4) simulated data with blur done.'),
    # data with extra simulation of sparse labelling
    ('with_blur_sparse', simulated_patches_exp_var,
     lambda patch: (blur_segment_get_metrics, patch, sigma),
     '(3/4) simulated data with blur + sparse labelling done.'),
    # real data, NOTE: no extra blur is applied here
    ('real_data', patches_chromemt,
     lambda patch: (blur_segment_get_metrics, patch, None),
     '(4/4) real data done.'),
]

futures = []
with ThreadPoolExecutor() as tpe:

    # 1. submit tasks to thread pool, remembering which id every future belongs to
    submitted = []
    for sim_type, patches_by_id, task_args, done_message in stages:
        stage_futures = []
        for id_, patches in patches_by_id.items():
            for patch in patches:
                future = tpe.submit(*task_args(patch))
                futures.append(future)
                stage_futures.append((id_, future))
        submitted.append((sim_type, stage_futures, done_message))

    # 2. get results stage by stage in submission order, put in dict
    for sim_type, stage_futures, done_message in submitted:
        for id_, future in stage_futures:
            cvc, est_diam = future.result()

            res['sim_type'].append(sim_type)
            res['id'].append(id_)
            res['cvc'].append(cvc)
            res['diam'].append(est_diam)
        print(done_message)

In [None]:
df = pd.DataFrame.from_dict(res)

# diameters are measured in pixels: scale by the pixel size defined in the
# ChromEMT loading cell instead of repeating the literal value here
df['diam'] *= pixel_size
# fraction -> percent
df['cvc'] *= 100
# middle part of the id, e.g. 'sim_random_390' -> 'random'
df['type'] = df.id.str.split('_', n=1, expand=True)[1].str.rsplit('_', n=1, expand=True)[0]

# pdf.fonttype 42 (TrueType) for the saved PDFs; set once before saving
plt.rc('pdf', fonttype='42')

# one box plot per metric, grouped by processing variant (sim_type)
for metric, pdf_path in (
    ('cvc', '/Users/david/Desktop/density-figures/comparison_plot_cvc_bysimtype.pdf'),
    ('diam', '/Users/david/Desktop/density-figures/comparison_plot_diam_bysimtype.pdf'),
):
    fig, ax = plt.subplots(figsize=(10,6))
    sns.boxplot(ax=ax, data=df, x='id', y=metric, hue='sim_type')
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
    # save this figure explicitly rather than whatever pyplot considers current
    fig.savefig(pdf_path)

In [None]:
df = pd.DataFrame.from_dict(res)

# diameters are measured in pixels: scale by the pixel size defined in the
# ChromEMT loading cell instead of repeating the literal value here
df['diam'] *= pixel_size
# fraction -> percent
df['cvc'] *= 100
# middle part of the id, e.g. 'sim_random_390' -> 'random'
df['type'] = df.id.str.split('_', n=1, expand=True)[1].str.rsplit('_', n=1, expand=True)[0]

# keep only the simulations with blur + sparse labelling and the real data;
# NOTE: df stays filtered for any later cell that reads it
df = df[df.sim_type.isin(['with_blur_sparse','real_data'])]

# pdf.fonttype 42 (TrueType) for the saved PDFs; set once before saving
plt.rc('pdf', fonttype='42')

# one box plot per metric, coloured by structure type
for metric, pdf_path in (
    ('cvc', '/Users/david/Desktop/density-figures/comparison_plot_cvc_onlychromemt+sparse.pdf'),
    ('diam', '/Users/david/Desktop/density-figures/comparison_plot_diam_onlychromemt+sparse.pdf'),
):
    fig, ax = plt.subplots(figsize=(10,6))
    sns.boxplot(ax=ax, data=df, x='id', y=metric, hue='type')
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
    # save this figure explicitly rather than whatever pyplot considers current
    fig.savefig(pdf_path)

In [None]:
# mean and standard deviation of diameter and cvc for every id
grouped_by_id = df.groupby('id')
grouped_by_id.diam.describe()[['mean', 'std']], grouped_by_id.cvc.describe()[['mean', 'std']]