In [None]:
sm = snakemake

In [None]:
import spherpro.bro as sb
from  spherpro import datastore as sd
import spherpro.library as sl
import spherpro.db as db
import imp
import pycytools as pct
import pycytools.library
import re
import pandas as pd
import numpy as np
import plotnine as pn
import seaborn as sns
import operator
import sqlalchemy as sa
from sqlalchemy import func
%matplotlib inline
import matplotlib.pyplot as plt
import pycytools
import imp
import scipy.stats as stats

import pathlib
import importlib as ipl

# Aim: Correct the 'distance to sphere border' readout

Distance to sphere border can be used as a substitute measurement for global environment.

However, the 'naive' distance-to-border measurement we have is the distance to the border of the sphere mask within the imaged section. For tangential cuts this systematically overestimates the true distance to the sphere border.

To counteract this, we use the sphere size measured on intact spheres and assume sphericity of the spheres to calculate a corrected distance-to-border readout.

## 0) Config

In [None]:
# Path to the configuration file, handed in by the workflow object `sm`.
fn_config = sm.input.fn_config
# Folder that receives the figures saved by the `fig.savefig(fol_plot / ...)` cells.
fol_plot = pathlib.Path('results/figures/')
# Create the folder up front: `savefig` does not create missing parent
# directories, so the plotting cells would otherwise fail on a fresh checkout.
fol_plot.mkdir(parents=True, exist_ok=True)

In [None]:
bro = sb.get_bro(fn_config)

In [None]:
#given variables

COL_CONDITIONID = db.conditions.condition_id.key
COL_PLATE = db.conditions.plate_id.key
COL_WELL = db.conditions.well_name.key
COL_IMGID = db.images.image_id.key
COL_OBJID = db.objects.object_id.key
COL_VALUE = db.object_measurements.value.key
COL_MEASID = db.measurements.measurement_id.key

In [None]:
# variables bfanalysis
COL_BF_AREA = 'AreaShape_Area'
COL_BF_WELL = 'Metadata_well'
COL_BF_PLATE = 'Metadata_plate'

In [None]:
#defined here
COL_DTRIM = 'dist_to_rim'
COL_APPERENT_RADIUS = 'apparent_sphere_rad'
COL_IMG_RADIUS = 'image_radius'
COL_BF_RADIUS = 'bf_radius'
COL_BF_RADIUS_SCALED = 'scaled_radius'

In [None]:
COL_CORR_D2RIM = 'corr_dist2rim'

In [None]:
BF_RESOLTUION = 5*0.65 # 5x downscaling for segmentation, and 0.65 um/pixel microscope resolution

## 1) Load real radius data

The data was saved during the bf analysis

In [None]:
file_radius = sm.input.fn_bf_quantification
dat_bf = pd.read_csv(file_radius)

In [None]:
dat_bf[COL_BF_PLATE].unique()

In [None]:
dat_bf[COL_BF_PLATE].unique()

## 2) Query for the distances to rim

In [None]:
q_measmeta = (bro.data.get_measmeta_query()
                             .filter(
                     db.measurements.measurement_name == 'MeanIntensity',
                     db.ref_planes.channel_name == 'dist-sphere',
                     db.stacks.stack_name == 'DistStack',
             )
                          )

q_objmeta = (bro.data.get_objectmeta_query()
                          .join(db.conditions, db.conditions.condition_id==db.images.condition_id)
                     .filter(db.objects.object_type == 'cell')
                               .add_columns(db.images.condition_id))

In [None]:
%%time
adat_dist = bro.io.objmeasurements.get_measurements(q_meas=q_measmeta, q_obj=q_objmeta)

In [None]:
data_dist = bro.io.objmeasurements.convert_anndata_legacy(adat_dist)

In [None]:
data_dist = data_dist.query(f'{db.object_measurements.value.key} < 2**16-2').copy()

In [None]:
dat_conditions = bro.doquery(bro.data.main_session.query(db.conditions))

In [None]:
data_dist[COL_DTRIM] = data_dist[db.object_measurements.value.key]

## 3) Find the maximal observed distance to rim

3.1) Set all the negative values to zero

In [None]:
data_dist.loc[data_dist[COL_DTRIM]<0,COL_DTRIM]= 0

In [None]:
def get_rads(df):
    """
    Summarise the maximal distance to rim per image and per condition.

    The maximum of COL_DTRIM is taken once per (COL_IMGID, COL_CONDITIONID)
    group and stored as COL_IMG_RADIUS, and once per COL_CONDITIONID group and
    stored as COL_APPERENT_RADIUS. The two tables are joined on COL_CONDITIONID.

    Args:
        df: DataFrame with the columns COL_IMGID, COL_CONDITIONID and COL_DTRIM.

    Returns:
        DataFrame with one row per (image, condition) pair and the columns
        COL_IMGID, COL_CONDITIONID, COL_IMG_RADIUS and COL_APPERENT_RADIUS.
    """
    # Largest distance to rim seen in each individual image
    per_image = (
        df.groupby([COL_IMGID, COL_CONDITIONID])[COL_DTRIM]
        .max()
        .reset_index()
        .rename(columns={COL_DTRIM: COL_IMG_RADIUS})
    )
    # Largest distance to rim seen over all images of a condition
    per_condition = (
        df.groupby([COL_CONDITIONID])[COL_DTRIM]
        .max()
        .reset_index()
        .rename(columns={COL_DTRIM: COL_APPERENT_RADIUS})
    )
    return per_image.merge(per_condition, on=COL_CONDITIONID)

3.2) Find the maximum distance to rim of each sphere (this corresponds to the apparent sphere radius) and the maximum distance to rim of each image (this corresponds to the apparent image radius)

In [None]:
dat_imc_radius = get_rads(data_dist)

In [None]:
dat_imc_radius

3.3) Calculate the sphere radius from the brightfield areas

In [None]:
def get_radius(area, resolution):
    """
    Radius of the circle with the given area, scaled by `resolution`.

    Args:
        area: area of the circle (e.g. in pixels^2).
        resolution: factor the radius is multiplied with (e.g. um per pixel).

    Returns:
        sqrt(area / pi) * resolution
    """
    return np.sqrt(area / np.pi) * resolution

In [None]:
dat_bf[COL_BF_RADIUS] = dat_bf[COL_BF_AREA].apply(get_radius, resolution=BF_RESOLTUION)

In [None]:
dat_bf = dat_bf.rename(columns={COL_BF_PLATE: COL_PLATE, COL_BF_WELL: COL_WELL})

Visualize the apparent sphere radius (= maximum observed distance to rim per sphere) vs. the brightfield radius


In [None]:
import seaborn as sns

In [None]:
data_scatter = (dat_bf
    .merge(dat_conditions)
    .merge(dat_imc_radius))

In [None]:
fig, ax = plt.subplots(figsize=(5,5))
sns.distplot(data_scatter[COL_APPERENT_RADIUS],label='apparent_radius',ax=ax)
sns.distplot(data_scatter[COL_BF_RADIUS],label='real_radius',ax=ax)
plt.legend()   

In [None]:
sns.lmplot(x=COL_BF_RADIUS,y=COL_APPERENT_RADIUS,data = data_scatter)

Find the correction that maps from the apparent radius to the real radius

In [None]:
from statsmodels.graphics.api import abline_plot
from statsmodels.formula.api import ols, rlm
# NOTE(review): this rebinds the name `sm`, which the first cell set to the
# `snakemake` object (`sm = snakemake`). Cells that read `sm.input...` will
# fail if they are re-run after this import without restarting the kernel.
import statsmodels as sm

In [None]:
# Robust linear regression of the apparent radius on the brightfield radius.
rlm_model = rlm(f'{COL_APPERENT_RADIUS}~{COL_BF_RADIUS}', data_scatter).fit()
print(rlm_model.summary())

# `params` is indexed by term name. Fall back to 0 only when the model has no
# intercept term, instead of hiding every possible error behind a bare `except`.
b = rlm_model.params.get('Intercept', 0)
m = rlm_model.params[COL_BF_RADIUS]

# Scatter of the data with the fitted line on top.
fig, ax = plt.subplots(figsize=(5,5))
ax.scatter(data_scatter[COL_BF_RADIUS], data_scatter[COL_APPERENT_RADIUS])
ax.plot(data_scatter[COL_BF_RADIUS], (data_scatter[COL_BF_RADIUS]*m+b), '-')
# Same range on both axes so that the slope can be judged visually.
ax.set_xlim(50, 260)
ax.set_ylim(50, 260)

ax.set_xlabel('real_radius')
ax.set_ylabel('apparent_sphere_radius')

Background:
Spheroids could shrink during embedding, or we could have some systematic errors:
- Real Radius: calculated based on the equivalent diameter of the brightfield image segmentation
- Apparent radius: This is the maximal distance to rim (outside) of any of the cells in all of the cuts of a sphere. This can be underestimated for two reasons:
    1) No center cut: a perfect center cut is needed to not underestimate the sphere radius by distance to rim
        -> Particularly relevant for small spheres
    2) Deformation of the spheres: any deformation leads to an underestimation of radius based on distance to rim
        -> Particularly relevant for large spheres
        
For segmentation this poses a problem:
There is reason to assume that the apparent sphere radius is systematically underestimated, in particular for small (1) and large (2) spheres. This kind of asymmetric error poses a problem for regression.

The robust regression indicates a negligible intercept and a slope of ca. 0.77, which I think is an underestimate (it should be closer to 1).

Alternative estimation strategy: compute the ratio apparent/real radius and look at its histogram

In [None]:
%matplotlib inline

In [None]:
(pn.ggplot(data_scatter, pn.aes(x=f'{COL_APPERENT_RADIUS}/{COL_BF_RADIUS}'))+
     pn.geom_histogram())

Get the scaling factor as the mean of the 120 largest apparent/real radius ratios

In [None]:
scale_bf = data_scatter.eval(f'{COL_APPERENT_RADIUS}/{COL_BF_RADIUS}').sort_values().tail(120).mean()

In [None]:
fig, ax = plt.subplots(figsize=(5,5))
ax.scatter(data_scatter[COL_BF_RADIUS],data_scatter[COL_APPERENT_RADIUS])
ax.plot(data_scatter[COL_BF_RADIUS],(data_scatter[COL_BF_RADIUS]*scale_bf),'-')
plt.xlim(50,260)
plt.ylim(50,260)
ax.set_xlabel('real_radius')
ax.set_ylabel('apparent_sphere_radius')

In [None]:
dat_bf[COL_BF_RADIUS_SCALED] = dat_bf[COL_BF_RADIUS] * scale_bf

In [None]:
print(scale_bf)

3.4) Calculate the corrected distance to rim with the following formula:
<br>
$$r_{real} = R - \sqrt{R^{2}-2rx+ x^{2}}$$

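where $r_{real}$ is the real (corrected) distance to rim, $R$ is the radius of the sphere (`scaled_radius`), $r$ is the radius of the cut (`image_radius`) and $x$ is the distance to rim in the cut (apparent distance to rim, `dist_to_rim`). The formula follows from Pythagoras: a cut of radius $r$ lies at height $h=\sqrt{R^{2}-r^{2}}$ above the sphere center, so a point at distance $r-x$ from the center of the cut lies at distance $\sqrt{h^{2}+(r-x)^{2}}=\sqrt{R^{2}-2rx+x^{2}}$ from the sphere center.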

In [None]:
def calculate_real_dist_to_rim(sphere_radius, cut_rad, dist_to_rim):
    """
    Correct an in-section distance to rim for the position of the cut.

    Computes ``R - sqrt(R**2 - 2*r*x + x**2)`` with R = sphere_radius,
    r = cut_rad and x = dist_to_rim.

    Args:
        sphere_radius: radius of the whole sphere (scalar).
        cut_rad: radius of the section the object was measured in (scalar).
        dist_to_rim: distance to rim measured within the section (scalar).

    Returns:
        The corrected distance to rim. If the section radius is larger than
        the sphere radius no correction is applied and `dist_to_rim` is
        returned unchanged.
    """
    if not (cut_rad > sphere_radius):
        # squared distance of the object from the sphere center
        radicand = sphere_radius**2 - (2*dist_to_rim*cut_rad) + dist_to_rim**2
        return sphere_radius - np.sqrt(radicand)
    # Section appears larger than the sphere: the formula is not applicable.
    return dist_to_rim

In [None]:
data_dist = data_dist.set_index(COL_OBJID, drop=False)

In [None]:
# Index by object id (keeping the column) so that the per-object result below
# is aligned back onto `data_dist` by object id.
data_dist = data_dist.set_index(COL_OBJID, drop=False)

# Attach the image radius, the condition metadata and the scaled brightfield
# radius to every object; merges use the columns the frames have in common.
dist_with_radii = (
    data_dist
    .merge(dat_imc_radius)
    .merge(dat_conditions)
    .merge(dat_bf)
    .set_index(COL_OBJID)
)

# Row-wise correction of the distance to rim.
data_dist[COL_CORR_D2RIM] = dist_with_radii.apply(
    lambda row: calculate_real_dist_to_rim(
        sphere_radius=row[COL_BF_RADIUS_SCALED],
        cut_rad=row[COL_IMG_RADIUS],
        dist_to_rim=row[COL_DTRIM],
    ),
    axis=1,
)

In [None]:

sns.jointplot(x=COL_DTRIM,y=COL_CORR_D2RIM,data= data_dist)

-> As expected, the corrected distances to rim are shorter than the raw ones

## Upload the corrected distance to rim to the database

In [None]:
OUT_STACK = 'ObjectStack'
OUT_CHANNEL_TYPE = 'object'
OUT_CHANNEL_NAME = 'object'
OUT_MEASUREMENT_NAME = 'dist-rim'
OUT_MEASUREMENT_TYPE = 'Location'

In [None]:
plane_id = (bro.session.query(db.planes.plane_id)
            .join(db.stacks)
            .join(db.ref_stacks)
            .join(db.ref_planes)
            .filter(db.stacks.stack_name == OUT_STACK,
                   db.ref_planes.channel_type == OUT_CHANNEL_TYPE,
                   db.ref_planes.channel_name == OUT_CHANNEL_NAME)).one()[0]

In [None]:
dat_measure_meta = pd.DataFrame({db.measurements.measurement_name.key: [OUT_MEASUREMENT_NAME],
                                db.measurements.plane_id.key: plane_id,
                                db.measurements.measurement_type.key: OUT_MEASUREMENT_TYPE})

In [None]:
dat_measure_meta

In [None]:
dat_measure_meta = bro.processing.measurement_maker.register_measurements(dat_measure_meta)

In [None]:
dat_obj_meas = (data_dist
                .loc[:, [COL_CORR_D2RIM, COL_OBJID, 'object_type', COL_IMGID]]
                .rename(columns={COL_CORR_D2RIM: COL_VALUE})
                .dropna()
               )

In [None]:
dtrim_measid = dat_measure_meta[COL_MEASID].values[0]
dat_obj_meas[COL_MEASID] = dtrim_measid

In [None]:
bro.processing.measurement_maker.add_object_measurements(dat_obj_meas, drop_all_old=True)

Remove all images that do not have a distance to rim

In [None]:
good_imgs = dat_obj_meas[COL_IMGID].unique()
stmt = bro.session.query(db.valid_images).filter(sa.not_(db.valid_images.image_id.in_([int(i) for i in good_imgs])))

In [None]:
bro.doquery(stmt)

In [None]:
stmt.delete(synchronize_session='fetch')
    #bro.session|.execute(stmt)
bro.session.commit()

Calculate the 'average' distance to rim of neighbours

In [None]:
bro.processing.nb_aggregation.add_nb_measurement('NbMean',np.mean, object_type='cell', measurement_name=OUT_MEASUREMENT_NAME,
                                                    stack_name=OUT_STACK)

## Sanity checks: Plot distance to rim vs Pt194

CisPt194 was added to all spheres after pooling.

Thus its diffusion gradient inside the spheres could be used as a substitute readout for distance to border (e.g. like Durand, R. E. (1982). Use of Hoechst 33342 for cell selection from multicell systems. Journal of Histochemistry and Cytochemistry, 30(2), 117–122. http://doi.org/10.1177/30.2.6174559)

In [None]:
import importlib

import spherpro.bromodules.helpers_vz as helpers_vz

# Pick up edits to the helper module without restarting the kernel.
# `importlib.reload` replaces `imp.reload`: the `imp` module is deprecated
# since Python 3.4 and no longer exists in Python 3.12.
importlib.reload(helpers_vz)
hpr = helpers_vz.HelperVZ(bro)

In [None]:
from scipy import stats

In [None]:
col_int = 'Pt194'
col_raw = 'dist-sphere'
col_corr = 'object'
transf = lambda x: np.log10(x+0.1)

In [None]:
q = (bro.session.query(db.conditions.condition_id)
     #.join(db.sampleblocks)
     #.filter(db.sampleblocks.sampleblock_name == blockname)
   #  .filter(db.conditions.condition_name.like('DLD%'))
     #.filter(db.conditions.bc_x.in_([2,3,6,10,11]))
    ).all()

In [None]:
condids = [r[0] for r in q ]

In [None]:
measids = [m[0] for m in (bro.data.get_measmeta_query()
           .filter(bro.filters.measurements.get_measmeta_filter_statements(
    channel_names=[col_int,'object', 'dist-sphere'],
    stack_names=['FullStackFiltered', 'ObjectStack', 'DistStack'],
    measurement_names=['MeanIntensityComp', 'dist-rim', 'MeanIntensity'],
    measurement_types=[None, None, None]))
    .with_entities(db.measurements.measurement_id)).all()]

In [None]:
%%time
dat = hpr.get_data(cond_ids=condids, meas_ids=measids, object_type='cell' ,legacy=False )

In [None]:
dat_measmeta = hpr.get_measuremeta(bro.data.pannel,
                                   measurement_names=['MeanIntensityComp', 'NbMeanMeanIntensityComp'],
                                   additional_measfilt=db.measurements.measurement_id.in_(measids)
                                  )

In [None]:
bro.helpers.anndata.add_anndata_varmeta(dat, dat_measmeta, on='measurement_id')

In [None]:
dat.var_names = dat.var['channel_name']

In [None]:
dat.var_names

In [None]:
q = (bro.session.query(db.images.image_id, db.conditions)
     .join(db.conditions)
    )

dat_cond = bro.doquery(q)


In [None]:
bro.helpers.anndata.add_anndata_obsmeta(dat, dat_cond, on=db.images.image_id.key)

Check how distance to border changes upon correction over all:

In [None]:
fig = plt.figure(figsize=(3,3))
plt.hexbin(dat.obs_vector(col_raw), transf(dat.obs_vector(col_int)),
          gridsize=100, rasterized=True)
plt.colorbar()
plt.title(f'Distance to border raw')
plt.xlabel(r'Raw distance to border [$\mu m$]')
plt.ylabel('Cisplatin Pt194 [log10(MeanIntensity)]')
fig.savefig(fol_plot / 'd2rim_raw_hm.pdf')

In [None]:
fig = plt.figure(figsize=(3,3))
plt.hexbin(dat.obs_vector(col_corr), transf(dat.obs_vector(col_int)),
          gridsize=100,rasterized=True)
plt.colorbar()
plt.title(f'Distance to border corrected')
plt.xlabel(r'Corrected distance to border [$\mu m$]')
plt.ylabel('Cisplatin Pt194 [log10(MeanIntensity)]')
fig.savefig(fol_plot / 'd2rim_corr_hm.pdf')

In [None]:
print(f'''
Spearman overall:
raw: {stats.spearmanr(dat.obs_vector(col_raw), dat.obs_vector(col_int))}

corr: {stats.spearmanr(dat.obs_vector(col_corr), dat.obs_vector(col_int))}

-> Overall correlation seems to improve upon correction
''')

Also check the correlations for each sphere (=condition_id) - how often does it improve?

In [None]:
# Spearman correlation of the Pt194 signal with the raw and with the corrected
# distance to border, computed separately for every sphere (= condition_id).
cordict = {}
for cond_id in dat.obs.condition_id.unique():
    adat_cond = dat[dat.obs.condition_id == cond_id]
    intensity = adat_cond.obs_vector(col_int)
    rho_raw = stats.spearmanr(adat_cond.obs_vector(col_raw), intensity).correlation
    rho_corr = stats.spearmanr(adat_cond.obs_vector(col_corr), intensity).correlation
    cordict[cond_id] = (rho_raw, rho_corr)

# One row per sphere, one column per distance readout
dat_cor = pd.DataFrame(cordict).T
dat_cor.columns = ('raw', 'corrected')

# Change of the correlation coefficient caused by the correction
dat_cor = dat_cor.assign(delta=lambda x: x['corrected'] - x['raw'])

In [None]:
axs = dat_cor.hist(layout=(1,3), figsize=(5,1.5), bins=25)
axs[0,0].set_title(r'$\rho_{raw}$')
axs[0,1].set_title(r'$\rho_{corrected}$')
axs[0,2].set_title(r'$\Delta (\rho)$')
axs[0,0].set_ylabel('count')
axs[0,0].set_xlabel(r'Spearman $\rho$')
axs[0,1].set_xlabel(r'Spearman $\rho$')
axs[0,2].set_xlabel(r'$\Delta (\rho_{corrected}-\rho_{raw})$')

In [None]:
fig = axs[0,0].get_figure()
fig.savefig(fol_plot / 'd2rim_hist.pdf')

In [None]:
dat_cor.mean()

In [None]:
print(
f'''
Correlation strength increases in: {(dat_cor['delta'] < 0).mean()}
of spheres.
''')

In [None]:
stats.ttest_1samp(dat_cor['delta'],0)

In [None]:
def boot_bigger(series: pd.Series, n: int, val: float, random_state=None):
    """
    Bootstrap estimate of how often the mean of `series` exceeds `val`.

    `series` is resampled `n` times with replacement (same size as the
    original) and the fraction of resamples whose mean is larger than `val`
    is returned.

    Args:
        series: observations to resample.
        n: number of bootstrap resamples.
        val: threshold the resampled mean is compared with.
        random_state: optional seed (int) or numpy `RandomState`/`Generator`
            to make the result reproducible. With the default `None` the
            resampling is unseeded, exactly as before this parameter existed.

    Returns:
        Fraction of the `n` resamples with mean > `val`.
    """
    if random_state is None or isinstance(
            random_state, (np.random.RandomState, np.random.Generator)):
        rng = random_state
    else:
        # Build ONE generator and share it between all resamples: passing the
        # bare seed to every `sample` call would draw the same resample n times.
        rng = np.random.RandomState(random_state)

    is_big = sum(
        series.sample(frac=1, replace=True, random_state=rng).mean() > val
        for _ in range(n)
    )
    return is_big/n
    

In [None]:
boot_bigger(dat_cor['delta'], 100000, 0)

Regress the two first without correction:

In [None]:
import statsmodels.api as sm
import statsmodels.formula.api as smf
import patsy
from patsy.builtins import Q
from patsy import bs

In [None]:
#d = pd.DataFrame({'x': np.log10(dat.obs_vector('Pt194')+0.01), 'y': np.log10(dat.obs_vector( 'object')+0.1)})
d = pd.DataFrame({'x': np.log10(dat.obs_vector(col_int)+0.01), 'y': np.log10(dat.obs_vector( 'dist-sphere')+0.1)})
d[d==np.inf]=np.nan
d[d==-np.inf]=np.nan
d = d.dropna()
#mod = smf.ols(f'y~bs(x, df=10)', data=d).fit()
mod_distsphere = smf.ols(f'y~x', data=d).fit()

In [None]:
mod_distsphere.summary()

In [None]:
plt.hexbin(d['x'],10**d['y'],yscale='log')
plt.colorbar()
predvals = pd.DataFrame({'x': np.arange(d['x'].min(), d['x'].max(), 0.1)})
ax = plt.gca()
ax.set_yscale('log')
plt.scatter(predvals.loc[:,'x'], 10**mod_distsphere.predict(predvals), s=1, c='white')
plt.title(f'Distance to border uncorrected\nvs\nCis{col_int}')
plt.ylabel('Distance to border [um]')
plt.xlabel('Cisplatin [log10(MeanIntensity)]')

In [None]:
plt.hexbin(d['x'],10**d['y'])
plt.colorbar()
predvals = pd.DataFrame({'x': np.arange(d['x'].min(), d['x'].max(), 0.1)})
ax = plt.gca()
plt.scatter(predvals.loc[:,'x'], 10**mod_distsphere.predict(predvals), s=1, c='white')
plt.title(f'Distance to border uncorrected \nvs\nCis{col_int}')
plt.ylabel('Distance to border [um]')
plt.xlabel('Cisplatin [log10(MeanIntensity)]')
plt.ylim((0,150))

And after correction:

In [None]:
d = pd.DataFrame({'x': np.log10(dat.obs_vector(col_int)+0.01), 'y': np.log10(dat.obs_vector( 'object')+0.1)})
#d = pd.DataFrame({'x': np.log10(dat.obs_vector('Pt194')+0.01), 'y': -dat.obs_vector( 'dist-sphere')})
d[d==np.inf]=np.nan
d[d==-np.inf]=np.nan
d = d.dropna()
#mod = smf.ols(f'y~bs(x, df=10)', data=d).fit()
mod = smf.ols(f'y~x', data=d).fit()

In [None]:
mod.summary()

In [None]:
plt.hexbin(d['x'],10**d['y'],yscale='log')
plt.colorbar()
predvals = pd.DataFrame({'x': np.arange(d['x'].min(), d['x'].max(), 0.1)})
ax = plt.gca()
ax.set_yscale('log')
plt.scatter(predvals.loc[:,'x'], 10**mod.predict(predvals), s=1, c='white')
plt.title(f'Distance to border\nvs\nCis{col_int}')
plt.ylabel('Distance to border [um]')
plt.xlabel('Cisplatin [log10(MeanIntensity)]')

In [None]:
plt.hexbin(d['x'],10**d['y'])
plt.colorbar()
predvals = pd.DataFrame({'x': np.arange(d['x'].min(), d['x'].max(), 0.1)})
ax = plt.gca()
plt.scatter(predvals.loc[:,'x'], 10**mod.predict(predvals), s=1, c='white')
plt.title(f'Distance to border\nvs\nCis{col_int}')
plt.ylabel('Distance to border [um]')
plt.xlabel('Cisplatin [log10(MeanIntensity)]')
plt.ylim((0,150))

-> On average also the goodness of fit seems to increase.