In [None]:
# `snakemake` is not defined anywhere in this notebook; presumably it is injected
# by the Snakemake notebook runner - TODO confirm.
# NOTE: the alias `sm` is rebound further down by `import statsmodels.api as sm`,
# so cells reading `sm.input` / `sm.output` only work before that import has run.
sm = snakemake

In [None]:
import spherpro.bro as sb
import spherpro.db as db
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

import pathlib

import spherpro.bromodules.helpers_vz as helpers_vz

# Aim: Assess distance to border readout
Plot distance to rim vs Pt194

CisPt194 was added to all spheres after pooling.

Thus its diffusion gradient inside the spheres could be used as a substitute readout for distance to border (e.g. like Durand, R. E. (1982). Use of Hoechst 33342 for cell selection from multicell systems. Journal of Histochemistry and Cytochemistry, 30(2), 117–122. http://doi.org/10.1177/30.2.6174559)

## 0) Config

In [None]:
# Read inputs/outputs from the `snakemake` object itself rather than the `sm`
# alias: `sm` is rebound by `import statsmodels.api as sm` further down, so the
# alias would point to statsmodels if this cell were re-run afterwards.
fn_config = snakemake.input.fn_config
fol_plot = pathlib.Path(snakemake.output.fol_out)
# Create the plot folder idempotently: a plain `mkdir()` raises if the folder
# already exists (e.g. on re-running the cell) or if a parent folder is missing.
fol_plot.mkdir(parents=True, exist_ok=True)

In [None]:
# Build the spherpro `bro` object from the config file; the cells below use its
# `session`, `data`, `filters` and `helpers` attributes.
bro = sb.get_bro(fn_config)


In [None]:
# Helper wrapper around `bro`; used below to load the data (`get_data`) and the
# measurement metadata (`get_measuremeta`).
hpr = helpers_vz.HelperVZ(bro)



In [None]:
# Names of the variables compared throughout this notebook.
col_int = 'Pt194'         # intensity channel plotted against the distances
col_raw = 'dist-sphere'   # plotted below as the "raw" distance to border
col_corr = 'object'       # plotted below as the "corrected" distance to border


def transf(x):
    """Return log10(x + 0.1), i.e. a log transform with a pseudocount of 0.1."""
    return np.log10(x + 0.1)

In [None]:
# Fetch the ids of all conditions; no restriction is currently active.
# Restrictions that were used previously (kept for reference):
#   .join(db.sampleblocks)
#   .filter(db.sampleblocks.sampleblock_name == blockname)
#   .filter(db.conditions.condition_name.like('DLD%'))
#   .filter(db.conditions.bc_x.in_([2,3,6,10,11]))
cond_query = bro.session.query(db.conditions.condition_id)
q = cond_query.all()

In [None]:
# Each result row holds a single column (the condition id); unpack it.
condids = [cond_id for (cond_id,) in q]

In [None]:
# Collect the measurement ids of the three readouts needed below.
# NOTE(review): the four lists are presumably matched element-wise
# (channel / stack / measurement name / measurement type) - confirm against
# `get_measmeta_filter_statements`.
measmeta_query = bro.data.get_measmeta_query()
measmeta_filter = bro.filters.measurements.get_measmeta_filter_statements(
    channel_names=[col_int, 'object', 'dist-sphere'],
    stack_names=['FullStackFiltered', 'ObjectStack', 'DistStack'],
    measurement_names=['MeanIntensityComp', 'dist-rim', 'MeanIntensity'],
    measurement_types=[None, None, None],
)
measid_rows = (
    measmeta_query
    .filter(measmeta_filter)
    .with_entities(db.measurements.measurement_id)
    .all()
)
measids = [row[0] for row in measid_rows]

In [None]:
%%time
# Load the selected measurements of all 'cell' objects for the selected conditions.
dat = hpr.get_data(
    cond_ids=condids,
    meas_ids=measids,
    object_type='cell',
    legacy=False,
)

In [None]:
# Measurement metadata, restricted to the measurement ids selected above.
measmeta_names = ['MeanIntensityComp', 'NbMeanMeanIntensityComp']
dat_measmeta = hpr.get_measuremeta(
    bro.data.pannel,
    measurement_names=measmeta_names,
    additional_measfilt=db.measurements.measurement_id.in_(measids),
)

In [None]:
# Attach the measurement metadata to the variables of `dat`, matched on
# 'measurement_id'. The return value is discarded, so this presumably modifies
# `dat` in place - TODO confirm.
bro.helpers.anndata.add_anndata_varmeta(dat, dat_measmeta, on='measurement_id')

In [None]:
# Name the variables by channel so they can be addressed via col_int / col_raw /
# col_corr (e.g. `dat.obs_vector(col_int)`) in the cells below.
dat.var_names = dat.var['channel_name']

In [None]:
# Display the resulting variable names as a quick sanity check.
dat.var_names

In [None]:
# Condition metadata for every image (images joined to their condition).
# Note: `q` is reused here and now holds a query object, not the earlier result list.
image_cond_query = bro.session.query(db.images.image_id, db.conditions)
q = image_cond_query.join(db.conditions)

dat_cond = bro.doquery(q)


In [None]:
# Attach the condition metadata to the observations of `dat`, matched on the
# image id key. The later cells rely on `dat.obs.condition_id` being available.
bro.helpers.anndata.add_anndata_obsmeta(dat, dat_cond, on=db.images.image_id.key)

Check how the distance-to-border readout changes upon correction, overall:

In [None]:
# Density (hexbin) of transformed Pt194 intensity against the raw distance to border.
fig = plt.figure(figsize=(3, 3))
dist_raw = dat.obs_vector(col_raw)
int_transf = transf(dat.obs_vector(col_int))
plt.hexbin(dist_raw, int_transf, gridsize=100, rasterized=True)
plt.colorbar()

ax_raw = plt.gca()
ax_raw.set_title('Distance to border raw')
ax_raw.set_xlabel(r'Raw distance to border [$\mu m$]')
ax_raw.set_ylabel('Cisplatin Pt194 [log10(MeanIntensity)]')

fig.savefig(fol_plot / 'd2rim_raw_hm.pdf')

In [None]:
# Density (hexbin) of transformed Pt194 intensity against the corrected distance to border.
fig = plt.figure(figsize=(3, 3))
dist_corr = dat.obs_vector(col_corr)
int_transf = transf(dat.obs_vector(col_int))
plt.hexbin(dist_corr, int_transf, gridsize=100, rasterized=True)
plt.colorbar()

ax_corr = plt.gca()
ax_corr.set_title('Distance to border corrected')
ax_corr.set_xlabel(r'Corrected distance to border [$\mu m$]')
ax_corr.set_ylabel('Cisplatin Pt194 [log10(MeanIntensity)]')

fig.savefig(fol_plot / 'd2rim_corr_hm.pdf')

In [None]:
# Rank correlation between Pt194 and each distance readout over all cells.
spearman_raw = stats.spearmanr(dat.obs_vector(col_raw), dat.obs_vector(col_int))
spearman_corr = stats.spearmanr(dat.obs_vector(col_corr), dat.obs_vector(col_int))

print(f'''
Spearman overall:
raw: {spearman_raw}

corr: {spearman_corr}

-> Overall correlation seems to improve upon correction
''')

Also check the correlations for each sphere (=condition_id) - how often does it improve?

In [None]:
def _spearman_with_int(subset, col_dist):
    """Spearman rho between `col_dist` and `col_int` within one data subset."""
    return stats.spearmanr(subset.obs_vector(col_dist),
                           subset.obs_vector(col_int)).correlation


# One (raw, corrected) correlation pair per sphere (= condition_id).
cordict = {}
for cond_id in dat.obs.condition_id.unique():
    sphere_dat = dat[dat.obs.condition_id == cond_id]
    rho_raw = _spearman_with_int(sphere_dat, col_raw)
    rho_corr = _spearman_with_int(sphere_dat, col_corr)
    cordict[cond_id] = (rho_raw, rho_corr)

# Rows: spheres; columns: raw rho, corrected rho and their difference.
dat_cor = pd.DataFrame(cordict).T
dat_cor.columns = ('raw', 'corrected')
dat_cor = dat_cor.assign(delta=dat_cor['corrected'] - dat_cor['raw'])

In [None]:
# Histograms of the per-sphere correlations: raw, corrected and their difference.
axs = dat_cor.hist(layout=(1,3), figsize=(5,1.5), bins=25)

panel_titles = (r'$\rho_{raw}$', r'$\rho_{corrected}$', r'$\Delta (\rho)$')
panel_xlabels = (r'Spearman $\rho$',
                 r'Spearman $\rho$',
                 r'$\Delta (\rho_{corrected}-\rho_{raw})$')

axs[0,0].set_ylabel('count')
for panel, panel_title, panel_xlabel in zip(axs[0], panel_titles, panel_xlabels):
    panel.set_title(panel_title)
    panel.set_xlabel(panel_xlabel)

In [None]:
# All histogram panels share one figure; fetch it from the first panel and save it.
first_panel = axs[0,0]
fig = first_panel.get_figure()
hist_path = fol_plot / 'd2rim_hist.pdf'
fig.savefig(hist_path)

In [None]:
# Column-wise mean over spheres of the raw rho, corrected rho and their delta.
dat_cor.mean()

In [None]:
# Fraction of spheres whose corrected rho is lower than the raw rho
# (delta = corrected - raw).
# NOTE(review): a lower rho means a "stronger" correlation only if the
# correlations are negative - confirm the sign convention.
print(
f'''
Correlation strength increases in: {(dat_cor['delta'] < 0).mean()}
of spheres.
''')

In [None]:
# One-sample t-test of the per-sphere delta (corrected - raw rho) against 0.
stats.ttest_1samp(dat_cor['delta'],0)

In [None]:
def boot_bigger(series: pd.Series, n: int, val: float, random_state=None):
    """Bootstrap estimate of how often the resampled mean exceeds ``val``.

    The series is resampled with replacement (same size as the original)
    ``n`` times; the fraction of resamples whose mean is strictly greater
    than ``val`` is returned.

    Parameters
    ----------
    series:
        Values to resample.
    n:
        Number of bootstrap resamples; must be positive (``n == 0`` raises
        ``ZeroDivisionError``).
    val:
        Threshold the resampled mean is compared against.
    random_state:
        Optional seed (anything accepted by ``np.random.RandomState``) or an
        existing ``np.random.RandomState``. With the default ``None`` the
        global NumPy random state is used, as before, so results differ
        between runs unless it was seeded elsewhere.

    Returns
    -------
    Fraction (between 0 and 1) of resamples with ``mean > val``.
    """
    # One generator shared by all resamples: re-seeding each draw with the same
    # integer would yield n identical resamples.
    if random_state is None or isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    is_big = 0
    for _ in range(n):
        resampled = series.sample(frac=1, replace=True, random_state=rng)
        is_big += resampled.mean() > val
    return is_big / n
    

In [None]:
# Fraction of 100000 bootstrap resamples of the per-sphere delta whose mean is > 0.
boot_bigger(dat_cor['delta'], 100000, 0)

First, regress the two against each other without correction:

In [None]:
import statsmodels.api as sm
import statsmodels.formula.api as smf
import patsy
from patsy.builtins import Q
from patsy import bs

In [None]:
# Linear regression of log10(raw distance to border) on log10(Pt194 intensity).
# The distance column comes from `col_raw` (instead of a repeated hard-coded
# 'dist-sphere') so the regression always uses the same variable as the plots above.
# Pseudocounts (0.01 for intensity, 0.1 for distance) keep zeros finite.
d = pd.DataFrame({
    'x': np.log10(dat.obs_vector(col_int) + 0.01),
    'y': np.log10(dat.obs_vector(col_raw) + 0.1),
})
# Drop cells whose log transform is infinite or undefined.
d = d.replace([np.inf, -np.inf], np.nan).dropna()
# A spline fit ('y~bs(x, df=10)') was tried as an alternative to the linear model.
mod_distsphere = smf.ols('y~x', data=d).fit()

In [None]:
# Fit summary of the regression on the raw (uncorrected) distance.
mod_distsphere.summary()

In [None]:
# Raw distance (log-scaled y axis) against log10 intensity, with the fitted
# regression line overlaid as white dots.
plt.hexbin(d['x'], 10**d['y'], yscale='log')
plt.colorbar()

x_grid = np.arange(d['x'].min(), d['x'].max(), 0.1)
predvals = pd.DataFrame({'x': x_grid})
fitted_dist = 10**mod_distsphere.predict(predvals)

ax = plt.gca()
ax.set_yscale('log')
ax.scatter(predvals.loc[:, 'x'], fitted_dist, s=1, c='white')
ax.set_title(f'Distance to border uncorrected\nvs\nCis{col_int}')
ax.set_ylabel('Distance to border [um]')
ax.set_xlabel('Cisplatin [log10(MeanIntensity)]')

In [None]:
# Same comparison on a linear y axis, restricted to 0-150, with the fitted
# regression line overlaid as white dots.
plt.hexbin(d['x'], 10**d['y'])
plt.colorbar()

x_grid = np.arange(d['x'].min(), d['x'].max(), 0.1)
predvals = pd.DataFrame({'x': x_grid})
fitted_dist = 10**mod_distsphere.predict(predvals)

ax = plt.gca()
ax.scatter(predvals.loc[:, 'x'], fitted_dist, s=1, c='white')
ax.set_title(f'Distance to border uncorrected \nvs\nCis{col_int}')
ax.set_ylabel('Distance to border [um]')
ax.set_xlabel('Cisplatin [log10(MeanIntensity)]')
ax.set_ylim((0,150))

And after correction:

In [None]:
# Linear regression of log10(corrected distance to border) on log10(Pt194 intensity).
# The distance column comes from `col_corr` (instead of a repeated hard-coded
# 'object') so the regression always uses the same variable as the plots above.
# Pseudocounts (0.01 for intensity, 0.1 for distance) keep zeros finite.
d = pd.DataFrame({
    'x': np.log10(dat.obs_vector(col_int) + 0.01),
    'y': np.log10(dat.obs_vector(col_corr) + 0.1),
})
# Drop cells whose log transform is infinite or undefined.
d = d.replace([np.inf, -np.inf], np.nan).dropna()
# A spline fit ('y~bs(x, df=10)') was tried as an alternative to the linear model.
mod = smf.ols('y~x', data=d).fit()

In [None]:
# Fit summary of the regression on the corrected distance.
mod.summary()

In [None]:
# Corrected distance (log-scaled y axis) against log10 intensity, with the fitted
# regression line overlaid as white dots.
plt.hexbin(d['x'], 10**d['y'], yscale='log')
plt.colorbar()

x_grid = np.arange(d['x'].min(), d['x'].max(), 0.1)
predvals = pd.DataFrame({'x': x_grid})
fitted_dist = 10**mod.predict(predvals)

ax = plt.gca()
ax.set_yscale('log')
ax.scatter(predvals.loc[:, 'x'], fitted_dist, s=1, c='white')
ax.set_title(f'Distance to border\nvs\nCis{col_int}')
ax.set_ylabel('Distance to border [um]')
ax.set_xlabel('Cisplatin [log10(MeanIntensity)]')

In [None]:
# Same comparison on a linear y axis, restricted to 0-150, with the fitted
# regression line overlaid as white dots.
plt.hexbin(d['x'], 10**d['y'])
plt.colorbar()

x_grid = np.arange(d['x'].min(), d['x'].max(), 0.1)
predvals = pd.DataFrame({'x': x_grid})
fitted_dist = 10**mod.predict(predvals)

ax = plt.gca()
ax.scatter(predvals.loc[:, 'x'], fitted_dist, s=1, c='white')
ax.set_title(f'Distance to border\nvs\nCis{col_int}')
ax.set_ylabel('Distance to border [um]')
ax.set_xlabel('Cisplatin [log10(MeanIntensity)]')
ax.set_ylim((0,150))

-> On average, the goodness of fit also seems to increase.