In [None]:
import os
import numpy as np
import pandas as pd
from scroutines.config_plots import *

In [None]:
# Directory holding the processed outputs read by this notebook.
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
ddir = '/u/home/f/f7xiesnm/project-zipursky/easifish/cdf03_c1-2_bino/proc/r1-v1'
# IPython shell escape: list the directory contents as a quick sanity check.
!ls $ddir
# n5dir = '/data/qlyu/v1/data/easifish/lt186/r1/stitching/export.n5'
# f1: ROI table; f2: spot-count table. Both are joined on their first column
# further down. (presumably one row per segmented ROI -- confirm against the files)
f1 = os.path.join(ddir, 'roi.csv')
f2 = os.path.join(ddir, 'spotcount.csv') #_intn.csv')

In [None]:
def rot2d(x, y, theta):
    """ Rotate 2D points about the origin.

    x, y  : equal-length 1D sequences of coordinates
    theta : rotation angle in degrees; the standard rotation matrix
            [[cos, -sin], [sin, cos]] is applied to the stacked (x, y) rows

    Returns the rotated x and y coordinates as two numpy arrays.
    """
    angle = theta/180*np.pi
    cos_a, sin_a = np.cos(angle), np.sin(angle)
    rotation = np.array([[cos_a, -sin_a],
                         [sin_a,  cos_a]])
    # rows of `points` are x and y; columns are the individual points
    points = np.vstack([x, y])
    x_rot, y_rot = rotation.dot(points)
    return x_rot, y_rot


In [None]:
def plot_easifish_proj(df, x, y, hue='lbl_v2', na_rep='NA', invert_yaxis=False, figsize=(8,8)):
    """ Scatter one 2D projection of the rows in `df`, highlighting labeled rows.

    Every row is drawn as a small light-gray dot; rows whose `hue` value
    differs from `na_rep` are drawn on top, colored by `hue`.

    df           : DataFrame containing the columns named by `x`, `y` and `hue`
    x, y         : names of the columns used for the horizontal / vertical axis
    hue          : name of the categorical column used for coloring
    na_rep       : value of `hue` that marks unlabeled rows (background only)
    invert_yaxis : if True, flip the vertical axis
    figsize      : figure size passed to plt.subplots

    Colors are looked up in the notebook-level `palette` dict, which must be
    defined before this function is called. The figure is shown immediately;
    nothing is returned.
    """
    fig, ax = plt.subplots(figsize=figsize)
    # Bind both layers to `ax` explicitly instead of relying on the implicit
    # "current axes", so the function keeps working if other figures are open.
    sns.scatterplot(data=df,
                    x=x, y=y, color='lightgray', s=2, edgecolor='none', ax=ax)
    sns.scatterplot(data=df[df[hue]!=na_rep],
                    x=x, y=y, hue=hue, palette=palette, s=5, edgecolor='none', ax=ax)
    if invert_yaxis:
        ax.invert_yaxis()

    ax.grid(False)
    ax.set_aspect('equal')
    # place the legend's anchor at the upper-right corner of the axes
    ax.legend(bbox_to_anchor=(1,1))
    plt.show()

In [None]:
# Read both tables using their first column as the index, then left-join the
# spot counts (df2) onto the ROI table (df1) by index.
df1 = pd.read_csv(f1, index_col=0)
df2 = pd.read_csv(f2, index_col=0)
df  = df1.join(df2)

In [None]:
# Keep only rows whose 'area' lies strictly between the two bounds.
amin, amax = 500, 5000
cond = df['area'].gt(amin) & df['area'].lt(amax)
df = df.loc[cond]

In [None]:
# cell level stats: one histogram per coordinate plus one for the area
fig, axs = plt.subplots(1,4,figsize=(4*4,1*4))
for ax, col in zip(axs, ['x', 'y', 'z', 'area']):
    sns.histplot(df[col].values, ax=ax)
    ax.set_xlabel(col)
# `ax` is now the last ('area') panel: mark the two area bounds on it
for bound in (amin, amax):
    ax.axvline(bound)
fig.tight_layout()
plt.show()

In [None]:
# Summary statistics of the numeric columns of the filtered table.
df.describe()

In [None]:
# from sklearn.decomposition import PCA
# pca = PCA(n_components=3)
# df[['xr', 'yr', 'zr']]= pca.fit_transform(df[['x', 'y', 'z']])

In [None]:
# Spot-count columns that are thresholded below to label each row.
channels = [
    'r1_c0',  'r1_c2',
]
# Label names, listed in the same order as `channels`: the labeling code below
# maps "r1_c0 only" to 'LM' and "r1_c2 only" to 'RL'.
# NOTE(review): neither list is referenced by the code visible in this notebook.
genes = [
    # 'RL (Cre)',
    # 'LM (FlpO)',
    'LM',
    'RL',
]

def rename(code):
    """ Translate an integer label code into its display name.

    0 -> 'NA', 1 -> 'LM', 2 -> 'RL', 3 -> 'both'.
    Any other code raises KeyError.
    """
    names_by_code = dict(enumerate(('NA', 'LM', 'RL', 'both')))
    return names_by_code[code]

# Binarize each channel with its own threshold, then combine the two flags
# into a 2-bit code: bit 0 = r1_c0 above threshold, bit 1 = r1_c2 above threshold.
th_c0 = 32
th_c2 = 10
# assign() returns a new frame, so the filtered view from earlier is not modified
df = df.assign(
    bi_r1_c0=df['r1_c0'] > th_c0,
    bi_r1_c2=df['r1_c2'] > th_c2,
)
df['lbl'] = df['bi_r1_c0'].astype(int) + 2*df['bi_r1_c2'].astype(int)
df['lbl_v2'] = df['lbl'].map(rename)
np.unique(df['lbl'].values)
np.unique(df['lbl_v2'].values)

In [None]:
# Display the 10 colors of the 'tab10' palette for reference.
sns.color_palette('tab10', 10)

In [None]:
# Color used for each `lbl_v2` category; plot_easifish_proj reads this dict
# as a global when it colors the labeled rows.
palette = {
    'NA': 'lightgray', 
    'LM': 'C3', 
    'RL': 'C0', 
    'both': 'C6', 
}

In [None]:
# Number of rows in each label category.
df.value_counts('lbl_v2')

In [None]:
# Percentage of rows per label, followed by overlap ratios between LM and RL.
a = df.value_counts('lbl_v2')
print(a/len(df)*100, "\n")

n_lm, n_rl, n_both = a.loc['LM'], a.loc['RL'], a.loc['both']
# 'both' rows count towards LM and towards RL in the denominators
print(f"both in LM (%): {n_both/(n_both+n_lm)*100:.1f}")
print(f"both in RL (%): {n_both/(n_both+n_rl)*100:.1f}")
print(f"RL/LM (%): {(n_both+n_rl)/(n_both+n_lm)*100:.1f}")

In [None]:
# Bar chart of the number of rows in each labeled category ('NA' excluded).
fig, ax = plt.subplots(figsize=(3,4))
# draw on the axes created above rather than on the implicit current axes
df[df['lbl_v2']!='NA'].value_counts('lbl_v2').plot.bar(width=0.7, color='black', ax=ax)
plt.show()

In [None]:
# One scatter per pair of raw coordinate axes.
for xcol, ycol in [('x', 'y'), ('x', 'z'), ('y', 'z')]:
    plot_easifish_proj(df, xcol, ycol, invert_yaxis=True)

In [None]:
# Rotated coordinates: y is copied unchanged, (x, z) go through rot2d.
# The angle is 0 here, so xr/zr carry the same values as x/z.
df['yr'] = df['y']
df['xr'], df['zr'] = rot2d(df['x'], df['z'], 0)

for xcol, ycol in [('xr', 'yr'), ('xr', 'zr'), ('yr', 'zr')]:
    plot_easifish_proj(df, xcol, ycol, invert_yaxis=True)

In [None]:
# Coordinate ranges of the rotated axes (printed for reference).
coord_cols = ['xr', 'yr', 'zr']
xmin, ymin, zmin = df[coord_cols].min()
xmax, ymax, zmax = df[coord_cols].max()
print(xmin, ymin, zmin)
print(xmax, ymax, zmax)

# Integer bin edges 0, step, 2*step, ... up to the first edge >= the axis maximum.
step = 25
xbins, ybins, zbins = (
    np.arange(0, upper+step, step).astype(int) for upper in (xmax, ymax, zmax)
)
print(xbins, ybins, zbins, xbins.shape, ybins.shape, zbins.shape)

# labels=False stores the integer bin position; pd.cut's default intervals are
# right-closed, so a coordinate <= 0 falls outside every bin and becomes NaN.
for axis, edges in zip('xyz', (xbins, ybins, zbins)):
    df[f'{axis}rbin'] = pd.cut(df[f'{axis}r'], bins=edges, labels=False)
df

In [None]:
# Same binning as the base grid, with a coarser step; the bin positions are
# stored in columns suffixed with the step size.
step = 50
xbins2, ybins2, zbins2 = (
    np.arange(0, upper+step, step).astype(int) for upper in (xmax, ymax, zmax)
)
print(xbins2, ybins2, zbins2, xbins2.shape, ybins2.shape, zbins2.shape)

for axis, edges in zip('xyz', (xbins2, ybins2, zbins2)):
    df[f'{axis}rbin_s{step}'] = pd.cut(df[f'{axis}r'], bins=edges, labels=False)

In [None]:
# Coarsest grid; the '*_s100' columns feed the per-bin mean-zr maps and the
# shuffle test further down.
step = 100
xbins3, ybins3, zbins3 = (
    np.arange(0, upper+step, step).astype(int) for upper in (xmax, ymax, zmax)
)
print(xbins3, ybins3, zbins3, xbins3.shape, ybins3.shape, zbins3.shape)

for axis, edges in zip('xyz', (xbins3, ybins3, zbins3)):
    df[f'{axis}rbin_s{step}'] = pd.cut(df[f'{axis}r'], bins=edges, labels=False)

In [None]:
# Row counts per (bin, label) along each axis: rows are bin positions, columns
# are label names, and absent combinations are filled with 0.
dfsize_x, dfsize_y, dfsize_z = (
    df.groupby([bin_col, 'lbl_v2']).size().unstack().fillna(0)
    for bin_col in ('xrbin', 'yrbin', 'zrbin')
)



In [None]:
# Row counts per (x bin, y bin, z bin, label) on the step-100 grid, kept as a
# Series with a 4-level index; only combinations that occur are listed.
dfsize_s100 = df.groupby(['xrbin_s100', 'yrbin_s100',  'zrbin_s100', 'lbl_v2']).size() # .unstack().fillna(0) #mean().unstack()
dfsize_s100

In [None]:

def _count_grid(cells, row_col, col_col, row_edges, col_edges):
    """ Count the rows of `cells` per (row bin, column bin) on a full bin grid.

    cells      : DataFrame holding the integer bin-position columns
    row_col    : name of the bin column that becomes the index of the result
    col_col    : name of the bin column that becomes the columns of the result
    row_edges  : bin edges that produced `row_col`
    col_edges  : bin edges that produced `col_col`

    Returns an integer DataFrame indexed 0..len(row_edges)-1 by
    0..len(col_edges)-1, with 0 wherever no row fell into the bin pair.
    (pd.cut yields len(edges)-1 bins, so the last row/column is always empty;
    the extra slot is kept to match the grid size used so far.)
    """
    counts = cells.groupby([row_col, col_col]).size().unstack()
    full_grid = counts.reindex(index=np.arange(len(row_edges)),
                               columns=np.arange(len(col_edges)))
    return full_grid.fillna(0).astype(int)


# Each projection is gridded with the edges of ITS OWN two axes. Previously the
# yz grids used the x edges for their rows and the xy grids used the z edges for
# their columns, which silently dropped or padded bins whenever the axes had
# different extents.

# xz
dfsize_xz1, dfsize_xz2, dfsize_xz3 = (
    _count_grid(df[df['lbl_v2']==lbl], 'xrbin', 'zrbin', xbins, zbins)
    for lbl in ('LM', 'RL', 'both')
)

# yz
dfsize_yz1, dfsize_yz2, dfsize_yz3 = (
    _count_grid(df[df['lbl_v2']==lbl], 'yrbin', 'zrbin', ybins, zbins)
    for lbl in ('LM', 'RL', 'both')
)

# xy
dfsize_xy1, dfsize_xy2, dfsize_xy3 = (
    _count_grid(df[df['lbl_v2']==lbl], 'xrbin', 'yrbin', xbins, ybins)
    for lbl in ('LM', 'RL', 'both')
)

In [None]:
# Number of labeled rows per bin along x, y and z (one panel per axis).
fig, axs = plt.subplots(1,3,figsize=(3*5,1*4))
panels = [('x', dfsize_x), ('y', dfsize_y), ('z', dfsize_z)]
for ax, (axis_name, counts) in zip(axs, panels):
    for lbl in ('LM', 'RL', 'both'):
        ax.plot(counts[lbl], label=lbl)
    ax.set_xlabel(axis_name)
    ax.set_ylabel('Num cells')
# the legend is drawn once, on the last (z) panel
ax.legend()

fig.tight_layout()

In [None]:
# Profiles along zr: totals vs. labeled rows, then the label composition.
total = dfsize_z.sum(axis=1)
total_lbl = dfsize_z['LM']+dfsize_z['RL']+dfsize_z['both']
# lower edge of every z bin that appears in dfsize_z (bin position -> edge value)
z_edges = zbins[dfsize_z.index]

# Figure 1: counts of all rows vs. labeled rows, and the labeled fraction
fig, axs = plt.subplots(2,1,figsize=(1*6,2*3), sharex=True)
ax = axs[0]
for series, name, color in [(total, 'total', 'gray'), (total_lbl, 'total lbl', 'k')]:
    ax.plot(z_edges, series, label=name, color=color)
sns.despine(ax=ax)
ax.legend(bbox_to_anchor=(1,1))
ax.set_xlabel('zr')
ax.set_ylabel('num. cells')

ax = axs[1]
# fractions are only shown for bins holding more than 20 rows
cond = total > 20
frac_labeled = total_lbl/(total)
ax.plot(z_edges[cond], frac_labeled[cond], color='k')
sns.despine(ax=ax)
ax.set_xlabel('zr')
ax.set_ylabel('Frac. cells')

# Figure 2: counts per label, and each label's share of the labeled rows
fig, axs = plt.subplots(2,1,figsize=(1*6,2*3), sharex=True)
ax = axs[0]
for lbl, legend_name in [('LM', 'LM only'), ('RL', 'RL only'), ('both', 'both')]:
    ax.plot(z_edges, dfsize_z[lbl], label=legend_name)
sns.despine(ax=ax)
ax.set_xlabel('zr')
ax.set_ylabel('num. cells')
ax.legend(bbox_to_anchor=(1,1))

ax = axs[1]
# same idea, thresholding on the number of labeled rows this time
cond = total_lbl > 20
for lbl in ('LM', 'RL', 'both'):
    share = dfsize_z[lbl]/(total_lbl)
    ax.plot(z_edges[cond], share[cond])
sns.despine(ax=ax)
ax.set_xlabel('zr')
ax.set_ylabel('Frac. labeled cells')

In [None]:
def plot_density_maps(dfsize_1, dfsize_2, dfsize_3):
    """ Show three 2D count grids side by side plus a log-ratio map.

    dfsize_1, dfsize_2, dfsize_3 : DataFrames of counts on the same bin grid;
        they are titled 'LM', 'RL' and 'both' respectively.

    Each grid is transposed before plotting, so its index runs along the
    horizontal axis. The fourth panel shows log2((dfsize_1+1)/(dfsize_2+1));
    the +1 pseudocount keeps empty bins finite. The figure is shown
    immediately; nothing is returned.
    """
    fig, axs = plt.subplots(1,4,figsize=(4*6,1*5), sharex=True,sharey=True)
    grids = (dfsize_1, dfsize_2, dfsize_3)
    for ax, counts, title in zip(axs, grids, ('LM', 'RL', 'both')):
        sns.heatmap(counts.T, ax=ax, cmap='rocket_r', cbar_kws=dict(shrink=0.5))
        ax.set_aspect('equal')
        ax.set_title(title)

    ax = axs[3]
    # ax=ax was missing here: the panel only landed in the right place because
    # the last-created subplot happened to be the current axes.
    sns.heatmap(np.log2((dfsize_1.T+1)/(dfsize_2.T+1)),
                ax=ax,
                cmap='coolwarm',
                center=0,
                cbar_kws=dict(shrink=0.5)
               )
    ax.set_aspect('equal')
    ax.set_title('log2(LM/RL)')
    plt.show()

In [None]:
# One row of density maps per projection: xz, yz, xy.
for grids in [
    (dfsize_xz1, dfsize_xz2, dfsize_xz3),
    (dfsize_yz1, dfsize_yz2, dfsize_yz3),
    (dfsize_xy1, dfsize_xy2, dfsize_xy3),
]:
    plot_density_maps(*grids)

# at every x and y, compare z distribution

In [None]:
# dfsub = df[df['zrbin'] == 8]
# plot_easifish_proj(dfsub, 'xr', 'yr', invert_yaxis=True)
# plot_easifish_proj(dfsub, 'xr', 'zr', invert_yaxis=True)
# plot_easifish_proj(dfsub, 'yr', 'zr', invert_yaxis=True)

In [None]:
# xy
# Mean zr per (x bin, y bin, label) on the step-100 grid. After the two
# unstack() calls the rows are x bins and the columns are a (label, y bin)
# MultiIndex, so dfmean[label] is an x-by-y table; empty bins stay NaN.
dfmean = df.groupby(['xrbin_s100', 'yrbin_s100', 'lbl_v2'])['zr'].mean().unstack().unstack() #.fillna(0).astype(int) #mean().unstack()

In [None]:
# Mean zr per xy bin for every category (panels 0-3, shared color range),
# followed by the RL minus LM difference (panel 4).
fig, axs = plt.subplots(1,5,figsize=(5*6,1*5))
categories = ['LM', 'RL', 'both', 'NA']
for idx, category in enumerate(categories):
    ax = axs[idx]
    sns.heatmap(dfmean[category].T,
                ax=ax,
                cmap='rocket_r',
                vmin=100, vmax=300,
                cbar_kws=dict(shrink=0.3))
    ax.set_aspect('equal')
    ax.set_title(category)
    ax.grid(False)

ax = axs[4]
zr_diff = dfmean['RL'].T-dfmean['LM'].T
sns.heatmap(zr_diff,
            ax=ax,
            cmap='coolwarm',
            center=0, vmax=100, vmin=-100,
            cbar_kws=dict(shrink=0.3))
ax.set_aspect('equal')
ax.set_title('RL-LM')
ax.grid(False)

In [None]:
# xy
# Observed statistic: per xy bin (step-100 grid), mean zr of RL rows minus mean
# zr of LM rows, computed on labeled rows only.
dfsub = df[df['lbl_v2']!='NA'].copy()
dfsubmean = dfsub.groupby(['xrbin_s100', 'yrbin_s100', 'lbl_v2'])['zr'].mean().unstack().unstack() #.fillna(0).astype(int) #mean().unstack()
dfsubmeandiff = dfsubmean['RL']-dfsubmean['LM']

# Null distribution: permute the labels across rows (positions and label counts
# stay fixed) and recompute the same statistic n_rep times.
n_rep = 1000
# Seeded generator so that the null distribution, and therefore the p-values
# derived from it, are identical on every Restart & Run All.
rng = np.random.default_rng(0)
dfsubmeandiff_shuffs = []
for i in range(n_rep):
    dfsub['lbl_v2_shuff'] = rng.permutation(dfsub['lbl_v2'].values)
    dfsubmean_shuff = dfsub.groupby(['xrbin_s100', 'yrbin_s100', 'lbl_v2_shuff'])['zr'].mean().unstack().unstack() #.fillna(0).astype(int) #mean().unstack()
    shuff_diff = dfsubmean_shuff['RL']-dfsubmean_shuff['LM']
    # Align to the observed grid: the shuffles are later stacked and compared
    # with the observed table purely by position.
    dfsubmeandiff_shuffs.append(
        shuff_diff.reindex(index=dfsubmeandiff.index, columns=dfsubmeandiff.columns)
    )
# shape: (n_rep, number of x bins, number of y bins)
dfsubmeandiff_shuffs = np.array(dfsubmeandiff_shuffs)

# per-bin mean over shuffles, ignoring shuffles where the bin had no RL or no LM row
dfsubmeandiff_shuffmean = np.nanmean(dfsubmeandiff_shuffs, axis=0) # .shape

# dfsubmeandiff_shuffstd = np.nanstd(dfsubmeandiff_shuffs, axis=0) # .shape
# dfsubmeandiff_sigma = (dfsubmeandiff - dfsubmeandiff_shuffmean) /(dfsubmeandiff_shuffstd/np.sqrt(n_rep))

In [None]:
# Sanity check: shuffles are stacked along the first axis.
dfsubmeandiff_shuffs.shape

In [None]:
# Two-sided permutation p-value per xy bin.
abs_obs = np.abs(dfsubmeandiff.values[np.newaxis,:,:])
abs_null = np.abs(dfsubmeandiff_shuffs)
# Shuffles in which the bin had no RL or no LM row yield NaN; they are neither
# counted as extreme nor as valid draws.
n_valid = np.sum(~np.isnan(abs_null), axis=0)
n_extreme = np.sum(abs_null >= abs_obs, axis=0)
# The +1 in numerator AND denominator counts the observed labeling as one draw,
# which keeps every p-value inside (0, 1].
pvals = (1+n_extreme)/(1+n_valid)
# Bins without an observed difference have no test; without this mask they
# would receive the smallest possible p-value and look maximally significant.
pvals = np.where(np.isnan(dfsubmeandiff.values), np.nan, pvals)
pvals

In [None]:
# Observed difference, mean of the shuffled differences, and significance.
fig, axs = plt.subplots(1,3,figsize=(3*6,1*5))

# observed RL - LM difference in mean zr, fixed symmetric color range
ax = axs[0]
sns.heatmap(dfsubmeandiff.T,
            cmap='coolwarm', cbar_kws=dict(shrink=0.3),
            center=0, vmax=100, vmin=-100,
            ax=ax)
ax.set_aspect('equal')
ax.set_title('RL-LM (data)')
ax.grid(False)

# mean over shuffles; the color range is left automatic on purpose
ax = axs[1]
sns.heatmap(dfsubmeandiff_shuffmean.T,
            cmap='coolwarm', cbar_kws=dict(shrink=0.3),
            center=0,
            ax=ax)
ax.set_aspect('equal')
# report the actual number of shuffles instead of a hard-coded 1000
ax.set_title(f'RL-LM (shuffled \n mean over {n_rep} shuffles)')
ax.grid(False)

# significance: the plotted quantity is -log10(p), so the title says so
ax = axs[2]
sns.heatmap(-np.log10(pvals).T,
            cmap='rocket_r', cbar_kws=dict(shrink=0.3),
            ax=ax)
ax.set_aspect('equal')
ax.set_title('RL-LM (-log10 p-value)')
ax.grid(False)

In [None]:
# Effect size against significance: one point per xy bin, with the observed
# RL - LM difference in mean zr on the horizontal axis and -log10(p) on the
# vertical axis. Both inputs are transposed the same way so they stay paired.
fig, ax = plt.subplots(1,1,figsize=(1*6,1*5))
ax.scatter(dfsubmeandiff.T, -np.log10(pvals).T)
ax.set_ylabel('-log10(p-value)')
ax.set_xlabel('mean RL-LM in zr (um)')
sns.despine(ax=ax)
plt.show()

# ax.grid(False)