In [None]:
import base
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import re
import rushd as rd
import scipy as sp
import seaborn as sns

# Re-import the local `base` helper module on every run of this cell so that
# edits made to base.py while the kernel is alive are picked up without a restart.
from importlib import reload
reload(base)

In [None]:
# Locations of the two modeling result sets
simulation_path = rd.datadir / 'projects' / 'miR-iFFL'
missplicing_path = rd.rootdir / 'output'

# Each call returns a (data, stats) pair
sim_data, sim_stats = base.load_modeling(simulation_path, 'stochastic_sims')
sim_data2, sim_stats2 = base.load_modeling(missplicing_path, 'missplicing')

In [None]:
# Create color palettes and marker maps shared by the plotting cells

# Color per 'kind' label
kind_palette = {
    '1T_1': base.colors['teal'],
    '1T_2': base.colors['orange'],
    '1T_3': base.colors['red'],
    '2T_0': base.colors['green'],
    '2V_0': base.colors['purple'],
}

# Marker per 'gene' label
gene_markers = {
    '1T': 'o',
    '2T': 'D',
    '2V': 's',
}

# Marker per 'kind' label, needed by the line plots that pass
# `style='kind', markers=kind_markers`. The name was referenced but never
# defined, which raises NameError on a fresh kernel.
# NOTE(review): assumes every kind is '<gene>_<suffix>' (true for the keys of
# kind_palette) and should reuse its gene's marker -- confirm this is the intended mapping.
kind_markers = {kind: gene_markers[kind.split('_')[0]] for kind in kind_palette}

# Color per 'model' label
model_palette = {
    'base': base.colors['teal'],
    'more_mRNA': base.get_light_color(base.colors['blue']),
    'more_miRNA': base.get_dark_color(base.colors['blue'])
}

# Marker per 'model' label
model_markers = {
    'base': 'o',
    'more_mRNA': 'D',
    'more_miRNA': 's'
}

In [None]:
# Rich display of the 'missplicing' simulation data loaded above
display(sim_data2)

In [None]:
# Plot two-gene: output KDEs for design < 2 at risc == 10000, one facet column per moi
two_gene_rows = (sim_data['design'] < 2) & (sim_data['risc'] == 10000)
plot_df = sim_data[two_gene_rows]

two_gene_kde_opts = dict(kind='kde', bw_adjust=3, common_norm=False, legend=False)
g = sns.displot(
    data=plot_df,
    x='output',
    col='moi',
    hue='kind',
    palette=kind_palette,
    facet_kws=dict(sharex=False, sharey=False),
    **two_gene_kde_opts,
)

In [None]:
# Plot stats: one strip-plot panel per summary statistic (mean, std, slope, CV)
fig, axes = plt.subplots(1,4, gridspec_kw=dict(wspace=0.4), figsize=(10,2))
scatter_kwargs2 = dict(s=4, jitter=0.1, linewidth=0.5, edgecolor='white', native_scale=True)

# sort_values() returns a new frame; sorting the boolean-mask slice in place
# would raise SettingWithCopyWarning.
plot_df = sim_stats[(sim_stats['design']<=1) & (sim_stats['risc']==10000)].sort_values(['moi','gene'])

# One entry per panel:
# (title, marker-stat column or None, output-stat column, extra ax.set() options, hide minor tick labels)
stat_panels = [
    ('Mean',  'marker_gmean',     'output_gmean',     dict(yscale='log'),                False),
    ('Std.',  'marker_std',       'output_std',       dict(yscale='log'),                True),
    ('Slope', None,               'slope',            dict(yticks=[0.6,0.7,0.8,0.9,1]), False),
    ('CV',    'marker_variation', 'output_variation', dict(),                            True),
]

for ax, (title, marker_col, output_col, set_kwargs, hide_minor) in zip(axes, stat_panels):
    # Draw group by group so each gene can use its own marker shape
    for (gene, moi), group in plot_df.groupby(['gene','moi'], sort=False):
        if marker_col is not None:
            # marker statistic in gray, drawn underneath the output statistic
            sns.stripplot(data=group, x='condition', y=marker_col, color=base.colors['gray'],
                          legend=False, ax=ax, marker=gene_markers[gene], **scatter_kwargs2)
        sns.stripplot(data=group, x='condition', y=output_col, hue='kind', palette=kind_palette,
                      legend=False, ax=ax, marker=gene_markers[gene], **scatter_kwargs2)
    ax.set(title=title, xlabel='', ylabel='', **set_kwargs)
    if hide_minor:
        ax.yaxis.set_minor_formatter(plt.NullFormatter())

# Blank out the x tick labels on every panel
for ax in axes:
    ax.set(xticklabels=['']*len(ax.get_xticklabels()))

In [None]:
# Per-moi figure: left column = output vs. copy number (geometric mean with
# geometric-std band), right column = KDE of output.
fig, axes = plt.subplots(3,2, gridspec_kw=dict(width_ratios=(0.8,1), wspace=0.2, hspace=0.4))

plot_df = sim_data[(sim_data['risc']==10000) & (sim_data['design']<=1)]
# Tick positions per row (one row per moi value, in order of appearance)
yticks = [[0,1e3,2e3], [0,2e3,4e3,6e3], [0,2e3,4e3,6e3]]
hist_xticks = [[0,300,600,900,1200,1500], [0,1e3,2e3,3e3], [0,1e3,2e3,3e3,4e3,5e3]]


def gmean_gstd_interval(x):
    """Return (gmean / gstd, gmean * gstd) of x, used as the line-plot error band."""
    center = sp.stats.gmean(x)
    spread = sp.stats.gstd(x)
    return (center / spread, center * spread)


for i,moi in enumerate(plot_df['moi'].unique()):

    d = plot_df[(plot_df['moi']==moi)]
    # drop data with only one point at a given copy number (for this plot):
    # at moi 3 keep '2V' rows only up to copy number 10, keep all other genes
    if moi==3:
        d = d[((d['copy_num']<=10) & (d['gene']=='2V')) | (d['gene']!='2V')]

    is_1T = d['gene']=='1T'

    # line plot ('1T' dashed, everything else solid)
    ax = axes[i,0]
    line_kwargs = dict(x='copy_num', y='output', hue='kind', palette=kind_palette,
                       legend=False, dashes=False, style='kind', markers=kind_markers, ax=ax,
                       markersize=4, markeredgewidth=0.5,
                       estimator=sp.stats.gmean, errorbar=gmean_gstd_interval)
    sns.lineplot(data=d[~is_1T], **line_kwargs)
    sns.lineplot(data=d[is_1T], ls='--', **line_kwargs)
    sns.despine(ax=ax)
    ax.minorticks_off()
    ax.set(xlabel='', yticks=yticks[i], yticklabels=[f'{float(l)/1000:.0f}k' for l in yticks[i]], ylabel='output (#)')

    if i==2: ax.set(xlabel='copy number')

    # histogram (KDE), same solid/dashed split
    ax = axes[i,1]
    kde_kwargs = dict(x='output', hue='kind', palette=kind_palette, ax=ax, legend=False,
                      common_norm=False, bw_adjust=3)
    sns.kdeplot(data=d[~is_1T], **kde_kwargs)
    sns.kdeplot(data=d[is_1T], ls='--', **kde_kwargs)
    sns.despine(ax=ax, left=True)

    ax.set(xlabel='', xlim=(hist_xticks[i][0], hist_xticks[i][-1]), xticks=hist_xticks[i], ylabel='', yticklabels=[],)
    # Build the labels (in thousands, 'k' on the last one) straight from the tick
    # values instead of parsing the rendered tick-label text, which depends on the
    # active formatter and on whether matplotlib has populated the labels yet.
    tick_fmt = '.0f' if i>0 else '.1f'
    xticklabels_k = [f'{tick/1000:{tick_fmt}}' for tick in hist_xticks[i]]
    xticklabels_k[-1] += 'k'
    ax.set(xticklabels=xticklabels_k)
    ax.get_yaxis().set_visible(False)
    if i==2: ax.set(xlabel='protein (#)')

for ax in axes.flatten(): ax.minorticks_off()

In [None]:
# Plot designs: output KDEs for gene '1T', facet columns by moi and rows by risc
single_gene_rows = sim_data['gene'] == '1T'
plot_df = sim_data[single_gene_rows]

design_kde_opts = dict(kind='kde', bw_adjust=3, common_norm=False, legend=False)
g = sns.displot(
    data=plot_df,
    x='output',
    col='moi',
    row='risc',
    hue='kind',
    palette=kind_palette,
    facet_kws=dict(sharex=False, sharey=False),
    **design_kde_opts,
)

In [None]:
# Plot models: output KDEs colored by model, one facet column per moi
plot_df = sim_data2

model_kde_opts = dict(kind='kde', bw_adjust=3, common_norm=False, legend=True)
g = sns.displot(
    data=plot_df,
    x='output',
    col='moi',
    hue='model',
    palette=model_palette,
    facet_kws=dict(sharex=False, sharey=False),
    **model_kde_opts,
)

In [None]:
# Plot stats: one strip-plot panel per summary statistic (mean, std, slope, CV)
fig, axes = plt.subplots(1,4, gridspec_kw=dict(wspace=0.4), figsize=(10,2))
scatter_kwargs2 = dict(s=4, jitter=0.1, linewidth=0.5, edgecolor='white', native_scale=True)

# Sort into a new frame rather than in place, so the shared sim_stats2 frame
# is not silently reordered for other cells.
plot_df = sim_stats2.sort_values(['moi','model'])

# One entry per panel:
# (title, marker-stat column or None, output-stat column, extra ax.set() options, hide minor tick labels)
stat_panels = [
    ('Mean',  'marker_gmean',     'output_gmean',     dict(yscale='log'),                False),
    ('Std.',  'marker_std',       'output_std',       dict(yscale='log'),                True),
    ('Slope', None,               'slope',            dict(yticks=[0.6,0.7,0.8,0.9,1]), False),
    ('CV',    'marker_variation', 'output_variation', dict(),                            True),
]

for ax, (title, marker_col, output_col, set_kwargs, hide_minor) in zip(axes, stat_panels):
    # Draw group by group so each model can use its own marker shape
    for (model, moi), group in plot_df.groupby(['model','moi'], sort=False):
        if marker_col is not None:
            # marker statistic in gray, drawn underneath the output statistic
            sns.stripplot(data=group, x='condition', y=marker_col, color=base.colors['gray'],
                          legend=False, ax=ax, marker=model_markers[model], **scatter_kwargs2)
        sns.stripplot(data=group, x='condition', y=output_col, hue='model', palette=model_palette,
                      legend=False, ax=ax, marker=model_markers[model], **scatter_kwargs2)
    ax.set(title=title, xlabel='', ylabel='', **set_kwargs)
    if hide_minor:
        ax.yaxis.set_minor_formatter(plt.NullFormatter())

# Blank out the x tick labels and shade the x-range 2.5-5.5 on every panel
for ax in axes:
    ax.set(xticklabels=['']*len(ax.get_xticklabels()))
    ax.axvspan(2.5,5.5, color=base.get_light_color(base.colors['gray']), alpha=0.2,)

In [None]:
# Per-moi figure: left column = output vs. copy number (geometric mean with
# geometric-std band), right column = KDE of output.
fig, axes = plt.subplots(3,2, gridspec_kw=dict(width_ratios=(0.8,1), wspace=0.2, hspace=0.4))

plot_df = sim_data2
# Tick positions per row (one row per moi value, in order of appearance)
yticks = [[0,1e3,2e3], [0,1e3,2e3,3e3], [0,2e3,4e3,6e3]]
hist_xticks = [[0,300,600,900,1200,1500], [0,1e3,2e3,3e3], [0,1e3,2e3,3e3,4e3,5e3]]


def gmean_gstd_interval(x):
    """Return (gmean / gstd, gmean * gstd) of x, used as the line-plot error band."""
    center = sp.stats.gmean(x)
    spread = sp.stats.gstd(x)
    return (center / spread, center * spread)


for i,moi in enumerate(plot_df['moi'].unique()):

    d = plot_df[(plot_df['moi']==moi)]
    is_base = d['model']=='base'

    # line plot ('base' model dashed, the other models solid)
    ax = axes[i,0]
    line_kwargs = dict(x='copy_num', y='output', hue='model', palette=model_palette,
                       legend=False, dashes=False, style='model', markers=model_markers, ax=ax,
                       markersize=4, markeredgewidth=0.5,
                       estimator=sp.stats.gmean, errorbar=gmean_gstd_interval)
    sns.lineplot(data=d[~is_base], **line_kwargs)
    sns.lineplot(data=d[is_base], ls='--', **line_kwargs)
    sns.despine(ax=ax)
    ax.minorticks_off()
    ax.set(xlabel='', yticks=yticks[i], yticklabels=[f'{float(l)/1000:.0f}k' for l in yticks[i]], ylabel='output (#)')

    if i==2: ax.set(xlabel='copy number')

    # histogram (KDE), same solid/dashed split
    ax = axes[i,1]
    kde_kwargs = dict(x='output', hue='model', palette=model_palette, ax=ax, legend=False,
                      common_norm=False, bw_adjust=3)
    sns.kdeplot(data=d[~is_base], **kde_kwargs)
    sns.kdeplot(data=d[is_base], ls='--', **kde_kwargs)
    sns.despine(ax=ax, left=True)

    ax.set(xlabel='', xlim=(hist_xticks[i][0], hist_xticks[i][-1]), xticks=hist_xticks[i], ylabel='', yticklabels=[],)
    # Build the labels (in thousands, 'k' on the last one) straight from the tick
    # values instead of parsing the rendered tick-label text, which depends on the
    # active formatter and on whether matplotlib has populated the labels yet.
    tick_fmt = '.0f' if i>0 else '.1f'
    xticklabels_k = [f'{tick/1000:{tick_fmt}}' for tick in hist_xticks[i]]
    xticklabels_k[-1] += 'k'
    ax.set(xticklabels=xticklabels_k)
    ax.get_yaxis().set_visible(False)
    if i==2: ax.set(xlabel='protein (#)')

for ax in axes.flatten(): ax.minorticks_off()

In [None]:
# Binned statistics for the moi == 10 rows with copy_num above 13
high_copy_rows = (sim_data2['moi'] == 10) & (sim_data2['copy_num'] > 13)
subset = sim_data2[high_copy_rows].copy()

group_cols = ['model', 'condition', 'risc', 'moi']
_, subset_stats = base.calculate_bins_stats(subset, by=group_cols, num_bins=10,)
display(subset_stats)

In [None]:
# Bivariate marker/output distribution at moi == 10, with KDE contours of the
# high-copy-number rows drawn on the same axes.
at_moi10 = sim_data2['moi'] == 10
g = sns.displot(data=sim_data2[at_moi10], x='marker', y='output', hue='model',
                common_norm=False,)

# NOTE(review): `color` is presumably not applied while `hue` is mapped -- confirm
# whether gray contours were intended. The cutoff here is copy_num > 12.
high_copy_at_moi10 = at_moi10 & (sim_data2['copy_num'] > 12)
sns.kdeplot(data=sim_data2[high_copy_at_moi10], x='marker', y='output', hue='model',
            common_norm=False, color=base.colors['gray'], ax=g.ax)

In [None]:
# Log-scaled bivariate KDEs per moi, with each model's fitted line
# (slope / intercept_log from subset_stats) drawn on the moi == 10 facet.
plot_df = sim_data2
g = sns.displot(data=plot_df, x='marker', y='output', hue='model', col='moi',
                common_norm=False, kind='kde', log_scale=True)

moi10_ax = g.axes_dict[10]
for model in plot_df['model'].unique():
    stat = subset_stats[(subset_stats['model']==model)]
    # first matching row supplies the fit parameters
    slope = stat['slope'].values[0]
    intercept_log = stat['intercept_log'].values[0]
    xs = np.logspace(3.5, 4.2, 100)
    # the line is evaluated in log10 space, then mapped back for plotting
    ys = slope * np.log10(xs) + intercept_log
    moi10_ax.plot(xs, 10**ys, color='black',)

In [None]:
# Overlay, on a single axes, one log-scaled bivariate KDE per model (colored by moi)
plot_df = sim_data2
fig, ax = plt.subplots(1,1,)
for model in plot_df['model'].unique():
    model_rows = plot_df[plot_df['model']==model]
    sns.kdeplot(data=model_rows, x='marker', y='output', hue='moi',
                common_norm=False, log_scale=True, ax=ax)

In [None]:
# Bivariate histogram of marker vs. output, colored by model
plot_df = sim_data2
g = sns.histplot(data=plot_df, x='marker', y='output', hue='model',
                 common_norm=False, )  # log_scale=True is currently disabled

# seaborn builds this legend from its own handles, so calling g.legend(loc=...)
# finds no labeled artists and replaces the hue legend with an empty one.
# move_legend re-creates the existing legend at the new location.
sns.move_legend(g, 'upper left')

In [None]:
# Marker vs. output scatter for every simulated point, colored by model
plot_df = sim_data2
# NOTE(review): `size=2` goes through seaborn's size *mapping*; if a fixed marker
# size was intended, matplotlib's `s=` is presumably the right keyword -- confirm.
model_scatter_opts = dict(alpha=0.2, size=2, legend=False)
g = sns.scatterplot(
    data=plot_df,
    x='marker',
    y='output',
    hue='model',
    palette=model_palette,
    **model_scatter_opts,
)

In [None]:
# Marker vs. output scatter for risc == 10000 and design < 2, colored by kind
selected_rows = (sim_data['risc'] == 10000) & (sim_data['design'] < 2)
plot_df = sim_data[selected_rows]
# NOTE(review): `size=2` goes through seaborn's size *mapping*; if a fixed marker
# size was intended, matplotlib's `s=` is presumably the right keyword -- confirm.
kind_scatter_opts = dict(alpha=0.2, size=2, legend=False)
g = sns.scatterplot(
    data=plot_df,
    x='marker',
    y='output',
    hue='kind',
    palette=kind_palette,
    **kind_scatter_opts,
)

In [None]:
# Mean and std of marker/output at each copy number, per kind (sim_data) and
# per model (sim_data2).
# The string aliases 'mean'/'std' select the same groupby reductions that pandas
# currently substitutes for np.mean/np.std, produce the same column labels, and
# avoid the FutureWarning newer pandas raises when NumPy callables are passed to agg().
two_gene_rows = (sim_data['risc']==10000) & (sim_data['design']<2)
copy_num_stats = (
    sim_data[two_gene_rows]
    .groupby(['kind','copy_num'])[['marker','output']]
    .agg(['mean', 'std'])
    .reset_index()
)
# Flatten the two-level column index via the project helper
copy_num_stats.columns = copy_num_stats.columns.map(base.rename_multilevel_cols)

copy_num_stats2 = (
    sim_data2
    .groupby(['model','copy_num'])[['marker','output']]
    .agg(['mean', 'std'])
    .reset_index()
)
copy_num_stats2.columns = copy_num_stats2.columns.map(base.rename_multilevel_cols)

In [None]:
# Mean marker vs. mean output per copy number, colored by model, with +/- std bars
plot_df = copy_num_stats2
g = sns.scatterplot(data=plot_df, x='marker_mean', y='output_mean', hue='model', palette=model_palette)

# error bars only (fmt='none' draws no markers or connecting line)
g.errorbar(
    plot_df['marker_mean'],
    plot_df['output_mean'],
    xerr=plot_df['marker_std'],
    yerr=plot_df['output_std'],
    color='black',
    fmt='none',
)
g.legend(loc='upper left')

In [None]:
# Mean marker vs. mean output per copy number, colored by kind, with +/- std bars
plot_df = copy_num_stats
g = sns.scatterplot(data=plot_df, x='marker_mean', y='output_mean', hue='kind', palette=kind_palette)

# error bars only (fmt='none' draws no markers or connecting line)
g.errorbar(
    plot_df['marker_mean'],
    plot_df['output_mean'],
    xerr=plot_df['marker_std'],
    yerr=plot_df['output_std'],
    color='black',
    fmt='none',
)
g.legend(loc='upper left')