In [None]:
import base
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rushd as rd
import scipy as sp
import seaborn as sns
import sklearn.metrics
import itertools

# Re-import the local `base` module so that edits made to base.py while the
# kernel is running take effect without a restart.
from importlib import reload
reload(base)

# Global seaborn appearance: 'ticks' axes style, 'talk' context, and a
# sans-serif font family that prefers Helvetica Neue.
font_rc = {
    'font.family': 'sans-serif',
    'font.sans-serif': ['Helvetica Neue'],
}
sns.set_style('ticks')
sns.set_context('talk', rc=font_rc)

In [None]:
# Input locations
base_path = rd.datadir / 'instruments' / 'data' / 'attune' / 'Emma'
exp66_path = base_path / '2024.06.20_EXP66_rep_1' / 'data_singlets'

# One row per plate: where its data lives and which YAML file describes its wells.
plates = pd.DataFrame({
    'data_path': [exp66_path],
    'yaml_path': [exp66_path / 'wells.yaml'],
})

# Output locations (the cache file sits inside the figure output directory).
output_path = rd.rootdir / 'output' / 'orthogonal-circuits'
cache_path = output_path / 'data.gzip'

# Plot each plate's well metadata and collect the union of metadata keys.
metadata_keys = set()
for yaml_file in plates['yaml_path'].unique():
    print(yaml_file)
    rd.plot.plot_well_metadata(yaml_file)
    well_metadata = rd.flow.load_well_metadata(yaml_file)
    metadata_keys.update(well_metadata.keys())
display(metadata_keys)

In [None]:
# Load data
# The parquet cache is used whenever it exists; otherwise the plates are loaded,
# cleaned and written to the cache. NOTE: an existing cache is never refreshed,
# so delete the cache file after changing `plates` or `channel_list`.
if cache_path.is_file():
    data = pd.read_parquet(cache_path)
else:
    channel_list = ['mRuby2-A','FSC-A','SSC-A','mGL-A','iRFP-A']
    data = rd.flow.load_groups_with_metadata(plates, columns=channel_list)

    # Keep only events that are strictly positive in every channel, then drop
    # rows with missing values. A single mask avoids one intermediate frame per
    # channel, and the non-inplace dropna avoids mutating a filtered slice
    # (which can trigger SettingWithCopyWarning).
    positive_in_all_channels = (data[channel_list] > 0).all(axis=1)
    data = data.loc[positive_in_all_channels].dropna()

    data.to_parquet(rd.outfile(cache_path))
display(data)

In [None]:
# iRFP gate: 99.9th percentile of iRFP-A among events whose control_cond is 'NT'.
is_nt_control = data['control_cond'] == 'NT'
iRFP_gate = data.loc[is_nt_control, 'iRFP-A'].quantile(0.999)
display(iRFP_gate)

In [None]:
# Keep events above the iRFP gate that come from wells whose control_cond is 'na'.
above_iRFP_gate = data['iRFP-A'] > iRFP_gate
is_non_control = data['control_cond'] == 'na'
data_gated = data.loc[above_iRFP_gate & is_non_control]
display(data_gated)

In [None]:
# One figure per promoter pair: joint mGL-A vs mRuby2-A density contours coloured
# by circuit condition, with marginal densities along the top and right edges.
max_events_per_plot = 20000
circuit_hue_order = ['base-base','T-NT','NT-T','T-T']

for promoter_cond, group in data_gated.groupby('promoter_cond'):

    # Subsample for plotting. The cap is clamped to the group size because
    # DataFrame.sample raises ValueError when n exceeds the number of rows.
    group = group.sample(n=min(len(group), max_events_per_plot), random_state=1)

    # Axes rectangles in figure coordinates: [left, bottom, width, height]
    left, width = 0.1, 0.65
    bottom, height = 0.1, 0.65
    spacing = 0.005

    rect_scatter = [left, bottom, width, height]
    rect_histx = [left, bottom + height + spacing, width, 0.2]
    rect_histy = [left + width + spacing, bottom, 0.2, height]

    # Set up figure: main joint axes plus two frameless marginal axes
    fig = plt.figure(figsize=(7, 7))
    ax_scatter = fig.add_axes(rect_scatter)
    ax_scatter.tick_params(direction='in', top=True, right=True)
    ax_histx = fig.add_axes(rect_histx)
    ax_histx.set_axis_off()
    ax_histy = fig.add_axes(rect_histy)
    ax_histy.set_axis_off()

    # Set limits (marginals share the limits of the matching joint axis)
    xlim = (1*10**1, 1*10**7)
    ylim = (1*10**1, 1*10**5)
    ax_scatter.set_xlim(xlim)
    ax_scatter.set_ylim(ylim)
    ax_histx.set_xlim(xlim)
    ax_histy.set_ylim(ylim)

    # Joint density contours
    g = sns.kdeplot(ax=ax_scatter, data=group, y='mRuby2-A', x='mGL-A',
                    hue='circuit_cond', hue_order=circuit_hue_order,
                    log_scale=True, common_norm=False,
                    alpha=0.7, fill=False, legend=True)

    sns.move_legend(g, "upper left", bbox_to_anchor=(1.1, 1), frameon=False)

    # Marginal densities
    sns.kdeplot(ax=ax_histx, data=group, x='mGL-A',
                hue='circuit_cond', hue_order=circuit_hue_order,
                log_scale=True, common_norm=False,
                alpha=0.1, fill=True, legend=False)
    sns.kdeplot(ax=ax_histy, data=group, y='mRuby2-A',
                hue='circuit_cond', hue_order=circuit_hue_order,
                log_scale=True, common_norm=False,
                alpha=0.1, fill=True, legend=False)

    # Title
    fig.suptitle(f'Promoter pair: {promoter_cond}')

    # No fig.tight_layout(): the axes are positioned manually, so tight_layout
    # cannot adjust them and only emits a warning. bbox_inches='tight' trims the
    # saved files instead.
    for extension in ('svg', 'pdf'):
        fig.savefig(rd.outfile(output_path/(f'{promoter_cond}.{extension}')), bbox_inches='tight')
    plt.show()
    # Close (not just clear) the figure so figures do not accumulate across iterations.
    plt.close(fig)

In [None]:
# One figure per circuit condition: joint mGL-A vs mRuby2-A density contours
# coloured by promoter pair, with marginal densities along the top and right edges.
max_events_per_plot = 20000
promoter_hue_order = ['hPGK-hPGK','EF1a-hPGK','hPGK-EF1a','EF1a-EF1a']

for circuit_cond, group in data_gated.groupby('circuit_cond'):

    # Subsample for plotting. The cap is clamped to the group size because
    # DataFrame.sample raises ValueError when n exceeds the number of rows.
    group = group.sample(n=min(len(group), max_events_per_plot), random_state=1)

    # Axes rectangles in figure coordinates: [left, bottom, width, height]
    left, width = 0.1, 0.65
    bottom, height = 0.1, 0.65
    spacing = 0.005

    rect_scatter = [left, bottom, width, height]
    rect_histx = [left, bottom + height + spacing, width, 0.2]
    rect_histy = [left + width + spacing, bottom, 0.2, height]

    # Set up figure: main joint axes plus two frameless marginal axes
    fig = plt.figure(figsize=(7, 7))
    ax_scatter = fig.add_axes(rect_scatter)
    ax_scatter.tick_params(direction='in', top=True, right=True)
    ax_histx = fig.add_axes(rect_histx)
    ax_histx.set_axis_off()
    ax_histy = fig.add_axes(rect_histy)
    ax_histy.set_axis_off()

    # Set limits (marginals share the limits of the matching joint axis)
    xlim = (1*10**1, 1*10**7)
    ylim = (1*10**1, 1*10**5)
    ax_scatter.set_xlim(xlim)
    ax_scatter.set_ylim(ylim)
    ax_histx.set_xlim(xlim)
    ax_histy.set_ylim(ylim)

    # Joint density contours
    g = sns.kdeplot(ax=ax_scatter, data=group, y='mRuby2-A', x='mGL-A',
                    hue='promoter_cond', hue_order=promoter_hue_order,
                    log_scale=True, common_norm=False,
                    alpha=0.7, fill=False, legend=True)

    sns.move_legend(g, "upper left", bbox_to_anchor=(1.1, 1), frameon=False)

    # Marginal densities
    sns.kdeplot(ax=ax_histx, data=group, x='mGL-A',
                hue='promoter_cond', hue_order=promoter_hue_order,
                log_scale=True, common_norm=False,
                alpha=0.1, fill=True, legend=False)
    sns.kdeplot(ax=ax_histy, data=group, y='mRuby2-A',
                hue='promoter_cond', hue_order=promoter_hue_order,
                log_scale=True, common_norm=False,
                alpha=0.1, fill=True, legend=False)

    # Title
    fig.suptitle(f'Circuit: {circuit_cond}')

    # No fig.tight_layout(): the axes are positioned manually, so tight_layout
    # cannot adjust them and only emits a warning. bbox_inches='tight' trims the
    # saved files instead.
    for extension in ('svg', 'pdf'):
        fig.savefig(rd.outfile(output_path/(f'{circuit_cond}.{extension}')), bbox_inches='tight')
    plt.show()
    # Close (not just clear) the figure so figures do not accumulate across iterations.
    plt.close(fig)

## Mutual information and Spearman correlation analysis

In [None]:
def mutual_information(df, n_bins_per_axis=20):
    """Mutual information between log(mGL-A) and log(mRuby2-A) for the rows of ``df``.

    The natural logs of the two columns are binned into a 2D histogram with
    ``n_bins_per_axis`` bins along each axis, and that count table is passed to
    ``sklearn.metrics.mutual_info_score`` as the contingency matrix.
    """
    log_mgl = np.log(df['mGL-A'])
    log_mruby = np.log(df['mRuby2-A'])
    # histogram2d returns (counts, x_edges, y_edges); only the counts are needed.
    counts, _, _ = np.histogram2d(log_mgl, log_mruby, bins=n_bins_per_axis)
    return sklearn.metrics.mutual_info_score(None, None, contingency=counts)
def spearman_r(df):
    """Spearman rank correlation between the mGL-A and mRuby2-A columns of ``df``."""
    mgl = df['mGL-A']
    mruby = df['mRuby2-A']
    result = sp.stats.spearmanr(mgl, mruby)
    # Only the correlation coefficient is returned; the p-value is discarded.
    return result.statistic
def summary_stats(df):
    """Return a one-row DataFrame (index 0) holding both summary statistics for ``df``.

    Columns are ``mutual_information`` and ``spearman_r``, computed by the
    functions of the same names.
    """
    stats = {
        "mutual_information": mutual_information(df),
        "spearman_r": spearman_r(df),
    }
    # The values are scalars, so an explicit index is needed to build a single row.
    return pd.DataFrame(stats, index=[0])

In [None]:
# Compute the summary statistics once per (promoter_cond, circuit_cond) group.
grouped_conditions = data_gated.groupby(['promoter_cond', 'circuit_cond'])
summary_df = grouped_conditions.apply(summary_stats, include_groups=False).reset_index()

# Single combined key per condition, formatted '<promoter_cond>_<circuit_cond>'.
summary_df['combo_cond'] = summary_df['promoter_cond'] + '_' + summary_df['circuit_cond']

In [None]:
# Scratch check: the same three-element list repeated four times.
list(itertools.repeat(list('abc'), 4))

In [None]:
# Define the list in this cell so the inspection works on a fresh kernel
# (Restart & Run All); previously the name was only assigned in the plotting
# cell further down, which made this cell raise NameError when run in order.
condition_order = [f'{x}_{y}' for x,y in itertools.product(['hPGK-hPGK', 'hPGK-EF1a', 'EF1a-hPGK', 'EF1a-EF1a'], ['base-base', 'NT-T', 'T-NT', 'T-T'])]
condition_order

In [None]:
# Display the `group_palette` attribute of the local `base` module for reference.
# NOTE(review): none of the visible cells use this palette — confirm whether
# this inspection cell is still needed.
base.group_palette

In [None]:
# x-axis order: every promoter pair crossed with every circuit, promoter-major.
promoter_pairs = ['hPGK-hPGK', 'hPGK-EF1a', 'EF1a-hPGK', 'EF1a-EF1a']
circuits = ['base-base', 'NT-T', 'T-NT', 'T-T']
condition_order = [f'{promoter}_{circuit}' for promoter in promoter_pairs for circuit in circuits]

# Short tick labels for each combined condition (4 circuits per promoter pair).
ticklabel_df = pd.DataFrame({
    'combo_cond': condition_order,
    'promoter_cond': [short_name for short_name in ['P-P', 'P-E', 'E-P', 'E-E'] for _ in range(4)],
    'circuit_cond': ['base', 'NT-T', 'T-NT', 'T-T'] * 4,
})

# Colour by circuit: black for base-base, half-transparent teal for the two
# mixed circuits, opaque teal for T-T.
half_teal = (base.colors['teal'], 0.5)
circuit_palette = {
    'base-base': 'k',
    'NT-T': half_teal,
    'T-NT': half_teal,
    'T-T': base.colors['teal'],
}

# Shaded x-ranges covering positions 0-3 and 8-11 (first and third promoter pair).
shaded_spans = [(-0.5, 3.5), (7.5, 11.5)]

for param in ['mutual_information', 'spearman_r']:
    plt.figure(figsize=(15,5))
    sns.stripplot(summary_df, x='combo_cond', y=param, order=condition_order,
                  hue='circuit_cond', palette=circuit_palette, legend=None)
    rd.plot.generate_xticklabels(ticklabel_df, 'combo_cond', ['promoter_cond', 'circuit_cond'])
    for span_start, span_end in shaded_spans:
        plt.axvspan(span_start, span_end, color='gray', alpha=0.2, ec=None)
    plt.xlim(-0.2,15.2)
    plt.show()

In [None]:
# Compare the T-NT and T-T circuits within the EF1a-EF1a promoter pair, one
# figure per channel. Each curve is labelled and a legend is drawn so the two
# overlaid densities can be told apart; the promoter filter is computed once.
ef1a_pair_events = data_gated[data_gated['promoter_cond'] == 'EF1a-EF1a']

for channel in ['mGL-A', 'mRuby2-A']:
    fig, ax = plt.subplots()
    for circuit in ['T-NT', 'T-T']:
        circuit_events = ef1a_pair_events[ef1a_pair_events['circuit_cond'] == circuit]
        sns.kdeplot(circuit_events, x=channel, log_scale=True, label=circuit, ax=ax)
    ax.legend(title='circuit_cond', frameon=False)
    ax.set_title('EF1a-EF1a')
    plt.show()