This notebook implements a simple probabilistic model for estimating average complex stoichiometries from previously calculated fractions of correlated trajectories.

In [None]:
# Configuration: results table with the 1x, 2x, and 4x HaloTag standards
# (output of the stoichiometry analysis notebook)

# Path of the CSV results file
results_file = '../reports/figures/Fig2sup1_GST_dimer_rev1/summary_figures/Correlations_results.csv'

# Results-file column holding the fraction of correlated trajectories
metric = 'frac_corr_C1'

# Results-file column holding the condition name
condition = 'Condition'

# HaloTag standards, keyed by stoichiometry -> condition name in the results file
standards = {
    1: 'HaloTag-1x',
    2: 'HaloTag-2x-tandem',
    4: 'HaloTag_4x',
}

# Plotting options
plot_settings = '../src/plotting_settings.py'  # File containing matplotlib settings
save_figs = False  # True: save figure files; False: plot interactively

In [None]:
# Import modules and set up plotting
import os
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as st
import pandas as pd
from scipy.optimize import curve_fit, brentq

# Configure matplotlib by running the settings script whose path is stored in
# `plot_settings` ($plot_settings is IPython variable expansion). Both branches
# issue the same bare `%matplotlib` magic; they differ only in the argument
# handed to the script ('save_large' vs 'plot_only').
# NOTE(review): what the script does with each argument is not visible here.
if save_figs: # Save figure files
    %matplotlib
    %run $plot_settings save_large
      
else: # Plot interactively
    %matplotlib
    %run $plot_settings plot_only

In [None]:
# Output directory for figures: the folder that contains the results file
save_dir = os.path.dirname(results_file)

# Read the results table
data = pd.read_csv(results_file)

In [None]:
# Process and plot data

# Per-condition mean, standard deviation and standard error of the metric.
# 'mean' and 'std' are given by name: pandas maps the NumPy callables to its own
# built-in reductions anyway (sample std, ddof=1) and deprecates passing them,
# so the strings pin that behaviour without a FutureWarning.
data_stats = data.groupby(condition)[metric].agg(['mean', 'std', st.sem])

# Rows for the standards only, in the order listed in `standards`, with the
# stoichiometry attached as column 'n'. Selecting all rows in one .loc call
# replaces the row-by-row DataFrame.append loop (append was removed in
# pandas 2.0). A standard missing from the results still raises KeyError.
to_plot = data_stats.loc[list(standards.values())].assign(
    n=np.array(list(standards.keys()), dtype=float)
)

# Mean fraction of correlated trajectories vs. stoichiometry, SEM error bars
fig, ax = plt.subplots()
ax.errorbar(to_plot['n'], to_plot['mean'], yerr=to_plot['sem'], fmt='_', capsize=2)

print(data_stats)

In [None]:
# Estimate labeling efficiency
def f_obs(n, PL, y_offset):
    """Model fraction of observed correlated trajectories for oligomer size n.

    Evaluates
        (1 - 2*(1-PL)**n + (1-2*PL)**n) / (1 - (1-2*PL)**n)
    and linearly rescales it from [0, 1] to [y_offset, 1].

    Parameters
    ----------
    n : oligomer size (scalar or array; the NumPy operations broadcast)
    PL : labeling-efficiency parameter of the model
    y_offset : baseline value returned when the ratio is 0

    Returns
    -------
    ratio * (1 - y_offset) + y_offset
    """
    # presumably: chance that none of the n subunits carries one given label
    none_of_one = np.power(1 - PL, n)
    # presumably: chance that none of the n subunits carries either label
    none_of_either = np.power(1 - 2*PL, n)

    both_present = 1 - none_of_one - none_of_one + none_of_either
    any_present = 1 - none_of_either

    # Rescale so that a ratio of 0 maps to y_offset and a ratio of 1 maps to 1
    return (both_present / any_present) * (1 - y_offset) + y_offset

# Least-squares fit of f_obs to the mean of each standard;
# p0 holds the starting guesses for (PL, y_offset)
popt, _ = curve_fit(f_obs, to_plot['n'], to_plot['mean'], p0=[0.2,0.0])

# Fitted parameters, in the order of f_obs's arguments after n
label_eff, y_offset = popt
print(label_eff, y_offset)

In [None]:
# Evaluate the fitted model on a dense grid of oligomer sizes and overlay
# the curve on the axes that already show the standards
n_values = np.linspace(1, 16, 100)
f_obs_vect = np.vectorize(f_obs)
y_values = f_obs_vect(n_values, label_eff, y_offset)
ax.plot(n_values, y_values)

# Axis labels and ranges
ax.set_xlabel('Oligomer size (# of HaloTag proteins)')
ax.set_ylabel('Fraction of correlated trajectories')
ax.set_xlim([0, n_values.max()])
ax.set_ylim([0, 1])

# Optionally write the figure as a PDF into save_dir
if save_figs:
    fig_name = 'Stoichiometry_plot'
    fig_path = os.path.join(save_dir, fig_name + '.pdf')
    fig.savefig(fig_path)


In [None]:
# Solve the fitted model equation for n: given an observed fraction of
# correlated trajectories, estimate the corresponding oligomer size

# Observed fraction of correlated trajectories to convert into an oligomeric
# state; change this value to estimate n for a different measurement
experimental_f_obs = 0.13

def f_obs_solve(n, PL, y_offset, y_val):
    """Fraction of observed correlated trajectories with y_val subtracted.

    Root-finding form of the model: the result is zero at the oligomer
    size n for which f_obs(n, PL, y_offset) equals y_val.

    Parameters
    ----------
    n : oligomer size (the unknown being solved for)
    PL : labeling-efficiency parameter of the model
    y_offset : baseline offset of the model
    y_val : target fraction of correlated trajectories

    Returns
    -------
    f_obs(n, PL, y_offset) - y_val
    """
    # Delegate to f_obs rather than repeating its formula, so that the fitted
    # model and the equation being solved cannot drift apart
    return f_obs(n, PL, y_offset) - y_val


# Find the root of f_obs_solve in n within the bracket [0.75, 20], using the
# fitted parameters and the chosen experimental fraction
a = brentq(
    f_obs_solve,
    a=0.75,
    b=20,
    args=(label_eff, y_offset, experimental_f_obs),
)
print(a)
