# Fig5

In [56]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rc
from zebrafish_ms2_paper.trace_analysis import extract_traces, predict_protein_for_all_nuclei, get_on_and_off_times, remove_blips, predict_protein, predict_protein_v2, compute_trace_uncertainty, enforce_1spot_per_nucleus, binarize_trace
from zebrafish_ms2_paper.ap_analysis import get_ap_for_somites, get_ap_for_spots, filter_spots_by_distance_from_somite, filter_spots_by_perp_distance_from_ap_axis
from zebrafish_ms2_paper.utils import pboc_rc, style_axes, colors, fontsize
from matplotlib import rc, rcParams
import pandas as pd
import pickle
import matplotlib as mpl
from scipy.signal import find_peaks

In [57]:
%matplotlib qt

In [58]:
# Shared styling constants used by every panel in this notebook.
# NOTE: this assignment rebinds `fontsize`, replacing the value imported from
# zebrafish_ms2_paper.utils in the import cell.
fontsize = 8
linewidth = 2
markersize = 8

In [59]:
"""specify the paths to the required datasets"""
# Earlier dataset location, kept for reference:
#path_to_manual_dataset1 = r'/media/brandon/Data1/Somitogenesis/Dorado/rerun_quantification_July7_2023/filtered_df.pkl'
# Pickled spot table that the figure cells read with pd.read_pickle.
# NOTE(review): absolute, machine-specific paths; adjust before running elsewhere.
path_to_manual_dataset1 = r'/media/brandon/Data1/Somitogenesis/Dorado/manual_curation/complete_manual_spots.pkl' #r'/media/brandon/Data1/Somitogenesis/Dorado/manual_traces/all_manual_spots_quant_std_offset.pkl'
# Pickled lookup that is indexed with integer trace times to obtain corrected time stamps.
path_to_non_blank_timepoints = r'/media/brandon/Data1/Somitogenesis/Dorado/non_blank_timepoints_full.pkl'

In [60]:
# Apply the matplotlib style dictionary imported from zebrafish_ms2_paper.utils.
rcParams.update(pboc_rc)
# Font type 42 makes the PDF backend embed TrueType fonts in saved figures.
rcParams['pdf.fonttype'] = 42

In [61]:
# correct time stamps
# `non_blank_timepoints` is indexed below with integer trace times to map them
# onto corrected time stamps.
# NOTE(review): pickle.load can execute arbitrary code; only open trusted files.
with open(path_to_non_blank_timepoints,'rb') as fp:
    non_blank_timepoints = pickle.load(fp)

In [90]:
# Close every open figure so the panels below are drawn on a fresh figure.
plt.close('all')

## Fig 5A
Plot example traces. An earlier version of this panel plotted 5 traces; the final figure shows only 4, reordered to make the figure smaller.

In [91]:
# load the manual dorado dataset
df = pd.read_pickle(path_to_manual_dataset1)

# Not used in this cell; it is reassigned before use in the period-distribution section.
minimum_number_of_spots = 20
# Nuclei shown in Fig 5A (the earlier five-trace selection is kept for reference).
#good_ids = [2715, 5070, 2831, 7814, 8258]
good_ids = [8258, 2715, 2831, 5070 ]

# Keep only the rows that belong to the selected nuclei. `isin` is the
# vectorized equivalent of testing every row's id against `good_ids`.
df = df[df.nucleus_id.isin(good_ids)]
df = enforce_1spot_per_nucleus(df, method='gauss3d_dog')

# extract culled traces
traces = extract_traces(df, method='gauss3d_dog')
# Reorder the four traces for display.
# NOTE(review): this positional reorder assumes extract_traces returns the
# traces in a fixed order - confirm which nucleus ends up in which panel.
traces = [traces[3], traces[0], traces[2], traces[1]]
    

In [92]:
# Figure layout: seven full-width rows ('a'-'g') and a bottom row split into 'h' and 'i'.
#f, axs = plt.subplots(7, 1, gridspec_kw={'height_ratios': [1, 1, 1, 1, 1.5, 1.5, 1.5]})
f, axd = plt.subplot_mosaic([['a', 'a'], ['b', 'b'], ['c', 'c'], ['d', 'd'], ['e', 'e'], ['f', 'f'], ['g', 'g'], ['h', 'i']], gridspec_kw={'height_ratios': [1, 1, 1, 1, 1.5, 1.5, 1.5, 1.5]})
# Panel keys in the order they are filled. `counter` indexes into this list and
# is advanced by each plotting cell, so the plotting cells must run top to bottom.
keys = ['a', 'b', 'c', 'd', 'e', 'f','g', 'h', 'i']
counter = 0
# Normalize every trace in this panel group by the largest intensity in the filtered table.
max_inten = df.gauss3d_dog.max()
for i, trace in enumerate(traces):
    #ax = axs[counter]
    ax = axd[keys[counter]]
    # Each trace unpacks as (times, intensities, nucleus id).
    t_arr, inten_arr, nucleus = trace
    # Map the integer trace times onto the corrected time stamps.
    t_arr = non_blank_timepoints[t_arr.astype('int')]
    
    # Only `protein` is used below; `mrna` and `fp` are not.
    mrna, protein, fp = predict_protein_v2(inten_arr, t_arr, Tmax=180, t_eval=np.arange(0, 180))

    # for trace uncertainty
    # Background (offset) samples for this nucleus, with NaN entries dropped.
    # The times come straight from the `t` column (not mapped through
    # non_blank_timepoints).
    sub_df = df[df.nucleus_id == nucleus]
    t_arr_bkg = sub_df.t.values
    bkg_arr = sub_df.offset.values
    t_arr_bkg = t_arr_bkg[~np.isnan(bkg_arr)]
    bkg_arr = bkg_arr[~np.isnan(bkg_arr)]
    # `s` is subtracted from / added to the intensities to form the shaded band.
    s = compute_trace_uncertainty(t_arr_bkg, bkg_arr, inten_arr)
    l = inten_arr - s
    u = inten_arr + s
    
    # plot protein
    # Boolean mask selecting the evaluation times that also occur in t_arr; the
    # protein curve is rescaled so its maximum matches the trace's maximum.
    t_eval = np.arange(0, len(protein))
    protein_ids = np.array([t in t_arr for t in t_eval])
    ax.plot(t_arr, protein[protein_ids] / np.max(protein[protein_ids]) * np.max(inten_arr) / max_inten, '-', linewidth=linewidth, alpha=0.5, label='predicted protein', color=colors['blue'])

    # Measured trace with its uncertainty band.
    ax.plot(t_arr, inten_arr / max_inten, '-', linewidth=linewidth, label='her1-MS2', color=colors['green'])
    #ax.plot(t_arr, inten_arr / max_inten, 'o', label='_nolabel_', markersize=12, fillstyle='none', markeredgecolor='k', markeredgewidth=1)
    ax.fill_between(t_arr, l / max_inten, u / max_inten, facecolor=colors['green'], alpha=0.25)
    
    ax.set_xlim([0, 145])

    #if counter == 0:
        #ax.legend(loc=1, fontsize=fontsize, facecolor='w')

    # Only the fourth (last) trace panel shows tick labels and the x-axis label.
    if counter == 3:
        ax.set_xticks([0, 30, 60, 90, 120])
        ax.set_xlabel('time (min)', fontsize=fontsize)
    else:
        ax.set_xticks([0, 30, 60, 90, 120], labels=[])
    
    # The y-axis label is attached to the third panel only.
    if counter == 2:
        ax.set_ylabel('fluorescence intensity (a.u.) \n', fontsize=fontsize)
    
    ax = style_axes(ax, fontsize=fontsize)
    counter += 1
    

## Fig5B
Plot a zoomed-in view of a single burst.

In [93]:
# load the manual dorado dataset
df = pd.read_pickle(path_to_manual_dataset1)

# extract the trace just for the nucleus in question
good_ids = [2831]

# Keep only the rows of the selected nucleus (vectorized membership test).
df = df[df.nucleus_id.isin(good_ids)]
df = enforce_1spot_per_nucleus(df, method='gauss3d_dog')

# extract culled traces
traces = extract_traces(df, method='gauss3d_dog')

In [94]:
# Draw the zoomed-in burst on the next free mosaic panel.
#ax = axs[counter]
ax = axd[keys[counter]]
# use same max intensity from the full set of traces
# NOTE(review): hard-coded value; confirm it still equals the maximum computed
# in the Fig 5A cell if the dataset or the selected nuclei change.
max_inten = 62139.0
for i, trace in enumerate(traces):
    # Each trace unpacks as (times, intensities, nucleus id).
    t_arr, inten_arr, nucleus = trace
    # Map the integer trace times onto the corrected time stamps.
    t_arr = non_blank_timepoints[t_arr.astype('int')]
    
    # The predicted outputs are not used in this panel.
    mrna, protein, fp = predict_protein_v2(inten_arr, t_arr, Tmax=180, t_eval=np.arange(0, 180))

    # for trace uncertainty
    # Background (offset) samples for this nucleus, with NaN entries dropped.
    sub_df = df[df.nucleus_id == nucleus]
    t_arr_bkg = sub_df.t.values
    bkg_arr = sub_df.offset.values
    t_arr_bkg = t_arr_bkg[~np.isnan(bkg_arr)]
    bkg_arr = bkg_arr[~np.isnan(bkg_arr)]
    # `s` is subtracted from / added to the intensities to form the shaded band.
    s = compute_trace_uncertainty(t_arr_bkg, bkg_arr, inten_arr)
    l = inten_arr - s
    u = inten_arr + s
    
    # Line, open circles at every measured point, and the uncertainty band.
    ax.plot(t_arr, inten_arr / max_inten, '-', linewidth=linewidth, label='her1-MS2', color=colors['green'])
    ax.plot(t_arr, inten_arr / max_inten, 'o', label='_nolabel_', markersize=markersize, fillstyle='none', markeredgecolor='k', markeredgewidth=1)
    ax.fill_between(t_arr, l / max_inten, u / max_inten, facecolor=colors['green'], alpha=0.25)
    
    # Restrict the view to the 56-64 window with a tick at every unit.
    ax.set_xlim([56, 64])
    ax.set_xticks([56, 57, 58, 59, 60, 61, 62, 63, 64])
    ax.set_xlabel('time (min)', fontsize=fontsize)
    ax.set_ylabel('fluorescence \nintensity (a.u.)', fontsize=fontsize)
    
    ax = style_axes(ax, fontsize=fontsize)

# Advance to the next mosaic panel for the following cell.
counter += 1

In [95]:
# Leave the next mosaic panel empty: no ticks on either axis, styled only.
# NOTE(review): presumably a placeholder for a panel added outside this
# notebook - confirm.
#ax = axs[counter]
ax = axd[keys[counter]]
ax.set_xticks([])
ax.set_yticks([])
ax = style_axes(ax)
# Advance to the next mosaic panel for the following cell.
counter += 1

## Fig5D
Plot an example of a binarized trace

In [96]:
# load the manual dorado dataset
df = pd.read_pickle(path_to_manual_dataset1)

# Nucleus whose trace is binarized in Fig 5D.
good_ids = [2715]

# Keep only the rows of the selected nucleus (vectorized membership test).
df = df[df.nucleus_id.isin(good_ids)]
df = enforce_1spot_per_nucleus(df, method='gauss3d_dog')

# extract culled traces
traces = extract_traces(df, method='gauss3d_dog')

            

In [97]:
# Draw the binarized-trace example on the next free mosaic panel.
#ax = axs[counter]
ax = axd[keys[counter]]
# Here the normalization is the maximum of the table filtered to this section's
# nucleus, not the value used for the earlier panels.
max_inten = df.gauss3d_dog.max()
for i, trace in enumerate(traces):
    # Each trace unpacks as (times, intensities, nucleus id).
    t_arr, inten_arr, nucleus = trace
    # Map the integer trace times onto the corrected time stamps.
    t_arr = non_blank_timepoints[t_arr.astype('int')]

    # Binarized version of the trace; plotted below as the promoter state.
    state = binarize_trace(inten_arr, t_arr, thresh=1.0, window_size=3)
    
    # for trace uncertainty
    # Background (offset) samples for this nucleus, with NaN entries dropped.
    sub_df = df[df.nucleus_id == nucleus]
    t_arr_bkg = sub_df.t.values
    bkg_arr = sub_df.offset.values
    t_arr_bkg = t_arr_bkg[~np.isnan(bkg_arr)]
    bkg_arr = bkg_arr[~np.isnan(bkg_arr)]
    # `s` is subtracted from / added to the intensities to form the shaded band.
    s = compute_trace_uncertainty(t_arr_bkg, bkg_arr, inten_arr)
    l = inten_arr - s
    u = inten_arr + s
        
    ax.plot(t_arr, inten_arr / max_inten, '-', linewidth=linewidth, label='her1-MS2', color=colors['green'])
    #ax.plot(t_arr, inten_arr / max_inten, 'o', label='_nolabel_', markersize=12, fillstyle='none', markeredgecolor='k', markeredgewidth=2)
    ax.fill_between(t_arr, l / max_inten, u / max_inten, facecolor=colors['green'], alpha=0.25)
    
    # The state is drawn unscaled on the same axis as the normalized intensity.
    ax.plot(t_arr, state, 'k-', linewidth=1, label='inferred promoter state')
    
    ax.set_xlim([0, 145])
    #ax.legend(loc=2, fontsize=fontsize, facecolor='w')

    ax.set_xticks([0, 30, 60, 90, 120])
    ax.set_xlabel('time (min)', fontsize=fontsize)
    ax.set_ylabel('fluorescence \nintensity (a.u.)', fontsize=fontsize, color=colors['green'])
    
    ax = style_axes(ax, fontsize=fontsize)
# Advance to the next mosaic panel for the following cell.
counter += 1

## Fig5E
Histogram of the number of her1 transcriptional bursts per protein oscillation

In [107]:
"""bursts per pulse"""
# Number of bursts per oscillation for each bar of the histogram.
bins = np.array([1, 2, 3])

# Earlier values from the automatic analyses, kept for reference only:
# dorado automatic
#   np.array([0.8414, 0.1521, 0.0065])
# z1 automatic
#   np.array([ 0.8824, 0.1176, 0.000])

# new dorado manual
# `probs` is a list with one probability vector per dataset; only the manually
# curated dataset is plotted, so it has a single entry. (A zero-filled array
# used to be allocated here first, but it was overwritten immediately.)
probs = [np.array([0.919, 0.076, 0.005])]


In [118]:
# Standalone small version of the bursts-per-oscillation bar chart.
# Note that this creates a new figure (which becomes pyplot's current figure)
# and rebinds `f` and `ax`.
i = 0
f, ax = plt.subplots(figsize=(1.5, 1.5))
ax.bar(bins, probs[i], facecolor=colors['green'], width=0.5)
ax.set_xlabel('bursts per oscillation', fontsize=fontsize)
# The leading newlines pad the label away from the axis.
ax.set_ylabel('\n \n probability', fontsize=fontsize)
ax.set_xticks([1, 2, 3])
# Set the x-limits once (the call used to be duplicated).
ax.set_xlim([0.5, 3.5])
# Start slightly below zero so the bar bases are not clipped by the axis.
ax.set_ylim([-0.05, 1])

ax = style_axes(ax, fontsize=fontsize)
f.tight_layout()

In [99]:
# Draw the bursts-per-oscillation bar chart on the next free mosaic panel.
ax = axd[keys[counter]]
#for i in range(len(probs)):
#    ax.plot(bins, probs[i], 'ko', markersize=markersize, markerfacecolor = colors['green'], alpha=0.7)
i = 0
ax.bar(bins, probs[i], facecolor=colors['green'], width=0.5)
ax.set_xlabel('bursts per oscillation', fontsize=fontsize)
# The leading newlines pad the label away from the axis.
ax.set_ylabel('\n \n probability', fontsize=fontsize)
ax.set_xticks([1, 2, 3])
# Set the x-limits once (the call used to be duplicated).
ax.set_xlim([0.5, 3.5])
# Start slightly below zero so the bar bases are not clipped by the axis.
ax.set_ylim([-0.05, 1])

ax = style_axes(ax, fontsize=fontsize)
# Advance to the next mosaic panel for the following cell.
counter += 1

## Period distributions

In [100]:
def extract_burst_periods_by_ap(df, method='gauss3d_dog'):
    """Pool burst periods, their reference times and AP values over all traces.

    Each trace returned by ``extract_traces(df, method=method)`` is binarized
    with ``binarize_trace(..., thresh=1.0, window_size=3)`` and converted to
    on/off times with ``get_on_and_off_times``. Traces with fewer than two
    on-times are skipped.

    For the remaining traces, a period is the difference between consecutive
    on-times. The reference time of each period is read from ``off_times``
    starting at index 1, and its AP value is the ``ap`` entry of the row of
    that nucleus whose ``t`` is closest to the reference time (the first such
    row on ties).

    NOTE(review): the times are used exactly as returned by ``extract_traces``;
    whether the resulting periods are in minutes or frames depends on that
    helper - confirm.

    Returns
    -------
    tuple
        ``(periods, event_times, aps)``, three lists pooled over all traces.
    """
    all_periods, all_event_times, all_aps = [], [], []

    for times, intensities, nucleus_id in extract_traces(df, method=method):
        promoter_state = binarize_trace(intensities, times, thresh=1.0, window_size=3)
        on_times, off_times = get_on_and_off_times(promoter_state, times)
        if len(on_times) < 2:
            # A period needs at least two on-times.
            continue

        trace_periods = np.diff(on_times)
        reference_times = off_times[1:len(trace_periods) + 1]

        # AP value of the row closest in time to each reference time.
        nucleus_rows = df[df.nucleus_id == nucleus_id]
        nearest_aps = np.zeros(len(reference_times))
        for k, ref_t in enumerate(reference_times):
            time_gap = np.abs(nucleus_rows.t - ref_t)
            closest_rows = nucleus_rows[time_gap == np.min(time_gap)]
            nearest_aps[k] = closest_rows.ap.iloc[0]

        all_aps.extend(nearest_aps)
        all_periods.extend(trace_periods)
        all_event_times.extend(reference_times)

    return all_periods, all_event_times, all_aps


def bootstrap_period_dist(periods, bins, n_bootstraps=100, rng=None):
    """Bootstrap a binned probability density of periods.

    The periods are resampled with replacement ``n_bootstraps`` times. Each
    resample is histogrammed on ``bins`` and normalized to a probability
    density (counts / total in-range counts / bin width).

    Parameters
    ----------
    periods : sequence of float
        Observed periods.
    bins : array-like
        Bin edges passed to ``np.histogram``. Values outside the edges are not
        counted, so the density is normalized over the in-range samples only;
        a resample with no in-range value yields NaN for that replicate.
    n_bootstraps : int, optional
        Number of bootstrap replicates.
    rng : None, int or numpy.random.Generator, optional
        Source of randomness. ``None`` (the default) draws from NumPy's global
        random state exactly as before, so ``np.random.seed`` still controls
        it. An integer seed or a ``Generator`` makes the result reproducible
        independently of the global state.

    Returns
    -------
    tuple of numpy.ndarray
        ``(mean, std)`` of the bootstrapped densities, each of length
        ``len(bins) - 1``.
    """
    # Hoisted out of the loop: neither depends on the bootstrap replicate.
    periods = np.asarray(periods)
    bin_widths = np.diff(bins)
    sampler = np.random if rng is None else np.random.default_rng(rng)

    period_dists = np.zeros((n_bootstraps, len(bins) - 1))
    for n in range(n_bootstraps):
        ids = sampler.choice(len(periods), len(periods))
        counts, _ = np.histogram(periods[ids], bins)
        period_dists[n] = counts / np.sum(counts) / bin_widths

    return np.mean(period_dists, axis=0), np.std(period_dists, axis=0)


def extract_protein_periods_by_ap(df, method='pred_protein', distance=15, prominence=1e6, burn_in_time=20):
    """Pool peak-to-peak periods, their reference times and AP values over all traces.

    For every trace from ``extract_traces(df, method=method)`` the first
    ``burn_in_time`` samples are discarded (traces with at most that many
    samples are skipped). Peaks are located with ``scipy.signal.find_peaks``
    using ``distance`` and ``prominence``, and a 0/1 state array marking the
    peak samples is passed to ``get_on_and_off_times``. Traces with fewer than
    two on-times are skipped.

    A period is the difference between consecutive on-times. The reference
    time of each period is read from the start of ``off_times``, and its AP
    value is the ``ap`` entry of the row of that nucleus whose ``t`` is closest
    to the reference time (the first such row on ties).

    Note that ``burn_in_time`` is applied as a number of leading samples, not
    as a duration.

    Returns
    -------
    tuple
        ``(periods, event_times, aps)``, three lists pooled over all traces.
    """
    all_periods, all_event_times, all_aps = [], [], []

    for times, signal, nucleus_id in extract_traces(df, method=method):
        if len(times) <= burn_in_time:
            # Nothing would remain after dropping the burn-in samples.
            continue
        signal = signal[burn_in_time:]
        times = times[burn_in_time:]

        # Mark each detected peak sample as "on".
        peak_idx, _ = find_peaks(signal, distance=distance, prominence=prominence)
        peak_state = np.zeros(len(signal))
        peak_state[peak_idx] = 1

        on_times, off_times = get_on_and_off_times(peak_state, times)
        if len(on_times) < 2:
            # A period needs at least two on-times.
            continue

        trace_periods = np.diff(on_times)
        reference_times = off_times[:len(trace_periods)]

        # AP value of the row closest in time to each reference time.
        nucleus_rows = df[df.nucleus_id == nucleus_id]
        nearest_aps = np.zeros(len(reference_times))
        for k, ref_t in enumerate(reference_times):
            time_gap = np.abs(nucleus_rows.t - ref_t)
            closest_rows = nucleus_rows[time_gap == np.min(time_gap)]
            nearest_aps[k] = closest_rows.ap.iloc[0]

        all_aps.extend(nearest_aps)
        all_periods.extend(trace_periods)
        all_event_times.extend(reference_times)

    return all_periods, all_event_times, all_aps

In [101]:
# Spot table used for the period distributions (alternative locations are kept
# for reference).
#path_to_df = r'/media/brandon/Data1/Somitogenesis/Dorado/gauss_001_v2_sigma_filt/filtered_df.pkl'
path_to_df = r'/media/brandon/Data1/Somitogenesis/Dorado/manual_curation/complete_manual_spots.pkl'
#path_to_df = r'/media/brandon/Data1/Somitogenesis/Dorado/manual_traces/all_manual_spots_quant_std_offset.pkl'

df = pd.read_pickle(path_to_df)

# The file handle gets its own name so it does not overwrite `f`, which the
# plotting cells use for figure handles.
# NOTE(review): pickle.load can execute arbitrary code; only open trusted files.
with open(r'/media/brandon/Data1/Somitogenesis/Dorado/ap.pkl', 'rb') as ap_file:
    ap = pickle.load(ap_file)

# Add the `ap` column only when the table does not already carry it.
if 'ap' not in df.keys():
    df = get_ap_for_spots(df, ap)

# filter df
# Keep nuclei whose trace has more than `minimum_number_of_spots` positive samples.
minimum_number_of_spots = 10
traces = extract_traces(df, method='gauss3d_dog')
spots_per_trace = [np.sum(trace[1] > 0) for trace in traces]
# Use the nucleus id stored in each trace (its third element) rather than
# pairing the counts with np.unique(df.nucleus_id) by position, so the filter
# does not depend on the order or number of traces extract_traces returns.
good_nuc_ids = np.array([trace[2] for trace, n_spots in zip(traces, spots_per_trace)
                         if n_spots > minimum_number_of_spots])
df = df[df.nucleus_id.isin(good_nuc_ids)]
# Note: `traces` is extracted before the one-spot-per-nucleus step below.
traces = extract_traces(df, method='gauss3d_dog')
df = enforce_1spot_per_nucleus(df, method='gauss3d_dog')

In [102]:
# Burst-to-burst periods of the measured traces. `event_times` and `aps` are
# overwritten by the protein-period call in the next cell; only `burst_periods`
# is used for the plot.
burst_periods, event_times, aps = extract_burst_periods_by_ap(df)

In [103]:
# Predict protein for every nucleus from the spot table and the track table,
# then attach AP values to the predicted-protein table.
tracks = pd.read_csv(r'/media/brandon/Data1/Somitogenesis/Dorado/tracks_corrected.csv')
protein_df = predict_protein_for_all_nuclei(df, tracks)
protein_df = get_ap_for_spots(protein_df, ap)

# Peak-to-peak periods of the predicted protein. This rebinds `event_times`
# and `aps` from the burst-period cell above.
periods, event_times, aps = extract_protein_periods_by_ap(protein_df)

  protein_df = pd.concat((protein_df, tmp_df), axis=0)


In [104]:
# Draw both period distributions on the next free mosaic panel.
ax = axd[keys[counter]]
# protein
# Nine equal-width bins from 0 to 48. The bootstrap draws from NumPy's global
# random state, so the means and error bars vary between runs unless a seed is set.
bins = np.linspace(0, 48, 10)
prob_dens, uncertainty_prob_dens = bootstrap_period_dist(periods, bins)

# Points are placed at the left edge of each bin (bins[:-1]). The marker face
# colour is a 2-tuple of the colour and 0.5.
# NOTE(review): presumably read by matplotlib as (color, alpha) - confirm.
#plt.plot(bins[:-1], prob_dens, 'ko', markersize=18, markerfacecolor=colors['blue'], alpha=1, label='predicted Her1 protein')
#plt.plot(bins[:-1], prob_dens, '-', color=colors['blue'], linewidth=3)
ax.errorbar(bins[:-1], prob_dens, uncertainty_prob_dens, marker='o', markerfacecolor=(colors['blue'],) + (0.5,), markersize=markersize,
            markeredgecolor='k', linewidth=linewidth, elinewidth=linewidth, capsize=linewidth, capthick=linewidth, ecolor=colors['blue'], 
             color=colors['blue'], label='predicted Her1 protein oscillations', barsabove=True)

# bursts
# Same bins and plotting style for the measured burst periods.
prob_dens, uncertainty_prob_dens = bootstrap_period_dist(burst_periods, bins)
ax.errorbar(bins[:-1], prob_dens, uncertainty_prob_dens, marker='o', markerfacecolor=(colors['green'],) + (0.5,), markersize=markersize,
            markeredgecolor='k', linewidth=linewidth, elinewidth=linewidth, capsize=linewidth, capthick=linewidth, ecolor=colors['green'], 
             color=colors['green'], label='measured her1 transcriptional bursts', barsabove=True)



ax.set_xlabel('period (min)', fontsize=fontsize)
ax.set_ylabel('probability \ndensity (min)$^{-1}$', fontsize=fontsize)
#ax.legend(fontsize= fontsize, facecolor='w', loc=2)
# Fixed y-range with only the two end ticks shown.
ax.set_ylim([0,0.07])
ax.set_yticks([0, 0.06])
ax = style_axes(ax, fontsize=fontsize)

In [292]:
# Save the mosaic through one of its own axes instead of plt.savefig.
# plt.savefig writes whichever figure is pyplot's current one, and the
# standalone bar-chart cell creates a newer figure after the mosaic (while `f`
# is rebound more than once), so the implicit target is not guaranteed to be
# the Fig 5 mosaic.
axd[keys[0]].figure.savefig(r'/home/brandon/Documents/Code/zebrafish-ms2-paper/figures/Fig5_python.pdf')