## siRNA knockdown ##

This notebook fits analytical functions to Rafał’s data.

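The analytical functions look like this for $t > t_0$, where $t_0$ is the onset time of the respective channel (`tr` for red and `tg` for green in the code). For $t \le t_0$ the functions are zero. In the code, a constant offset (`offr` or `offg`) is additionally added at all time points: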

$$f_\text{red}(t) =
m_\text{r}\,k_\text{tl} \left(
\frac{1}{\beta_\text{r}-\delta_\text{r}+k_\text{m,r}} \mathrm{e}^{-(\beta_\text{r}+k_\text{m,r})(t-t_0)}
-\frac{1}{\beta_\text{r} - \delta_\text{r}} \mathrm{e}^{-\beta_\text{r} (t-t_0)}
+\frac{k_\text{m,r}}{(\beta_\text{r}-\delta_\text{r}) (\beta_\text{r}-\delta_\text{r}+k_\text{m,r})} \mathrm{e}^{-\delta_\text{r} (t-t_0)}
\right)
$$

$$f_\text{green}(t) =
m_\text{g}\,k_\text{tl} \left(
\frac{1}{\beta_\text{g}-\delta_\text{g}+k_\text{m,g}} \mathrm{e}^{-(\beta_\text{g}+k_\text{m,g})(t-t_0)}
-\frac{1}{\beta_\text{g} - \delta_\text{g}} \mathrm{e}^{-\beta_\text{g} (t-t_0)}
+\frac{k_\text{m,g}}{(\beta_\text{g}-\delta_\text{g}) (\beta_\text{g}-\delta_\text{g}+k_\text{m,g})} \mathrm{e}^{-\delta_\text{g} (t-t_0)}
\right)
$$

In [51]:
# Import modules needed
%matplotlib inline
import numpy as np
import lmfit as lm
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import pandas as pd
from io_Daniel import *
import os
import pdb

In [2]:
def red(t, tr, ktl, mr, kmr, betr, deltr, offr):
    """Model function for red data.

    The time-dependent part is a sum of three exponential terms that is
    evaluated only for time points strictly after the onset `tr`; at all
    other time points only the offset is returned.

    Arguments:
    t -- time points at which the model is evaluated (array-like)
    tr -- onset time; the time-dependent part is zero for t <= tr
    ktl -- factor multiplying the sum of exponential terms (with `mr`)
    mr -- factor multiplying the sum of exponential terms (with `ktl`)
    kmr -- rate appearing in the first and third exponential term
    betr -- rate appearing in the first and second exponential term
    deltr -- rate appearing in the third exponential term
    offr -- constant offset added at every time point

    Returns:
    An array with the shape of `t` holding the model values.

    Note: the expression divides by (betr - deltr) and by
    (betr - deltr + kmr), so it is undefined if one of them is zero.
    """

    # Coerce to an array so that lists and tuples work as well; the
    # comparison and the boolean indexing below need an ndarray
    t = np.asarray(t, dtype=float)

    f = np.zeros(np.shape(t))
    idx_after = (t > tr)
    dt = t[idx_after] - tr

    # Denominators shared by the three terms
    d_slow = betr - deltr
    d_fast = betr - deltr + kmr

    f1 = np.exp(- (betr + kmr) * dt) / d_fast
    f2 = - np.exp(- betr * dt) / d_slow
    f3 = kmr * np.exp(- deltr * dt) / d_slow / d_fast

    f[idx_after] = (f1 + f2 + f3) * mr * ktl

    return f + offr

In [3]:
def green(t, tg, ktl, mg, kmg, betg, deltg, offg):
    """Model function for green data.

    The time-dependent part is a sum of three exponential terms that is
    evaluated only for time points strictly after the onset `tg`; at all
    other time points only the offset is returned.

    Arguments:
    t -- time points at which the model is evaluated (array-like)
    tg -- onset time; the time-dependent part is zero for t <= tg
    ktl -- factor multiplying the sum of exponential terms (with `mg`)
    mg -- factor multiplying the sum of exponential terms (with `ktl`)
    kmg -- rate appearing in the first and third exponential term
    betg -- rate appearing in the first and second exponential term
    deltg -- rate appearing in the third exponential term
    offg -- constant offset added at every time point

    Returns:
    An array with the shape of `t` holding the model values.

    Note: the expression divides by (betg - deltg) and by
    (betg - deltg + kmg), so it is undefined if one of them is zero.
    """

    # Coerce to an array so that lists and tuples work as well; the
    # comparison and the boolean indexing below need an ndarray
    t = np.asarray(t, dtype=float)

    f = np.zeros(np.shape(t))
    idx_after = t > tg
    dt = t[idx_after] - tg

    # Denominators shared by the three terms
    d_slow = betg - deltg
    d_fast = betg - deltg + kmg

    f1 = np.exp(- (betg + kmg) * dt) / d_fast
    f2 = - np.exp(- betg * dt) / d_slow
    f3 = kmg * np.exp(- deltg * dt) / d_slow / d_fast

    f[idx_after] = (f1 + f2 + f3) * mg * ktl

    return f + offg

In [4]:
def combined(t, tr, tg, ktl, m, kmr, kmg, betr, betg, deltr, deltg, offr, offg):
    """Model function for a combined fit of red and green data.

    Both channels are evaluated on the same time points `t` and share the
    parameters `ktl` and `m`; all other parameters are channel-specific.
    The result has the red values in column 0 and the green values in
    column 1.
    """

    red_values = red(t=t, tr=tr, ktl=ktl, mr=m, kmr=kmr,
                     betr=betr, deltr=deltr, offr=offr)
    green_values = green(t=t, tg=tg, ktl=ktl, mg=m, kmg=kmg,
                         betg=betg, deltg=deltg, offg=offg)

    # Stack along a new second axis: one column per channel
    return np.stack((red_values, green_values), axis=1)

In [5]:
# Set default parameter values
# These are the start values handed to the fit models as `value=` in the
# parameter hints.

# `ktl_0` is the start value of `ktl` in all three models,
# `m_0` is the start value of `m` in the combined model only
ktl_0 = 5
m_0 = 5

# Red channel: onset time, `mr` (separate red model only), rates and offset
tr_0 = 5
mr_0 = 19
kmr_0 = 0.03
betr_0 = 0.16
deltr_0 = 0.07
offr_0 = 0

# Green channel: onset time, `mg` (separate green model only), rates and offset
# NOTE(review): `kmg_0` is two orders of magnitude larger than `kmr_0` --
# presumably intended, confirm
tg_0 = 4
mg_0 = 2
kmg_0 = 4
betg_0 = 0.03
deltg_0 = 0.07
offg_0 = 0

In [6]:
# Create fit models
# The hints of each model are listed as (parameter name, keyword arguments)
# pairs and applied in the listed order.

# Separate model for the red channel
model_red = lm.Model(red)
for _par, _hint in (
        ('tr', dict(min=0, max=30, value=tr_0)),
        ('ktl', dict(min=0, value=ktl_0)),
        ('mr', dict(min=0, value=mr_0)),
        ('kmr', dict(min=0, value=kmr_0)),
        ('betr', dict(min=0, value=betr_0)),
        ('deltr', dict(min=0, value=deltr_0)),
        ('offr', dict(value=offr_0)),
):
    model_red.set_param_hint(_par, **_hint)

# Separate model for the green channel
model_green = lm.Model(green)
for _par, _hint in (
        ('tg', dict(min=0, max=30, value=tg_0)),
        ('ktl', dict(min=0, value=ktl_0)),
        ('mg', dict(min=0, value=mg_0)),
        ('kmg', dict(min=0, value=kmg_0)),
        ('betg', dict(min=0, value=betg_0)),
        ('deltg', dict(min=0, value=deltg_0)),
        ('offg', dict(value=offg_0)),
):
    model_green.set_param_hint(_par, **_hint)

# Combined model for both channels (shares `ktl` and `m`)
model_combined = lm.Model(combined)
for _par, _hint in (
        ('tr', dict(min=0, max=30, value=tr_0)),
        ('tg', dict(min=0, max=30, value=tg_0)),
        ('ktl', dict(min=0, value=ktl_0)),
        ('m', dict(min=0, value=m_0)),
        ('kmr', dict(min=0, value=kmr_0)),
        ('kmg', dict(min=0, value=kmg_0)),
        ('betr', dict(min=0, value=betr_0)),
        ('betg', dict(min=0, value=betg_0)),
        ('deltr', dict(min=0, value=deltr_0)),
        ('deltg', dict(min=0, value=deltg_0)),
        ('offr', dict(value=offr_0)),
        ('offg', dict(value=offg_0)),
):
    model_combined.set_param_hint(_par, **_hint)

## Read in data and prepare result list

In [7]:
# Read in data
# `D` maps a dataset name to a dictionary with the entries
#   't'   -- time points (squeezed content of the sheet "time")
#   'rfp' -- content of the sheet "RFP" plus the red background level
#   'gfp' -- content of the sheet "GFP" plus the green background level
D = {}

# Add background levels (undo background subtraction)
bg_green = 4.3
bg_red = 2.7

for dataset_name, xlsx_path in (
        ('Huh7 control', 'data/Huh7_control.xlsx'),
        ('A549 control', 'data/A549_control.xlsx'),
        ('A549 siRNA', 'data/A549_siRNA.xlsx'),
):
    # Passing a list of sheet names returns a dictionary of DataFrames, so
    # every workbook is read with a single call. The keyword is spelled
    # `sheet_name`; the old spelling `sheetname` is no longer accepted by
    # current pandas.
    sheets = pd.read_excel(xlsx_path, sheet_name=['time', 'RFP', 'GFP'])

    D[dataset_name] = {}
    D[dataset_name]['t'] = sheets['time'].values.squeeze()
    D[dataset_name]['rfp'] = sheets['RFP'].values + bg_red
    D[dataset_name]['gfp'] = sheets['GFP'].values + bg_green

In [72]:
# Provide output tables
# `R` maps a dataset name to the entries 'green', 'red' and 'combined'.
# Each of them holds
#   'params' -- DataFrame with one row per trace and one column per
#               parameter of the corresponding model
#   'fit'    -- NaN-initialized array for the fitted curves, shaped
#               (times, traces) for the separate fits and
#               (2, times, traces) for the combined fit

# Initialize result dictionary
R = {}

# Get a list of fit parameters
par_names = model_green.param_names.copy()
par_names.extend(p for p in model_red.param_names if p not in par_names)
par_names.sort()

# Iteratively populate the result dictionary
for k in D.keys():
    R[k] = {}
    nTraces = np.shape(D[k]['gfp'])[1]
    nTimes = np.shape(D[k]['gfp'])[0]

    # NaN-filled template; `np.nan` is used because the alias `np.NaN`
    # does not exist any more in NumPy 2.0
    tpl_traces = np.full((nTimes, nTraces), np.nan)

    # One empty parameter table per model
    for channel, model in (('green', model_green),
                           ('red', model_red),
                           ('combined', model_combined)):
        R[k][channel] = {}
        R[k][channel]['params'] = pd.DataFrame(
            index=np.arange(nTraces), columns=model.param_names, dtype='float64')

    R[k]['green']['fit'] = np.copy(tpl_traces)
    R[k]['red']['fit'] = np.copy(tpl_traces)

    # First axis of the combined fit array: 0 = red, 1 = green
    R[k]['combined']['fit'] = np.full((2, nTimes, nTraces), np.nan)

## Fit and plot separate models

In [57]:
def plotSeparate(ds, tr, pdf=None):
    """Plots one trace together with the results of the separate fits.

    The fits are not performed here: the function reads the stored fit
    results from the global dictionary `R` and the measured data from the
    global dictionary `D`.

    Keyword arguments:
    ds -- the dictionary key of the dataset
    tr -- the index of the trace in the dataset to be processed
    pdf -- a PdfPages object to which the figure is written if it is not None
    """

    t = D[ds]['t']
    res_red = R[ds]['red']
    res_green = R[ds]['green']

    # Plot fit results
    fig = plt.figure()
    p_tr = plt.axvline(res_red['params']['tr'][tr], label='RFP onset',
                       color='#ff0000', linewidth=.5, linestyle='--')
    p_tg = plt.axvline(res_green['params']['tg'][tr], label='GFP onset',
                       color='#00ff00', linewidth=.5, linestyle='--')
    p_fr, = plt.plot(t, res_red['fit'][:,tr], '-', label='RFP (fit)', color='#ff0000', linewidth=1)
    p_fg, = plt.plot(t, res_green['fit'][:,tr], '-', label='GFP (fit)', color='#00ff00', linewidth=1)
    p_dr, = plt.plot(t, D[ds]['rfp'][:,tr], '-', label='RFP (measured)', color='#990000', linewidth=.5)
    p_dg, = plt.plot(t, D[ds]['gfp'][:,tr], '-', label='GFP (measured)', color='#009900', linewidth=.5)

    # Format plot
    plt.xlabel('Time [h]')
    plt.ylabel('Fluorescence intensity [a.u.]')
    plt.title('{:s} (separate fit) #{:03d}'.format(ds, tr))
    plt.legend(handles=[p_dg, p_fg, p_tg, p_dr, p_fr, p_tr])

    # Write figure to pdf
    if pdf is not None:
        pdf.savefig(fig)

    # Show the figure, then close exactly this figure to free its memory
    plt.show()
    plt.close(fig)

In [None]:
# Fit traces separately
for ds in D:
    times = D[ds]['t']
    rfp = D[ds]['rfp']
    gfp = D[ds]['gfp']
    nTraces = np.shape(rfp)[1]

    for tr in range(nTraces):
        print('Fitting „{:s}“ #{:03d}/{:03d} …'.format(ds, tr, nTraces))

        trace_red = rfp[:,tr]
        trace_green = gfp[:,tr]

        # The upper bound of the onset time is the time at which the
        # current trace has its maximum
        model_red.set_param_hint('tr', max=times[np.argmax(trace_red)])
        model_green.set_param_hint('tg', max=times[np.argmax(trace_green)])

        # Fit both channels independently of each other
        result_red = model_red.fit(trace_red, t=times)
        result_green = model_green.fit(trace_green, t=times)

        # Store best-fit parameters and best-fit curves in R
        store_red = R[ds]['red']
        store_red['params'].iloc[tr] = result_red.best_values
        store_red['fit'][:,tr] = result_red.best_fit

        store_green = R[ds]['green']
        store_green['params'].iloc[tr] = result_green.best_values
        store_green['fit'][:,tr] = result_green.best_fit

In [None]:
# Plot results of separate fit
# One multi-page PDF is written per dataset, with one page per trace.
ts = getTimeStamp()

for ds in D:
    outdir = getOutpath()
    # Spaces of the dataset key are replaced by underscores in the file name
    pdfname = '{:s}_separate_{:s}.pdf'.format(ts, ds.replace(' ', '_'))
    pdffile = os.path.join(outdir, pdfname)

    with PdfPages(pdffile) as pdf:
        for tr in range(D[ds]['rfp'].shape[1]):
            plotSeparate(ds, tr, pdf)

## Fit and plot combined model

In [None]:
def plotCombined(ds, tr, pdf=None, params=False):
    """Plots one trace together with the result of the combined fit.

    The fit is not performed here: the function reads the stored fit
    results from the global dictionary `R` and the measured data from the
    global dictionary `D`.

    Keyword arguments:
    ds -- the dictionary key of the dataset
    tr -- the index of the trace in the dataset to be processed
    pdf -- a PdfPages object to which the figure is written if it is not None
    params -- meant to switch on a display of the parameters; this is not
              implemented, so the argument currently has no effect and is
              only kept for compatibility with existing calls
    """

    t = D[ds]['t']
    res = R[ds]['combined']

    # Plot fit results
    fig = plt.figure()
    ax = fig.gca()

    p_tr = ax.axvline(res['params']['tr'][tr], label='RFP onset',
                      color='#ff0000', linewidth=.5, linestyle='--')
    p_tg = ax.axvline(res['params']['tg'][tr], label='GFP onset',
                      color='#00ff00', linewidth=.5, linestyle='--')
    # First axis of the combined fit array: 0 = red, 1 = green
    p_fr, = ax.plot(t, res['fit'][0,:,tr], '-', label='RFP (fit)', color='#ff0000', linewidth=1)
    p_fg, = ax.plot(t, res['fit'][1,:,tr], '-', label='GFP (fit)', color='#00ff00', linewidth=1)
    p_dr, = ax.plot(t, D[ds]['rfp'][:,tr], '-', label='RFP (measured)', color='#990000', linewidth=.5)
    p_dg, = ax.plot(t, D[ds]['gfp'][:,tr], '-', label='GFP (measured)', color='#009900', linewidth=.5)

    # Format plot
    ax.set_xlabel('Time [h]')
    ax.set_ylabel('Fluorescence intensity [a.u.]')
    ax.set_title('{:s} (combined fit) #{:03d}'.format(ds, tr))
    ax.legend(handles=[p_dg, p_fg, p_tg, p_dr, p_fr, p_tr])

    # Write figure to pdf
    if pdf is not None:
        pdf.savefig(fig)

    # Show the figure, then close exactly this figure to free its memory
    plt.show()
    plt.close(fig)

In [None]:
# Fit combined model
for ds in D:
    times = D[ds]['t']
    rfp = D[ds]['rfp']
    gfp = D[ds]['gfp']
    nTraces = np.shape(rfp)[1]

    for tr in range(nTraces):
        print('Fitting „{:s}“ #{:03d}/{:03d} …'.format(ds, tr, nTraces))

        trace_red = rfp[:,tr]
        trace_green = gfp[:,tr]

        # The upper bound of each onset time is the time at which the
        # corresponding trace has its maximum
        model_combined.set_param_hint('tr', max=times[np.argmax(trace_red)])
        model_combined.set_param_hint('tg', max=times[np.argmax(trace_green)])

        # Fit both channels at once: column 0 is red, column 1 is green
        data = np.stack([trace_red, trace_green], axis=1)
        result = model_combined.fit(data, t=times)

        # Store best-fit parameters and best-fit curves in R
        store = R[ds]['combined']
        store['params'].iloc[tr] = result.best_values
        for channel in (0, 1):
            store['fit'][channel,:,tr] = result.best_fit[:,channel]

In [None]:
# Plot results of combined fit
# One multi-page PDF is written per dataset, with one page per trace.
ts = getTimeStamp()

for ds in D:
    outdir = getOutpath()
    # Spaces of the dataset key are replaced by underscores in the file name
    pdfname = '{:s}_combined_{:s}.pdf'.format(ts, ds.replace(' ', '_'))
    pdffile = os.path.join(outdir, pdfname)

    with PdfPages(pdffile) as pdf:
        for tr in range(D[ds]['rfp'].shape[1]):
            plotCombined(ds, tr, pdf)

In [None]:
# Compare the onset times of the separate fits: one scatter plot per
# dataset, with the diagonal drawn as a black reference line
for k in R:
    onsets_red = R[k]['red']['params']['tr']
    onsets_green = R[k]['green']['params']['tg']

    fig, ax = plt.subplots()
    ax.plot([0, 30], [0, 30], 'k-')
    ax.plot(onsets_red, onsets_green, '.')
    ax.set_xlabel('Onset RFP [h]')
    ax.set_ylabel('Onset GFP [h]')
    ax.set_title(k)
    plt.show()
    plt.close()
    

From the [documentation](http://lmfit-py.readthedocs.io/en/latest/model.html#lmfit.model.Model.fit):

If supplied, `weights` will be used to weight the calculated residual so that the quantity minimized in the least-squares sense is `weights*(data - fit)`. `weights` must be an `ndarray`-like object of same size and shape as `data`.

In [None]:
# Violin plots of the combined-fit parameters of the dataset "Huh7 control",
# grouped by colour: shared parameters (blue), green channel, red channel
pn_both = ['m', 'ktl']
pn_red = ['tr', 'kmr', 'betr', 'deltr', 'offr']
pn_green = ['tg', 'kmg', 'betg', 'deltg', 'offg']

plt.figure()
ax = plt.gca()

# The parameter table is stored under the key 'params'; the dictionary
# R[...]['combined'] itself cannot be indexed with a list of column names
params_huh7 = R['Huh7 control']['combined']['params']

tick_positions = []
tick_labels = []
first_position = 1

for names, clr_face, clr_edge in (
        (pn_both, '#0000ff55', '#000099ff'),
        (pn_green, '#00ff0055', '#009900ff'),
        (pn_red, '#ff000055', '#990000ff'),
):
    # Place the groups next to each other instead of drawing all of them
    # at the default positions 1, 2, ... on top of each other
    positions = list(range(first_position, first_position + len(names)))
    violins = ax.violinplot(params_huh7[names].values, positions=positions, showmeans=True)

    # The bodies are filled areas; all remaining entries are line collections
    for p in violins.pop('bodies'):
        p.set_facecolor(clr_face)
    for p in violins.values():
        p.set_edgecolor(clr_edge)

    tick_positions.extend(positions)
    tick_labels.extend(names)
    first_position += len(names)

# Label every violin with its parameter name
ax.set_xticks(tick_positions)
ax.set_xticklabels(tick_labels)

ax.set_yscale('log')
plt.show()
plt.close()

In [None]:
# Prototype of a single violin: distribution of `tr` from the combined fit
# of "Huh7 control", with a red cross drawn at the value 6
fig, ax = plt.subplots()
onsets_red = R['Huh7 control']['combined']['params']['tr'].values
ax.violinplot(onsets_red, showextrema=False, positions=[0])
ax.plot(0, 6, 'rx')

# Remove the x ticks and keep only the left spine, moved to x = 0
ax.set_xticks([])
ax.spines['left'].set_position('zero')
for pos in ('top', 'right', 'bottom'):
    ax.spines[pos].set_visible(False)

ax.set_title('tr')
plt.show()
plt.close()

In [None]:
def plotViolin(ax, data, label, clr_face, clr_edge, mark=None):
    """Draws a single violin at x = 0 into the given axes.

    Keyword arguments:
    ax -- the axes to draw into
    data -- the values whose distribution is shown
    label -- the title of the axes
    clr_face -- the face colour of the violin body
    clr_edge -- the colour of the cross that marks `mark`
    mark -- a y value to be marked with a cross; nothing is marked if None
    """
    v = ax.violinplot(data, showextrema=False, positions=[0])
    for body in v['bodies']:
        body.set_facecolor(clr_face)

    # Compare by identity so that every value other than None is marked
    if mark is not None:
        ax.plot(0, mark, 'x', color=clr_edge)

    # Remove the x ticks and keep only the left spine, moved to x = 0
    ax.set_xticks([])
    ax.spines['left'].set_position('zero')
    for pos in ('top', 'right', 'bottom'):
        ax.spines[pos].set_visible(False)

    ax.set_title(label)

In [None]:
# Example call of plotViolin: distribution of `tr` from the combined fit of
# "Huh7 control", with the value 7 marked
fig, ax = plt.subplots()
plotViolin(
    ax,
    data=R['Huh7 control']['combined']['params']['tr'].values,
    label='t_red',
    clr_face='#99000055',
    clr_edge='#ff0000ff',
    mark=7,
)
plt.show()
plt.close()

In [None]:
# Plot violins
# For every dataset a multi-page PDF is written with one page per trace.
# Each page shows the distribution of every channel-specific parameter of
# the combined fit and marks the value of the current trace.
pn_both = ['m', 'ktl']
pn_red = ['tr', 'kmr', 'betr', 'deltr', 'offr']
pn_green = ['tg', 'kmg', 'betg', 'deltg', 'offg']
grid = (2, max(len(pn_red), len(pn_green)))

ts = getTimeStamp()

# One grid row per channel: (parameter names, face colour, mark colour).
# The shared parameters in `pn_both` are not part of the grid.
grid_rows = (
    (pn_green, '#00ff0055', '#009900ff'),
    (pn_red, '#ff000055', '#990000ff'),
)

for ds in R.keys():
    params = R[ds]['combined']['params']

    pdffile = os.path.join(getOutpath(), '{:s}_violins_{:s}.pdf'.format(ts, ds.replace(' ', '_')))
    with PdfPages(pdffile) as pdf:

        # `pos` is the row position and `i` the index label of the trace
        for pos, i in enumerate(params.index):
            fig = plt.figure()
            fig.suptitle('{:s} (combined fit) #{:03d}'.format(ds, i))

            for row, (names, clr_face, clr_edge) in enumerate(grid_rows):
                for col, label in enumerate(names):
                    ax = plt.subplot2grid(grid, (row, col))
                    data = params[label].values
                    # Pick the mark by position, as `data` is a plain array
                    plotViolin(ax, data, label, clr_face, clr_edge, data[pos])

            pdf.savefig(fig)
            # `plt.show` does not take a figure; its only parameter is `block`
            plt.show()
            plt.close(fig)


In [73]:
# Debugging aid: display the dictionary most recently assigned to `violins`
# from a call of `ax.violinplot`
violins

{'bodies': [<matplotlib.collections.PolyCollection at 0x7fd39f37cf98>,
  <matplotlib.collections.PolyCollection at 0x7fd39c0e57b8>],
 'cbars': <matplotlib.collections.LineCollection at 0x7fd39f391cc0>,
 'cmaxes': <matplotlib.collections.LineCollection at 0x7fd39cf43b70>,
 'cmeans': <matplotlib.collections.LineCollection at 0x7fd39bb96048>,
 'cmins': <matplotlib.collections.LineCollection at 0x7fd39cf43828>}