**Note:** Each plot can be zoomed using the cursor for a better resolution.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random

import PIL.Image as Image
import matplotlib.image as img
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Weekly Exercises: week 11-12

The **two-stage model for gene expression** is a model of gene expression that explicitly includes transcription of mRNAs $\,m\,$ (rate $v_0$) and translation of proteins $\,n\,$ (rate $v_1$) as first-order processes.

In particular we have:
- $v_0$ transcription rate - probability per unit of time to synthesize an mRNA (from experimental data $v_0 \sim 10^{-2}$ to $10^{-1}\,s^{-1}$)
- $v_1$ translation rate - probability per unit of time to synthesize a protein (from experimental data $v_1 \sim 10^{-2}$ to $10^{-1}\,s^{-1}$)
- $d_0$ mRNA degradation rate - $\,d_0^{-1}$ is the average mRNA lifetime ($d_0 \sim 10^{-3}$ to $10^{-2}\,s^{-1}$ for the bacterium *Escherichia coli*)
- $d_1$ protein degradation rate - $\,d_1^{-1}$ is the average protein lifetime ($d_1 \sim 10^{-4}\,s^{-1}$; protein half-lives are usually much longer than mRNA ones)

## Simulate the stochastic dynamics of the two stage model.

Below you can find an implementation of the **Gillespie algorithm** for the two-stage model for gene expression, a Monte Carlo strategy that is able to simulate the dynamics of a complex stochastic process.

In [None]:
# Reproducibility: the simulation draws from both NumPy's and the standard
# library's global generators, so both are seeded before any trajectory is built.
SEED = 42
np.random.seed(SEED)
random.seed(SEED)

# Initial molecule counts (mRNA, protein) and the trajectories that start from them
m0, n0 = 100, 500
m, n = [m0], [n0]

# Rates: transcription, translation, mRNA degradation, protein degradation.
# These are the same values as the former `10e-1, 10e-2, 10e-2, 10e-4` literals
# (10e-k equals 10**(1-k), i.e. 1.0, 0.1, 0.1, 0.001), spelled out unambiguously.
# NOTE(review): the accompanying text quotes v0 ~ 1e-2..1e-1, d0 ~ 1e-3..1e-2 and
# d1 ~ 1e-4 s^-1, so v0, d0 and d1 here are about ten times larger. The steady
# states only depend on rate ratios and are unaffected; confirm whether the faster
# time scale is intended.
v0, v1, d0, d1 = 1.0, 0.1, 0.1, 0.001

# Event times of the trajectory and the simulated time horizon
t = [0]
t_end = 10000

# Steady states of the deterministic model: m_inf = v0/d0, n_inf = v0*v1/(d0*d1)
m_inf, n_inf = v0/d0, (v0*v1)/(d0*d1)
print('Steady states\n- mRNA: m =', m_inf, '\n- Protein: n =', n_inf)

Steady states
- mRNA: m = 10.0 
- Protein: n = 1000.0


In [None]:
# Gillespie Algorithm
# Change (delta m, delta n) caused by each reaction, listed in the same order as `rates`:
# transcription, translation, mRNA degradation, protein degradation
state_changes = ((+1, 0), (0, +1), (-1, 0), (0, -1))

while t[-1] < t_end:
  current_m, current_n = m[-1], n[-1]
  rates = [v0, current_m*v1, current_m*d0, current_n*d1]
  total_rate = sum(rates)

  # Waiting time until the next reaction: exponential with mean 1/total_rate
  dt = np.random.exponential(scale=1/total_rate)
  t.append(t[-1]+dt)

  # Pick the reaction whose cumulative-rate interval contains a uniform draw on [0, total_rate)
  rand = total_rate*random.uniform(0,1)
  indx = np.argmax(np.cumsum(rates)>rand)

  # Apply the selected reaction to the current state
  dm, dn = state_changes[indx]
  m.append(current_m+dm)
  n.append(current_n+dn)

In [None]:
fig = make_subplots(
    rows=2, cols=2,
    specs=[[{}, {}], [{"colspan":2}, None]],
    subplot_titles=('mRNA Dynamics', 'Protein Dynamics', 'mRNA+Protein Dynamics'),
)

# One entry per species: trajectory, steady-state level, legend names, colours and top-row column
species_panels = (
    dict(series=m, level=m_inf, series_name='mRNA', level_name='mRNA steady state',
         series_color='blue', level_color='red', top_col=1),
    dict(series=n, level=n_inf, series_name='proteins', level_name='Proteins steady state',
         series_color='dodgerblue', level_color='orange', top_col=2),
)

# Row 1 gives each species its own panel. Row 2 overlays both species in one full-width
# panel and hides its legend entries, which would only repeat those of row 1.
for row in (1, 2):
    legend_kwargs = {} if row == 1 else dict(showlegend=False)
    for panel in species_panels:
        col = panel['top_col'] if row == 1 else 1
        fig.add_trace(
            go.Scatter(x=t, y=panel['series'], name=panel['series_name'], mode='lines',
                       marker=dict(color=panel['series_color']), **legend_kwargs),
            row=row, col=col)
        fig.add_trace(
            go.Scatter(x=[0,t_end], y=2*[panel['level']], mode='lines', line_dash='dash',
                       name=panel['level_name'], marker=dict(color=panel['level_color']), **legend_kwargs),
            row=row, col=col)

fig.update_layout(
    title_text='Two Stage Model for Gene Expression - Gillespie algorithm',
    hovermode='x unified',
    height=1000,
)
fig.update_xaxes(title_text='Time')
fig.update_yaxes(title_text='Abundance')
fig.show()

Output hidden; open in https://colab.research.google.com to view.

<br>

If the numbers of mRNA and protein molecules are very large $\left( n\gg1,\;m\gg1 \right)$, we can treat them as deterministic continuous variables. The **deterministic equations of the two stage model** are then:

$$
\begin{cases}
\dot{m}=v_0-d_0m \\
\dot{n}=v_1m-d_1n
\end{cases}
$$

<br><br>

This is a system of linear ODEs which can be solved exactly; the solutions are the following:

$$
\begin{cases}
m(t)=\frac{v_0}{d_0}+\left(m_0-\frac{v_0}{d_0}\right)e^{-d_0t} \\
n(t)=\frac{v_0v_1}{d_0d_1}+\left(n_0-\frac{v_0v_1}{d_0d_1}\right)e^{-d_1t}+v_1\left(m_0-\frac{v_0}{d_0}\right)F(t)
\end{cases}
$$

<br>

where we have $m(0)=m_0\;$,$\;n(0)=n_0\;$ and $\;F(t)=\begin{cases}
\frac{e^{-d_0t}-e^{-d_1t}}{d_1-d_0} \;\;\;d_1\neq d_0\\
te^{-d_1t} \;\;\;d_1=d_0
\end{cases}$


<br>

In particular, when $t\rightarrow\infty$ we obtain two steady solutions, $m_\infty$ and $n_\infty$, which coincide with the averaged stochastic dynamics for $t\rightarrow\infty$:

$$
\begin{cases}
m_\infty=\frac{v_0}{d_0} \\
n_\infty=\frac{v_0v_1}{d_0d_1}
\end{cases}
$$

In [None]:
def deterministic_2stage(M0, N0, V0, V1, D0, D1, T):
  """Closed-form solution of the deterministic two-stage model.

  Evaluates the solution of
      dm/dt = V0 - D0*m,    dn/dt = V1*m - D1*n
  with m(0) = M0 and n(0) = N0 at the time(s) T.

  Parameters
  ----------
  M0, N0 : initial mRNA and protein abundance.
  V0, V1 : transcription and translation rates.
  D0, D1 : mRNA and protein degradation rates (used as divisors, so non-zero).
  T      : scalar or array-like of times; lists and tuples are accepted.

  Returns
  -------
  list
      [m(T), n(T)], each with the shape of T.
  """
  # Accept plain Python sequences as well as scalars and arrays
  T = np.asarray(T, dtype=float)

  decay_m = np.exp(-D0*T)
  decay_n = np.exp(-D1*T)

  # F(T) is the response of the protein equation to the exponential mRNA transient.
  # Equal rates need the limiting form, since the general one would divide by zero.
  if D0==D1:
    F=T*decay_n
  else:
    F=(decay_m-decay_n)/(D1-D0)

  m_t = V0/D0+(M0-V0/D0)*decay_m
  n_t = (V0*V1)/(D0*D1)+(N0-(V0*V1)/(D0*D1))*decay_n+V1*(M0-V0/D0)*F
  return [m_t, n_t]

# Evaluate the deterministic solution at the Gillespie event times `t`, so it can be
# drawn on the same time axis as the stochastic trajectory.
m_det, n_det = deterministic_2stage(m0, n0, v0, v1, d0, d1, np.array(t))

In [None]:
fig = make_subplots(rows=1, cols=2, subplot_titles=('mRNA Dynamics', 'Protein Dynamics'))

# Per species: subplot column, stochastic trajectory, steady-state level, deterministic
# curve, their legend names, and the colours of the trajectory and of the reference curves
comparison_panels = (
    dict(col=1, series=m, level=m_inf, curve=m_det,
         series_name='mRNA', level_name='mRNA steady state', curve_name='Deterministic mRNA solution',
         series_color='blue', reference_color='red'),
    dict(col=2, series=n, level=n_inf, curve=n_det,
         series_name='proteins', level_name='Proteins steady state', curve_name='Deterministic protein solution',
         series_color='dodgerblue', reference_color='orange'),
)

for panel in comparison_panels:
    # Stochastic trajectory
    fig.add_trace(
        go.Scatter(x=t, y=panel['series'], name=panel['series_name'], mode='lines',
                   marker=dict(color=panel['series_color'])),
        row=1, col=panel['col'])
    # Dashed horizontal line at the steady state
    fig.add_trace(
        go.Scatter(x=[0,t_end], y=2*[panel['level']], mode='lines', line_dash='dash',
                   name=panel['level_name'], marker=dict(color=panel['reference_color'])),
        row=1, col=panel['col'])
    # Deterministic solution evaluated at the same times
    fig.add_trace(
        go.Scatter(x=t, y=panel['curve'], mode='lines', name=panel['curve_name'],
                   marker=dict(color=panel['reference_color'])),
        row=1, col=panel['col'])

fig.update_layout(
    title_text='Two Stage Model for Gene Expression',
    hovermode='x unified',
)
fig.update_xaxes(title_text='Time')
fig.update_yaxes(title_text='Abundance')
fig.show()

Output hidden; open in https://colab.research.google.com to view.

The reactions can be described by the probability that $n$ proteins and $m$ mRNAs exist at time $t$:

$$P(n,m,t)=P_{n,m}(t)=P(n\;proteins\,,\;m\;mRNAs\;at\;time\;t)$$

and how this probability evolves with time. Each reaction rate is interpreted as the probability per unit time of the appropriate reaction.

Let $\delta t$ be a time interval small enough so that at most only one reaction can occur, so if there are $m$ mRNAs and $n$ proteins at time $t+\delta t$, then:

<br>

- if a mRNA molecule was synthesized during the interval $\delta t\,$, there must have been $\;m−1\;$ mRNAs at time $\;t$. The transcription probability is $\;P(transcription) = v_0\delta t\;$ which is independent of the number of mRNAs present

<br>

- if a protein was synthesized during the interval $\;\delta t\,$, there must have been $\;n−1\;$ proteins at time $\;t$. The translation probability is $\;P(translation) = v_1m\delta t$

<br>

- if a mRNA was degraded during the interval $\;\delta t\,$, there must have been $\;m+1\;$ mRNAs at time $\;t$. The probability of degradation is $\;P(mRNA\;degradation) = d_0(m+1)\delta t$

<br>

- if a protein was degraded during the interval $\;\delta t\,$, there must have been $n+1$ proteins at time $\;t$. The probability of degradation is $\;P(protein\;degradation) = d_1(n+1)\delta t$

<br>

- if no reaction occurs during $\;\delta t\,$, the number of proteins and mRNAs will be unchanged, which happens with probability $\;P(no\;reaction) = 1 - v_0\delta t - v_1m\delta t - d_0m\delta t - d_1n\delta t$.

<br><br>

Putting these probabilities together, we can write the **master equation** describing the time evolution of $P_{n,m}(t)$:

<br>

$$
P_{n,m}(t+\delta t) = v_0\delta tP_{n,m-1}(t) + d_0(m+1)\delta tP_{n,m+1}(t) + v_1m\delta t P_{n-1,m}(t) + d_1(n+1)\delta t P_{n+1,m}(t) +\left( 1 − v_0\delta t − v_1m\delta t - d_0m\delta t - d_1n\delta t \right) P_{n,m}(t)
$$

<br>

Let's subtract $P_{n,m}(t)$ from both sides and divide by $\delta t$:

<br>

$$
\frac{P_{n,m}(t+\delta t)-P_{n,m}(t)}{\delta t} = v_0P_{n,m-1}(t) + d_0(m+1)P_{n,m+1}(t) + v_1mP_{n-1,m}(t) + d_1(n+1)\ P_{n+1,m}(t) - \left(v_0\ + v_1m + d_0m + d_1n \right) P_{n,m}(t)
$$

<br>

Finally taking the limit $\delta t\rightarrow0$ gives us the **master equation of the two stage model**:

<br>

$$
\frac{dP_{n,m}}{dt} = v_0\left(P_{n,m-1} - P_{n,m}\right) + d_0\{(m+1)P_{n,m+1}-mP_{n,m}\} + v_1m\{P_{n-1,m}-P_{n,m}\} + d_1\{(n+1)P_{n+1,m}-nP_{n,m}\} \\ \\
$$


<br><br>

Now we want to quantitatively obtain the distribution $P_{n,m}(t)$ at stationarity, $P_{n,m}^{stat}\;$, i.e. once the steady states $n_\infty$ and $m_\infty$ have been reached and the dynamics fluctuates around these two points. This is done through the following simulation.

In [None]:
# Restart both trajectories from the initial condition
m = [m0]
n = [n0]

# Event times and the time horizon of this run (50e4 == 500000.0)
t = [0]
t_end = 50e4

# Steady states of the deterministic model, recomputed from the current rates
m_inf = v0/d0
n_inf = (v0*v1)/(d0*d1)
print('Steady states\n- mRNA: m =', m_inf, '\n- Protein: n =', n_inf)

Steady states
- mRNA: m = 10.0 
- Protein: n = 1000.0


In [None]:
# Gillespie Algorithm
while(t[-1] < t_end):
  current_m, current_n = m[-1], n[-1]
  # Propensities of: transcription, translation, mRNA degradation, protein degradation
  rates = [v0, current_m*v1, current_m*d0, current_n*d1]

  # Waiting time until the next reaction: exponential with mean 1/sum(rates)
  dt = np.random.exponential(scale=1/sum(rates))
  t.append(t[-1]+dt)

  # Pick the reaction whose cumulative-rate interval contains a uniform draw
  rand = sum(rates)*random.uniform(0,1)
  indx = np.argmax(np.cumsum(rates)>rand)
  # Transcription
  if indx == 0:
    m.append(m[-1]+1)
    n.append(n[-1])
  # Translation
  elif indx == 1:
    m.append(m[-1])
    n.append(n[-1]+1)
  # mRNA degradation
  elif indx == 2:
    m.append(m[-1]-1)
    n.append(n[-1])
  # Protein degradation
  elif indx == 3:
    m.append(m[-1])
    n.append(n[-1]-1)

# Stationary sample.
# Position i in m/n/t counts reaction events, not time, so slicing at int(t_end/2)
# does not discard the first half of the run. In addition, state i is occupied for
# t[i+1]-t[i], so keeping one sample per event over-represents states with large
# total propensity. Reading the trajectory on a uniform time grid over the second
# half of the run fixes both: the burn-in is cut on the time axis and every state
# is counted in proportion to the time spent in it.
sample_dt = 1.0  # grid spacing, in the same time units as t
sample_times = np.arange(t_end/2, t_end, sample_dt)
# Index of the last event at or before each sample time, i.e. the state held at that time
held_state = np.searchsorted(t, sample_times, side='right') - 1
m_stat = np.asarray(m)[held_state].tolist()
n_stat = np.asarray(n)[held_state].tolist()

In [None]:
fig = go.Figure()

# Joint distribution of (mRNA, protein) counts, normalised to probabilities,
# with the two marginal histograms drawn on secondary axes along the top and right edges
fig.add_trace(go.Histogram2d(x=m_stat, y=n_stat, colorscale = 'YlOrRd', hoverinfo='skip', histnorm='probability'))
fig.add_trace(go.Histogram(y=n_stat, xaxis='x2', marker=dict(color='#7f7f7f'), hoverinfo='skip'))
fig.add_trace(go.Histogram(x=m_stat, yaxis='y2', marker=dict(color='#7f7f7f'), hoverinfo='skip'))

# Dashed guide lines at the steady states m_inf and n_inf
fig.add_trace(go.Scatter(x=2*[m_inf], y=[min(n_stat),max(n_stat)], line=dict(color='black', width=1, dash='dash'), hoverinfo='skip'))
fig.add_trace(go.Scatter(x=[min(m_stat),max(m_stat)], y=2*[n_inf], line=dict(color='black', width=1, dash='dash'), hoverinfo='skip'))

fig.update_layout(
    autosize = False,
    # Raw string: `\,` and `\;` are LaTeX spacing commands, not Python escape sequences.
    # A non-raw literal has the same value but raises a SyntaxWarning on Python 3.12+.
    title_text=r'$P_{n\,m}^{stat}\;-\;P_{n\,m}(t)\;Stationary\;Distribution$',
    xaxis = dict(zeroline=False, domain=[0,0.85], showgrid=False, title_text='# of mRNA at stationarity'),
    yaxis = dict(zeroline=False, domain=[0,0.85], showgrid=False, title_text='# of proteins at stationarity'),
    xaxis2 = dict(zeroline=False, domain=[0.85,1], showticklabels=False, showgrid=False),
    yaxis2 = dict(zeroline=False, domain=[0.85,1], showticklabels=False, showgrid=False),
    height = 700, width = 700, bargap = 0, hovermode = 'closest', showlegend = False, paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(0,0,0,0)')
fig.show()

Output hidden; open in https://colab.research.google.com to view.