In [2]:
import numpy as np
from numpy import array
from scipy.linalg import expm
from numpy import dot
import plotly.express as px
import pandas as pd



In [3]:
# Test fixtures: 4x4 substitution rate matrices and nucleotide frequency vectors.
# Each matrix row is expected to sum to zero (off-diagonal rates balanced by the diagonal).
Q1 = np.array([
    [-1.4,  0.1,   0.4, 0.9],
    [ 4.0, -6.9,   0.9, 2.0],
    [ 6.3,  2.0, -11.3, 3.0],
    [ 0.7,  0.1,   0.2, -1.0],
])

# NOTE(review): the last row sums to 0.0001 rather than 0 (likely rounding in the inputs).
Q2 = np.array([
    [-0.0935,  0.0148,  0.0558,  0.0229],
    [ 0.0469, -0.0676,  0.0108,  0.0099],
    [ 0.00,    0.0058, -0.0319,  0.0261],
    [ 0.00,    0.0132,  0.0370, -0.0501],
])

Q3 = np.array([
    [-4.5,  2.0,  1.0,  1.5],
    [ 2.0, -3.5,  0.5,  1.0],
    [ 1.0,  0.5, -2.5,  1.0],
    [ 0.0,  1.0,  2.0, -3.0],
])

# NOTE(review): the second row sums to 0.001 rather than 0 (likely rounding in the inputs).
Q4 = np.array([
    [-1.707,  0.537,  0.306,  0.864],
    [ 0.249, -0.889,  0.116,  0.525],
    [ 0.038,  0.182, -0.555,  0.335],
    [ 0.203,  0.580,  0.234, -1.017],
])

Q5 = np.array([
    [-1.86,  0.59,  0.33,  0.94],
    [ 0.27, -0.97,  0.13,  0.57],
    [ 0.04,  0.20, -0.60,  0.36],
    [ 0.23,  0.62,  0.25, -1.10],
])

# NOTE(review): the last row sums to -0.02 rather than 0 -- confirm the intended values.
Q6 = np.array([
    [-4.56,  1.59,  2.33,  0.64],
    [ 0.55, -1.98,  1.13,  0.3],
    [ 0.73,  1.43, -4.52,  2.36],
    [ 0.14,  0.37,  1.8,  -2.33],
])

# Uniform nucleotide frequencies.
i = np.array([0.25, 0.25, 0.25, 0.25])

# Non-uniform starting frequencies; both are reassigned in a later cell before being used.
test_nst_array1 = np.array([0.3, 0.4, 0.2, 0.1])
# NOTE(review): these entries sum to 1.2, so this is not a valid probability vector.
test_nst_array2 = np.array([0.9, 0.1, 0.1, 0.1])


In [4]:
#stationary process 
def calculate_stationary_distribution(Q):
    """
    Solve for the stationary distribution pi of a substitution rate matrix Q.

    pi satisfies pi @ Q = 0 together with sum(pi) = 1. Both conditions are stacked
    into a single overdetermined linear system that is solved by least squares.

    Parameters:
    Q (numpy.ndarray): The square substitution rate matrix.

    Returns:
    numpy.ndarray: The 1-D least-squares solution pi (one entry per state).
    """
    n_states = Q.shape[0]

    # pi @ Q = 0 is equivalent to Q.T @ pi = 0; the extra row of ones encodes sum(pi) = 1.
    system = np.vstack((Q.T, np.ones((1, n_states))))

    # Right-hand side: zeros for the balance equations, 1 for the normalisation row.
    rhs = np.zeros(n_states + 1)
    rhs[n_states] = 1

    solution, _residuals, _rank, _singular_values = np.linalg.lstsq(system, rhs, rcond=None)
    return solution


def calculate_stationary_rate(Q):
    """
    Compute the stationary evolution rate of a substitution rate matrix Q.

    The rate is mu = -sum_i(pi_i * Q_ii), where pi is the stationary
    distribution of Q.

    Parameters:
    Q (numpy.ndarray): The substitution rate matrix.

    Returns:
    float: The stationary evolution rate mu.
    """
    stationary_freqs = calculate_stationary_distribution(Q)
    diagonal_rates = np.diagonal(Q)

    # Weight each diagonal entry by its stationary frequency, then flip the sign.
    return -(stationary_freqs * diagonal_rates).sum()

def matrix_calibration(Q):
    """
    Rescale a substitution rate matrix by its stationary evolution rate.

    Every entry of Q is divided by mu = -sum_i(pi_i * Q_ii), so the returned
    matrix has a stationary evolution rate of 1.

    Parameters:
    Q (numpy.ndarray): The substitution rate matrix.

    Returns:
    numpy.ndarray: The calibrated matrix Q / mu (a new array; Q is not modified).
    """
    return Q / calculate_stationary_rate(Q)


In [5]:
#stationary process ENS accumulation function
def generate_ENS(pi, Q1, Q2, t_range, t1):
    """
    Generate the accumulated ENS over a range of time points, using rate matrix Q1
    up to the switch time t1 and rate matrix Q2 afterwards.

    The ENS grows linearly at rate -sum(pi * diag(Q1)) until t1, then continues from
    its value at t1 at rate -sum(pi * diag(Q2)). The value at t1 is computed from t1
    itself, so the curve is continuous whether or not t1 is one of the sampled time
    points, and a t_range lying entirely after t1 is handled.

    Parameters:
    - pi: A numpy array of state frequencies, one entry per diagonal element of Q1/Q2.
    - Q1, Q2: Two square numpy arrays holding the original and new rate matrices.
    - t_range: Iterable of time points (e.g. the result of numpy.linspace).
    - t1: The time point at which to switch from using Q1 to Q2.

    Returns:
    - A list of ENS values, one for each time point in t_range.
    """
    # Both rates are independent of t, so compute them once outside the loop.
    rate_before = -np.sum(pi * np.diag(Q1))
    rate_after = -np.sum(pi * np.diag(Q2))

    # ENS accumulated under Q1 by the switch time; the Q2 segment starts from here.
    ens_at_switch = rate_before * t1

    return [
        rate_before * t if t <= t1 else ens_at_switch + rate_after * (t - t1)
        for t in t_range
    ]

In [6]:
# Plot the ENS over time, with the rate matrix switching from Q5 to Q6 midway.
t_range = np.linspace(0, 10, 99)
t1 = 5  # time point at which the rate matrix changes

# The stationary distribution of Q5 is used as the frequency vector for both segments.
pi = calculate_stationary_distribution(Q5)

# ENS at every sampled time point.
ens_values = generate_ENS(pi, Q5, Q6, t_range, t1)

# Build and display the line chart.
fig = px.line(
    x=t_range,
    y=ens_values,
    labels={'x': 'Time (t)', 'y': 'ENS'},
    title='ENS Over Time with Rate Matrix Switch at t=5',
)
fig.show()

In [7]:
# Plot the stationary evolution rate mu over time, with the rate matrix switching
# from Q5 to Q6 at t1 (t_range and t1 are defined in the ENS plotting cell).

# Each stationary rate depends only on its matrix, so solve for it once instead of
# repeating the least-squares solve at every time point.
mu_before_switch = calculate_stationary_rate(Q5)
mu_after_switch = calculate_stationary_rate(Q6)

# Piecewise-constant rate: Q5's rate up to and including t1, Q6's rate afterwards.
mu_value = [mu_before_switch if t <= t1 else mu_after_switch for t in t_range]

# Build and display the line chart.
fig = px.line(
    x=t_range,
    y=mu_value,
    labels={'x': 'Time (t)', 'y': 'Evolution rate'},
    title='Evolution rate Over Time with Rate Matrix Switch at t=5',
)
fig.show()

In [8]:
#non-stationary process 
#evolution rate at each time point in non-stationary process

def calculate_non_statioanry_mu(Q, pi_0, t):
    """
    Evaluate the non-stationary evolution rate μ(t) = -pi_0 · exp(Qt) · diag(Q).

    NOTE: the function name is misspelled ("statioanry"); it is kept as-is because
    other code calls it by this name.

    Parameters:
    Q (numpy.ndarray): The substitution rate matrix.
    pi_0 (numpy.ndarray): The initial nucleotide frequency distribution.
    t (float): The time at which to calculate μ(t).

    Returns:
    float: The calculated value of μ(t).
    """
    # Nucleotide frequencies at time t: f(t) = pi_0 · exp(Qt).
    freqs_at_t = np.dot(pi_0, expm(Q * t))

    # μ(t) is minus the frequency-weighted sum of the diagonal of Q.
    return -np.dot(freqs_at_t, np.diagonal(Q))

In [9]:
#derivative of evolution rate
def calculate_non_stationary_mu_prime(Q, pi_0, t):
    """
    Evaluate μ'(t), the time derivative of the non-stationary evolution rate:
    μ'(t) = - pi_0 * Q * exp(Qt) * diag(Q)

    Parameters:
    Q (numpy.ndarray): The substitution rate matrix.
    pi_0 (numpy.ndarray): The initial nucleotide frequency distribution.
    t (float): The time at which to calculate μ'(t).

    Returns:
    float: The calculated value of μ'(t) (a scalar when pi_0 is 1-D).
    """
    # Evaluate the product left to right: (pi_0 · Q) · exp(Qt) · diag(Q).
    initial_flow = pi_0.dot(Q)
    flow_at_t = initial_flow.dot(expm(Q * t))

    return -flow_at_t.dot(np.diagonal(Q))


In [10]:
# Calculate the evolution rate and its derivative over a time interval, given an initial nucleotide frequency
def mu_mu_prime_range(Q, pi_0, t_range):
    """
    Evaluate the evolution rate and its derivative at every time point of an
    interval, starting from a given initial nucleotide frequency.

    Parameters:
    Q (numpy.ndarray): The substitution rate matrix.
    pi_0 (numpy.ndarray): The initial nucleotide frequency distribution.
    t_range (numpy.linspace): The time points at which to evaluate the evolutionary rate and its derivative.

    Returns:
    lists: Two lists, the values of μ(t) and of μ'(t), in the order of t_range.
    """
    # One pass over the time points, evaluating μ(t) and then μ'(t) at each.
    samples = [
        (calculate_non_statioanry_mu(Q, pi_0, t), calculate_non_stationary_mu_prime(Q, pi_0, t))
        for t in t_range
    ]

    mu_range = [rate for rate, _ in samples]
    mu_prime_range = [slope for _, slope in samples]
    return mu_range, mu_prime_range

In [17]:
# Evaluate the non-stationary evolution rate using the test Q and starting frequencies.

# Starting nucleotide frequencies (these replace the values assigned in the fixtures cell).
test_nst_array1 = np.array([0.05, 0.35, 0.35, 0.25])
test_nst_array2 = np.array([0.7, 0.1, 0.1, 0.1])

# Stationary distribution of Q6.
pi = calculate_stationary_distribution(Q6)

# Time grids: a long horizon and a short one, 100 points each.
t_range_large = np.linspace(0, 10, 100)
t_range_small = np.linspace(0, 0.5, 100)

# mu(t) and mu'(t) under Q5, starting from test_nst_array2, over the long horizon.
mu_range1, mu_prime_range1 = mu_mu_prime_range(
    Q5,
    test_nst_array2,
    t_range_large,
)

# Smallest rate reached on the sampled grid.
lim_mu = min(mu_range1)

In [18]:
# Plot the non-stationary evolution rate against time; titled so the figure stands
# alone, matching the earlier plots in this notebook.
df1 = pd.DataFrame({'Time': t_range_large, 'Evolution rate': mu_range1})
fig1 = px.line(
    df1,
    x='Time',
    y='Evolution rate',
    title='Non-stationary evolution rate over time',
)
fig1.show()

In [19]:
# Plot the derivative of the non-stationary evolution rate against time; titled so
# the figure stands alone, matching the earlier plots in this notebook.
df2 = pd.DataFrame({'Time': t_range_large, 'Mu_prime': mu_prime_range1})
fig2 = px.line(
    df2,
    x='Time',
    y='Mu_prime',
    title='Derivative of the non-stationary evolution rate over time',
)
fig2.show()