In [125]:
from cogent3.maths.matrix_exponential_integration import expected_number_subs
from cogent3 import get_app, load_aligned_seqs
import numpy as np

from numpy import array

# Model-fitting app using the "GN" substitution model, with motif probabilities
# optimised, the progress display switched off and at most 5 optimiser restarts.
# NOTE(review): time_het="max" presumably gives every edge its own rate
# parameters -- confirm against the cogent3 documentation.
GN_model = get_app(
    "model",
    sm="GN",
    unique_trees=True,
    time_het="max",
    optimise_motif_probs=True,
    show_progress=False,
    opt_args={"max_restarts": 5},
)

# Alignment filter built from the "omit_degenerates" app for DNA, configured so
# that gaps count as degenerate characters and columns are handled one motif
# (single position) at a time.
no_degenerates = get_app(
    "omit_degenerates", moltype="dna", gap_is_degen=True, motif_length=1
)



In [100]:
# Quick inspection: display the class of the configured model app.
type(GN_model)

cogent3.app.evo.model

In [44]:
import pathlib

# Collect every JSON alignment under the data directory (recursively).
# The matches are sorted because Path.glob() yields entries in an arbitrary,
# filesystem-dependent order; without sorting, paths[0] and paths[1] could
# refer to different alignments from one run or machine to the next.
paths = sorted(pathlib.Path("../data/16s-10").glob("**/*.json"))

In [116]:
# Load the first alignment, filter it and fit the model. These are the same
# three steps that get_matrix_pi() performs for an arbitrary path.
path = paths[0]
aln = load_aligned_seqs(path)
# Apply the "omit_degenerates" app configured at the top of the notebook.
aln1 = no_degenerates(aln)
# `result` is inspected in the following cell (result.lf).
result = GN_model(aln1)


In [121]:
# Display the motif probabilities held by the fitted likelihood function.
result.lf.get_motif_probs()

T,C,A,G
0.2017,0.2469,0.2197,0.3316


In [101]:
def get_matrix_pi(path):
    """Fit the GN model to one alignment and return a rate matrix and motif probs.

    The alignment stored at ``path`` is loaded, passed through the
    ``no_degenerates`` filter and fitted with ``GN_model``. The rate matrix is
    extracted for a single edge only: the second name in the list returned by
    ``tree.get_node_names(includeself=False)``.

    Parameters:
    path: Location of the aligned sequences, as accepted by ``load_aligned_seqs``.

    Returns:
    tuple: ``(Q, pi)`` where ``Q`` is the rate matrix for that edge (requested
        with ``calibrated=True``) and ``pi`` is the motif probabilities
        reported by the fitted likelihood function.
    """
    fitted = GN_model(no_degenerates(load_aligned_seqs(path)))

    # Index 1 picks the second node name in the listing.
    edge = fitted.tree.get_node_names(includeself=False)[1]

    lf = fitted.lf
    return lf.get_rate_matrix_for_edge(edge, calibrated=True), lf.get_motif_probs()


In [102]:
# Fit the model to the first two alignments; keep the rate matrix and the
# motif probabilities returned for each.
Q1, pi1 = get_matrix_pi(paths[0])
Q2, pi2 = get_matrix_pi(paths[1])

# Convert to plain NumPy arrays: the helper functions defined below rely on
# ndarray attributes such as .T and .shape.
Q1 = np.array(Q1)
Q2 = np.array(Q2)

In [103]:
import math
# Helpers for the stationary process
def calculate_stationary_distribution(Q):
    """
    Calculate the stationary distribution pi for a given substitution rate matrix Q.

    pi is obtained as the least-squares solution of the linear system
    ``pi @ Q = 0`` combined with the normalisation constraint ``sum(pi) = 1``.

    Parameters:
    Q (array-like): Square substitution rate matrix (n x n). Anything accepted
        by ``numpy.asarray`` works, e.g. a ``numpy.ndarray`` or a nested list.

    Returns:
    numpy.ndarray: The stationary distribution pi, a 1-D array of length n.

    Raises:
    ValueError: If Q is not a square 2-D matrix.
    """
    # Coerce once so that list-like inputs work and the shape can be validated.
    Q = np.asarray(Q, dtype=float)
    if Q.ndim != 2 or Q.shape[0] != Q.shape[1]:
        raise ValueError(f"Q must be a square 2-D matrix, got shape {Q.shape}")
    n_states = Q.shape[0]

    # Rows of Q.T encode pi @ Q = 0; the extra row of ones encodes sum(pi) = 1.
    A = np.vstack([Q.T, np.ones(n_states)])

    # Right-hand side: zeros for the balance equations, 1 for the normalisation.
    b = np.zeros(n_states + 1)
    b[-1] = 1

    # The system has n + 1 equations for n unknowns, hence least squares.
    pi = np.linalg.lstsq(A, b, rcond=None)[0]

    return pi


def calculate_stationary_rate(Q):
    """
    Return the evolution rate of Q when the process is at stationarity.

    The rate is mu = -sum_i(pi_i * Q_ii), where pi is the stationary
    distribution returned by ``calculate_stationary_distribution(Q)``.

    Parameters:
    Q (numpy.ndarray): The substitution rate matrix.

    Returns:
    float: The stationary evolution rate mu_stationary.
    """
    stationary_pi = calculate_stationary_distribution(Q)
    diagonal_rates = np.diagonal(Q)

    # Weight each diagonal entry by its stationary probability, then negate.
    return -np.sum(stationary_pi * diagonal_rates)

In [122]:
# Stationary evolution rate of the fitted matrix Q2, before any rescaling.
calculate_stationary_rate(Q2)

0.1585570958504568

In [123]:
def matrix_calibration(Q):
    """
    Rescale Q so that its stationary evolution rate equals 1.

    Q is divided by mu = -sum_i(pi_i * Q_ii), with pi taken from
    ``calculate_stationary_distribution(Q)``. The sum is accumulated with
    ``math.fsum``.

    Parameters:
    Q (numpy.ndarray): The substitution rate matrix.

    Returns:
    numpy.ndarray: The rescaled matrix ``Q / mu``.
    """
    weighted_diagonal = calculate_stationary_distribution(Q) * np.diagonal(Q)
    stationary_rate = -math.fsum(weighted_diagonal)
    return Q / stationary_rate

In [124]:
# Sanity check: after calibration the stationary rate of Q2c should be 1,
# up to floating-point error.
Q2c = matrix_calibration(Q2)
calculate_stationary_rate(Q2c)

0.9999999999999964

In [107]:
# Alternative matrix kept for reference (currently disabled):
# R = np.array([
#     [-4.5,  2.0,  1.0,  1.5],
#     [ 2.0, -4.5,  1.5,  1.0],
#     [ 1.0,  1.5, -4.5,  2.0],
#     [ 1.5,  1.0,  2.0, -4.5]
# ])

# Hand-written 4x4 matrices and probability vectors used by the cells below.
R2 = np.array(
    [
        [-4.5, 2.0, 1.0, 1.5],
        [2.0, -3.5, 0.5, 1.0],
        [1.0, 0.5, -2.5, 1.0],
        [0.5, 1.0, 1.0, -2.5],
    ]
)

Q = np.array(
    [
        [-1.4, 0.1, 0.4, 0.9],
        [4.0, -6.9, 0.9, 2.0],
        [6.3, 2.0, -11.3, 3.0],
        [0.7, 0.1, 0.2, -1.0],
    ],
    dtype=float,
)

# Uniform distribution over the four states.
# NOTE: the name `i` is rebound as a loop index by the plotting cell further down.
i = np.full(4, 0.25)

# Starting distribution passed to expected_number_subs in a later cell.
test_nst_array1 = np.array([0.05, 0.35, 0.35, 0.25])

# NOTE(review): the last row sums to 0.0001 rather than 0 -- presumably
# rounding in the source values; confirm.
Q_2 = np.array(
    [
        [-0.0935, 0.0148, 0.0558, 0.0229],
        [0.0469, -0.0676, 0.0108, 0.0099],
        [0.00, 0.0058, -0.0319, 0.0261],
        [0.00, 0.0132, 0.0370, -0.0501],
    ],
    dtype=float,
)

# Same as R2 except for the last row.
Q3 = np.array(
    [
        [-4.5, 2.0, 1.0, 1.5],
        [2.0, -3.5, 0.5, 1.0],
        [1.0, 0.5, -2.5, 1.0],
        [0.0, 1.0, 2.0, -3.0],
    ]
)

In [108]:
# Stationary distribution of the fitted matrix Q2, solved inline.
# Rows of Q2.T encode pi @ Q2 = 0; the final row of ones encodes sum(pi) = 1.
A = np.vstack((Q2.T, np.ones(len(Q2))))

# Right-hand side: zeros for the balance equations, 1 for the normalisation.
b = np.append(np.zeros(len(Q2)), 1.0)

# Least-squares solution of the (n + 1) x n system A @ pi = b.
pi = np.linalg.lstsq(A, b, rcond=None)[0]

print("Stationary distribution pi:", pi)




Stationary distribution pi: [0.16834885 0.01212364 0.81174323 0.00778429]


In [109]:
def calculate_d(p, R):
    """Return the norm of the product of ``p`` and ``R``.

    Parameters:
    p: Vector (array-like) multiplied on the left of ``R``.
    R: Matrix (array-like).

    Returns:
    float: ``numpy.linalg.norm`` of ``numpy.dot(p, R)`` with default
        arguments, i.e. the Euclidean norm when the product is a vector.
    """
    product = np.dot(p, R)
    return np.linalg.norm(product)

# Settings for grid search
resolution = 10
min_val = 0.01  # Minimum value for each element in p to avoid extreme cases
largest_d = -np.inf
best_p = None

# Coarse grid over probability vectors p = (a, b, c, d) in which every entry is
# at least min_val and the entries sum to 1. The p that maximises
# calculate_d(p, Q3) is kept.
for a in np.linspace(min_val, 1 - 3*min_val, resolution):
    for b in np.linspace(min_val, 1 - 2*min_val - a, resolution):
        for c in np.linspace(min_val, 1 - min_val - a - b, resolution):
            d = 1 - (a + b + c)  # Remaining value to ensure sum(p) = 1
            # The loop bounds guarantee d >= min_val mathematically, but at the
            # upper end of the c range d equals min_val only up to rounding and
            # can land a hair below it. Treat "equal within floating-point
            # tolerance" as valid so boundary points are not dropped at random.
            if d < min_val and not np.isclose(d, min_val):
                continue
            p = np.array([a, b, c, d])
            current_d = calculate_d(p, Q3)
            if current_d > largest_d:
                largest_d = current_d
                best_p = p

print("Best p:", best_p)
print("Largest d:", largest_d)

Best p: [0.97 0.01 0.01 0.01]
Largest d: 5.050499975249976


In [110]:
# Evaluate expected_number_subs from the same starting distribution
# (test_nst_array1) at 100 evenly spaced values of t in [0, 2]: once with the
# hand-written matrix Q and once with the fitted matrix Q2.
t_range = np.linspace(0, 2, 100)
s_exp_numb_sub_value_1 = []
n_exp_numb_sub_value_1 = []

for t in t_range:
    s_exp_numb_sub_value_1.append(expected_number_subs(test_nst_array1, Q, t))
    n_exp_numb_sub_value_1.append(expected_number_subs(test_nst_array1, Q2, t))



In [111]:
import plotly.express as px

# Uses t_range, s_exp_numb_sub_value_1 and n_exp_numb_sub_value_1 from the
# previous cell. Build long-format records: two rows (one per series) for
# every value of t.
data1 = []
for i, t in enumerate(t_range):
    data1 += [
        {'t': t, 'value': s_exp_numb_sub_value_1[i], 'series': 'Stationary'},
        {'t': t, 'value': n_exp_numb_sub_value_1[i], 'series': 'Non-Stationary'},
    ]

# One line per series, drawn with thick strokes.
fig = px.line(data1, x='t', y='value', color='series', labels={'value': 'ENS'})
fig.update_traces(line={'width': 5})

# Square 600 x 600 canvas; the legend carries no title.
fig.update_layout(width=600, height=600, legend_title_text='')

# Axis titles and base font size.
fig.update_layout(xaxis_title='t', yaxis_title='ENS', font={'size': 25})

# Legend title font size.
fig.update_layout(legend_title_font={'size': 18})

# Axis title and tick label font sizes.
fig.update_layout(
    xaxis={'title_font': {'size': 25}, 'tickfont': {'size': 20}},
    yaxis={'title_font': {'size': 25}, 'tickfont': {'size': 20}},
)

# Horizontal legend centred underneath the plot area; adjust x / y to move it.
fig.update_layout(
    legend={
        'orientation': 'h',
        'yanchor': 'bottom',
        'y': -0.25,
        'xanchor': 'center',
        'x': 0.5,
    }
)

fig.show()


  sf: grouped.get_group(s if len(s) > 1 else s[0])


In [112]:
# from scipy.stats import linregress

# # Perform linear regression for s_exp_numb_sub_value
# slope_s, intercept_s, r_value_s, p_value_s, std_err_s = linregress(t_range, s_exp_numb_sub_value_1)
# print("Linear regression for s_exp_numb_sub_value:")
# print(f"Slope: {slope_s}, Intercept: {intercept_s}, R-squared: {r_value_s**2}, P-value: {p_value_s}")

# # Perform linear regression for n_exp_numb_sub_value_1
# slope_n1, intercept_n1, r_value_n1, p_value_n1, std_err_n1 = linregress(t_range, n_exp_numb_sub_value_1)
# print("Linear regression for n_exp_numb_sub_value_1:")
# print(f"Slope: {slope_n1}, Intercept: {intercept_n1}, R-squared: {r_value_n1**2}, P-value: {p_value_n1}")