In [1]:
from cogent3.maths.matrix_exponential_integration import expected_number_subs
from cogent3 import get_app, load_aligned_seqs
import numpy as np

from numpy import array

# Model-fitting app: a "GTR" substitution model with motif probabilities
# optimised and the optimiser given max_restarts=5.
# NOTE(review): the exact meaning of unique_trees / time_het="max" is defined by
# cogent3's "model" app, not by this notebook -- confirm against its docs.
GTR_model = get_app(
    "model",
    sm="GTR",
    unique_trees=True,
    time_het="max",
    optimise_motif_probs=True,
    show_progress=False,
    opt_args={"max_restarts": 5},
)

# Alignment filter applied before fitting; gaps are treated as degenerate
# characters (gap_is_degen=True) and motifs are single characters.
no_degenerates = get_app("omit_degenerates", moltype=None, gap_is_degen=True, motif_length=1)



In [2]:
import pathlib

# Collect every JSON file below the data directory (recursive glob).
# glob() yields entries in whatever order the filesystem returns them, so the
# list is sorted to make positional lookups such as paths[0] / paths[1] refer
# to the same files on every machine and every run.
paths = sorted(pathlib.Path("../data/16s-10").glob("**/*.json"))

In [3]:
def get_matrix_pi(path):
    """Fit the GTR app to one alignment and return ``(Q, pi)``.

    The alignment at ``path`` is loaded, passed through ``no_degenerates`` and
    then fitted with ``GTR_model``.  ``Q`` is the uncalibrated rate matrix of
    the first node name reported by the fitted tree (root excluded) and ``pi``
    is the motif-probability vector of the fitted likelihood function.

    NOTE(review): no check is made that filtering or fitting succeeded --
    confirm how the cogent3 apps report failure before relying on this in bulk.
    """
    fitted = GTR_model(no_degenerates(load_aligned_seqs(path)))
    first_edge, *_ = fitted.tree.get_node_names(includeself = False)
    likelihood = fitted.lf
    rate_matrix = likelihood.get_rate_matrix_for_edge(first_edge, calibrated=False)
    motif_probs = likelihood.get_motif_probs()
    return rate_matrix, motif_probs


In [4]:
# Fit the first two alignment files and keep, for each, the rate matrix and
# motif probabilities returned by get_matrix_pi.
# NOTE(review): Q1/pi1/Q2/pi2 are not referenced by any later cell shown in
# this notebook -- the cells below work with a hand-written Q instead.
Q1, pi1 = get_matrix_pi(paths[0])
Q2, pi2 = get_matrix_pi(paths[1])

In [214]:
# 100 evenly spaced time points covering [0, 0.3].
t_range = np.linspace(0.0, 0.3, num=100)

# Alternative symmetric matrix kept for reference (only used by the
# commented-out experiments further down):
# R = np.array([
#     [-4.5,  2.0,  1.0,  1.5],
#     [ 2.0, -4.5,  1.5,  1.0],
#     [ 1.0,  1.5, -4.5,  2.0],
#     [ 1.5,  1.0,  2.0, -4.5]
# ])

# Hand-written 4x4 rate matrix; each row sums to zero.
Q = array(
    [
        [-1.4, 0.1, 0.4, 0.9],
        [4.0, -6.9, 0.9, 2.0],
        [6.3, 2.0, -11.3, 3.0],
        [0.7, 0.1, 0.2, -1],
    ],
    dtype=float,
)

# Uniform distribution over the four states.
i = [0.25] * 4



In [215]:
# Stationary distribution of Q: solve pi @ Q = 0 together with sum(pi) = 1.
# The transposed matrix supplies one equation per state and an extra row of
# ones encodes the normalisation constraint.
n_states = Q.shape[0]
A = np.concatenate([Q.T, np.ones((1, n_states))], axis=0)

# Right-hand side: zeros for the balance equations, 1 for the normalisation.
b = np.append(np.zeros(n_states), 1.0)

# The system has one more equation than unknowns, so use least squares and
# keep only the solution vector.
pi, *_ = np.linalg.lstsq(A, b, rcond=None)

print("Stationary distribution pi:", pi)

Stationary distribution pi: [0.43712579 0.02142362 0.02629754 0.51515306]


In [202]:

def calculate_d(p, R):
    """Return the Euclidean norm of the product ``p . R``.

    ``p`` and ``R`` are combined with ``np.dot`` and the norm of the result is
    taken with ``np.linalg.norm`` using its default settings.
    """
    product = np.dot(p, R)
    return np.linalg.norm(product)

# Grid-search configuration.
resolution = 10      # grid points per axis
min_val = 0.01       # floor applied to every component of p
largest_d = -np.inf  # best score seen so far
best_p = None        # distribution that produced largest_d

# Enumerate candidate distributions p = (a, b, c, d).  The upper bound of each
# nested grid shrinks by what the outer components already use, and d is the
# remainder that makes the four components sum to 1.
for a in np.linspace(min_val, 1 - 3*min_val, resolution):
    b_grid = np.linspace(min_val, 1 - 2*min_val - a, resolution)
    for b in b_grid:
        c_grid = np.linspace(min_val, 1 - min_val - a - b, resolution)
        for c in c_grid:
            d = 1 - (a + b + c)
            if d < min_val:
                # Remainder fell below the floor; skip this candidate.
                continue
            p = np.array([a, b, c, d])
            current_d = calculate_d(p, Q)
            if current_d > largest_d:
                largest_d, best_p = current_d, p

print("Best p:", best_p)
print("Largest d:", largest_d)

Best p: [0.01 0.01 0.97 0.01]
Largest d: 13.025007562377843


In [203]:
# Expected number of substitutions under Q at every time point, starting from
# the solved stationary distribution `pi` ...
s_exp_numb_sub_value_1 = [expected_number_subs(pi, Q, t) for t in t_range]

# ... and starting from the uniform distribution `i`.
n_exp_numb_sub_value_1 = [expected_number_subs(i, Q, t) for t in t_range]


# Disabled experiment with the alternative matrix R (uniform `i` versus the
# grid-search optimum `best_p`):
# s_exp_numb_sub_value_2 = [expected_number_subs(i, R, t) for t in t_range]
# n_exp_numb_sub_value_2 = [expected_number_subs(best_p, R, t) for t in t_range]



In [204]:
import plotly.express as px

# Long-format records for px.line: one row per (time point, series) pair.
# zip() is used instead of `for i, t in enumerate(t_range)` on purpose: the
# notebook-level name `i` holds the uniform start distribution consumed by the
# ENS cell, and using it as a loop index here would leave it bound to an
# integer, breaking that cell when it is re-run after plotting.
data1 = []
for t, stationary_ens, non_stationary_ens in zip(
    t_range, s_exp_numb_sub_value_1, n_exp_numb_sub_value_1
):
    data1.append({'t': t, 'value': stationary_ens, 'series': 'Stationary'})
    data1.append({'t': t, 'value': non_stationary_ens, 'series': 'Non-Stationary'})

# One line per series, coloured by series name.
fig = px.line(data1, x='t', y='value', color='series',
              labels={'value': 'ENS'},
              title='Expected Number of Substitutions Over Time')
fig.update_traces(line=dict(width=3))

# Square canvas and no legend title (previously set twice; once is enough).
fig.update_layout(width=600, height=600, legend_title_text='')

# Axis titles and the base font size for the whole figure.
fig.update_layout(
    xaxis_title='t',
    yaxis_title='ENS',
    font=dict(
        size=18
    )
)

# Legend title font size (the title text itself is empty).
fig.update_layout(legend_title_font=dict(size=18))

# Per-axis overrides: larger axis titles, slightly smaller tick labels.
fig.update_layout(
    xaxis=dict(
        title_font=dict(size=20),
        tickfont=dict(size=16),
    ),
    yaxis=dict(
        title_font=dict(size=20),
        tickfont=dict(size=16),
    )
)

fig.show()





In [208]:
from scipy.stats import linregress

# Straight-line fit of the stationary-start ENS series against time.
fit_s = linregress(t_range, s_exp_numb_sub_value_1)
slope_s, intercept_s, r_value_s, p_value_s, std_err_s = fit_s
print(
    "Linear regression for s_exp_numb_sub_value:",
    f"Slope: {slope_s}, Intercept: {intercept_s}, R-squared: {r_value_s**2}, P-value: {p_value_s}",
    sep="\n",
)

# Same fit for the uniform-start (non-stationary) ENS series.
fit_n1 = linregress(t_range, n_exp_numb_sub_value_1)
slope_n1, intercept_n1, r_value_n1, p_value_n1, std_err_n1 = fit_n1
print(
    "Linear regression for n_exp_numb_sub_value_1:",
    f"Slope: {slope_n1}, Intercept: {intercept_n1}, R-squared: {r_value_n1**2}, P-value: {p_value_n1}",
    sep="\n",
)

Linear regression for s_exp_numb_sub_value:
Slope: 1.5721143017122345, Intercept: 2.7755575615628914e-17, R-squared: 1.0, P-value: 0.0
Linear regression for n_exp_numb_sub_value_1:
Slope: 2.7920253113979694, Intercept: 0.08856288220741321, R-squared: 0.9830997943925857, P-value: 1.1900169403353296e-88


In [206]:
# import plotly.express as px

# data2 = []
# for i, t in enumerate(t_range):
#     data2.append({'t': t, 'value': s_exp_numb_sub_value_2[i], 'series': 'Stationary2'})
#     data2.append({'t': t, 'value': n_exp_numb_sub_value_2[i], 'series': 'Non-Stationary2'})

# # Plotting with Plotly Express
# fig = px.line(data2, x='t', y='value', color='series', 
#               labels={'value': 'the expected number of substitutions'}, 
#               title='Expected Number of Substitutions Over Time')
# fig.update_traces(line=dict(width=1))

# fig.show()

In [217]:
# Second hand-written rate matrix: the first two rows differ from the earlier
# Q, the last two are the same.  Each row sums to zero.
Q = array(
    [
        [-1.6, 0.3, 0.4, 0.9],
        [8.085, -10.985, 0.9, 2.0],
        [6.3, 2.0, -11.3, 3.0],
        [0.7, 0.1, 0.2, -1],
    ],
    dtype=float,
)


# Stationary distribution of Q: solve pi @ Q = 0 together with sum(pi) = 1.
# The transposed matrix supplies one equation per state and an extra row of
# ones encodes the normalisation constraint.
n_states = Q.shape[0]
A = np.concatenate([Q.T, np.ones((1, n_states))], axis=0)

# Right-hand side: zeros for the balance equations, 1 for the normalisation.
b = np.append(np.zeros(n_states), 1.0)

# The system has one more equation than unknowns, so use least squares and
# keep only the solution vector.
pi, *_ = np.linalg.lstsq(A, b, rcond=None)

print("Stationary distribution pi:", pi)

Stationary distribution pi: [0.43713889 0.02141566 0.02629728 0.51514817]


In [213]:
from cogent3 import make_tree

def make_annotation(text, x, y, fontsize, color):
    """Return an annotation dict placing ``text`` at ``(x, y)``.

    The annotation has no arrow, is anchored on its left edge and uses the
    "Inconsolata, monospace" font family with the given size and colour.
    """
    font_spec = {
        "size": fontsize,
        "family": "Inconsolata, monospace",
        "color": color,
    }
    return {
        "text": text,
        "x": x,
        "y": y,
        "showarrow": False,
        "xanchor": "left",
        "font": font_spec,
    }

def add_text_annotation(fig, name_text_colour, fontsize=24):
    """Attach text annotations to the nodes of a tree figure.

    Parameters
    ----------
    fig
        Object exposing ``fig.tree.get_node_matching_name(name)`` (returning a
        node with ``x`` and ``y`` attributes) and ``fig.figure.layout``.
        NOTE(review): presumably the drawable returned by a cogent3 tree's
        ``get_figure()`` -- confirm.
    name_text_colour
        Mapping of node name -> dict with ``"text"`` and ``"color"`` keys.
    fontsize
        Font size used for every annotation.

    Returns
    -------
    The same ``fig``, with ``fig.figure.layout.annotations`` replaced by a
    tuple of the new annotations.

    For every entry the node's text is placed slightly to the right of the
    node (x scaled by 1.1).  Entries whose name does not start with "edge"
    additionally get a bold, italic ``P`` label with a running subscript
    (1, 2, ... in mapping order), offset up and to the left of the node.
    """
    annotes = []
    label_number = 0
    for name, val in name_text_colour.items():
        node = fig.tree.get_node_matching_name(name)
        annotes.append(
            make_annotation(val["text"], 1.1 * node.x, node.y, fontsize, val["color"])
        )
        if name.startswith("edge"):
            # Names starting with "edge" only get their text, not a P label.
            continue

        label_number += 1
        # The bold tag is now closed with </b>; the original ended in a second
        # opening <b>, leaving the markup unbalanced.
        label = f"<b><i>P</i><sub>{label_number}</sub></b>"
        annotes.append(
            make_annotation(label, node.x - 0.4, 0.2 + node.y, fontsize, val["color"])
        )

    fig.figure.layout.annotations = tuple(annotes)
    return fig

# Three-taxon example tree and its figure object.
tree = make_tree("(A,(B,C))")
fig = tree.get_figure()

# Thicker lines, larger markers, and no tip names drawn as text.
fig.line_width = 4
fig.marker = 14
fig.tips_as_text = False

# Colour the edges: A and edge.0 share dark green, B is orange, C is blue.
for edge_names, edge_colour in (
    (["A", "edge.0"], "darkgreen"),
    ("B", "orange"),
    ("C", "blue"),
):
    fig.style_edges(edge_names, line=dict(color=edge_colour))

# Text shown beside each node, with the colour used to draw it.
name_seq_colour = {
    "A": {"color": "darkgreen", "text": "C C C G C T"},
    "B": {"color": "orange", "text": "T C C G G T"},
    "C": {"color": "blue", "text": "C C C G A T"},
    "edge.0": {"color": "black", "text": "π"},
}

# Annotate, display, and write the figure to phylo.pdf.
fig = add_text_annotation(fig, name_seq_colour, fontsize=30)
fig.show(width=450, height=350)
fig.write("phylo.pdf")
