In [2]:
from cogent3 import load_aligned_seqs, get_app
import pathlib


In [3]:
# Collect every JSON alignment under the data directory. The result is sorted
# because glob yields files in filesystem-dependent order, while the position
# of each path is later used as the index/key of its fitted results.
paths = sorted(pathlib.Path("../data/16s-10").glob("**/*.json"))

# Model-fitting app applied to every alignment; the optimiser is allowed up to
# five restarts via opt_args.
# NOTE(review): sm="GN" with time_het="max" presumably fits a separate general
# nucleotide rate matrix on each edge -- confirm against the cogent3 docs.
GN_model = get_app(
    "model",
    sm="GN",
    unique_trees=True,
    time_het="max",
    optimise_motif_probs=True,
    show_progress=True,
    opt_args={"max_restarts": 5},
)

# Cleaning app run on each alignment before fitting; configured so that gaps
# count as degenerate characters (gap_is_degen=True) with motif_length=1.
no_degenerates = get_app(
    "omit_degenerates", moltype=None, gap_is_degen=True, motif_length=1
)

In [4]:
def alns_to_matx_pi(path, model):
    """Fit ``model`` to one alignment and return its rate matrices and motif probs.

    The alignment at ``path`` is loaded, passed through the module-level
    ``no_degenerates`` app, and then handed to ``model``.

    Parameters
    ----------
    path
        Location of the alignment file, as accepted by ``load_aligned_seqs``.
    model
        Callable that takes the cleaned alignment and returns a result
        exposing ``tree`` and ``lf`` attributes.

    Returns
    -------
    tuple
        ``(rate_matrices, motif_probs)`` where ``rate_matrices`` maps each
        name from ``tree.get_node_names(includeself=False)`` to the rate
        matrix requested with ``calibrated=False``, and ``motif_probs`` is
        whatever ``lf.get_motif_probs()`` returns.
    """
    fitted = model(no_degenerates(load_aligned_seqs(path)))

    rate_matrices = {}
    # includeself=False presumably leaves out the root node itself -- TODO confirm.
    for edge in fitted.tree.get_node_names(includeself=False):
        rate_matrices[edge] = fitted.lf.get_rate_matrix_for_edge(edge, calibrated=False)

    return rate_matrices, fitted.lf.get_motif_probs()

# Per-alignment results, kept in the same order as `paths`.
matrix_dict_array = []
motif_probs_dict_array = []
for path in paths:
    # Fit each alignment exactly once and unpack both outputs. Calling
    # alns_to_matx_pi separately for each element would load and fit every
    # alignment twice, and pair matrices with motif probs from different fits.
    matrices, motif_probs = alns_to_matx_pi(path, GN_model)
    matrix_dict_array.append(matrices)
    motif_probs_dict_array.append(motif_probs)


   0%|          |00:00<?

   0%|          |00:00<?

ModuleNotFoundError: No module named 'cogent3.tution_model'

In [10]:
# Display the per-edge rate matrices collected for the second alignment (index 1).
matrix_dict_array[1]

            T          C          A          G
 ---------------------------------------------
 T    -1.7071     0.5367     0.3064     0.8640
 C     0.2488    -0.8894     0.1158     0.5248
 A     0.0380     0.1822    -0.5550     0.3347
 G     0.2033     0.5798     0.2337    -1.0169
 ---------------------------------------------,
            T          C          A          G
 ---------------------------------------------
 T    -0.4260     0.1099     0.2782     0.0379
 C     0.9957    -1.6865     0.3531     0.3377
 A     0.0689     0.0000    -0.0689     0.0000
 G     0.4794     0.2493     0.6167    -1.3455
 ---------------------------------------------,
            T          C          A          G
 ---------------------------------------------
 T    -1.5629     1.0337     0.2136     0.3156
 C     0.7493    -0.9929     0.0868     0.1568
 A     0.2793     0.2508    -1.1354     0.6054
 G     0.2090     0.1284     0.3103    -0.6477
 ---------------------------------------------}

In [6]:
def process_compel_data(matrix_dict_array, motif_probs_dict_array):
    """Convert fitted matrices and motif probabilities into plain nested lists.

    Parameters
    ----------
    matrix_dict_array
        Sequence of dicts; each value must expose an ``.array`` attribute
        that supports ``.tolist()``.
    motif_probs_dict_array
        Sequence of objects exposing an ``.array`` attribute that supports
        ``.tolist()``. Must hold at least as many entries as
        ``matrix_dict_array``, since entries are paired by position.

    Returns
    -------
    dict
        Maps each integer position ``i`` to
        ``{'motif_prob': <list>, 'matrix': {key: <nested list>}}``.
    """
    # Motif probabilities are converted first, then the matrices.
    pi_lists = [probs.array.tolist() for probs in motif_probs_dict_array]
    rate_lists = [
        {edge: per_edge[edge].array.tolist() for edge in per_edge}
        for per_edge in matrix_dict_array
    ]

    # One record per entry of matrix_dict_array, keyed by its position.
    return {
        idx: {'motif_prob': pi_lists[idx], 'matrix': rate_lists[idx]}
        for idx in range(len(matrix_dict_array))
    }

# Build the list-based dictionary from the fitted matrices and motif probabilities.
data = process_compel_data(matrix_dict_array, motif_probs_dict_array)

In [7]:
import json

# Write the collected results to disk as JSON.
output_path = '../data/matrix_motif.json'
with open(output_path, 'w') as out_handle:
    json.dump(data, out_handle)