In [3]:
from cogent3.app.result import bootstrap_result


In [4]:
# `bootstrap_result` is the name imported from cogent3.app.result (not an instance),
# so this attribute access displays the property object itself rather than a value.
bootstrap_result.observed

<property at 0x15db99800>

In [None]:
import json
from cogent3.util.deserialise import deserialise_object

# NOTE: this cell uses names that are not imported in this cell and must already be
# defined by earlier cells: create_bootstrap_substitution_app and get_app.

# Helper that runs the bootstrap test of substitution model on one serialised alignment
def run_bootstrap_example(aln_serialisable, outgroup_name, tree, num_reps=10, verbose=True):
    """
    Run the bootstrap process for a test of substitution model.
    
    Parameters:
    -----------
    aln_serialisable : str
        Path to the alignment JSON file.
    outgroup_name : str
        Name of the out-group in the alignment.
    tree : PhyloNode
        The phylogenetic tree object.
    num_reps : int
        Number of bootstrap iterations (default is 10).
    verbose : bool
        Forwarded unchanged to create_bootstrap_substitution_app (default is True).
        
    Returns:
    --------
    bootstrap_result
        Whatever create_bootstrap_substitution_app returns; presumably a
        compact_bootstrap_result -- TODO confirm against that function.
    """
    # Load the alignment from its serialised form. The context manager guarantees
    # the file handle is closed even if JSON parsing or deserialisation fails.
    with open(aln_serialisable, 'r') as aln_file:
        aln = deserialise_object(json.load(aln_file))
    
    # Run the bootstrap process and hand its result straight back to the caller
    return create_bootstrap_substitution_app(
        aln=aln,
        outgroup_name=outgroup_name,
        tree=tree,
        just_continuous=False,
        num_reps=num_reps,
        opt_args=None,
        verbose=verbose
    )

# Inputs for the worked example: out-group label, alignment file and model-fitting result file
outgroup_name = "Sperm_whale"
aln_serialisable = '/Users/gulugulu/Desktop/honours/data_local/triples_aln_subset/ENSG00000007933_7.json'
result_lf_serialisable = '/Users/gulugulu/Desktop/honours/data_local/whole_genome_mammal87/triads_model_fitting_350_threshold/ENSG00000007933/model_fitting_result/7.json'

# Obtain the tree from the stored model-fitting result
load_json_app = get_app("load_json")
result_lf = load_json_app(result_lf_serialisable)
my_tree = result_lf.get_ens_tree()

# Bootstrap with 10 replicates
bootstrap_result = run_bootstrap_example(
    aln_serialisable,
    outgroup_name,
    my_tree,
    num_reps=10,
)

# Report the observed statistic, the bootstrap p-value and the replicate count
print("Observed Likelihood Ratio (LR):", bootstrap_result.observed.LR)
print("P-value from bootstrap:", bootstrap_result.pvalue)
print("Number of bootstrap replicates:", len(bootstrap_result))

# One line per replicate; only the value of each (key, value) pair is reported
print("Bootstrap replicate Likelihood Ratios (LRs):")
for i, item in enumerate(bootstrap_result.items()):
    key, val = item
    print(f"Replicate {i+1} - LR: {val['LR']}, p-value: {val['pvalue']}")

# Optional: Inspect the entire bootstrap result object if needed
# print(bootstrap_result)


In [12]:
import glob
import os
import re
import json

def extract_info(path):
    """Extract the gene name and numeric identifier from an alignment path.

    Searches ``path`` for the substring ``/ENSG<digits>_<digits>.json`` and
    returns ``(gene_name, identifier)`` as strings. When the substring is not
    present, returns ``(None, None)``.
    """
    found = re.search(r"/(ENSG\d+)_(\d+)\.json", path)
    if found is None:
        return None, None
    # Both capture groups are mandatory, so groups() is exactly (gene_name, identifier)
    return found.groups()
    
# Build a mapping {alignment file stem -> species names of that triple} for every
# alignment in the subset directory, then save it as JSON.
subset_dir = '/Users/gulugulu/Desktop/honours/data_local/triples_aln_subset'
subset_triples_set_names = {}
paths = glob.glob(os.path.join(subset_dir, '*.json'))
info_dir = '/Users/gulugulu/Desktop/honours/data_local/whole_genome_mammal87/triads_350_threshold'
for path in paths:
    # File stem (name without the final extension) is used as the output key
    file_name = os.path.basename(path).rsplit('.', 1)[0]
    gene_name, identifier = extract_info(path)
    if gene_name is None:
        # extract_info returns (None, None) when the name does not match; fail with a
        # clear message instead of a TypeError from os.path.join(info_dir, None, ...)
        raise ValueError(f"Cannot parse gene name and identifier from alignment path: {path}")
    triples_name_info_dir = os.path.join(info_dir, gene_name, 'triads_species_names_dict.json')
    # Context manager closes each per-gene file; the original leaked one handle per iteration
    with open(triples_name_info_dir, 'r') as info_file:
        triples_name_info = json.load(info_file)
    triple_names = triples_name_info[identifier]
    subset_triples_set_names[file_name] = triple_names

with open('/Users/gulugulu/Desktop/honours/data_local/triples_aln_subset_names.json', 'w') as outfile:
    json.dump(subset_triples_set_names, outfile, indent=4)

