In [7]:
from brancharchitect.majority_consensus_extended import create_majority_consensus_tree_extended
from brancharchitect.majority_consensus import create_majority_consensus_tree
from brancharchitect.consensus_tree import get_taxa_circular_order
from brancharchitect.newick_parser import parse_newick
from scipy.stats import kendalltau, spearmanr
from typing import List

def write_order(file_path, order: List[str]):
    """Write a taxa order to ``file_path``, one entry per line.

    Args:
        file_path: Destination path; an existing file is overwritten.
        order: Taxon labels in the order they should be written. Entries
            that are not already strings are rendered with ``str`` so that a
            numeric order does not fail on string concatenation.
    """
    with open(file_path, 'w') as f:
        # Formatting through an f-string accepts any item type, and a single
        # writelines call avoids one write() per taxon.
        f.writelines(f"{taxon}\n" for taxon in order)

def compare_orders(order1, order2):
    """Run Kendall's tau and Spearman's rank correlation on two sequences.

    Both sequences are handed to SciPy unchanged, so their elements are
    paired by position.

    NOTE(review): when the orders hold taxon labels, the statistics are
    computed on the labels' own sort order rather than on each taxon's
    position in the two orders -- confirm this is the intended comparison.

    Returns:
        A 4-tuple ``(tau, tau_p_value, spearman_corr, spearman_p_value)``.
    """
    return (*kendalltau(order1, order2), *spearmanr(order1, order2))

# Path to the Newick file holding the input trees.
file_path = './test_data_leave_order/sarbecovvirus_600_100_GTR/sarbecov_virus_trimmed_sequence_window_600_100_GTR.newick'
with open(file_path, 'r') as file:
    file_content = file.read()

# Leaf order of the first tree in the file.
trees = parse_newick(file_content)
first_order = get_taxa_circular_order(trees[0])
write_order("./leaf_order_results/mcte_600_100.first_order", first_order)

# Leaf order of the extended majority consensus tree.
majority_consensus_tree_extended = create_majority_consensus_tree_extended(file_content)
mcte_to_newick = majority_consensus_tree_extended.to_newick()
mcte_order = get_taxa_circular_order(majority_consensus_tree_extended)
write_order("./leaf_order_results/mcte_600_100.order", mcte_order)

# Leaf order of the plain majority consensus tree.
mct = create_majority_consensus_tree(file_content)
mc_newick = mct.to_newick()
mc_order = get_taxa_circular_order(mct)
write_order("./leaf_order_results/mc_600_100.order", mc_order)

# Sorted baselines (ascending and descending), built from the order already
# computed above instead of querying the tree again.
# NOTE(review): assumes get_taxa_circular_order has no side effects -- confirm.
taxa_order_upwards = sorted(mc_order)
write_order("./leaf_order_results/taxa_order_upwards", taxa_order_upwards)

taxa_order_downwards = sorted(mc_order, reverse=True)
write_order("./leaf_order_results/taxa_order_downwards", taxa_order_downwards)

# List of all orders
all_orders = [first_order, mcte_order, mc_order, taxa_order_upwards, taxa_order_downwards]
order_names = ["first_order", "mcte_order", "mc_order", "taxa_order_upwards", "taxa_order_downwards"]

# Significance threshold applied to both tests.
ALPHA = 0.05

# Compare every unordered pair of orders exactly once.
named_orders = list(zip(order_names, all_orders))
for index, (name_a, order_a) in enumerate(named_orders):
    for name_b, order_b in named_orders[index + 1:]:
        tau, tau_p_value, spearman_corr, spearman_p_value = compare_orders(order_a, order_b)
        print(f"Comparing {name_a} and {name_b}:")
        # kendalltau reports the tau correlation statistic, not a distance.
        print(f"Kendall Tau correlation coefficient: {tau}")
        print(f"Kendall Tau p-value: {tau_p_value}")
        print(f"Spearman's rank correlation coefficient: {spearman_corr}")
        print(f"Spearman's p-value: {spearman_p_value}")

        # A p-value at or above ALPHA only means the null hypothesis of no
        # association was not rejected; the messages below report that case
        # as "independent".
        if tau_p_value < ALPHA:
            print("Kendall Tau: The orders are not independent.\n")
        else:
            print("Kendall Tau: The orders are independent.\n")

        if spearman_p_value < ALPHA:
            print("Spearman: The orders are not independent.\n")
        else:
            print("Spearman: The orders are independent.\n")

Comparing first_order and mcte_order:
Kendall Tau distance: 0.1988899167437558
Kendall Tau p-value: 0.048649861809807717
Spearman's rank correlation coefficient: 0.27833024976873266
Spearman's p-value: 0.05817505804619863
Kendall Tau: The orders are not independent.

Spearman: The orders are independent.

Comparing first_order and mc_order:
Kendall Tau distance: -0.1082331174838113
Kendall Tau p-value: 0.2832966676073182
Spearman's rank correlation coefficient: -0.16547178538390378
Spearman's p-value: 0.26632988606894925
Kendall Tau: The orders are independent.

Spearman: The orders are independent.

Comparing first_order and taxa_order_upwards:
Kendall Tau distance: 0.13413506012950974
Kendall Tau p-value: 0.18361194370188882
Spearman's rank correlation coefficient: 0.22224791859389453
Spearman's p-value: 0.1332325138575069
Kendall Tau: The orders are independent.

Spearman: The orders are independent.

Comparing first_order and taxa_order_downwards:
Kendall Tau distance: -0.134135060

(NC_045512.2:1,MN996532.1:1,((MG772934.1:1,MG772933.1:1),((MT040336.1:1,MT040334.1:1,MT040335.1:1,MT040333.1:1,MT072864.1:1),(DQ412043.1:1,KP886808.1:1,MK211374.1:1,MK211375.1:1,MK211376.1:1,MK211377.1:1,MK211378.1:1,KY417147.1:1,KT444582.1:1,KY417143.1:1,KY770858.1:1,KF367457.1:1,KY417144.1:1,KY417151.1:1,KY417142.1:1,KY417145.1:1,KY417149.1:1,KY417148.1:1,KY417146.1:1,KY417152.1:1,KU973692.1:1,DQ071615.1:1,JX993988.1:1,KF569996.1:1,KJ473814.1:1,JX993987.1:1,(GQ153542.1:1,(GQ153547.1:1,DQ022305.2:1)),(KY770860.1:1,(DQ412042.1:1,DQ648856.1:1)),(NC_004718.3:1,(AY686864.1:1,AY572034.1:1,AY572035.1:1,AY572038.1:1,AY686863.1:1))))))R;
