# Benchmarking results

This notebook computes the benchmarking results for 5 species:

- Bos taurus
- Homo sapiens
- Penicillium digitatum
- Puccinia graminis
- Schizosaccharomyces pombe

For these benchmarks, we used 3 different solvers:

- Advantage 6.4 QPU
- Advantage2 1.8 QPU
- Leap Hybrid BQM solver

We need to create one file *"./\<species\>/nmcut\<method\>_RF-distance.csv"* for every possible combination of species and solver. To do this, we compare each reconstructed tree to the ground-truth tree with the same identifier, located in *"../benchmarking matrices/\<species\>/"*.

In [None]:
import pandas as pd
import numpy as np
import sys
import re
import os
sys.path.append('../')
from qa_functions import *
import rpy2.robjects as robjects
from rpy2.robjects.packages import importr
from rpy2.robjects import pandas2ri

# Bos Taurus

First, let's start by taking a look into Bos Taurus species. Here we will define the main function that we are going to use for the rest of the species.

In [10]:
def load_tree(file_path):
    """Read a tree file and return its full contents as a single string.

    The text is returned as read; no stripping or parsing is applied.
    """
    with open(file_path, mode='r') as tree_file:
        return tree_file.read()

def compare_trees(species_name, method, verbose=True):
    r'''
    Compares reconstructed trees against ground truth trees for a given species and method.

    For every ``<id>_tree.txt`` file found in ``../benchmarking matrices/<species_name>/``
    the reconstructed tree ``./<species_name>/<id>_nmcut<method>_tree.txt`` is loaded,
    both trees are passed to ``treecmp`` and one row ``size,branch_length,id,RF`` is
    written to ``./<species_name>/nmcut<method>_RF-distance.csv``. That CSV is
    overwritten on every call. Rows are written in sorted file-name order.

    Parameters:
        species_name (str): Name of the species to analyze.
        method (str): Method identifier used in the filenames of reconstructed trees. Possible values: 'qa', 'qa_adv2', 'hy'.
        verbose (bool): If True (default), print one progress line per processed tree.

    Raises:
        ValueError: If ``method`` is not a valid method, or if a tree file name
            contains no 4-digit run from which to derive the branch length.
    '''

    valid_methods = ['qa', 'qa_adv2', 'hy']
    if method not in valid_methods:
        raise ValueError(f"Invalid method '{method}'. Valid methods are: {valid_methods}")

    truth_dir = f'../benchmarking matrices/{species_name}'
    tree_suffix = "_tree.txt"

    # os.listdir returns entries in arbitrary order; sort so the CSV rows
    # come out in the same order on every run and every machine.
    tree_files = sorted(
        name for name in os.listdir(truth_dir) if name.endswith(tree_suffix)
    )

    # Open the output once instead of reopening it for every row.
    with open(f'./{species_name}/nmcut{method}_RF-distance.csv', 'w') as fp:
        fp.write('size,branch_length,id,RF\n')

        for file in tree_files:
            file_ext = file[:-len(tree_suffix)]
            if verbose:
                print(f'Processing file: {file_ext} for method {method}\n')

            # Size is the number of rows of the matching matrix file.
            size = np.loadtxt(f'{truth_dir}/{file_ext}_matrix.txt').shape[0]

            # The first 4-digit run of the id is used as the decimals of the
            # branch length (e.g. '0-2516_...' -> '0.2516').
            digits = re.search(r'\d{4}', file_ext)
            if digits is None:
                raise ValueError(
                    f"Cannot derive branch length: no 4-digit run in '{file_ext}'"
                )
            branch_length = '0.' + digits[0]

            ground_truth = load_tree(f'{truth_dir}/{file}')
            reconstructed = load_tree(f'./{species_name}/{file_ext}_nmcut{method}_tree.txt')

            # NOTE(review): treecmp comes from qa_functions; presumably it returns
            # the Robinson-Foulds distance given the 'RF' column name - confirm.
            dist = treecmp(ground_truth, reconstructed)
            fp.write(f'{size},{branch_length},{file_ext},{dist}\n')
        

## Advantage 1 (QPU)

We now have the function, so this should just be a rapid-fire of small sections. However, to keep it neat, we will have the sections' divisions.

In [5]:
# Bos taurus, method 'qa' (Advantage 1 QPU section).
species_name, method = 'Bos_taurus', 'qa'
compare_trees(species_name=species_name, method=method)

Processing file: 0-2516_Phy0001SS5_BOVIN for method qa

Processing file: 0-2536_Phy0001SML_BOVIN for method qa

Processing file: 0-2653_Phy0001SSU_BOVIN for method qa

Processing file: 0-2659_Phy0001SUZ_BOVIN for method qa

Processing file: 0-2876_Phy0001SON_BOVIN for method qa

Processing file: 0-2940_Phy0001SHY_BOVIN for method qa

Processing file: 0-3039_Phy0001SMM_BOVIN for method qa

Processing file: 0-3081_Phy0001STS_BOVIN for method qa

Processing file: 0-3126_Phy0001SJ2_BOVIN for method qa

Processing file: 0-3134_Phy0001SQ2_BOVIN for method qa

Processing file: 0-3366_Phy0001T1N_BOVIN for method qa

Processing file: 0-3560_Phy0001SLE_BOVIN for method qa

Processing file: 0-3653_Phy0001SNJ_BOVIN for method qa

Processing file: 0-3893_Phy0001T1M_BOVIN for method qa

Processing file: 0-3978_Phy0001SKM_BOVIN for method qa

Processing file: 0-4064_Phy0001SXK_BOVIN for method qa

Processing file: 0-4163_Phy0001U32_BOVIN for method qa

Processing file: 0-4262_Phy0001SY5_BOVIN for met

## Advantage 2 (QPU)


In [6]:
# Bos taurus, method 'qa_adv2' (Advantage 2 QPU section).
species_name, method = 'Bos_taurus', 'qa_adv2'
compare_trees(species_name=species_name, method=method)

Processing file: 0-2516_Phy0001SS5_BOVIN for method qa_adv2

Processing file: 0-2536_Phy0001SML_BOVIN for method qa_adv2

Processing file: 0-2653_Phy0001SSU_BOVIN for method qa_adv2

Processing file: 0-2659_Phy0001SUZ_BOVIN for method qa_adv2

Processing file: 0-2876_Phy0001SON_BOVIN for method qa_adv2

Processing file: 0-2940_Phy0001SHY_BOVIN for method qa_adv2

Processing file: 0-3039_Phy0001SMM_BOVIN for method qa_adv2

Processing file: 0-3081_Phy0001STS_BOVIN for method qa_adv2

Processing file: 0-3126_Phy0001SJ2_BOVIN for method qa_adv2

Processing file: 0-3134_Phy0001SQ2_BOVIN for method qa_adv2

Processing file: 0-3366_Phy0001T1N_BOVIN for method qa_adv2

Processing file: 0-3560_Phy0001SLE_BOVIN for method qa_adv2

Processing file: 0-3653_Phy0001SNJ_BOVIN for method qa_adv2

Processing file: 0-3893_Phy0001T1M_BOVIN for method qa_adv2

Processing file: 0-3978_Phy0001SKM_BOVIN for method qa_adv2

Processing file: 0-4064_Phy0001SXK_BOVIN for method qa_adv2

Processing file: 0-4163_

## Hybrid approach

In [7]:
# Bos taurus, method 'hy' (hybrid approach section).
species_name, method = 'Bos_taurus', 'hy'
compare_trees(species_name=species_name, method=method)

Processing file: 0-2516_Phy0001SS5_BOVIN for method hy

Processing file: 0-2536_Phy0001SML_BOVIN for method hy

Processing file: 0-2653_Phy0001SSU_BOVIN for method hy

Processing file: 0-2659_Phy0001SUZ_BOVIN for method hy

Processing file: 0-2876_Phy0001SON_BOVIN for method hy

Processing file: 0-2940_Phy0001SHY_BOVIN for method hy

Processing file: 0-3039_Phy0001SMM_BOVIN for method hy

Processing file: 0-3081_Phy0001STS_BOVIN for method hy

Processing file: 0-3126_Phy0001SJ2_BOVIN for method hy

Processing file: 0-3134_Phy0001SQ2_BOVIN for method hy

Processing file: 0-3366_Phy0001T1N_BOVIN for method hy

Processing file: 0-3560_Phy0001SLE_BOVIN for method hy

Processing file: 0-3653_Phy0001SNJ_BOVIN for method hy

Processing file: 0-3893_Phy0001T1M_BOVIN for method hy

Processing file: 0-3978_Phy0001SKM_BOVIN for method hy

Processing file: 0-4064_Phy0001SXK_BOVIN for method hy

Processing file: 0-4163_Phy0001U32_BOVIN for method hy

Processing file: 0-4262_Phy0001SY5_BOVIN for met

---
# **Homo Sapiens**

## Advantage 1 (QPU)

In [8]:
# Homo sapiens, method 'qa' (Advantage 1 QPU section).
species_name, method = 'Homo_sapiens', 'qa'
compare_trees(species_name=species_name, method=method)

Processing file: 0-1457_Phy0007YUQ_HUMAN for method qa

Processing file: 0-1835_Phy0008G06_HUMAN for method qa

Processing file: 0-2074_Phy0008CR1_HUMAN for method qa

Processing file: 0-2125_Phy0007ZV6_HUMAN for method qa

Processing file: 0-2140_Phy0007YN0_HUMAN for method qa

Processing file: 0-2250_Phy0007XGT_HUMAN for method qa

Processing file: 0-2487_Phy0008FYG_HUMAN for method qa

Processing file: 0-2567_Phy00081A5_HUMAN for method qa

Processing file: 0-2615_Phy0008BM0_HUMAN for method qa

Processing file: 0-2801_Phy00086HI_HUMAN for method qa

Processing file: 0-2823_Phy00088LQ_HUMAN for method qa

Processing file: 0-2841_Phy000839J_HUMAN for method qa

Processing file: 0-3055_Phy000875B_HUMAN for method qa

Processing file: 0-3060_Phy0007ZNC_HUMAN for method qa

Processing file: 0-3195_Phy0007XME_HUMAN for method qa

Processing file: 0-3237_Phy0007XCN_HUMAN for method qa

Processing file: 0-3537_Phy0008HMK_HUMAN for method qa

Processing file: 0-3630_Phy0008IWH_HUMAN for met

## Advantage 2 (QPU)

In [9]:
# Homo sapiens, method 'qa_adv2' (Advantage 2 QPU section).
species_name, method = 'Homo_sapiens', 'qa_adv2'
compare_trees(species_name=species_name, method=method)

Processing file: 0-1457_Phy0007YUQ_HUMAN for method qa_adv2

Processing file: 0-1835_Phy0008G06_HUMAN for method qa_adv2

Processing file: 0-2074_Phy0008CR1_HUMAN for method qa_adv2

Processing file: 0-2125_Phy0007ZV6_HUMAN for method qa_adv2

Processing file: 0-2140_Phy0007YN0_HUMAN for method qa_adv2

Processing file: 0-2250_Phy0007XGT_HUMAN for method qa_adv2

Processing file: 0-2487_Phy0008FYG_HUMAN for method qa_adv2

Processing file: 0-2567_Phy00081A5_HUMAN for method qa_adv2

Processing file: 0-2615_Phy0008BM0_HUMAN for method qa_adv2

Processing file: 0-2801_Phy00086HI_HUMAN for method qa_adv2

Processing file: 0-2823_Phy00088LQ_HUMAN for method qa_adv2

Processing file: 0-2841_Phy000839J_HUMAN for method qa_adv2

Processing file: 0-3055_Phy000875B_HUMAN for method qa_adv2

Processing file: 0-3060_Phy0007ZNC_HUMAN for method qa_adv2

Processing file: 0-3195_Phy0007XME_HUMAN for method qa_adv2

Processing file: 0-3237_Phy0007XCN_HUMAN for method qa_adv2

Processing file: 0-3537_

## Hybrid approach

In [10]:
# Homo sapiens, method 'hy' (hybrid approach section).
species_name, method = 'Homo_sapiens', 'hy'
compare_trees(species_name=species_name, method=method)

Processing file: 0-1457_Phy0007YUQ_HUMAN for method hy

Processing file: 0-1835_Phy0008G06_HUMAN for method hy

Processing file: 0-2074_Phy0008CR1_HUMAN for method hy

Processing file: 0-2125_Phy0007ZV6_HUMAN for method hy

Processing file: 0-2140_Phy0007YN0_HUMAN for method hy

Processing file: 0-2250_Phy0007XGT_HUMAN for method hy

Processing file: 0-2487_Phy0008FYG_HUMAN for method hy

Processing file: 0-2567_Phy00081A5_HUMAN for method hy

Processing file: 0-2615_Phy0008BM0_HUMAN for method hy

Processing file: 0-2801_Phy00086HI_HUMAN for method hy

Processing file: 0-2823_Phy00088LQ_HUMAN for method hy

Processing file: 0-2841_Phy000839J_HUMAN for method hy

Processing file: 0-3055_Phy000875B_HUMAN for method hy

Processing file: 0-3060_Phy0007ZNC_HUMAN for method hy

Processing file: 0-3195_Phy0007XME_HUMAN for method hy

Processing file: 0-3237_Phy0007XCN_HUMAN for method hy

Processing file: 0-3537_Phy0008HMK_HUMAN for method hy

Processing file: 0-3630_Phy0008IWH_HUMAN for met

---
# **Penicillium Digitatum**

## Advantage 1 (QPU)

In [11]:
# Penicillium digitatum, method 'qa' (Advantage 1 QPU section).
species_name, method = 'Penicillium_digitatum', 'qa'
compare_trees(species_name=species_name, method=method)

Processing file: 0-2576_Phy0042T98_PENDI for method qa

Processing file: 0-2645_Phy003Q8P2_PENDI for method qa

Processing file: 0-2712_Phy003Q6UV_PENDI for method qa

Processing file: 0-2859_Phy003QA9A_PENDI for method qa

Processing file: 0-2912_Phy003QBA0_PENDI for method qa

Processing file: 0-3202_Phy003QAL1_PENDI for method qa

Processing file: 0-3312_Phy003Q83V_PENDI for method qa

Processing file: 0-3552_Phy0042SL3_PENDI for method qa

Processing file: 0-3611_Phy003QBO6_PENDI for method qa

Processing file: 0-3995_Phy0045P6V_PENDI for method qa

Processing file: 0-4062_Phy0042T5Q_PENDI for method qa

Processing file: 0-4122_Phy003QCB4_PENDI for method qa

Processing file: 0-4151_Phy003QBS4_PENDI for method qa

Processing file: 0-4226_Phy0042T4N_PENDI for method qa

Processing file: 0-4502_Phy003Q8L5_PENDI for method qa

Processing file: 0-4642_Phy0048AWY_PENDI for method qa

Processing file: 0-4843_Phy0042UCY_PENDI for method qa

Processing file: 0-4916_Phy003QAXZ_PENDI for met

## Advantage 2 (QPU)

In [12]:
# Penicillium digitatum, method 'qa_adv2' (Advantage 2 QPU section).
species_name, method = 'Penicillium_digitatum', 'qa_adv2'
compare_trees(species_name=species_name, method=method)

Processing file: 0-2576_Phy0042T98_PENDI for method qa_adv2

Processing file: 0-2645_Phy003Q8P2_PENDI for method qa_adv2

Processing file: 0-2712_Phy003Q6UV_PENDI for method qa_adv2

Processing file: 0-2859_Phy003QA9A_PENDI for method qa_adv2

Processing file: 0-2912_Phy003QBA0_PENDI for method qa_adv2

Processing file: 0-3202_Phy003QAL1_PENDI for method qa_adv2

Processing file: 0-3312_Phy003Q83V_PENDI for method qa_adv2

Processing file: 0-3552_Phy0042SL3_PENDI for method qa_adv2

Processing file: 0-3611_Phy003QBO6_PENDI for method qa_adv2

Processing file: 0-3995_Phy0045P6V_PENDI for method qa_adv2

Processing file: 0-4062_Phy0042T5Q_PENDI for method qa_adv2

Processing file: 0-4122_Phy003QCB4_PENDI for method qa_adv2

Processing file: 0-4151_Phy003QBS4_PENDI for method qa_adv2

Processing file: 0-4226_Phy0042T4N_PENDI for method qa_adv2

Processing file: 0-4502_Phy003Q8L5_PENDI for method qa_adv2

Processing file: 0-4642_Phy0048AWY_PENDI for method qa_adv2

Processing file: 0-4843_

## Hybrid approach

In [13]:
# Penicillium digitatum, method 'hy' (hybrid approach section).
species_name, method = 'Penicillium_digitatum', 'hy'
compare_trees(species_name=species_name, method=method)

Processing file: 0-2576_Phy0042T98_PENDI for method hy

Processing file: 0-2645_Phy003Q8P2_PENDI for method hy

Processing file: 0-2712_Phy003Q6UV_PENDI for method hy

Processing file: 0-2859_Phy003QA9A_PENDI for method hy

Processing file: 0-2912_Phy003QBA0_PENDI for method hy

Processing file: 0-3202_Phy003QAL1_PENDI for method hy

Processing file: 0-3312_Phy003Q83V_PENDI for method hy

Processing file: 0-3552_Phy0042SL3_PENDI for method hy

Processing file: 0-3611_Phy003QBO6_PENDI for method hy

Processing file: 0-3995_Phy0045P6V_PENDI for method hy

Processing file: 0-4062_Phy0042T5Q_PENDI for method hy

Processing file: 0-4122_Phy003QCB4_PENDI for method hy

Processing file: 0-4151_Phy003QBS4_PENDI for method hy

Processing file: 0-4226_Phy0042T4N_PENDI for method hy

Processing file: 0-4502_Phy003Q8L5_PENDI for method hy

Processing file: 0-4642_Phy0048AWY_PENDI for method hy

Processing file: 0-4843_Phy0042UCY_PENDI for method hy

Processing file: 0-4916_Phy003QAXZ_PENDI for met

---
# **Puccinia Graminis**

## Advantage 1 (QPU)

In [14]:
# Puccinia graminis, method 'qa' (Advantage 1 QPU section).
species_name, method = 'Puccinia_graminis', 'qa'
compare_trees(species_name=species_name, method=method)

Processing file: 0-1187_Phy008NZS8_PUCGT for method qa

Processing file: 0-1374_Phy008O5P7_PUCGT for method qa

Processing file: 0-1408_Phy008O938_PUCGT for method qa

Processing file: 0-1652_Phy008O3QR_PUCGT for method qa

Processing file: 0-1767_Phy008O30H_PUCGT for method qa

Processing file: 0-1780_Phy008O9U2_PUCGT for method qa

Processing file: 0-1870_Phy008O0TK_PUCGT for method qa

Processing file: 0-1976_Phy008O6FV_PUCGT for method qa

Processing file: 0-2071_Phy008O4KN_PUCGT for method qa

Processing file: 0-2334_Phy008O82G_PUCGT for method qa

Processing file: 0-2336_Phy008O30Z_PUCGT for method qa

Processing file: 0-2348_Phy008OAD8_PUCGT for method qa

Processing file: 0-2422_Phy008O85K_PUCGT for method qa

Processing file: 0-2544_Phy008NZ6B_PUCGT for method qa

Processing file: 0-2621_Phy008O7OW_PUCGT for method qa

Processing file: 0-2660_Phy008O7K1_PUCGT for method qa

Processing file: 0-2796_Phy008NZ68_PUCGT for method qa

Processing file: 0-3095_Phy008O7MF_PUCGT for met

## Advantage 2 (QPU)

In [15]:
# Puccinia graminis, method 'qa_adv2' (Advantage 2 QPU section).
species_name, method = 'Puccinia_graminis', 'qa_adv2'
compare_trees(species_name=species_name, method=method)

Processing file: 0-1187_Phy008NZS8_PUCGT for method qa_adv2

Processing file: 0-1374_Phy008O5P7_PUCGT for method qa_adv2

Processing file: 0-1408_Phy008O938_PUCGT for method qa_adv2

Processing file: 0-1652_Phy008O3QR_PUCGT for method qa_adv2

Processing file: 0-1767_Phy008O30H_PUCGT for method qa_adv2

Processing file: 0-1780_Phy008O9U2_PUCGT for method qa_adv2

Processing file: 0-1870_Phy008O0TK_PUCGT for method qa_adv2

Processing file: 0-1976_Phy008O6FV_PUCGT for method qa_adv2

Processing file: 0-2071_Phy008O4KN_PUCGT for method qa_adv2

Processing file: 0-2334_Phy008O82G_PUCGT for method qa_adv2

Processing file: 0-2336_Phy008O30Z_PUCGT for method qa_adv2

Processing file: 0-2348_Phy008OAD8_PUCGT for method qa_adv2

Processing file: 0-2422_Phy008O85K_PUCGT for method qa_adv2

Processing file: 0-2544_Phy008NZ6B_PUCGT for method qa_adv2

Processing file: 0-2621_Phy008O7OW_PUCGT for method qa_adv2

Processing file: 0-2660_Phy008O7K1_PUCGT for method qa_adv2

Processing file: 0-2796_

## Hybrid approach

In [16]:
# Puccinia graminis, method 'hy' (hybrid approach section).
species_name, method = 'Puccinia_graminis', 'hy'
compare_trees(species_name=species_name, method=method)

Processing file: 0-1187_Phy008NZS8_PUCGT for method hy

Processing file: 0-1374_Phy008O5P7_PUCGT for method hy

Processing file: 0-1408_Phy008O938_PUCGT for method hy

Processing file: 0-1652_Phy008O3QR_PUCGT for method hy

Processing file: 0-1767_Phy008O30H_PUCGT for method hy

Processing file: 0-1780_Phy008O9U2_PUCGT for method hy

Processing file: 0-1870_Phy008O0TK_PUCGT for method hy

Processing file: 0-1976_Phy008O6FV_PUCGT for method hy

Processing file: 0-2071_Phy008O4KN_PUCGT for method hy

Processing file: 0-2334_Phy008O82G_PUCGT for method hy

Processing file: 0-2336_Phy008O30Z_PUCGT for method hy

Processing file: 0-2348_Phy008OAD8_PUCGT for method hy

Processing file: 0-2422_Phy008O85K_PUCGT for method hy

Processing file: 0-2544_Phy008NZ6B_PUCGT for method hy

Processing file: 0-2621_Phy008O7OW_PUCGT for method hy

Processing file: 0-2660_Phy008O7K1_PUCGT for method hy

Processing file: 0-2796_Phy008NZ68_PUCGT for method hy

Processing file: 0-3095_Phy008O7MF_PUCGT for met

---
# **Schizosaccharomyces Pombe**

## Advantage 1 (QPU)

In [17]:
# Schizosaccharomyces pombe, method 'qa' (Advantage 1 QPU section).
species_name, method = 'Schizosaccharomyces_pombe', 'qa'
compare_trees(species_name=species_name, method=method)

Processing file: 0-2641_Phy000D1A6_SCHPO for method qa

Processing file: 0-2722_Phy000D0UI_SCHPO for method qa

Processing file: 0-2805_Phy000D0YZ_SCHPO for method qa

Processing file: 0-2993_Phy000D1TD_SCHPO for method qa

Processing file: 0-3028_Phy000D0XD_SCHPO for method qa

Processing file: 0-3205_Phy000D0PQ_SCHPO for method qa

Processing file: 0-3329_Phy000D0PY_SCHPO for method qa

Processing file: 0-3437_Phy000D0PP_SCHPO for method qa

Processing file: 0-3607_Phy000D0Q0_SCHPO for method qa

Processing file: 0-3779_Phy000D0OG_SCHPO for method qa

Processing file: 0-3891_Phy000D0SX_SCHPO for method qa

Processing file: 0-3906_Phy000D0SK_SCHPO for method qa

Processing file: 0-4318_Phy000D0M2_SCHPO for method qa

Processing file: 0-4331_Phy000D0N4_SCHPO for method qa

Processing file: 0-4394_Phy000D0SL_SCHPO for method qa

Processing file: 0-4494_Phy000D0QL_SCHPO for method qa

Processing file: 0-4533_Phy000D0UP_SCHPO for method qa

Processing file: 0-4593_Phy000D17Y_SCHPO for met

## Advantage 2 (QPU)

In [18]:
# Schizosaccharomyces pombe, method 'qa_adv2' (Advantage 2 QPU section).
species_name, method = 'Schizosaccharomyces_pombe', 'qa_adv2'
compare_trees(species_name=species_name, method=method)

Processing file: 0-2641_Phy000D1A6_SCHPO for method qa_adv2

Processing file: 0-2722_Phy000D0UI_SCHPO for method qa_adv2

Processing file: 0-2805_Phy000D0YZ_SCHPO for method qa_adv2

Processing file: 0-2993_Phy000D1TD_SCHPO for method qa_adv2

Processing file: 0-3028_Phy000D0XD_SCHPO for method qa_adv2

Processing file: 0-3205_Phy000D0PQ_SCHPO for method qa_adv2

Processing file: 0-3329_Phy000D0PY_SCHPO for method qa_adv2

Processing file: 0-3437_Phy000D0PP_SCHPO for method qa_adv2

Processing file: 0-3607_Phy000D0Q0_SCHPO for method qa_adv2

Processing file: 0-3779_Phy000D0OG_SCHPO for method qa_adv2

Processing file: 0-3891_Phy000D0SX_SCHPO for method qa_adv2

Processing file: 0-3906_Phy000D0SK_SCHPO for method qa_adv2

Processing file: 0-4318_Phy000D0M2_SCHPO for method qa_adv2

Processing file: 0-4331_Phy000D0N4_SCHPO for method qa_adv2

Processing file: 0-4394_Phy000D0SL_SCHPO for method qa_adv2

Processing file: 0-4494_Phy000D0QL_SCHPO for method qa_adv2

Processing file: 0-4533_

## Hybrid approach

In [19]:
# Schizosaccharomyces pombe, method 'hy' (hybrid approach section).
species_name, method = 'Schizosaccharomyces_pombe', 'hy'
compare_trees(species_name=species_name, method=method)

Processing file: 0-2641_Phy000D1A6_SCHPO for method hy

Processing file: 0-2722_Phy000D0UI_SCHPO for method hy

Processing file: 0-2805_Phy000D0YZ_SCHPO for method hy

Processing file: 0-2993_Phy000D1TD_SCHPO for method hy

Processing file: 0-3028_Phy000D0XD_SCHPO for method hy

Processing file: 0-3205_Phy000D0PQ_SCHPO for method hy

Processing file: 0-3329_Phy000D0PY_SCHPO for method hy

Processing file: 0-3437_Phy000D0PP_SCHPO for method hy

Processing file: 0-3607_Phy000D0Q0_SCHPO for method hy

Processing file: 0-3779_Phy000D0OG_SCHPO for method hy

Processing file: 0-3891_Phy000D0SX_SCHPO for method hy

Processing file: 0-3906_Phy000D0SK_SCHPO for method hy

Processing file: 0-4318_Phy000D0M2_SCHPO for method hy

Processing file: 0-4331_Phy000D0N4_SCHPO for method hy

Processing file: 0-4394_Phy000D0SL_SCHPO for method hy

Processing file: 0-4494_Phy000D0QL_SCHPO for method hy

Processing file: 0-4533_Phy000D0UP_SCHPO for method hy

Processing file: 0-4593_Phy000D17Y_SCHPO for met

# **All together**

To make it easier to add a new method or run new tests, the cell below computes all distances without running the whole notebook.

In [None]:
# Rebuild the RF-distance CSV of every species/method combination in one go.
species = [
    'Bos_taurus',
    'Homo_sapiens',
    'Puccinia_graminis',
    'Penicillium_digitatum',
    'Schizosaccharomyces_pombe',
]
methods = ['qa', 'qa_adv2', 'hy']

# Species-major order: all methods of one species before moving to the next.
species_method_pairs = [(s, m) for s in species for m in methods]
for specie, method in species_method_pairs:
    compare_trees(species_name=specie, method=method)

# ***Clustering distance?***

In [4]:
# TODO: Do this for the clustering distance

In [25]:
# Worked example: clustering information distance for one Bos taurus tree.
ape = importr('ape')
tree_dist = importr('TreeDist')

# Ground truth first, then one reconstruction per method (qa, qa_adv2, hy);
# the slicing at the end of the cell relies on this order.
ground_truth = load_tree('../benchmarking matrices/Bos_taurus/0-2516_Phy0001SS5_BOVIN_tree.txt')
qa_reconstructed = load_tree('./Bos_taurus/0-2516_Phy0001SS5_BOVIN_nmcutqa_tree.txt')
qa2_reconstructed = load_tree('./Bos_taurus/0-2516_Phy0001SS5_BOVIN_nmcutqa_adv2_tree.txt')
hy_reconstructed = load_tree('./Bos_taurus/0-2516_Phy0001SS5_BOVIN_nmcuthy_tree.txt')

comparison = [ground_truth, qa_reconstructed, qa2_reconstructed, hy_reconstructed]

# Hand all tree strings to R's ape::read.tree in a single call.
r_trees = ape.read_tree(text=robjects.StrVector(comparison))

# Pairwise distances between all trees; normalize=True asks TreeDist for the
# normalized variant rather than the raw value in bits.
r_dist_obj = tree_dist.ClusteringInfoDistance(r_trees, normalize=True)

# Convert the R distance object to a usable Python/Numpy matrix
as_matrix = robjects.r['as.matrix']
dist_matrix = np.array(as_matrix(r_dist_obj))

# Format as a DataFrame for readability (kept for interactive inspection)
labels = [f"Tree_{i+1}" for i in range(len(comparison))]
df_dist = pd.DataFrame(dist_matrix, index=labels, columns=labels)

# Column 0 is the distance of every tree to the ground truth; rows 1.. are the
# reconstructions. 1 - normalized distance turns it into a similarity, so the
# label must not claim a distance matrix in bits.
print("Smith (2020) normalized clustering information similarity to ground truth (1 - distance) for qa, qa_adv2, hy:")
print(1 - dist_matrix[1:, 0])

Smith (2020) Clustering Information Distance Matrix (Bits):
[0.60370116 0.59393971 0.87017119]


In [27]:
# Compute, for every species and every ground-truth tree, the clustering
# information similarity of each method's reconstruction and store one CSV per
# species/method pair.
species = ['Bos_taurus','Homo_sapiens','Puccinia_graminis','Penicillium_digitatum','Schizosaccharomyces_pombe']
methods = ['qa', 'qa_adv2', 'hy']
ape = importr('ape')
tree_dist = importr('TreeDist')
# Loop-invariant lookup of R's as.matrix, hoisted out of the per-file loop.
as_matrix = robjects.r['as.matrix']


for species_name in species:

    # Start a fresh CSV (header only) for each method of this species.
    # NOTE: the column is named 'clustering_distance' but the values written
    # below are 1 - normalized distance (a similarity). The header is kept
    # unchanged so existing consumers of these files keep working.
    for method in methods:
        with open(f'./{species_name}/nmcut{method}_clust-distance.csv', 'w') as fp:
            fp.write('size,branch_length,id,clustering_distance\n')

    # os.listdir order is arbitrary; sort for reproducible row order.
    files = sorted(os.listdir(f'../benchmarking matrices/{species_name}'))
    for file in files:
        if not file.endswith("_tree.txt"):
            continue  # skip non-tree files
        file_ext = file[:-len("_tree.txt")]

        # All methods are handled together for each file, so report them all
        # instead of whatever value `method` was left with by an earlier loop.
        print(f'Processing file: {file_ext} for methods {", ".join(methods)}\n')

        # Size is the number of rows of the matching matrix file.
        size = np.loadtxt(f'../benchmarking matrices/{species_name}/{file_ext}_matrix.txt').shape[0]
        # First 4-digit run of the id becomes the decimals of the branch length.
        branch_length = '0.' + re.search(r'\d{4}', file_ext)[0]

        # Ground truth goes first, followed by one reconstruction per method in
        # the order of `methods`; the slicing below relies on this layout.
        all_trees = [load_tree(f'../benchmarking matrices/{species_name}/{file}')]
        all_trees.extend(
            load_tree(f'./{species_name}/{file_ext}_nmcut{m}_tree.txt') for m in methods
        )

        r_trees = ape.read_tree(text=robjects.StrVector(all_trees))

        r_dist_obj = tree_dist.ClusteringInfoDistance(r_trees, normalize=True)

        # Column 0 = distance to the ground truth; rows 1.. = reconstructions.
        similarities = 1 - np.array(as_matrix(r_dist_obj))[1:, 0]

        for method, similarity in zip(methods, similarities):
            with open(f'./{species_name}/nmcut{method}_clust-distance.csv', 'a') as fp:
                fp.write(f'{size},{branch_length},{file_ext},{similarity}\n')

Processing file: 0-2516_Phy0001SS5_BOVIN for method hy

Processing file: 0-2536_Phy0001SML_BOVIN for method hy

Processing file: 0-2653_Phy0001SSU_BOVIN for method hy

Processing file: 0-2659_Phy0001SUZ_BOVIN for method hy

Processing file: 0-2876_Phy0001SON_BOVIN for method hy

Processing file: 0-2940_Phy0001SHY_BOVIN for method hy

Processing file: 0-3039_Phy0001SMM_BOVIN for method hy

Processing file: 0-3081_Phy0001STS_BOVIN for method hy

Processing file: 0-3126_Phy0001SJ2_BOVIN for method hy

Processing file: 0-3134_Phy0001SQ2_BOVIN for method hy

Processing file: 0-3366_Phy0001T1N_BOVIN for method hy

Processing file: 0-3560_Phy0001SLE_BOVIN for method hy

Processing file: 0-3653_Phy0001SNJ_BOVIN for method hy

Processing file: 0-3893_Phy0001T1M_BOVIN for method hy

Processing file: 0-3978_Phy0001SKM_BOVIN for method hy

Processing file: 0-4064_Phy0001SXK_BOVIN for method hy

Processing file: 0-4163_Phy0001U32_BOVIN for method hy

Processing file: 0-4262_Phy0001SY5_BOVIN for met