```python
pip install dendropy
```

In [6]:
import json
import dendropy

In [7]:
def nexus_tree_to_jsonl(input_file, output_file):
    # 从输入文件中读取Nexus树
    nexus_trees = dendropy.TreeList.get_from_path(input_file, "nexus")

    # 将Nexus树转换为JSONL格式
    with open(output_file, 'w', encoding='utf-8') as f:
        for tree in nexus_trees:
            tree_dict = {
                'newick': tree.as_string(schema='newick'),
                'labels': {},
                'metadata': {}
            }

            # 为节点添加标签和元数据
            for node in tree.preorder_node_iter():
                node_id = node.__hash__()
                if node.taxon is not None:
                    tree_dict['labels'][str(node_id)] = node.taxon.label

                if node.annotations:
                    metadata = {}
                    for annotation in node.annotations:
                        metadata[annotation.name] = annotation.value
                    tree_dict['metadata'][str(node_id)] = metadata

            # 将字典转换为JSON字符串，并将其写入输出文件
            json_str = json.dumps(tree_dict)
            f.write(json_str + '\n')



In [8]:
# 调用函数进行转换
input_file = 'MAPLE_support_exampleMAT_100_MATfromGivenTree_nexusTree.tree'
output_file = 'your_output_file.jsonl'
nexus_tree_to_jsonl(input_file, output_file)


In [9]:
import csv
import dendropy

def extract_metadata_from_nexus(input_file, output_file):
    # 从输入文件中读取Nexus树
    nexus_trees = dendropy.TreeList.get_from_path(input_file, "nexus")

    # 提取元数据并存储为CSV
    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['node_id', 'annotation_name', 'annotation_value']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        for tree in nexus_trees:
            for node in tree.preorder_node_iter():
                node_id = node.__hash__()

                if node.annotations:
                    for annotation in node.annotations:
                        writer.writerow({
                            'node_id': node_id,
                            'annotation_name': annotation.name,
                            'annotation_value': annotation.value
                        })

In [10]:
# 调用函数进行转换
input_file = 'MAPLE_support_exampleMAT_100_MATfromGivenTree_nexusTree.tree'
output_file = 'metadata_output.csv'
extract_metadata_from_nexus(input_file, output_file)

In [12]:
import csv
import dendropy

def extract_metadata_from_nexus(input_file, output_file):
    # 从输入文件中读取Nexus树
    nexus_trees = dendropy.TreeList.get_from_path(input_file, "nexus")

    # 提取元数据并存储为CSV
    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['node_name', 'annotation_name', 'annotation_value']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        for tree in nexus_trees:
            for node in tree.preorder_node_iter():
                node_id = node.__hash__()
                node_name = node.label if node.label else f"node_{node_id}"

                if node.annotations:
                    for annotation in node.annotations:
                        writer.writerow({
                            'node_name': node_name,
                            'annotation_name': annotation.name,
                            'annotation_value': annotation.value
                        })

# 调用函数进行转换
input_file = 'MAPLE_support_exampleMAT_100_MATfromGivenTree_nexusTree.tree'
output_file = 'metadata_output.csv'
extract_metadata_from_nexus(input_file, output_file)


In [3]:
import csv
import dendropy

def nexus_tree_to_metadata_csv(input_file, output_file):
    # 从输入文件中读取Nexus树
    nexus_trees = dendropy.TreeList.get_from_path(input_file, "nexus")

    # 收集元数据
    metadata_list = []
    fieldnames = set()

    for tree in nexus_trees:
        for node in tree.preorder_node_iter():
            if node.taxon is not None and node.annotations:
                node_metadata = {'taxon': node.taxon.label}
                for annotation in node.annotations:
                    node_metadata[annotation.name] = annotation.value
                    fieldnames.add(annotation.name)
                metadata_list.append(node_metadata)

    # 将元数据写入CSV文件
    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['taxon'] + sorted(list(fieldnames))
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

        writer.writeheader()
        for metadata in metadata_list:
            writer.writerow(metadata)

# 调用函数进行转换
input_file = 'MAPLE_support_exampleMAT_100_MATfromGivenTree_nexusTree.tree'
output_file = 'your_output_file.csv'
nexus_tree_to_metadata_csv(input_file, output_file)


In [4]:
import pandas as pd

# 读取 CSV 文件
df = pd.read_csv('your_output_file.csv')

# 打印 DataFrame 的前几行
df

Unnamed: 0,taxon,Ns,alternativePlacements,mutations,support
0,EPI ISL 776270,"['1-55', '29838-29891']","['}', 'mutations={G22468T:1.0', 'G28878A:1.0']",,1.000000
1,EPI ISL 498658,"['1-17', '19293-19551', '29870-29870']","['}', 'mutations={A8081G:1.0', 'C17747T:1.0', ...",,1.000000
2,EPI ISL 418243,['29782-29891'],"['}', 'mutations={C23707T:1.0', 'T27384C:1.0']",,1.000000
3,EPI ISL 422807,"['1-54', '29830-29891']","['}', 'mutations={T1570C:1.0', 'T17247C:1.0']",,1.000000
4,EPI ISL 543329,"['1-54', '19912-20200', '29837-29891']","['in7:0.48799013517601536', 'EPI_ISL_805346:0....","['C5654T:1.0', 'C11620T:1.0', 'C12525T:1.0', '...",0.488104
...,...,...,...,...,...
91,EPI ISL 843183,['1-37'],"['}', 'mutations={G1444T:1.0', 'A4870G:1.0', '...",,1.000000
92,EPI ISL 869222,"['1-54', '16255-16256', '24981-24982', '29837-...","['}', 'mutations={C583T:1.0', 'A6211G:1.0', 'G...",,1.000000
93,EPI ISL 914595,"['1-12', '15952-15952', '19395-19395', '19399-...","['in27:0.21005683474732648}', 'mutations={C105...",,0.789909
94,EPI ISL 559392,"['1-54', '29837-29891']","['in7:0.4999417685218649}', 'mutations={G25217...",,0.500058


In [13]:
pip install taxoniumtools

Collecting taxoniumtools
  Downloading taxoniumtools-2.0.91-py3-none-any.whl (18 kB)
Collecting treeswift
  Downloading treeswift-1.1.35-py2.py3-none-any.whl (32 kB)
Collecting google-api-python-client
  Downloading google_api_python_client-2.86.0-py2.py3-none-any.whl (11.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.3/11.3 MB[0m [31m124.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting alive-progress
  Downloading alive_progress-3.1.1-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.7/75.7 kB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting protobuf<4
  Downloading protobuf-3.20.3-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m84.5 MB/s[0m eta [36m0:00:00[0m
Collecting docker
  Downloading docker-6.0.1-py3-none-any.whl (147 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
usher_to_taxonium --input tfci.pb --output tfci-taxonium.jsonl.gz --metadata tfci.meta.tsv.gz --genbank hu1.gb \
--columns genbank_accession,country,date,pangolin_lineage

In [None]:
usage: newick_to_taxonium [-h] -i INPUT -o OUTPUT [-m METADATA] [-c COLUMNS]
                          [-j CONFIG_JSON] [-t TITLE]
                          [--overlay_html OVERLAY_HTML]
                          [--key_column KEY_COLUMN]

In [14]:
newick_to_taxonium -i MAPLE_support_exampleMAT_100_MATfromGivenTree_nexusTree.tree -o tree.html -m metadata.csv -c species -t "Evolutionary tree of species"


SyntaxError: invalid syntax (917537725.py, line 1)

In [None]:
newick_to_taxonium -i MAPLE_support_exampleMAT_100_MATfromGivenTree_nexusTree.tree -o tree.html -m metadata.csv -c species -t "Evolutionary tree of species"
