In [29]:
import os
from Bio import Phylo, SeqIO
import pandas as pd
import networkx as nx
import random
import json
import gzip
from collections import defaultdict

# Verzeichnis und Datei definieren
directory = "/mnt/c/Users/uhewm/Desktop/ProjectHGT/bacterial_pangenomes_2025/bacthur_example/1428"


# === Newick-Baum laden ===
newick_path = os.path.join(directory, "vis", "strain_tree.nwk")
if not os.path.exists(newick_path):
    raise FileNotFoundError(f"Datei nicht gefunden: {newick_path}")

tree = Phylo.read(newick_path, "newick")
#Phylo.draw(tree)

# === CSV-Datei laden ===
csv_path = os.path.join(directory, "gene_presence_absence_matrix.csv")
if not os.path.exists(csv_path):
    raise FileNotFoundError(f"Datei nicht gefunden: {csv_path}")

gene_matrix = pd.read_csv(csv_path, index_col=0)

number_of_strains = gene_matrix.shape[0]
clonal_root_node = number_of_strains * 2 - 1

print("CSV-Form:", gene_matrix.shape)
#display(gene_matrix.head())

# GEN NICHT GLEICH LANG. Bsp: GC00001959
# GEN DUPLIZIERT Bsp: GC00000933_3

# === Choose random gene ===
gene_columns = gene_matrix.columns[1:]
random_gene = random.choice(gene_columns)
#random_gene = "GC00000933_3"
print(f"Ausgewähltes Gen: {random_gene}")

# Index i des zufälligen Gens in den Spaltennamen finden
gene_id = gene_matrix.columns.get_loc(random_gene)
print(f"GeneID: {gene_id}")



# Directed Graph erstellen
G = nx.DiGraph()

# Zuerst Wurzel finden
root = tree.root

# Funktion zum rekursiven Setzen der kumulativen Zeiten
def set_cumulative_time(clade, parent_time=0.0):
    current_time = parent_time + (clade.branch_length if clade.branch_length is not None else 0.0)
    node_name = clade.name or str(id(clade))
    
    if node_name not in G:
        G.add_node(
            node_name,
            core_distance=0.0,
            allele_distance=0.0,
            time=current_time
        )
    else:
        G.nodes[node_name]['time'] = current_time

    for child in clade.clades:
        child_name = child.name or str(id(child))
        G.add_edge(node_name, child_name)
        set_cumulative_time(child, current_time)

# Baum durchlaufen und Zeiten setzen
set_cumulative_time(root)

# Correct the times:
max_time = max([G.nodes[node]['time'] for node in G.nodes])
for node in G.nodes:
    G.nodes[node]['time'] = max_time - G.nodes[node]['time']

print(f"Graph hat {G.number_of_nodes()} Knoten und {G.number_of_edges()} Kanten")

# Add clonal distances:
subtree_has_gene = {}
for node in gene_matrix.index.tolist():
    G.nodes[node]['core_distance'] = 0.0
    G.nodes[node]['allele_distance'] = 0.0

    if gene_matrix.iloc[gene_matrix.index.get_loc(node), gene_id] == 1:
        subtree_has_gene[node] = True
    else:
        subtree_has_gene[node] = False

root_node = tree.root.name or str(id(tree.root))
leaves = set(gene_matrix.index)
nodes_above_leaves = [
    n for n in nx.dfs_preorder_nodes(G, source=root_node) 
    if n not in leaves
]
nodes_above_leaves.reverse() 

for node in nodes_above_leaves:        
    child_list = list(G.successors(node))
    if any(subtree_has_gene[child] for child in child_list):
        subtree_has_gene[node] = True
    else:
        subtree_has_gene[node] = False

for node in nodes_above_leaves:
    
    G.nodes[node]['core_distance'] = 0.0
    G.nodes[node]['allele_distance'] = 0.0
    
    successors = list(G.successors(node))
    if len(successors) != 2:
        continue  # weiter zur nächsten Iteration, falls nicht genau 2 Kinder
    else: 
        c0, c1 = successors  # Entpacke einmal
    c0_has, c1_has = subtree_has_gene[c0], subtree_has_gene[c1]

    # Falls beide Subtrees kein Gen enthalten, können wir überspringen
    if not (c0_has or c1_has):
        continue

    if c0_has and c1_has:
        node_time = G.nodes[node]['time']
        core_distance = (
            2 * node_time
            - G.nodes[c0]['time'] - G.nodes[c1]['time']
            + G.nodes[c0]["core_distance"]
            + G.nodes[c1]["core_distance"]
        )
    elif c0_has:
        core_distance = G.nodes[c0]["core_distance"]
    else:  # c1_has
        core_distance = G.nodes[c1]["core_distance"]

    G.nodes[node]["core_distance"] = core_distance

# Pfad zur .fa.gz-Datei
faa_path = os.path.join(directory, "vis", "geneCluster", f"{random_gene}_na_aln.fa.gz")

if not os.path.exists(faa_path):
    raise FileNotFoundError(f"Datei nicht gefunden: {faa_path}")

# .fa.gz direkt parsen
with gzip.open(faa_path, "rt") as handle:
    sequences = list(SeqIO.parse(handle, "fasta"))

# Dictionary bauen: key = Teil vor dem ersten "-", value = Liste von Sequenzen
sequence_alignments = defaultdict(list)

for rec in sequences:
    key = rec.name.split('-')[0]
    sequence_alignments[key].append(rec)
    
print(f"{len(sequences)} Sequenzen in {random_gene}_na_aln.fa.gz gefunden.")
print(f"Erste Sequenz: {sequences[0].id}")

print(sequences[0].seq[:60], "...")

# Datei geneCluster laden
gene_cluster_path = os.path.join(directory, "vis", "geneCluster.json")
with open(gene_cluster_path, "r") as f:
    gene_cluster_data = json.load(f)


# Den i-ten Eintrag aus geneCluster auswählen
if gene_id >= len(gene_cluster_data):
    raise IndexError(f"Index {i} liegt außerhalb der geneCluster-Daten mit {len(gene_cluster_data)} Einträgen.")

selected_cluster = gene_cluster_data[gene_id]
if selected_cluster['msa'] != random_gene:
    raise ValueError(f"Gene Id error. Selected_cluster['msa'] is not equal to random_gene.")


CSV-Form: (102, 21584)
Ausgewähltes Gen: GC00001663
GeneID: 771
Graph hat 203 Knoten und 202 Kanten
102 Sequenzen in GC00001663_na_aln.fa.gz gefunden.
Erste Sequenz: GCF_000161495.1_ASM16149v1_genomic-BTHUR0002_RS08255-1-aldose_1-epimerase_family_protein
ATGACAGCAACAATTCAAAATGAAAAAGTGATCGTTTCCATTTCTGACAAAGGTGCAGAA ...


In [30]:
import numpy as np
from Bio.Align import MultipleSeqAlignment
from Bio.SeqRecord import SeqRecord
from scipy.optimize import linear_sum_assignment

# Wandelt SeqRecord in np.array von chars um (einmalig, für Geschwindigkeit)
def seq_to_array(seq_record):
    return np.frombuffer(str(seq_record.seq).encode("ascii"), dtype="S1")

# SNP-Zählung direkt zwischen zwei Sequenzen
def count_snp_positions_pair(arr1, arr2):
    m = min(len(arr1), len(arr2))
    return np.sum(arr1[:m] != arr2[:m])

# SNP-Zählung für ein Alignment
def count_snp_positions(alignment):
    if len(alignment) <= 1:
        return 0
    arr = np.array([list(str(rec.seq)) for rec in alignment], dtype="U1")
    identical_cols = np.all(arr == arr[0], axis=0)
    return np.sum(~identical_cols)

# Paarweises Matching von zwei Mengen an Sequenzen
def match_sequences(seq_list1, seq_list2):
    n, m = len(seq_list1), len(seq_list2)
    size = min(n, m)

    # Vorab in Arrays konvertieren
    arrs1 = [seq_to_array(s) for s in seq_list1]
    arrs2 = [seq_to_array(s) for s in seq_list2]

    # Kostenmatrix mit SNPs berechnen
    cost_matrix = np.zeros((n, m), dtype=int)
    for i, a1 in enumerate(arrs1):
        for j, a2 in enumerate(arrs2):
            cost_matrix[i, j] = count_snp_positions_pair(a1, a2)

    # Hungarian Algorithmus
    row_ind, col_ind = linear_sum_assignment(cost_matrix)

    # Nur beste "size" Paare nehmen
    pairs = [(seq_list1[i], seq_list2[j]) for i, j in zip(row_ind, col_ind)][:size]
    return pairs

# === Hauptlogik ===

# Leaf -> Liste von SeqRecords
leaf_to_seqs = {
    leaf: sequence_alignments[leaf]
    for leaf in gene_matrix.index
    if leaf in sequence_alignments and gene_matrix.iloc[gene_matrix.index.get_loc(leaf), gene_id] == 1
}

leaf_aln_map = {leaf: seqs for leaf, seqs in leaf_to_seqs.items()}
node_alignment = {}

for node in nx.dfs_postorder_nodes(G, source=root_node):
    if node in leaf_to_seqs:
        # Blattknoten
        node_alignment[node] = MultipleSeqAlignment(leaf_aln_map[node])
        G.nodes[node]['allele_distance'] = 0
    else:
        leaves_under_node = [
            leaf for leaf in gene_matrix.index
            if leaf in leaf_aln_map and nx.has_path(G, source=node, target=leaf)
        ]

        all_records = []
        for i, leaf1 in enumerate(leaves_under_node):
            for leaf2 in leaves_under_node[i+1:]:
                pairs = match_sequences(leaf_aln_map[leaf1], leaf_aln_map[leaf2])
                for rec1, rec2 in pairs:
                    all_records.extend([rec1, rec2])

        if all_records:
            aln = MultipleSeqAlignment(all_records)
            node_alignment[node] = aln
            G.nodes[node]['allele_distance'] = count_snp_positions(aln)
        else:
            node_alignment[node] = MultipleSeqAlignment([])
            G.nodes[node]['allele_distance'] = 0

# Gesamt-SNPs über alle Leafs
all_leaf_records = [rec for seqs in leaf_aln_map.values() for rec in seqs]
print(f"Gesamt-SNPs unter Leafs: {count_snp_positions(MultipleSeqAlignment(all_leaf_records))}")


Gesamt-SNPs unter Leafs: 208


In [9]:
all_records[123]

SeqRecord(seq=Seq('------------------------------ATGAAGGTCAAGAATATTTATAAC...GGA'), id='GCF_003546665.1_ASM354666v1_genomic-CUC43_RS25635-1-hypothetical_protein', name='GCF_003546665.1_ASM354666v1_genomic-CUC43_RS25635-1-hypothetical_protein', description='GCF_003546665.1_ASM354666v1_genomic-CUC43_RS25635-1-hypothetical_protein <unknown description>', dbxrefs=[])

In [10]:
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.Align import MultipleSeqAlignment
from Bio import SeqIO
import numpy as np
import subprocess
from pathlib import Path

MUSCLE_PATH = "/usr/bin/muscle"

leaf_to_seq = {
    leaf: str(sequence_alignments[leaf].seq)
    for leaf in gene_matrix.index
    if leaf in sequence_alignments and gene_matrix.iloc[gene_matrix.index.get_loc(leaf), gene_id] == 1
}

leaf_sequences = list(leaf_to_seq.items())

def sequences_all_same_length(seqs):
    lengths = {len(seq) for _, seq in seqs}
    return len(lengths) == 1

def run_muscle_alignment_to_file(sequences):
    if len(sequences) <= 1:
        return MultipleSeqAlignment([SeqRecord(Seq(seq), id=name) for name, seq in sequences])
    
    # Temporäre FASTA-Datei schreiben
    tmp_in = Path("tmp_input.fasta")
    tmp_out = Path("tmp_output.aln")
    with tmp_in.open("w") as f:
        for name, seq in sequences:
            f.write(f">{name}\n{seq}\n")
    
    # MUSCLE auf Datei ausführen
    subprocess.run([MUSCLE_PATH, "-in", str(tmp_in), "-out", str(tmp_out), "-quiet"], check=True)
    
    # Ergebnis lesen
    aln_records = list(SeqIO.parse(str(tmp_out), "fasta"))
    
    # Temporäre Dateien löschen
    tmp_in.unlink()
    tmp_out.unlink()
    
    return MultipleSeqAlignment(aln_records)

# Alignment
if len(leaf_sequences) <= 1:
    print("Zu wenige Leaf-Sequenzen für Alignment, direkt übernehmen")
    leaf_alignment = MultipleSeqAlignment([SeqRecord(Seq(seq), id=name) for name, seq in leaf_sequences])
elif sequences_all_same_length(leaf_sequences):
    print("Sequenzen schon gleich lang, Alignment nicht nötig")
    leaf_alignment = MultipleSeqAlignment([SeqRecord(Seq(seq), id=name) for name, seq in leaf_sequences])
else:
    print("Führe MUSCLE-Alignment über Datei durch...")
    leaf_alignment = run_muscle_alignment_to_file(leaf_sequences)

# SNP-Zählung
def count_snp_positions(alignment):
    if len(alignment) <= 1:
        return 0
    arr = np.array([list(str(rec.seq)) for rec in alignment], dtype='U1')
    identical_cols = np.all(arr == arr[0], axis=0)
    return np.sum(~identical_cols)


# Mapping Leaf -> SeqRecord aus dem Alignment
leaf_aln_map = {rec.id: rec for rec in leaf_alignment if rec.id in gene_matrix.index}

node_alignment = {}

for node in nx.dfs_postorder_nodes(G, source=root_node):
    if node in leaf_to_seq:
        # Blattknoten
        node_alignment[node] = MultipleSeqAlignment([leaf_aln_map[node]])
        G.nodes[node]['allele_distance'] = 0
    else:
        # Sammle Sequenzen aller Blätter unter diesem Knoten
        leaves_under_node = [leaf for leaf in gene_matrix.index if leaf in leaf_aln_map and nx.has_path(G, source=node, target=leaf)]
        child_records = [leaf_aln_map[leaf] for leaf in leaves_under_node]

        node_alignment[node] = MultipleSeqAlignment(child_records)
        G.nodes[node]['allele_distance'] = count_snp_positions(node_alignment[node])
            
print(f"Gesamt-SNPs unter Leafs: {count_snp_positions(leaf_alignment)}")


AttributeError: 'list' object has no attribute 'seq'

In [209]:
sequence_alignments

{'GCF_001017635.1_ASM101763v1_genomic': SeqRecord(seq=Seq('TTGCCCTATAAACGCCAGTTCATTCAAACAGACATCCATAAAGCTTTATTCGGT...ACG'), id='GCF_001017635.1_ASM101763v1_genomic-XI92_RS38315-1-hypothetical_protein', name='GCF_001017635.1_ASM101763v1_genomic-XI92_RS38315-1-hypothetical_protein', description='GCF_001017635.1_ASM101763v1_genomic-XI92_RS38315-1-hypothetical_protein <unknown description>', dbxrefs=[]),
 'GCF_001420855.1_ASM142085v1_genomic': SeqRecord(seq=Seq('TTGCCCTATAAACGCCAGTTCATTCAAACAGACATCCATAAAGCTTTATTCGGT...ACG'), id='GCF_001420855.1_ASM142085v1_genomic-AQ980_RS35390-1-hypothetical_protein', name='GCF_001420855.1_ASM142085v1_genomic-AQ980_RS35390-1-hypothetical_protein', description='GCF_001420855.1_ASM142085v1_genomic-AQ980_RS35390-1-hypothetical_protein <unknown description>', dbxrefs=[])}

In [147]:
# Alle Knoten mit ihren Attributen auflisten
nodes_with_features = list(G.nodes(data=True))

for node, attrs in nodes_with_features:
    print(f"Knoten: {node}, Features: {attrs}")

Knoten: NODE_0000000, Features: {'core_distance': 0.38297999999999943, 'allele_distance': 264, 'time': 0.6175200000000001}
Knoten: GCF_025947955.1_ASM2594795v1_genomic, Features: {'time': 0.33460000000000006, 'core_distance': 0.0, 'allele_distance': 0}
Knoten: NODE_0000001, Features: {'time': 0.33460000000000006, 'core_distance': 0.38297999999999943, 'allele_distance': 264}
Knoten: NODE_0000002, Features: {'time': 0.2400500000000001, 'core_distance': 0.0, 'allele_distance': 0}
Knoten: GCF_012222085.2_ASM1222208v2_genomic, Features: {'time': 0.07563000000000009, 'core_distance': 0.0, 'allele_distance': 0}
Knoten: NODE_0000003, Features: {'time': 0.2120300000000001, 'core_distance': 0.0, 'allele_distance': 0}
Knoten: NODE_0000004, Features: {'time': 0.16714000000000012, 'core_distance': 0.0, 'allele_distance': 0}
Knoten: GCF_001455345.1_ASM145534v1_genomic, Features: {'time': 0.06264000000000014, 'core_distance': 0.0, 'allele_distance': 0}
Knoten: GCF_023824095.1_ASM2382409v1_genomic, Fe

In [23]:
from pyvis.network import Network
from collections import defaultdict
import networkx as nx
from pathlib import Path
import subprocess
from Bio import Phylo
import os
import numpy as np

# === Optional: Newick-Baum laden für Leaf-Reihenfolge ===
newick_path = os.path.join(directory, "vis", "strain_tree.nwk")
tree = Phylo.read(newick_path, "newick")
leaves_in_order = [leaf.name for leaf in tree.get_terminals()]
manual_order_level_0 = [leaf for leaf in leaves_in_order if leaf in gene_matrix.index]
print(manual_order_level_0)

# === Level für alle Knoten bestimmen ===
levels = {n: 0 for n in G.nodes()}  # alle Knoten auf 0 initialisieren

for node in reversed(list(nx.topological_sort(G))):
    successors = list(G.successors(node))
    if successors:
        levels[node] = 1 + max(levels[s] for s in successors)

nx.set_node_attributes(G, levels, "level")

# --- x/y Koordinaten für Blätter und innere Knoten berechnen ---
x_spacing = 100
y_spacing = 100

node_x = {}
node_y = {}

max_level = max(levels.values())

# Level 0: Blätter oben
for i, node in enumerate(manual_order_level_0):
    node_x[node] = i * x_spacing
    node_y[node] = (max_level - 0) * y_spacing  # Blätter oben

# Innere Knoten: von unten nach oben
for level in sorted(set(levels.values()))[1:]:
    nodes_in_level = [n for n, l in levels.items() if l == level]
    for node in nodes_in_level:
        children = list(G.successors(node))
        if children:
            child_x = [node_x[child] for child in children if child in node_x]
            node_x[node] = np.mean(child_x)
            node_y[node] = (max_level - level) * y_spacing  # Invertiert
        else:
            node_x[node] = 0
            node_y[node] = (max_level - level) * y_spacing

# === Netzwerk initialisieren (Hierarchical Layout deaktiviert!) ===
net = Network(height="900px", width="100%", directed=True)

net.set_options("""
{
  "nodes": {
    "shape": "dot",
    "size": 12,
    "font": { "size": 30 }
  },
  "edges": {
    "arrows": {
      "to": { "enabled": true, "scaleFactor": 0.5 }
    }
  },
  "physics": {
    "enabled": false
  }
}
""")

# === Knoten hinzufügen mit festen x/y ===
for node in G.nodes():
    core = G.nodes[node].get('core_distance', 0)
    allele = G.nodes[node].get('allele_distance', 0)
    time = G.nodes[node].get('time', 0)
    title = f"Core: {core:.2f}, Allele: {allele:.2f}, Time: {time:.2f}"
    label = f"{node}\n({core:.2f}, {allele:.2f}, {time:.2f})"

    # Farbe
    if node in gene_matrix.index and gene_matrix.iloc[gene_matrix.index.get_loc(node), gene_id] == 1:
        color = "green"
    elif node in gene_matrix.index and gene_matrix.iloc[gene_matrix.index.get_loc(node), gene_id] == 0:
        color = "black"
    else:
        color = "lightblue"

    net.add_node(node, label=label, title=title, color=color,
                 x=node_x[node], y=node_y[node])

# === Kanten hinzufügen ===
for u, v in G.edges():
    net.add_edge(u, v)

# === HTML-Datei speichern und direkt in Chrome öffnen ===
html_file = Path("/mnt/c/Users/uhewm/OneDrive/PhD/Project No.2/pangenome/graph.html")
html_file.parent.mkdir(parents=True, exist_ok=True)
net.show(str(html_file), notebook=False)

# WSL-Pfad in Windows-Pfad umwandeln
win_path = subprocess.run(["wslpath", "-w", str(html_file)], capture_output=True, text=True).stdout.strip()

# Direkt in Chrome öffnen
subprocess.run(["cmd.exe", "/C", "start", "chrome", win_path])


['GCF_025947955.1_ASM2594795v1_genomic', 'GCF_012222085.2_ASM1222208v2_genomic', 'GCF_001455345.1_ASM145534v1_genomic', 'GCF_023824095.1_ASM2382409v1_genomic', 'GCF_003546665.1_ASM354666v1_genomic', 'GCF_000015065.1_ASM1506v1_genomic', 'GCF_000832825.1_ASM83282v1_genomic', 'GCF_013267335.1_ASM1326733v1_genomic', 'GCF_013343075.1_ASM1334307v1_genomic', 'GCF_000008505.1_ASM850v1_genomic', 'GCF_000833085.1_ASM83308v1_genomic', 'GCF_013267815.1_ASM1326781v1_genomic', 'GCF_000161635.1_ASM16163v1_genomic', 'GCF_002192515.1_ASM219251v1_genomic', 'GCF_000161695.1_ASM16169v1_genomic', 'GCF_000161595.1_ASM16159v1_genomic', 'GCF_000832925.1_ASM83292v1_genomic', 'GCF_013267795.1_ASM1326779v1_genomic', 'GCF_000161655.1_ASM16165v1_genomic', 'GCF_000832485.1_ASM83248v1_genomic', 'GCF_013267315.1_ASM1326731v1_genomic', 'GCF_001182785.1_ASM118278v1_genomic', 'GCF_025960305.1_ASM2596030v1_genomic', 'GCF_001721165.1_ASM172116v1_genomic', 'GCF_000497525.1_ASM49752v2_genomic', 'GCF_001685565.1_ASM168556v1_

CompletedProcess(args=['cmd.exe', '/C', 'start', 'chrome', 'C:\\Users\\uhewm\\OneDrive\\PhD\\Project No.2\\pangenome\\graph.html'], returncode=0)

In [22]:
G.nodes

NodeView(('NODE_0000000', 'GCF_025947955.1_ASM2594795v1_genomic', 'NODE_0000001', 'NODE_0000002', 'GCF_012222085.2_ASM1222208v2_genomic', 'NODE_0000003', 'NODE_0000004', 'GCF_001455345.1_ASM145534v1_genomic', 'GCF_023824095.1_ASM2382409v1_genomic', 'NODE_0000005', 'GCF_003546665.1_ASM354666v1_genomic', 'NODE_0000006', 'NODE_0000007', 'GCF_000015065.1_ASM1506v1_genomic', 'NODE_0000008', 'GCF_000832825.1_ASM83282v1_genomic', 'NODE_0000009', 'GCF_013267335.1_ASM1326733v1_genomic', 'GCF_013343075.1_ASM1334307v1_genomic', 'NODE_0000010', 'NODE_0000011', 'GCF_000008505.1_ASM850v1_genomic', 'NODE_0000012', 'GCF_000833085.1_ASM83308v1_genomic', 'GCF_013267815.1_ASM1326781v1_genomic', 'NODE_0000013', 'NODE_0000014', 'GCF_000161635.1_ASM16163v1_genomic', 'GCF_002192515.1_ASM219251v1_genomic', 'NODE_0000015', 'GCF_000161695.1_ASM16169v1_genomic', 'NODE_0000016', 'GCF_000161595.1_ASM16159v1_genomic', 'NODE_0000017', 'NODE_0000018', 'GCF_000832925.1_ASM83292v1_genomic', 'GCF_013267795.1_ASM1326779v

In [227]:
from collections import Counter
from Bio.SeqRecord import SeqRecord

# Angenommen: sequences ist deine Liste mit SeqRecords

# 1. IDs bis zum ersten "-" extrahieren
prefixes = [rec.id.split("-", 1)[0] for rec in sequences]

# 2. Zählen wie oft jede vorkommt
counts = Counter(prefixes)

# 3. Nur die mehrfach vorkommenden Namen herausfiltern
duplicates = [name for name, cnt in counts.items() if cnt > 1]

print("Mehrfach vorkommende Namen:")
for dup in duplicates:
    print(dup)


Mehrfach vorkommende Namen:
GCF_013340745.1_ASM1334074v1_genomic
GCF_021595545.1_ASM2159554v1_genomic
GCF_001183785.1_ASM118378v1_genomic
GCF_030166715.1_ASM3016671v1_genomic
GCF_020809145.1_ASM2080914v1_genomic
GCF_020809105.1_ASM2080910v1_genomic
GCF_000835235.1_ASM83523v1_genomic
GCF_028356555.1_ASM2835655v1_genomic
GCF_000688795.1_ASM68879v1_genomic
GCF_001017635.1_ASM101763v1_genomic
GCF_000717535.1_ASM71753v1_genomic
GCF_000747545.1_ASM74754v1_genomic
GCF_000161595.1_ASM16159v1_genomic
GCF_000161695.1_ASM16169v1_genomic
GCF_027627725.1_ASM2762772v1_genomic
GCF_000940785.1_ASM94078v1_genomic
GCF_027627765.1_ASM2762776v1_genomic
GCF_027627745.1_ASM2762774v1_genomic
GCF_020809165.1_ASM2080916v1_genomic
GCF_022810725.1_ASM2281072v1_genomic
GCF_020005185.2_ASM2000518v2_genomic
GCF_000161675.1_ASM16167v1_genomic
GCF_001595725.1_ASM159572v1_genomic
GCF_006151925.1_ASM615192v1_genomic
GCF_003626955.1_ASM362695v1_genomic
