From 92e84168298bdc8c074b7694961ea97498ae03a0 Mon Sep 17 00:00:00 2001 From: Niklas Abraham GPU Date: Sun, 9 Feb 2025 11:10:16 +0000 Subject: [PATCH 01/13] added function for last hidden state of embeddings in the embedding cal --- src/pyeed/analysis/embedding_analysis.py | 43 +------------------- src/pyeed/embedding.py | 51 +++++++++++++++++++++++- 2 files changed, 52 insertions(+), 42 deletions(-) diff --git a/src/pyeed/analysis/embedding_analysis.py b/src/pyeed/analysis/embedding_analysis.py index efe83cb9..38c2875a 100644 --- a/src/pyeed/analysis/embedding_analysis.py +++ b/src/pyeed/analysis/embedding_analysis.py @@ -1,10 +1,9 @@ import logging -from typing import Any, Literal, Optional +from typing import Literal, Optional import matplotlib.pyplot as plt import numpy as np import scipy.spatial as sp -import torch from matplotlib.figure import Figure from numpy.typing import NDArray from pyeed.dbconnect import DatabaseConnector @@ -46,44 +45,6 @@ def get_embedding( return embedding - def _get_single_embedding_last_hidden_state( - self, sequence: str, model: Any, tokenizer: Any, device: torch.device - ) -> NDArray[np.float64]: - """Generate embeddings for a single sequence using the last hidden state. 
- - Args: - sequence (str): The protein sequence to embed - model (Any): The transformer model to use - tokenizer (Any): The tokenizer for the model - device (torch.device): The device to run the model on (CPU/GPU) - - Returns: - np.ndarray: Normalized embeddings for each token in the sequence - """ - from esm.models.esmc import ESMC - - with torch.no_grad(): - if isinstance(model, ESMC): - # ESM-3 logic - from esm.sdk.api import ESMProtein, LogitsConfig - - protein = ESMProtein(sequence=sequence) - protein_tensor = model.encode(protein) - logits_output = model.logits( - protein_tensor, LogitsConfig(sequence=True, return_embeddings=True) - ) - embedding = logits_output.embeddings[0].cpu().numpy() - else: - # ESM-2 logic - inputs = tokenizer(sequence, return_tensors="pt").to(device) - outputs = model(**inputs) - embedding = outputs.last_hidden_state[0, 1:-1, :].detach().cpu().numpy() - - # normalize the embedding - embedding = embedding / np.linalg.norm(embedding, axis=1, keepdims=True) - - return embedding # type: ignore - def find_closest_matches_simple( self, start_sequence_id: str, @@ -474,4 +435,4 @@ def drop_vector_index( logger.info(f"Dropping vector index {index_name}") query_drop_index = f"DROP INDEX {index_name} IF EXISTS;" - db.execute_write(query_drop_index) + db.execute_write(query_drop_index) \ No newline at end of file diff --git a/src/pyeed/embedding.py b/src/pyeed/embedding.py index 2bc00adf..67ba5529 100644 --- a/src/pyeed/embedding.py +++ b/src/pyeed/embedding.py @@ -1,6 +1,6 @@ import gc import os -from typing import Tuple, Union +from typing import Any, Tuple, Union import numpy as np import torch @@ -122,6 +122,55 @@ def get_batch_embeddings( return list(embeddings) +def calculate_single_sequence_embedding_last_hidden_state( + sequence: str, model_name: str = "facebook/esm2_t33_650M_UR50D" +): + """ + Calculates an embedding for a single sequence. 
+ """ + model, tokenizer, device = load_model_and_tokenizer(model_name) + return get_single_embedding_last_hidden_state(sequence, model, tokenizer, device) + + +def get_single_embedding_last_hidden_state( + sequence: str, model: Any, tokenizer: Any, device: torch.device +) -> NDArray[np.float64]: + """Generate embeddings for a single sequence using the last hidden state. + + Args: + sequence (str): The protein sequence to embed + model (Any): The transformer model to use + tokenizer (Any): The tokenizer for the model + device (torch.device): The device to run the model on (CPU/GPU) + + Returns: + np.ndarray: Normalized embeddings for each token in the sequence + """ + from esm.models.esmc import ESMC + + with torch.no_grad(): + if isinstance(model, ESMC): + # ESM-3 logic + from esm.sdk.api import ESMProtein, LogitsConfig + + protein = ESMProtein(sequence=sequence) + protein_tensor = model.encode(protein) + logits_output = model.logits( + protein_tensor, LogitsConfig(sequence=True, return_embeddings=True) + ) + embedding = logits_output.embeddings[0].cpu().numpy() + else: + # ESM-2 logic + inputs = tokenizer(sequence, return_tensors="pt").to(device) + outputs = model(**inputs) + embedding = outputs.last_hidden_state[0, 1:-1, :].detach().cpu().numpy() + + # normalize the embedding + embedding = embedding / np.linalg.norm(embedding, axis=1, keepdims=True) + + return embedding # type: ignore + + # The rest of your existing functions will need to be adapted in a similar way # if they interact with the model or tokenizer directly From 59b14790d5c51c314e0f2303a08f3282efb8b756 Mon Sep 17 00:00:00 2001 From: Niklas Abraham GPU Date: Sun, 9 Feb 2025 17:58:29 +0000 Subject: [PATCH 02/13] fixed lintr and added coemmnts --- .gitignore | 3 + docs/usage/alphafold.ipynb | 280 ++++++++++++++++ docs/usage/embeddings_analysis.ipynb | 58 +--- pyproject.toml | 2 + src/pyeed/analysis/embedding_analysis.py | 4 +- src/pyeed/embedding.py | 9 +- src/pyeed/tools/alphafold2.py | 121 +++++++ 
.../tools/resources/alphafold/docker_run.py | 308 ++++++++++++++++++ 8 files changed, 730 insertions(+), 55 deletions(-) create mode 100644 docs/usage/alphafold.ipynb create mode 100644 src/pyeed/tools/alphafold2.py create mode 100644 src/pyeed/tools/resources/alphafold/docker_run.py diff --git a/.gitignore b/.gitignore index aa75df1c..f1e9c2d3 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,9 @@ __pycache__/ *.py[cod] *$py.class +# AlphaFold output +docs/resources/alphafold/output/* + # C extensions *.so .vscode/ diff --git a/docs/usage/alphafold.ipynb b/docs/usage/alphafold.ipynb new file mode 100644 index 00000000..3c6dcdc6 --- /dev/null +++ b/docs/usage/alphafold.ipynb @@ -0,0 +1,280 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# AlphaFold Usage Example\n", + "\n", + "This notebook demonstrates how to use AlphaFold through the `pyeed` package interface.\n", + "\n", + "## Setup and Imports" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "import logging\n", + "from pyeed.tools.alphafold2 import AlphaFoldRunner" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')\n", + "LOGGER = logging.getLogger(__name__)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Configures logging to display timestamped INFO-level messages.\n", + "\n", + "## Input Preparation" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "id = 'AAP20891.1'\n", + "sequence = 
'MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW'" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "286" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(sequence)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Verifies the length of the input sequence (286 amino acids).\n", + "\n", + "## AlphaFold Configuration" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "data_dir = '/media/database/alphafold' # the path to the downloaded alphafold data\n", + "output_dir = os.path.join(os.path.dirname(os.getcwd()), \"resources\", \"alphafold\", \"output\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Sets up the paths for AlphaFold database and output directory.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025-02-09 12:18:57,218 - INFO - GPU detected. AlphaFold will run on GPU.\n" + ] + } + ], + "source": [ + "alphafold_runner = AlphaFoldRunner(data_dir, output_dir)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Initializes the AlphaFold runner with the specified directories. 
The system detected a GPU for computation.\n", + "\n", + "## Execution\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025-02-09 12:18:57,226 - INFO - Created FASTA file at: /home/nab/Niklas/pyeed/docs/resources/alphafold/output/AAP20891_1.fasta\n", + "2025-02-09 12:18:57,226 - INFO - Running AlphaFold with command: source ~/anaconda3/etc/profile.d/conda.sh && conda activate alphafold_env && python /home/nab/Niklas/pyeed/src/pyeed/tools/resources/alphafold/docker_run.py --fasta_paths=/home/nab/Niklas/pyeed/docs/resources/alphafold/output/AAP20891_1.fasta --max_template_date=2022-01-01 --data_dir=/media/database/alphafold --output_dir=/home/nab/Niklas/pyeed/docs/resources/alphafold/output\n", + "2025-02-09 12:56:43,434 - ERROR - AlphaFold stderr:\n", + "I0209 12:18:57.794830 133202111862592 docker_run.py:143] Mounting /home/nab/Niklas/pyeed/docs/resources/alphafold/output -> /mnt/fasta_path_0\n", + "I0209 12:18:57.794989 133202111862592 docker_run.py:143] Mounting /media/database/alphafold/uniref90 -> /mnt/uniref90_database_path\n", + "I0209 12:18:57.795081 133202111862592 docker_run.py:143] Mounting /media/database/alphafold/mgnify -> /mnt/mgnify_database_path\n", + "I0209 12:18:57.795150 133202111862592 docker_run.py:143] Mounting /media/database/alphafold -> /mnt/data_dir\n", + "I0209 12:18:57.795215 133202111862592 docker_run.py:143] Mounting /media/database/alphafold/pdb_mmcif/mmcif_files -> /mnt/template_mmcif_dir\n", + "I0209 12:18:57.795280 133202111862592 docker_run.py:143] Mounting /media/database/alphafold/pdb_mmcif -> /mnt/obsolete_pdbs_path\n", + "I0209 12:18:57.795432 133202111862592 docker_run.py:143] Mounting /media/database/alphafold/pdb70 -> /mnt/pdb70_database_path\n", + "I0209 12:18:57.795514 133202111862592 docker_run.py:143] Mounting /media/database/alphafold/uniref30 -> /mnt/uniref30_database_path\n", + "I0209 12:18:57.795584 
133202111862592 docker_run.py:143] Mounting /media/database/alphafold/bfd -> /mnt/bfd_database_path\n", + "I0209 12:18:58.210450 133202111862592 docker_run.py:297] /bin/bash: /opt/conda/lib/libtinfo.so.6: no version information available (required by /bin/bash)\n", + "I0209 12:19:01.228823 133202111862592 docker_run.py:297] I0209 12:19:01.227992 128704331829888 templates.py:858] Using precomputed obsolete pdbs /mnt/obsolete_pdbs_path/obsolete.dat.\n", + "I0209 12:19:01.773952 133202111862592 docker_run.py:297] I0209 12:19:01.772986 128704331829888 xla_bridge.py:863] Unable to initialize backend 'rocm': NOT_FOUND: Could not find registered platform with name: \"rocm\". Available platform names are: CUDA\n", + "I0209 12:19:01.774687 133202111862592 docker_run.py:297] I0209 12:19:01.774202 128704331829888 xla_bridge.py:863] Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: libtpu.so: cannot open shared object file: No such file or directory\n", + "I0209 12:19:07.194021 133202111862592 docker_run.py:297] I0209 12:19:07.192821 128704331829888 run_alphafold.py:524] Have 5 models: ['model_1_pred_0', 'model_2_pred_0', 'model_3_pred_0', 'model_4_pred_0', 'model_5_pred_0']\n", + "I0209 12:19:07.194216 133202111862592 docker_run.py:297] I0209 12:19:07.193000 128704331829888 run_alphafold.py:538] Using random seed 1176757644469293568 for the data pipeline\n", + "I0209 12:19:07.194303 133202111862592 docker_run.py:297] I0209 12:19:07.193229 128704331829888 run_alphafold.py:245] Predicting AAP20891_1\n", + "I0209 12:19:07.194409 133202111862592 docker_run.py:297] I0209 12:19:07.193762 128704331829888 jackhmmer.py:133] Launching subprocess \"/usr/bin/jackhmmer -o /dev/null -A /tmp/tmpdxix475j/output.sto --noali --F1 0.0005 --F2 5e-05 --F3 5e-07 --incE 0.0001 -E 0.0001 --cpu 8 -N 1 /mnt/fasta_path_0/AAP20891_1.fasta /mnt/uniref90_database_path/uniref90.fasta\"\n", + "I0209 12:19:07.196226 133202111862592 docker_run.py:297] I0209 12:19:07.195944 
128704331829888 utils.py:36] Started Jackhmmer (uniref90.fasta) query\n", + "I0209 12:26:52.677268 133202111862592 docker_run.py:297] I0209 12:26:52.675808 128704331829888 utils.py:40] Finished Jackhmmer (uniref90.fasta) query in 465.480 seconds\n", + "I0209 12:26:52.894916 133202111862592 docker_run.py:297] I0209 12:26:52.894393 128704331829888 jackhmmer.py:133] Launching subprocess \"/usr/bin/jackhmmer -o /dev/null -A /tmp/tmptqe7udq4/output.sto --noali --F1 0.0005 --F2 5e-05 --F3 5e-07 --incE 0.0001 -E 0.0001 --cpu 8 -N 1 /mnt/fasta_path_0/AAP20891_1.fasta /mnt/mgnify_database_path/mgy_clusters_2022_05.fa\"\n", + "I0209 12:26:52.895838 133202111862592 docker_run.py:297] I0209 12:26:52.895566 128704331829888 utils.py:36] Started Jackhmmer (mgy_clusters_2022_05.fa) query\n", + "I0209 12:39:31.579277 133202111862592 docker_run.py:297] I0209 12:39:31.578708 128704331829888 utils.py:40] Finished Jackhmmer (mgy_clusters_2022_05.fa) query in 758.683 seconds\n", + "I0209 12:39:33.533422 133202111862592 docker_run.py:297] I0209 12:39:33.532869 128704331829888 hhsearch.py:85] Launching subprocess \"/usr/bin/hhsearch -i /tmp/tmpo0o6h3es/query.a3m -o /tmp/tmpo0o6h3es/output.hhr -maxseq 1000000 -d /mnt/pdb70_database_path/pdb70\"\n", + "I0209 12:39:33.534581 133202111862592 docker_run.py:297] I0209 12:39:33.534294 128704331829888 utils.py:36] Started HHsearch query\n", + "I0209 12:40:02.849414 133202111862592 docker_run.py:297] I0209 12:40:02.848684 128704331829888 utils.py:40] Finished HHsearch query in 29.314 seconds\n", + "I0209 12:40:03.607998 133202111862592 docker_run.py:297] I0209 12:40:03.607374 128704331829888 hhblits.py:128] Launching subprocess \"/usr/bin/hhblits -i /mnt/fasta_path_0/AAP20891_1.fasta -cpu 4 -oa3m /tmp/tmpde8m8uej/output.a3m -o /dev/null -n 3 -e 0.001 -maxseq 1000000 -realign_max 100000 -maxfilt 100000 -min_prefilter_hits 1000 -d /mnt/bfd_database_path/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt -d 
/mnt/uniref30_database_path/UniRef30_2021_03\"\n", + "I0209 12:40:03.609346 133202111862592 docker_run.py:297] I0209 12:40:03.608979 128704331829888 utils.py:36] Started HHblits query\n", + "I0209 12:48:30.964893 133202111862592 docker_run.py:297] I0209 12:48:30.930698 128704331829888 utils.py:40] Finished HHblits query in 507.321 seconds\n", + "I0209 12:48:31.006231 133202111862592 docker_run.py:297] I0209 12:48:31.004925 128704331829888 templates.py:879] Searching for template for: MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW\n", + "I0209 12:48:31.187149 133202111862592 docker_run.py:297] I0209 12:48:31.186135 128704331829888 templates.py:267] Found an exact template match 4zj1_A.\n", + "I0209 12:48:31.619367 133202111862592 docker_run.py:297] I0209 12:48:31.618804 128704331829888 templates.py:267] Found an exact template match 2p74_B.\n", + "I0209 12:48:33.195153 133202111862592 docker_run.py:297] I0209 12:48:33.194629 128704331829888 templates.py:267] Found an exact template match 4bd0_A.\n", + "I0209 12:48:33.352998 133202111862592 docker_run.py:297] I0209 12:48:33.352648 128704331829888 templates.py:267] Found an exact template match 6nfd_A.\n", + "I0209 12:48:33.653642 133202111862592 docker_run.py:297] I0209 12:48:33.653137 128704331829888 templates.py:267] Found an exact template match 1m40_A.\n", + "I0209 12:48:33.813061 133202111862592 docker_run.py:297] I0209 12:48:33.812606 128704331829888 templates.py:267] Found an exact template match 1n9b_A.\n", + "I0209 12:48:34.047026 133202111862592 docker_run.py:297] I0209 12:48:34.046073 128704331829888 templates.py:267] Found an exact template match 2b5r_B.\n", + "I0209 12:48:34.474168 133202111862592 docker_run.py:297] I0209 12:48:34.473685 128704331829888 
templates.py:267] Found an exact template match 4ua6_A.\n", + "I0209 12:48:34.564374 133202111862592 docker_run.py:297] I0209 12:48:34.564018 128704331829888 templates.py:267] Found an exact template match 1g6a_A.\n", + "I0209 12:48:34.831888 133202111862592 docker_run.py:297] I0209 12:48:34.831412 128704331829888 templates.py:267] Found an exact template match 6afo_B.\n", + "I0209 12:48:35.275108 133202111862592 docker_run.py:297] I0209 12:48:35.274749 128704331829888 templates.py:267] Found an exact template match 6td0_A.\n", + "I0209 12:48:35.447641 133202111862592 docker_run.py:297] I0209 12:48:35.446554 128704331829888 templates.py:267] Found an exact template match 1o7e_B.\n", + "I0209 12:48:35.686393 133202111862592 docker_run.py:297] I0209 12:48:35.685771 128704331829888 templates.py:267] Found an exact template match 6niq_A.\n", + "I0209 12:48:35.832004 133202111862592 docker_run.py:297] I0209 12:48:35.831659 128704331829888 templates.py:267] Found an exact template match 4mbh_A.\n", + "I0209 12:48:36.200996 133202111862592 docker_run.py:297] I0209 12:48:36.200606 128704331829888 templates.py:267] Found an exact template match 5ne2_B.\n", + "I0209 12:48:36.430038 133202111862592 docker_run.py:297] I0209 12:48:36.427193 128704331829888 templates.py:267] Found an exact template match 6qwb_A.\n", + "I0209 12:48:36.576209 133202111862592 docker_run.py:297] I0209 12:48:36.575323 128704331829888 templates.py:267] Found an exact template match 6c7a_A.\n", + "I0209 12:48:36.806448 133202111862592 docker_run.py:297] I0209 12:48:36.805810 128704331829888 templates.py:286] Found a fuzzy sequence-only match 6dmh_A.\n", + "I0209 12:48:37.264686 133202111862592 docker_run.py:297] I0209 12:48:37.264108 128704331829888 templates.py:267] Found an exact template match 4c75_D.\n", + "I0209 12:48:37.393242 133202111862592 docker_run.py:297] I0209 12:48:37.392906 128704331829888 templates.py:267] Found an exact template match 6bn3_A.\n", + "I0209 12:48:38.150960 
133202111862592 docker_run.py:297] I0209 12:48:38.150416 128704331829888 pipeline.py:234] Uniref90 MSA size: 10000 sequences.\n", + "I0209 12:48:38.151282 133202111862592 docker_run.py:297] I0209 12:48:38.150556 128704331829888 pipeline.py:235] BFD MSA size: 2460 sequences.\n", + "I0209 12:48:38.151347 133202111862592 docker_run.py:297] I0209 12:48:38.150588 128704331829888 pipeline.py:236] MGnify MSA size: 501 sequences.\n", + "I0209 12:48:38.151422 133202111862592 docker_run.py:297] I0209 12:48:38.150624 128704331829888 pipeline.py:237] Final (deduplicated) MSA size: 12900 sequences.\n", + "I0209 12:48:38.151681 133202111862592 docker_run.py:297] I0209 12:48:38.150837 128704331829888 pipeline.py:239] Total number of templates (NB: this can include bad templates and is later filtered to top 4): 20.\n", + "I0209 12:48:38.194027 133202111862592 docker_run.py:297] I0209 12:48:38.193551 128704331829888 run_alphafold.py:276] Running model model_1_pred_0 on AAP20891_1\n", + "I0209 12:48:42.863350 133202111862592 docker_run.py:297] I0209 12:48:42.861231 128704331829888 model.py:165] Running predict with shape(feat) = {'aatype': (4, 286), 'residue_index': (4, 286), 'seq_length': (4,), 'template_aatype': (4, 4, 286), 'template_all_atom_masks': (4, 4, 286, 37), 'template_all_atom_positions': (4, 4, 286, 37, 3), 'template_sum_probs': (4, 4, 1), 'is_distillation': (4,), 'seq_mask': (4, 286), 'msa_mask': (4, 508, 286), 'msa_row_mask': (4, 508), 'random_crop_to_size_seed': (4, 2), 'template_mask': (4, 4), 'template_pseudo_beta': (4, 4, 286, 3), 'template_pseudo_beta_mask': (4, 4, 286), 'atom14_atom_exists': (4, 286, 14), 'residx_atom14_to_atom37': (4, 286, 14), 'residx_atom37_to_atom14': (4, 286, 37), 'atom37_atom_exists': (4, 286, 37), 'extra_msa': (4, 5120, 286), 'extra_msa_mask': (4, 5120, 286), 'extra_msa_row_mask': (4, 5120), 'bert_mask': (4, 508, 286), 'true_msa': (4, 508, 286), 'extra_has_deletion': (4, 5120, 286), 'extra_deletion_value': (4, 5120, 286), 'msa_feat': (4, 
508, 286, 49), 'target_feat': (4, 286, 22)}\n", + "I0209 12:50:49.352141 133202111862592 docker_run.py:297] I0209 12:50:49.350582 128704331829888 model.py:175] Output shape was {'distogram': {'bin_edges': (63,), 'logits': (286, 286, 64)}, 'experimentally_resolved': {'logits': (286, 37)}, 'masked_msa': {'logits': (508, 286, 23)}, 'predicted_lddt': {'logits': (286, 50)}, 'structure_module': {'final_atom_mask': (286, 37), 'final_atom_positions': (286, 37, 3)}, 'plddt': (286,), 'ranking_confidence': ()}\n", + "I0209 12:50:49.352594 133202111862592 docker_run.py:297] I0209 12:50:49.350745 128704331829888 run_alphafold.py:288] Total JAX model model_1_pred_0 on AAP20891_1 predict time (includes compilation time, see --benchmark): 126.5s\n", + "I0209 12:50:49.534061 133202111862592 docker_run.py:297] I0209 12:50:49.533301 128704331829888 run_alphafold.py:276] Running model model_2_pred_0 on AAP20891_1\n", + "I0209 12:50:53.137300 133202111862592 docker_run.py:297] I0209 12:50:53.134411 128704331829888 model.py:165] Running predict with shape(feat) = {'aatype': (4, 286), 'residue_index': (4, 286), 'seq_length': (4,), 'template_aatype': (4, 4, 286), 'template_all_atom_masks': (4, 4, 286, 37), 'template_all_atom_positions': (4, 4, 286, 37, 3), 'template_sum_probs': (4, 4, 1), 'is_distillation': (4,), 'seq_mask': (4, 286), 'msa_mask': (4, 508, 286), 'msa_row_mask': (4, 508), 'random_crop_to_size_seed': (4, 2), 'template_mask': (4, 4), 'template_pseudo_beta': (4, 4, 286, 3), 'template_pseudo_beta_mask': (4, 4, 286), 'atom14_atom_exists': (4, 286, 14), 'residx_atom14_to_atom37': (4, 286, 14), 'residx_atom37_to_atom14': (4, 286, 37), 'atom37_atom_exists': (4, 286, 37), 'extra_msa': (4, 1024, 286), 'extra_msa_mask': (4, 1024, 286), 'extra_msa_row_mask': (4, 1024), 'bert_mask': (4, 508, 286), 'true_msa': (4, 508, 286), 'extra_has_deletion': (4, 1024, 286), 'extra_deletion_value': (4, 1024, 286), 'msa_feat': (4, 508, 286, 49), 'target_feat': (4, 286, 22)}\n", + "I0209 
12:52:26.653816 133202111862592 docker_run.py:297] I0209 12:52:26.652812 128704331829888 model.py:175] Output shape was {'distogram': {'bin_edges': (63,), 'logits': (286, 286, 64)}, 'experimentally_resolved': {'logits': (286, 37)}, 'masked_msa': {'logits': (508, 286, 23)}, 'predicted_lddt': {'logits': (286, 50)}, 'structure_module': {'final_atom_mask': (286, 37), 'final_atom_positions': (286, 37, 3)}, 'plddt': (286,), 'ranking_confidence': ()}\n", + "I0209 12:52:26.653986 133202111862592 docker_run.py:297] I0209 12:52:26.652946 128704331829888 run_alphafold.py:288] Total JAX model model_2_pred_0 on AAP20891_1 predict time (includes compilation time, see --benchmark): 93.5s\n", + "I0209 12:52:26.833158 133202111862592 docker_run.py:297] I0209 12:52:26.832517 128704331829888 run_alphafold.py:276] Running model model_3_pred_0 on AAP20891_1\n", + "I0209 12:52:29.968823 133202111862592 docker_run.py:297] I0209 12:52:29.967889 128704331829888 model.py:165] Running predict with shape(feat) = {'aatype': (4, 286), 'residue_index': (4, 286), 'seq_length': (4,), 'is_distillation': (4,), 'seq_mask': (4, 286), 'msa_mask': (4, 512, 286), 'msa_row_mask': (4, 512), 'random_crop_to_size_seed': (4, 2), 'atom14_atom_exists': (4, 286, 14), 'residx_atom14_to_atom37': (4, 286, 14), 'residx_atom37_to_atom14': (4, 286, 37), 'atom37_atom_exists': (4, 286, 37), 'extra_msa': (4, 5120, 286), 'extra_msa_mask': (4, 5120, 286), 'extra_msa_row_mask': (4, 5120), 'bert_mask': (4, 512, 286), 'true_msa': (4, 512, 286), 'extra_has_deletion': (4, 5120, 286), 'extra_deletion_value': (4, 5120, 286), 'msa_feat': (4, 512, 286, 49), 'target_feat': (4, 286, 22)}\n", + "I0209 12:53:48.996051 133202111862592 docker_run.py:297] I0209 12:53:48.995177 128704331829888 model.py:175] Output shape was {'distogram': {'bin_edges': (63,), 'logits': (286, 286, 64)}, 'experimentally_resolved': {'logits': (286, 37)}, 'masked_msa': {'logits': (512, 286, 23)}, 'predicted_lddt': {'logits': (286, 50)}, 'structure_module': 
{'final_atom_mask': (286, 37), 'final_atom_positions': (286, 37, 3)}, 'plddt': (286,), 'ranking_confidence': ()}\n", + "I0209 12:53:48.996179 133202111862592 docker_run.py:297] I0209 12:53:48.995306 128704331829888 run_alphafold.py:288] Total JAX model model_3_pred_0 on AAP20891_1 predict time (includes compilation time, see --benchmark): 79.0s\n", + "I0209 12:53:49.174313 133202111862592 docker_run.py:297] I0209 12:53:49.173727 128704331829888 run_alphafold.py:276] Running model model_4_pred_0 on AAP20891_1\n", + "I0209 12:53:52.157123 133202111862592 docker_run.py:297] I0209 12:53:52.156084 128704331829888 model.py:165] Running predict with shape(feat) = {'aatype': (4, 286), 'residue_index': (4, 286), 'seq_length': (4,), 'is_distillation': (4,), 'seq_mask': (4, 286), 'msa_mask': (4, 512, 286), 'msa_row_mask': (4, 512), 'random_crop_to_size_seed': (4, 2), 'atom14_atom_exists': (4, 286, 14), 'residx_atom14_to_atom37': (4, 286, 14), 'residx_atom37_to_atom14': (4, 286, 37), 'atom37_atom_exists': (4, 286, 37), 'extra_msa': (4, 5120, 286), 'extra_msa_mask': (4, 5120, 286), 'extra_msa_row_mask': (4, 5120), 'bert_mask': (4, 512, 286), 'true_msa': (4, 512, 286), 'extra_has_deletion': (4, 5120, 286), 'extra_deletion_value': (4, 5120, 286), 'msa_feat': (4, 512, 286, 49), 'target_feat': (4, 286, 22)}\n", + "I0209 12:55:07.543145 133202111862592 docker_run.py:297] I0209 12:55:07.542435 128704331829888 model.py:175] Output shape was {'distogram': {'bin_edges': (63,), 'logits': (286, 286, 64)}, 'experimentally_resolved': {'logits': (286, 37)}, 'masked_msa': {'logits': (512, 286, 23)}, 'predicted_lddt': {'logits': (286, 50)}, 'structure_module': {'final_atom_mask': (286, 37), 'final_atom_positions': (286, 37, 3)}, 'plddt': (286,), 'ranking_confidence': ()}\n", + "I0209 12:55:07.543324 133202111862592 docker_run.py:297] I0209 12:55:07.542576 128704331829888 run_alphafold.py:288] Total JAX model model_4_pred_0 on AAP20891_1 predict time (includes compilation time, see 
--benchmark): 75.4s\n", + "I0209 12:55:07.726988 133202111862592 docker_run.py:297] I0209 12:55:07.726579 128704331829888 run_alphafold.py:276] Running model model_5_pred_0 on AAP20891_1\n", + "I0209 12:55:10.723475 133202111862592 docker_run.py:297] I0209 12:55:10.722320 128704331829888 model.py:165] Running predict with shape(feat) = {'aatype': (4, 286), 'residue_index': (4, 286), 'seq_length': (4,), 'is_distillation': (4,), 'seq_mask': (4, 286), 'msa_mask': (4, 512, 286), 'msa_row_mask': (4, 512), 'random_crop_to_size_seed': (4, 2), 'atom14_atom_exists': (4, 286, 14), 'residx_atom14_to_atom37': (4, 286, 14), 'residx_atom37_to_atom14': (4, 286, 37), 'atom37_atom_exists': (4, 286, 37), 'extra_msa': (4, 1024, 286), 'extra_msa_mask': (4, 1024, 286), 'extra_msa_row_mask': (4, 1024), 'bert_mask': (4, 512, 286), 'true_msa': (4, 512, 286), 'extra_has_deletion': (4, 1024, 286), 'extra_deletion_value': (4, 1024, 286), 'msa_feat': (4, 512, 286, 49), 'target_feat': (4, 286, 22)}\n", + "I0209 12:56:24.505498 133202111862592 docker_run.py:297] I0209 12:56:24.504845 128704331829888 model.py:175] Output shape was {'distogram': {'bin_edges': (63,), 'logits': (286, 286, 64)}, 'experimentally_resolved': {'logits': (286, 37)}, 'masked_msa': {'logits': (512, 286, 23)}, 'predicted_lddt': {'logits': (286, 50)}, 'structure_module': {'final_atom_mask': (286, 37), 'final_atom_positions': (286, 37, 3)}, 'plddt': (286,), 'ranking_confidence': ()}\n", + "I0209 12:56:24.505714 133202111862592 docker_run.py:297] I0209 12:56:24.504977 128704331829888 run_alphafold.py:288] Total JAX model model_5_pred_0 on AAP20891_1 predict time (includes compilation time, see --benchmark): 73.8s\n", + "I0209 12:56:29.924700 133202111862592 docker_run.py:297] I0209 12:56:29.923331 128704331829888 amber_minimize.py:178] alterations info: {'nonstandard_residues': [], 'removed_heterogens': set(), 'missing_residues': {}, 'missing_heavy_atoms': {}, 'missing_terminals': {: ['OXT']}, 'Se_in_MET': [], 
'removed_chains': {0: []}}\n", + "I0209 12:56:30.122141 133202111862592 docker_run.py:297] I0209 12:56:30.121656 128704331829888 amber_minimize.py:408] Minimizing protein, attempt 1 of 100.\n", + "I0209 12:56:30.439441 133202111862592 docker_run.py:297] I0209 12:56:30.439038 128704331829888 amber_minimize.py:69] Restraining 2212 / 4439 particles.\n", + "I0209 12:56:32.885671 133202111862592 docker_run.py:297] I0209 12:56:32.884042 128704331829888 amber_minimize.py:178] alterations info: {'nonstandard_residues': [], 'removed_heterogens': set(), 'missing_residues': {}, 'missing_heavy_atoms': {}, 'missing_terminals': {}, 'Se_in_MET': [], 'removed_chains': {0: []}}\n", + "I0209 12:56:35.878960 133202111862592 docker_run.py:297] I0209 12:56:35.878404 128704331829888 amber_minimize.py:500] Iteration completed: Einit 90002.51 Efinal -7195.49 Time 1.29 s num residue violations 0 num residue exclusions 0\n", + "I0209 12:56:36.584167 133202111862592 docker_run.py:297] I0209 12:56:36.583163 128704331829888 run_alphafold.py:414] Final timings for AAP20891_1: {'features': 1770.96572804451, 'process_features_model_1_pred_0': 4.667158365249634, 'predict_and_compile_model_1_pred_0': 126.48990654945374, 'process_features_model_2_pred_0': 3.60086727142334, 'predict_and_compile_model_2_pred_0': 93.51866555213928, 'process_features_model_3_pred_0': 3.1351327896118164, 'predict_and_compile_model_3_pred_0': 79.02753925323486, 'process_features_model_4_pred_0': 2.9821181297302246, 'predict_and_compile_model_4_pred_0': 75.38662052154541, 'process_features_model_5_pred_0': 2.9954960346221924, 'predict_and_compile_model_5_pred_0': 73.78278589248657, 'relax_model_1_pred_0': 11.30461859703064}\n", + "\n", + "2025-02-09 12:56:43,483 - INFO - Looking for output structure at: /home/nab/Niklas/pyeed/docs/resources/alphafold/output/AAP20891_1_relaxed.pdb\n", + "2025-02-09 12:56:43,485 - ERROR - Structure file not found: 
/home/nab/Niklas/pyeed/docs/resources/alphafold/output/AAP20891_1_relaxed.pdb\n" + ] + }, + { + "ename": "FileNotFoundError", + "evalue": "Structure file not found: /home/nab/Niklas/pyeed/docs/resources/alphafold/output/AAP20891_1_relaxed.pdb", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[7], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43malphafold_runner\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_alphafold\u001b[49m\u001b[43m(\u001b[49m\u001b[43msequence\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msequence\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msequence_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mid\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/Niklas/pyeed/src/pyeed/tools/alphafold2.py:106\u001b[0m, in \u001b[0;36mAlphaFoldRunner.run_alphafold\u001b[0;34m(self, sequence, sequence_id, max_template_date)\u001b[0m\n\u001b[1;32m 104\u001b[0m error_msg \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mStructure file not found: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mstructure_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 105\u001b[0m logger\u001b[38;5;241m.\u001b[39merror(error_msg)\n\u001b[0;32m--> 106\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mFileNotFoundError\u001b[39;00m(error_msg)\n\u001b[1;32m 108\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSuccessfully generated structure at: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mstructure_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 109\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m 
{\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstructure_file\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;28mstr\u001b[39m(structure_path)}\n", + "\u001b[0;31mFileNotFoundError\u001b[0m: Structure file not found: /home/nab/Niklas/pyeed/docs/resources/alphafold/output/AAP20891_1_relaxed.pdb" + ] + } + ], + "source": [ + "alphafold_runner.run_alphafold(sequence=sequence, sequence_id=id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Attempts to run AlphaFold prediction on the input sequence. The output indicates that a FASTA file was created, but the execution encountered an error when trying to locate the output structure file.\n", + "\n", + "## Note\n", + "The execution appears to have encountered an error when trying to find the output PDB file (`AAP20891_1_relaxed.pdb`). This might indicate an issue with the AlphaFold execution or output file generation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "pyeed_niklas", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/usage/embeddings_analysis.ipynb b/docs/usage/embeddings_analysis.ipynb index 040b7554..65a2398c 100644 --- a/docs/usage/embeddings_analysis.ipynb +++ b/docs/usage/embeddings_analysis.ipynb @@ -18,7 +18,7 @@ }, { "cell_type": "code", - "execution_count": 67, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -31,7 +31,7 @@ "from pyeed.analysis.embedding_analysis import EmbeddingTool\n", "\n", "logger.remove()\n", - "level = logger.add(sys.stderr, level=\"WARNING\")" + "level = logger.add(sys.stderr, 
level=\"INFO\")" ] }, { @@ -47,60 +47,14 @@ }, { "cell_type": "code", - "execution_count": 69, + "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "📡 Connected to database.\n", - "All data has been wiped from the database.\n", - "Connecting to bolt://neo4j:12345678@129.69.129.130:7687\n", - "Dropping constraints...\n", - " - Dropping unique constraint and index on label CatalyticActivity with property catalytic_id.\n", - " - Dropping unique constraint and index on label DNA with property accession_id.\n", - " - Dropping unique constraint and index on label GOAnnotation with property go_id.\n", - " - Dropping unique constraint and index on label OntologyObject with property name.\n", - " - Dropping unique constraint and index on label Organism with property taxonomy_id.\n", - " - Dropping unique constraint and index on label Protein with property accession_id.\n", - " - Dropping unique constraint and index on label Region with property region_id.\n", - " - Dropping unique constraint and index on label Site with property site_id.\n", - "\n", - "Dropping indexes...\n", - " - Dropping index on labels DNA with properties embedding.\n", - " - Dropping index on labels Protein with properties embedding.\n", - "\n", - "All constraints and indexes have been removed from the database.\n", - "the connection url is bolt://neo4j:12345678@129.69.129.130:7687\n", - "Loaded /home/nab/Niklas/pyeed/src/pyeed/model.py\n", - "Connecting to bolt://neo4j:12345678@129.69.129.130:7687\n", - "Setting up indexes and constraints...\n", - "\n", - "Found model.StrictStructuredNode\n", - " ! 
Skipping class model.StrictStructuredNode is abstract\n", - "Found model.Organism\n", - " + Creating node unique constraint for taxonomy_id on label Organism for class model.Organism\n", - "Found model.Site\n", - " + Creating node unique constraint for site_id on label Site for class model.Site\n", - "Found model.Region\n", - " + Creating node unique constraint for region_id on label Region for class model.Region\n", - "Found model.CatalyticActivity\n", - " + Creating node unique constraint for catalytic_id on label CatalyticActivity for class model.CatalyticActivity\n", - "Found model.StandardNumbering\n", - "Found model.GOAnnotation\n", - " + Creating node unique constraint for go_id on label GOAnnotation for class model.GOAnnotation\n", - "Found model.Protein\n", - " + Creating node unique constraint for accession_id on label Protein for class model.Protein\n", - " + Creating vector index for embedding on label Protein for class model.Protein\n", - "Found model.DNA\n", - " + Creating node unique constraint for accession_id on label DNA for class model.DNA\n", - " + Creating vector index for embedding on label DNA for class model.DNA\n", - "Found model.OntologyObject\n", - " + Creating node unique constraint for name on label OntologyObject for class model.OntologyObject\n", - "\n", - "Finished 10 classes.\n", - "✅ Databse constraints and indexes set up according to Pyeed Graph Object Model.\n" + "📡 Connected to database.\n" ] } ], @@ -1269,7 +1223,7 @@ ], "metadata": { "kernelspec": { - "display_name": "pyeed", + "display_name": "pyeed_niklas", "language": "python", "name": "python3" }, @@ -1283,7 +1237,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.5" + "version": "3.12.8" } }, "nbformat": 4, diff --git a/pyproject.toml b/pyproject.toml index 731ab5c4..abf7f8d0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,6 +34,8 @@ numpy = ">=1.14.5,<2.0" openai = "^1.52.2" esm = "^3.1.3" rdflib = "^6.0.0" 
+docker = "5.0.0" +absl-py = "1.0.0" [tool.poetry.group.dev.dependencies] mkdocstrings = {extras = ["python"], version = "^0.26.2"} diff --git a/src/pyeed/analysis/embedding_analysis.py b/src/pyeed/analysis/embedding_analysis.py index 38c2875a..fa9d6c0e 100644 --- a/src/pyeed/analysis/embedding_analysis.py +++ b/src/pyeed/analysis/embedding_analysis.py @@ -412,7 +412,7 @@ def find_nearest_neighbors_based_on_vector_index( YIELD node AS fprotein, score RETURN fprotein.accession_id, score """ - results = db.execute_read(query_find_nearest_neighbors) # type: list[dict[str, Any]] + results = db.execute_read(query_find_nearest_neighbors) neighbors: list[tuple[str, float]] = [ (str(record["fprotein.accession_id"]), float(record["score"])) for record in results @@ -435,4 +435,4 @@ def drop_vector_index( logger.info(f"Dropping vector index {index_name}") query_drop_index = f"DROP INDEX {index_name} IF EXISTS;" - db.execute_write(query_drop_index) \ No newline at end of file + db.execute_write(query_drop_index) diff --git a/src/pyeed/embedding.py b/src/pyeed/embedding.py index 67ba5529..d53ef8c7 100644 --- a/src/pyeed/embedding.py +++ b/src/pyeed/embedding.py @@ -124,9 +124,16 @@ def get_batch_embeddings( def calculate_single_sequence_embedding_last_hidden_state( sequence: str, model_name: str = "facebook/esm2_t33_650M_UR50D" -): +) -> NDArray[np.float64]: """ Calculates an embedding for a single sequence. + + Args: + sequence: Input protein sequence + model_name: Name of the ESM model to use + + Returns: + NDArray[np.float64]: Normalized embedding vector for the sequence """ model, tokenizer, device = load_model_and_tokenizer(model_name) return get_single_embedding_last_hidden_state(sequence, model, tokenizer, device) diff --git a/src/pyeed/tools/alphafold2.py b/src/pyeed/tools/alphafold2.py new file mode 100644 index 00000000..87aca3e4 --- /dev/null +++ b/src/pyeed/tools/alphafold2.py @@ -0,0 +1,121 @@ +""" +AlphaFold2 runner module. 
+ +This module provides functionality to run AlphaFold2 predictions on protein sequences. +AlphaFold2 must be installed in a conda environment named 'alphafold_env' as described in: +https://github.com/google-deepmind/alphafold/tree/main +""" + +import logging +import os +import subprocess +from pathlib import Path + +import torch + +logger = logging.getLogger(__name__) + + +class AlphaFoldRunner: + """Class to manage and execute AlphaFold2 protein structure predictions.""" + + def __init__(self, data_dir: str, output_dir: str) -> None: + """ + Initialize the AlphaFold runner with required directories. + + Args: + data_dir: Path to the directory containing AlphaFold model data + output_dir: Path where prediction results will be stored + + Raises: + FileNotFoundError: If required paths or files are not found + EnvironmentError: If no GPU is detected + """ + # Get the base directory of the pyeed project + self.base_dir = Path(os.path.dirname(os.path.abspath(__file__))) + self.data_dir = Path(data_dir) + self.output_dir = Path(output_dir) + + # Set path to the docker run script + self.docker_script = self.base_dir / "resources/alphafold/docker_run.py" + + # Validate required paths + if not self.docker_script.exists(): + raise FileNotFoundError( + f"Docker run script not found: {self.docker_script}" + ) + if not self.data_dir.exists(): + raise FileNotFoundError( + f"AlphaFold data directory not found: {self.data_dir}" + ) + if not self.output_dir.exists(): + self.output_dir.mkdir(parents=True) + + # Verify GPU availability + if not torch.cuda.is_available(): + raise EnvironmentError("No GPU detected. AlphaFold requires a GPU to run.") + + logger.info("GPU detected. AlphaFold will run on GPU.") + + def run_alphafold( + self, sequence: str, sequence_id: str, max_template_date: str = "2022-01-01" + ) -> dict[str, str]: + """ + Run AlphaFold prediction on a given protein sequence. 
+ + Args: + sequence: The protein sequence to predict structure for + sequence_id: Unique identifier for the sequence + max_template_date: Latest date allowed for template structures (YYYY-MM-DD format) + + Returns: + dict[str, str]: Contains AlphaFold confidence score and structure path with + keys 'confidence_score' and 'structure_path' + + Raises: + RuntimeError: If AlphaFold execution fails + """ + # Sanitize sequence ID by replacing dots with underscores + sequence_id = sequence_id.replace(".", "_") + + # Create FASTA file for the sequence + fasta_path = self.output_dir / f"{sequence_id}.fasta" + with open(fasta_path, "w", encoding="utf-8") as fasta_file: + fasta_file.write(f">{sequence_id}\n{sequence}") + logger.info("Created FASTA file at: %s", fasta_path) + + # Construct AlphaFold command + cmd = [ + "source ~/anaconda3/etc/profile.d/conda.sh && " + "conda activate alphafold_env && " + f"python {self.docker_script} " + f"--fasta_paths={fasta_path} " + f"--max_template_date={max_template_date} " + f"--data_dir={self.data_dir} " + f"--output_dir={self.output_dir}" + ] + + logger.info("Running AlphaFold with command: %s", " ".join(cmd)) + + # Execute AlphaFold command + process = subprocess.run( + cmd, + capture_output=True, + text=True, + shell=True, + executable="/bin/bash", + check=False, + ) + + # Log output streams + if process.stdout: + logger.info("AlphaFold stdout:\n%s", process.stdout) + if process.stderr: + logger.error("AlphaFold stderr:\n%s", process.stderr) + + if process.returncode != 0: + error_msg = f"AlphaFold execution failed: {process.stderr}" + logger.error(error_msg) + raise RuntimeError(error_msg) + + return {} # TODO: Implement return value with confidence score and structure path diff --git a/src/pyeed/tools/resources/alphafold/docker_run.py b/src/pyeed/tools/resources/alphafold/docker_run.py new file mode 100644 index 00000000..6c21ade7 --- /dev/null +++ b/src/pyeed/tools/resources/alphafold/docker_run.py @@ -0,0 +1,308 @@ +# Copyright 
2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Docker launch script for Alphafold docker image.""" + +import os +import pathlib +import signal +from typing import Tuple + +from absl import app, flags, logging + +import docker +from docker import types # type: ignore + +flags.DEFINE_bool("use_gpu", True, "Enable NVIDIA runtime to run with GPUs.") +flags.DEFINE_enum( + "models_to_relax", + "best", + ["best", "all", "none"], + "The models to run the final relaxation step on. " + "If `all`, all models are relaxed, which may be time " + "consuming. If `best`, only the most confident model is " + "relaxed. If `none`, relaxation is not run. Turning off " + "relaxation might result in predictions with " + "distracting stereochemical violations but might help " + "in case you are having issues with the relaxation " + "stage.", +) +flags.DEFINE_bool("enable_gpu_relax", True, "Run relax on GPU if GPU is enabled.") +flags.DEFINE_string( + "gpu_devices", + "all", + "Comma separated list of devices to pass to NVIDIA_VISIBLE_DEVICES.", +) +flags.DEFINE_list( + "fasta_paths", + None, + "Paths to FASTA files, each containing a prediction " + "target that will be folded one after another. If a FASTA file contains " + "multiple sequences, then it will be folded as a multimer. Paths should be " + "separated by commas. 
All FASTA paths must have a unique basename as the " + "basename is used to name the output directories for each prediction.", +) +flags.DEFINE_string( + "output_dir", "/tmp/alphafold", "Path to a directory that will store the results." +) +flags.DEFINE_string( + "data_dir", + None, + "Path to directory with supporting data: AlphaFold parameters and genetic " + "and template databases. Set to the target of download_all_databases.sh.", +) +flags.DEFINE_string( + "docker_image_name", "alphafold", "Name of the AlphaFold Docker image." +) +flags.DEFINE_string( + "max_template_date", + None, + "Maximum template release date to consider (ISO-8601 format: YYYY-MM-DD). " + "Important if folding historical test sets.", +) +flags.DEFINE_enum( + "db_preset", + "full_dbs", + ["full_dbs", "reduced_dbs"], + "Choose preset MSA database configuration - smaller genetic database " + "config (reduced_dbs) or full genetic database config (full_dbs)", +) +flags.DEFINE_enum( + "model_preset", + "monomer", + ["monomer", "monomer_casp14", "monomer_ptm", "multimer"], + "Choose preset model configuration - the monomer model, the monomer model " + "with extra ensembling, monomer model with pTM head, or multimer model", +) +flags.DEFINE_integer( + "num_multimer_predictions_per_model", + 5, + "How many " + "predictions (each with a different random seed) will be " + "generated per model. E.g. if this is 2 and there are 5 " + "models then there will be 10 predictions per input. " + "Note: this FLAG only applies if model_preset=multimer", +) +flags.DEFINE_boolean( + "benchmark", + False, + "Run multiple JAX model evaluations to obtain a timing that excludes the " + "compilation time, which should be more indicative of the time required " + "for inferencing many proteins.", +) +flags.DEFINE_boolean( + "use_precomputed_msas", + False, + "Whether to read MSAs that have been written to disk instead of running " + "the MSA tools. 
The MSA files are looked up in the output directory, so it " + "must stay the same between multiple runs that are to reuse the MSAs. " + "WARNING: This will not check if the sequence, database or configuration " + "have changed.", +) +flags.DEFINE_string( + "docker_user", + f"{os.geteuid()}:{os.getegid()}", + "UID:GID with which to run the Docker container. The output directories " + "will be owned by this user:group. By default, this is the current user. " + "Valid options are: uid or uid:gid, non-numeric values are not recognised " + "by Docker unless that user has been created within the container.", +) + +FLAGS = flags.FLAGS + +_ROOT_MOUNT_DIRECTORY = "/mnt/" + + +def _create_mount(mount_name: str, path: str) -> Tuple[types.Mount, str]: + """Create a mount point for each file and directory used by the model.""" + path = pathlib.Path(path).absolute() # type: ignore + target_path = pathlib.Path(_ROOT_MOUNT_DIRECTORY, mount_name) + + if path.is_dir(): # type: ignore + source_path = path + mounted_path = target_path + else: + source_path = path.parent # type: ignore + mounted_path = pathlib.Path(target_path, path.name) # type: ignore + if not source_path.exists(): # type: ignore + raise ValueError( + f'Failed to find source directory "{source_path}" to ' + "mount in Docker container." + ) + logging.info("Mounting %s -> %s", source_path, target_path) + mount = types.Mount( + target=str(target_path), source=str(source_path), type="bind", read_only=True + ) + return mount, str(mounted_path) + + +def main(argv): # type: ignore + if len(argv) > 1: + raise app.UsageError("Too many command-line arguments.") + + # You can individually override the following paths if you have placed the + # data in locations other than the FLAGS.data_dir. + + # Path to the Uniref90 database for use by JackHMMER. + uniref90_database_path = os.path.join(FLAGS.data_dir, "uniref90", "uniref90.fasta") + + # Path to the Uniprot database for use by JackHMMER. 
+ uniprot_database_path = os.path.join(FLAGS.data_dir, "uniprot", "uniprot.fasta") + + # Path to the MGnify database for use by JackHMMER. + mgnify_database_path = os.path.join( + FLAGS.data_dir, "mgnify", "mgy_clusters_2022_05.fa" + ) + + # Path to the BFD database for use by HHblits. + bfd_database_path = os.path.join( + FLAGS.data_dir, + "bfd", + "bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt", + ) + + # Path to the Small BFD database for use by JackHMMER. + small_bfd_database_path = os.path.join( + FLAGS.data_dir, "small_bfd", "bfd-first_non_consensus_sequences.fasta" + ) + + # Path to the Uniref30 database for use by HHblits. + uniref30_database_path = os.path.join( + FLAGS.data_dir, "uniref30", "UniRef30_2021_03" + ) + + # Path to the PDB70 database for use by HHsearch. + pdb70_database_path = os.path.join(FLAGS.data_dir, "pdb70", "pdb70") + + # Path to the PDB seqres database for use by hmmsearch. + pdb_seqres_database_path = os.path.join( + FLAGS.data_dir, "pdb_seqres", "pdb_seqres.txt" + ) + + # Path to a directory with template mmCIF structures, each named .cif. + template_mmcif_dir = os.path.join(FLAGS.data_dir, "pdb_mmcif", "mmcif_files") + + # Path to a file mapping obsolete PDB IDs to their replacements. + obsolete_pdbs_path = os.path.join(FLAGS.data_dir, "pdb_mmcif", "obsolete.dat") + + alphafold_path = pathlib.Path(__file__).parent.parent + data_dir_path = pathlib.Path(FLAGS.data_dir) + if alphafold_path == data_dir_path or alphafold_path in data_dir_path.parents: + raise app.UsageError( + f"The download directory {FLAGS.data_dir} should not be a subdirectory " + f"in the AlphaFold repository directory. If it is, the Docker build is " + f"slow since the large databases are copied during the image creation." + ) + + mounts = [] + command_args = [] + + # Mount each fasta path as a unique target directory. 
+ target_fasta_paths = [] + for i, fasta_path in enumerate(FLAGS.fasta_paths): + mount, target_path = _create_mount(f"fasta_path_{i}", fasta_path) + mounts.append(mount) + target_fasta_paths.append(target_path) + command_args.append(f'--fasta_paths={",".join(target_fasta_paths)}') + + database_paths = [ + ("uniref90_database_path", uniref90_database_path), + ("mgnify_database_path", mgnify_database_path), + ("data_dir", FLAGS.data_dir), + ("template_mmcif_dir", template_mmcif_dir), + ("obsolete_pdbs_path", obsolete_pdbs_path), + ] + + if FLAGS.model_preset == "multimer": + database_paths.append(("uniprot_database_path", uniprot_database_path)) + database_paths.append(("pdb_seqres_database_path", pdb_seqres_database_path)) + else: + database_paths.append(("pdb70_database_path", pdb70_database_path)) + + if FLAGS.db_preset == "reduced_dbs": + database_paths.append(("small_bfd_database_path", small_bfd_database_path)) + else: + database_paths.extend( + [ + ("uniref30_database_path", uniref30_database_path), + ("bfd_database_path", bfd_database_path), + ] + ) + for name, path in database_paths: + if path: + mount, target_path = _create_mount(name, path) + mounts.append(mount) + command_args.append(f"--{name}={target_path}") + + output_target_path = os.path.join(_ROOT_MOUNT_DIRECTORY, "output") + mounts.append(types.Mount(output_target_path, FLAGS.output_dir, type="bind")) + + use_gpu_relax = FLAGS.enable_gpu_relax and FLAGS.use_gpu + + command_args.extend( + [ + f"--output_dir={output_target_path}", + f"--max_template_date={FLAGS.max_template_date}", + f"--db_preset={FLAGS.db_preset}", + f"--model_preset={FLAGS.model_preset}", + f"--benchmark={FLAGS.benchmark}", + f"--use_precomputed_msas={FLAGS.use_precomputed_msas}", + f"--num_multimer_predictions_per_model={FLAGS.num_multimer_predictions_per_model}", + f"--models_to_relax={FLAGS.models_to_relax}", + f"--use_gpu_relax={use_gpu_relax}", + "--logtostderr", + ] + ) + + client = docker.from_env() # type: ignore + 
device_requests = ( + [docker.types.DeviceRequest(driver="nvidia", capabilities=[["gpu"]], count=-1)] # type: ignore + if FLAGS.use_gpu + else None + ) + # DANGER NIKLAS DID SHIT + + container = client.containers.run( + image=FLAGS.docker_image_name, + command=command_args, + device_requests=device_requests, + remove=True, + detach=True, + mounts=mounts, + user=FLAGS.docker_user, + environment={ + "NVIDIA_VISIBLE_DEVICES": FLAGS.gpu_devices, + # The following flags allow us to make predictions on proteins that + # would typically be too long to fit into GPU memory. + "TF_FORCE_UNIFIED_MEMORY": "1", + "XLA_PYTHON_CLIENT_MEM_FRACTION": "4.0", + }, + ) + + # Add signal handler to ensure CTRL+C also stops the running container. + signal.signal(signal.SIGINT, lambda unused_sig, unused_frame: container.kill()) + + for line in container.logs(stream=True): + logging.info(line.strip().decode("utf-8")) + + +if __name__ == "__main__": + flags.mark_flags_as_required( + [ + "data_dir", + "fasta_paths", + "max_template_date", + ] + ) + app.run(main) From 1bc78a75f23b9ed2f6e733ce2f2841c320029888 Mon Sep 17 00:00:00 2001 From: Niklas Abraham GPU Date: Sun, 9 Feb 2025 19:02:42 +0000 Subject: [PATCH 03/13] set up notebook --- docs/usage/alphafold.ipynb | 190 ++++++++++++++++++------------------- 1 file changed, 93 insertions(+), 97 deletions(-) diff --git a/docs/usage/alphafold.ipynb b/docs/usage/alphafold.ipynb index 3c6dcdc6..060ba5e1 100644 --- a/docs/usage/alphafold.ipynb +++ b/docs/usage/alphafold.ipynb @@ -107,7 +107,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-02-09 12:18:57,218 - INFO - GPU detected. AlphaFold will run on GPU.\n" + "2025-02-09 18:21:49,187 - INFO - GPU detected. 
AlphaFold will run on GPU.\n" ] } ], @@ -133,105 +133,101 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-02-09 12:18:57,226 - INFO - Created FASTA file at: /home/nab/Niklas/pyeed/docs/resources/alphafold/output/AAP20891_1.fasta\n", - "2025-02-09 12:18:57,226 - INFO - Running AlphaFold with command: source ~/anaconda3/etc/profile.d/conda.sh && conda activate alphafold_env && python /home/nab/Niklas/pyeed/src/pyeed/tools/resources/alphafold/docker_run.py --fasta_paths=/home/nab/Niklas/pyeed/docs/resources/alphafold/output/AAP20891_1.fasta --max_template_date=2022-01-01 --data_dir=/media/database/alphafold --output_dir=/home/nab/Niklas/pyeed/docs/resources/alphafold/output\n", - "2025-02-09 12:56:43,434 - ERROR - AlphaFold stderr:\n", - "I0209 12:18:57.794830 133202111862592 docker_run.py:143] Mounting /home/nab/Niklas/pyeed/docs/resources/alphafold/output -> /mnt/fasta_path_0\n", - "I0209 12:18:57.794989 133202111862592 docker_run.py:143] Mounting /media/database/alphafold/uniref90 -> /mnt/uniref90_database_path\n", - "I0209 12:18:57.795081 133202111862592 docker_run.py:143] Mounting /media/database/alphafold/mgnify -> /mnt/mgnify_database_path\n", - "I0209 12:18:57.795150 133202111862592 docker_run.py:143] Mounting /media/database/alphafold -> /mnt/data_dir\n", - "I0209 12:18:57.795215 133202111862592 docker_run.py:143] Mounting /media/database/alphafold/pdb_mmcif/mmcif_files -> /mnt/template_mmcif_dir\n", - "I0209 12:18:57.795280 133202111862592 docker_run.py:143] Mounting /media/database/alphafold/pdb_mmcif -> /mnt/obsolete_pdbs_path\n", - "I0209 12:18:57.795432 133202111862592 docker_run.py:143] Mounting /media/database/alphafold/pdb70 -> /mnt/pdb70_database_path\n", - "I0209 12:18:57.795514 133202111862592 docker_run.py:143] Mounting /media/database/alphafold/uniref30 -> /mnt/uniref30_database_path\n", - "I0209 12:18:57.795584 133202111862592 docker_run.py:143] Mounting /media/database/alphafold/bfd -> /mnt/bfd_database_path\n", - "I0209 
12:18:58.210450 133202111862592 docker_run.py:297] /bin/bash: /opt/conda/lib/libtinfo.so.6: no version information available (required by /bin/bash)\n", - "I0209 12:19:01.228823 133202111862592 docker_run.py:297] I0209 12:19:01.227992 128704331829888 templates.py:858] Using precomputed obsolete pdbs /mnt/obsolete_pdbs_path/obsolete.dat.\n", - "I0209 12:19:01.773952 133202111862592 docker_run.py:297] I0209 12:19:01.772986 128704331829888 xla_bridge.py:863] Unable to initialize backend 'rocm': NOT_FOUND: Could not find registered platform with name: \"rocm\". Available platform names are: CUDA\n", - "I0209 12:19:01.774687 133202111862592 docker_run.py:297] I0209 12:19:01.774202 128704331829888 xla_bridge.py:863] Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: libtpu.so: cannot open shared object file: No such file or directory\n", - "I0209 12:19:07.194021 133202111862592 docker_run.py:297] I0209 12:19:07.192821 128704331829888 run_alphafold.py:524] Have 5 models: ['model_1_pred_0', 'model_2_pred_0', 'model_3_pred_0', 'model_4_pred_0', 'model_5_pred_0']\n", - "I0209 12:19:07.194216 133202111862592 docker_run.py:297] I0209 12:19:07.193000 128704331829888 run_alphafold.py:538] Using random seed 1176757644469293568 for the data pipeline\n", - "I0209 12:19:07.194303 133202111862592 docker_run.py:297] I0209 12:19:07.193229 128704331829888 run_alphafold.py:245] Predicting AAP20891_1\n", - "I0209 12:19:07.194409 133202111862592 docker_run.py:297] I0209 12:19:07.193762 128704331829888 jackhmmer.py:133] Launching subprocess \"/usr/bin/jackhmmer -o /dev/null -A /tmp/tmpdxix475j/output.sto --noali --F1 0.0005 --F2 5e-05 --F3 5e-07 --incE 0.0001 -E 0.0001 --cpu 8 -N 1 /mnt/fasta_path_0/AAP20891_1.fasta /mnt/uniref90_database_path/uniref90.fasta\"\n", - "I0209 12:19:07.196226 133202111862592 docker_run.py:297] I0209 12:19:07.195944 128704331829888 utils.py:36] Started Jackhmmer (uniref90.fasta) query\n", - "I0209 12:26:52.677268 133202111862592 
docker_run.py:297] I0209 12:26:52.675808 128704331829888 utils.py:40] Finished Jackhmmer (uniref90.fasta) query in 465.480 seconds\n", - "I0209 12:26:52.894916 133202111862592 docker_run.py:297] I0209 12:26:52.894393 128704331829888 jackhmmer.py:133] Launching subprocess \"/usr/bin/jackhmmer -o /dev/null -A /tmp/tmptqe7udq4/output.sto --noali --F1 0.0005 --F2 5e-05 --F3 5e-07 --incE 0.0001 -E 0.0001 --cpu 8 -N 1 /mnt/fasta_path_0/AAP20891_1.fasta /mnt/mgnify_database_path/mgy_clusters_2022_05.fa\"\n", - "I0209 12:26:52.895838 133202111862592 docker_run.py:297] I0209 12:26:52.895566 128704331829888 utils.py:36] Started Jackhmmer (mgy_clusters_2022_05.fa) query\n", - "I0209 12:39:31.579277 133202111862592 docker_run.py:297] I0209 12:39:31.578708 128704331829888 utils.py:40] Finished Jackhmmer (mgy_clusters_2022_05.fa) query in 758.683 seconds\n", - "I0209 12:39:33.533422 133202111862592 docker_run.py:297] I0209 12:39:33.532869 128704331829888 hhsearch.py:85] Launching subprocess \"/usr/bin/hhsearch -i /tmp/tmpo0o6h3es/query.a3m -o /tmp/tmpo0o6h3es/output.hhr -maxseq 1000000 -d /mnt/pdb70_database_path/pdb70\"\n", - "I0209 12:39:33.534581 133202111862592 docker_run.py:297] I0209 12:39:33.534294 128704331829888 utils.py:36] Started HHsearch query\n", - "I0209 12:40:02.849414 133202111862592 docker_run.py:297] I0209 12:40:02.848684 128704331829888 utils.py:40] Finished HHsearch query in 29.314 seconds\n", - "I0209 12:40:03.607998 133202111862592 docker_run.py:297] I0209 12:40:03.607374 128704331829888 hhblits.py:128] Launching subprocess \"/usr/bin/hhblits -i /mnt/fasta_path_0/AAP20891_1.fasta -cpu 4 -oa3m /tmp/tmpde8m8uej/output.a3m -o /dev/null -n 3 -e 0.001 -maxseq 1000000 -realign_max 100000 -maxfilt 100000 -min_prefilter_hits 1000 -d /mnt/bfd_database_path/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt -d /mnt/uniref30_database_path/UniRef30_2021_03\"\n", - "I0209 12:40:03.609346 133202111862592 docker_run.py:297] I0209 12:40:03.608979 128704331829888 
utils.py:36] Started HHblits query\n", - "I0209 12:48:30.964893 133202111862592 docker_run.py:297] I0209 12:48:30.930698 128704331829888 utils.py:40] Finished HHblits query in 507.321 seconds\n", - "I0209 12:48:31.006231 133202111862592 docker_run.py:297] I0209 12:48:31.004925 128704331829888 templates.py:879] Searching for template for: MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW\n", - "I0209 12:48:31.187149 133202111862592 docker_run.py:297] I0209 12:48:31.186135 128704331829888 templates.py:267] Found an exact template match 4zj1_A.\n", - "I0209 12:48:31.619367 133202111862592 docker_run.py:297] I0209 12:48:31.618804 128704331829888 templates.py:267] Found an exact template match 2p74_B.\n", - "I0209 12:48:33.195153 133202111862592 docker_run.py:297] I0209 12:48:33.194629 128704331829888 templates.py:267] Found an exact template match 4bd0_A.\n", - "I0209 12:48:33.352998 133202111862592 docker_run.py:297] I0209 12:48:33.352648 128704331829888 templates.py:267] Found an exact template match 6nfd_A.\n", - "I0209 12:48:33.653642 133202111862592 docker_run.py:297] I0209 12:48:33.653137 128704331829888 templates.py:267] Found an exact template match 1m40_A.\n", - "I0209 12:48:33.813061 133202111862592 docker_run.py:297] I0209 12:48:33.812606 128704331829888 templates.py:267] Found an exact template match 1n9b_A.\n", - "I0209 12:48:34.047026 133202111862592 docker_run.py:297] I0209 12:48:34.046073 128704331829888 templates.py:267] Found an exact template match 2b5r_B.\n", - "I0209 12:48:34.474168 133202111862592 docker_run.py:297] I0209 12:48:34.473685 128704331829888 templates.py:267] Found an exact template match 4ua6_A.\n", - "I0209 12:48:34.564374 133202111862592 docker_run.py:297] I0209 12:48:34.564018 128704331829888 
templates.py:267] Found an exact template match 1g6a_A.\n", - "I0209 12:48:34.831888 133202111862592 docker_run.py:297] I0209 12:48:34.831412 128704331829888 templates.py:267] Found an exact template match 6afo_B.\n", - "I0209 12:48:35.275108 133202111862592 docker_run.py:297] I0209 12:48:35.274749 128704331829888 templates.py:267] Found an exact template match 6td0_A.\n", - "I0209 12:48:35.447641 133202111862592 docker_run.py:297] I0209 12:48:35.446554 128704331829888 templates.py:267] Found an exact template match 1o7e_B.\n", - "I0209 12:48:35.686393 133202111862592 docker_run.py:297] I0209 12:48:35.685771 128704331829888 templates.py:267] Found an exact template match 6niq_A.\n", - "I0209 12:48:35.832004 133202111862592 docker_run.py:297] I0209 12:48:35.831659 128704331829888 templates.py:267] Found an exact template match 4mbh_A.\n", - "I0209 12:48:36.200996 133202111862592 docker_run.py:297] I0209 12:48:36.200606 128704331829888 templates.py:267] Found an exact template match 5ne2_B.\n", - "I0209 12:48:36.430038 133202111862592 docker_run.py:297] I0209 12:48:36.427193 128704331829888 templates.py:267] Found an exact template match 6qwb_A.\n", - "I0209 12:48:36.576209 133202111862592 docker_run.py:297] I0209 12:48:36.575323 128704331829888 templates.py:267] Found an exact template match 6c7a_A.\n", - "I0209 12:48:36.806448 133202111862592 docker_run.py:297] I0209 12:48:36.805810 128704331829888 templates.py:286] Found a fuzzy sequence-only match 6dmh_A.\n", - "I0209 12:48:37.264686 133202111862592 docker_run.py:297] I0209 12:48:37.264108 128704331829888 templates.py:267] Found an exact template match 4c75_D.\n", - "I0209 12:48:37.393242 133202111862592 docker_run.py:297] I0209 12:48:37.392906 128704331829888 templates.py:267] Found an exact template match 6bn3_A.\n", - "I0209 12:48:38.150960 133202111862592 docker_run.py:297] I0209 12:48:38.150416 128704331829888 pipeline.py:234] Uniref90 MSA size: 10000 sequences.\n", - "I0209 12:48:38.151282 133202111862592 
docker_run.py:297] I0209 12:48:38.150556 128704331829888 pipeline.py:235] BFD MSA size: 2460 sequences.\n", - "I0209 12:48:38.151347 133202111862592 docker_run.py:297] I0209 12:48:38.150588 128704331829888 pipeline.py:236] MGnify MSA size: 501 sequences.\n", - "I0209 12:48:38.151422 133202111862592 docker_run.py:297] I0209 12:48:38.150624 128704331829888 pipeline.py:237] Final (deduplicated) MSA size: 12900 sequences.\n", - "I0209 12:48:38.151681 133202111862592 docker_run.py:297] I0209 12:48:38.150837 128704331829888 pipeline.py:239] Total number of templates (NB: this can include bad templates and is later filtered to top 4): 20.\n", - "I0209 12:48:38.194027 133202111862592 docker_run.py:297] I0209 12:48:38.193551 128704331829888 run_alphafold.py:276] Running model model_1_pred_0 on AAP20891_1\n", - "I0209 12:48:42.863350 133202111862592 docker_run.py:297] I0209 12:48:42.861231 128704331829888 model.py:165] Running predict with shape(feat) = {'aatype': (4, 286), 'residue_index': (4, 286), 'seq_length': (4,), 'template_aatype': (4, 4, 286), 'template_all_atom_masks': (4, 4, 286, 37), 'template_all_atom_positions': (4, 4, 286, 37, 3), 'template_sum_probs': (4, 4, 1), 'is_distillation': (4,), 'seq_mask': (4, 286), 'msa_mask': (4, 508, 286), 'msa_row_mask': (4, 508), 'random_crop_to_size_seed': (4, 2), 'template_mask': (4, 4), 'template_pseudo_beta': (4, 4, 286, 3), 'template_pseudo_beta_mask': (4, 4, 286), 'atom14_atom_exists': (4, 286, 14), 'residx_atom14_to_atom37': (4, 286, 14), 'residx_atom37_to_atom14': (4, 286, 37), 'atom37_atom_exists': (4, 286, 37), 'extra_msa': (4, 5120, 286), 'extra_msa_mask': (4, 5120, 286), 'extra_msa_row_mask': (4, 5120), 'bert_mask': (4, 508, 286), 'true_msa': (4, 508, 286), 'extra_has_deletion': (4, 5120, 286), 'extra_deletion_value': (4, 5120, 286), 'msa_feat': (4, 508, 286, 49), 'target_feat': (4, 286, 22)}\n", - "I0209 12:50:49.352141 133202111862592 docker_run.py:297] I0209 12:50:49.350582 128704331829888 model.py:175] Output 
shape was {'distogram': {'bin_edges': (63,), 'logits': (286, 286, 64)}, 'experimentally_resolved': {'logits': (286, 37)}, 'masked_msa': {'logits': (508, 286, 23)}, 'predicted_lddt': {'logits': (286, 50)}, 'structure_module': {'final_atom_mask': (286, 37), 'final_atom_positions': (286, 37, 3)}, 'plddt': (286,), 'ranking_confidence': ()}\n", - "I0209 12:50:49.352594 133202111862592 docker_run.py:297] I0209 12:50:49.350745 128704331829888 run_alphafold.py:288] Total JAX model model_1_pred_0 on AAP20891_1 predict time (includes compilation time, see --benchmark): 126.5s\n", - "I0209 12:50:49.534061 133202111862592 docker_run.py:297] I0209 12:50:49.533301 128704331829888 run_alphafold.py:276] Running model model_2_pred_0 on AAP20891_1\n", - "I0209 12:50:53.137300 133202111862592 docker_run.py:297] I0209 12:50:53.134411 128704331829888 model.py:165] Running predict with shape(feat) = {'aatype': (4, 286), 'residue_index': (4, 286), 'seq_length': (4,), 'template_aatype': (4, 4, 286), 'template_all_atom_masks': (4, 4, 286, 37), 'template_all_atom_positions': (4, 4, 286, 37, 3), 'template_sum_probs': (4, 4, 1), 'is_distillation': (4,), 'seq_mask': (4, 286), 'msa_mask': (4, 508, 286), 'msa_row_mask': (4, 508), 'random_crop_to_size_seed': (4, 2), 'template_mask': (4, 4), 'template_pseudo_beta': (4, 4, 286, 3), 'template_pseudo_beta_mask': (4, 4, 286), 'atom14_atom_exists': (4, 286, 14), 'residx_atom14_to_atom37': (4, 286, 14), 'residx_atom37_to_atom14': (4, 286, 37), 'atom37_atom_exists': (4, 286, 37), 'extra_msa': (4, 1024, 286), 'extra_msa_mask': (4, 1024, 286), 'extra_msa_row_mask': (4, 1024), 'bert_mask': (4, 508, 286), 'true_msa': (4, 508, 286), 'extra_has_deletion': (4, 1024, 286), 'extra_deletion_value': (4, 1024, 286), 'msa_feat': (4, 508, 286, 49), 'target_feat': (4, 286, 22)}\n", - "I0209 12:52:26.653816 133202111862592 docker_run.py:297] I0209 12:52:26.652812 128704331829888 model.py:175] Output shape was {'distogram': {'bin_edges': (63,), 'logits': (286, 286, 64)}, 
'experimentally_resolved': {'logits': (286, 37)}, 'masked_msa': {'logits': (508, 286, 23)}, 'predicted_lddt': {'logits': (286, 50)}, 'structure_module': {'final_atom_mask': (286, 37), 'final_atom_positions': (286, 37, 3)}, 'plddt': (286,), 'ranking_confidence': ()}\n", - "I0209 12:52:26.653986 133202111862592 docker_run.py:297] I0209 12:52:26.652946 128704331829888 run_alphafold.py:288] Total JAX model model_2_pred_0 on AAP20891_1 predict time (includes compilation time, see --benchmark): 93.5s\n", - "I0209 12:52:26.833158 133202111862592 docker_run.py:297] I0209 12:52:26.832517 128704331829888 run_alphafold.py:276] Running model model_3_pred_0 on AAP20891_1\n", - "I0209 12:52:29.968823 133202111862592 docker_run.py:297] I0209 12:52:29.967889 128704331829888 model.py:165] Running predict with shape(feat) = {'aatype': (4, 286), 'residue_index': (4, 286), 'seq_length': (4,), 'is_distillation': (4,), 'seq_mask': (4, 286), 'msa_mask': (4, 512, 286), 'msa_row_mask': (4, 512), 'random_crop_to_size_seed': (4, 2), 'atom14_atom_exists': (4, 286, 14), 'residx_atom14_to_atom37': (4, 286, 14), 'residx_atom37_to_atom14': (4, 286, 37), 'atom37_atom_exists': (4, 286, 37), 'extra_msa': (4, 5120, 286), 'extra_msa_mask': (4, 5120, 286), 'extra_msa_row_mask': (4, 5120), 'bert_mask': (4, 512, 286), 'true_msa': (4, 512, 286), 'extra_has_deletion': (4, 5120, 286), 'extra_deletion_value': (4, 5120, 286), 'msa_feat': (4, 512, 286, 49), 'target_feat': (4, 286, 22)}\n", - "I0209 12:53:48.996051 133202111862592 docker_run.py:297] I0209 12:53:48.995177 128704331829888 model.py:175] Output shape was {'distogram': {'bin_edges': (63,), 'logits': (286, 286, 64)}, 'experimentally_resolved': {'logits': (286, 37)}, 'masked_msa': {'logits': (512, 286, 23)}, 'predicted_lddt': {'logits': (286, 50)}, 'structure_module': {'final_atom_mask': (286, 37), 'final_atom_positions': (286, 37, 3)}, 'plddt': (286,), 'ranking_confidence': ()}\n", - "I0209 12:53:48.996179 133202111862592 docker_run.py:297] I0209 
12:53:48.995306 128704331829888 run_alphafold.py:288] Total JAX model model_3_pred_0 on AAP20891_1 predict time (includes compilation time, see --benchmark): 79.0s\n", - "I0209 12:53:49.174313 133202111862592 docker_run.py:297] I0209 12:53:49.173727 128704331829888 run_alphafold.py:276] Running model model_4_pred_0 on AAP20891_1\n", - "I0209 12:53:52.157123 133202111862592 docker_run.py:297] I0209 12:53:52.156084 128704331829888 model.py:165] Running predict with shape(feat) = {'aatype': (4, 286), 'residue_index': (4, 286), 'seq_length': (4,), 'is_distillation': (4,), 'seq_mask': (4, 286), 'msa_mask': (4, 512, 286), 'msa_row_mask': (4, 512), 'random_crop_to_size_seed': (4, 2), 'atom14_atom_exists': (4, 286, 14), 'residx_atom14_to_atom37': (4, 286, 14), 'residx_atom37_to_atom14': (4, 286, 37), 'atom37_atom_exists': (4, 286, 37), 'extra_msa': (4, 5120, 286), 'extra_msa_mask': (4, 5120, 286), 'extra_msa_row_mask': (4, 5120), 'bert_mask': (4, 512, 286), 'true_msa': (4, 512, 286), 'extra_has_deletion': (4, 5120, 286), 'extra_deletion_value': (4, 5120, 286), 'msa_feat': (4, 512, 286, 49), 'target_feat': (4, 286, 22)}\n", - "I0209 12:55:07.543145 133202111862592 docker_run.py:297] I0209 12:55:07.542435 128704331829888 model.py:175] Output shape was {'distogram': {'bin_edges': (63,), 'logits': (286, 286, 64)}, 'experimentally_resolved': {'logits': (286, 37)}, 'masked_msa': {'logits': (512, 286, 23)}, 'predicted_lddt': {'logits': (286, 50)}, 'structure_module': {'final_atom_mask': (286, 37), 'final_atom_positions': (286, 37, 3)}, 'plddt': (286,), 'ranking_confidence': ()}\n", - "I0209 12:55:07.543324 133202111862592 docker_run.py:297] I0209 12:55:07.542576 128704331829888 run_alphafold.py:288] Total JAX model model_4_pred_0 on AAP20891_1 predict time (includes compilation time, see --benchmark): 75.4s\n", - "I0209 12:55:07.726988 133202111862592 docker_run.py:297] I0209 12:55:07.726579 128704331829888 run_alphafold.py:276] Running model model_5_pred_0 on AAP20891_1\n", - 
"I0209 12:55:10.723475 133202111862592 docker_run.py:297] I0209 12:55:10.722320 128704331829888 model.py:165] Running predict with shape(feat) = {'aatype': (4, 286), 'residue_index': (4, 286), 'seq_length': (4,), 'is_distillation': (4,), 'seq_mask': (4, 286), 'msa_mask': (4, 512, 286), 'msa_row_mask': (4, 512), 'random_crop_to_size_seed': (4, 2), 'atom14_atom_exists': (4, 286, 14), 'residx_atom14_to_atom37': (4, 286, 14), 'residx_atom37_to_atom14': (4, 286, 37), 'atom37_atom_exists': (4, 286, 37), 'extra_msa': (4, 1024, 286), 'extra_msa_mask': (4, 1024, 286), 'extra_msa_row_mask': (4, 1024), 'bert_mask': (4, 512, 286), 'true_msa': (4, 512, 286), 'extra_has_deletion': (4, 1024, 286), 'extra_deletion_value': (4, 1024, 286), 'msa_feat': (4, 512, 286, 49), 'target_feat': (4, 286, 22)}\n", - "I0209 12:56:24.505498 133202111862592 docker_run.py:297] I0209 12:56:24.504845 128704331829888 model.py:175] Output shape was {'distogram': {'bin_edges': (63,), 'logits': (286, 286, 64)}, 'experimentally_resolved': {'logits': (286, 37)}, 'masked_msa': {'logits': (512, 286, 23)}, 'predicted_lddt': {'logits': (286, 50)}, 'structure_module': {'final_atom_mask': (286, 37), 'final_atom_positions': (286, 37, 3)}, 'plddt': (286,), 'ranking_confidence': ()}\n", - "I0209 12:56:24.505714 133202111862592 docker_run.py:297] I0209 12:56:24.504977 128704331829888 run_alphafold.py:288] Total JAX model model_5_pred_0 on AAP20891_1 predict time (includes compilation time, see --benchmark): 73.8s\n", - "I0209 12:56:29.924700 133202111862592 docker_run.py:297] I0209 12:56:29.923331 128704331829888 amber_minimize.py:178] alterations info: {'nonstandard_residues': [], 'removed_heterogens': set(), 'missing_residues': {}, 'missing_heavy_atoms': {}, 'missing_terminals': {: ['OXT']}, 'Se_in_MET': [], 'removed_chains': {0: []}}\n", - "I0209 12:56:30.122141 133202111862592 docker_run.py:297] I0209 12:56:30.121656 128704331829888 amber_minimize.py:408] Minimizing protein, attempt 1 of 100.\n", - "I0209 
12:56:30.439441 133202111862592 docker_run.py:297] I0209 12:56:30.439038 128704331829888 amber_minimize.py:69] Restraining 2212 / 4439 particles.\n", - "I0209 12:56:32.885671 133202111862592 docker_run.py:297] I0209 12:56:32.884042 128704331829888 amber_minimize.py:178] alterations info: {'nonstandard_residues': [], 'removed_heterogens': set(), 'missing_residues': {}, 'missing_heavy_atoms': {}, 'missing_terminals': {}, 'Se_in_MET': [], 'removed_chains': {0: []}}\n", - "I0209 12:56:35.878960 133202111862592 docker_run.py:297] I0209 12:56:35.878404 128704331829888 amber_minimize.py:500] Iteration completed: Einit 90002.51 Efinal -7195.49 Time 1.29 s num residue violations 0 num residue exclusions 0\n", - "I0209 12:56:36.584167 133202111862592 docker_run.py:297] I0209 12:56:36.583163 128704331829888 run_alphafold.py:414] Final timings for AAP20891_1: {'features': 1770.96572804451, 'process_features_model_1_pred_0': 4.667158365249634, 'predict_and_compile_model_1_pred_0': 126.48990654945374, 'process_features_model_2_pred_0': 3.60086727142334, 'predict_and_compile_model_2_pred_0': 93.51866555213928, 'process_features_model_3_pred_0': 3.1351327896118164, 'predict_and_compile_model_3_pred_0': 79.02753925323486, 'process_features_model_4_pred_0': 2.9821181297302246, 'predict_and_compile_model_4_pred_0': 75.38662052154541, 'process_features_model_5_pred_0': 2.9954960346221924, 'predict_and_compile_model_5_pred_0': 73.78278589248657, 'relax_model_1_pred_0': 11.30461859703064}\n", - "\n", - "2025-02-09 12:56:43,483 - INFO - Looking for output structure at: /home/nab/Niklas/pyeed/docs/resources/alphafold/output/AAP20891_1_relaxed.pdb\n", - "2025-02-09 12:56:43,485 - ERROR - Structure file not found: /home/nab/Niklas/pyeed/docs/resources/alphafold/output/AAP20891_1_relaxed.pdb\n" + "2025-02-09 18:21:49,193 - INFO - Created FASTA file at: /home/nab/Niklas/pyeed/docs/resources/alphafold/output/AAP20891_1.fasta\n", + "2025-02-09 18:21:49,194 - INFO - Running AlphaFold with 
command: source ~/anaconda3/etc/profile.d/conda.sh && conda activate alphafold_env && python /home/nab/Niklas/pyeed/src/pyeed/tools/resources/alphafold/docker_run.py --fasta_paths=/home/nab/Niklas/pyeed/docs/resources/alphafold/output/AAP20891_1.fasta --max_template_date=2022-01-01 --data_dir=/media/database/alphafold --output_dir=/home/nab/Niklas/pyeed/docs/resources/alphafold/output\n", + "2025-02-09 18:59:25,222 - ERROR - AlphaFold stderr:\n", + "I0209 18:21:49.823744 126234918065984 docker_run.py:143] Mounting /home/nab/Niklas/pyeed/docs/resources/alphafold/output -> /mnt/fasta_path_0\n", + "I0209 18:21:49.823894 126234918065984 docker_run.py:143] Mounting /media/database/alphafold/uniref90 -> /mnt/uniref90_database_path\n", + "I0209 18:21:49.823978 126234918065984 docker_run.py:143] Mounting /media/database/alphafold/mgnify -> /mnt/mgnify_database_path\n", + "I0209 18:21:49.824042 126234918065984 docker_run.py:143] Mounting /media/database/alphafold -> /mnt/data_dir\n", + "I0209 18:21:49.824100 126234918065984 docker_run.py:143] Mounting /media/database/alphafold/pdb_mmcif/mmcif_files -> /mnt/template_mmcif_dir\n", + "I0209 18:21:49.824160 126234918065984 docker_run.py:143] Mounting /media/database/alphafold/pdb_mmcif -> /mnt/obsolete_pdbs_path\n", + "I0209 18:21:49.824414 126234918065984 docker_run.py:143] Mounting /media/database/alphafold/pdb70 -> /mnt/pdb70_database_path\n", + "I0209 18:21:49.824606 126234918065984 docker_run.py:143] Mounting /media/database/alphafold/uniref30 -> /mnt/uniref30_database_path\n", + "I0209 18:21:49.824759 126234918065984 docker_run.py:143] Mounting /media/database/alphafold/bfd -> /mnt/bfd_database_path\n", + "I0209 18:21:50.499799 126234918065984 docker_run.py:297] /bin/bash: /opt/conda/lib/libtinfo.so.6: no version information available (required by /bin/bash)\n", + "I0209 18:21:54.122220 126234918065984 docker_run.py:297] I0209 18:21:54.121551 129414388114048 templates.py:858] Using precomputed obsolete pdbs 
/mnt/obsolete_pdbs_path/obsolete.dat.\n", + "I0209 18:21:54.896348 126234918065984 docker_run.py:297] I0209 18:21:54.895152 129414388114048 xla_bridge.py:863] Unable to initialize backend 'rocm': NOT_FOUND: Could not find registered platform with name: \"rocm\". Available platform names are: CUDA\n", + "I0209 18:21:54.896494 126234918065984 docker_run.py:297] I0209 18:21:54.896013 129414388114048 xla_bridge.py:863] Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: libtpu.so: cannot open shared object file: No such file or directory\n", + "I0209 18:22:00.123066 126234918065984 docker_run.py:297] I0209 18:22:00.122301 129414388114048 run_alphafold.py:524] Have 5 models: ['model_1_pred_0', 'model_2_pred_0', 'model_3_pred_0', 'model_4_pred_0', 'model_5_pred_0']\n", + "I0209 18:22:00.123264 126234918065984 docker_run.py:297] I0209 18:22:00.122479 129414388114048 run_alphafold.py:538] Using random seed 1472682267336032100 for the data pipeline\n", + "I0209 18:22:00.123352 126234918065984 docker_run.py:297] I0209 18:22:00.122714 129414388114048 run_alphafold.py:245] Predicting AAP20891_1\n", + "I0209 18:22:00.123819 126234918065984 docker_run.py:297] I0209 18:22:00.123200 129414388114048 jackhmmer.py:133] Launching subprocess \"/usr/bin/jackhmmer -o /dev/null -A /tmp/tmpxc0kqa9t/output.sto --noali --F1 0.0005 --F2 5e-05 --F3 5e-07 --incE 0.0001 -E 0.0001 --cpu 8 -N 1 /mnt/fasta_path_0/AAP20891_1.fasta /mnt/uniref90_database_path/uniref90.fasta\"\n", + "I0209 18:22:00.124192 126234918065984 docker_run.py:297] I0209 18:22:00.123989 129414388114048 utils.py:36] Started Jackhmmer (uniref90.fasta) query\n", + "I0209 18:29:46.660077 126234918065984 docker_run.py:297] I0209 18:29:46.659375 129414388114048 utils.py:40] Finished Jackhmmer (uniref90.fasta) query in 466.535 seconds\n", + "I0209 18:29:46.872978 126234918065984 docker_run.py:297] I0209 18:29:46.872399 129414388114048 jackhmmer.py:133] Launching subprocess \"/usr/bin/jackhmmer -o /dev/null -A 
/tmp/tmpy07gvq9o/output.sto --noali --F1 0.0005 --F2 5e-05 --F3 5e-07 --incE 0.0001 -E 0.0001 --cpu 8 -N 1 /mnt/fasta_path_0/AAP20891_1.fasta /mnt/mgnify_database_path/mgy_clusters_2022_05.fa\"\n", + "I0209 18:29:46.873672 126234918065984 docker_run.py:297] I0209 18:29:46.873422 129414388114048 utils.py:36] Started Jackhmmer (mgy_clusters_2022_05.fa) query\n", + "I0209 18:42:17.609159 126234918065984 docker_run.py:297] I0209 18:42:17.608636 129414388114048 utils.py:40] Finished Jackhmmer (mgy_clusters_2022_05.fa) query in 750.735 seconds\n", + "I0209 18:42:19.588392 126234918065984 docker_run.py:297] I0209 18:42:19.587914 129414388114048 hhsearch.py:85] Launching subprocess \"/usr/bin/hhsearch -i /tmp/tmp1k92nh3b/query.a3m -o /tmp/tmp1k92nh3b/output.hhr -maxseq 1000000 -d /mnt/pdb70_database_path/pdb70\"\n", + "I0209 18:42:19.589171 126234918065984 docker_run.py:297] I0209 18:42:19.588937 129414388114048 utils.py:36] Started HHsearch query\n", + "I0209 18:42:48.892270 126234918065984 docker_run.py:297] I0209 18:42:48.891644 129414388114048 utils.py:40] Finished HHsearch query in 29.303 seconds\n", + "I0209 18:42:49.653391 126234918065984 docker_run.py:297] I0209 18:42:49.652812 129414388114048 hhblits.py:128] Launching subprocess \"/usr/bin/hhblits -i /mnt/fasta_path_0/AAP20891_1.fasta -cpu 4 -oa3m /tmp/tmpj3pynfqf/output.a3m -o /dev/null -n 3 -e 0.001 -maxseq 1000000 -realign_max 100000 -maxfilt 100000 -min_prefilter_hits 1000 -d /mnt/bfd_database_path/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt -d /mnt/uniref30_database_path/UniRef30_2021_03\"\n", + "I0209 18:42:49.654407 126234918065984 docker_run.py:297] I0209 18:42:49.654129 129414388114048 utils.py:36] Started HHblits query\n", + "I0209 18:51:13.099540 126234918065984 docker_run.py:297] I0209 18:51:13.049138 129414388114048 utils.py:40] Finished HHblits query in 503.390 seconds\n", + "I0209 18:51:13.235084 126234918065984 docker_run.py:297] I0209 18:51:13.232861 129414388114048 templates.py:879] 
Searching for template for: MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW\n", + "I0209 18:51:13.443433 126234918065984 docker_run.py:297] I0209 18:51:13.441954 129414388114048 templates.py:267] Found an exact template match 4zj1_A.\n", + "I0209 18:51:13.919805 126234918065984 docker_run.py:297] I0209 18:51:13.919035 129414388114048 templates.py:267] Found an exact template match 2p74_B.\n", + "I0209 18:51:15.502430 126234918065984 docker_run.py:297] I0209 18:51:15.501675 129414388114048 templates.py:267] Found an exact template match 4bd0_A.\n", + "I0209 18:51:15.661102 126234918065984 docker_run.py:297] I0209 18:51:15.659892 129414388114048 templates.py:267] Found an exact template match 6nfd_A.\n", + "I0209 18:51:15.969052 126234918065984 docker_run.py:297] I0209 18:51:15.968239 129414388114048 templates.py:267] Found an exact template match 1m40_A.\n", + "I0209 18:51:16.131306 126234918065984 docker_run.py:297] I0209 18:51:16.130592 129414388114048 templates.py:267] Found an exact template match 1n9b_A.\n", + "I0209 18:51:16.363438 126234918065984 docker_run.py:297] I0209 18:51:16.362271 129414388114048 templates.py:267] Found an exact template match 2b5r_B.\n", + "I0209 18:51:16.789144 126234918065984 docker_run.py:297] I0209 18:51:16.788326 129414388114048 templates.py:267] Found an exact template match 4ua6_A.\n", + "I0209 18:51:16.879354 126234918065984 docker_run.py:297] I0209 18:51:16.878310 129414388114048 templates.py:267] Found an exact template match 1g6a_A.\n", + "I0209 18:51:17.147200 126234918065984 docker_run.py:297] I0209 18:51:17.146468 129414388114048 templates.py:267] Found an exact template match 6afo_B.\n", + "I0209 18:51:17.594172 126234918065984 docker_run.py:297] I0209 18:51:17.592624 129414388114048 
templates.py:267] Found an exact template match 6td0_A.\n", + "I0209 18:51:17.764129 126234918065984 docker_run.py:297] I0209 18:51:17.763039 129414388114048 templates.py:267] Found an exact template match 1o7e_B.\n", + "I0209 18:51:17.999657 126234918065984 docker_run.py:297] I0209 18:51:17.998758 129414388114048 templates.py:267] Found an exact template match 6niq_A.\n", + "I0209 18:51:18.144142 126234918065984 docker_run.py:297] I0209 18:51:18.143428 129414388114048 templates.py:267] Found an exact template match 4mbh_A.\n", + "I0209 18:51:18.514421 126234918065984 docker_run.py:297] I0209 18:51:18.513112 129414388114048 templates.py:267] Found an exact template match 5ne2_B.\n", + "I0209 18:51:18.737822 126234918065984 docker_run.py:297] I0209 18:51:18.737295 129414388114048 templates.py:267] Found an exact template match 6qwb_A.\n", + "I0209 18:51:18.885024 126234918065984 docker_run.py:297] I0209 18:51:18.884303 129414388114048 templates.py:267] Found an exact template match 6c7a_A.\n", + "I0209 18:51:19.108935 126234918065984 docker_run.py:297] I0209 18:51:19.108240 129414388114048 templates.py:286] Found a fuzzy sequence-only match 6dmh_A.\n", + "I0209 18:51:19.565951 126234918065984 docker_run.py:297] I0209 18:51:19.565258 129414388114048 templates.py:267] Found an exact template match 4c75_D.\n", + "I0209 18:51:19.695087 126234918065984 docker_run.py:297] I0209 18:51:19.694348 129414388114048 templates.py:267] Found an exact template match 6bn3_A.\n", + "I0209 18:51:20.471373 126234918065984 docker_run.py:297] I0209 18:51:20.470418 129414388114048 pipeline.py:234] Uniref90 MSA size: 10000 sequences.\n", + "I0209 18:51:20.471764 126234918065984 docker_run.py:297] I0209 18:51:20.470559 129414388114048 pipeline.py:235] BFD MSA size: 2460 sequences.\n", + "I0209 18:51:20.471865 126234918065984 docker_run.py:297] I0209 18:51:20.470590 129414388114048 pipeline.py:236] MGnify MSA size: 501 sequences.\n", + "I0209 18:51:20.471951 126234918065984 
docker_run.py:297] I0209 18:51:20.470622 129414388114048 pipeline.py:237] Final (deduplicated) MSA size: 12900 sequences.\n", + "I0209 18:51:20.472341 126234918065984 docker_run.py:297] I0209 18:51:20.470838 129414388114048 pipeline.py:239] Total number of templates (NB: this can include bad templates and is later filtered to top 4): 20.\n", + "I0209 18:51:20.525065 126234918065984 docker_run.py:297] I0209 18:51:20.524172 129414388114048 run_alphafold.py:276] Running model model_1_pred_0 on AAP20891_1\n", + "I0209 18:51:25.273778 126234918065984 docker_run.py:297] I0209 18:51:25.271720 129414388114048 model.py:165] Running predict with shape(feat) = {'aatype': (4, 286), 'residue_index': (4, 286), 'seq_length': (4,), 'template_aatype': (4, 4, 286), 'template_all_atom_masks': (4, 4, 286, 37), 'template_all_atom_positions': (4, 4, 286, 37, 3), 'template_sum_probs': (4, 4, 1), 'is_distillation': (4,), 'seq_mask': (4, 286), 'msa_mask': (4, 508, 286), 'msa_row_mask': (4, 508), 'random_crop_to_size_seed': (4, 2), 'template_mask': (4, 4), 'template_pseudo_beta': (4, 4, 286, 3), 'template_pseudo_beta_mask': (4, 4, 286), 'atom14_atom_exists': (4, 286, 14), 'residx_atom14_to_atom37': (4, 286, 14), 'residx_atom37_to_atom14': (4, 286, 37), 'atom37_atom_exists': (4, 286, 37), 'extra_msa': (4, 5120, 286), 'extra_msa_mask': (4, 5120, 286), 'extra_msa_row_mask': (4, 5120), 'bert_mask': (4, 508, 286), 'true_msa': (4, 508, 286), 'extra_has_deletion': (4, 5120, 286), 'extra_deletion_value': (4, 5120, 286), 'msa_feat': (4, 508, 286, 49), 'target_feat': (4, 286, 22)}\n", + "I0209 18:53:31.565129 126234918065984 docker_run.py:297] I0209 18:53:31.563400 129414388114048 model.py:175] Output shape was {'distogram': {'bin_edges': (63,), 'logits': (286, 286, 64)}, 'experimentally_resolved': {'logits': (286, 37)}, 'masked_msa': {'logits': (508, 286, 23)}, 'predicted_lddt': {'logits': (286, 50)}, 'structure_module': {'final_atom_mask': (286, 37), 'final_atom_positions': (286, 37, 3)}, 'plddt': 
(286,), 'ranking_confidence': ()}\n", + "I0209 18:53:31.565434 126234918065984 docker_run.py:297] I0209 18:53:31.563578 129414388114048 run_alphafold.py:288] Total JAX model model_1_pred_0 on AAP20891_1 predict time (includes compilation time, see --benchmark): 126.3s\n", + "I0209 18:53:31.764772 126234918065984 docker_run.py:297] I0209 18:53:31.764194 129414388114048 run_alphafold.py:276] Running model model_2_pred_0 on AAP20891_1\n", + "I0209 18:53:35.338457 126234918065984 docker_run.py:297] I0209 18:53:35.335328 129414388114048 model.py:165] Running predict with shape(feat) = {'aatype': (4, 286), 'residue_index': (4, 286), 'seq_length': (4,), 'template_aatype': (4, 4, 286), 'template_all_atom_masks': (4, 4, 286, 37), 'template_all_atom_positions': (4, 4, 286, 37, 3), 'template_sum_probs': (4, 4, 1), 'is_distillation': (4,), 'seq_mask': (4, 286), 'msa_mask': (4, 508, 286), 'msa_row_mask': (4, 508), 'random_crop_to_size_seed': (4, 2), 'template_mask': (4, 4), 'template_pseudo_beta': (4, 4, 286, 3), 'template_pseudo_beta_mask': (4, 4, 286), 'atom14_atom_exists': (4, 286, 14), 'residx_atom14_to_atom37': (4, 286, 14), 'residx_atom37_to_atom14': (4, 286, 37), 'atom37_atom_exists': (4, 286, 37), 'extra_msa': (4, 1024, 286), 'extra_msa_mask': (4, 1024, 286), 'extra_msa_row_mask': (4, 1024), 'bert_mask': (4, 508, 286), 'true_msa': (4, 508, 286), 'extra_has_deletion': (4, 1024, 286), 'extra_deletion_value': (4, 1024, 286), 'msa_feat': (4, 508, 286, 49), 'target_feat': (4, 286, 22)}\n", + "I0209 18:55:08.613217 126234918065984 docker_run.py:297] I0209 18:55:08.612550 129414388114048 model.py:175] Output shape was {'distogram': {'bin_edges': (63,), 'logits': (286, 286, 64)}, 'experimentally_resolved': {'logits': (286, 37)}, 'masked_msa': {'logits': (508, 286, 23)}, 'predicted_lddt': {'logits': (286, 50)}, 'structure_module': {'final_atom_mask': (286, 37), 'final_atom_positions': (286, 37, 3)}, 'plddt': (286,), 'ranking_confidence': ()}\n", + "I0209 18:55:08.613392 
126234918065984 docker_run.py:297] I0209 18:55:08.612682 129414388114048 run_alphafold.py:288] Total JAX model model_2_pred_0 on AAP20891_1 predict time (includes compilation time, see --benchmark): 93.3s\n", + "I0209 18:55:08.796196 126234918065984 docker_run.py:297] I0209 18:55:08.795683 129414388114048 run_alphafold.py:276] Running model model_3_pred_0 on AAP20891_1\n", + "I0209 18:55:11.976388 126234918065984 docker_run.py:297] I0209 18:55:11.975605 129414388114048 model.py:165] Running predict with shape(feat) = {'aatype': (4, 286), 'residue_index': (4, 286), 'seq_length': (4,), 'is_distillation': (4,), 'seq_mask': (4, 286), 'msa_mask': (4, 512, 286), 'msa_row_mask': (4, 512), 'random_crop_to_size_seed': (4, 2), 'atom14_atom_exists': (4, 286, 14), 'residx_atom14_to_atom37': (4, 286, 14), 'residx_atom37_to_atom14': (4, 286, 37), 'atom37_atom_exists': (4, 286, 37), 'extra_msa': (4, 5120, 286), 'extra_msa_mask': (4, 5120, 286), 'extra_msa_row_mask': (4, 5120), 'bert_mask': (4, 512, 286), 'true_msa': (4, 512, 286), 'extra_has_deletion': (4, 5120, 286), 'extra_deletion_value': (4, 5120, 286), 'msa_feat': (4, 512, 286, 49), 'target_feat': (4, 286, 22)}\n", + "I0209 18:56:30.749872 126234918065984 docker_run.py:297] I0209 18:56:30.749186 129414388114048 model.py:175] Output shape was {'distogram': {'bin_edges': (63,), 'logits': (286, 286, 64)}, 'experimentally_resolved': {'logits': (286, 37)}, 'masked_msa': {'logits': (512, 286, 23)}, 'predicted_lddt': {'logits': (286, 50)}, 'structure_module': {'final_atom_mask': (286, 37), 'final_atom_positions': (286, 37, 3)}, 'plddt': (286,), 'ranking_confidence': ()}\n", + "I0209 18:56:30.750071 126234918065984 docker_run.py:297] I0209 18:56:30.749317 129414388114048 run_alphafold.py:288] Total JAX model model_3_pred_0 on AAP20891_1 predict time (includes compilation time, see --benchmark): 78.8s\n", + "I0209 18:56:30.932418 126234918065984 docker_run.py:297] I0209 18:56:30.931747 129414388114048 run_alphafold.py:276] Running 
model model_4_pred_0 on AAP20891_1\n", + "I0209 18:56:34.075862 126234918065984 docker_run.py:297] I0209 18:56:34.074738 129414388114048 model.py:165] Running predict with shape(feat) = {'aatype': (4, 286), 'residue_index': (4, 286), 'seq_length': (4,), 'is_distillation': (4,), 'seq_mask': (4, 286), 'msa_mask': (4, 512, 286), 'msa_row_mask': (4, 512), 'random_crop_to_size_seed': (4, 2), 'atom14_atom_exists': (4, 286, 14), 'residx_atom14_to_atom37': (4, 286, 14), 'residx_atom37_to_atom14': (4, 286, 37), 'atom37_atom_exists': (4, 286, 37), 'extra_msa': (4, 5120, 286), 'extra_msa_mask': (4, 5120, 286), 'extra_msa_row_mask': (4, 5120), 'bert_mask': (4, 512, 286), 'true_msa': (4, 512, 286), 'extra_has_deletion': (4, 5120, 286), 'extra_deletion_value': (4, 5120, 286), 'msa_feat': (4, 512, 286, 49), 'target_feat': (4, 286, 22)}\n", + "I0209 18:57:49.478633 126234918065984 docker_run.py:297] I0209 18:57:49.478027 129414388114048 model.py:175] Output shape was {'distogram': {'bin_edges': (63,), 'logits': (286, 286, 64)}, 'experimentally_resolved': {'logits': (286, 37)}, 'masked_msa': {'logits': (512, 286, 23)}, 'predicted_lddt': {'logits': (286, 50)}, 'structure_module': {'final_atom_mask': (286, 37), 'final_atom_positions': (286, 37, 3)}, 'plddt': (286,), 'ranking_confidence': ()}\n", + "I0209 18:57:49.478832 126234918065984 docker_run.py:297] I0209 18:57:49.478156 129414388114048 run_alphafold.py:288] Total JAX model model_4_pred_0 on AAP20891_1 predict time (includes compilation time, see --benchmark): 75.4s\n", + "I0209 18:57:49.663070 126234918065984 docker_run.py:297] I0209 18:57:49.662528 129414388114048 run_alphafold.py:276] Running model model_5_pred_0 on AAP20891_1\n", + "I0209 18:57:52.800162 126234918065984 docker_run.py:297] I0209 18:57:52.799148 129414388114048 model.py:165] Running predict with shape(feat) = {'aatype': (4, 286), 'residue_index': (4, 286), 'seq_length': (4,), 'is_distillation': (4,), 'seq_mask': (4, 286), 'msa_mask': (4, 512, 286), 
'msa_row_mask': (4, 512), 'random_crop_to_size_seed': (4, 2), 'atom14_atom_exists': (4, 286, 14), 'residx_atom14_to_atom37': (4, 286, 14), 'residx_atom37_to_atom14': (4, 286, 37), 'atom37_atom_exists': (4, 286, 37), 'extra_msa': (4, 1024, 286), 'extra_msa_mask': (4, 1024, 286), 'extra_msa_row_mask': (4, 1024), 'bert_mask': (4, 512, 286), 'true_msa': (4, 512, 286), 'extra_has_deletion': (4, 1024, 286), 'extra_deletion_value': (4, 1024, 286), 'msa_feat': (4, 512, 286, 49), 'target_feat': (4, 286, 22)}\n", + "I0209 18:59:06.248019 126234918065984 docker_run.py:297] I0209 18:59:06.247377 129414388114048 model.py:175] Output shape was {'distogram': {'bin_edges': (63,), 'logits': (286, 286, 64)}, 'experimentally_resolved': {'logits': (286, 37)}, 'masked_msa': {'logits': (512, 286, 23)}, 'predicted_lddt': {'logits': (286, 50)}, 'structure_module': {'final_atom_mask': (286, 37), 'final_atom_positions': (286, 37, 3)}, 'plddt': (286,), 'ranking_confidence': ()}\n", + "I0209 18:59:06.248191 126234918065984 docker_run.py:297] I0209 18:59:06.247525 129414388114048 run_alphafold.py:288] Total JAX model model_5_pred_0 on AAP20891_1 predict time (includes compilation time, see --benchmark): 73.4s\n", + "I0209 18:59:11.661808 126234918065984 docker_run.py:297] I0209 18:59:11.660365 129414388114048 amber_minimize.py:178] alterations info: {'nonstandard_residues': [], 'removed_heterogens': set(), 'missing_residues': {}, 'missing_heavy_atoms': {}, 'missing_terminals': {: ['OXT']}, 'Se_in_MET': [], 'removed_chains': {0: []}}\n", + "I0209 18:59:11.858800 126234918065984 docker_run.py:297] I0209 18:59:11.858058 129414388114048 amber_minimize.py:408] Minimizing protein, attempt 1 of 100.\n", + "I0209 18:59:12.170496 126234918065984 docker_run.py:297] I0209 18:59:12.169910 129414388114048 amber_minimize.py:69] Restraining 2212 / 4439 particles.\n", + "I0209 18:59:14.782054 126234918065984 docker_run.py:297] I0209 18:59:14.780544 129414388114048 amber_minimize.py:178] alterations info: 
{'nonstandard_residues': [], 'removed_heterogens': set(), 'missing_residues': {}, 'missing_heavy_atoms': {}, 'missing_terminals': {}, 'Se_in_MET': [], 'removed_chains': {0: []}}\n", + "I0209 18:59:17.725336 126234918065984 docker_run.py:297] I0209 18:59:17.724404 129414388114048 amber_minimize.py:500] Iteration completed: Einit 67630.03 Efinal -7208.87 Time 1.44 s num residue violations 0 num residue exclusions 0\n", + "I0209 18:59:18.429040 126234918065984 docker_run.py:297] I0209 18:59:18.428259 129414388114048 run_alphafold.py:414] Final timings for AAP20891_1: {'features': 1760.3612880706787, 'process_features_model_1_pred_0': 4.747036695480347, 'predict_and_compile_model_1_pred_0': 126.29224181175232, 'process_features_model_2_pred_0': 3.57088565826416, 'predict_and_compile_model_2_pred_0': 93.27748966217041, 'process_features_model_3_pred_0': 3.1796696186065674, 'predict_and_compile_model_3_pred_0': 78.77385020256042, 'process_features_model_4_pred_0': 3.142765522003174, 'predict_and_compile_model_4_pred_0': 75.4035313129425, 'process_features_model_5_pred_0': 3.1363840103149414, 'predict_and_compile_model_5_pred_0': 73.44848084449768, 'relax_model_1_pred_0': 11.401028394699097}\n", + "\n" ] }, { - "ename": "FileNotFoundError", - "evalue": "Structure file not found: /home/nab/Niklas/pyeed/docs/resources/alphafold/output/AAP20891_1_relaxed.pdb", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[7], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43malphafold_runner\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_alphafold\u001b[49m\u001b[43m(\u001b[49m\u001b[43msequence\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msequence\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43msequence_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mid\u001b[39;49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/Niklas/pyeed/src/pyeed/tools/alphafold2.py:106\u001b[0m, in \u001b[0;36mAlphaFoldRunner.run_alphafold\u001b[0;34m(self, sequence, sequence_id, max_template_date)\u001b[0m\n\u001b[1;32m 104\u001b[0m error_msg \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mStructure file not found: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mstructure_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 105\u001b[0m logger\u001b[38;5;241m.\u001b[39merror(error_msg)\n\u001b[0;32m--> 106\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mFileNotFoundError\u001b[39;00m(error_msg)\n\u001b[1;32m 108\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSuccessfully generated structure at: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mstructure_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 109\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstructure_file\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;28mstr\u001b[39m(structure_path)}\n", - "\u001b[0;31mFileNotFoundError\u001b[0m: Structure file not found: /home/nab/Niklas/pyeed/docs/resources/alphafold/output/AAP20891_1_relaxed.pdb" - ] + "data": { + "text/plain": [ + "{}" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ From a8d8fbe2757d37890911f082f15c12913edbcd00 Mon Sep 17 00:00:00 2001 From: Niklas Abraham GPU Date: Fri, 28 Feb 2025 16:25:24 +0000 Subject: [PATCH 04/13] staring fixes in standardnumbering --- src/pyeed/adapter/ncbi_dna_mapper.py | 6 ----- src/pyeed/analysis/standard_numbering.py | 4 +-- src/pyeed/main.py | 2 +- src/pyeed/model.py | 31 +++++++++++++++++++++++- 4 files changed, 33 
insertions(+), 10 deletions(-) diff --git a/src/pyeed/adapter/ncbi_dna_mapper.py b/src/pyeed/adapter/ncbi_dna_mapper.py index 511c3a40..34cffcf9 100644 --- a/src/pyeed/adapter/ncbi_dna_mapper.py +++ b/src/pyeed/adapter/ncbi_dna_mapper.py @@ -29,12 +29,6 @@ def add_to_db(self, response: Response) -> None: None """ - with open("tests/data/api_responses/ncbi_dna_QLYQ01000020.txt", "w") as f: - f.write(response.content.decode()) - assert ( - response.status_code == 200 - ), f"Request to {response.url} failed with status code {response.status_code}" - records = self.parse_response(response.content) for record in records: diff --git a/src/pyeed/analysis/standard_numbering.py b/src/pyeed/analysis/standard_numbering.py index a2d4ea1e..90928152 100644 --- a/src/pyeed/analysis/standard_numbering.py +++ b/src/pyeed/analysis/standard_numbering.py @@ -487,7 +487,7 @@ def apply_standard_numbering( if __name__ == "__main__": # Database connection setup parameters. - uri = "bolt://127.0.0.1:7687" + uri = "bolt://129.69.129.130:7687" user = "neo4j" password = "12345678" @@ -538,7 +538,7 @@ def apply_standard_numbering( # Instantiate the numbering tool and run the numbering algorithm. sn_tool = StandardNumberingTool("test_standard_numbering") - sn_tool.positions = sn_tool.run_numbering_algorithm_clustalo("seq0", alignment) + sn_tool.positions = sn_tool.run_numbering_algorithm_pairwise("seq0", alignment) # Print a sample of the computed positions to verify the output. 
count = 0 diff --git a/src/pyeed/main.py b/src/pyeed/main.py index 5950965d..6ab3c157 100644 --- a/src/pyeed/main.py +++ b/src/pyeed/main.py @@ -358,6 +358,6 @@ def fetch_dna_entries_for_proteins(self) -> None: MATCH (p:Protein {{accession_id: '{protein["accession_id"]}'}}) MATCH (d:DNA {{accession_id: '{protein["nucleotide_id"]}'}}) MERGE (d)-[r:ENCODES]->(p) - SET r.start = {protein["start"]}, r.end = {protein["end"]} + SET r.start = {protein["nucleotide_start"]}, r.end = {protein["nucleotide_end"]} """ self.db.execute_write(query) diff --git a/src/pyeed/model.py b/src/pyeed/model.py index 7a720560..0b08f37c 100644 --- a/src/pyeed/model.py +++ b/src/pyeed/model.py @@ -189,6 +189,35 @@ class Region(StrictStructuredNode): ) +class DNAProteinRel(StructuredRel): # type: ignore + """A relationship between a DNA and a protein.""" + + start = IntegerProperty(required=True) + end = IntegerProperty(required=True) + + @classmethod + def validate_and_connect( + cls, + molecule1: StrictStructuredNode, + molecule2: StrictStructuredNode, + start: int, + end: int, + ) -> "DNAProteinRel": + """Validates the start and end positions and connects the two molecules.""" + molecule1.protein.connect( + molecule2, + { + "start": start, + "end": end, + }, + ) + + return cls( + start=start, + end=end, + ) + + class RegionRel(StructuredRel): # type: ignore start = IntegerProperty(required=True) end = IntegerProperty(required=True) @@ -496,7 +525,7 @@ class DNA(StrictStructuredNode): region = RelationshipTo("Region", "HAS_REGION", model=RegionRel) go_annotation = RelationshipTo("GOAnnotation", "ASSOCIATED_WITH") mutation = RelationshipTo("DNA", "MUTATION", model=Mutation) - protein = RelationshipTo("Protein", "ENCODES", model=RegionRel) + protein = RelationshipTo("Protein", "ENCODES", model=DNAProteinRel) pairwise_aligned = RelationshipTo( "DNA", "PAIRWISE_ALIGNED", model=PairwiseAlignmentResult ) From e9f250b1cae549fc815470b6d1e3ff4d2f171e58 Mon Sep 17 00:00:00 2001 From: Niklas Abraham 
GPU Date: Fri, 28 Feb 2025 17:21:25 +0000 Subject: [PATCH 05/13] fixed issues with clustal o and key errro added new norebookt for standradnumbering --- docs/usage/mutation_analysis.ipynb | 95 ++----- docs/usage/standard_numbering.ipynb | 305 +++++++++++++++++++++++ src/pyeed/analysis/standard_numbering.py | 22 +- src/pyeed/model.py | 2 +- 4 files changed, 353 insertions(+), 71 deletions(-) create mode 100644 docs/usage/standard_numbering.ipynb diff --git a/docs/usage/mutation_analysis.ipynb b/docs/usage/mutation_analysis.ipynb index 7ff1a0bd..3d5618ef 100644 --- a/docs/usage/mutation_analysis.ipynb +++ b/docs/usage/mutation_analysis.ipynb @@ -11,7 +11,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 45, "metadata": {}, "outputs": [], "source": [ @@ -37,15 +37,15 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 46, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Pyeed Graph Object Mapping constraints not defined. 
Use _install_labels() to set up model constraints.\n", - "📡 Connected to database.\n" + "📡 Connected to database.\n", + "All data has been wiped from the database.\n" ] } ], @@ -54,7 +54,9 @@ "user = \"neo4j\"\n", "password = \"12345678\"\n", "\n", - "eedb = Pyeed(uri, user=user, password=password)" + "eedb = Pyeed(uri, user=user, password=password)\n", + "\n", + "eedb.db.wipe_database(date=\"2025-02-28\")" ] }, { @@ -73,7 +75,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 47, "metadata": {}, "outputs": [], "source": [ @@ -99,60 +101,16 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 48, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2025-02-07 17:32:37.654\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mpyeed.tools.clustalo\u001b[0m:\u001b[36malign\u001b[0m:\u001b[36m35\u001b[0m - \u001b[31m\u001b[1mAlignment failed: [Errno 60] Operation timed out\u001b[0m\n" - ] - }, - { - "ename": "ConnectTimeout", - "evalue": "[Errno 60] Operation timed out", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mConnectTimeout\u001b[0m Traceback (most recent call last)", - "File \u001b[0;32m~/miniconda3/envs/pyeed/lib/python3.11/site-packages/httpx/_transports/default.py:72\u001b[0m, in \u001b[0;36mmap_httpcore_exceptions\u001b[0;34m()\u001b[0m\n\u001b[1;32m 71\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m---> 72\u001b[0m \u001b[38;5;28;01myield\u001b[39;00m\n\u001b[1;32m 73\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n", - "File \u001b[0;32m~/miniconda3/envs/pyeed/lib/python3.11/site-packages/httpx/_transports/default.py:236\u001b[0m, in \u001b[0;36mHTTPTransport.handle_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 235\u001b[0m 
\u001b[38;5;28;01mwith\u001b[39;00m map_httpcore_exceptions():\n\u001b[0;32m--> 236\u001b[0m resp \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_pool\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mhandle_request\u001b[49m\u001b[43m(\u001b[49m\u001b[43mreq\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 238\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(resp\u001b[38;5;241m.\u001b[39mstream, typing\u001b[38;5;241m.\u001b[39mIterable)\n", - "File \u001b[0;32m~/miniconda3/envs/pyeed/lib/python3.11/site-packages/httpcore/_sync/connection_pool.py:256\u001b[0m, in \u001b[0;36mConnectionPool.handle_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 255\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_close_connections(closing)\n\u001b[0;32m--> 256\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exc \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 258\u001b[0m \u001b[38;5;66;03m# Return the response. 
Note that in this case we still have to manage\u001b[39;00m\n\u001b[1;32m 259\u001b[0m \u001b[38;5;66;03m# the point at which the response is closed.\u001b[39;00m\n", - "File \u001b[0;32m~/miniconda3/envs/pyeed/lib/python3.11/site-packages/httpcore/_sync/connection_pool.py:236\u001b[0m, in \u001b[0;36mConnectionPool.handle_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 234\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 235\u001b[0m \u001b[38;5;66;03m# Send the request on the assigned connection.\u001b[39;00m\n\u001b[0;32m--> 236\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mconnection\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mhandle_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 237\u001b[0m \u001b[43m \u001b[49m\u001b[43mpool_request\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\n\u001b[1;32m 238\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 239\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ConnectionNotAvailable:\n\u001b[1;32m 240\u001b[0m \u001b[38;5;66;03m# In some cases a connection may initially be available to\u001b[39;00m\n\u001b[1;32m 241\u001b[0m \u001b[38;5;66;03m# handle a request, but then become unavailable.\u001b[39;00m\n\u001b[1;32m 242\u001b[0m \u001b[38;5;66;03m#\u001b[39;00m\n\u001b[1;32m 243\u001b[0m \u001b[38;5;66;03m# In this case we clear the connection and try again.\u001b[39;00m\n", - "File \u001b[0;32m~/miniconda3/envs/pyeed/lib/python3.11/site-packages/httpcore/_sync/connection.py:101\u001b[0m, in \u001b[0;36mHTTPConnection.handle_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 100\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_connect_failed \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m--> 101\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exc\n\u001b[1;32m 103\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_connection\u001b[38;5;241m.\u001b[39mhandle_request(request)\n", - "File \u001b[0;32m~/miniconda3/envs/pyeed/lib/python3.11/site-packages/httpcore/_sync/connection.py:78\u001b[0m, in \u001b[0;36mHTTPConnection.handle_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 77\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_connection \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m---> 78\u001b[0m stream \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_connect\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 80\u001b[0m ssl_object \u001b[38;5;241m=\u001b[39m stream\u001b[38;5;241m.\u001b[39mget_extra_info(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mssl_object\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", - "File \u001b[0;32m~/miniconda3/envs/pyeed/lib/python3.11/site-packages/httpcore/_sync/connection.py:124\u001b[0m, in \u001b[0;36mHTTPConnection._connect\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m Trace(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mconnect_tcp\u001b[39m\u001b[38;5;124m\"\u001b[39m, logger, request, kwargs) \u001b[38;5;28;01mas\u001b[39;00m trace:\n\u001b[0;32m--> 124\u001b[0m stream \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_network_backend\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconnect_tcp\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 125\u001b[0m trace\u001b[38;5;241m.\u001b[39mreturn_value \u001b[38;5;241m=\u001b[39m stream\n", - "File \u001b[0;32m~/miniconda3/envs/pyeed/lib/python3.11/site-packages/httpcore/_backends/sync.py:207\u001b[0m, in 
\u001b[0;36mSyncBackend.connect_tcp\u001b[0;34m(self, host, port, timeout, local_address, socket_options)\u001b[0m\n\u001b[1;32m 202\u001b[0m exc_map: ExceptionMapping \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 203\u001b[0m socket\u001b[38;5;241m.\u001b[39mtimeout: ConnectTimeout,\n\u001b[1;32m 204\u001b[0m \u001b[38;5;167;01mOSError\u001b[39;00m: ConnectError,\n\u001b[1;32m 205\u001b[0m }\n\u001b[0;32m--> 207\u001b[0m \u001b[43m\u001b[49m\u001b[38;5;28;43;01mwith\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mmap_exceptions\u001b[49m\u001b[43m(\u001b[49m\u001b[43mexc_map\u001b[49m\u001b[43m)\u001b[49m\u001b[43m:\u001b[49m\n\u001b[1;32m 208\u001b[0m \u001b[43m \u001b[49m\u001b[43msock\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43msocket\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcreate_connection\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 209\u001b[0m \u001b[43m \u001b[49m\u001b[43maddress\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 210\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 211\u001b[0m \u001b[43m \u001b[49m\u001b[43msource_address\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msource_address\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 212\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniconda3/envs/pyeed/lib/python3.11/contextlib.py:155\u001b[0m, in \u001b[0;36m_GeneratorContextManager.__exit__\u001b[0;34m(self, typ, value, traceback)\u001b[0m\n\u001b[1;32m 154\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 155\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgen\u001b[38;5;241m.\u001b[39mthrow(typ, value, traceback)\n\u001b[1;32m 156\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mStopIteration\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[1;32m 157\u001b[0m \u001b[38;5;66;03m# Suppress StopIteration *unless* it's the same exception 
that\u001b[39;00m\n\u001b[1;32m 158\u001b[0m \u001b[38;5;66;03m# was passed to throw(). This prevents a StopIteration\u001b[39;00m\n\u001b[1;32m 159\u001b[0m \u001b[38;5;66;03m# raised inside the \"with\" statement from being suppressed.\u001b[39;00m\n", - "File \u001b[0;32m~/miniconda3/envs/pyeed/lib/python3.11/site-packages/httpcore/_exceptions.py:14\u001b[0m, in \u001b[0;36mmap_exceptions\u001b[0;34m(map)\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(exc, from_exc):\n\u001b[0;32m---> 14\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m to_exc(exc) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mexc\u001b[39;00m\n\u001b[1;32m 15\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m\n", - "\u001b[0;31mConnectTimeout\u001b[0m: [Errno 60] Operation timed out", - "\nThe above exception was the direct cause of the following exception:\n", - "\u001b[0;31mConnectTimeout\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[4], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m sn \u001b[38;5;241m=\u001b[39m StandardNumberingTool(name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtest_standard_numbering\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m----> 2\u001b[0m \u001b[43msn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply_standard_numbering\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mbase_sequence_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mKJO56189.1\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdb\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43meedb\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdb\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlist_of_seq_ids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mids\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m)\u001b[49m\n", - 
"File \u001b[0;32m~/Documents/GitHub/pyeed/src/pyeed/analysis/standard_numbering.py:466\u001b[0m, in \u001b[0;36mStandardNumberingTool.apply_standard_numbering\u001b[0;34m(self, base_sequence_id, db, list_of_seq_ids)\u001b[0m\n\u001b[1;32m 464\u001b[0m \u001b[38;5;66;03m# Run the multiple sequence alignment using ClustalOmega.\u001b[39;00m\n\u001b[1;32m 465\u001b[0m clustalO \u001b[38;5;241m=\u001b[39m ClustalOmega()\n\u001b[0;32m--> 466\u001b[0m alignment \u001b[38;5;241m=\u001b[39m \u001b[43mclustalO\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43malign\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 467\u001b[0m \u001b[43m \u001b[49m\u001b[43msequences_dict\u001b[49m\n\u001b[1;32m 468\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# Passing a dict of sequences to ClustalOmega.\u001b[39;00m\n\u001b[1;32m 470\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAlignment received from ClustalOmega:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00malignment\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 472\u001b[0m \u001b[38;5;66;03m# Compute standard numbering positions using the computed alignment.\u001b[39;00m\n", - "File \u001b[0;32m~/Documents/GitHub/pyeed/src/pyeed/tools/clustalo.py:31\u001b[0m, in \u001b[0;36mClustalOmega.align\u001b[0;34m(self, sequences)\u001b[0m\n\u001b[1;32m 29\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 30\u001b[0m data \u001b[38;5;241m=\u001b[39m dict_to_fasta(sequences)\n\u001b[0;32m---> 31\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_run_clustalo_service\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 32\u001b[0m sanitized \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_sanitize_response(response)\n\u001b[1;32m 
33\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_parse_alignment_output(sanitized)\n", - "File \u001b[0;32m~/Documents/GitHub/pyeed/src/pyeed/tools/clustalo.py:87\u001b[0m, in \u001b[0;36mClustalOmega._run_clustalo_service\u001b[0;34m(self, sequences, timeout)\u001b[0m\n\u001b[1;32m 83\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mclustalo\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mservice_url:\n\u001b[1;32m 84\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mservice_url \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mservice_url\u001b[38;5;241m.\u001b[39mreplace(\n\u001b[1;32m 85\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mclustalo\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m129.69.129.130\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 86\u001b[0m )\n\u001b[0;32m---> 87\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_run_clustalo_service\u001b[49m\u001b[43m(\u001b[49m\u001b[43msequences\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 88\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m httpx\u001b[38;5;241m.\u001b[39mConnectError(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPyEED Docker Service not running\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01me\u001b[39;00m\n", - "File \u001b[0;32m~/Documents/GitHub/pyeed/src/pyeed/tools/clustalo.py:73\u001b[0m, in \u001b[0;36mClustalOmega._run_clustalo_service\u001b[0;34m(self, sequences, timeout)\u001b[0m\n\u001b[1;32m 71\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 72\u001b[0m files \u001b[38;5;241m=\u001b[39m 
{\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfile\u001b[39m\u001b[38;5;124m\"\u001b[39m: (\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minput.fasta\u001b[39m\u001b[38;5;124m\"\u001b[39m, sequences, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtext/plain\u001b[39m\u001b[38;5;124m\"\u001b[39m)}\n\u001b[0;32m---> 73\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mhttpx\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpost\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 74\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mservice_url\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 75\u001b[0m \u001b[43m \u001b[49m\u001b[43mfiles\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfiles\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 76\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 77\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 78\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_alignment_success(response)\n\u001b[1;32m 79\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m response\n", - "File \u001b[0;32m~/miniconda3/envs/pyeed/lib/python3.11/site-packages/httpx/_api.py:331\u001b[0m, in \u001b[0;36mpost\u001b[0;34m(url, content, data, files, json, params, headers, cookies, auth, proxy, proxies, follow_redirects, cert, verify, timeout, trust_env)\u001b[0m\n\u001b[1;32m 307\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mpost\u001b[39m(\n\u001b[1;32m 308\u001b[0m url: URL \u001b[38;5;241m|\u001b[39m \u001b[38;5;28mstr\u001b[39m,\n\u001b[1;32m 309\u001b[0m \u001b[38;5;241m*\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 324\u001b[0m trust_env: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[1;32m 325\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m 
Response:\n\u001b[1;32m 326\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 327\u001b[0m \u001b[38;5;124;03m Sends a `POST` request.\u001b[39;00m\n\u001b[1;32m 328\u001b[0m \n\u001b[1;32m 329\u001b[0m \u001b[38;5;124;03m **Parameters**: See `httpx.request`.\u001b[39;00m\n\u001b[1;32m 330\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 331\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 332\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mPOST\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 333\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 334\u001b[0m \u001b[43m \u001b[49m\u001b[43mcontent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcontent\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 335\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 336\u001b[0m \u001b[43m \u001b[49m\u001b[43mfiles\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfiles\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 337\u001b[0m \u001b[43m \u001b[49m\u001b[43mjson\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjson\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 338\u001b[0m \u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 339\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 340\u001b[0m \u001b[43m \u001b[49m\u001b[43mcookies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcookies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 341\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mauth\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mauth\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 342\u001b[0m \u001b[43m \u001b[49m\u001b[43mproxy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 343\u001b[0m \u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 344\u001b[0m \u001b[43m \u001b[49m\u001b[43mfollow_redirects\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfollow_redirects\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 345\u001b[0m \u001b[43m \u001b[49m\u001b[43mcert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcert\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 346\u001b[0m \u001b[43m \u001b[49m\u001b[43mverify\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mverify\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 347\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 348\u001b[0m \u001b[43m \u001b[49m\u001b[43mtrust_env\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtrust_env\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 349\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniconda3/envs/pyeed/lib/python3.11/site-packages/httpx/_api.py:118\u001b[0m, in \u001b[0;36mrequest\u001b[0;34m(method, url, params, content, data, files, json, headers, cookies, auth, proxy, proxies, timeout, follow_redirects, verify, cert, trust_env)\u001b[0m\n\u001b[1;32m 58\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 59\u001b[0m \u001b[38;5;124;03mSends an HTTP request.\u001b[39;00m\n\u001b[1;32m 60\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 107\u001b[0m \u001b[38;5;124;03m```\u001b[39;00m\n\u001b[1;32m 108\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 109\u001b[0m 
\u001b[38;5;28;01mwith\u001b[39;00m Client(\n\u001b[1;32m 110\u001b[0m cookies\u001b[38;5;241m=\u001b[39mcookies,\n\u001b[1;32m 111\u001b[0m proxy\u001b[38;5;241m=\u001b[39mproxy,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 116\u001b[0m trust_env\u001b[38;5;241m=\u001b[39mtrust_env,\n\u001b[1;32m 117\u001b[0m ) \u001b[38;5;28;01mas\u001b[39;00m client:\n\u001b[0;32m--> 118\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 119\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 120\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 121\u001b[0m \u001b[43m \u001b[49m\u001b[43mcontent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcontent\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 122\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 123\u001b[0m \u001b[43m \u001b[49m\u001b[43mfiles\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfiles\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 124\u001b[0m \u001b[43m \u001b[49m\u001b[43mjson\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjson\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 125\u001b[0m \u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 126\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 127\u001b[0m \u001b[43m \u001b[49m\u001b[43mauth\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mauth\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 128\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mfollow_redirects\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfollow_redirects\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 129\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniconda3/envs/pyeed/lib/python3.11/site-packages/httpx/_client.py:837\u001b[0m, in \u001b[0;36mClient.request\u001b[0;34m(self, method, url, content, data, files, json, params, headers, cookies, auth, follow_redirects, timeout, extensions)\u001b[0m\n\u001b[1;32m 822\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(message, \u001b[38;5;167;01mDeprecationWarning\u001b[39;00m)\n\u001b[1;32m 824\u001b[0m request \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbuild_request(\n\u001b[1;32m 825\u001b[0m method\u001b[38;5;241m=\u001b[39mmethod,\n\u001b[1;32m 826\u001b[0m url\u001b[38;5;241m=\u001b[39murl,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 835\u001b[0m extensions\u001b[38;5;241m=\u001b[39mextensions,\n\u001b[1;32m 836\u001b[0m )\n\u001b[0;32m--> 837\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mauth\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mauth\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfollow_redirects\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfollow_redirects\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniconda3/envs/pyeed/lib/python3.11/site-packages/httpx/_client.py:926\u001b[0m, in \u001b[0;36mClient.send\u001b[0;34m(self, request, stream, auth, follow_redirects)\u001b[0m\n\u001b[1;32m 922\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_set_timeout(request)\n\u001b[1;32m 924\u001b[0m auth \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_build_request_auth(request, auth)\n\u001b[0;32m--> 926\u001b[0m 
response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_send_handling_auth\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 927\u001b[0m \u001b[43m \u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 928\u001b[0m \u001b[43m \u001b[49m\u001b[43mauth\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mauth\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 929\u001b[0m \u001b[43m \u001b[49m\u001b[43mfollow_redirects\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfollow_redirects\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 930\u001b[0m \u001b[43m \u001b[49m\u001b[43mhistory\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 931\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 932\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 933\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m stream:\n", - "File \u001b[0;32m~/miniconda3/envs/pyeed/lib/python3.11/site-packages/httpx/_client.py:954\u001b[0m, in \u001b[0;36mClient._send_handling_auth\u001b[0;34m(self, request, auth, follow_redirects, history)\u001b[0m\n\u001b[1;32m 951\u001b[0m request \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mnext\u001b[39m(auth_flow)\n\u001b[1;32m 953\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m--> 954\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_send_handling_redirects\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 955\u001b[0m \u001b[43m \u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 956\u001b[0m \u001b[43m \u001b[49m\u001b[43mfollow_redirects\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfollow_redirects\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 957\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mhistory\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhistory\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 958\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 959\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 960\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n", - "File \u001b[0;32m~/miniconda3/envs/pyeed/lib/python3.11/site-packages/httpx/_client.py:991\u001b[0m, in \u001b[0;36mClient._send_handling_redirects\u001b[0;34m(self, request, follow_redirects, history)\u001b[0m\n\u001b[1;32m 988\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m hook \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_event_hooks[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrequest\u001b[39m\u001b[38;5;124m\"\u001b[39m]:\n\u001b[1;32m 989\u001b[0m hook(request)\n\u001b[0;32m--> 991\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_send_single_request\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 992\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 993\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m hook \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_event_hooks[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mresponse\u001b[39m\u001b[38;5;124m\"\u001b[39m]:\n", - "File \u001b[0;32m~/miniconda3/envs/pyeed/lib/python3.11/site-packages/httpx/_client.py:1027\u001b[0m, in \u001b[0;36mClient._send_single_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 1022\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\n\u001b[1;32m 1023\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAttempted to send an async request with a sync Client instance.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1024\u001b[0m )\n\u001b[1;32m 1026\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m 
request_context(request\u001b[38;5;241m=\u001b[39mrequest):\n\u001b[0;32m-> 1027\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mtransport\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mhandle_request\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1029\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(response\u001b[38;5;241m.\u001b[39mstream, SyncByteStream)\n\u001b[1;32m 1031\u001b[0m response\u001b[38;5;241m.\u001b[39mrequest \u001b[38;5;241m=\u001b[39m request\n", - "File \u001b[0;32m~/miniconda3/envs/pyeed/lib/python3.11/site-packages/httpx/_transports/default.py:235\u001b[0m, in \u001b[0;36mHTTPTransport.handle_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 221\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(request\u001b[38;5;241m.\u001b[39mstream, SyncByteStream)\n\u001b[1;32m 223\u001b[0m req \u001b[38;5;241m=\u001b[39m httpcore\u001b[38;5;241m.\u001b[39mRequest(\n\u001b[1;32m 224\u001b[0m method\u001b[38;5;241m=\u001b[39mrequest\u001b[38;5;241m.\u001b[39mmethod,\n\u001b[1;32m 225\u001b[0m url\u001b[38;5;241m=\u001b[39mhttpcore\u001b[38;5;241m.\u001b[39mURL(\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 233\u001b[0m extensions\u001b[38;5;241m=\u001b[39mrequest\u001b[38;5;241m.\u001b[39mextensions,\n\u001b[1;32m 234\u001b[0m )\n\u001b[0;32m--> 235\u001b[0m \u001b[43m\u001b[49m\u001b[38;5;28;43;01mwith\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mmap_httpcore_exceptions\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m:\u001b[49m\n\u001b[1;32m 236\u001b[0m \u001b[43m \u001b[49m\u001b[43mresp\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_pool\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mhandle_request\u001b[49m\u001b[43m(\u001b[49m\u001b[43mreq\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 
238\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(resp\u001b[38;5;241m.\u001b[39mstream, typing\u001b[38;5;241m.\u001b[39mIterable)\n", - "File \u001b[0;32m~/miniconda3/envs/pyeed/lib/python3.11/contextlib.py:155\u001b[0m, in \u001b[0;36m_GeneratorContextManager.__exit__\u001b[0;34m(self, typ, value, traceback)\u001b[0m\n\u001b[1;32m 153\u001b[0m value \u001b[38;5;241m=\u001b[39m typ()\n\u001b[1;32m 154\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 155\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgen\u001b[38;5;241m.\u001b[39mthrow(typ, value, traceback)\n\u001b[1;32m 156\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mStopIteration\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[1;32m 157\u001b[0m \u001b[38;5;66;03m# Suppress StopIteration *unless* it's the same exception that\u001b[39;00m\n\u001b[1;32m 158\u001b[0m \u001b[38;5;66;03m# was passed to throw(). This prevents a StopIteration\u001b[39;00m\n\u001b[1;32m 159\u001b[0m \u001b[38;5;66;03m# raised inside the \"with\" statement from being suppressed.\u001b[39;00m\n\u001b[1;32m 160\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m exc \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m value\n", - "File \u001b[0;32m~/miniconda3/envs/pyeed/lib/python3.11/site-packages/httpx/_transports/default.py:89\u001b[0m, in \u001b[0;36mmap_httpcore_exceptions\u001b[0;34m()\u001b[0m\n\u001b[1;32m 86\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m\n\u001b[1;32m 88\u001b[0m message \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mstr\u001b[39m(exc)\n\u001b[0;32m---> 89\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m mapped_exc(message) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mexc\u001b[39;00m\n", - "\u001b[0;31mConnectTimeout\u001b[0m: [Errno 60] Operation timed out" - ] - } - ], + "outputs": [], "source": [ "sn = StandardNumberingTool(name=\"test_standard_numbering\")\n", + "\n", 
+ "\n", "sn.apply_standard_numbering(\n", " base_sequence_id=\"KJO56189.1\", db=eedb.db, list_of_seq_ids=ids\n", - ")" + ")\n" ] }, { @@ -173,17 +131,9 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 49, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2025-02-07 15:26:53.370\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.analysis.mutation_detection\u001b[0m:\u001b[36msave_mutations_to_db\u001b[0m:\u001b[36m137\u001b[0m - \u001b[34m\u001b[1mSaved 3 mutations to database\u001b[0m\n" - ] - } - ], + "outputs": [], "source": [ "md = MutationDetection()\n", "\n", @@ -196,6 +146,13 @@ ")" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "markdown", "metadata": {}, @@ -211,14 +168,14 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 50, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "{'from_positions': [236, 102, 162], 'to_positions': [236, 102, 162], 'from_monomers': ['G', 'E', 'S'], 'to_monomers': ['S', 'K', 'R']}\n" + "{'from_positions': [102, 162, 236], 'to_positions': [102, 162, 236], 'from_monomers': ['E', 'S', 'G'], 'to_monomers': ['K', 'R', 'S']}\n" ] } ], @@ -245,7 +202,7 @@ ], "metadata": { "kernelspec": { - "display_name": "pyeed", + "display_name": "pyeed_niklas", "language": "python", "name": "python3" }, @@ -259,7 +216,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.5" + "version": "3.12.8" } }, "nbformat": 4, diff --git a/docs/usage/standard_numbering.ipynb b/docs/usage/standard_numbering.ipynb new file mode 100644 index 00000000..d2537f25 --- /dev/null +++ b/docs/usage/standard_numbering.ipynb @@ -0,0 +1,305 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Standard Numbering\n", + "\n", + "The standard numbering tool is 
used to number the residues of a protein sequence. It allows for comparison of different protein sequences by aligning them and numbering the residues in a common reference frame.\n", + "\n", + "It can be run in two different modes:\n", + "\n", + "1. **Pairwise alignment**: This mode aligns two sequences and numbers the residues in a common reference frame. Here a base sequence is provided and the other sequences are aligned to it.\n", + "2. **Clustal alignment**: This mode aligns a sequence against a multiple sequence alignment and numbers the residues in a common reference frame. Here a base sequence is provided and the other sequences are aligned to it.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [], + "source": [ + "%reload_ext autoreload\n", + "%autoreload 2\n", + "import sys\n", + "from loguru import logger\n", + "\n", + "from pyeed import Pyeed\n", + "from pyeed.analysis.mutation_detection import MutationDetection\n", + "from pyeed.analysis.standard_numbering import StandardNumberingTool\n", + "\n", + "logger.remove()\n", + "level = logger.add(sys.stderr, level=\"INFO\")" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "📡 Connected to database.\n", + "All data has been wiped from the database.\n", + "the connection url is bolt://neo4j:12345678@129.69.129.130:7687\n", + "Loaded /home/nab/Niklas/pyeed/src/pyeed/model.py\n", + "Connecting to bolt://neo4j:12345678@129.69.129.130:7687\n", + "Setting up indexes and constraints...\n", + "\n", + "Found model.StrictStructuredNode\n", + " ! 
Skipping class model.StrictStructuredNode is abstract\n", + "Found model.Organism\n", + " + Creating node unique constraint for taxonomy_id on label Organism for class model.Organism\n", + "{code: Neo.ClientError.Schema.EquivalentSchemaRuleAlreadyExists} {message: An equivalent constraint already exists, 'Constraint( id=12, name='constraint_unique_Organism_taxonomy_id', type='UNIQUENESS', schema=(:Organism {taxonomy_id}), ownedIndex=5 )'.}\n", + "Found model.Site\n", + " + Creating node unique constraint for site_id on label Site for class model.Site\n", + "{code: Neo.ClientError.Schema.EquivalentSchemaRuleAlreadyExists} {message: An equivalent constraint already exists, 'Constraint( id=14, name='constraint_unique_Site_site_id', type='UNIQUENESS', schema=(:Site {site_id}), ownedIndex=7 )'.}\n", + "Found model.Region\n", + " + Creating node unique constraint for region_id on label Region for class model.Region\n", + "{code: Neo.ClientError.Schema.EquivalentSchemaRuleAlreadyExists} {message: An equivalent constraint already exists, 'Constraint( id=19, name='constraint_unique_Region_region_id', type='UNIQUENESS', schema=(:Region {region_id}), ownedIndex=11 )'.}\n", + "Found model.CatalyticActivity\n", + " + Creating node unique constraint for catalytic_id on label CatalyticActivity for class model.CatalyticActivity\n", + "{code: Neo.ClientError.Schema.EquivalentSchemaRuleAlreadyExists} {message: An equivalent constraint already exists, 'Constraint( id=3, name='constraint_unique_CatalyticActivity_catalytic_id', type='UNIQUENESS', schema=(:CatalyticActivity {catalytic_id}), ownedIndex=15 )'.}\n", + "Found model.StandardNumbering\n", + " + Creating node unique constraint for name on label StandardNumbering for class model.StandardNumbering\n", + "{code: Neo.ClientError.Schema.EquivalentSchemaRuleAlreadyExists} {message: An equivalent constraint already exists, 'Constraint( id=16, name='constraint_unique_StandardNumbering_name', type='UNIQUENESS', 
schema=(:StandardNumbering {name}), ownedIndex=20 )'.}\n", + "Found model.GOAnnotation\n", + " + Creating node unique constraint for go_id on label GOAnnotation for class model.GOAnnotation\n", + "{code: Neo.ClientError.Schema.EquivalentSchemaRuleAlreadyExists} {message: An equivalent constraint already exists, 'Constraint( id=10, name='constraint_unique_GOAnnotation_go_id', type='UNIQUENESS', schema=(:GOAnnotation {go_id}), ownedIndex=4 )'.}\n", + "Found model.Protein\n", + " + Creating node unique constraint for accession_id on label Protein for class model.Protein\n", + "{code: Neo.ClientError.Schema.EquivalentSchemaRuleAlreadyExists} {message: An equivalent constraint already exists, 'Constraint( id=24, name='constraint_unique_Protein_accession_id', type='UNIQUENESS', schema=(:Protein {accession_id}), ownedIndex=13 )'.}\n", + " + Creating vector index for embedding on label Protein for class model.Protein\n", + "{code: Neo.ClientError.Schema.EquivalentSchemaRuleAlreadyExists} {message: An equivalent index already exists, 'Index( id=9, name='vector_index_Protein_embedding', type='VECTOR', schema=(:Protein {embedding}), indexProvider='vector-2.0' )'.}\n", + "Found model.DNA\n", + " + Creating node unique constraint for accession_id on label DNA for class model.DNA\n", + "{code: Neo.ClientError.Schema.EquivalentSchemaRuleAlreadyExists} {message: An equivalent constraint already exists, 'Constraint( id=8, name='constraint_unique_DNA_accession_id', type='UNIQUENESS', schema=(:DNA {accession_id}), ownedIndex=21 )'.}\n", + " + Creating vector index for embedding on label DNA for class model.DNA\n", + "{code: Neo.ClientError.Schema.EquivalentSchemaRuleAlreadyExists} {message: An equivalent index already exists, 'Index( id=6, name='vector_index_DNA_embedding', type='VECTOR', schema=(:DNA {embedding}), indexProvider='vector-2.0' )'.}\n", + "Found model.OntologyObject\n", + " + Creating node unique constraint for name on label OntologyObject for class 
model.OntologyObject\n", + "{code: Neo.ClientError.Schema.EquivalentSchemaRuleAlreadyExists} {message: An equivalent constraint already exists, 'Constraint( id=17, name='constraint_unique_OntologyObject_name', type='UNIQUENESS', schema=(:OntologyObject {name}), ownedIndex=23 )'.}\n", + "\n", + "Finished 10 classes.\n", + "✅ Databse constraints and indexes set up according to Pyeed Graph Object Model.\n" + ] + } + ], + "source": [ + "uri = \"bolt://129.69.129.130:7687\"\n", + "user = \"neo4j\"\n", + "password = \"12345678\"\n", + "\n", + "eedb = Pyeed(uri, user=user, password=password)\n", + "eedb.db.wipe_database(date=\"2025-02-28\")\n", + "\n", + "eedb.db.initialize_db_constraints(user=user, password=password)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-02-28 17:20:32.533\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.main\u001b[0m:\u001b[36mfetch_from_primary_db\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mFound 0 sequences in the database.\u001b[0m\n", + "\u001b[32m2025-02-28 17:20:32.533\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.main\u001b[0m:\u001b[36mfetch_from_primary_db\u001b[0m:\u001b[36m89\u001b[0m - \u001b[1mFetching 8 sequences from ncbi_protein.\u001b[0m\n", + "\u001b[32m2025-02-28 17:20:32.560\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.primary_db_adapter\u001b[0m:\u001b[36mexecute_requests\u001b[0m:\u001b[36m140\u001b[0m - \u001b[1mStarting requests for 1 batches.\u001b[0m\n", + "\u001b[32m2025-02-28 17:20:33.682\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein KJO56189.1 in database\u001b[0m\n", + "\u001b[32m2025-02-28 17:20:33.711\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein KLP91446.1 in database\u001b[0m\n", + "\u001b[32m2025-02-28 17:20:33.741\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAM15527.1 in database\u001b[0m\n", + "\u001b[32m2025-02-28 17:20:33.771\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF05614.1 in database\u001b[0m\n", + "\u001b[32m2025-02-28 17:20:33.804\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AFN21551.1 in database\u001b[0m\n", + "\u001b[32m2025-02-28 17:20:33.831\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA76794.1 in database\u001b[0m\n", + "\u001b[32m2025-02-28 17:20:33.858\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AGQ50511.1 in database\u001b[0m\n", + "\u001b[32m2025-02-28 17:20:33.884\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein KJO56289.1 in database\u001b[0m\n" + ] + } + ], + "source": [ + "ids = [\"KJO56189.1\", \"KLP91446.1\", \"AAM15527.1\", \"AAF05614.1\", \"AFN21551.1\", \"CAA76794.1\", \"AGQ50511.1\", \"KJO56289.1\"]\n", + "\n", + "eedb.fetch_from_primary_db(ids, db=\"ncbi_protein\")" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + 
"metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "bdd1752d875b4218bec76621c7f7e84a", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "sn = StandardNumberingTool(name=\"test_standard_numbering_pairwise\")\n",
+    "\n",
+    "\n",
+    "sn.apply_standard_numbering_pairwise(\n",
+    "    base_sequence_id=\"KJO56189.1\", db=eedb.db, list_of_seq_ids=ids[0:5]\n",
+    ")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 46,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[32m2025-02-28 17:20:35.248\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m372\u001b[0m - \u001b[1mPair KJO56189.1 and KLP91446.1 already exists under the same standard numbering node\u001b[0m\n",
+      "\u001b[32m2025-02-28 17:20:35.250\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m372\u001b[0m - \u001b[1mPair KJO56189.1 and AAM15527.1 already exists under the same standard numbering node\u001b[0m\n",
+      "\u001b[32m2025-02-28 17:20:35.252\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m372\u001b[0m - \u001b[1mPair KJO56189.1 and AAF05614.1 already exists under the same standard numbering node\u001b[0m\n",
+      "\u001b[32m2025-02-28 17:20:35.254\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m372\u001b[0m - \u001b[1mPair KJO56189.1 and AFN21551.1 already exists under the same standard numbering node\u001b[0m\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "42c47d8085c6499ca30700b7f617d987",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Output()"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "sn.apply_standard_numbering_pairwise(\n",
+    "    base_sequence_id=\"KJO56189.1\", db=eedb.db, list_of_seq_ids=ids\n",
+    ")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 47,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[32m2025-02-28 17:20:36.255\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering\u001b[0m:\u001b[36m470\u001b[0m - \u001b[1mUsing 7 sequences for standard numbering\u001b[0m\n",
+      "\u001b[32m2025-02-28 17:20:36.414\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering\u001b[0m:\u001b[36m490\u001b[0m - \u001b[1mAlignment received from ClustalOmega:\n",
+      "KJO56189.1  -----------------------------------------MSIQHFRVALIPFFAAFC-LPVFAHPE--TLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDSWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKV---AGP---LLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW\n",
+      "KLP91446.1  -----------------------------------------MSIQHFRVALIPFFAAFC-LPVFAHPE--TLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVKYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKV---AGP---LLRSALPAGWFIADKSGASERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW\n",
+      "KJO56289.1  MKKNKHQAISREDLNKYHEETQGLERDMVNEILNSRSRAWKIATAFFVFAVVSMITAVGVIIRFAQPLPAYLTTINKDTGEV-SQVKITRDEATYGDVIDQYWIS-------QF-------VIH-----------RESYD------YNSIQ-----------VDYDAMSLMASGDVADEYLSMFKGPNRIDKRLGDSER-------TTVHINSVITDREHGVA--TVRFTTQQ-----RIRQRPNPEPPRYWIATIAYEYKALPMTAQQRYINPLGFRVTSYRKN--------AENVGA----------------------------VGG------\n",
+      "AGQ50511.1  -----------------------------------------MSIQHFRVALIPFFAAFC-LPVFAHPE--TLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMVSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKV---AGP---LLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW\n",
+      "AAM15527.1  -----------------------------------------MSIQHFRVALIPFFAAFC-LPVFAHPE--TLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMLSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAVTMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKV---AGP---LLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERDRQIAEIGASLIKHW\n",
+      "AAF05614.1  -----------------------------------------MSIQHFRVALIPFFAAFC-LPVFAHPE--TLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMLSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKV---AGP---LLRSALPAGWFIADKSGAGERGSSGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW\n",
+      "CAA76794.1  -----------------------------------------MSIQHFRVALIPFFAAFC-LPVFAHPE--TLVKVKDAEDKLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVKYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLRNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKV---AGP---LLRSALPAGWFIADKSGASERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW\n",
+      "AFN21551.1  -----------------------------------------MSIQHFRVALIPFFAAFC-LPVFAHPE--TLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKV---AGP---LLRSALPAGWFIADKSGASERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW\u001b[0m\n",
+      "\u001b[32m2025-02-28 17:20:36.415\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mrun_numbering_algorithm_clustalo\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mRunning numbering algorithm for base sequence KJO56189.1\u001b[0m\n",
+      "\u001b[32m2025-02-28 17:20:36.416\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering\u001b[0m:\u001b[36m496\u001b[0m - \u001b[1mPositions computed: {'KJO56189.1': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', 
'266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286'], 'KJO56289.1': ['0.1', '0.2', '0.3', '0.4', '0.5', '0.6', '0.7', '0.8', '0.9', '0.10', '0.11', '0.12', '0.13', '0.14', '0.15', '0.16', '0.17', '0.18', '0.19', '0.20', '0.21', '0.22', '0.23', '0.24', '0.25', '0.26', '0.27', '0.28', '0.29', '0.30', '0.31', '0.32', '0.33', '0.34', '0.35', '0.36', '0.37', '0.38', '0.39', '0.40', '0.41', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '18.1', '19', '20', '21', '22', '23', '24', '25', '26', '26.1', '26.2', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '69', '70', '78', '79', '80', '92', '93', '94', '95', '96', '103', '104', '105', '106', '107', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '182', '183', '184', '185', '186', '187', '188', '189', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '214.1', '214.2', '214.3', '215', '216', '217', '217.1', '217.2', '217.3', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '244', '245', '246', '247', '248', '249', '278', '279', '280'], 'KLP91446.1': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', 
'34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286'], 'AGQ50511.1': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', 
'49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286'], 'AAM15527.1': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', 
'64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286'], 'AAF05614.1': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', 
'79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286'], 'CAA76794.1': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', 
'94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286'], 'AFN21551.1': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', 
'107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286']}\u001b[0m\n"
+     ]
+    }
+   ],
+   "source": [
+    "sn_clustal = StandardNumberingTool(name=\"test_standard_numbering_clustal\")\n",
+    "\n",
+    "sn_clustal.apply_standard_numbering(\n",
+    "    base_sequence_id=\"KJO56189.1\", db=eedb.db, list_of_seq_ids=ids\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "pyeed_niklas",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/src/pyeed/analysis/standard_numbering.py b/src/pyeed/analysis/standard_numbering.py
index 90928152..fcc71a79 100644
--- a/src/pyeed/analysis/standard_numbering.py
+++ b/src/pyeed/analysis/standard_numbering.py
@@ -354,6 +354,25 @@ def apply_standard_numbering_pairwise(
         for protein_id in list_of_seq_ids:
             pairs.append((base_sequence_id, protein_id))
 
+        # check if the pairs are already existing with the same name under the same standard numbering node
+        query = """
+        MATCH (s:StandardNumbering {name: $name})
+        MATCH (p:Protein)-[r:HAS_STANDARD_NUMBERING]->(s)
+        WHERE p.accession_id IN $list_of_seq_ids
+        RETURN p.accession_id AS accession_id
+        """
+        results = db.execute_read(
+            query, parameters={"list_of_seq_ids": list_of_seq_ids, "name": self.name}
+        )
+        if results is not None:
+            for row in results:
+                if row is not None:
+                    if row.get("accession_id"):
+                        pairs.remove((base_sequence_id, row["accession_id"]))
+                        logger.info(
+                            f"Pair {base_sequence_id} and {row['accession_id']} already exists under the same standard numbering node"
+                        )
+
         # Run the pairwise alignment using the PairwiseAligner.
         pairwise_aligner = PairwiseAligner()
 
@@ -454,7 +473,8 @@ def apply_standard_numbering(
         base_sequence = self.get_protein_base_sequence(base_sequence_id, db)
 
         # Remove the base sequence from the proteins list to prevent duplicate alignment.
-        proteins_dict.pop(base_sequence_id)
+        if base_sequence_id in proteins_dict:
+            proteins_dict.pop(base_sequence_id)
 
         # Create a dictionary for ClustalOmega that includes both the base and target sequences.
         sequences_dict = {base_sequence["id"]: base_sequence["sequence"]}
diff --git a/src/pyeed/model.py b/src/pyeed/model.py
index 0b08f37c..71544e3a 100644
--- a/src/pyeed/model.py
+++ b/src/pyeed/model.py
@@ -293,7 +293,7 @@ def label(self) -> str:
 
 
 class StandardNumbering(StrictStructuredNode):
-    name = StringProperty(required=True)
+    name = StringProperty(required=True, unique_index=True)
     definition = StringProperty(required=True)
 
     # Relationships

From 0060bf051f079f53d6cba115e20865546904511e Mon Sep 17 00:00:00 2001
From: Niklas Abraham GPU 
Date: Fri, 28 Feb 2025 17:22:15 +0000
Subject: [PATCH 06/13] naming

---
 docs/usage/standard_numbering.ipynb | 36 ++++-------------------------
 1 file changed, 4 insertions(+), 32 deletions(-)

diff --git a/docs/usage/standard_numbering.ipynb b/docs/usage/standard_numbering.ipynb
index d2537f25..dc5f07a4 100644
--- a/docs/usage/standard_numbering.ipynb
+++ b/docs/usage/standard_numbering.ipynb
@@ -246,39 +246,11 @@
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": 57,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
+   "cell_type": "markdown",
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "source": [
+    "In both cases, all the proteins are now connected to a standard numbering node, and the connecting edge carries the standard numbering data."
+   ]
   }
  ],
  "metadata": {

From 4e27d47ce134af83de8f33b9f228a1756d5dcd07 Mon Sep 17 00:00:00 2001
From: Niklas Abraham GPU 
Date: Fri, 28 Feb 2025 17:25:12 +0000
Subject: [PATCH 07/13] linter fixes

---
 src/pyeed/analysis/standard_numbering.py | 65 ------------------------
 src/pyeed/model.py                       | 47 +++++++----------
 2 files changed, 19 insertions(+), 93 deletions(-)

diff --git a/src/pyeed/analysis/standard_numbering.py b/src/pyeed/analysis/standard_numbering.py
index fcc71a79..e4b17d89 100644
--- a/src/pyeed/analysis/standard_numbering.py
+++ b/src/pyeed/analysis/standard_numbering.py
@@ -503,68 +503,3 @@ def apply_standard_numbering(
 
         # Update the database with the relationships between proteins and standard numbering.
         self.save_positions(db)
-
-
-if __name__ == "__main__":
-    # Database connection setup parameters.
-    uri = "bolt://129.69.129.130:7687"
-    user = "neo4j"
-    password = "12345678"
-
-    from pyeed import Pyeed
-
-    # Create a Pyeed object which automatically connects to the Neo4j database.
-    eedb = Pyeed(uri, user, password)
-
-    # Clear previous standard numbering relationships from the database.
-    query = """
-    MATCH (n:StandardNumbering)-[r:HAS_STANDARD_NUMBERING]-(c:Protein) DELETE r
-    """
-    eedb.db.execute_write(query)
-
-    # Define sequences for testing the numbering algorithm.
-    sequences = [
-        ">seq1\nMTHKLLLTLLFTLLFSSAYSRG",
-        ">seq2\nABCABCABCMTHKITLLLTLLFTLLFSSAYSRG",
-        ">seq3\nMTHKILLLTLLFTLLFSSCYSRGARTHDB",
-    ]
-
-    proteins_dict = {
-        "seq1": "MTHKLLLTLLFTLLFSSAYSRG",
-        "seq2": "ABCABCABCMTHKITLLLTLLFTLLFSSAYSRG",
-        "seq3": "MTHKILLLTLLFTLLFSSCYSRGARTHDB",
-    }
-
-    # Define a base sequence that will be used as the reference.
-    base_sequence = {"id": "seq0", "sequence": "AMTHKLLLTLLFTLLFSSAYSRG"}
-
-    from pyeed.tools.clustalo import ClustalOmega
-
-    clustalO = ClustalOmega()
-
-    # Insert the base sequence as the first sequence in the alignment list.
-    sequences.insert(0, f">{base_sequence['id']}\n{base_sequence['sequence']}")
-
-    # Create a dictionary for ClustalOmega from the sequences.
-    sequences_dict = {
-        base_sequence["id"]: base_sequence["sequence"],
-        "seq1": "MTHKLLLTLLFTLLFSSAYSRG",
-        "seq2": "ABCABCABCMTHKITLLLTLLFTLLFSSAYSRG",
-        "seq3": "MTHKILLLTLLFTLLFSSCYSRGARTHDB",
-    }
-
-    # Perform multiple sequence alignment.
-    alignment = clustalO.align(sequences_dict)
-
-    # Instantiate the numbering tool and run the numbering algorithm.
-    sn_tool = StandardNumberingTool("test_standard_numbering")
-    sn_tool.positions = sn_tool.run_numbering_algorithm_pairwise("seq0", alignment)
-
-    # Print a sample of the computed positions to verify the output.
-    count = 0
-    for i in sn_tool.positions:
-        count += 1
-        print(i, sn_tool.positions[i])
-        # Only show the first few sequences
-        if count > 10:
-            break
diff --git a/src/pyeed/model.py b/src/pyeed/model.py
index 71544e3a..40457068 100644
--- a/src/pyeed/model.py
+++ b/src/pyeed/model.py
@@ -143,7 +143,7 @@ class Annotation(Enum):
 
 
 class Organism(StrictStructuredNode):
-    taxonomy_id: int = IntegerProperty(required=True, unique_index=True)
+    taxonomy_id = IntegerProperty(required=True, unique_index=True)
     name = StringProperty()
 
 
@@ -153,8 +153,8 @@ class SiteRel(StructuredRel):  # type: ignore
     @classmethod
     def validate_and_connect(
         cls,
-        molecule1: StrictStructuredNode,
-        molecule2: StrictStructuredNode,
+        molecule1: Any,
+        molecule2: Any,
         positions: list[int],
     ) -> "SiteRel":
         """Validates the positions and connects the two molecules."""
@@ -198,8 +198,8 @@ class DNAProteinRel(StructuredRel):  # type: ignore
     @classmethod
     def validate_and_connect(
         cls,
-        molecule1: StrictStructuredNode,
-        molecule2: StrictStructuredNode,
+        molecule1: Any,
+        molecule2: Any,
         start: int,
         end: int,
     ) -> "DNAProteinRel":
@@ -225,8 +225,8 @@ class RegionRel(StructuredRel):  # type: ignore
     @classmethod
     def validate_and_connect(
         cls,
-        molecule1: StrictStructuredNode,
-        molecule2: StrictStructuredNode,
+        molecule1: Any,
+        molecule2: Any,
         start: int,
         end: int,
     ) -> "RegionRel":
@@ -255,7 +255,7 @@ class CatalyticActivity(StrictStructuredNode):
     A node representing a catalytic activity.
     """
 
-    catalytic_id: int = IntegerProperty(required=False, unique_index=True)
+    catalytic_id = IntegerProperty(required=False, unique_index=True)
     name = StringProperty()
 
     @property
@@ -270,8 +270,8 @@ class StandardNumberingRel(StructuredRel):  # type: ignore
     @classmethod
     def validate_and_connect(
         cls,
-        molecule1: StrictStructuredNode,
-        molecule2: StrictStructuredNode,
+        molecule1: Any,
+        molecule2: Any,
         positions: list[str],
     ) -> "StandardNumberingRel":
         """Validates the positions and connects the two molecules."""
@@ -324,8 +324,8 @@ class PairwiseAlignmentResult(StructuredRel):  # type: ignore
     @classmethod
     def validate_and_connect(
         cls,
-        molecule1: StrictStructuredNode,
-        molecule2: StrictStructuredNode,
+        molecule1: Any,
+        molecule2: Any,
         similarity: float,
         gaps: int,
         mismatches: int,
@@ -400,25 +400,14 @@ class Mutation(StructuredRel):  # type: ignore
     @classmethod
     def validate_and_connect(
         cls,
-        molecule1: StrictStructuredNode,
-        molecule2: StrictStructuredNode,
+        molecule1: Any,
+        molecule2: Any,
         from_positions: list[int],
         to_positions: list[int],
         from_monomers: list[str],
         to_monomers: list[str],
     ) -> "Mutation":
         """Validates the mutations and connects the two molecules.
-        Args:
-            molecule1 (StrictStructuredNode): DNA or Protein node
-            molecule2 (StrictStructuredNode): DNA or Protein node
-            from_positions (list of int): Positions of the mutations in the original sequence. 0-indexed.
-            to_positions (list of int): Positions of the mutations in the mutated sequence. 0-indexed.
-            from_monomers (list of str): Original residues / nucleotides at the specified positions.
-            to_monomers (list of str): Mutated residues / nucleotides at the specified positions.
-
-        Returns:
-            Mutation: The created mutation relationship.
-
         Raises:
             ValueError: If the specified positions or residues do not match the sequences.
         """
@@ -464,7 +453,9 @@ def label(self) -> str:
         return ",".join(
             f"{from_monomer}{from_position}{to_monomer}"
             for from_position, from_monomer, to_monomer in zip(
-                self.from_positions, self.from_monomers, self.to_monomers
+                list(self.from_positions),
+                list(self.from_monomers),
+                list(self.to_monomers),
             )
         )
 
@@ -540,8 +531,8 @@ class CustomRealationship(StructuredRel):  # type: ignore
     @classmethod
     def validate_and_connect(
         cls,
-        molecule1: StrictStructuredNode,
-        molecule2: StrictStructuredNode,
+        molecule1: Any,
+        molecule2: Any,
         name: str,
         description: str,
     ) -> "CustomRealationship":

From 7abfc8f1ffbd9adae143d5a4613609ff53472ff0 Mon Sep 17 00:00:00 2001
From: Niklas Abraham GPU 
Date: Sat, 1 Mar 2025 12:51:38 +0000
Subject: [PATCH 08/13] new function for embedings and update of last hidden
 states

---
 src/pyeed/embedding.py |  86 ++++++++++++++++++++++++-
 src/pyeed/main.py      | 142 ++++++++++++++++++++++++++---------------
 2 files changed, 176 insertions(+), 52 deletions(-)

diff --git a/src/pyeed/embedding.py b/src/pyeed/embedding.py
index d53ef8c7..3e7f564e 100644
--- a/src/pyeed/embedding.py
+++ b/src/pyeed/embedding.py
@@ -139,6 +139,23 @@ def calculate_single_sequence_embedding_last_hidden_state(
     return get_single_embedding_last_hidden_state(sequence, model, tokenizer, device)
 
 
+def calculate_single_sequence_embedding_all_layers(
+    sequence: str, model_name: str = "facebook/esm2_t33_650M_UR50D"
+) -> NDArray[np.float64]:
+    """
+    Calculates embeddings for a single sequence across all layers (calls load_model_and_tokenizer on every invocation).
+
+    Args:
+        sequence: Input protein sequence
+        model_name: Name of the ESM model to use
+
+    Returns:
+        NDArray[np.float64]: The array returned unchanged from get_single_embedding_all_layers (normalized token embeddings for every layer).
+    """
+    model, tokenizer, device = load_model_and_tokenizer(model_name)
+    return get_single_embedding_all_layers(sequence, model, tokenizer, device)
+
+
 def get_single_embedding_last_hidden_state(
     sequence: str, model: Any, tokenizer: Any, device: torch.device
 ) -> NDArray[np.float64]:
@@ -163,9 +180,16 @@ def get_single_embedding_last_hidden_state(
             protein = ESMProtein(sequence=sequence)
             protein_tensor = model.encode(protein)
             logits_output = model.logits(
-                protein_tensor, LogitsConfig(sequence=True, return_embeddings=True)
+                protein_tensor,
+                LogitsConfig(
+                    sequence=True,
+                    return_embeddings=True,
+                    return_hidden_states=True,
+                ),
+            )
+            embedding = (
+                logits_output.hidden_states[-1][0].to(torch.float32).cpu().numpy()
             )
-            embedding = logits_output.embeddings[0].cpu().numpy()
         else:
             # ESM-2 logic
             inputs = tokenizer(sequence, return_tensors="pt").to(device)
@@ -178,6 +202,64 @@ def get_single_embedding_last_hidden_state(
     return embedding  # type: ignore
 
 
+def get_single_embedding_all_layers(
+    sequence: str, model: Any, tokenizer: Any, device: torch.device
+) -> NDArray[np.float64]:
+    """
+    Generates normalized embeddings for each token in the sequence across all layers.
+
+    For ESM-3 (ESMC) models, it assumes that passing
+    LogitsConfig(return_hidden_states=True) returns a collection of layer embeddings.
+    For ESM-2 models, it sets output_hidden_states=True.
+
+    Args:
+        sequence (str): The protein sequence to embed.
+        model (Any): The transformer model to use.
+        tokenizer (Any): The tokenizer for the model (unused in the ESMC branch).
+        device (torch.device): The device the tokenized inputs are moved to (ESM-2 branch only).
+
+    Returns:
+        NDArray[np.float64]: The normalized token embeddings of every layer, stacked by
+        np.array along a new leading layer axis (not concatenated along features).
+    """
+    embeddings_list = []
+    with torch.no_grad():
+        if isinstance(model, ESMC):
+            # For ESM-3: Use ESMProtein and request hidden states via LogitsConfig
+            protein = ESMProtein(sequence=sequence)
+            protein_tensor = model.encode(protein)
+            logits_output = model.logits(
+                protein_tensor,
+                LogitsConfig(
+                    sequence=True,
+                    return_embeddings=True,
+                    return_hidden_states=True,  # Assuming this flag is supported
+                ),
+            )
+            # logits_output.hidden_states should be a tuple of tensors: (layer, batch, seq_len, hidden_dim)
+            for layer_tensor in logits_output.hidden_states:
+                # Take batch entry 0 as float32; unlike the ESM-2 branch, no tokens are sliced off here
+                emb = layer_tensor[0].to(torch.float32).cpu().numpy()
+                # Unit L2 norm along axis 1; NOTE(review): slice emb[1:-1] first if special tokens are present
+                emb = emb / np.linalg.norm(emb, axis=1, keepdims=True)
+                embeddings_list.append(emb)
+
+        else:
+            # For ESM-2: Get hidden states with output_hidden_states=True
+            inputs = tokenizer(sequence, return_tensors="pt").to(device)
+            outputs = model(**inputs, output_hidden_states=True)
+            hidden_states = (
+                outputs.hidden_states
+            )  # Tuple: (layer0, layer1, ..., layerN)
+            for layer_tensor in hidden_states:
+                # Take batch entry 0 and drop the first and last token positions ([CLS] and [SEP])
+                emb = layer_tensor[0, 1:-1, :].detach().cpu().numpy()
+                emb = emb / np.linalg.norm(emb, axis=1, keepdims=True)
+                embeddings_list.append(emb)
+
+    return np.array(embeddings_list)
+
+
 # The rest of your existing functions will need to be adapted in a similar way
 # if they interact with the model or tokenizer directly
 
diff --git a/src/pyeed/main.py b/src/pyeed/main.py
index 6ab3c157..f2cdb134 100644
--- a/src/pyeed/main.py
+++ b/src/pyeed/main.py
@@ -281,8 +281,9 @@ def get_dnas(self, accession_ids: list[str]) -> list[dict[str, Any]]:
     def fetch_dna_entries_for_proteins(self) -> None:
         """
         Fetches DNA sequences for proteins that have a nucleotide id, set in the database.
-        The fetching is done from NCBI nucleotide database.
+        The fetching is done from NCBI nucleotide database in batches.
         """
+        BATCH_SIZE = 100
 
         # Get all proteins and a list of coding sequences ids
         query = """
@@ -296,32 +297,47 @@ def fetch_dna_entries_for_proteins(self) -> None:
 
         logger.info(f"Found {len(nucleotide_ids)} coding sequences.")
 
-        # check if the coding sequences are already in the database
-        query = """
-        MATCH (n:DNA)
-        WHERE n.accession_id IN $nucleotide_ids
-        RETURN n.accession_id AS accession_id
-        """
-        coding_sequences_resp = self.db.execute_read(
-            query, {"nucleotide_ids": nucleotide_ids}
-        )
-        coding_sequences = [
-            str(record["accession_id"]) for record in coding_sequences_resp
-        ]
+        # Process nucleotide IDs in batches to check which ones are already in DB
+        all_existing_sequences = set()
+        for i in range(0, len(nucleotide_ids), BATCH_SIZE):
+            batch_ids = nucleotide_ids[i : i + BATCH_SIZE]
+            try:
+                query = """
+                MATCH (n:DNA)
+                WHERE n.accession_id IN $nucleotide_ids
+                RETURN n.accession_id AS accession_id
+                """
+                coding_sequences_resp = self.db.execute_read(
+                    query, {"nucleotide_ids": batch_ids}
+                )
+                batch_existing = {
+                    str(record["accession_id"]) for record in coding_sequences_resp
+                }
+                all_existing_sequences.update(batch_existing)
+            except Exception as e:
+                logger.error(
+                    f"Error checking existing sequences for batch {i}: {str(e)}"
+                )
+                continue
 
-        # check each coding sequence if it is already in the database and only if not add to list
-        nucleotide_ids = [id for id in nucleotide_ids if id not in coding_sequences]
+        # Filter out existing sequences
+        nucleotide_ids = [
+            id for id in nucleotide_ids if id not in all_existing_sequences
+        ]
 
-        logger.info(f"Fetching {len(nucleotide_ids)} coding sequences.")
+        logger.info(f"Fetching {len(nucleotide_ids)} new coding sequences.")
 
-        # Fetch the coding sequences
-        self.fetch_ncbi_nucleotide(nucleotide_ids)
+        # Fetch coding sequences in batches
+        for i in range(0, len(nucleotide_ids), BATCH_SIZE):
+            try:
+                batch_ids = nucleotide_ids[i : i + BATCH_SIZE]
+                self.fetch_ncbi_nucleotide(batch_ids)
+                logger.info(f"Successfully fetched batch {i//BATCH_SIZE + 1}")
+            except Exception as e:
+                logger.error(f"Error fetching batch {i//BATCH_SIZE + 1}: {str(e)}")
+                continue
 
-        # we need to update the protein records with the coding sequences
-        # the connection between protein and DNA is ENCODES (fom DNA to protein)
-        # but this connection could already exist, so we need to check if it exists, and onyl add it if it does not
-        # the start and end positions of nucleotide sequence are stored in protein record
-        # the protein record has the attribute nucleotide_id, which is the id of the coding sequence
+        # Process protein-DNA relationships in batches
         query = """
         MATCH (p:Protein)
         WHERE p.nucleotide_id IS NOT NULL
@@ -329,35 +345,61 @@ def fetch_dna_entries_for_proteins(self) -> None:
         """
         proteins = self.db.execute_read(query)
 
-        for protein in proteins:
-            protein = protein["p"]
-            # check wether the connection already exists
-            # for that we take a look at the DNA node, with the nucleotide_id
-            # and check if there is a connection to the protein node
-            query = f"""
-            MATCH (p:Protein {{accession_id: '{protein["accession_id"]}'}})
-            MATCH (d:DNA {{accession_id: '{protein["nucleotide_id"]}'}})
-            RETURN EXISTS((d)-[:ENCODES]->(p)) AS exists
-            """
-            result = self.db.execute_read(query)
+        for i in range(0, len(proteins), BATCH_SIZE):
             try:
-                exists = result[0]["exists"]
-            except IndexError:
-                logger.debug(
-                    f"No connection between {protein['accession_id']} and {protein['nucleotide_id']} found."
+                batch_proteins = proteins[i : i + BATCH_SIZE]
+
+                # Build batch query for checking existing relationships
+                batch_check_query = """
+                UNWIND $proteins AS protein
+                MATCH (p:Protein {accession_id: protein.p.accession_id})
+                MATCH (d:DNA {accession_id: protein.p.nucleotide_id})
+                RETURN 
+                    protein.p.accession_id AS protein_id,
+                    protein.p.nucleotide_id AS dna_id,
+                    EXISTS((d)-[:ENCODES]->(p)) AS exists,
+                    protein.p.nucleotide_start AS start,
+                    protein.p.nucleotide_end AS end
+                """
+
+                results = self.db.execute_read(
+                    batch_check_query, {"proteins": batch_proteins}
                 )
-                continue
 
-            if exists:
-                logger.info(
-                    f"Connection between {protein['accession_id']} and {protein['nucleotide_id']} already exists."
+                # Filter relationships that need to be created
+                new_relationships = []
+                for result in results:
+                    if not result["exists"]:
+                        new_relationships.append(
+                            {
+                                "protein_id": result["protein_id"],
+                                "dna_id": result["dna_id"],
+                                "start": result["start"],
+                                "end": result["end"],
+                            }
+                        )
+                    else:
+                        logger.info(
+                            f"Connection between {result['protein_id']} and {result['dna_id']} already exists."
+                        )
+
+                if new_relationships:
+                    # Create new relationships in batch
+                    batch_create_query = """
+                    UNWIND $relationships AS rel
+                    MATCH (p:Protein {accession_id: rel.protein_id})
+                    MATCH (d:DNA {accession_id: rel.dna_id})
+                    MERGE (d)-[r:ENCODES]->(p)
+                    SET r.start = rel.start, r.end = rel.end
+                    """
+                    self.db.execute_write(
+                        batch_create_query, {"relationships": new_relationships}
+                    )
+                    logger.info(
+                        f"Successfully processed relationship batch {i//BATCH_SIZE + 1}"
+                    )
+            except Exception as e:
+                logger.error(
+                    f"Error processing relationship batch {i//BATCH_SIZE + 1}: {str(e)}"
                 )
                 continue
-
-            query = f"""
-            MATCH (p:Protein {{accession_id: '{protein["accession_id"]}'}})
-            MATCH (d:DNA {{accession_id: '{protein["nucleotide_id"]}'}})
-            MERGE (d)-[r:ENCODES]->(p)
-            SET r.start = {protein["nucleotide_start"]}, r.end = {protein["nucleotide_end"]}
-            """
-            self.db.execute_write(query)

From 765fe5240d212a8823bc7773bb372b08f59544b4 Mon Sep 17 00:00:00 2001
From: Niklas Abraham GPU 
Date: Fri, 14 Mar 2025 16:07:39 +0000
Subject: [PATCH 09/13] build standard numbering for DNA and build test case
 around it

---
 docs/usage/standard_numbering.ipynb      | 200 ++++++++++++++++++-----
 src/pyeed/analysis/mutation_detection.py |  20 +++
 src/pyeed/analysis/sequence_alignment.py |  20 ++-
 src/pyeed/analysis/standard_numbering.py | 127 ++++++++------
 src/pyeed/main.py                        |  55 +++++--
 src/pyeed/model.py                       |  17 +-
 6 files changed, 319 insertions(+), 120 deletions(-)

diff --git a/docs/usage/standard_numbering.ipynb b/docs/usage/standard_numbering.ipynb
index dc5f07a4..47daf619 100644
--- a/docs/usage/standard_numbering.ipynb
+++ b/docs/usage/standard_numbering.ipynb
@@ -16,7 +16,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 42,
+   "execution_count": 25,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -35,7 +35,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 43,
+   "execution_count": 26,
    "metadata": {},
    "outputs": [
     {
@@ -94,49 +94,60 @@
     "password = \"12345678\"\n",
     "\n",
     "eedb = Pyeed(uri, user=user, password=password)\n",
-    "eedb.db.wipe_database(date=\"2025-02-28\")\n",
+    "eedb.db.wipe_database(date=\"2025-03-14\")\n",
     "\n",
     "eedb.db.initialize_db_constraints(user=user, password=password)\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 44,
+   "execution_count": 27,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\u001b[32m2025-02-28 17:20:32.533\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.main\u001b[0m:\u001b[36mfetch_from_primary_db\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mFound 0 sequences in the database.\u001b[0m\n",
-      "\u001b[32m2025-02-28 17:20:32.533\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.main\u001b[0m:\u001b[36mfetch_from_primary_db\u001b[0m:\u001b[36m89\u001b[0m - \u001b[1mFetching 8 sequences from ncbi_protein.\u001b[0m\n",
-      "\u001b[32m2025-02-28 17:20:32.560\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.primary_db_adapter\u001b[0m:\u001b[36mexecute_requests\u001b[0m:\u001b[36m140\u001b[0m - \u001b[1mStarting requests for 1 batches.\u001b[0m\n",
-      "\u001b[32m2025-02-28 17:20:33.682\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein KJO56189.1 in database\u001b[0m\n",
-      "\u001b[32m2025-02-28 17:20:33.711\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein KLP91446.1 in database\u001b[0m\n",
-      "\u001b[32m2025-02-28 17:20:33.741\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAM15527.1 in database\u001b[0m\n",
-      "\u001b[32m2025-02-28 17:20:33.771\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF05614.1 in database\u001b[0m\n",
-      "\u001b[32m2025-02-28 17:20:33.804\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AFN21551.1 in database\u001b[0m\n",
-      "\u001b[32m2025-02-28 17:20:33.831\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA76794.1 in database\u001b[0m\n",
-      "\u001b[32m2025-02-28 17:20:33.858\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AGQ50511.1 in database\u001b[0m\n",
-      "\u001b[32m2025-02-28 17:20:33.884\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein KJO56289.1 in database\u001b[0m\n"
+      "\u001b[32m2025-03-14 16:01:33.841\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.main\u001b[0m:\u001b[36mfetch_from_primary_db\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mFound 0 sequences in the database.\u001b[0m\n",
+      "\u001b[32m2025-03-14 16:01:33.841\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.main\u001b[0m:\u001b[36mfetch_from_primary_db\u001b[0m:\u001b[36m89\u001b[0m - \u001b[1mFetching 5 sequences from ncbi_protein.\u001b[0m\n",
+      "\u001b[32m2025-03-14 16:01:33.864\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.primary_db_adapter\u001b[0m:\u001b[36mexecute_requests\u001b[0m:\u001b[36m140\u001b[0m - \u001b[1mStarting requests for 1 batches.\u001b[0m\n",
+      "\u001b[32m2025-03-14 16:01:35.072\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAM15527.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-14 16:01:35.101\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF05614.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-14 16:01:35.128\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AFN21551.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-14 16:01:35.164\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA76794.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-14 16:01:35.193\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AGQ50511.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-14 16:01:35.197\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.main\u001b[0m:\u001b[36mfetch_dna_entries_for_proteins\u001b[0m:\u001b[36m313\u001b[0m - \u001b[1mFound 5 coding sequences.\u001b[0m\n",
+      "\u001b[32m2025-03-14 16:01:35.201\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.main\u001b[0m:\u001b[36mfetch_dna_entries_for_proteins\u001b[0m:\u001b[36m343\u001b[0m - \u001b[1mFetching 5 new coding sequences.\u001b[0m\n",
+      "\u001b[32m2025-03-14 16:01:35.242\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.primary_db_adapter\u001b[0m:\u001b[36mexecute_requests\u001b[0m:\u001b[36m140\u001b[0m - \u001b[1mStarting requests for 1 batches.\u001b[0m\n",
+      "\u001b[32m2025-03-14 16:01:36.535\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.main\u001b[0m:\u001b[36mfetch_dna_entries_for_proteins\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1mSuccessfully fetched batch 1\u001b[0m\n",
+      "\u001b[32m2025-03-14 16:01:36.692\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.main\u001b[0m:\u001b[36mfetch_dna_entries_for_proteins\u001b[0m:\u001b[36m421\u001b[0m - \u001b[1mSuccessfully processed relationship batch 1\u001b[0m\n"
      ]
     }
    ],
    "source": [
-    "ids = [\"KJO56189.1\", \"KLP91446.1\", \"AAM15527.1\", \"AAF05614.1\", \"AFN21551.1\", \"CAA76794.1\", \"AGQ50511.1\", \"KJO56289.1\"]\n",
+    "ids = [\"AAM15527.1\", \"AAF05614.1\", \"AFN21551.1\", \"CAA76794.1\", \"AGQ50511.1\"]\n",
     "\n",
-    "eedb.fetch_from_primary_db(ids, db=\"ncbi_protein\")"
+    "eedb.fetch_from_primary_db(ids, db=\"ncbi_protein\")\n",
+    "eedb.fetch_dna_entries_for_proteins()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 45,
+   "execution_count": 28,
    "metadata": {},
    "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[32m2025-03-14 16:01:37.045\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m385\u001b[0m - \u001b[1mPairs: [('AAM15527.1', 'CAA76794.1'), ('AAM15527.1', 'AGQ50511.1'), ('AAM15527.1', 'AFN21551.1'), ('AAM15527.1', 'AAF05614.1')]\u001b[0m\n",
+      "\u001b[32m2025-03-14 16:01:37.046\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m394\u001b[0m - \u001b[1mInput: ['AAF05614.1', 'AFN21551.1', 'CAA76794.1', 'AGQ50511.1', 'AAM15527.1']\u001b[0m\n"
+     ]
+    },
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "bdd1752d875b4218bec76621c7f7e84a",
+       "model_id": "34805690583d49b5b2c01190c74a1729",
        "version_major": 2,
        "version_minor": 0
       },
@@ -156,6 +167,15 @@
      },
      "metadata": {},
      "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[32m2025-03-14 16:01:41.722\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m403\u001b[0m - \u001b[1mPairwise alignment results: [{'query_id': 'AAM15527.1', 'target_id': 'CAA76794.1', 'score': 272.0, 'identity': 0.9755244755244755, 'gaps': 0, 'mismatches': 7, 'query_aligned': 'MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMLSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAVTMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERDRQIAEIGASLIKHW', 'target_aligned': 'MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDKLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVKYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLRNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGASERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW'}, {'query_id': 'AAM15527.1', 'target_id': 'AGQ50511.1', 'score': 280.0, 'identity': 0.9895104895104895, 'gaps': 0, 'mismatches': 3, 'query_aligned': 'MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMLSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAVTMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERDRQIAEIGASLIKHW', 'target_aligned': 'MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMVSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW'}, {'query_id': 'AAM15527.1', 'target_id': 'AFN21551.1', 'score': 278.0, 'identity': 0.986013986013986, 'gaps': 0, 'mismatches': 4, 'query_aligned': 
'MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMLSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAVTMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERDRQIAEIGASLIKHW', 'target_aligned': 'MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGASERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW'}, {'query_id': 'AAM15527.1', 'target_id': 'AAF05614.1', 'score': 280.0, 'identity': 0.9895104895104895, 'gaps': 0, 'mismatches': 3, 'query_aligned': 'MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMLSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAVTMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERDRQIAEIGASLIKHW', 'target_aligned': 'MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMLSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSSGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW'}]\u001b[0m\n",
+      "\u001b[32m2025-03-14 16:01:41.724\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m422\u001b[0m - \u001b[1mConverted alignment: 4\u001b[0m\n",
+      "\u001b[32m2025-03-14 16:01:41.728\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m429\u001b[0m - \u001b[1mPositions: {'AAM15527.1': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', 
'266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286'], 'CAA76794.1': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', 
'278', '279', '280', '281', '282', '283', '284', '285', '286'], 'AGQ50511.1': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286'], 'AFN21551.1': ['1', '2', 
'3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286'], 'AAF05614.1': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', 
'19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286']}\u001b[0m\n"
+     ]
     }
    ],
    "source": [
@@ -163,29 +183,31 @@
     "\n",
     "\n",
     "sn.apply_standard_numbering_pairwise(\n",
-    "    base_sequence_id=\"KJO56189.1\", db=eedb.db, list_of_seq_ids=ids[0:5]\n",
+    "    base_sequence_id=\"AAM15527.1\", db=eedb.db, list_of_seq_ids=ids[0:5]\n",
     ")\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 46,
+   "execution_count": 29,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\u001b[32m2025-02-28 17:20:35.248\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m372\u001b[0m - \u001b[1mPair KJO56189.1 and KLP91446.1 already exists under the same standard numbering node\u001b[0m\n",
-      "\u001b[32m2025-02-28 17:20:35.250\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m372\u001b[0m - \u001b[1mPair KJO56189.1 and AAM15527.1 already exists under the same standard numbering node\u001b[0m\n",
-      "\u001b[32m2025-02-28 17:20:35.252\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m372\u001b[0m - \u001b[1mPair KJO56189.1 and AAF05614.1 already exists under the same standard numbering node\u001b[0m\n",
-      "\u001b[32m2025-02-28 17:20:35.254\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m372\u001b[0m - \u001b[1mPair KJO56189.1 and AFN21551.1 already exists under the same standard numbering node\u001b[0m\n"
+      "\u001b[32m2025-03-14 16:01:51.025\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m379\u001b[0m - \u001b[1mPair AAM15527.1 and AAF05614.1 already exists under the same standard numbering node\u001b[0m\n",
+      "\u001b[32m2025-03-14 16:01:51.026\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m379\u001b[0m - \u001b[1mPair AAM15527.1 and AFN21551.1 already exists under the same standard numbering node\u001b[0m\n",
+      "\u001b[32m2025-03-14 16:01:51.026\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m379\u001b[0m - \u001b[1mPair AAM15527.1 and CAA76794.1 already exists under the same standard numbering node\u001b[0m\n",
+      "\u001b[32m2025-03-14 16:01:51.027\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m379\u001b[0m - \u001b[1mPair AAM15527.1 and AGQ50511.1 already exists under the same standard numbering node\u001b[0m\n",
+      "\u001b[32m2025-03-14 16:01:51.027\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m385\u001b[0m - \u001b[1mPairs: []\u001b[0m\n",
+      "\u001b[32m2025-03-14 16:01:51.028\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m394\u001b[0m - \u001b[1mInput: ['AAF05614.1', 'AFN21551.1', 'CAA76794.1', 'AGQ50511.1', 'AAM15527.1']\u001b[0m\n"
      ]
     },
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "42c47d8085c6499ca30700b7f617d987",
+       "model_id": "b03690699eff400b8e713c6bdfb04c8b",
        "version_major": 2,
        "version_minor": 0
       },
@@ -205,35 +227,41 @@
      },
      "metadata": {},
      "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[32m2025-03-14 16:01:51.049\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m403\u001b[0m - \u001b[1mPairwise alignment results: []\u001b[0m\n",
+      "\u001b[32m2025-03-14 16:01:51.049\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m419\u001b[0m - \u001b[1mNo alignment found for AAM15527.1\u001b[0m\n"
+     ]
     }
    ],
    "source": [
     "sn.apply_standard_numbering_pairwise(\n",
-    "    base_sequence_id=\"KJO56189.1\", db=eedb.db, list_of_seq_ids=ids\n",
+    "    base_sequence_id=\"AAM15527.1\", db=eedb.db, list_of_seq_ids=ids\n",
     ")\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 47,
+   "execution_count": 30,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\u001b[32m2025-02-28 17:20:36.255\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering\u001b[0m:\u001b[36m470\u001b[0m - \u001b[1mUsing 7 sequences for standard numbering\u001b[0m\n",
-      "\u001b[32m2025-02-28 17:20:36.414\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering\u001b[0m:\u001b[36m490\u001b[0m - \u001b[1mAlignment received from ClustalOmega:\n",
-      "KJO56189.1  -----------------------------------------MSIQHFRVALIPFFAAFC-LPVFAHPE--TLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDSWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKV---AGP---LLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW\n",
-      "KLP91446.1  -----------------------------------------MSIQHFRVALIPFFAAFC-LPVFAHPE--TLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVKYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKV---AGP---LLRSALPAGWFIADKSGASERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW\n",
-      "KJO56289.1  MKKNKHQAISREDLNKYHEETQGLERDMVNEILNSRSRAWKIATAFFVFAVVSMITAVGVIIRFAQPLPAYLTTINKDTGEV-SQVKITRDEATYGDVIDQYWIS-------QF-------VIH-----------RESYD------YNSIQ-----------VDYDAMSLMASGDVADEYLSMFKGPNRIDKRLGDSER-------TTVHINSVITDREHGVA--TVRFTTQQ-----RIRQRPNPEPPRYWIATIAYEYKALPMTAQQRYINPLGFRVTSYRKN--------AENVGA----------------------------VGG------\n",
-      "AGQ50511.1  -----------------------------------------MSIQHFRVALIPFFAAFC-LPVFAHPE--TLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMVSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKV---AGP---LLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW\n",
-      "AAM15527.1  -----------------------------------------MSIQHFRVALIPFFAAFC-LPVFAHPE--TLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMLSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAVTMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKV---AGP---LLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERDRQIAEIGASLIKHW\n",
-      "AAF05614.1  -----------------------------------------MSIQHFRVALIPFFAAFC-LPVFAHPE--TLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMLSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKV---AGP---LLRSALPAGWFIADKSGAGERGSSGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW\n",
-      "CAA76794.1  -----------------------------------------MSIQHFRVALIPFFAAFC-LPVFAHPE--TLVKVKDAEDKLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVKYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLRNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKV---AGP---LLRSALPAGWFIADKSGASERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW\n",
-      "AFN21551.1  -----------------------------------------MSIQHFRVALIPFFAAFC-LPVFAHPE--TLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKV---AGP---LLRSALPAGWFIADKSGASERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW\u001b[0m\n",
-      "\u001b[32m2025-02-28 17:20:36.415\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mrun_numbering_algorithm_clustalo\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mRunning numbering algorithm for base sequence KJO56189.1\u001b[0m\n",
-      "\u001b[32m2025-02-28 17:20:36.416\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering\u001b[0m:\u001b[36m496\u001b[0m - \u001b[1mPositions computed: {'KJO56189.1': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', 
'266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286'], 'KJO56289.1': ['0.1', '0.2', '0.3', '0.4', '0.5', '0.6', '0.7', '0.8', '0.9', '0.10', '0.11', '0.12', '0.13', '0.14', '0.15', '0.16', '0.17', '0.18', '0.19', '0.20', '0.21', '0.22', '0.23', '0.24', '0.25', '0.26', '0.27', '0.28', '0.29', '0.30', '0.31', '0.32', '0.33', '0.34', '0.35', '0.36', '0.37', '0.38', '0.39', '0.40', '0.41', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '18.1', '19', '20', '21', '22', '23', '24', '25', '26', '26.1', '26.2', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '69', '70', '78', '79', '80', '92', '93', '94', '95', '96', '103', '104', '105', '106', '107', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '182', '183', '184', '185', '186', '187', '188', '189', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '214.1', '214.2', '214.3', '215', '216', '217', '217.1', '217.2', '217.3', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '244', '245', '246', '247', '248', '249', '278', '279', '280'], 'KLP91446.1': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', 
'34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286'], 'AGQ50511.1': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', 
'49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286'], 'AAM15527.1': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', 
'64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286'], 'AAF05614.1': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', 
'79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286'], 'CAA76794.1': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', 
'94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286'], 'AFN21551.1': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', 
'107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286']}\u001b[0m\n"
+      "\u001b[32m2025-03-14 16:01:52.356\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering\u001b[0m:\u001b[36m494\u001b[0m - \u001b[1mUsing 4 sequences for standard numbering\u001b[0m\n",
+      "\u001b[32m2025-03-14 16:01:52.467\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering\u001b[0m:\u001b[36m514\u001b[0m - \u001b[1mAlignment received from ClustalOmega:\n",
+      "AAM15527.1  MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMLSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAVTMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERDRQIAEIGASLIKHW\n",
+      "AAF05614.1  MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMLSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSSGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW\n",
+      "AFN21551.1  MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGASERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW\n",
+      "CAA76794.1  MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDKLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVKYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLRNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGASERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW\n",
+      "AGQ50511.1  MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMVSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW\u001b[0m\n",
+      "\u001b[32m2025-03-14 16:01:52.468\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering\u001b[0m:\u001b[36m515\u001b[0m - \u001b[1mAlignment length: 286\u001b[0m\n",
+      "\u001b[32m2025-03-14 16:01:52.468\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mrun_numbering_algorithm_clustalo\u001b[0m:\u001b[36m118\u001b[0m - \u001b[1mRunning numbering algorithm for base sequence AAM15527.1\u001b[0m\n",
+      "\u001b[32m2025-03-14 16:01:52.469\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering\u001b[0m:\u001b[36m519\u001b[0m - \u001b[1mPositions computed: {'AAM15527.1': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', 
'266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286'], 'AAF05614.1': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', 
'278', '279', '280', '281', '282', '283', '284', '285', '286'], 'AFN21551.1': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286'], 'CAA76794.1': ['1', '2', 
'3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286'], 'AGQ50511.1': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', 
'19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286']}\u001b[0m\n"
      ]
     }
    ],
@@ -241,10 +269,100 @@
     "sn_clustal = StandardNumberingTool(name=\"test_standard_numbering_clustal\")\n",
     "\n",
     "sn_clustal.apply_standard_numbering(\n",
-    "    base_sequence_id=\"KJO56189.1\", db=eedb.db, list_of_seq_ids=ids\n",
+    "    base_sequence_id=\"AAM15527.1\", db=eedb.db, list_of_seq_ids=ids\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[32m2025-03-14 16:01:52.743\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering\u001b[0m:\u001b[36m494\u001b[0m - \u001b[1mUsing 5 sequences for standard numbering\u001b[0m\n",
+      "\u001b[32m2025-03-14 16:01:53.287\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering\u001b[0m:\u001b[36m514\u001b[0m - \u001b[1mAlignment received from ClustalOmega:\n",
+      "AF190695.1  TTCTTGAAGACGAAAGGGCCTCGTGATACGCCTATTTTTATAGGTTAATGTCATGATAATAATGGTTTCTTAGACGTCAGGTGGCACTTTTCGGGGAAATGTGCGCGGAACCCCTATTTGTTTATTTTTCTAAATACATTCAAATATGTATCCGCTCATGAGACAATAACCCTGGTAAATGCTTCAATAATATTGAAAAAGGAAGAGTATGAGTATTCAACATTTTCGTGTCGCCCTTATTCCCTTTTTTGCGGCATTTTGCCTTCCTGTTTTTGCTCACCCAGAAACGCTGGTGAAAGTAAAAGATGCTGAAGATCAGTTGGGTGCACGAGTGGGTTACATCGAACTGGATCTCAACAGCGGTAAGATCCTTGAGAGTTTTCGCCCCGAAGAACGTTTTCCAATGCTGAGCACTTTTAAAGTTCTGCTATGTGGTGCGGTATTATCCCGTGTTGACGCCGGGCAAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCTGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCACAACATGGGGGATCATGTAACTCGCCTTGATCGTTGGGAACCGGAGCTGAATGAAGCCATACCAAACGACGAGCGTGACACCACGATGCCTGCAGCAATGGCAACAACGTTGCGCAAACTATTAACTGGCGAACTACTTACTCTAGCTTCCCGGCAACAATTAATAGACTGGATGGAGGCGGATAAAGTTGCAGGACCACTTCTGCGCTCGGCCCTTCCGGCTGGCTGGTTTATTGCTGATAAATCTGGAGCCGGTGAGCGTGGGTCTAGCGGTATCATTGCAGCACTGGGGCCAGATGGTAAGCCCTCCCGTATCGTAGTTATCTACACGACGGGGAGTCAGGCAACTATGGATGAACGAAATAGACAGATCGCTGAGATAGGTGCCTCACTGATTAAGCATTGGTAACTGTCAGACCAAGTTTACTCATATATACTTTAGA\n",
+      "AF347054.1  TTCTTGAAGACGAAAGGGCCTCGTGATACGCTTATTTTTATAGGTTAATGTCATGATAATAATGGTTTCTTAGACGTCAGGTGGCACTTTTCGGGGAAATGTGCGCGGAACCCCTATTTGTTTATTTTTCTAAATACATTCAAATATGTATCCGCTCATGAGACAATAACCCTGATAAATGCTTCAATAATATTGAAAAAGGAAGAGTATGAGTATTCAACATTTCCGTGTCGCCCTTATTCCCTTTTTTGCGGCATTTTGCCTTCCTGTTTTTGCTCACCCAGAAACGCTGGTGAAAGTAAAAGATGCTGAAGATCAGTTGGGTGCACGAGTGGGTTACATCGAACTGGATCTCAACAGCGGTAAGATCCTTGAGAGTTTTCGCCCCGAAGAACGTTTTCCAATGCTGAGCACTTTTAAAGTTCTGCTATGTGGCGCGGTATTATCCCGTGTTGACGCCGGGCAAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCGTAACCATGAGTGATAACACTGCGGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCACAACATGGGGGATCATGTAACTCGCCTTGATCGTTGGGAACCGGAGCTGAATGAAGCCATACCAAACGACGAGCGTGACACCACGATGCCTGCAGCAATGGCAACAACGTTGCGCAAACTATTAACTGGCGAACTACTTACTCTAGCTTCCCGGCAACAATTAATAGACTGGATGGAGGCGGATAAAGTTGCAGGACCACTTCTGCGCTCGGCCCTTCCGGCTGGCTGGTTTATTGCTGATAAATCTGGAGCCGGTGAGCGTGGGTCTCGCGGTATCATTGCAGCACTGGGGCCAGATGGTAAGCCCTCCCGTATCGTAGTTATCTACACGACGGGGAGTCAGGCAACTATGGATGAACGAGATAGACAGATCGCTGAGATAGGTGCCTCACTGATTAAGCATTGGTAACTGTCAGACCAAG---------------------\n",
+      "JX042489.1  ------------------------------------------------------------------TTCTTAGACGTCAGGTGGC-ACTTTAGGGGAAATGTGCGCGGAACCCCTATTTGTTTATTTTTTCTAATACATTCAAATATGTATCCGCTCATGATACAATAACCCTGATAAATGCTTCAATAATATTGAAAAAGGAAGAGTATGAGTATTCAACATTTCCGTGTCGCCCTTATTCCCTTTTTTGCGGCATTTTGCCTTCCTGTTTTTGCTCACCCAGAAACGCTGGTGAAAGTAAAAGATGCTGAAGATCAGTTGGGTGCACGAGTGGGTTACATCGAGCTGGATCTCAACAGCGGTAAGATCCTTGAGAGTTTTCGCCCCGAAGAACGTTTTCCAATGATGAGCACTTTTAAAGTTCTGCTATGTGGTGCGGTATTATCCCGTGTTGACGCCGGGCAAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCGGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCACAACATGGGGGATCATGTAACCCGCCTTGATCGTTGGGAACCGGAGCTGAATGAAGCCATACCAAACGACGAGCGTGACACCACGATGCCTGCAGCAATGGCAACAACGTTGCGCAAACTATTAACTGGCGAACTACTTACTCTAGCTTCCCGGCAACAATTAATAGACTGGATGGAGGCGGATAAAGTTGCAGGACCACTTCTGCGCTCGGCCCTTCCGGCTGGCTGGTTTATTGCTGATAAATCTGGAGCCAGTGAGCGTGGATCTCGCGGTATCATTGCAGCACTGGGGCCAGATGGTAAGCCCTCCCGTATCGTAGTTATCTACACGACGGGGAGTCAGGCAACTATGGATGAACGAAATAGACAGATCGCTGAGATAGGTGCCTCACTGATTAAGCATTGGTAA----------------------------------\n",
+      "KC844056.1  ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ATGAGTATTCAACATTTTCGTGTCGCCCTTATTCCCTTTTTTGCGGCATTTTGCCTTCCTGTTTTTGCTCACCCAGAAACGCTGGTGAAAGTAAAAGATGCTGAAGATCAGTTGGGTGCACGAGTGGGTTACATCGAACTGGATCTCAACAGCGGTAAGATCCTTGAGAGTTTTCGCCCCGAAGAACGTTTTCCAATGGTGAGCACTTTTAAAGTTCTGCTATGTGGTGCGGTATTATCCCGTGTTGACGCCGGGCAAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCTGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCACAACATGGGGGATCATGTAACTCGCCTTGATCGTTGGGAACCGGAGCTGAATGAAGCCATACCAAACGACGAGCGTGACACCACGATGCCTGCAGCAATGGCAACAACGTTGCGCAAACTATTAACTGGCGAACTACTTACTCTAGCTTCCCGGCAACAATTAATAGACTGGATGGAGGCGGATAAAGTTGCAGGACCACTTCTGCGCTCGGCCCTTCCGGCTGGCTGGTTTATTGCTGATAAATCTGGAGCCGGTGAGCGTGGGTCTCGCGGTATCATTGCAGCACTGGGGCCAGATGGTAAGCCCTCCCGTATCGTAGTTATCTACACGACGGGGAGTCAGGCAACTATGGATGAACGAAATAGACAGATCGCTGAGATAGGTGCCTCACTGATTAAGCATTGGTAA----------------------------------\n",
+      "Y17582.1    ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ATGAGTATTCAACATTTCCGTGTCGCCCTTATTCCCTTTTTTGCGGCATTTTGCCTTCCTGTTTTTGCTCACCCAGAAACGCTGGTGAAAGTAAAAGATGCTGAAGATAAGTTGGGTGCACGAGTGGGTTACATCGAGCTGGATCTCAACAGCGGTAAGATCCTTGAGAGTTTTCGCCCCGAAGAACGTTTTCCAATGATGAGCACTTTTAAAGTTCTGCTATGTGGTGCGGTATTATCCCGTGTTGACGCCGGGCAAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTAAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCGGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCGCAACATGGGGGATCATGTAACCCGCCTTGATCGTTGGGAACCGGAGCTGAATGAAGCCATACCAAACGACGAGCGTGACACCACGATGCCTGCAGCAATGGCAACAACGTTGCGCAAACTATTAACTGGCGAACTACTTACTCTAGCTTCCCGGCAACAATTAATAGACTGGATGGAGGCGGATAAAGTTGCAGGACCACTTCTGCGCTCGGCCCTTCCGGCTGGCTGGTTTATTGCTGATAAATCTGGAGCCAGTGAGCGTGGATCTCGCGGTATCATTGCAGCACTGGGGCCAGATGGTAAGCCCTCCCGTATCGTAGTTATCTACACGACGGGGAGTCAGGCAACTATGGATGAACGAAATAGACAGATCGCTGAGATAGGTGCCTCACTGATTAAGCATTGG-------------------------------------\u001b[0m\n",
+      "\u001b[32m2025-03-14 16:01:53.288\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering\u001b[0m:\u001b[36m515\u001b[0m - \u001b[1mAlignment length: 1103\u001b[0m\n",
+      "\u001b[32m2025-03-14 16:01:53.288\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mrun_numbering_algorithm_clustalo\u001b[0m:\u001b[36m118\u001b[0m - \u001b[1mRunning numbering algorithm for base sequence AF190695.1\u001b[0m\n",
+      "\u001b[32m2025-03-14 16:01:53.293\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering\u001b[0m:\u001b[36m519\u001b[0m - \u001b[1mPositions computed: {'AF190695.1': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', 
'266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286', '287', '288', '289', '290', '291', '292', '293', '294', '295', '296', '297', '298', '299', '300', '301', '302', '303', '304', '305', '306', '307', '308', '309', '310', '311', '312', '313', '314', '315', '316', '317', '318', '319', '320', '321', '322', '323', '324', '325', '326', '327', '328', '329', '330', '331', '332', '333', '334', '335', '336', '337', '338', '339', '340', '341', '342', '343', '344', '345', '346', '347', '348', '349', '350', '351', '352', '353', '354', '355', '356', '357', '358', '359', '360', '361', '362', '363', '364', '365', '366', '367', '368', '369', '370', '371', '372', '373', '374', '375', '376', '377', '378', '379', '380', '381', '382', '383', '384', '385', '386', '387', '388', '389', '390', '391', '392', '393', '394', '395', '396', '397', '398', '399', '400', '401', '402', '403', '404', '405', '406', '407', '408', '409', '410', '411', '412', '413', '414', '415', '416', '417', '418', '419', '420', '421', '422', '423', '424', '425', '426', '427', '428', '429', '430', '431', '432', '433', '434', '435', '436', '437', '438', '439', '440', '441', '442', '443', '444', '445', '446', '447', '448', '449', '450', '451', '452', '453', '454', '455', '456', '457', '458', '459', '460', '461', '462', '463', '464', '465', '466', '467', '468', '469', '470', '471', '472', '473', '474', '475', '476', '477', '478', '479', '480', '481', '482', '483', '484', '485', '486', '487', '488', '489', '490', '491', '492', '493', '494', '495', '496', '497', '498', '499', '500', '501', '502', '503', '504', '505', '506', '507', '508', '509', '510', '511', '512', '513', '514', '515', '516', '517', '518', '519', '520', '521', '522', '523', '524', '525', '526', '527', '528', '529', '530', '531', '532', '533', '534', '535', '536', '537', '538', '539', '540', '541', '542', '543', '544', '545', '546', '547', '548', '549', '550', 
'551', '552', '553', '554', '555', '556', '557', '558', '559', '560', '561', '562', '563', '564', '565', '566', '567', '568', '569', '570', '571', '572', '573', '574', '575', '576', '577', '578', '579', '580', '581', '582', '583', '584', '585', '586', '587', '588', '589', '590', '591', '592', '593', '594', '595', '596', '597', '598', '599', '600', '601', '602', '603', '604', '605', '606', '607', '608', '609', '610', '611', '612', '613', '614', '615', '616', '617', '618', '619', '620', '621', '622', '623', '624', '625', '626', '627', '628', '629', '630', '631', '632', '633', '634', '635', '636', '637', '638', '639', '640', '641', '642', '643', '644', '645', '646', '647', '648', '649', '650', '651', '652', '653', '654', '655', '656', '657', '658', '659', '660', '661', '662', '663', '664', '665', '666', '667', '668', '669', '670', '671', '672', '673', '674', '675', '676', '677', '678', '679', '680', '681', '682', '683', '684', '685', '686', '687', '688', '689', '690', '691', '692', '693', '694', '695', '696', '697', '698', '699', '700', '701', '702', '703', '704', '705', '706', '707', '708', '709', '710', '711', '712', '713', '714', '715', '716', '717', '718', '719', '720', '721', '722', '723', '724', '725', '726', '727', '728', '729', '730', '731', '732', '733', '734', '735', '736', '737', '738', '739', '740', '741', '742', '743', '744', '745', '746', '747', '748', '749', '750', '751', '752', '753', '754', '755', '756', '757', '758', '759', '760', '761', '762', '763', '764', '765', '766', '767', '768', '769', '770', '771', '772', '773', '774', '775', '776', '777', '778', '779', '780', '781', '782', '783', '784', '785', '786', '787', '788', '789', '790', '791', '792', '793', '794', '795', '796', '797', '798', '799', '800', '801', '802', '803', '804', '805', '806', '807', '808', '809', '810', '811', '812', '813', '814', '815', '816', '817', '818', '819', '820', '821', '822', '823', '824', '825', '826', '827', '828', '829', '830', '831', '832', '833', '834', '835', 
'836', '837', '838', '839', '840', '841', '842', '843', '844', '845', '846', '847', '848', '849', '850', '851', '852', '853', '854', '855', '856', '857', '858', '859', '860', '861', '862', '863', '864', '865', '866', '867', '868', '869', '870', '871', '872', '873', '874', '875', '876', '877', '878', '879', '880', '881', '882', '883', '884', '885', '886', '887', '888', '889', '890', '891', '892', '893', '894', '895', '896', '897', '898', '899', '900', '901', '902', '903', '904', '905', '906', '907', '908', '909', '910', '911', '912', '913', '914', '915', '916', '917', '918', '919', '920', '921', '922', '923', '924', '925', '926', '927', '928', '929', '930', '931', '932', '933', '934', '935', '936', '937', '938', '939', '940', '941', '942', '943', '944', '945', '946', '947', '948', '949', '950', '951', '952', '953', '954', '955', '956', '957', '958', '959', '960', '961', '962', '963', '964', '965', '966', '967', '968', '969', '970', '971', '972', '973', '974', '975', '976', '977', '978', '979', '980', '981', '982', '983', '984', '985', '986', '987', '988', '989', '990', '991', '992', '993', '994', '995', '996', '997', '998', '999', '1000', '1001', '1002', '1003', '1004', '1005', '1006', '1007', '1008', '1009', '1010', '1011', '1012', '1013', '1014', '1015', '1016', '1017', '1018', '1019', '1020', '1021', '1022', '1023', '1024', '1025', '1026', '1027', '1028', '1029', '1030', '1031', '1032', '1033', '1034', '1035', '1036', '1037', '1038', '1039', '1040', '1041', '1042', '1043', '1044', '1045', '1046', '1047', '1048', '1049', '1050', '1051', '1052', '1053', '1054', '1055', '1056', '1057', '1058', '1059', '1060', '1061', '1062', '1063', '1064', '1065', '1066', '1067', '1068', '1069', '1070', '1071', '1072', '1073', '1074', '1075', '1076', '1077', '1078', '1079', '1080', '1081', '1082', '1083', '1084', '1085', '1086', '1087', '1088', '1089', '1090', '1091', '1092', '1093', '1094', '1095', '1096', '1097', '1098', '1099', '1100', '1101', '1102', '1103'], 'AF347054.1': 
['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286', '287', '288', '289', '290', '291', '292', '293', '294', '295', '296', '297', '298', '299', '300', '301', 
'302', '303', '304', '305', '306', '307', '308', '309', '310', '311', '312', '313', '314', '315', '316', '317', '318', '319', '320', '321', '322', '323', '324', '325', '326', '327', '328', '329', '330', '331', '332', '333', '334', '335', '336', '337', '338', '339', '340', '341', '342', '343', '344', '345', '346', '347', '348', '349', '350', '351', '352', '353', '354', '355', '356', '357', '358', '359', '360', '361', '362', '363', '364', '365', '366', '367', '368', '369', '370', '371', '372', '373', '374', '375', '376', '377', '378', '379', '380', '381', '382', '383', '384', '385', '386', '387', '388', '389', '390', '391', '392', '393', '394', '395', '396', '397', '398', '399', '400', '401', '402', '403', '404', '405', '406', '407', '408', '409', '410', '411', '412', '413', '414', '415', '416', '417', '418', '419', '420', '421', '422', '423', '424', '425', '426', '427', '428', '429', '430', '431', '432', '433', '434', '435', '436', '437', '438', '439', '440', '441', '442', '443', '444', '445', '446', '447', '448', '449', '450', '451', '452', '453', '454', '455', '456', '457', '458', '459', '460', '461', '462', '463', '464', '465', '466', '467', '468', '469', '470', '471', '472', '473', '474', '475', '476', '477', '478', '479', '480', '481', '482', '483', '484', '485', '486', '487', '488', '489', '490', '491', '492', '493', '494', '495', '496', '497', '498', '499', '500', '501', '502', '503', '504', '505', '506', '507', '508', '509', '510', '511', '512', '513', '514', '515', '516', '517', '518', '519', '520', '521', '522', '523', '524', '525', '526', '527', '528', '529', '530', '531', '532', '533', '534', '535', '536', '537', '538', '539', '540', '541', '542', '543', '544', '545', '546', '547', '548', '549', '550', '551', '552', '553', '554', '555', '556', '557', '558', '559', '560', '561', '562', '563', '564', '565', '566', '567', '568', '569', '570', '571', '572', '573', '574', '575', '576', '577', '578', '579', '580', '581', '582', '583', '584', '585', '586', 
'587', '588', '589', '590', '591', '592', '593', '594', '595', '596', '597', '598', '599', '600', '601', '602', '603', '604', '605', '606', '607', '608', '609', '610', '611', '612', '613', '614', '615', '616', '617', '618', '619', '620', '621', '622', '623', '624', '625', '626', '627', '628', '629', '630', '631', '632', '633', '634', '635', '636', '637', '638', '639', '640', '641', '642', '643', '644', '645', '646', '647', '648', '649', '650', '651', '652', '653', '654', '655', '656', '657', '658', '659', '660', '661', '662', '663', '664', '665', '666', '667', '668', '669', '670', '671', '672', '673', '674', '675', '676', '677', '678', '679', '680', '681', '682', '683', '684', '685', '686', '687', '688', '689', '690', '691', '692', '693', '694', '695', '696', '697', '698', '699', '700', '701', '702', '703', '704', '705', '706', '707', '708', '709', '710', '711', '712', '713', '714', '715', '716', '717', '718', '719', '720', '721', '722', '723', '724', '725', '726', '727', '728', '729', '730', '731', '732', '733', '734', '735', '736', '737', '738', '739', '740', '741', '742', '743', '744', '745', '746', '747', '748', '749', '750', '751', '752', '753', '754', '755', '756', '757', '758', '759', '760', '761', '762', '763', '764', '765', '766', '767', '768', '769', '770', '771', '772', '773', '774', '775', '776', '777', '778', '779', '780', '781', '782', '783', '784', '785', '786', '787', '788', '789', '790', '791', '792', '793', '794', '795', '796', '797', '798', '799', '800', '801', '802', '803', '804', '805', '806', '807', '808', '809', '810', '811', '812', '813', '814', '815', '816', '817', '818', '819', '820', '821', '822', '823', '824', '825', '826', '827', '828', '829', '830', '831', '832', '833', '834', '835', '836', '837', '838', '839', '840', '841', '842', '843', '844', '845', '846', '847', '848', '849', '850', '851', '852', '853', '854', '855', '856', '857', '858', '859', '860', '861', '862', '863', '864', '865', '866', '867', '868', '869', '870', '871', 
'872', '873', '874', '875', '876', '877', '878', '879', '880', '881', '882', '883', '884', '885', '886', '887', '888', '889', '890', '891', '892', '893', '894', '895', '896', '897', '898', '899', '900', '901', '902', '903', '904', '905', '906', '907', '908', '909', '910', '911', '912', '913', '914', '915', '916', '917', '918', '919', '920', '921', '922', '923', '924', '925', '926', '927', '928', '929', '930', '931', '932', '933', '934', '935', '936', '937', '938', '939', '940', '941', '942', '943', '944', '945', '946', '947', '948', '949', '950', '951', '952', '953', '954', '955', '956', '957', '958', '959', '960', '961', '962', '963', '964', '965', '966', '967', '968', '969', '970', '971', '972', '973', '974', '975', '976', '977', '978', '979', '980', '981', '982', '983', '984', '985', '986', '987', '988', '989', '990', '991', '992', '993', '994', '995', '996', '997', '998', '999', '1000', '1001', '1002', '1003', '1004', '1005', '1006', '1007', '1008', '1009', '1010', '1011', '1012', '1013', '1014', '1015', '1016', '1017', '1018', '1019', '1020', '1021', '1022', '1023', '1024', '1025', '1026', '1027', '1028', '1029', '1030', '1031', '1032', '1033', '1034', '1035', '1036', '1037', '1038', '1039', '1040', '1041', '1042', '1043', '1044', '1045', '1046', '1047', '1048', '1049', '1050', '1051', '1052', '1053', '1054', '1055', '1056', '1057', '1058', '1059', '1060', '1061', '1062', '1063', '1064', '1065', '1066', '1067', '1068', '1069', '1070', '1071', '1072', '1073', '1074', '1075', '1076', '1077', '1078', '1079', '1080', '1081', '1082'], 'JX042489.1': ['67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', 
'133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286', '287', '288', '289', '290', '291', '292', '293', '294', '295', '296', '297', '298', '299', '300', '301', '302', '303', '304', '305', '306', '307', '308', '309', '310', '311', '312', '313', '314', '315', '316', '317', '318', '319', '320', '321', '322', '323', '324', '325', '326', '327', '328', '329', '330', '331', '332', '333', '334', '335', '336', '337', '338', '339', '340', '341', '342', '343', '344', '345', '346', '347', '348', '349', '350', '351', '352', '353', '354', '355', '356', '357', '358', '359', '360', '361', '362', '363', '364', '365', '366', '367', '368', '369', '370', '371', '372', '373', '374', '375', '376', '377', '378', '379', '380', '381', '382', '383', '384', '385', '386', '387', '388', '389', '390', '391', '392', '393', '394', '395', '396', '397', '398', '399', '400', '401', '402', '403', '404', '405', '406', '407', '408', '409', '410', '411', '412', '413', '414', '415', '416', '417', 
'418', '419', '420', '421', '422', '423', '424', '425', '426', '427', '428', '429', '430', '431', '432', '433', '434', '435', '436', '437', '438', '439', '440', '441', '442', '443', '444', '445', '446', '447', '448', '449', '450', '451', '452', '453', '454', '455', '456', '457', '458', '459', '460', '461', '462', '463', '464', '465', '466', '467', '468', '469', '470', '471', '472', '473', '474', '475', '476', '477', '478', '479', '480', '481', '482', '483', '484', '485', '486', '487', '488', '489', '490', '491', '492', '493', '494', '495', '496', '497', '498', '499', '500', '501', '502', '503', '504', '505', '506', '507', '508', '509', '510', '511', '512', '513', '514', '515', '516', '517', '518', '519', '520', '521', '522', '523', '524', '525', '526', '527', '528', '529', '530', '531', '532', '533', '534', '535', '536', '537', '538', '539', '540', '541', '542', '543', '544', '545', '546', '547', '548', '549', '550', '551', '552', '553', '554', '555', '556', '557', '558', '559', '560', '561', '562', '563', '564', '565', '566', '567', '568', '569', '570', '571', '572', '573', '574', '575', '576', '577', '578', '579', '580', '581', '582', '583', '584', '585', '586', '587', '588', '589', '590', '591', '592', '593', '594', '595', '596', '597', '598', '599', '600', '601', '602', '603', '604', '605', '606', '607', '608', '609', '610', '611', '612', '613', '614', '615', '616', '617', '618', '619', '620', '621', '622', '623', '624', '625', '626', '627', '628', '629', '630', '631', '632', '633', '634', '635', '636', '637', '638', '639', '640', '641', '642', '643', '644', '645', '646', '647', '648', '649', '650', '651', '652', '653', '654', '655', '656', '657', '658', '659', '660', '661', '662', '663', '664', '665', '666', '667', '668', '669', '670', '671', '672', '673', '674', '675', '676', '677', '678', '679', '680', '681', '682', '683', '684', '685', '686', '687', '688', '689', '690', '691', '692', '693', '694', '695', '696', '697', '698', '699', '700', '701', '702', 
'703', '704', '705', '706', '707', '708', '709', '710', '711', '712', '713', '714', '715', '716', '717', '718', '719', '720', '721', '722', '723', '724', '725', '726', '727', '728', '729', '730', '731', '732', '733', '734', '735', '736', '737', '738', '739', '740', '741', '742', '743', '744', '745', '746', '747', '748', '749', '750', '751', '752', '753', '754', '755', '756', '757', '758', '759', '760', '761', '762', '763', '764', '765', '766', '767', '768', '769', '770', '771', '772', '773', '774', '775', '776', '777', '778', '779', '780', '781', '782', '783', '784', '785', '786', '787', '788', '789', '790', '791', '792', '793', '794', '795', '796', '797', '798', '799', '800', '801', '802', '803', '804', '805', '806', '807', '808', '809', '810', '811', '812', '813', '814', '815', '816', '817', '818', '819', '820', '821', '822', '823', '824', '825', '826', '827', '828', '829', '830', '831', '832', '833', '834', '835', '836', '837', '838', '839', '840', '841', '842', '843', '844', '845', '846', '847', '848', '849', '850', '851', '852', '853', '854', '855', '856', '857', '858', '859', '860', '861', '862', '863', '864', '865', '866', '867', '868', '869', '870', '871', '872', '873', '874', '875', '876', '877', '878', '879', '880', '881', '882', '883', '884', '885', '886', '887', '888', '889', '890', '891', '892', '893', '894', '895', '896', '897', '898', '899', '900', '901', '902', '903', '904', '905', '906', '907', '908', '909', '910', '911', '912', '913', '914', '915', '916', '917', '918', '919', '920', '921', '922', '923', '924', '925', '926', '927', '928', '929', '930', '931', '932', '933', '934', '935', '936', '937', '938', '939', '940', '941', '942', '943', '944', '945', '946', '947', '948', '949', '950', '951', '952', '953', '954', '955', '956', '957', '958', '959', '960', '961', '962', '963', '964', '965', '966', '967', '968', '969', '970', '971', '972', '973', '974', '975', '976', '977', '978', '979', '980', '981', '982', '983', '984', '985', '986', '987', 
'988', '989', '990', '991', '992', '993', '994', '995', '996', '997', '998', '999', '1000', '1001', '1002', '1003', '1004', '1005', '1006', '1007', '1008', '1009', '1010', '1011', '1012', '1013', '1014', '1015', '1016', '1017', '1018', '1019', '1020', '1021', '1022', '1023', '1024', '1025', '1026', '1027', '1028', '1029', '1030', '1031', '1032', '1033', '1034', '1035', '1036', '1037', '1038', '1039', '1040', '1041', '1042', '1043', '1044', '1045', '1046', '1047', '1048', '1049', '1050', '1051', '1052', '1053', '1054', '1055', '1056', '1057', '1058', '1059', '1060', '1061', '1062', '1063', '1064', '1065', '1066', '1067', '1068', '1069'], 'KC844056.1': ['209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286', '287', '288', '289', '290', '291', '292', '293', '294', '295', '296', '297', '298', '299', '300', '301', '302', '303', '304', '305', '306', '307', '308', '309', '310', '311', '312', '313', '314', '315', '316', '317', '318', '319', '320', '321', '322', '323', '324', '325', '326', '327', '328', '329', '330', '331', '332', '333', '334', '335', '336', '337', '338', '339', '340', '341', '342', '343', '344', '345', '346', '347', '348', '349', '350', '351', '352', '353', '354', '355', '356', '357', '358', '359', '360', '361', '362', '363', '364', '365', '366', '367', '368', '369', '370', '371', '372', '373', '374', '375', '376', '377', '378', '379', '380', '381', '382', '383', '384', '385', '386', '387', '388', '389', '390', '391', '392', '393', '394', '395', '396', '397', '398', '399', 
'400', '401', '402', '403', '404', '405', '406', '407', '408', '409', '410', '411', '412', '413', '414', '415', '416', '417', '418', '419', '420', '421', '422', '423', '424', '425', '426', '427', '428', '429', '430', '431', '432', '433', '434', '435', '436', '437', '438', '439', '440', '441', '442', '443', '444', '445', '446', '447', '448', '449', '450', '451', '452', '453', '454', '455', '456', '457', '458', '459', '460', '461', '462', '463', '464', '465', '466', '467', '468', '469', '470', '471', '472', '473', '474', '475', '476', '477', '478', '479', '480', '481', '482', '483', '484', '485', '486', '487', '488', '489', '490', '491', '492', '493', '494', '495', '496', '497', '498', '499', '500', '501', '502', '503', '504', '505', '506', '507', '508', '509', '510', '511', '512', '513', '514', '515', '516', '517', '518', '519', '520', '521', '522', '523', '524', '525', '526', '527', '528', '529', '530', '531', '532', '533', '534', '535', '536', '537', '538', '539', '540', '541', '542', '543', '544', '545', '546', '547', '548', '549', '550', '551', '552', '553', '554', '555', '556', '557', '558', '559', '560', '561', '562', '563', '564', '565', '566', '567', '568', '569', '570', '571', '572', '573', '574', '575', '576', '577', '578', '579', '580', '581', '582', '583', '584', '585', '586', '587', '588', '589', '590', '591', '592', '593', '594', '595', '596', '597', '598', '599', '600', '601', '602', '603', '604', '605', '606', '607', '608', '609', '610', '611', '612', '613', '614', '615', '616', '617', '618', '619', '620', '621', '622', '623', '624', '625', '626', '627', '628', '629', '630', '631', '632', '633', '634', '635', '636', '637', '638', '639', '640', '641', '642', '643', '644', '645', '646', '647', '648', '649', '650', '651', '652', '653', '654', '655', '656', '657', '658', '659', '660', '661', '662', '663', '664', '665', '666', '667', '668', '669', '670', '671', '672', '673', '674', '675', '676', '677', '678', '679', '680', '681', '682', '683', '684', 
'685', '686', '687', '688', '689', '690', '691', '692', '693', '694', '695', '696', '697', '698', '699', '700', '701', '702', '703', '704', '705', '706', '707', '708', '709', '710', '711', '712', '713', '714', '715', '716', '717', '718', '719', '720', '721', '722', '723', '724', '725', '726', '727', '728', '729', '730', '731', '732', '733', '734', '735', '736', '737', '738', '739', '740', '741', '742', '743', '744', '745', '746', '747', '748', '749', '750', '751', '752', '753', '754', '755', '756', '757', '758', '759', '760', '761', '762', '763', '764', '765', '766', '767', '768', '769', '770', '771', '772', '773', '774', '775', '776', '777', '778', '779', '780', '781', '782', '783', '784', '785', '786', '787', '788', '789', '790', '791', '792', '793', '794', '795', '796', '797', '798', '799', '800', '801', '802', '803', '804', '805', '806', '807', '808', '809', '810', '811', '812', '813', '814', '815', '816', '817', '818', '819', '820', '821', '822', '823', '824', '825', '826', '827', '828', '829', '830', '831', '832', '833', '834', '835', '836', '837', '838', '839', '840', '841', '842', '843', '844', '845', '846', '847', '848', '849', '850', '851', '852', '853', '854', '855', '856', '857', '858', '859', '860', '861', '862', '863', '864', '865', '866', '867', '868', '869', '870', '871', '872', '873', '874', '875', '876', '877', '878', '879', '880', '881', '882', '883', '884', '885', '886', '887', '888', '889', '890', '891', '892', '893', '894', '895', '896', '897', '898', '899', '900', '901', '902', '903', '904', '905', '906', '907', '908', '909', '910', '911', '912', '913', '914', '915', '916', '917', '918', '919', '920', '921', '922', '923', '924', '925', '926', '927', '928', '929', '930', '931', '932', '933', '934', '935', '936', '937', '938', '939', '940', '941', '942', '943', '944', '945', '946', '947', '948', '949', '950', '951', '952', '953', '954', '955', '956', '957', '958', '959', '960', '961', '962', '963', '964', '965', '966', '967', '968', '969', 
'970', '971', '972', '973', '974', '975', '976', '977', '978', '979', '980', '981', '982', '983', '984', '985', '986', '987', '988', '989', '990', '991', '992', '993', '994', '995', '996', '997', '998', '999', '1000', '1001', '1002', '1003', '1004', '1005', '1006', '1007', '1008', '1009', '1010', '1011', '1012', '1013', '1014', '1015', '1016', '1017', '1018', '1019', '1020', '1021', '1022', '1023', '1024', '1025', '1026', '1027', '1028', '1029', '1030', '1031', '1032', '1033', '1034', '1035', '1036', '1037', '1038', '1039', '1040', '1041', '1042', '1043', '1044', '1045', '1046', '1047', '1048', '1049', '1050', '1051', '1052', '1053', '1054', '1055', '1056', '1057', '1058', '1059', '1060', '1061', '1062', '1063', '1064', '1065', '1066', '1067', '1068', '1069'], 'Y17582.1': ['209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286', '287', '288', '289', '290', '291', '292', '293', '294', '295', '296', '297', '298', '299', '300', '301', '302', '303', '304', '305', '306', '307', '308', '309', '310', '311', '312', '313', '314', '315', '316', '317', '318', '319', '320', '321', '322', '323', '324', '325', '326', '327', '328', '329', '330', '331', '332', '333', '334', '335', '336', '337', '338', '339', '340', '341', '342', '343', '344', '345', '346', '347', '348', '349', '350', '351', '352', '353', '354', '355', '356', '357', '358', '359', '360', '361', '362', '363', '364', '365', '366', '367', '368', '369', '370', '371', '372', '373', '374', '375', '376', '377', '378', '379', '380', '381', 
'382', '383', '384', '385', '386', '387', '388', '389', '390', '391', '392', '393', '394', '395', '396', '397', '398', '399', '400', '401', '402', '403', '404', '405', '406', '407', '408', '409', '410', '411', '412', '413', '414', '415', '416', '417', '418', '419', '420', '421', '422', '423', '424', '425', '426', '427', '428', '429', '430', '431', '432', '433', '434', '435', '436', '437', '438', '439', '440', '441', '442', '443', '444', '445', '446', '447', '448', '449', '450', '451', '452', '453', '454', '455', '456', '457', '458', '459', '460', '461', '462', '463', '464', '465', '466', '467', '468', '469', '470', '471', '472', '473', '474', '475', '476', '477', '478', '479', '480', '481', '482', '483', '484', '485', '486', '487', '488', '489', '490', '491', '492', '493', '494', '495', '496', '497', '498', '499', '500', '501', '502', '503', '504', '505', '506', '507', '508', '509', '510', '511', '512', '513', '514', '515', '516', '517', '518', '519', '520', '521', '522', '523', '524', '525', '526', '527', '528', '529', '530', '531', '532', '533', '534', '535', '536', '537', '538', '539', '540', '541', '542', '543', '544', '545', '546', '547', '548', '549', '550', '551', '552', '553', '554', '555', '556', '557', '558', '559', '560', '561', '562', '563', '564', '565', '566', '567', '568', '569', '570', '571', '572', '573', '574', '575', '576', '577', '578', '579', '580', '581', '582', '583', '584', '585', '586', '587', '588', '589', '590', '591', '592', '593', '594', '595', '596', '597', '598', '599', '600', '601', '602', '603', '604', '605', '606', '607', '608', '609', '610', '611', '612', '613', '614', '615', '616', '617', '618', '619', '620', '621', '622', '623', '624', '625', '626', '627', '628', '629', '630', '631', '632', '633', '634', '635', '636', '637', '638', '639', '640', '641', '642', '643', '644', '645', '646', '647', '648', '649', '650', '651', '652', '653', '654', '655', '656', '657', '658', '659', '660', '661', '662', '663', '664', '665', '666', 
'667', '668', '669', '670', '671', '672', '673', '674', '675', '676', '677', '678', '679', '680', '681', '682', '683', '684', '685', '686', '687', '688', '689', '690', '691', '692', '693', '694', '695', '696', '697', '698', '699', '700', '701', '702', '703', '704', '705', '706', '707', '708', '709', '710', '711', '712', '713', '714', '715', '716', '717', '718', '719', '720', '721', '722', '723', '724', '725', '726', '727', '728', '729', '730', '731', '732', '733', '734', '735', '736', '737', '738', '739', '740', '741', '742', '743', '744', '745', '746', '747', '748', '749', '750', '751', '752', '753', '754', '755', '756', '757', '758', '759', '760', '761', '762', '763', '764', '765', '766', '767', '768', '769', '770', '771', '772', '773', '774', '775', '776', '777', '778', '779', '780', '781', '782', '783', '784', '785', '786', '787', '788', '789', '790', '791', '792', '793', '794', '795', '796', '797', '798', '799', '800', '801', '802', '803', '804', '805', '806', '807', '808', '809', '810', '811', '812', '813', '814', '815', '816', '817', '818', '819', '820', '821', '822', '823', '824', '825', '826', '827', '828', '829', '830', '831', '832', '833', '834', '835', '836', '837', '838', '839', '840', '841', '842', '843', '844', '845', '846', '847', '848', '849', '850', '851', '852', '853', '854', '855', '856', '857', '858', '859', '860', '861', '862', '863', '864', '865', '866', '867', '868', '869', '870', '871', '872', '873', '874', '875', '876', '877', '878', '879', '880', '881', '882', '883', '884', '885', '886', '887', '888', '889', '890', '891', '892', '893', '894', '895', '896', '897', '898', '899', '900', '901', '902', '903', '904', '905', '906', '907', '908', '909', '910', '911', '912', '913', '914', '915', '916', '917', '918', '919', '920', '921', '922', '923', '924', '925', '926', '927', '928', '929', '930', '931', '932', '933', '934', '935', '936', '937', '938', '939', '940', '941', '942', '943', '944', '945', '946', '947', '948', '949', '950', '951', 
'952', '953', '954', '955', '956', '957', '958', '959', '960', '961', '962', '963', '964', '965', '966', '967', '968', '969', '970', '971', '972', '973', '974', '975', '976', '977', '978', '979', '980', '981', '982', '983', '984', '985', '986', '987', '988', '989', '990', '991', '992', '993', '994', '995', '996', '997', '998', '999', '1000', '1001', '1002', '1003', '1004', '1005', '1006', '1007', '1008', '1009', '1010', '1011', '1012', '1013', '1014', '1015', '1016', '1017', '1018', '1019', '1020', '1021', '1022', '1023', '1024', '1025', '1026', '1027', '1028', '1029', '1030', '1031', '1032', '1033', '1034', '1035', '1036', '1037', '1038', '1039', '1040', '1041', '1042', '1043', '1044', '1045', '1046', '1047', '1048', '1049', '1050', '1051', '1052', '1053', '1054', '1055', '1056', '1057', '1058', '1059', '1060', '1061', '1062', '1063', '1064', '1065', '1066']}\u001b[0m\n"
+     ]
+    }
+   ],
+   "source": [
+    "sn_dna = StandardNumberingTool(name=\"test_standard_numbering_dna\")\n",
+    "\n",
+    "sn_dna.apply_standard_numbering(\n",
+    "    base_sequence_id=\"AF190695.1\", db=eedb.db, node_type=\"DNA\"\n",
     ")"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[32m2025-03-14 16:01:53.600\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m385\u001b[0m - \u001b[1mPairs: [('AF190695.1', 'Y17582.1'), ('AF190695.1', 'AF347054.1'), ('AF190695.1', 'KC844056.1'), ('AF190695.1', 'JX042489.1')]\u001b[0m\n",
+      "\u001b[32m2025-03-14 16:01:53.601\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m394\u001b[0m - \u001b[1mInput: ['AF347054.1', 'JX042489.1', 'KC844056.1', 'Y17582.1', 'AF190695.1']\u001b[0m\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "ef5548988a3c419cbd5f0dc6719fa4c3",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Output()"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[32m2025-03-14 16:01:58.161\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m403\u001b[0m - \u001b[1mPairwise alignment results: [{'query_id': 'AF190695.1', 'target_id': 'Y17582.1', 'score': 834.0, 'identity': 0.7679057116953762, 'gaps': 245, 'mismatches': 11, 'query_aligned': 'TTCTTGAAGACGAAAGGGCCTCGTGATACGCCTATTTTTATAGGTTAATGTCATGATAATAATGGTTTCTTAGACGTCAGGTGGCACTTTTCGGGGAAATGTGCGCGGAACCCCTATTTGTTTATTTTTCTAAATACATTCAAATATGTATCCGCTCATGAGACAATAACCCTGGTAAATGCTTCAATAATATTGAAAAAGGAAGAGTATGAGTATTCAACATTTTCGTGTCGCCCTTATTCCCTTTTTTGCGGCATTTTGCCTTCCTGTTTTTGCTCACCCAGAAACGCTGGTGAAAGTAAAAGATGCTGAAGATCAGTTGGGTGCACGAGTGGGTTACATCGAACTGGATCTCAACAGCGGTAAGATCCTTGAGAGTTTTCGCCCCGAAGAACGTTTTCCAATGCTGAGCACTTTTAAAGTTCTGCTATGTGGTGCGGTATTATCCCGTGTTGACGCCGGGCAAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCTGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCACAACATGGGGGATCATGTAACTCGCCTTGATCGTTGGGAACCGGAGCTGAATGAAGCCATACCAAACGACGAGCGTGACACCACGATGCCTGCAGCAATGGCAACAACGTTGCGCAAACTATTAACTGGCGAACTACTTACTCTAGCTTCCCGGCAACAATTAATAGACTGGATGGAGGCGGATAAAGTTGCAGGACCACTTCTGCGCTCGGCCCTTCCGGCTGGCTGGTTTATTGCTGATAAATCTGGAGCCGGTGAGCGTGGGTCTAGCGGTATCATTGCAGCACTGGGGCCAGATGGTAAGCCCTCCCGTATCGTAGTTATCTACACGACGGGGAGTCAGGCAACTATGGATGAACGAAATAGACAGATCGCTGAGATAGGTGCCTCACTGATTAAGCATTGGTAACTGTCAGACCAAGTTTACTCATATATACTTTAGA', 'target_aligned': 
'----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ATGAGTATTCAACATTTCCGTGTCGCCCTTATTCCCTTTTTTGCGGCATTTTGCCTTCCTGTTTTTGCTCACCCAGAAACGCTGGTGAAAGTAAAAGATGCTGAAGATAAGTTGGGTGCACGAGTGGGTTACATCGAGCTGGATCTCAACAGCGGTAAGATCCTTGAGAGTTTTCGCCCCGAAGAACGTTTTCCAATGATGAGCACTTTTAAAGTTCTGCTATGTGGTGCGGTATTATCCCGTGTTGACGCCGGGCAAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTAAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCGGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCGCAACATGGGGGATCATGTAACCCGCCTTGATCGTTGGGAACCGGAGCTGAATGAAGCCATACCAAACGACGAGCGTGACACCACGATGCCTGCAGCAATGGCAACAACGTTGCGCAAACTATTAACTGGCGAACTACTTACTCTAGCTTCCCGGCAACAATTAATAGACTGGATGGAGGCGGATAAAGTTGCAGGACCACTTCTGCGCTCGGCCCTTCCGGCTGGCTGGTTTATTGCTGATAAATCTGGAGCCAGTGAGCGTGGATCTCGCGGTATCATTGCAGCACTGGGGCCAGATGGTAAGCCCTCCCGTATCGTAGTTATCTACACGACGGGGAGTCAGGCAACTATGGATGAACGAAATAGACAGATCGCTGAGATAGGTGCCTCACTGATTAAGCATTGG-------------------------------------'}, {'query_id': 'AF190695.1', 'target_id': 'AF347054.1', 'score': 1065.0, 'identity': 0.9737080689029919, 'gaps': 21, 'mismatches': 8, 'query_aligned': 
'TTCTTGAAGACGAAAGGGCCTCGTGATACGCCTATTTTTATAGGTTAATGTCATGATAATAATGGTTTCTTAGACGTCAGGTGGCACTTTTCGGGGAAATGTGCGCGGAACCCCTATTTGTTTATTTTTCTAAATACATTCAAATATGTATCCGCTCATGAGACAATAACCCTGGTAAATGCTTCAATAATATTGAAAAAGGAAGAGTATGAGTATTCAACATTTTCGTGTCGCCCTTATTCCCTTTTTTGCGGCATTTTGCCTTCCTGTTTTTGCTCACCCAGAAACGCTGGTGAAAGTAAAAGATGCTGAAGATCAGTTGGGTGCACGAGTGGGTTACATCGAACTGGATCTCAACAGCGGTAAGATCCTTGAGAGTTTTCGCCCCGAAGAACGTTTTCCAATGCTGAGCACTTTTAAAGTTCTGCTATGTGGTGCGGTATTATCCCGTGTTGACGCCGGGCAAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCTGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCACAACATGGGGGATCATGTAACTCGCCTTGATCGTTGGGAACCGGAGCTGAATGAAGCCATACCAAACGACGAGCGTGACACCACGATGCCTGCAGCAATGGCAACAACGTTGCGCAAACTATTAACTGGCGAACTACTTACTCTAGCTTCCCGGCAACAATTAATAGACTGGATGGAGGCGGATAAAGTTGCAGGACCACTTCTGCGCTCGGCCCTTCCGGCTGGCTGGTTTATTGCTGATAAATCTGGAGCCGGTGAGCGTGGGTCTAGCGGTATCATTGCAGCACTGGGGCCAGATGGTAAGCCCTCCCGTATCGTAGTTATCTACACGACGGGGAGTCAGGCAACTATGGATGAACGAAATAGACAGATCGCTGAGATAGGTGCCTCACTGATTAAGCATTGGTAACTGTCAGACCAAGTTTACTCATATATACTTTAGA', 'target_aligned': 
'TTCTTGAAGACGAAAGGGCCTCGTGATACGCTTATTTTTATAGGTTAATGTCATGATAATAATGGTTTCTTAGACGTCAGGTGGCACTTTTCGGGGAAATGTGCGCGGAACCCCTATTTGTTTATTTTTCTAAATACATTCAAATATGTATCCGCTCATGAGACAATAACCCTGATAAATGCTTCAATAATATTGAAAAAGGAAGAGTATGAGTATTCAACATTTCCGTGTCGCCCTTATTCCCTTTTTTGCGGCATTTTGCCTTCCTGTTTTTGCTCACCCAGAAACGCTGGTGAAAGTAAAAGATGCTGAAGATCAGTTGGGTGCACGAGTGGGTTACATCGAACTGGATCTCAACAGCGGTAAGATCCTTGAGAGTTTTCGCCCCGAAGAACGTTTTCCAATGCTGAGCACTTTTAAAGTTCTGCTATGTGGCGCGGTATTATCCCGTGTTGACGCCGGGCAAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCGTAACCATGAGTGATAACACTGCGGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCACAACATGGGGGATCATGTAACTCGCCTTGATCGTTGGGAACCGGAGCTGAATGAAGCCATACCAAACGACGAGCGTGACACCACGATGCCTGCAGCAATGGCAACAACGTTGCGCAAACTATTAACTGGCGAACTACTTACTCTAGCTTCCCGGCAACAATTAATAGACTGGATGGAGGCGGATAAAGTTGCAGGACCACTTCTGCGCTCGGCCCTTCCGGCTGGCTGGTTTATTGCTGATAAATCTGGAGCCGGTGAGCGTGGGTCTCGCGGTATCATTGCAGCACTGGGGCCAGATGGTAAGCCCTCCCGTATCGTAGTTATCTACACGACGGGGAGTCAGGCAACTATGGATGAACGAGATAGACAGATCGCTGAGATAGGTGCCTCACTGATTAAGCATTGGTAACTGTCAGACCAAG---------------------'}, {'query_id': 'AF190695.1', 'target_id': 'KC844056.1', 'score': 855.0, 'identity': 0.7787851314596554, 'gaps': 242, 'mismatches': 2, 'query_aligned': 
'TTCTTGAAGACGAAAGGGCCTCGTGATACGCCTATTTTTATAGGTTAATGTCATGATAATAATGGTTTCTTAGACGTCAGGTGGCACTTTTCGGGGAAATGTGCGCGGAACCCCTATTTGTTTATTTTTCTAAATACATTCAAATATGTATCCGCTCATGAGACAATAACCCTGGTAAATGCTTCAATAATATTGAAAAAGGAAGAGTATGAGTATTCAACATTTTCGTGTCGCCCTTATTCCCTTTTTTGCGGCATTTTGCCTTCCTGTTTTTGCTCACCCAGAAACGCTGGTGAAAGTAAAAGATGCTGAAGATCAGTTGGGTGCACGAGTGGGTTACATCGAACTGGATCTCAACAGCGGTAAGATCCTTGAGAGTTTTCGCCCCGAAGAACGTTTTCCAATGCTGAGCACTTTTAAAGTTCTGCTATGTGGTGCGGTATTATCCCGTGTTGACGCCGGGCAAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCTGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCACAACATGGGGGATCATGTAACTCGCCTTGATCGTTGGGAACCGGAGCTGAATGAAGCCATACCAAACGACGAGCGTGACACCACGATGCCTGCAGCAATGGCAACAACGTTGCGCAAACTATTAACTGGCGAACTACTTACTCTAGCTTCCCGGCAACAATTAATAGACTGGATGGAGGCGGATAAAGTTGCAGGACCACTTCTGCGCTCGGCCCTTCCGGCTGGCTGGTTTATTGCTGATAAATCTGGAGCCGGTGAGCGTGGGTCTAGCGGTATCATTGCAGCACTGGGGCCAGATGGTAAGCCCTCCCGTATCGTAGTTATCTACACGACGGGGAGTCAGGCAACTATGGATGAACGAAATAGACAGATCGCTGAGATAGGTGCCTCACTGATTAAGCATTGGTAACTGTCAGACCAAGTTTACTCATATATACTTTAGA', 'target_aligned': 
'----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ATGAGTATTCAACATTTTCGTGTCGCCCTTATTCCCTTTTTTGCGGCATTTTGCCTTCCTGTTTTTGCTCACCCAGAAACGCTGGTGAAAGTAAAAGATGCTGAAGATCAGTTGGGTGCACGAGTGGGTTACATCGAACTGGATCTCAACAGCGGTAAGATCCTTGAGAGTTTTCGCCCCGAAGAACGTTTTCCAATGGTGAGCACTTTTAAAGTTCTGCTATGTGGTGCGGTATTATCCCGTGTTGACGCCGGGCAAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCTGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCACAACATGGGGGATCATGTAACTCGCCTTGATCGTTGGGAACCGGAGCTGAATGAAGCCATACCAAACGACGAGCGTGACACCACGATGCCTGCAGCAATGGCAACAACGTTGCGCAAACTATTAACTGGCGAACTACTTACTCTAGCTTCCCGGCAACAATTAATAGACTGGATGGAGGCGGATAAAGTTGCAGGACCACTTCTGCGCTCGGCCCTTCCGGCTGGCTGGTTTATTGCTGATAAATCTGGAGCCGGTGAGCGTGGGTCTCGCGGTATCATTGCAGCACTGGGGCCAGATGGTAAGCCCTCCCGTATCGTAGTTATCTACACGACGGGGAGTCAGGCAACTATGGATGAACGAAATAGACAGATCGCTGAGATAGGTGCCTCACTGATTAAGCATTGGTA----------------------------------A'}, {'query_id': 'AF190695.1', 'target_id': 'JX042489.1', 'score': 974.0, 'identity': 0.8967391304347826, 'gaps': 103, 'mismatches': 11, 'query_aligned': 
'TTCTTGAAGACGAAAGGGCCTCGTGATACGCCTATTTTTATAGGTTAATGTCATGATAATAATGGTTTCTTAGACGTCAGGTGGCACTTTTCGGGGAAATGTGCGCGGAACCCCTATTTGTTTA-TTTTTCTAAATACATTCAAATATGTATCCGCTCATGAGACAATAACCCTGGTAAATGCTTCAATAATATTGAAAAAGGAAGAGTATGAGTATTCAACATTTTCGTGTCGCCCTTATTCCCTTTTTTGCGGCATTTTGCCTTCCTGTTTTTGCTCACCCAGAAACGCTGGTGAAAGTAAAAGATGCTGAAGATCAGTTGGGTGCACGAGTGGGTTACATCGAACTGGATCTCAACAGCGGTAAGATCCTTGAGAGTTTTCGCCCCGAAGAACGTTTTCCAATGCTGAGCACTTTTAAAGTTCTGCTATGTGGTGCGGTATTATCCCGTGTTGACGCCGGGCAAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCTGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCACAACATGGGGGATCATGTAACTCGCCTTGATCGTTGGGAACCGGAGCTGAATGAAGCCATACCAAACGACGAGCGTGACACCACGATGCCTGCAGCAATGGCAACAACGTTGCGCAAACTATTAACTGGCGAACTACTTACTCTAGCTTCCCGGCAACAATTAATAGACTGGATGGAGGCGGATAAAGTTGCAGGACCACTTCTGCGCTCGGCCCTTCCGGCTGGCTGGTTTATTGCTGATAAATCTGGAGCCGGTGAGCGTGGGTCTAGCGGTATCATTGCAGCACTGGGGCCAGATGGTAAGCCCTCCCGTATCGTAGTTATCTACACGACGGGGAGTCAGGCAACTATGGATGAACGAAATAGACAGATCGCTGAGATAGGTGCCTCACTGATTAAGCATTGGTAACTGTCAGACCAAGTTTACTCATATATACTTTAGA', 'target_aligned': 
'------------------------------------------------------------------TTCTTAGACGTCAGGTGGCAC-TTTAGGGGAAATGTGCGCGGAACCCCTATTTGTTTATTTTTTCT-AATACATTCAAATATGTATCCGCTCATGATACAATAACCCTGATAAATGCTTCAATAATATTGAAAAAGGAAGAGTATGAGTATTCAACATTTCCGTGTCGCCCTTATTCCCTTTTTTGCGGCATTTTGCCTTCCTGTTTTTGCTCACCCAGAAACGCTGGTGAAAGTAAAAGATGCTGAAGATCAGTTGGGTGCACGAGTGGGTTACATCGAGCTGGATCTCAACAGCGGTAAGATCCTTGAGAGTTTTCGCCCCGAAGAACGTTTTCCAATGATGAGCACTTTTAAAGTTCTGCTATGTGGTGCGGTATTATCCCGTGTTGACGCCGGGCAAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCGGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCACAACATGGGGGATCATGTAACCCGCCTTGATCGTTGGGAACCGGAGCTGAATGAAGCCATACCAAACGACGAGCGTGACACCACGATGCCTGCAGCAATGGCAACAACGTTGCGCAAACTATTAACTGGCGAACTACTTACTCTAGCTTCCCGGCAACAATTAATAGACTGGATGGAGGCGGATAAAGTTGCAGGACCACTTCTGCGCTCGGCCCTTCCGGCTGGCTGGTTTATTGCTGATAAATCTGGAGCCAGTGAGCGTGGATCTCGCGGTATCATTGCAGCACTGGGGCCAGATGGTAAGCCCTCCCGTATCGTAGTTATCTACACGACGGGGAGTCAGGCAACTATGGATGAACGAAATAGACAGATCGCTGAGATAGGTGCCTCACTGATTAAGCATTGGTA----------------------------------A'}]\u001b[0m\n",
+      "\u001b[32m2025-03-14 16:01:58.163\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m422\u001b[0m - \u001b[1mConverted alignment: 4\u001b[0m\n",
+      "\u001b[32m2025-03-14 16:01:58.169\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m429\u001b[0m - \u001b[1mPositions: {'AF190695.1': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', 
'266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286', '287', '288', '289', '290', '291', '292', '293', '294', '295', '296', '297', '298', '299', '300', '301', '302', '303', '304', '305', '306', '307', '308', '309', '310', '311', '312', '313', '314', '315', '316', '317', '318', '319', '320', '321', '322', '323', '324', '325', '326', '327', '328', '329', '330', '331', '332', '333', '334', '335', '336', '337', '338', '339', '340', '341', '342', '343', '344', '345', '346', '347', '348', '349', '350', '351', '352', '353', '354', '355', '356', '357', '358', '359', '360', '361', '362', '363', '364', '365', '366', '367', '368', '369', '370', '371', '372', '373', '374', '375', '376', '377', '378', '379', '380', '381', '382', '383', '384', '385', '386', '387', '388', '389', '390', '391', '392', '393', '394', '395', '396', '397', '398', '399', '400', '401', '402', '403', '404', '405', '406', '407', '408', '409', '410', '411', '412', '413', '414', '415', '416', '417', '418', '419', '420', '421', '422', '423', '424', '425', '426', '427', '428', '429', '430', '431', '432', '433', '434', '435', '436', '437', '438', '439', '440', '441', '442', '443', '444', '445', '446', '447', '448', '449', '450', '451', '452', '453', '454', '455', '456', '457', '458', '459', '460', '461', '462', '463', '464', '465', '466', '467', '468', '469', '470', '471', '472', '473', '474', '475', '476', '477', '478', '479', '480', '481', '482', '483', '484', '485', '486', '487', '488', '489', '490', '491', '492', '493', '494', '495', '496', '497', '498', '499', '500', '501', '502', '503', '504', '505', '506', '507', '508', '509', '510', '511', '512', '513', '514', '515', '516', '517', '518', '519', '520', '521', '522', '523', '524', '525', '526', '527', '528', '529', '530', '531', '532', '533', '534', '535', '536', '537', '538', '539', '540', '541', '542', '543', '544', '545', '546', '547', '548', '549', '550', 
'551', '552', '553', '554', '555', '556', '557', '558', '559', '560', '561', '562', '563', '564', '565', '566', '567', '568', '569', '570', '571', '572', '573', '574', '575', '576', '577', '578', '579', '580', '581', '582', '583', '584', '585', '586', '587', '588', '589', '590', '591', '592', '593', '594', '595', '596', '597', '598', '599', '600', '601', '602', '603', '604', '605', '606', '607', '608', '609', '610', '611', '612', '613', '614', '615', '616', '617', '618', '619', '620', '621', '622', '623', '624', '625', '626', '627', '628', '629', '630', '631', '632', '633', '634', '635', '636', '637', '638', '639', '640', '641', '642', '643', '644', '645', '646', '647', '648', '649', '650', '651', '652', '653', '654', '655', '656', '657', '658', '659', '660', '661', '662', '663', '664', '665', '666', '667', '668', '669', '670', '671', '672', '673', '674', '675', '676', '677', '678', '679', '680', '681', '682', '683', '684', '685', '686', '687', '688', '689', '690', '691', '692', '693', '694', '695', '696', '697', '698', '699', '700', '701', '702', '703', '704', '705', '706', '707', '708', '709', '710', '711', '712', '713', '714', '715', '716', '717', '718', '719', '720', '721', '722', '723', '724', '725', '726', '727', '728', '729', '730', '731', '732', '733', '734', '735', '736', '737', '738', '739', '740', '741', '742', '743', '744', '745', '746', '747', '748', '749', '750', '751', '752', '753', '754', '755', '756', '757', '758', '759', '760', '761', '762', '763', '764', '765', '766', '767', '768', '769', '770', '771', '772', '773', '774', '775', '776', '777', '778', '779', '780', '781', '782', '783', '784', '785', '786', '787', '788', '789', '790', '791', '792', '793', '794', '795', '796', '797', '798', '799', '800', '801', '802', '803', '804', '805', '806', '807', '808', '809', '810', '811', '812', '813', '814', '815', '816', '817', '818', '819', '820', '821', '822', '823', '824', '825', '826', '827', '828', '829', '830', '831', '832', '833', '834', '835', 
'836', '837', '838', '839', '840', '841', '842', '843', '844', '845', '846', '847', '848', '849', '850', '851', '852', '853', '854', '855', '856', '857', '858', '859', '860', '861', '862', '863', '864', '865', '866', '867', '868', '869', '870', '871', '872', '873', '874', '875', '876', '877', '878', '879', '880', '881', '882', '883', '884', '885', '886', '887', '888', '889', '890', '891', '892', '893', '894', '895', '896', '897', '898', '899', '900', '901', '902', '903', '904', '905', '906', '907', '908', '909', '910', '911', '912', '913', '914', '915', '916', '917', '918', '919', '920', '921', '922', '923', '924', '925', '926', '927', '928', '929', '930', '931', '932', '933', '934', '935', '936', '937', '938', '939', '940', '941', '942', '943', '944', '945', '946', '947', '948', '949', '950', '951', '952', '953', '954', '955', '956', '957', '958', '959', '960', '961', '962', '963', '964', '965', '966', '967', '968', '969', '970', '971', '972', '973', '974', '975', '976', '977', '978', '979', '980', '981', '982', '983', '984', '985', '986', '987', '988', '989', '990', '991', '992', '993', '994', '995', '996', '997', '998', '999', '1000', '1001', '1002', '1003', '1004', '1005', '1006', '1007', '1008', '1009', '1010', '1011', '1012', '1013', '1014', '1015', '1016', '1017', '1018', '1019', '1020', '1021', '1022', '1023', '1024', '1025', '1026', '1027', '1028', '1029', '1030', '1031', '1032', '1033', '1034', '1035', '1036', '1037', '1038', '1039', '1040', '1041', '1042', '1043', '1044', '1045', '1046', '1047', '1048', '1049', '1050', '1051', '1052', '1053', '1054', '1055', '1056', '1057', '1058', '1059', '1060', '1061', '1062', '1063', '1064', '1065', '1066', '1067', '1068', '1069', '1070', '1071', '1072', '1073', '1074', '1075', '1076', '1077', '1078', '1079', '1080', '1081', '1082', '1083', '1084', '1085', '1086', '1087', '1088', '1089', '1090', '1091', '1092', '1093', '1094', '1095', '1096', '1097', '1098', '1099', '1100', '1101', '1102', '1103'], 'Y17582.1': 
['209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286', '287', '288', '289', '290', '291', '292', '293', '294', '295', '296', '297', '298', '299', '300', '301', '302', '303', '304', '305', '306', '307', '308', '309', '310', '311', '312', '313', '314', '315', '316', '317', '318', '319', '320', '321', '322', '323', '324', '325', '326', '327', '328', '329', '330', '331', '332', '333', '334', '335', '336', '337', '338', '339', '340', '341', '342', '343', '344', '345', '346', '347', '348', '349', '350', '351', '352', '353', '354', '355', '356', '357', '358', '359', '360', '361', '362', '363', '364', '365', '366', '367', '368', '369', '370', '371', '372', '373', '374', '375', '376', '377', '378', '379', '380', '381', '382', '383', '384', '385', '386', '387', '388', '389', '390', '391', '392', '393', '394', '395', '396', '397', '398', '399', '400', '401', '402', '403', '404', '405', '406', '407', '408', '409', '410', '411', '412', '413', '414', '415', '416', '417', '418', '419', '420', '421', '422', '423', '424', '425', '426', '427', '428', '429', '430', '431', '432', '433', '434', '435', '436', '437', '438', '439', '440', '441', '442', '443', '444', '445', '446', '447', '448', '449', '450', '451', '452', '453', '454', '455', '456', '457', '458', '459', '460', '461', '462', '463', '464', '465', '466', '467', '468', '469', '470', '471', '472', '473', '474', '475', '476', '477', '478', '479', '480', '481', '482', '483', '484', '485', '486', '487', '488', '489', '490', '491', '492', '493', 
'494', '495', '496', '497', '498', '499', '500', '501', '502', '503', '504', '505', '506', '507', '508', '509', '510', '511', '512', '513', '514', '515', '516', '517', '518', '519', '520', '521', '522', '523', '524', '525', '526', '527', '528', '529', '530', '531', '532', '533', '534', '535', '536', '537', '538', '539', '540', '541', '542', '543', '544', '545', '546', '547', '548', '549', '550', '551', '552', '553', '554', '555', '556', '557', '558', '559', '560', '561', '562', '563', '564', '565', '566', '567', '568', '569', '570', '571', '572', '573', '574', '575', '576', '577', '578', '579', '580', '581', '582', '583', '584', '585', '586', '587', '588', '589', '590', '591', '592', '593', '594', '595', '596', '597', '598', '599', '600', '601', '602', '603', '604', '605', '606', '607', '608', '609', '610', '611', '612', '613', '614', '615', '616', '617', '618', '619', '620', '621', '622', '623', '624', '625', '626', '627', '628', '629', '630', '631', '632', '633', '634', '635', '636', '637', '638', '639', '640', '641', '642', '643', '644', '645', '646', '647', '648', '649', '650', '651', '652', '653', '654', '655', '656', '657', '658', '659', '660', '661', '662', '663', '664', '665', '666', '667', '668', '669', '670', '671', '672', '673', '674', '675', '676', '677', '678', '679', '680', '681', '682', '683', '684', '685', '686', '687', '688', '689', '690', '691', '692', '693', '694', '695', '696', '697', '698', '699', '700', '701', '702', '703', '704', '705', '706', '707', '708', '709', '710', '711', '712', '713', '714', '715', '716', '717', '718', '719', '720', '721', '722', '723', '724', '725', '726', '727', '728', '729', '730', '731', '732', '733', '734', '735', '736', '737', '738', '739', '740', '741', '742', '743', '744', '745', '746', '747', '748', '749', '750', '751', '752', '753', '754', '755', '756', '757', '758', '759', '760', '761', '762', '763', '764', '765', '766', '767', '768', '769', '770', '771', '772', '773', '774', '775', '776', '777', '778', 
'779', '780', '781', '782', '783', '784', '785', '786', '787', '788', '789', '790', '791', '792', '793', '794', '795', '796', '797', '798', '799', '800', '801', '802', '803', '804', '805', '806', '807', '808', '809', '810', '811', '812', '813', '814', '815', '816', '817', '818', '819', '820', '821', '822', '823', '824', '825', '826', '827', '828', '829', '830', '831', '832', '833', '834', '835', '836', '837', '838', '839', '840', '841', '842', '843', '844', '845', '846', '847', '848', '849', '850', '851', '852', '853', '854', '855', '856', '857', '858', '859', '860', '861', '862', '863', '864', '865', '866', '867', '868', '869', '870', '871', '872', '873', '874', '875', '876', '877', '878', '879', '880', '881', '882', '883', '884', '885', '886', '887', '888', '889', '890', '891', '892', '893', '894', '895', '896', '897', '898', '899', '900', '901', '902', '903', '904', '905', '906', '907', '908', '909', '910', '911', '912', '913', '914', '915', '916', '917', '918', '919', '920', '921', '922', '923', '924', '925', '926', '927', '928', '929', '930', '931', '932', '933', '934', '935', '936', '937', '938', '939', '940', '941', '942', '943', '944', '945', '946', '947', '948', '949', '950', '951', '952', '953', '954', '955', '956', '957', '958', '959', '960', '961', '962', '963', '964', '965', '966', '967', '968', '969', '970', '971', '972', '973', '974', '975', '976', '977', '978', '979', '980', '981', '982', '983', '984', '985', '986', '987', '988', '989', '990', '991', '992', '993', '994', '995', '996', '997', '998', '999', '1000', '1001', '1002', '1003', '1004', '1005', '1006', '1007', '1008', '1009', '1010', '1011', '1012', '1013', '1014', '1015', '1016', '1017', '1018', '1019', '1020', '1021', '1022', '1023', '1024', '1025', '1026', '1027', '1028', '1029', '1030', '1031', '1032', '1033', '1034', '1035', '1036', '1037', '1038', '1039', '1040', '1041', '1042', '1043', '1044', '1045', '1046', '1047', '1048', '1049', '1050', '1051', '1052', '1053', '1054', '1055', 
'1056', '1057', '1058', '1059', '1060', '1061', '1062', '1063', '1064', '1065', '1066'], 'AF347054.1': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286', 
'287', '288', '289', '290', '291', '292', '293', '294', '295', '296', '297', '298', '299', '300', '301', '302', '303', '304', '305', '306', '307', '308', '309', '310', '311', '312', '313', '314', '315', '316', '317', '318', '319', '320', '321', '322', '323', '324', '325', '326', '327', '328', '329', '330', '331', '332', '333', '334', '335', '336', '337', '338', '339', '340', '341', '342', '343', '344', '345', '346', '347', '348', '349', '350', '351', '352', '353', '354', '355', '356', '357', '358', '359', '360', '361', '362', '363', '364', '365', '366', '367', '368', '369', '370', '371', '372', '373', '374', '375', '376', '377', '378', '379', '380', '381', '382', '383', '384', '385', '386', '387', '388', '389', '390', '391', '392', '393', '394', '395', '396', '397', '398', '399', '400', '401', '402', '403', '404', '405', '406', '407', '408', '409', '410', '411', '412', '413', '414', '415', '416', '417', '418', '419', '420', '421', '422', '423', '424', '425', '426', '427', '428', '429', '430', '431', '432', '433', '434', '435', '436', '437', '438', '439', '440', '441', '442', '443', '444', '445', '446', '447', '448', '449', '450', '451', '452', '453', '454', '455', '456', '457', '458', '459', '460', '461', '462', '463', '464', '465', '466', '467', '468', '469', '470', '471', '472', '473', '474', '475', '476', '477', '478', '479', '480', '481', '482', '483', '484', '485', '486', '487', '488', '489', '490', '491', '492', '493', '494', '495', '496', '497', '498', '499', '500', '501', '502', '503', '504', '505', '506', '507', '508', '509', '510', '511', '512', '513', '514', '515', '516', '517', '518', '519', '520', '521', '522', '523', '524', '525', '526', '527', '528', '529', '530', '531', '532', '533', '534', '535', '536', '537', '538', '539', '540', '541', '542', '543', '544', '545', '546', '547', '548', '549', '550', '551', '552', '553', '554', '555', '556', '557', '558', '559', '560', '561', '562', '563', '564', '565', '566', '567', '568', '569', '570', '571', 
'572', '573', '574', '575', '576', '577', '578', '579', '580', '581', '582', '583', '584', '585', '586', '587', '588', '589', '590', '591', '592', '593', '594', '595', '596', '597', '598', '599', '600', '601', '602', '603', '604', '605', '606', '607', '608', '609', '610', '611', '612', '613', '614', '615', '616', '617', '618', '619', '620', '621', '622', '623', '624', '625', '626', '627', '628', '629', '630', '631', '632', '633', '634', '635', '636', '637', '638', '639', '640', '641', '642', '643', '644', '645', '646', '647', '648', '649', '650', '651', '652', '653', '654', '655', '656', '657', '658', '659', '660', '661', '662', '663', '664', '665', '666', '667', '668', '669', '670', '671', '672', '673', '674', '675', '676', '677', '678', '679', '680', '681', '682', '683', '684', '685', '686', '687', '688', '689', '690', '691', '692', '693', '694', '695', '696', '697', '698', '699', '700', '701', '702', '703', '704', '705', '706', '707', '708', '709', '710', '711', '712', '713', '714', '715', '716', '717', '718', '719', '720', '721', '722', '723', '724', '725', '726', '727', '728', '729', '730', '731', '732', '733', '734', '735', '736', '737', '738', '739', '740', '741', '742', '743', '744', '745', '746', '747', '748', '749', '750', '751', '752', '753', '754', '755', '756', '757', '758', '759', '760', '761', '762', '763', '764', '765', '766', '767', '768', '769', '770', '771', '772', '773', '774', '775', '776', '777', '778', '779', '780', '781', '782', '783', '784', '785', '786', '787', '788', '789', '790', '791', '792', '793', '794', '795', '796', '797', '798', '799', '800', '801', '802', '803', '804', '805', '806', '807', '808', '809', '810', '811', '812', '813', '814', '815', '816', '817', '818', '819', '820', '821', '822', '823', '824', '825', '826', '827', '828', '829', '830', '831', '832', '833', '834', '835', '836', '837', '838', '839', '840', '841', '842', '843', '844', '845', '846', '847', '848', '849', '850', '851', '852', '853', '854', '855', '856', 
'857', '858', '859', '860', '861', '862', '863', '864', '865', '866', '867', '868', '869', '870', '871', '872', '873', '874', '875', '876', '877', '878', '879', '880', '881', '882', '883', '884', '885', '886', '887', '888', '889', '890', '891', '892', '893', '894', '895', '896', '897', '898', '899', '900', '901', '902', '903', '904', '905', '906', '907', '908', '909', '910', '911', '912', '913', '914', '915', '916', '917', '918', '919', '920', '921', '922', '923', '924', '925', '926', '927', '928', '929', '930', '931', '932', '933', '934', '935', '936', '937', '938', '939', '940', '941', '942', '943', '944', '945', '946', '947', '948', '949', '950', '951', '952', '953', '954', '955', '956', '957', '958', '959', '960', '961', '962', '963', '964', '965', '966', '967', '968', '969', '970', '971', '972', '973', '974', '975', '976', '977', '978', '979', '980', '981', '982', '983', '984', '985', '986', '987', '988', '989', '990', '991', '992', '993', '994', '995', '996', '997', '998', '999', '1000', '1001', '1002', '1003', '1004', '1005', '1006', '1007', '1008', '1009', '1010', '1011', '1012', '1013', '1014', '1015', '1016', '1017', '1018', '1019', '1020', '1021', '1022', '1023', '1024', '1025', '1026', '1027', '1028', '1029', '1030', '1031', '1032', '1033', '1034', '1035', '1036', '1037', '1038', '1039', '1040', '1041', '1042', '1043', '1044', '1045', '1046', '1047', '1048', '1049', '1050', '1051', '1052', '1053', '1054', '1055', '1056', '1057', '1058', '1059', '1060', '1061', '1062', '1063', '1064', '1065', '1066', '1067', '1068', '1069', '1070', '1071', '1072', '1073', '1074', '1075', '1076', '1077', '1078', '1079', '1080', '1081', '1082'], 'KC844056.1': ['209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', 
'254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286', '287', '288', '289', '290', '291', '292', '293', '294', '295', '296', '297', '298', '299', '300', '301', '302', '303', '304', '305', '306', '307', '308', '309', '310', '311', '312', '313', '314', '315', '316', '317', '318', '319', '320', '321', '322', '323', '324', '325', '326', '327', '328', '329', '330', '331', '332', '333', '334', '335', '336', '337', '338', '339', '340', '341', '342', '343', '344', '345', '346', '347', '348', '349', '350', '351', '352', '353', '354', '355', '356', '357', '358', '359', '360', '361', '362', '363', '364', '365', '366', '367', '368', '369', '370', '371', '372', '373', '374', '375', '376', '377', '378', '379', '380', '381', '382', '383', '384', '385', '386', '387', '388', '389', '390', '391', '392', '393', '394', '395', '396', '397', '398', '399', '400', '401', '402', '403', '404', '405', '406', '407', '408', '409', '410', '411', '412', '413', '414', '415', '416', '417', '418', '419', '420', '421', '422', '423', '424', '425', '426', '427', '428', '429', '430', '431', '432', '433', '434', '435', '436', '437', '438', '439', '440', '441', '442', '443', '444', '445', '446', '447', '448', '449', '450', '451', '452', '453', '454', '455', '456', '457', '458', '459', '460', '461', '462', '463', '464', '465', '466', '467', '468', '469', '470', '471', '472', '473', '474', '475', '476', '477', '478', '479', '480', '481', '482', '483', '484', '485', '486', '487', '488', '489', '490', '491', '492', '493', '494', '495', '496', '497', '498', '499', '500', '501', '502', '503', '504', '505', '506', '507', '508', '509', '510', '511', '512', '513', '514', '515', '516', '517', '518', '519', '520', '521', '522', '523', '524', '525', '526', '527', '528', '529', '530', '531', '532', '533', '534', '535', '536', '537', '538', 
'539', '540', '541', '542', '543', '544', '545', '546', '547', '548', '549', '550', '551', '552', '553', '554', '555', '556', '557', '558', '559', '560', '561', '562', '563', '564', '565', '566', '567', '568', '569', '570', '571', '572', '573', '574', '575', '576', '577', '578', '579', '580', '581', '582', '583', '584', '585', '586', '587', '588', '589', '590', '591', '592', '593', '594', '595', '596', '597', '598', '599', '600', '601', '602', '603', '604', '605', '606', '607', '608', '609', '610', '611', '612', '613', '614', '615', '616', '617', '618', '619', '620', '621', '622', '623', '624', '625', '626', '627', '628', '629', '630', '631', '632', '633', '634', '635', '636', '637', '638', '639', '640', '641', '642', '643', '644', '645', '646', '647', '648', '649', '650', '651', '652', '653', '654', '655', '656', '657', '658', '659', '660', '661', '662', '663', '664', '665', '666', '667', '668', '669', '670', '671', '672', '673', '674', '675', '676', '677', '678', '679', '680', '681', '682', '683', '684', '685', '686', '687', '688', '689', '690', '691', '692', '693', '694', '695', '696', '697', '698', '699', '700', '701', '702', '703', '704', '705', '706', '707', '708', '709', '710', '711', '712', '713', '714', '715', '716', '717', '718', '719', '720', '721', '722', '723', '724', '725', '726', '727', '728', '729', '730', '731', '732', '733', '734', '735', '736', '737', '738', '739', '740', '741', '742', '743', '744', '745', '746', '747', '748', '749', '750', '751', '752', '753', '754', '755', '756', '757', '758', '759', '760', '761', '762', '763', '764', '765', '766', '767', '768', '769', '770', '771', '772', '773', '774', '775', '776', '777', '778', '779', '780', '781', '782', '783', '784', '785', '786', '787', '788', '789', '790', '791', '792', '793', '794', '795', '796', '797', '798', '799', '800', '801', '802', '803', '804', '805', '806', '807', '808', '809', '810', '811', '812', '813', '814', '815', '816', '817', '818', '819', '820', '821', '822', '823', 
'824', '825', '826', '827', '828', '829', '830', '831', '832', '833', '834', '835', '836', '837', '838', '839', '840', '841', '842', '843', '844', '845', '846', '847', '848', '849', '850', '851', '852', '853', '854', '855', '856', '857', '858', '859', '860', '861', '862', '863', '864', '865', '866', '867', '868', '869', '870', '871', '872', '873', '874', '875', '876', '877', '878', '879', '880', '881', '882', '883', '884', '885', '886', '887', '888', '889', '890', '891', '892', '893', '894', '895', '896', '897', '898', '899', '900', '901', '902', '903', '904', '905', '906', '907', '908', '909', '910', '911', '912', '913', '914', '915', '916', '917', '918', '919', '920', '921', '922', '923', '924', '925', '926', '927', '928', '929', '930', '931', '932', '933', '934', '935', '936', '937', '938', '939', '940', '941', '942', '943', '944', '945', '946', '947', '948', '949', '950', '951', '952', '953', '954', '955', '956', '957', '958', '959', '960', '961', '962', '963', '964', '965', '966', '967', '968', '969', '970', '971', '972', '973', '974', '975', '976', '977', '978', '979', '980', '981', '982', '983', '984', '985', '986', '987', '988', '989', '990', '991', '992', '993', '994', '995', '996', '997', '998', '999', '1000', '1001', '1002', '1003', '1004', '1005', '1006', '1007', '1008', '1009', '1010', '1011', '1012', '1013', '1014', '1015', '1016', '1017', '1018', '1019', '1020', '1021', '1022', '1023', '1024', '1025', '1026', '1027', '1028', '1029', '1030', '1031', '1032', '1033', '1034', '1035', '1036', '1037', '1038', '1039', '1040', '1041', '1042', '1043', '1044', '1045', '1046', '1047', '1048', '1049', '1050', '1051', '1052', '1053', '1054', '1055', '1056', '1057', '1058', '1059', '1060', '1061', '1062', '1063', '1064', '1065', '1066', '1067', '1068', '1103'], 'JX042489.1': ['67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', 
'100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '124.1', '125', '126', '127', '128', '129', '130', '131', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286', '287', '288', '289', '290', '291', '292', '293', '294', '295', '296', '297', '298', '299', '300', '301', '302', '303', '304', '305', '306', '307', '308', '309', '310', '311', '312', '313', '314', '315', '316', '317', '318', '319', '320', '321', '322', '323', '324', '325', '326', '327', '328', '329', '330', '331', '332', '333', '334', '335', '336', '337', '338', '339', '340', '341', '342', '343', '344', '345', '346', '347', '348', '349', '350', '351', '352', '353', '354', '355', '356', '357', '358', '359', '360', '361', '362', '363', '364', '365', '366', '367', '368', '369', '370', '371', '372', '373', '374', '375', '376', '377', '378', '379', '380', '381', '382', '383', '384', 
'385', '386', '387', '388', '389', '390', '391', '392', '393', '394', '395', '396', '397', '398', '399', '400', '401', '402', '403', '404', '405', '406', '407', '408', '409', '410', '411', '412', '413', '414', '415', '416', '417', '418', '419', '420', '421', '422', '423', '424', '425', '426', '427', '428', '429', '430', '431', '432', '433', '434', '435', '436', '437', '438', '439', '440', '441', '442', '443', '444', '445', '446', '447', '448', '449', '450', '451', '452', '453', '454', '455', '456', '457', '458', '459', '460', '461', '462', '463', '464', '465', '466', '467', '468', '469', '470', '471', '472', '473', '474', '475', '476', '477', '478', '479', '480', '481', '482', '483', '484', '485', '486', '487', '488', '489', '490', '491', '492', '493', '494', '495', '496', '497', '498', '499', '500', '501', '502', '503', '504', '505', '506', '507', '508', '509', '510', '511', '512', '513', '514', '515', '516', '517', '518', '519', '520', '521', '522', '523', '524', '525', '526', '527', '528', '529', '530', '531', '532', '533', '534', '535', '536', '537', '538', '539', '540', '541', '542', '543', '544', '545', '546', '547', '548', '549', '550', '551', '552', '553', '554', '555', '556', '557', '558', '559', '560', '561', '562', '563', '564', '565', '566', '567', '568', '569', '570', '571', '572', '573', '574', '575', '576', '577', '578', '579', '580', '581', '582', '583', '584', '585', '586', '587', '588', '589', '590', '591', '592', '593', '594', '595', '596', '597', '598', '599', '600', '601', '602', '603', '604', '605', '606', '607', '608', '609', '610', '611', '612', '613', '614', '615', '616', '617', '618', '619', '620', '621', '622', '623', '624', '625', '626', '627', '628', '629', '630', '631', '632', '633', '634', '635', '636', '637', '638', '639', '640', '641', '642', '643', '644', '645', '646', '647', '648', '649', '650', '651', '652', '653', '654', '655', '656', '657', '658', '659', '660', '661', '662', '663', '664', '665', '666', '667', '668', '669', 
'670', '671', '672', '673', '674', '675', '676', '677', '678', '679', '680', '681', '682', '683', '684', '685', '686', '687', '688', '689', '690', '691', '692', '693', '694', '695', '696', '697', '698', '699', '700', '701', '702', '703', '704', '705', '706', '707', '708', '709', '710', '711', '712', '713', '714', '715', '716', '717', '718', '719', '720', '721', '722', '723', '724', '725', '726', '727', '728', '729', '730', '731', '732', '733', '734', '735', '736', '737', '738', '739', '740', '741', '742', '743', '744', '745', '746', '747', '748', '749', '750', '751', '752', '753', '754', '755', '756', '757', '758', '759', '760', '761', '762', '763', '764', '765', '766', '767', '768', '769', '770', '771', '772', '773', '774', '775', '776', '777', '778', '779', '780', '781', '782', '783', '784', '785', '786', '787', '788', '789', '790', '791', '792', '793', '794', '795', '796', '797', '798', '799', '800', '801', '802', '803', '804', '805', '806', '807', '808', '809', '810', '811', '812', '813', '814', '815', '816', '817', '818', '819', '820', '821', '822', '823', '824', '825', '826', '827', '828', '829', '830', '831', '832', '833', '834', '835', '836', '837', '838', '839', '840', '841', '842', '843', '844', '845', '846', '847', '848', '849', '850', '851', '852', '853', '854', '855', '856', '857', '858', '859', '860', '861', '862', '863', '864', '865', '866', '867', '868', '869', '870', '871', '872', '873', '874', '875', '876', '877', '878', '879', '880', '881', '882', '883', '884', '885', '886', '887', '888', '889', '890', '891', '892', '893', '894', '895', '896', '897', '898', '899', '900', '901', '902', '903', '904', '905', '906', '907', '908', '909', '910', '911', '912', '913', '914', '915', '916', '917', '918', '919', '920', '921', '922', '923', '924', '925', '926', '927', '928', '929', '930', '931', '932', '933', '934', '935', '936', '937', '938', '939', '940', '941', '942', '943', '944', '945', '946', '947', '948', '949', '950', '951', '952', '953', '954', 
'955', '956', '957', '958', '959', '960', '961', '962', '963', '964', '965', '966', '967', '968', '969', '970', '971', '972', '973', '974', '975', '976', '977', '978', '979', '980', '981', '982', '983', '984', '985', '986', '987', '988', '989', '990', '991', '992', '993', '994', '995', '996', '997', '998', '999', '1000', '1001', '1002', '1003', '1004', '1005', '1006', '1007', '1008', '1009', '1010', '1011', '1012', '1013', '1014', '1015', '1016', '1017', '1018', '1019', '1020', '1021', '1022', '1023', '1024', '1025', '1026', '1027', '1028', '1029', '1030', '1031', '1032', '1033', '1034', '1035', '1036', '1037', '1038', '1039', '1040', '1041', '1042', '1043', '1044', '1045', '1046', '1047', '1048', '1049', '1050', '1051', '1052', '1053', '1054', '1055', '1056', '1057', '1058', '1059', '1060', '1061', '1062', '1063', '1064', '1065', '1066', '1067', '1068', '1103']}\u001b[0m\n"
+     ]
+    }
+   ],
+   "source": [
+    "sn_dna_pairwise = StandardNumberingTool(name=\"test_standard_numbering_dna_pairwise\")\n",
+    "\n",
+    "sn_dna_pairwise.apply_standard_numbering_pairwise(\n",
+    "    base_sequence_id=\"AF190695.1\", db=eedb.db, node_type=\"DNA\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": []
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
diff --git a/src/pyeed/analysis/mutation_detection.py b/src/pyeed/analysis/mutation_detection.py
index 7d37e5a5..961c56f4 100644
--- a/src/pyeed/analysis/mutation_detection.py
+++ b/src/pyeed/analysis/mutation_detection.py
@@ -116,6 +116,22 @@ def save_mutations_to_db(
             sequence_id1: First sequence accession ID
             sequence_id2: Second sequence accession ID
         """
+
+        # Check if a mutation relationship already exists between these proteins
+        existing_mutations = db.execute_read(
+            """
+            MATCH (p1:Protein)-[r:MUTATION]->(p2:Protein)
+            WHERE p1.accession_id = $sequence_id1 AND p2.accession_id = $sequence_id2
+            RETURN r
+            """,
+            {"sequence_id1": sequence_id1, "sequence_id2": sequence_id2},
+        )
+        if existing_mutations:
+            logger.debug(
+                f"Mutation relationship already exists between {sequence_id1} and {sequence_id2}"
+            )
+            return
+
         query = """
         MATCH (p1:Protein), (p2:Protein)
         WHERE p1.accession_id = $sequence_id1 AND p2.accession_id = $sequence_id2
@@ -145,6 +161,7 @@ def get_mutations_between_sequences(
         db: DatabaseConnector,
         standard_numbering_tool_name: str,
         save_to_db: bool = True,
+        debug: bool = False,
     ) -> dict[str, list[int | str]]:
         """Get mutations between two sequences using standard numbering.
 
@@ -169,6 +186,9 @@ def get_mutations_between_sequences(
             sequence_id1, sequence_id2, db, standard_numbering_tool_name
         )
 
+        if debug:
+            logger.info(f"Debug mode output: {sequences} and {positions}")
+
         mutations = self.find_mutations(
             sequences[sequence_id1],
             sequences[sequence_id2],
diff --git a/src/pyeed/analysis/sequence_alignment.py b/src/pyeed/analysis/sequence_alignment.py
index f5f808ff..8dd41553 100644
--- a/src/pyeed/analysis/sequence_alignment.py
+++ b/src/pyeed/analysis/sequence_alignment.py
@@ -90,6 +90,7 @@ def align_multipairwise(
         batch_size: int = 500,
         return_results: bool = True,
         pairs: Optional[list[tuple[str, str]]] = None,
+        node_type: str = "Protein",
     ) -> Optional[list[dict[str, Any]]]:
         """
         Creates all possible pairwise alignments from a dictionary of sequences or from sequence IDs.
@@ -112,7 +113,7 @@ def align_multipairwise(
                 returning the results, which can reduce memory usage. Defaults to True.
             pairs (Optional[list[tuple[str, str]]]): A list of tuples, where each tuple contains two
                 sequence IDs to align. If provided, only these pairs will be aligned.
-
+            node_type (str): The type of node to align. Defaults to "Protein".
         Returns:
             Optional[List[dict]]: A list of dictionaries containing the alignment results if
             `return_results` is True. If False, returns None.
@@ -120,7 +121,7 @@ def align_multipairwise(
 
         # Fetch sequences if ids are provided
         if ids is not None and db is not None:
-            sequences = self._get_id_sequence_dict(db, ids)
+            sequences = self._get_id_sequence_dict(db, ids, node_type)
 
         if not sequences:
             raise ValueError(
@@ -242,6 +243,7 @@ def _get_id_sequence_dict(
         self,
         db: DatabaseConnector,
         ids: list[str] = [],
+        node_type: str = "Protein",
     ) -> dict[str, str]:
         """Gets all sequences from the database and returns them in a dictionary.
         Key is the accession id and value is the sequence.
@@ -256,20 +258,20 @@ def _get_id_sequence_dict(
         """
 
         if not ids:
-            query = """
-            MATCH (p:Protein)
+            query = f"""
+            MATCH (p:{node_type})
             RETURN p.accession_id AS accession_id, p.sequence AS sequence
             """
-            proteins = db.execute_read(query)
+            nodes = db.execute_read(query)
         else:
-            query = """
-            MATCH (p:Protein)
+            query = f"""
+            MATCH (p:{node_type})
             WHERE p.accession_id IN $ids
             RETURN p.accession_id AS accession_id, p.sequence AS sequence
             """
-            proteins = db.execute_read(query, {"ids": ids})
+            nodes = db.execute_read(query, {"ids": ids})
 
-        return {protein["accession_id"]: protein["sequence"] for protein in proteins}
+        return {node["accession_id"]: node["sequence"] for node in nodes}
 
     def _load_substitution_matrix(self) -> "BioSubstitutionMatrix":
         from Bio.Align import substitution_matrices
diff --git a/src/pyeed/analysis/standard_numbering.py b/src/pyeed/analysis/standard_numbering.py
index e4b17d89..04d78d96 100644
--- a/src/pyeed/analysis/standard_numbering.py
+++ b/src/pyeed/analysis/standard_numbering.py
@@ -39,26 +39,25 @@ def __init__(self, name: str) -> None:
             positions: A dictionary mapping protein accession ids to lists of numbering positions.
         """
         self.name = name
-        self.positions: dict[str, list[str]] = {}
 
-    def get_protein_base_sequence(
-        self, base_sequence_id: str, db: DatabaseConnector
+    def get_node_base_sequence(
+        self, base_sequence_id: str, db: DatabaseConnector, node_type: str = "Protein"
     ) -> dict[str, str]:
         """
-        Retrieve the base protein sequence from the database for a given accession id.
+        Retrieve the base node sequence from the database for a given accession id.
 
-        This method executes a query that returns the protein with the provided id. It assumes a
+        This method executes a query that returns the node with the provided id. It assumes a
         valid result is returned and constructs a dictionary containing the id and the sequence.
 
         Args:
-            base_sequence_id: The accession id of the base protein sequence.
+            base_sequence_id: The accession id of the base node sequence.
             db: The database connector instance to perform the query.
 
         Returns:
-            A dictionary with keys 'id' and 'sequence' holding the protein accession id and its sequence.
+            A dictionary with keys 'id' and 'sequence' holding the node's accession id and its sequence.
         """
         query = f"""
-        MATCH (p:Protein)
+        MATCH (p:{node_type})
         WHERE p.accession_id = '{base_sequence_id}'
         RETURN p.accession_id AS accession_id, p.sequence AS sequence
         """
@@ -70,7 +69,12 @@ def get_protein_base_sequence(
         }
         return base_sequence
 
-    def save_positions(self, db: DatabaseConnector) -> None:
+    def save_positions(
+        self,
+        db: DatabaseConnector,
+        positions: dict[str, list[str]],
+        node_type: str = "Protein",
+    ) -> None:
         """
         Save the calculated numbering positions for each protein into the database.
 
@@ -81,12 +85,12 @@ def save_positions(self, db: DatabaseConnector) -> None:
         Args:
             db: The database connector instance used to execute the write queries.
         """
-        for protein_id in self.positions:
+        for protein_id in positions:
             query = f"""
-                MATCH (p:Protein {{accession_id: '{protein_id}'}})
+                MATCH (p:{node_type} {{accession_id: '{protein_id}'}})
                 MATCH (s:StandardNumbering {{name: '{self.name}'}})
                 MERGE (p)-[r:HAS_STANDARD_NUMBERING]->(s)
-                SET r.positions = {str(self.positions[protein_id])}
+                SET r.positions = {str(positions[protein_id])}
             """
             # Execute the write query to update the standard numbering relationship.
             db.execute_write(query)
@@ -312,6 +316,7 @@ def apply_standard_numbering_pairwise(
         db: DatabaseConnector,
         list_of_seq_ids: Optional[List[str]] = None,
         return_positions: bool = False,
+        node_type: str = "Protein",
     ) -> Optional[Dict[str, List[str]]]:
         """
         Apply standard numbering via pairwise alignment using a base sequence.
@@ -324,15 +329,16 @@ def apply_standard_numbering_pairwise(
         Args:
             base_sequence_id: The accession id of the base sequence.
             db: The DatabaseConnector instance used for communication with the database.
-            list_of_seq_ids: An optional list of protein ids to process. If None, all proteins are used.
+            list_of_seq_ids: An optional list of accession ids of `node_type` nodes to process. If None, all nodes of that type are used.
             return_positions: If True, the method returns the computed positions dictionary after processing.
+            node_type: The type of node to process. Default is "Protein".
 
         Raises:
             ValueError: If the pairwise alignment fails and returns no results.
         """
         if list_of_seq_ids is None:
-            query = """
-            MATCH (p:Protein)
+            query = f"""
+            MATCH (p:{node_type})
             WHERE p.accession_id IS NOT NULL
             RETURN p.accession_id AS accession_id
             """
@@ -347,17 +353,18 @@ def apply_standard_numbering_pairwise(
             list_of_seq_ids = [row["accession_id"] for row in results]
 
         # Remove the base sequence id from the list if present.
-        list_of_seq_ids.remove(base_sequence_id)
+        while base_sequence_id in list_of_seq_ids:
+            list_of_seq_ids.remove(base_sequence_id)
 
         # Generate pairs with the base sequence as the first element.
         pairs = []
-        for protein_id in list_of_seq_ids:
-            pairs.append((base_sequence_id, protein_id))
+        for node_id in list_of_seq_ids:
+            pairs.append((base_sequence_id, node_id))
 
         # check if the pairs are already existing with the same name under the same standard numbering node
-        query = """
-        MATCH (s:StandardNumbering {name: $name})
-        MATCH (p:Protein)-[r:HAS_STANDARD_NUMBERING]->(s)
+        query = f"""
+        MATCH (s:StandardNumbering {{name: $name}})
+        MATCH (p:{node_type})-[r:HAS_STANDARD_NUMBERING]->(s)
         WHERE p.accession_id IN $list_of_seq_ids
         RETURN p.accession_id AS accession_id
         """
@@ -373,6 +380,10 @@ def apply_standard_numbering_pairwise(
                             f"Pair {base_sequence_id} and {row['accession_id']} already exists under the same standard numbering node"
                         )
 
+        # remove duplicate pairs from the list of pairs
+        pairs = list(set(pairs))
+        logger.info(f"Pairs: {pairs}")
+
         # Run the pairwise alignment using the PairwiseAligner.
         pairwise_aligner = PairwiseAligner()
 
@@ -380,12 +391,17 @@ def apply_standard_numbering_pairwise(
         if not input:
             raise ValueError("No input sequences provided")
 
+        logger.info(f"Input: {input}")
+
         results_pairwise = pairwise_aligner.align_multipairwise(
             ids=input,  # Combine ids for alignment
             db=db,
             pairs=pairs,  # List of sequence pairs to be aligned
+            node_type=node_type,
         )
 
+        logger.info(f"Pairwise alignment results: {results_pairwise}")
+
         if results_pairwise is None:
             raise ValueError("Pairwise alignment failed - no results returned")
 
@@ -399,11 +415,19 @@ def apply_standard_numbering_pairwise(
             for result in results_pairwise
         ]
 
+        if len(converted_alignment) == 0:
+            logger.info(f"No alignment found for {base_sequence_id}")
+            return None
+
+        logger.info(f"Converted alignment: {len(converted_alignment)}")
+
         # Compute positions using the pairwise numbering algorithm.
-        self.positions = self.run_numbering_algorithm_pairwise(
+        positions = self.run_numbering_algorithm_pairwise(
             base_sequence_id, converted_alignment
         )
 
+        logger.info(f"Positions: {positions}")
+
         # Ensure the standard numbering node exists in the database.
         StandardNumbering.get_or_save(
             name=self.name,
@@ -411,10 +435,10 @@ def apply_standard_numbering_pairwise(
         )
 
         # Update the database with the calculated positions.
-        self.save_positions(db)
+        self.save_positions(db, positions, node_type)
 
         if return_positions:
-            return self.positions
+            return positions
         return None
 
     def apply_standard_numbering(
@@ -422,23 +446,25 @@ def apply_standard_numbering(
         base_sequence_id: str,
         db: DatabaseConnector,
         list_of_seq_ids: Optional[List[str]] = None,
+        node_type: str = "Protein",
     ) -> None:
         """
-        Apply a standard numbering scheme to a collection of proteins using multiple sequence alignment.
+        Apply a standard numbering scheme to a collection of nodes using multiple sequence alignment.
 
-        This method first retrieves all protein sequences from the database (or a subset if list_of_seq_ids is provided).
+        This method first retrieves all node sequences from the database (or a subset if list_of_seq_ids is provided).
         It then uses ClustalOmega to perform a multiple sequence alignment, computes the numbering positions via
         run_numbering_algorithm, creates (or retrieves) a StandardNumbering node, and saves the positions back into the database.
 
         Args:
             base_sequence_id: The accession id of the base sequence to which others are aligned.
             db: DatabaseConnector instance used for executing queries.
-            list_of_seq_ids: An optional list of specific protein ids to process. If None, all proteins are used.
+            list_of_seq_ids: An optional list of specific accession ids of `node_type` nodes to process. If None, all nodes of that type are used.
+            node_type: The type of node to process. Default is "Protein".
         """
 
         if list_of_seq_ids is None:
-            query = """
-            MATCH (p:Protein) 
+            query = f"""
+            MATCH (p:{node_type}) 
             WHERE p.sequence IS NOT NULL
             RETURN p.accession_id AS accession_id
             """
@@ -447,39 +473,37 @@ def apply_standard_numbering(
                 raise ValueError("No results returned from the query")
             list_of_seq_ids = [row["accession_id"] for row in results]
 
-        # Retrieve all proteins from the database. With both id and sequence.
-        query = """
-        MATCH (p:Protein)
+        # Retrieve all nodes from the database. With both id and sequence.
+        query = f"""
+        MATCH (p:{node_type})
         WHERE p.sequence IS NOT NULL
         AND p.accession_id IN $list_of_seq_ids
         RETURN p.accession_id AS accession_id, p.sequence AS sequence
         """
-        # Execute the query and build the proteins dictionary
-        proteins_read: List[Dict[str, Any]]
+        # Execute the query and build the nodes dictionary
+        nodes_read: List[Dict[str, Any]]
         query_result = db.execute_read(
             query, parameters={"list_of_seq_ids": list_of_seq_ids}
         )
         if query_result is None:
-            proteins_read = []
+            nodes_read = []
         else:
-            proteins_read = query_result
-        proteins_dict = {
-            protein["accession_id"]: protein["sequence"] for protein in proteins_read
-        }
+            nodes_read = query_result
+        nodes_dict = {node["accession_id"]: node["sequence"] for node in nodes_read}
 
-        logger.info(f"Using {len(proteins_dict)} sequences for standard numbering")
+        logger.info(f"Using {len(nodes_dict)} sequences for standard numbering")
 
         # Obtain the base sequence details from the database.
-        base_sequence = self.get_protein_base_sequence(base_sequence_id, db)
+        base_sequence = self.get_node_base_sequence(base_sequence_id, db, node_type)
 
-        # Remove the base sequence from the proteins list to prevent duplicate alignment.
-        if base_sequence_id in proteins_dict:
-            proteins_dict.pop(base_sequence_id)
+        # Remove the base sequence from the nodes list to prevent duplicate alignment.
+        if base_sequence_id in nodes_dict:
+            nodes_dict.pop(base_sequence_id)
 
         # Create a dictionary for ClustalOmega that includes both the base and target sequences.
         sequences_dict = {base_sequence["id"]: base_sequence["sequence"]}
-        for key in proteins_dict:
-            sequences_dict[key] = proteins_dict[key]
+        for key in nodes_dict:
+            sequences_dict[key] = nodes_dict[key]
 
         # Run the multiple sequence alignment using ClustalOmega.
         clustalO = ClustalOmega()
@@ -488,12 +512,11 @@ def apply_standard_numbering(
         )  # Passing a dict of sequences to ClustalOmega.
 
         logger.info(f"Alignment received from ClustalOmega:\n{alignment}")
+        logger.info(f"Alignment length: {len(list(alignment)[0][1][0].sequence)}")
 
         # Compute standard numbering positions using the computed alignment.
-        self.positions = self.run_numbering_algorithm_clustalo(
-            base_sequence_id, alignment
-        )
-        logger.info(f"Positions computed: {self.positions}")
+        positions = self.run_numbering_algorithm_clustalo(base_sequence_id, alignment)
+        logger.info(f"Positions computed: {positions}")
 
         # Create (or get) the StandardNumbering node in the database.
         StandardNumbering.get_or_save(
@@ -501,5 +524,5 @@ def apply_standard_numbering(
             definition=f"ClustalO based on base sequence {base_sequence_id}",
         )
 
-        # Update the database with the relationships between proteins and standard numbering.
-        self.save_positions(db)
+        # Update the database with the relationships between nodes and standard numbering.
+        self.save_positions(db, positions, node_type)
diff --git a/src/pyeed/main.py b/src/pyeed/main.py
index f2cdb134..25a1b225 100644
--- a/src/pyeed/main.py
+++ b/src/pyeed/main.py
@@ -123,8 +123,9 @@ def fetch_uniprot(self, ids: list[str]) -> None:
             request_params=params_template,
         )
 
-        asyncio.run(adapter.execute_requests())
+        # Fix: call nest_asyncio.apply() first, then run the adapter's coroutine
         nest_asyncio.apply()
+        asyncio.get_event_loop().run_until_complete(adapter.execute_requests())
 
     def fetch_ncbi_protein(self, ids: list[str]) -> None:
         """
@@ -153,8 +154,9 @@ def fetch_ncbi_protein(self, ids: list[str]) -> None:
             request_params=params_template,
         )
 
-        asyncio.run(adapter.execute_requests())
+        # Fix: use run_until_complete instead of asyncio.run
         nest_asyncio.apply()
+        asyncio.get_event_loop().run_until_complete(adapter.execute_requests())
 
     def fetch_ncbi_nucleotide(self, ids: list[str]) -> None:
         """
@@ -183,8 +185,9 @@ def fetch_ncbi_nucleotide(self, ids: list[str]) -> None:
             request_params=params_template,
         )
 
-        asyncio.run(adapter.execute_requests())
+        # Fix: apply nest_asyncio and then run the coroutine with the event loop
         nest_asyncio.apply()
+        asyncio.get_event_loop().run_until_complete(adapter.execute_requests())
 
     def calculate_sequence_embeddings(
         self,
@@ -278,21 +281,33 @@ def get_dnas(self, accession_ids: list[str]) -> list[dict[str, Any]]:
         """
         return self.db.execute_read(query, {"accession_ids": accession_ids})
 
-    def fetch_dna_entries_for_proteins(self) -> None:
+    def fetch_dna_entries_for_proteins(self, ids: list[str] | None = None) -> None:
         """
         Fetches DNA sequences for proteins that have a nucleotide id, set in the database.
         The fetching is done from NCBI nucleotide database in batches.
+
+        Args:
+            ids (list[str], optional): List of protein IDs to fetch DNA sequences for.
+                Defaults to None.
         """
         BATCH_SIZE = 100
 
         # Get all proteins and a list of coding sequences ids
-        query = """
-        MATCH (p:Protein) 
-        WHERE p.nucleotide_id IS NOT NULL 
-        RETURN p.nucleotide_id AS nucleotide_id
-        """
+        if ids is None:
+            query = """
+            MATCH (p:Protein) 
+            WHERE p.nucleotide_id IS NOT NULL
+            RETURN p.nucleotide_id AS nucleotide_id
+            """
+            response = self.db.execute_read(query)
+        else:
+            query = """
+            MATCH (p:Protein) 
+            WHERE p.nucleotide_id IS NOT NULL AND p.accession_id IN $ids
+            RETURN p.nucleotide_id AS nucleotide_id
+            """
+            response = self.db.execute_read(query, {"ids": ids})
 
-        response = self.db.execute_read(query)
         nucleotide_ids = [str(record["nucleotide_id"]) for record in response]
 
         logger.info(f"Found {len(nucleotide_ids)} coding sequences.")
@@ -338,12 +353,20 @@ def fetch_dna_entries_for_proteins(self) -> None:
                 continue
 
         # Process protein-DNA relationships in batches
-        query = """
-        MATCH (p:Protein)
-        WHERE p.nucleotide_id IS NOT NULL
-        RETURN p
-        """
-        proteins = self.db.execute_read(query)
+        if ids is None:
+            query = """
+            MATCH (p:Protein)
+            WHERE p.nucleotide_id IS NOT NULL
+            RETURN p
+            """
+            proteins = self.db.execute_read(query)
+        else:
+            query = """
+            MATCH (p:Protein)
+            WHERE p.nucleotide_id IS NOT NULL AND p.accession_id IN $ids
+            RETURN p
+            """
+            proteins = self.db.execute_read(query, {"ids": ids})
 
         for i in range(0, len(proteins), BATCH_SIZE):
             try:
diff --git a/src/pyeed/model.py b/src/pyeed/model.py
index 40457068..869a4091 100644
--- a/src/pyeed/model.py
+++ b/src/pyeed/model.py
@@ -407,10 +407,23 @@ def validate_and_connect(
         from_monomers: list[str],
         to_monomers: list[str],
     ) -> "Mutation":
-        """Validates the mutations and connects the two molecules.
+        """Validates the mutations and connects the two molecules, ensuring that no double mutations
+        occur – i.e. if a mutation affecting any of the same positions already exists between these proteins,
+        a new mutation cannot be created.
+
         Raises:
-            ValueError: If the specified positions or residues do not match the sequences.
+            ValueError: If input lists have different lengths or if a mutation for any of these positions
+                        already exists.
         """
+        # Look up an existing mutation relationship between these two molecules; if one is found,
+        # refuse to create another. NOTE(review): no per-position comparison is done here - confirm intended.
+        existing_mutations = molecule1.mutation.relationship(molecule2)
+
+        if existing_mutations:
+            raise ValueError(
+                "A mutation relationship affecting one or more of these positions already exists between these proteins."
+            )
+
         if (
             len(from_positions) != len(to_positions)
             or len(from_positions) != len(from_monomers)

From c08828038f15eceb886fcb95d7ab7a3b1e3eea33 Mon Sep 17 00:00:00 2001
From: Niklas Abraham GPU 
Date: Fri, 14 Mar 2025 16:36:32 +0000
Subject: [PATCH 10/13] added mutation detection

---
 docs/usage/mutation_analysis.ipynb       | 101 +++++++++++++++++------
 src/pyeed/analysis/mutation_detection.py |  19 +++--
 2 files changed, 90 insertions(+), 30 deletions(-)

diff --git a/docs/usage/mutation_analysis.ipynb b/docs/usage/mutation_analysis.ipynb
index 3d5618ef..9ccabc1c 100644
--- a/docs/usage/mutation_analysis.ipynb
+++ b/docs/usage/mutation_analysis.ipynb
@@ -11,7 +11,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 45,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -37,7 +37,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 46,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
@@ -56,7 +56,7 @@
     "\n",
     "eedb = Pyeed(uri, user=user, password=password)\n",
     "\n",
-    "eedb.db.wipe_database(date=\"2025-02-28\")"
+    "eedb.db.wipe_database(date=\"2025-03-14\")"
    ]
   },
   {
@@ -75,13 +75,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 47,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
-    "ids = [\"KJO56189.1\", \"KLP91446.1\"]\n",
+    "ids = [\"AAM15527.1\", \"AAF05614.1\", \"AFN21551.1\", \"CAA76794.1\", \"AGQ50511.1\"]\n",
     "\n",
-    "eedb.fetch_from_primary_db(ids, db=\"ncbi_protein\")"
+    "eedb.fetch_from_primary_db(ids, db=\"ncbi_protein\")\n",
+    "eedb.fetch_dna_entries_for_proteins()"
    ]
   },
   {
@@ -90,9 +91,7 @@
    "source": [
     "1. Defines two protein sequence IDs to analyze\n",
     "2. Fetches these sequences from NCBI's protein database\n",
-    "3. Both sequences are beta-lactamase proteins:\n",
-    "   - KJO56189.1: beta-lactamase TEM\n",
-    "   - KLP91446.1: class A beta-lactamase\n",
+    "3. All sequences are beta-lactamase proteins\n",
     "4. The sequences are automatically parsed and stored in the Neo4j database\n",
     "5. Additional metadata like organism information and CDS (Coding Sequence) details are also stored\n",
     "\n",
@@ -101,15 +100,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 48,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
-    "sn = StandardNumberingTool(name=\"test_standard_numbering\")\n",
+    "sn_protein = StandardNumberingTool(name=\"test_standard_numbering_protein\")\n",
     "\n",
     "\n",
-    "sn.apply_standard_numbering(\n",
-    "    base_sequence_id=\"KJO56189.1\", db=eedb.db, list_of_seq_ids=ids\n",
+    "sn_protein.apply_standard_numbering(\n",
+    "    base_sequence_id=\"AAM15527.1\", db=eedb.db, list_of_seq_ids=ids\n",
+    ")\n",
+    "\n",
+    "sn_dna = StandardNumberingTool(name=\"test_standard_numbering_dna\")\n",
+    "\n",
+    "sn_dna.apply_standard_numbering(\n",
+    "    base_sequence_id=\"AF190695.1\", db=eedb.db, node_type=\"DNA\"\n",
     ")\n"
    ]
   },
@@ -131,27 +136,37 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 49,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
     "md = MutationDetection()\n",
     "\n",
-    "seq1 = \"KJO56189.1\"\n",
-    "seq2 = \"KLP91446.1\"\n",
-    "name_of_standard_numbering_tool = \"test_standard_numbering\"\n",
+    "seq1 = \"AAM15527.1\"\n",
+    "seq2 = \"AAF05614.1\"\n",
+    "name_of_standard_numbering_tool = \"test_standard_numbering_protein\"\n",
     "\n",
-    "mutations = md.get_mutations_between_sequences(\n",
+    "mutations_protein = md.get_mutations_between_sequences(\n",
     "    seq1, seq2, eedb.db, name_of_standard_numbering_tool\n",
     ")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "md = MutationDetection()\n",
+    "\n",
+    "seq1 = \"AF190695.1\"\n",
+    "seq2 = \"JX042489.1\"\n",
+    "name_of_standard_numbering_tool = \"test_standard_numbering_dna\"\n",
+    "\n",
+    "mutations_dna = md.get_mutations_between_sequences(\n",
+    "    seq1, seq2, eedb.db, name_of_standard_numbering_tool, node_type=\"DNA\"\n",
+    ")"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -168,19 +183,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 50,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "{'from_positions': [102, 162, 236], 'to_positions': [102, 162, 236], 'from_monomers': ['E', 'S', 'G'], 'to_monomers': ['K', 'R', 'S']}\n"
+      "{'from_positions': [241, 272, 125], 'to_positions': [241, 272, 125], 'from_monomers': ['R', 'D', 'V'], 'to_monomers': ['S', 'N', 'I']}\n"
      ]
     }
    ],
    "source": [
-    "print(mutations)"
+    "print(mutations_protein)"
    ]
   },
   {
@@ -198,6 +213,46 @@
     "2. Position 162: Serine (S) → Arginine (R)\n",
     "3. Position 236: Glycine (G) → Serine (S)"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Mutation on position 682 -> 615 with a nucleotide change of T -> C\n",
+      "Mutation on position 407 -> 340 with a nucleotide change of C -> A\n",
+      "Mutation on position 92 -> 25 with a nucleotide change of C -> A\n",
+      "Mutation on position 162 -> 95 with a nucleotide change of G -> T\n",
+      "Mutation on position 929 -> 862 with a nucleotide change of A -> C\n",
+      "Mutation on position 346 -> 279 with a nucleotide change of A -> G\n",
+      "Mutation on position 87 -> 20 with a nucleotide change of C -> A\n",
+      "Mutation on position 88 -> 21 with a nucleotide change of T -> C\n",
+      "Mutation on position 130 -> 63 with a nucleotide change of C -> T\n",
+      "Mutation on position 175 -> 108 with a nucleotide change of G -> A\n",
+      "Mutation on position 131 -> 64 with a nucleotide change of T -> C\n",
+      "Mutation on position 132 -> 65 with a nucleotide change of A -> T\n",
+      "Mutation on position 914 -> 847 with a nucleotide change of G -> A\n",
+      "Mutation on position 604 -> 537 with a nucleotide change of T -> G\n",
+      "Mutation on position 925 -> 858 with a nucleotide change of G -> A\n",
+      "Mutation on position 226 -> 159 with a nucleotide change of T -> C\n"
+     ]
+    }
+   ],
+   "source": [
+    "for i in range(len(mutations_dna['from_positions'])):\n",
+    "    print(f\"Mutation on position {mutations_dna['from_positions'][i]} -> {mutations_dna['to_positions'][i]} with a nucleotide change of {mutations_dna['from_monomers'][i]} -> {mutations_dna['to_monomers'][i]}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
diff --git a/src/pyeed/analysis/mutation_detection.py b/src/pyeed/analysis/mutation_detection.py
index 961c56f4..082314f6 100644
--- a/src/pyeed/analysis/mutation_detection.py
+++ b/src/pyeed/analysis/mutation_detection.py
@@ -14,6 +14,7 @@ def get_sequence_data(
         sequence_id2: str,
         db: DatabaseConnector,
         standard_numbering_tool_name: str,
+        node_type: str = "Protein",
     ) -> tuple[dict[str, str], dict[str, list[str]]]:
         """Fetch sequence and position data for two sequences from the database.
 
@@ -32,7 +33,7 @@ def get_sequence_data(
             ValueError: If standard numbering positions not found for both sequences
         """
         query = f"""
-        MATCH (p:Protein)-[r:HAS_STANDARD_NUMBERING]->(s:StandardNumbering)
+        MATCH (p:{node_type})-[r:HAS_STANDARD_NUMBERING]->(s:StandardNumbering)
         WHERE p.accession_id IN ['{sequence_id1}', '{sequence_id2}'] 
         AND s.name = '{standard_numbering_tool_name}'
         RETURN p.accession_id as id, p.sequence as sequence, r.positions as positions
@@ -103,6 +104,7 @@ def save_mutations_to_db(
         db: DatabaseConnector,
         sequence_id1: str,
         sequence_id2: str,
+        node_type: str = "Protein",
     ) -> None:
         """Save detected mutations to the database.
 
@@ -119,8 +121,8 @@ def save_mutations_to_db(
 
         # Check if a mutation relationship already exists between these proteins
         existing_mutations = db.execute_read(
-            """
-            MATCH (p1:Protein)-[r:MUTATION]->(p2:Protein)
+            f"""
+            MATCH (p1:{node_type})-[r:MUTATION]->(p2:{node_type})
             WHERE p1.accession_id = $sequence_id1 AND p2.accession_id = $sequence_id2
             RETURN r
             """,
@@ -132,8 +134,8 @@ def save_mutations_to_db(
             )
             return
 
-        query = """
-        MATCH (p1:Protein), (p2:Protein)
+        query = f"""
+        MATCH (p1:{node_type}), (p2:{node_type})
         WHERE p1.accession_id = $sequence_id1 AND p2.accession_id = $sequence_id2
         CREATE (p1)-[r:MUTATION]->(p2)
         SET r.from_positions = $from_positions,
@@ -162,6 +164,7 @@ def get_mutations_between_sequences(
         standard_numbering_tool_name: str,
         save_to_db: bool = True,
         debug: bool = False,
+        node_type: str = "Protein",
     ) -> dict[str, list[int | str]]:
         """Get mutations between two sequences using standard numbering.
 
@@ -183,7 +186,7 @@ def get_mutations_between_sequences(
             ValueError: If standard numbering positions not found for both sequences
         """
         sequences, positions = self.get_sequence_data(
-            sequence_id1, sequence_id2, db, standard_numbering_tool_name
+            sequence_id1, sequence_id2, db, standard_numbering_tool_name, node_type
         )
 
         if debug:
@@ -197,6 +200,8 @@ def get_mutations_between_sequences(
         )
 
         if save_to_db:
-            self.save_mutations_to_db(mutations, db, sequence_id1, sequence_id2)
+            self.save_mutations_to_db(
+                mutations, db, sequence_id1, sequence_id2, node_type
+            )
 
         return mutations

From 3992635c8f78086af9e3c95c140a5678c05a8b92 Mon Sep 17 00:00:00 2001
From: Niklas Abraham GPU 
Date: Fri, 14 Mar 2025 16:38:07 +0000
Subject: [PATCH 11/13] fixed mypy ruff

---
 src/pyeed/embedding.py | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/src/pyeed/embedding.py b/src/pyeed/embedding.py
index 3e7f564e..82b5c6d7 100644
--- a/src/pyeed/embedding.py
+++ b/src/pyeed/embedding.py
@@ -72,7 +72,7 @@ def load_model_and_tokenizer(
 
 def get_batch_embeddings(
     batch_sequences: list[str],
-    model: Union[EsmModel, ESMC],  # Updated type hint
+    model: Union[EsmModel, ESMC],
     tokenizer_or_alphabet: Union[EsmTokenizer, None],
     device: torch.device,
     pool_embeddings: bool = True,
@@ -100,7 +100,11 @@ def get_batch_embeddings(
                 logits_output = model.logits(
                     protein_tensor, LogitsConfig(sequence=True, return_embeddings=True)
                 )
-                # Convert embeddings to numpy array
+                # Convert embeddings to numpy array - ensure embeddings is not None
+                if logits_output.embeddings is None:
+                    raise ValueError(
+                        "Model did not return embeddings. Check LogitsConfig settings."
+                    )
                 embeddings = logits_output.embeddings.cpu().numpy()
                 if pool_embeddings:
                     embeddings = embeddings.mean(axis=1)
@@ -141,7 +145,7 @@ def calculate_single_sequence_embedding_last_hidden_state(
 
 def calculate_single_sequence_embedding_all_layers(
     sequence: str, model_name: str = "facebook/esm2_t33_650M_UR50D"
-) -> dict[int, NDArray[np.float64]]:
+) -> NDArray[np.float64]:
     """
     Calculates embeddings for a single sequence across all layers.
 
@@ -150,7 +154,7 @@ def calculate_single_sequence_embedding_all_layers(
         model_name: Name of the ESM model to use
 
     Returns:
-        dict[int, NDArray[np.float64]]: A dictionary mapping layer indices (starting at 0) to normalized token embeddings.
+        NDArray[np.float64]: A numpy array containing layer embeddings for the sequence.
     """
     model, tokenizer, device = load_model_and_tokenizer(model_name)
     return get_single_embedding_all_layers(sequence, model, tokenizer, device)
@@ -187,6 +191,12 @@ def get_single_embedding_last_hidden_state(
                     return_hidden_states=True,
                 ),
             )
+            # Ensure hidden_states is not None before accessing it
+            if logits_output.hidden_states is None:
+                raise ValueError(
+                    "Model did not return hidden states. Check LogitsConfig settings."
+                )
+
             embedding = (
                 logits_output.hidden_states[-1][0].to(torch.float32).cpu().numpy()
             )
@@ -233,9 +243,15 @@ def get_single_embedding_all_layers(
                 LogitsConfig(
                     sequence=True,
                     return_embeddings=True,
-                    return_hidden_states=True,  # Assuming this flag is supported
+                    return_hidden_states=True,
                 ),
             )
+            # Ensure hidden_states is not None before iterating
+            if logits_output.hidden_states is None:
+                raise ValueError(
+                    "Model did not return hidden states. Check if return_hidden_states=True is supported."
+                )
+
             # logits_output.hidden_states should be a tuple of tensors: (layer, batch, seq_len, hidden_dim)
             for layer_tensor in logits_output.hidden_states:
                 # Remove batch dimension and (if applicable) any special tokens

From 5c8f619cbb3329092172c4fa4ffd672509206fb4 Mon Sep 17 00:00:00 2001
From: Niklas Abraham GPU 
Date: Fri, 14 Mar 2025 17:31:18 +0000
Subject: [PATCH 12/13] removed AF2

---
 .gitignore                          |   1 +
 docs/usage/alphafold.ipynb          | 276 ----------------------------
 docs/usage/basics.ipynb             |   4 +-
 docs/usage/standard_numbering.ipynb | 138 ++------------
 src/pyeed/tools/alphafold2.py       | 121 ------------
 5 files changed, 18 insertions(+), 522 deletions(-)
 delete mode 100644 docs/usage/alphafold.ipynb
 delete mode 100644 src/pyeed/tools/alphafold2.py

diff --git a/.gitignore b/.gitignore
index f1e9c2d3..47b27efc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,7 @@ __pycache__/
 
 # AlphaFold output
 docs/resources/alphafold/output/*
+src/pyeed/tools/alphafold2.py
 
 # C extensions
 *.so
diff --git a/docs/usage/alphafold.ipynb b/docs/usage/alphafold.ipynb
deleted file mode 100644
index 060ba5e1..00000000
--- a/docs/usage/alphafold.ipynb
+++ /dev/null
@@ -1,276 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# AlphaFold Usage Example\n",
-    "\n",
-    "This notebook demonstrates how to use AlphaFold through the `pyeed` package interface.\n",
-    "\n",
-    "## Setup and Imports"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "\n",
-    "import logging\n",
-    "from pyeed.tools.alphafold2 import AlphaFoldRunner"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')\n",
-    "LOGGER = logging.getLogger(__name__)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Configures logging to display timestamped INFO-level messages.\n",
-    "\n",
-    "## Input Preparation"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "id = 'AAP20891.1'\n",
-    "sequence = 'MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "286"
-      ]
-     },
-     "execution_count": 4,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "len(sequence)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Verifies the length of the input sequence (286 amino acids).\n",
-    "\n",
-    "## AlphaFold Configuration"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "data_dir = '/media/database/alphafold' # the path to the downloaded alphafold data\n",
-    "output_dir = os.path.join(os.path.dirname(os.getcwd()), \"resources\", \"alphafold\", \"output\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Sets up the paths for AlphaFold database and output directory.\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "2025-02-09 18:21:49,187 - INFO - GPU detected. AlphaFold will run on GPU.\n"
-     ]
-    }
-   ],
-   "source": [
-    "alphafold_runner = AlphaFoldRunner(data_dir, output_dir)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Initializes the AlphaFold runner with the specified directories. The system detected a GPU for computation.\n",
-    "\n",
-    "## Execution\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "2025-02-09 18:21:49,193 - INFO - Created FASTA file at: /home/nab/Niklas/pyeed/docs/resources/alphafold/output/AAP20891_1.fasta\n",
-      "2025-02-09 18:21:49,194 - INFO - Running AlphaFold with command: source ~/anaconda3/etc/profile.d/conda.sh && conda activate alphafold_env && python /home/nab/Niklas/pyeed/src/pyeed/tools/resources/alphafold/docker_run.py --fasta_paths=/home/nab/Niklas/pyeed/docs/resources/alphafold/output/AAP20891_1.fasta --max_template_date=2022-01-01 --data_dir=/media/database/alphafold --output_dir=/home/nab/Niklas/pyeed/docs/resources/alphafold/output\n",
-      "2025-02-09 18:59:25,222 - ERROR - AlphaFold stderr:\n",
-      "I0209 18:21:49.823744 126234918065984 docker_run.py:143] Mounting /home/nab/Niklas/pyeed/docs/resources/alphafold/output -> /mnt/fasta_path_0\n",
-      "I0209 18:21:49.823894 126234918065984 docker_run.py:143] Mounting /media/database/alphafold/uniref90 -> /mnt/uniref90_database_path\n",
-      "I0209 18:21:49.823978 126234918065984 docker_run.py:143] Mounting /media/database/alphafold/mgnify -> /mnt/mgnify_database_path\n",
-      "I0209 18:21:49.824042 126234918065984 docker_run.py:143] Mounting /media/database/alphafold -> /mnt/data_dir\n",
-      "I0209 18:21:49.824100 126234918065984 docker_run.py:143] Mounting /media/database/alphafold/pdb_mmcif/mmcif_files -> /mnt/template_mmcif_dir\n",
-      "I0209 18:21:49.824160 126234918065984 docker_run.py:143] Mounting /media/database/alphafold/pdb_mmcif -> /mnt/obsolete_pdbs_path\n",
-      "I0209 18:21:49.824414 126234918065984 docker_run.py:143] Mounting /media/database/alphafold/pdb70 -> /mnt/pdb70_database_path\n",
-      "I0209 18:21:49.824606 126234918065984 docker_run.py:143] Mounting /media/database/alphafold/uniref30 -> /mnt/uniref30_database_path\n",
-      "I0209 18:21:49.824759 126234918065984 docker_run.py:143] Mounting /media/database/alphafold/bfd -> /mnt/bfd_database_path\n",
-      "I0209 18:21:50.499799 126234918065984 docker_run.py:297] /bin/bash: /opt/conda/lib/libtinfo.so.6: no version information available (required by /bin/bash)\n",
-      "I0209 18:21:54.122220 126234918065984 docker_run.py:297] I0209 18:21:54.121551 129414388114048 templates.py:858] Using precomputed obsolete pdbs /mnt/obsolete_pdbs_path/obsolete.dat.\n",
-      "I0209 18:21:54.896348 126234918065984 docker_run.py:297] I0209 18:21:54.895152 129414388114048 xla_bridge.py:863] Unable to initialize backend 'rocm': NOT_FOUND: Could not find registered platform with name: \"rocm\". Available platform names are: CUDA\n",
-      "I0209 18:21:54.896494 126234918065984 docker_run.py:297] I0209 18:21:54.896013 129414388114048 xla_bridge.py:863] Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: libtpu.so: cannot open shared object file: No such file or directory\n",
-      "I0209 18:22:00.123066 126234918065984 docker_run.py:297] I0209 18:22:00.122301 129414388114048 run_alphafold.py:524] Have 5 models: ['model_1_pred_0', 'model_2_pred_0', 'model_3_pred_0', 'model_4_pred_0', 'model_5_pred_0']\n",
-      "I0209 18:22:00.123264 126234918065984 docker_run.py:297] I0209 18:22:00.122479 129414388114048 run_alphafold.py:538] Using random seed 1472682267336032100 for the data pipeline\n",
-      "I0209 18:22:00.123352 126234918065984 docker_run.py:297] I0209 18:22:00.122714 129414388114048 run_alphafold.py:245] Predicting AAP20891_1\n",
-      "I0209 18:22:00.123819 126234918065984 docker_run.py:297] I0209 18:22:00.123200 129414388114048 jackhmmer.py:133] Launching subprocess \"/usr/bin/jackhmmer -o /dev/null -A /tmp/tmpxc0kqa9t/output.sto --noali --F1 0.0005 --F2 5e-05 --F3 5e-07 --incE 0.0001 -E 0.0001 --cpu 8 -N 1 /mnt/fasta_path_0/AAP20891_1.fasta /mnt/uniref90_database_path/uniref90.fasta\"\n",
-      "I0209 18:22:00.124192 126234918065984 docker_run.py:297] I0209 18:22:00.123989 129414388114048 utils.py:36] Started Jackhmmer (uniref90.fasta) query\n",
-      "I0209 18:29:46.660077 126234918065984 docker_run.py:297] I0209 18:29:46.659375 129414388114048 utils.py:40] Finished Jackhmmer (uniref90.fasta) query in 466.535 seconds\n",
-      "I0209 18:29:46.872978 126234918065984 docker_run.py:297] I0209 18:29:46.872399 129414388114048 jackhmmer.py:133] Launching subprocess \"/usr/bin/jackhmmer -o /dev/null -A /tmp/tmpy07gvq9o/output.sto --noali --F1 0.0005 --F2 5e-05 --F3 5e-07 --incE 0.0001 -E 0.0001 --cpu 8 -N 1 /mnt/fasta_path_0/AAP20891_1.fasta /mnt/mgnify_database_path/mgy_clusters_2022_05.fa\"\n",
-      "I0209 18:29:46.873672 126234918065984 docker_run.py:297] I0209 18:29:46.873422 129414388114048 utils.py:36] Started Jackhmmer (mgy_clusters_2022_05.fa) query\n",
-      "I0209 18:42:17.609159 126234918065984 docker_run.py:297] I0209 18:42:17.608636 129414388114048 utils.py:40] Finished Jackhmmer (mgy_clusters_2022_05.fa) query in 750.735 seconds\n",
-      "I0209 18:42:19.588392 126234918065984 docker_run.py:297] I0209 18:42:19.587914 129414388114048 hhsearch.py:85] Launching subprocess \"/usr/bin/hhsearch -i /tmp/tmp1k92nh3b/query.a3m -o /tmp/tmp1k92nh3b/output.hhr -maxseq 1000000 -d /mnt/pdb70_database_path/pdb70\"\n",
-      "I0209 18:42:19.589171 126234918065984 docker_run.py:297] I0209 18:42:19.588937 129414388114048 utils.py:36] Started HHsearch query\n",
-      "I0209 18:42:48.892270 126234918065984 docker_run.py:297] I0209 18:42:48.891644 129414388114048 utils.py:40] Finished HHsearch query in 29.303 seconds\n",
-      "I0209 18:42:49.653391 126234918065984 docker_run.py:297] I0209 18:42:49.652812 129414388114048 hhblits.py:128] Launching subprocess \"/usr/bin/hhblits -i /mnt/fasta_path_0/AAP20891_1.fasta -cpu 4 -oa3m /tmp/tmpj3pynfqf/output.a3m -o /dev/null -n 3 -e 0.001 -maxseq 1000000 -realign_max 100000 -maxfilt 100000 -min_prefilter_hits 1000 -d /mnt/bfd_database_path/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt -d /mnt/uniref30_database_path/UniRef30_2021_03\"\n",
-      "I0209 18:42:49.654407 126234918065984 docker_run.py:297] I0209 18:42:49.654129 129414388114048 utils.py:36] Started HHblits query\n",
-      "I0209 18:51:13.099540 126234918065984 docker_run.py:297] I0209 18:51:13.049138 129414388114048 utils.py:40] Finished HHblits query in 503.390 seconds\n",
-      "I0209 18:51:13.235084 126234918065984 docker_run.py:297] I0209 18:51:13.232861 129414388114048 templates.py:879] Searching for template for: MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW\n",
-      "I0209 18:51:13.443433 126234918065984 docker_run.py:297] I0209 18:51:13.441954 129414388114048 templates.py:267] Found an exact template match 4zj1_A.\n",
-      "I0209 18:51:13.919805 126234918065984 docker_run.py:297] I0209 18:51:13.919035 129414388114048 templates.py:267] Found an exact template match 2p74_B.\n",
-      "I0209 18:51:15.502430 126234918065984 docker_run.py:297] I0209 18:51:15.501675 129414388114048 templates.py:267] Found an exact template match 4bd0_A.\n",
-      "I0209 18:51:15.661102 126234918065984 docker_run.py:297] I0209 18:51:15.659892 129414388114048 templates.py:267] Found an exact template match 6nfd_A.\n",
-      "I0209 18:51:15.969052 126234918065984 docker_run.py:297] I0209 18:51:15.968239 129414388114048 templates.py:267] Found an exact template match 1m40_A.\n",
-      "I0209 18:51:16.131306 126234918065984 docker_run.py:297] I0209 18:51:16.130592 129414388114048 templates.py:267] Found an exact template match 1n9b_A.\n",
-      "I0209 18:51:16.363438 126234918065984 docker_run.py:297] I0209 18:51:16.362271 129414388114048 templates.py:267] Found an exact template match 2b5r_B.\n",
-      "I0209 18:51:16.789144 126234918065984 docker_run.py:297] I0209 18:51:16.788326 129414388114048 templates.py:267] Found an exact template match 4ua6_A.\n",
-      "I0209 18:51:16.879354 126234918065984 docker_run.py:297] I0209 18:51:16.878310 129414388114048 templates.py:267] Found an exact template match 1g6a_A.\n",
-      "I0209 18:51:17.147200 126234918065984 docker_run.py:297] I0209 18:51:17.146468 129414388114048 templates.py:267] Found an exact template match 6afo_B.\n",
-      "I0209 18:51:17.594172 126234918065984 docker_run.py:297] I0209 18:51:17.592624 129414388114048 templates.py:267] Found an exact template match 6td0_A.\n",
-      "I0209 18:51:17.764129 126234918065984 docker_run.py:297] I0209 18:51:17.763039 129414388114048 templates.py:267] Found an exact template match 1o7e_B.\n",
-      "I0209 18:51:17.999657 126234918065984 docker_run.py:297] I0209 18:51:17.998758 129414388114048 templates.py:267] Found an exact template match 6niq_A.\n",
-      "I0209 18:51:18.144142 126234918065984 docker_run.py:297] I0209 18:51:18.143428 129414388114048 templates.py:267] Found an exact template match 4mbh_A.\n",
-      "I0209 18:51:18.514421 126234918065984 docker_run.py:297] I0209 18:51:18.513112 129414388114048 templates.py:267] Found an exact template match 5ne2_B.\n",
-      "I0209 18:51:18.737822 126234918065984 docker_run.py:297] I0209 18:51:18.737295 129414388114048 templates.py:267] Found an exact template match 6qwb_A.\n",
-      "I0209 18:51:18.885024 126234918065984 docker_run.py:297] I0209 18:51:18.884303 129414388114048 templates.py:267] Found an exact template match 6c7a_A.\n",
-      "I0209 18:51:19.108935 126234918065984 docker_run.py:297] I0209 18:51:19.108240 129414388114048 templates.py:286] Found a fuzzy sequence-only match 6dmh_A.\n",
-      "I0209 18:51:19.565951 126234918065984 docker_run.py:297] I0209 18:51:19.565258 129414388114048 templates.py:267] Found an exact template match 4c75_D.\n",
-      "I0209 18:51:19.695087 126234918065984 docker_run.py:297] I0209 18:51:19.694348 129414388114048 templates.py:267] Found an exact template match 6bn3_A.\n",
-      "I0209 18:51:20.471373 126234918065984 docker_run.py:297] I0209 18:51:20.470418 129414388114048 pipeline.py:234] Uniref90 MSA size: 10000 sequences.\n",
-      "I0209 18:51:20.471764 126234918065984 docker_run.py:297] I0209 18:51:20.470559 129414388114048 pipeline.py:235] BFD MSA size: 2460 sequences.\n",
-      "I0209 18:51:20.471865 126234918065984 docker_run.py:297] I0209 18:51:20.470590 129414388114048 pipeline.py:236] MGnify MSA size: 501 sequences.\n",
-      "I0209 18:51:20.471951 126234918065984 docker_run.py:297] I0209 18:51:20.470622 129414388114048 pipeline.py:237] Final (deduplicated) MSA size: 12900 sequences.\n",
-      "I0209 18:51:20.472341 126234918065984 docker_run.py:297] I0209 18:51:20.470838 129414388114048 pipeline.py:239] Total number of templates (NB: this can include bad templates and is later filtered to top 4): 20.\n",
-      "I0209 18:51:20.525065 126234918065984 docker_run.py:297] I0209 18:51:20.524172 129414388114048 run_alphafold.py:276] Running model model_1_pred_0 on AAP20891_1\n",
-      "I0209 18:51:25.273778 126234918065984 docker_run.py:297] I0209 18:51:25.271720 129414388114048 model.py:165] Running predict with shape(feat) = {'aatype': (4, 286), 'residue_index': (4, 286), 'seq_length': (4,), 'template_aatype': (4, 4, 286), 'template_all_atom_masks': (4, 4, 286, 37), 'template_all_atom_positions': (4, 4, 286, 37, 3), 'template_sum_probs': (4, 4, 1), 'is_distillation': (4,), 'seq_mask': (4, 286), 'msa_mask': (4, 508, 286), 'msa_row_mask': (4, 508), 'random_crop_to_size_seed': (4, 2), 'template_mask': (4, 4), 'template_pseudo_beta': (4, 4, 286, 3), 'template_pseudo_beta_mask': (4, 4, 286), 'atom14_atom_exists': (4, 286, 14), 'residx_atom14_to_atom37': (4, 286, 14), 'residx_atom37_to_atom14': (4, 286, 37), 'atom37_atom_exists': (4, 286, 37), 'extra_msa': (4, 5120, 286), 'extra_msa_mask': (4, 5120, 286), 'extra_msa_row_mask': (4, 5120), 'bert_mask': (4, 508, 286), 'true_msa': (4, 508, 286), 'extra_has_deletion': (4, 5120, 286), 'extra_deletion_value': (4, 5120, 286), 'msa_feat': (4, 508, 286, 49), 'target_feat': (4, 286, 22)}\n",
-      "I0209 18:53:31.565129 126234918065984 docker_run.py:297] I0209 18:53:31.563400 129414388114048 model.py:175] Output shape was {'distogram': {'bin_edges': (63,), 'logits': (286, 286, 64)}, 'experimentally_resolved': {'logits': (286, 37)}, 'masked_msa': {'logits': (508, 286, 23)}, 'predicted_lddt': {'logits': (286, 50)}, 'structure_module': {'final_atom_mask': (286, 37), 'final_atom_positions': (286, 37, 3)}, 'plddt': (286,), 'ranking_confidence': ()}\n",
-      "I0209 18:53:31.565434 126234918065984 docker_run.py:297] I0209 18:53:31.563578 129414388114048 run_alphafold.py:288] Total JAX model model_1_pred_0 on AAP20891_1 predict time (includes compilation time, see --benchmark): 126.3s\n",
-      "I0209 18:53:31.764772 126234918065984 docker_run.py:297] I0209 18:53:31.764194 129414388114048 run_alphafold.py:276] Running model model_2_pred_0 on AAP20891_1\n",
-      "I0209 18:53:35.338457 126234918065984 docker_run.py:297] I0209 18:53:35.335328 129414388114048 model.py:165] Running predict with shape(feat) = {'aatype': (4, 286), 'residue_index': (4, 286), 'seq_length': (4,), 'template_aatype': (4, 4, 286), 'template_all_atom_masks': (4, 4, 286, 37), 'template_all_atom_positions': (4, 4, 286, 37, 3), 'template_sum_probs': (4, 4, 1), 'is_distillation': (4,), 'seq_mask': (4, 286), 'msa_mask': (4, 508, 286), 'msa_row_mask': (4, 508), 'random_crop_to_size_seed': (4, 2), 'template_mask': (4, 4), 'template_pseudo_beta': (4, 4, 286, 3), 'template_pseudo_beta_mask': (4, 4, 286), 'atom14_atom_exists': (4, 286, 14), 'residx_atom14_to_atom37': (4, 286, 14), 'residx_atom37_to_atom14': (4, 286, 37), 'atom37_atom_exists': (4, 286, 37), 'extra_msa': (4, 1024, 286), 'extra_msa_mask': (4, 1024, 286), 'extra_msa_row_mask': (4, 1024), 'bert_mask': (4, 508, 286), 'true_msa': (4, 508, 286), 'extra_has_deletion': (4, 1024, 286), 'extra_deletion_value': (4, 1024, 286), 'msa_feat': (4, 508, 286, 49), 'target_feat': (4, 286, 22)}\n",
-      "I0209 18:55:08.613217 126234918065984 docker_run.py:297] I0209 18:55:08.612550 129414388114048 model.py:175] Output shape was {'distogram': {'bin_edges': (63,), 'logits': (286, 286, 64)}, 'experimentally_resolved': {'logits': (286, 37)}, 'masked_msa': {'logits': (508, 286, 23)}, 'predicted_lddt': {'logits': (286, 50)}, 'structure_module': {'final_atom_mask': (286, 37), 'final_atom_positions': (286, 37, 3)}, 'plddt': (286,), 'ranking_confidence': ()}\n",
-      "I0209 18:55:08.613392 126234918065984 docker_run.py:297] I0209 18:55:08.612682 129414388114048 run_alphafold.py:288] Total JAX model model_2_pred_0 on AAP20891_1 predict time (includes compilation time, see --benchmark): 93.3s\n",
-      "I0209 18:55:08.796196 126234918065984 docker_run.py:297] I0209 18:55:08.795683 129414388114048 run_alphafold.py:276] Running model model_3_pred_0 on AAP20891_1\n",
-      "I0209 18:55:11.976388 126234918065984 docker_run.py:297] I0209 18:55:11.975605 129414388114048 model.py:165] Running predict with shape(feat) = {'aatype': (4, 286), 'residue_index': (4, 286), 'seq_length': (4,), 'is_distillation': (4,), 'seq_mask': (4, 286), 'msa_mask': (4, 512, 286), 'msa_row_mask': (4, 512), 'random_crop_to_size_seed': (4, 2), 'atom14_atom_exists': (4, 286, 14), 'residx_atom14_to_atom37': (4, 286, 14), 'residx_atom37_to_atom14': (4, 286, 37), 'atom37_atom_exists': (4, 286, 37), 'extra_msa': (4, 5120, 286), 'extra_msa_mask': (4, 5120, 286), 'extra_msa_row_mask': (4, 5120), 'bert_mask': (4, 512, 286), 'true_msa': (4, 512, 286), 'extra_has_deletion': (4, 5120, 286), 'extra_deletion_value': (4, 5120, 286), 'msa_feat': (4, 512, 286, 49), 'target_feat': (4, 286, 22)}\n",
-      "I0209 18:56:30.749872 126234918065984 docker_run.py:297] I0209 18:56:30.749186 129414388114048 model.py:175] Output shape was {'distogram': {'bin_edges': (63,), 'logits': (286, 286, 64)}, 'experimentally_resolved': {'logits': (286, 37)}, 'masked_msa': {'logits': (512, 286, 23)}, 'predicted_lddt': {'logits': (286, 50)}, 'structure_module': {'final_atom_mask': (286, 37), 'final_atom_positions': (286, 37, 3)}, 'plddt': (286,), 'ranking_confidence': ()}\n",
-      "I0209 18:56:30.750071 126234918065984 docker_run.py:297] I0209 18:56:30.749317 129414388114048 run_alphafold.py:288] Total JAX model model_3_pred_0 on AAP20891_1 predict time (includes compilation time, see --benchmark): 78.8s\n",
-      "I0209 18:56:30.932418 126234918065984 docker_run.py:297] I0209 18:56:30.931747 129414388114048 run_alphafold.py:276] Running model model_4_pred_0 on AAP20891_1\n",
-      "I0209 18:56:34.075862 126234918065984 docker_run.py:297] I0209 18:56:34.074738 129414388114048 model.py:165] Running predict with shape(feat) = {'aatype': (4, 286), 'residue_index': (4, 286), 'seq_length': (4,), 'is_distillation': (4,), 'seq_mask': (4, 286), 'msa_mask': (4, 512, 286), 'msa_row_mask': (4, 512), 'random_crop_to_size_seed': (4, 2), 'atom14_atom_exists': (4, 286, 14), 'residx_atom14_to_atom37': (4, 286, 14), 'residx_atom37_to_atom14': (4, 286, 37), 'atom37_atom_exists': (4, 286, 37), 'extra_msa': (4, 5120, 286), 'extra_msa_mask': (4, 5120, 286), 'extra_msa_row_mask': (4, 5120), 'bert_mask': (4, 512, 286), 'true_msa': (4, 512, 286), 'extra_has_deletion': (4, 5120, 286), 'extra_deletion_value': (4, 5120, 286), 'msa_feat': (4, 512, 286, 49), 'target_feat': (4, 286, 22)}\n",
-      "I0209 18:57:49.478633 126234918065984 docker_run.py:297] I0209 18:57:49.478027 129414388114048 model.py:175] Output shape was {'distogram': {'bin_edges': (63,), 'logits': (286, 286, 64)}, 'experimentally_resolved': {'logits': (286, 37)}, 'masked_msa': {'logits': (512, 286, 23)}, 'predicted_lddt': {'logits': (286, 50)}, 'structure_module': {'final_atom_mask': (286, 37), 'final_atom_positions': (286, 37, 3)}, 'plddt': (286,), 'ranking_confidence': ()}\n",
-      "I0209 18:57:49.478832 126234918065984 docker_run.py:297] I0209 18:57:49.478156 129414388114048 run_alphafold.py:288] Total JAX model model_4_pred_0 on AAP20891_1 predict time (includes compilation time, see --benchmark): 75.4s\n",
-      "I0209 18:57:49.663070 126234918065984 docker_run.py:297] I0209 18:57:49.662528 129414388114048 run_alphafold.py:276] Running model model_5_pred_0 on AAP20891_1\n",
-      "I0209 18:57:52.800162 126234918065984 docker_run.py:297] I0209 18:57:52.799148 129414388114048 model.py:165] Running predict with shape(feat) = {'aatype': (4, 286), 'residue_index': (4, 286), 'seq_length': (4,), 'is_distillation': (4,), 'seq_mask': (4, 286), 'msa_mask': (4, 512, 286), 'msa_row_mask': (4, 512), 'random_crop_to_size_seed': (4, 2), 'atom14_atom_exists': (4, 286, 14), 'residx_atom14_to_atom37': (4, 286, 14), 'residx_atom37_to_atom14': (4, 286, 37), 'atom37_atom_exists': (4, 286, 37), 'extra_msa': (4, 1024, 286), 'extra_msa_mask': (4, 1024, 286), 'extra_msa_row_mask': (4, 1024), 'bert_mask': (4, 512, 286), 'true_msa': (4, 512, 286), 'extra_has_deletion': (4, 1024, 286), 'extra_deletion_value': (4, 1024, 286), 'msa_feat': (4, 512, 286, 49), 'target_feat': (4, 286, 22)}\n",
-      "I0209 18:59:06.248019 126234918065984 docker_run.py:297] I0209 18:59:06.247377 129414388114048 model.py:175] Output shape was {'distogram': {'bin_edges': (63,), 'logits': (286, 286, 64)}, 'experimentally_resolved': {'logits': (286, 37)}, 'masked_msa': {'logits': (512, 286, 23)}, 'predicted_lddt': {'logits': (286, 50)}, 'structure_module': {'final_atom_mask': (286, 37), 'final_atom_positions': (286, 37, 3)}, 'plddt': (286,), 'ranking_confidence': ()}\n",
-      "I0209 18:59:06.248191 126234918065984 docker_run.py:297] I0209 18:59:06.247525 129414388114048 run_alphafold.py:288] Total JAX model model_5_pred_0 on AAP20891_1 predict time (includes compilation time, see --benchmark): 73.4s\n",
-      "I0209 18:59:11.661808 126234918065984 docker_run.py:297] I0209 18:59:11.660365 129414388114048 amber_minimize.py:178] alterations info: {'nonstandard_residues': [], 'removed_heterogens': set(), 'missing_residues': {}, 'missing_heavy_atoms': {}, 'missing_terminals': {: ['OXT']}, 'Se_in_MET': [], 'removed_chains': {0: []}}\n",
-      "I0209 18:59:11.858800 126234918065984 docker_run.py:297] I0209 18:59:11.858058 129414388114048 amber_minimize.py:408] Minimizing protein, attempt 1 of 100.\n",
-      "I0209 18:59:12.170496 126234918065984 docker_run.py:297] I0209 18:59:12.169910 129414388114048 amber_minimize.py:69] Restraining 2212 / 4439 particles.\n",
-      "I0209 18:59:14.782054 126234918065984 docker_run.py:297] I0209 18:59:14.780544 129414388114048 amber_minimize.py:178] alterations info: {'nonstandard_residues': [], 'removed_heterogens': set(), 'missing_residues': {}, 'missing_heavy_atoms': {}, 'missing_terminals': {}, 'Se_in_MET': [], 'removed_chains': {0: []}}\n",
-      "I0209 18:59:17.725336 126234918065984 docker_run.py:297] I0209 18:59:17.724404 129414388114048 amber_minimize.py:500] Iteration completed: Einit 67630.03 Efinal -7208.87 Time 1.44 s num residue violations 0 num residue exclusions 0\n",
-      "I0209 18:59:18.429040 126234918065984 docker_run.py:297] I0209 18:59:18.428259 129414388114048 run_alphafold.py:414] Final timings for AAP20891_1: {'features': 1760.3612880706787, 'process_features_model_1_pred_0': 4.747036695480347, 'predict_and_compile_model_1_pred_0': 126.29224181175232, 'process_features_model_2_pred_0': 3.57088565826416, 'predict_and_compile_model_2_pred_0': 93.27748966217041, 'process_features_model_3_pred_0': 3.1796696186065674, 'predict_and_compile_model_3_pred_0': 78.77385020256042, 'process_features_model_4_pred_0': 3.142765522003174, 'predict_and_compile_model_4_pred_0': 75.4035313129425, 'process_features_model_5_pred_0': 3.1363840103149414, 'predict_and_compile_model_5_pred_0': 73.44848084449768, 'relax_model_1_pred_0': 11.401028394699097}\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "{}"
-      ]
-     },
-     "execution_count": 7,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "alphafold_runner.run_alphafold(sequence=sequence, sequence_id=id)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Attempts to run AlphaFold prediction on the input sequence. The output indicates that a FASTA file was created, but the execution encountered an error when trying to locate the output structure file.\n",
-    "\n",
-    "## Note\n",
-    "The execution appears to have encountered an error when trying to find the output PDB file (`AAP20891_1_relaxed.pdb`). This might indicate an issue with the AlphaFold execution or output file generation."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "pyeed_niklas",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.12.8"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/docs/usage/basics.ipynb b/docs/usage/basics.ipynb
index 78306635..ddb903c3 100644
--- a/docs/usage/basics.ipynb
+++ b/docs/usage/basics.ipynb
@@ -189,7 +189,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "pyeed",
+   "display_name": "pyeed_niklas",
    "language": "python",
    "name": "python3"
   },
@@ -203,7 +203,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.5"
+   "version": "3.12.8"
   }
  },
  "nbformat": 4,
diff --git a/docs/usage/standard_numbering.ipynb b/docs/usage/standard_numbering.ipynb
index 47daf619..d2132d3c 100644
--- a/docs/usage/standard_numbering.ipynb
+++ b/docs/usage/standard_numbering.ipynb
@@ -16,7 +16,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": 36,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -30,12 +30,12 @@
     "from pyeed.analysis.standard_numbering import StandardNumberingTool\n",
     "\n",
     "logger.remove()\n",
-    "level = logger.add(sys.stderr, level=\"INFO\")"
+    "level = logger.add(sys.stderr, level=\"WARNING\")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 37,
    "metadata": {},
    "outputs": [
     {
@@ -101,29 +101,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 38,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\u001b[32m2025-03-14 16:01:33.841\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.main\u001b[0m:\u001b[36mfetch_from_primary_db\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mFound 0 sequences in the database.\u001b[0m\n",
-      "\u001b[32m2025-03-14 16:01:33.841\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.main\u001b[0m:\u001b[36mfetch_from_primary_db\u001b[0m:\u001b[36m89\u001b[0m - \u001b[1mFetching 5 sequences from ncbi_protein.\u001b[0m\n",
-      "\u001b[32m2025-03-14 16:01:33.864\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.primary_db_adapter\u001b[0m:\u001b[36mexecute_requests\u001b[0m:\u001b[36m140\u001b[0m - \u001b[1mStarting requests for 1 batches.\u001b[0m\n",
-      "\u001b[32m2025-03-14 16:01:35.072\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAM15527.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-14 16:01:35.101\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF05614.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-14 16:01:35.128\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AFN21551.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-14 16:01:35.164\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA76794.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-14 16:01:35.193\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AGQ50511.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-14 16:01:35.197\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.main\u001b[0m:\u001b[36mfetch_dna_entries_for_proteins\u001b[0m:\u001b[36m313\u001b[0m - \u001b[1mFound 5 coding sequences.\u001b[0m\n",
-      "\u001b[32m2025-03-14 16:01:35.201\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.main\u001b[0m:\u001b[36mfetch_dna_entries_for_proteins\u001b[0m:\u001b[36m343\u001b[0m - \u001b[1mFetching 5 new coding sequences.\u001b[0m\n",
-      "\u001b[32m2025-03-14 16:01:35.242\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.primary_db_adapter\u001b[0m:\u001b[36mexecute_requests\u001b[0m:\u001b[36m140\u001b[0m - \u001b[1mStarting requests for 1 batches.\u001b[0m\n",
-      "\u001b[32m2025-03-14 16:01:36.535\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.main\u001b[0m:\u001b[36mfetch_dna_entries_for_proteins\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1mSuccessfully fetched batch 1\u001b[0m\n",
-      "\u001b[32m2025-03-14 16:01:36.692\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.main\u001b[0m:\u001b[36mfetch_dna_entries_for_proteins\u001b[0m:\u001b[36m421\u001b[0m - \u001b[1mSuccessfully processed relationship batch 1\u001b[0m\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "ids = [\"AAM15527.1\", \"AAF05614.1\", \"AFN21551.1\", \"CAA76794.1\", \"AGQ50511.1\"]\n",
     "\n",
@@ -133,21 +113,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 39,
    "metadata": {},
    "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\u001b[32m2025-03-14 16:01:37.045\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m385\u001b[0m - \u001b[1mPairs: [('AAM15527.1', 'CAA76794.1'), ('AAM15527.1', 'AGQ50511.1'), ('AAM15527.1', 'AFN21551.1'), ('AAM15527.1', 'AAF05614.1')]\u001b[0m\n",
-      "\u001b[32m2025-03-14 16:01:37.046\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m394\u001b[0m - \u001b[1mInput: ['AAF05614.1', 'AFN21551.1', 'CAA76794.1', 'AGQ50511.1', 'AAM15527.1']\u001b[0m\n"
-     ]
-    },
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "34805690583d49b5b2c01190c74a1729",
+       "model_id": "6470d4b80eb648e1af401b2e59cbe95b",
        "version_major": 2,
        "version_minor": 0
       },
@@ -167,15 +139,6 @@
      },
      "metadata": {},
      "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\u001b[32m2025-03-14 16:01:41.722\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m403\u001b[0m - \u001b[1mPairwise alignment results: [{'query_id': 'AAM15527.1', 'target_id': 'CAA76794.1', 'score': 272.0, 'identity': 0.9755244755244755, 'gaps': 0, 'mismatches': 7, 'query_aligned': 'MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMLSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAVTMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERDRQIAEIGASLIKHW', 'target_aligned': 'MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDKLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVKYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLRNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGASERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW'}, {'query_id': 'AAM15527.1', 'target_id': 'AGQ50511.1', 'score': 280.0, 'identity': 0.9895104895104895, 'gaps': 0, 'mismatches': 3, 'query_aligned': 'MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMLSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAVTMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERDRQIAEIGASLIKHW', 'target_aligned': 'MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMVSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW'}, {'query_id': 'AAM15527.1', 'target_id': 'AFN21551.1', 'score': 278.0, 'identity': 0.986013986013986, 'gaps': 0, 'mismatches': 4, 'query_aligned': 
'MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMLSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAVTMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERDRQIAEIGASLIKHW', 'target_aligned': 'MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGASERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW'}, {'query_id': 'AAM15527.1', 'target_id': 'AAF05614.1', 'score': 280.0, 'identity': 0.9895104895104895, 'gaps': 0, 'mismatches': 3, 'query_aligned': 'MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMLSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAVTMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERDRQIAEIGASLIKHW', 'target_aligned': 'MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMLSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSSGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW'}]\u001b[0m\n",
-      "\u001b[32m2025-03-14 16:01:41.724\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m422\u001b[0m - \u001b[1mConverted alignment: 4\u001b[0m\n",
-      "\u001b[32m2025-03-14 16:01:41.728\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m429\u001b[0m - \u001b[1mPositions: {'AAM15527.1': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', 
'266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286'], 'CAA76794.1': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', 
'278', '279', '280', '281', '282', '283', '284', '285', '286'], 'AGQ50511.1': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286'], 'AFN21551.1': ['1', '2', 
'3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286'], 'AAF05614.1': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', 
'19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286']}\u001b[0m\n"
-     ]
     }
    ],
    "source": [
@@ -189,25 +152,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 40,
    "metadata": {},
    "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\u001b[32m2025-03-14 16:01:51.025\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m379\u001b[0m - \u001b[1mPair AAM15527.1 and AAF05614.1 already exists under the same standard numbering node\u001b[0m\n",
-      "\u001b[32m2025-03-14 16:01:51.026\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m379\u001b[0m - \u001b[1mPair AAM15527.1 and AFN21551.1 already exists under the same standard numbering node\u001b[0m\n",
-      "\u001b[32m2025-03-14 16:01:51.026\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m379\u001b[0m - \u001b[1mPair AAM15527.1 and CAA76794.1 already exists under the same standard numbering node\u001b[0m\n",
-      "\u001b[32m2025-03-14 16:01:51.027\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m379\u001b[0m - \u001b[1mPair AAM15527.1 and AGQ50511.1 already exists under the same standard numbering node\u001b[0m\n",
-      "\u001b[32m2025-03-14 16:01:51.027\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m385\u001b[0m - \u001b[1mPairs: []\u001b[0m\n",
-      "\u001b[32m2025-03-14 16:01:51.028\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m394\u001b[0m - \u001b[1mInput: ['AAF05614.1', 'AFN21551.1', 'CAA76794.1', 'AGQ50511.1', 'AAM15527.1']\u001b[0m\n"
-     ]
-    },
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "b03690699eff400b8e713c6bdfb04c8b",
+       "model_id": "47406b43c98e4b31ba41eb15f7cdd000",
        "version_major": 2,
        "version_minor": 0
       },
@@ -227,14 +178,6 @@
      },
      "metadata": {},
      "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\u001b[32m2025-03-14 16:01:51.049\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m403\u001b[0m - \u001b[1mPairwise alignment results: []\u001b[0m\n",
-      "\u001b[32m2025-03-14 16:01:51.049\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m419\u001b[0m - \u001b[1mNo alignment found for AAM15527.1\u001b[0m\n"
-     ]
     }
    ],
    "source": [
@@ -245,26 +188,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 41,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\u001b[32m2025-03-14 16:01:52.356\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering\u001b[0m:\u001b[36m494\u001b[0m - \u001b[1mUsing 4 sequences for standard numbering\u001b[0m\n",
-      "\u001b[32m2025-03-14 16:01:52.467\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering\u001b[0m:\u001b[36m514\u001b[0m - \u001b[1mAlignment received from ClustalOmega:\n",
-      "AAM15527.1  MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMLSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAVTMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERDRQIAEIGASLIKHW\n",
-      "AAF05614.1  MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMLSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSSGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW\n",
-      "AFN21551.1  MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGASERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW\n",
-      "CAA76794.1  MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDKLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVKYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLRNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGASERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW\n",
-      "AGQ50511.1  MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMVSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW\u001b[0m\n",
-      "\u001b[32m2025-03-14 16:01:52.468\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering\u001b[0m:\u001b[36m515\u001b[0m - \u001b[1mAlignment length: 286\u001b[0m\n",
-      "\u001b[32m2025-03-14 16:01:52.468\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mrun_numbering_algorithm_clustalo\u001b[0m:\u001b[36m118\u001b[0m - \u001b[1mRunning numbering algorithm for base sequence AAM15527.1\u001b[0m\n",
-      "\u001b[32m2025-03-14 16:01:52.469\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering\u001b[0m:\u001b[36m519\u001b[0m - \u001b[1mPositions computed: {'AAM15527.1': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', 
'266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286'], 'AAF05614.1': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', 
'278', '279', '280', '281', '282', '283', '284', '285', '286'], 'AFN21551.1': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286'], 'CAA76794.1': ['1', '2', 
'3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286'], 'AGQ50511.1': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', 
'19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286']}\u001b[0m\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "sn_clustal = StandardNumberingTool(name=\"test_standard_numbering_clustal\")\n",
     "\n",
@@ -275,26 +201,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 42,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\u001b[32m2025-03-14 16:01:52.743\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering\u001b[0m:\u001b[36m494\u001b[0m - \u001b[1mUsing 5 sequences for standard numbering\u001b[0m\n",
-      "\u001b[32m2025-03-14 16:01:53.287\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering\u001b[0m:\u001b[36m514\u001b[0m - \u001b[1mAlignment received from ClustalOmega:\n",
-      "AF190695.1  TTCTTGAAGACGAAAGGGCCTCGTGATACGCCTATTTTTATAGGTTAATGTCATGATAATAATGGTTTCTTAGACGTCAGGTGGCACTTTTCGGGGAAATGTGCGCGGAACCCCTATTTGTTTATTTTTCTAAATACATTCAAATATGTATCCGCTCATGAGACAATAACCCTGGTAAATGCTTCAATAATATTGAAAAAGGAAGAGTATGAGTATTCAACATTTTCGTGTCGCCCTTATTCCCTTTTTTGCGGCATTTTGCCTTCCTGTTTTTGCTCACCCAGAAACGCTGGTGAAAGTAAAAGATGCTGAAGATCAGTTGGGTGCACGAGTGGGTTACATCGAACTGGATCTCAACAGCGGTAAGATCCTTGAGAGTTTTCGCCCCGAAGAACGTTTTCCAATGCTGAGCACTTTTAAAGTTCTGCTATGTGGTGCGGTATTATCCCGTGTTGACGCCGGGCAAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCTGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCACAACATGGGGGATCATGTAACTCGCCTTGATCGTTGGGAACCGGAGCTGAATGAAGCCATACCAAACGACGAGCGTGACACCACGATGCCTGCAGCAATGGCAACAACGTTGCGCAAACTATTAACTGGCGAACTACTTACTCTAGCTTCCCGGCAACAATTAATAGACTGGATGGAGGCGGATAAAGTTGCAGGACCACTTCTGCGCTCGGCCCTTCCGGCTGGCTGGTTTATTGCTGATAAATCTGGAGCCGGTGAGCGTGGGTCTAGCGGTATCATTGCAGCACTGGGGCCAGATGGTAAGCCCTCCCGTATCGTAGTTATCTACACGACGGGGAGTCAGGCAACTATGGATGAACGAAATAGACAGATCGCTGAGATAGGTGCCTCACTGATTAAGCATTGGTAACTGTCAGACCAAGTTTACTCATATATACTTTAGA\n",
-      "AF347054.1  TTCTTGAAGACGAAAGGGCCTCGTGATACGCTTATTTTTATAGGTTAATGTCATGATAATAATGGTTTCTTAGACGTCAGGTGGCACTTTTCGGGGAAATGTGCGCGGAACCCCTATTTGTTTATTTTTCTAAATACATTCAAATATGTATCCGCTCATGAGACAATAACCCTGATAAATGCTTCAATAATATTGAAAAAGGAAGAGTATGAGTATTCAACATTTCCGTGTCGCCCTTATTCCCTTTTTTGCGGCATTTTGCCTTCCTGTTTTTGCTCACCCAGAAACGCTGGTGAAAGTAAAAGATGCTGAAGATCAGTTGGGTGCACGAGTGGGTTACATCGAACTGGATCTCAACAGCGGTAAGATCCTTGAGAGTTTTCGCCCCGAAGAACGTTTTCCAATGCTGAGCACTTTTAAAGTTCTGCTATGTGGCGCGGTATTATCCCGTGTTGACGCCGGGCAAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCGTAACCATGAGTGATAACACTGCGGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCACAACATGGGGGATCATGTAACTCGCCTTGATCGTTGGGAACCGGAGCTGAATGAAGCCATACCAAACGACGAGCGTGACACCACGATGCCTGCAGCAATGGCAACAACGTTGCGCAAACTATTAACTGGCGAACTACTTACTCTAGCTTCCCGGCAACAATTAATAGACTGGATGGAGGCGGATAAAGTTGCAGGACCACTTCTGCGCTCGGCCCTTCCGGCTGGCTGGTTTATTGCTGATAAATCTGGAGCCGGTGAGCGTGGGTCTCGCGGTATCATTGCAGCACTGGGGCCAGATGGTAAGCCCTCCCGTATCGTAGTTATCTACACGACGGGGAGTCAGGCAACTATGGATGAACGAGATAGACAGATCGCTGAGATAGGTGCCTCACTGATTAAGCATTGGTAACTGTCAGACCAAG---------------------\n",
-      "JX042489.1  ------------------------------------------------------------------TTCTTAGACGTCAGGTGGC-ACTTTAGGGGAAATGTGCGCGGAACCCCTATTTGTTTATTTTTTCTAATACATTCAAATATGTATCCGCTCATGATACAATAACCCTGATAAATGCTTCAATAATATTGAAAAAGGAAGAGTATGAGTATTCAACATTTCCGTGTCGCCCTTATTCCCTTTTTTGCGGCATTTTGCCTTCCTGTTTTTGCTCACCCAGAAACGCTGGTGAAAGTAAAAGATGCTGAAGATCAGTTGGGTGCACGAGTGGGTTACATCGAGCTGGATCTCAACAGCGGTAAGATCCTTGAGAGTTTTCGCCCCGAAGAACGTTTTCCAATGATGAGCACTTTTAAAGTTCTGCTATGTGGTGCGGTATTATCCCGTGTTGACGCCGGGCAAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCGGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCACAACATGGGGGATCATGTAACCCGCCTTGATCGTTGGGAACCGGAGCTGAATGAAGCCATACCAAACGACGAGCGTGACACCACGATGCCTGCAGCAATGGCAACAACGTTGCGCAAACTATTAACTGGCGAACTACTTACTCTAGCTTCCCGGCAACAATTAATAGACTGGATGGAGGCGGATAAAGTTGCAGGACCACTTCTGCGCTCGGCCCTTCCGGCTGGCTGGTTTATTGCTGATAAATCTGGAGCCAGTGAGCGTGGATCTCGCGGTATCATTGCAGCACTGGGGCCAGATGGTAAGCCCTCCCGTATCGTAGTTATCTACACGACGGGGAGTCAGGCAACTATGGATGAACGAAATAGACAGATCGCTGAGATAGGTGCCTCACTGATTAAGCATTGGTAA----------------------------------\n",
-      "KC844056.1  ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ATGAGTATTCAACATTTTCGTGTCGCCCTTATTCCCTTTTTTGCGGCATTTTGCCTTCCTGTTTTTGCTCACCCAGAAACGCTGGTGAAAGTAAAAGATGCTGAAGATCAGTTGGGTGCACGAGTGGGTTACATCGAACTGGATCTCAACAGCGGTAAGATCCTTGAGAGTTTTCGCCCCGAAGAACGTTTTCCAATGGTGAGCACTTTTAAAGTTCTGCTATGTGGTGCGGTATTATCCCGTGTTGACGCCGGGCAAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCTGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCACAACATGGGGGATCATGTAACTCGCCTTGATCGTTGGGAACCGGAGCTGAATGAAGCCATACCAAACGACGAGCGTGACACCACGATGCCTGCAGCAATGGCAACAACGTTGCGCAAACTATTAACTGGCGAACTACTTACTCTAGCTTCCCGGCAACAATTAATAGACTGGATGGAGGCGGATAAAGTTGCAGGACCACTTCTGCGCTCGGCCCTTCCGGCTGGCTGGTTTATTGCTGATAAATCTGGAGCCGGTGAGCGTGGGTCTCGCGGTATCATTGCAGCACTGGGGCCAGATGGTAAGCCCTCCCGTATCGTAGTTATCTACACGACGGGGAGTCAGGCAACTATGGATGAACGAAATAGACAGATCGCTGAGATAGGTGCCTCACTGATTAAGCATTGGTAA----------------------------------\n",
-      "Y17582.1    ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ATGAGTATTCAACATTTCCGTGTCGCCCTTATTCCCTTTTTTGCGGCATTTTGCCTTCCTGTTTTTGCTCACCCAGAAACGCTGGTGAAAGTAAAAGATGCTGAAGATAAGTTGGGTGCACGAGTGGGTTACATCGAGCTGGATCTCAACAGCGGTAAGATCCTTGAGAGTTTTCGCCCCGAAGAACGTTTTCCAATGATGAGCACTTTTAAAGTTCTGCTATGTGGTGCGGTATTATCCCGTGTTGACGCCGGGCAAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTAAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCGGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCGCAACATGGGGGATCATGTAACCCGCCTTGATCGTTGGGAACCGGAGCTGAATGAAGCCATACCAAACGACGAGCGTGACACCACGATGCCTGCAGCAATGGCAACAACGTTGCGCAAACTATTAACTGGCGAACTACTTACTCTAGCTTCCCGGCAACAATTAATAGACTGGATGGAGGCGGATAAAGTTGCAGGACCACTTCTGCGCTCGGCCCTTCCGGCTGGCTGGTTTATTGCTGATAAATCTGGAGCCAGTGAGCGTGGATCTCGCGGTATCATTGCAGCACTGGGGCCAGATGGTAAGCCCTCCCGTATCGTAGTTATCTACACGACGGGGAGTCAGGCAACTATGGATGAACGAAATAGACAGATCGCTGAGATAGGTGCCTCACTGATTAAGCATTGG-------------------------------------\u001b[0m\n",
-      "\u001b[32m2025-03-14 16:01:53.288\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering\u001b[0m:\u001b[36m515\u001b[0m - \u001b[1mAlignment length: 1103\u001b[0m\n",
-      "\u001b[32m2025-03-14 16:01:53.288\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mrun_numbering_algorithm_clustalo\u001b[0m:\u001b[36m118\u001b[0m - \u001b[1mRunning numbering algorithm for base sequence AF190695.1\u001b[0m\n",
-      "\u001b[32m2025-03-14 16:01:53.293\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering\u001b[0m:\u001b[36m519\u001b[0m - \u001b[1mPositions computed: {'AF190695.1': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', 
'266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286', '287', '288', '289', '290', '291', '292', '293', '294', '295', '296', '297', '298', '299', '300', '301', '302', '303', '304', '305', '306', '307', '308', '309', '310', '311', '312', '313', '314', '315', '316', '317', '318', '319', '320', '321', '322', '323', '324', '325', '326', '327', '328', '329', '330', '331', '332', '333', '334', '335', '336', '337', '338', '339', '340', '341', '342', '343', '344', '345', '346', '347', '348', '349', '350', '351', '352', '353', '354', '355', '356', '357', '358', '359', '360', '361', '362', '363', '364', '365', '366', '367', '368', '369', '370', '371', '372', '373', '374', '375', '376', '377', '378', '379', '380', '381', '382', '383', '384', '385', '386', '387', '388', '389', '390', '391', '392', '393', '394', '395', '396', '397', '398', '399', '400', '401', '402', '403', '404', '405', '406', '407', '408', '409', '410', '411', '412', '413', '414', '415', '416', '417', '418', '419', '420', '421', '422', '423', '424', '425', '426', '427', '428', '429', '430', '431', '432', '433', '434', '435', '436', '437', '438', '439', '440', '441', '442', '443', '444', '445', '446', '447', '448', '449', '450', '451', '452', '453', '454', '455', '456', '457', '458', '459', '460', '461', '462', '463', '464', '465', '466', '467', '468', '469', '470', '471', '472', '473', '474', '475', '476', '477', '478', '479', '480', '481', '482', '483', '484', '485', '486', '487', '488', '489', '490', '491', '492', '493', '494', '495', '496', '497', '498', '499', '500', '501', '502', '503', '504', '505', '506', '507', '508', '509', '510', '511', '512', '513', '514', '515', '516', '517', '518', '519', '520', '521', '522', '523', '524', '525', '526', '527', '528', '529', '530', '531', '532', '533', '534', '535', '536', '537', '538', '539', '540', '541', '542', '543', '544', '545', '546', '547', '548', '549', '550', 
'551', '552', '553', '554', '555', '556', '557', '558', '559', '560', '561', '562', '563', '564', '565', '566', '567', '568', '569', '570', '571', '572', '573', '574', '575', '576', '577', '578', '579', '580', '581', '582', '583', '584', '585', '586', '587', '588', '589', '590', '591', '592', '593', '594', '595', '596', '597', '598', '599', '600', '601', '602', '603', '604', '605', '606', '607', '608', '609', '610', '611', '612', '613', '614', '615', '616', '617', '618', '619', '620', '621', '622', '623', '624', '625', '626', '627', '628', '629', '630', '631', '632', '633', '634', '635', '636', '637', '638', '639', '640', '641', '642', '643', '644', '645', '646', '647', '648', '649', '650', '651', '652', '653', '654', '655', '656', '657', '658', '659', '660', '661', '662', '663', '664', '665', '666', '667', '668', '669', '670', '671', '672', '673', '674', '675', '676', '677', '678', '679', '680', '681', '682', '683', '684', '685', '686', '687', '688', '689', '690', '691', '692', '693', '694', '695', '696', '697', '698', '699', '700', '701', '702', '703', '704', '705', '706', '707', '708', '709', '710', '711', '712', '713', '714', '715', '716', '717', '718', '719', '720', '721', '722', '723', '724', '725', '726', '727', '728', '729', '730', '731', '732', '733', '734', '735', '736', '737', '738', '739', '740', '741', '742', '743', '744', '745', '746', '747', '748', '749', '750', '751', '752', '753', '754', '755', '756', '757', '758', '759', '760', '761', '762', '763', '764', '765', '766', '767', '768', '769', '770', '771', '772', '773', '774', '775', '776', '777', '778', '779', '780', '781', '782', '783', '784', '785', '786', '787', '788', '789', '790', '791', '792', '793', '794', '795', '796', '797', '798', '799', '800', '801', '802', '803', '804', '805', '806', '807', '808', '809', '810', '811', '812', '813', '814', '815', '816', '817', '818', '819', '820', '821', '822', '823', '824', '825', '826', '827', '828', '829', '830', '831', '832', '833', '834', '835', 
'836', '837', '838', '839', '840', '841', '842', '843', '844', '845', '846', '847', '848', '849', '850', '851', '852', '853', '854', '855', '856', '857', '858', '859', '860', '861', '862', '863', '864', '865', '866', '867', '868', '869', '870', '871', '872', '873', '874', '875', '876', '877', '878', '879', '880', '881', '882', '883', '884', '885', '886', '887', '888', '889', '890', '891', '892', '893', '894', '895', '896', '897', '898', '899', '900', '901', '902', '903', '904', '905', '906', '907', '908', '909', '910', '911', '912', '913', '914', '915', '916', '917', '918', '919', '920', '921', '922', '923', '924', '925', '926', '927', '928', '929', '930', '931', '932', '933', '934', '935', '936', '937', '938', '939', '940', '941', '942', '943', '944', '945', '946', '947', '948', '949', '950', '951', '952', '953', '954', '955', '956', '957', '958', '959', '960', '961', '962', '963', '964', '965', '966', '967', '968', '969', '970', '971', '972', '973', '974', '975', '976', '977', '978', '979', '980', '981', '982', '983', '984', '985', '986', '987', '988', '989', '990', '991', '992', '993', '994', '995', '996', '997', '998', '999', '1000', '1001', '1002', '1003', '1004', '1005', '1006', '1007', '1008', '1009', '1010', '1011', '1012', '1013', '1014', '1015', '1016', '1017', '1018', '1019', '1020', '1021', '1022', '1023', '1024', '1025', '1026', '1027', '1028', '1029', '1030', '1031', '1032', '1033', '1034', '1035', '1036', '1037', '1038', '1039', '1040', '1041', '1042', '1043', '1044', '1045', '1046', '1047', '1048', '1049', '1050', '1051', '1052', '1053', '1054', '1055', '1056', '1057', '1058', '1059', '1060', '1061', '1062', '1063', '1064', '1065', '1066', '1067', '1068', '1069', '1070', '1071', '1072', '1073', '1074', '1075', '1076', '1077', '1078', '1079', '1080', '1081', '1082', '1083', '1084', '1085', '1086', '1087', '1088', '1089', '1090', '1091', '1092', '1093', '1094', '1095', '1096', '1097', '1098', '1099', '1100', '1101', '1102', '1103'], 'AF347054.1': 
['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286', '287', '288', '289', '290', '291', '292', '293', '294', '295', '296', '297', '298', '299', '300', '301', 
'302', '303', '304', '305', '306', '307', '308', '309', '310', '311', '312', '313', '314', '315', '316', '317', '318', '319', '320', '321', '322', '323', '324', '325', '326', '327', '328', '329', '330', '331', '332', '333', '334', '335', '336', '337', '338', '339', '340', '341', '342', '343', '344', '345', '346', '347', '348', '349', '350', '351', '352', '353', '354', '355', '356', '357', '358', '359', '360', '361', '362', '363', '364', '365', '366', '367', '368', '369', '370', '371', '372', '373', '374', '375', '376', '377', '378', '379', '380', '381', '382', '383', '384', '385', '386', '387', '388', '389', '390', '391', '392', '393', '394', '395', '396', '397', '398', '399', '400', '401', '402', '403', '404', '405', '406', '407', '408', '409', '410', '411', '412', '413', '414', '415', '416', '417', '418', '419', '420', '421', '422', '423', '424', '425', '426', '427', '428', '429', '430', '431', '432', '433', '434', '435', '436', '437', '438', '439', '440', '441', '442', '443', '444', '445', '446', '447', '448', '449', '450', '451', '452', '453', '454', '455', '456', '457', '458', '459', '460', '461', '462', '463', '464', '465', '466', '467', '468', '469', '470', '471', '472', '473', '474', '475', '476', '477', '478', '479', '480', '481', '482', '483', '484', '485', '486', '487', '488', '489', '490', '491', '492', '493', '494', '495', '496', '497', '498', '499', '500', '501', '502', '503', '504', '505', '506', '507', '508', '509', '510', '511', '512', '513', '514', '515', '516', '517', '518', '519', '520', '521', '522', '523', '524', '525', '526', '527', '528', '529', '530', '531', '532', '533', '534', '535', '536', '537', '538', '539', '540', '541', '542', '543', '544', '545', '546', '547', '548', '549', '550', '551', '552', '553', '554', '555', '556', '557', '558', '559', '560', '561', '562', '563', '564', '565', '566', '567', '568', '569', '570', '571', '572', '573', '574', '575', '576', '577', '578', '579', '580', '581', '582', '583', '584', '585', '586', 
'587', '588', '589', '590', '591', '592', '593', '594', '595', '596', '597', '598', '599', '600', '601', '602', '603', '604', '605', '606', '607', '608', '609', '610', '611', '612', '613', '614', '615', '616', '617', '618', '619', '620', '621', '622', '623', '624', '625', '626', '627', '628', '629', '630', '631', '632', '633', '634', '635', '636', '637', '638', '639', '640', '641', '642', '643', '644', '645', '646', '647', '648', '649', '650', '651', '652', '653', '654', '655', '656', '657', '658', '659', '660', '661', '662', '663', '664', '665', '666', '667', '668', '669', '670', '671', '672', '673', '674', '675', '676', '677', '678', '679', '680', '681', '682', '683', '684', '685', '686', '687', '688', '689', '690', '691', '692', '693', '694', '695', '696', '697', '698', '699', '700', '701', '702', '703', '704', '705', '706', '707', '708', '709', '710', '711', '712', '713', '714', '715', '716', '717', '718', '719', '720', '721', '722', '723', '724', '725', '726', '727', '728', '729', '730', '731', '732', '733', '734', '735', '736', '737', '738', '739', '740', '741', '742', '743', '744', '745', '746', '747', '748', '749', '750', '751', '752', '753', '754', '755', '756', '757', '758', '759', '760', '761', '762', '763', '764', '765', '766', '767', '768', '769', '770', '771', '772', '773', '774', '775', '776', '777', '778', '779', '780', '781', '782', '783', '784', '785', '786', '787', '788', '789', '790', '791', '792', '793', '794', '795', '796', '797', '798', '799', '800', '801', '802', '803', '804', '805', '806', '807', '808', '809', '810', '811', '812', '813', '814', '815', '816', '817', '818', '819', '820', '821', '822', '823', '824', '825', '826', '827', '828', '829', '830', '831', '832', '833', '834', '835', '836', '837', '838', '839', '840', '841', '842', '843', '844', '845', '846', '847', '848', '849', '850', '851', '852', '853', '854', '855', '856', '857', '858', '859', '860', '861', '862', '863', '864', '865', '866', '867', '868', '869', '870', '871', 
'872', '873', '874', '875', '876', '877', '878', '879', '880', '881', '882', '883', '884', '885', '886', '887', '888', '889', '890', '891', '892', '893', '894', '895', '896', '897', '898', '899', '900', '901', '902', '903', '904', '905', '906', '907', '908', '909', '910', '911', '912', '913', '914', '915', '916', '917', '918', '919', '920', '921', '922', '923', '924', '925', '926', '927', '928', '929', '930', '931', '932', '933', '934', '935', '936', '937', '938', '939', '940', '941', '942', '943', '944', '945', '946', '947', '948', '949', '950', '951', '952', '953', '954', '955', '956', '957', '958', '959', '960', '961', '962', '963', '964', '965', '966', '967', '968', '969', '970', '971', '972', '973', '974', '975', '976', '977', '978', '979', '980', '981', '982', '983', '984', '985', '986', '987', '988', '989', '990', '991', '992', '993', '994', '995', '996', '997', '998', '999', '1000', '1001', '1002', '1003', '1004', '1005', '1006', '1007', '1008', '1009', '1010', '1011', '1012', '1013', '1014', '1015', '1016', '1017', '1018', '1019', '1020', '1021', '1022', '1023', '1024', '1025', '1026', '1027', '1028', '1029', '1030', '1031', '1032', '1033', '1034', '1035', '1036', '1037', '1038', '1039', '1040', '1041', '1042', '1043', '1044', '1045', '1046', '1047', '1048', '1049', '1050', '1051', '1052', '1053', '1054', '1055', '1056', '1057', '1058', '1059', '1060', '1061', '1062', '1063', '1064', '1065', '1066', '1067', '1068', '1069', '1070', '1071', '1072', '1073', '1074', '1075', '1076', '1077', '1078', '1079', '1080', '1081', '1082'], 'JX042489.1': ['67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', 
'133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286', '287', '288', '289', '290', '291', '292', '293', '294', '295', '296', '297', '298', '299', '300', '301', '302', '303', '304', '305', '306', '307', '308', '309', '310', '311', '312', '313', '314', '315', '316', '317', '318', '319', '320', '321', '322', '323', '324', '325', '326', '327', '328', '329', '330', '331', '332', '333', '334', '335', '336', '337', '338', '339', '340', '341', '342', '343', '344', '345', '346', '347', '348', '349', '350', '351', '352', '353', '354', '355', '356', '357', '358', '359', '360', '361', '362', '363', '364', '365', '366', '367', '368', '369', '370', '371', '372', '373', '374', '375', '376', '377', '378', '379', '380', '381', '382', '383', '384', '385', '386', '387', '388', '389', '390', '391', '392', '393', '394', '395', '396', '397', '398', '399', '400', '401', '402', '403', '404', '405', '406', '407', '408', '409', '410', '411', '412', '413', '414', '415', '416', '417', 
'418', '419', '420', '421', '422', '423', '424', '425', '426', '427', '428', '429', '430', '431', '432', '433', '434', '435', '436', '437', '438', '439', '440', '441', '442', '443', '444', '445', '446', '447', '448', '449', '450', '451', '452', '453', '454', '455', '456', '457', '458', '459', '460', '461', '462', '463', '464', '465', '466', '467', '468', '469', '470', '471', '472', '473', '474', '475', '476', '477', '478', '479', '480', '481', '482', '483', '484', '485', '486', '487', '488', '489', '490', '491', '492', '493', '494', '495', '496', '497', '498', '499', '500', '501', '502', '503', '504', '505', '506', '507', '508', '509', '510', '511', '512', '513', '514', '515', '516', '517', '518', '519', '520', '521', '522', '523', '524', '525', '526', '527', '528', '529', '530', '531', '532', '533', '534', '535', '536', '537', '538', '539', '540', '541', '542', '543', '544', '545', '546', '547', '548', '549', '550', '551', '552', '553', '554', '555', '556', '557', '558', '559', '560', '561', '562', '563', '564', '565', '566', '567', '568', '569', '570', '571', '572', '573', '574', '575', '576', '577', '578', '579', '580', '581', '582', '583', '584', '585', '586', '587', '588', '589', '590', '591', '592', '593', '594', '595', '596', '597', '598', '599', '600', '601', '602', '603', '604', '605', '606', '607', '608', '609', '610', '611', '612', '613', '614', '615', '616', '617', '618', '619', '620', '621', '622', '623', '624', '625', '626', '627', '628', '629', '630', '631', '632', '633', '634', '635', '636', '637', '638', '639', '640', '641', '642', '643', '644', '645', '646', '647', '648', '649', '650', '651', '652', '653', '654', '655', '656', '657', '658', '659', '660', '661', '662', '663', '664', '665', '666', '667', '668', '669', '670', '671', '672', '673', '674', '675', '676', '677', '678', '679', '680', '681', '682', '683', '684', '685', '686', '687', '688', '689', '690', '691', '692', '693', '694', '695', '696', '697', '698', '699', '700', '701', '702', 
'703', '704', '705', '706', '707', '708', '709', '710', '711', '712', '713', '714', '715', '716', '717', '718', '719', '720', '721', '722', '723', '724', '725', '726', '727', '728', '729', '730', '731', '732', '733', '734', '735', '736', '737', '738', '739', '740', '741', '742', '743', '744', '745', '746', '747', '748', '749', '750', '751', '752', '753', '754', '755', '756', '757', '758', '759', '760', '761', '762', '763', '764', '765', '766', '767', '768', '769', '770', '771', '772', '773', '774', '775', '776', '777', '778', '779', '780', '781', '782', '783', '784', '785', '786', '787', '788', '789', '790', '791', '792', '793', '794', '795', '796', '797', '798', '799', '800', '801', '802', '803', '804', '805', '806', '807', '808', '809', '810', '811', '812', '813', '814', '815', '816', '817', '818', '819', '820', '821', '822', '823', '824', '825', '826', '827', '828', '829', '830', '831', '832', '833', '834', '835', '836', '837', '838', '839', '840', '841', '842', '843', '844', '845', '846', '847', '848', '849', '850', '851', '852', '853', '854', '855', '856', '857', '858', '859', '860', '861', '862', '863', '864', '865', '866', '867', '868', '869', '870', '871', '872', '873', '874', '875', '876', '877', '878', '879', '880', '881', '882', '883', '884', '885', '886', '887', '888', '889', '890', '891', '892', '893', '894', '895', '896', '897', '898', '899', '900', '901', '902', '903', '904', '905', '906', '907', '908', '909', '910', '911', '912', '913', '914', '915', '916', '917', '918', '919', '920', '921', '922', '923', '924', '925', '926', '927', '928', '929', '930', '931', '932', '933', '934', '935', '936', '937', '938', '939', '940', '941', '942', '943', '944', '945', '946', '947', '948', '949', '950', '951', '952', '953', '954', '955', '956', '957', '958', '959', '960', '961', '962', '963', '964', '965', '966', '967', '968', '969', '970', '971', '972', '973', '974', '975', '976', '977', '978', '979', '980', '981', '982', '983', '984', '985', '986', '987', 
'988', '989', '990', '991', '992', '993', '994', '995', '996', '997', '998', '999', '1000', '1001', '1002', '1003', '1004', '1005', '1006', '1007', '1008', '1009', '1010', '1011', '1012', '1013', '1014', '1015', '1016', '1017', '1018', '1019', '1020', '1021', '1022', '1023', '1024', '1025', '1026', '1027', '1028', '1029', '1030', '1031', '1032', '1033', '1034', '1035', '1036', '1037', '1038', '1039', '1040', '1041', '1042', '1043', '1044', '1045', '1046', '1047', '1048', '1049', '1050', '1051', '1052', '1053', '1054', '1055', '1056', '1057', '1058', '1059', '1060', '1061', '1062', '1063', '1064', '1065', '1066', '1067', '1068', '1069'], 'KC844056.1': ['209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286', '287', '288', '289', '290', '291', '292', '293', '294', '295', '296', '297', '298', '299', '300', '301', '302', '303', '304', '305', '306', '307', '308', '309', '310', '311', '312', '313', '314', '315', '316', '317', '318', '319', '320', '321', '322', '323', '324', '325', '326', '327', '328', '329', '330', '331', '332', '333', '334', '335', '336', '337', '338', '339', '340', '341', '342', '343', '344', '345', '346', '347', '348', '349', '350', '351', '352', '353', '354', '355', '356', '357', '358', '359', '360', '361', '362', '363', '364', '365', '366', '367', '368', '369', '370', '371', '372', '373', '374', '375', '376', '377', '378', '379', '380', '381', '382', '383', '384', '385', '386', '387', '388', '389', '390', '391', '392', '393', '394', '395', '396', '397', '398', '399', 
'400', '401', '402', '403', '404', '405', '406', '407', '408', '409', '410', '411', '412', '413', '414', '415', '416', '417', '418', '419', '420', '421', '422', '423', '424', '425', '426', '427', '428', '429', '430', '431', '432', '433', '434', '435', '436', '437', '438', '439', '440', '441', '442', '443', '444', '445', '446', '447', '448', '449', '450', '451', '452', '453', '454', '455', '456', '457', '458', '459', '460', '461', '462', '463', '464', '465', '466', '467', '468', '469', '470', '471', '472', '473', '474', '475', '476', '477', '478', '479', '480', '481', '482', '483', '484', '485', '486', '487', '488', '489', '490', '491', '492', '493', '494', '495', '496', '497', '498', '499', '500', '501', '502', '503', '504', '505', '506', '507', '508', '509', '510', '511', '512', '513', '514', '515', '516', '517', '518', '519', '520', '521', '522', '523', '524', '525', '526', '527', '528', '529', '530', '531', '532', '533', '534', '535', '536', '537', '538', '539', '540', '541', '542', '543', '544', '545', '546', '547', '548', '549', '550', '551', '552', '553', '554', '555', '556', '557', '558', '559', '560', '561', '562', '563', '564', '565', '566', '567', '568', '569', '570', '571', '572', '573', '574', '575', '576', '577', '578', '579', '580', '581', '582', '583', '584', '585', '586', '587', '588', '589', '590', '591', '592', '593', '594', '595', '596', '597', '598', '599', '600', '601', '602', '603', '604', '605', '606', '607', '608', '609', '610', '611', '612', '613', '614', '615', '616', '617', '618', '619', '620', '621', '622', '623', '624', '625', '626', '627', '628', '629', '630', '631', '632', '633', '634', '635', '636', '637', '638', '639', '640', '641', '642', '643', '644', '645', '646', '647', '648', '649', '650', '651', '652', '653', '654', '655', '656', '657', '658', '659', '660', '661', '662', '663', '664', '665', '666', '667', '668', '669', '670', '671', '672', '673', '674', '675', '676', '677', '678', '679', '680', '681', '682', '683', '684', 
'685', '686', '687', '688', '689', '690', '691', '692', '693', '694', '695', '696', '697', '698', '699', '700', '701', '702', '703', '704', '705', '706', '707', '708', '709', '710', '711', '712', '713', '714', '715', '716', '717', '718', '719', '720', '721', '722', '723', '724', '725', '726', '727', '728', '729', '730', '731', '732', '733', '734', '735', '736', '737', '738', '739', '740', '741', '742', '743', '744', '745', '746', '747', '748', '749', '750', '751', '752', '753', '754', '755', '756', '757', '758', '759', '760', '761', '762', '763', '764', '765', '766', '767', '768', '769', '770', '771', '772', '773', '774', '775', '776', '777', '778', '779', '780', '781', '782', '783', '784', '785', '786', '787', '788', '789', '790', '791', '792', '793', '794', '795', '796', '797', '798', '799', '800', '801', '802', '803', '804', '805', '806', '807', '808', '809', '810', '811', '812', '813', '814', '815', '816', '817', '818', '819', '820', '821', '822', '823', '824', '825', '826', '827', '828', '829', '830', '831', '832', '833', '834', '835', '836', '837', '838', '839', '840', '841', '842', '843', '844', '845', '846', '847', '848', '849', '850', '851', '852', '853', '854', '855', '856', '857', '858', '859', '860', '861', '862', '863', '864', '865', '866', '867', '868', '869', '870', '871', '872', '873', '874', '875', '876', '877', '878', '879', '880', '881', '882', '883', '884', '885', '886', '887', '888', '889', '890', '891', '892', '893', '894', '895', '896', '897', '898', '899', '900', '901', '902', '903', '904', '905', '906', '907', '908', '909', '910', '911', '912', '913', '914', '915', '916', '917', '918', '919', '920', '921', '922', '923', '924', '925', '926', '927', '928', '929', '930', '931', '932', '933', '934', '935', '936', '937', '938', '939', '940', '941', '942', '943', '944', '945', '946', '947', '948', '949', '950', '951', '952', '953', '954', '955', '956', '957', '958', '959', '960', '961', '962', '963', '964', '965', '966', '967', '968', '969', 
'970', '971', '972', '973', '974', '975', '976', '977', '978', '979', '980', '981', '982', '983', '984', '985', '986', '987', '988', '989', '990', '991', '992', '993', '994', '995', '996', '997', '998', '999', '1000', '1001', '1002', '1003', '1004', '1005', '1006', '1007', '1008', '1009', '1010', '1011', '1012', '1013', '1014', '1015', '1016', '1017', '1018', '1019', '1020', '1021', '1022', '1023', '1024', '1025', '1026', '1027', '1028', '1029', '1030', '1031', '1032', '1033', '1034', '1035', '1036', '1037', '1038', '1039', '1040', '1041', '1042', '1043', '1044', '1045', '1046', '1047', '1048', '1049', '1050', '1051', '1052', '1053', '1054', '1055', '1056', '1057', '1058', '1059', '1060', '1061', '1062', '1063', '1064', '1065', '1066', '1067', '1068', '1069'], 'Y17582.1': ['209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286', '287', '288', '289', '290', '291', '292', '293', '294', '295', '296', '297', '298', '299', '300', '301', '302', '303', '304', '305', '306', '307', '308', '309', '310', '311', '312', '313', '314', '315', '316', '317', '318', '319', '320', '321', '322', '323', '324', '325', '326', '327', '328', '329', '330', '331', '332', '333', '334', '335', '336', '337', '338', '339', '340', '341', '342', '343', '344', '345', '346', '347', '348', '349', '350', '351', '352', '353', '354', '355', '356', '357', '358', '359', '360', '361', '362', '363', '364', '365', '366', '367', '368', '369', '370', '371', '372', '373', '374', '375', '376', '377', '378', '379', '380', '381', 
'382', '383', '384', '385', '386', '387', '388', '389', '390', '391', '392', '393', '394', '395', '396', '397', '398', '399', '400', '401', '402', '403', '404', '405', '406', '407', '408', '409', '410', '411', '412', '413', '414', '415', '416', '417', '418', '419', '420', '421', '422', '423', '424', '425', '426', '427', '428', '429', '430', '431', '432', '433', '434', '435', '436', '437', '438', '439', '440', '441', '442', '443', '444', '445', '446', '447', '448', '449', '450', '451', '452', '453', '454', '455', '456', '457', '458', '459', '460', '461', '462', '463', '464', '465', '466', '467', '468', '469', '470', '471', '472', '473', '474', '475', '476', '477', '478', '479', '480', '481', '482', '483', '484', '485', '486', '487', '488', '489', '490', '491', '492', '493', '494', '495', '496', '497', '498', '499', '500', '501', '502', '503', '504', '505', '506', '507', '508', '509', '510', '511', '512', '513', '514', '515', '516', '517', '518', '519', '520', '521', '522', '523', '524', '525', '526', '527', '528', '529', '530', '531', '532', '533', '534', '535', '536', '537', '538', '539', '540', '541', '542', '543', '544', '545', '546', '547', '548', '549', '550', '551', '552', '553', '554', '555', '556', '557', '558', '559', '560', '561', '562', '563', '564', '565', '566', '567', '568', '569', '570', '571', '572', '573', '574', '575', '576', '577', '578', '579', '580', '581', '582', '583', '584', '585', '586', '587', '588', '589', '590', '591', '592', '593', '594', '595', '596', '597', '598', '599', '600', '601', '602', '603', '604', '605', '606', '607', '608', '609', '610', '611', '612', '613', '614', '615', '616', '617', '618', '619', '620', '621', '622', '623', '624', '625', '626', '627', '628', '629', '630', '631', '632', '633', '634', '635', '636', '637', '638', '639', '640', '641', '642', '643', '644', '645', '646', '647', '648', '649', '650', '651', '652', '653', '654', '655', '656', '657', '658', '659', '660', '661', '662', '663', '664', '665', '666', 
'667', '668', '669', '670', '671', '672', '673', '674', '675', '676', '677', '678', '679', '680', '681', '682', '683', '684', '685', '686', '687', '688', '689', '690', '691', '692', '693', '694', '695', '696', '697', '698', '699', '700', '701', '702', '703', '704', '705', '706', '707', '708', '709', '710', '711', '712', '713', '714', '715', '716', '717', '718', '719', '720', '721', '722', '723', '724', '725', '726', '727', '728', '729', '730', '731', '732', '733', '734', '735', '736', '737', '738', '739', '740', '741', '742', '743', '744', '745', '746', '747', '748', '749', '750', '751', '752', '753', '754', '755', '756', '757', '758', '759', '760', '761', '762', '763', '764', '765', '766', '767', '768', '769', '770', '771', '772', '773', '774', '775', '776', '777', '778', '779', '780', '781', '782', '783', '784', '785', '786', '787', '788', '789', '790', '791', '792', '793', '794', '795', '796', '797', '798', '799', '800', '801', '802', '803', '804', '805', '806', '807', '808', '809', '810', '811', '812', '813', '814', '815', '816', '817', '818', '819', '820', '821', '822', '823', '824', '825', '826', '827', '828', '829', '830', '831', '832', '833', '834', '835', '836', '837', '838', '839', '840', '841', '842', '843', '844', '845', '846', '847', '848', '849', '850', '851', '852', '853', '854', '855', '856', '857', '858', '859', '860', '861', '862', '863', '864', '865', '866', '867', '868', '869', '870', '871', '872', '873', '874', '875', '876', '877', '878', '879', '880', '881', '882', '883', '884', '885', '886', '887', '888', '889', '890', '891', '892', '893', '894', '895', '896', '897', '898', '899', '900', '901', '902', '903', '904', '905', '906', '907', '908', '909', '910', '911', '912', '913', '914', '915', '916', '917', '918', '919', '920', '921', '922', '923', '924', '925', '926', '927', '928', '929', '930', '931', '932', '933', '934', '935', '936', '937', '938', '939', '940', '941', '942', '943', '944', '945', '946', '947', '948', '949', '950', '951', 
'952', '953', '954', '955', '956', '957', '958', '959', '960', '961', '962', '963', '964', '965', '966', '967', '968', '969', '970', '971', '972', '973', '974', '975', '976', '977', '978', '979', '980', '981', '982', '983', '984', '985', '986', '987', '988', '989', '990', '991', '992', '993', '994', '995', '996', '997', '998', '999', '1000', '1001', '1002', '1003', '1004', '1005', '1006', '1007', '1008', '1009', '1010', '1011', '1012', '1013', '1014', '1015', '1016', '1017', '1018', '1019', '1020', '1021', '1022', '1023', '1024', '1025', '1026', '1027', '1028', '1029', '1030', '1031', '1032', '1033', '1034', '1035', '1036', '1037', '1038', '1039', '1040', '1041', '1042', '1043', '1044', '1045', '1046', '1047', '1048', '1049', '1050', '1051', '1052', '1053', '1054', '1055', '1056', '1057', '1058', '1059', '1060', '1061', '1062', '1063', '1064', '1065', '1066']}\u001b[0m\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "sn_dna = StandardNumberingTool(name=\"test_standard_numbering_dna\")\n",
     "\n",
@@ -305,21 +214,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": 43,
    "metadata": {},
    "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\u001b[32m2025-03-14 16:01:53.600\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m385\u001b[0m - \u001b[1mPairs: [('AF190695.1', 'Y17582.1'), ('AF190695.1', 'AF347054.1'), ('AF190695.1', 'KC844056.1'), ('AF190695.1', 'JX042489.1')]\u001b[0m\n",
-      "\u001b[32m2025-03-14 16:01:53.601\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m394\u001b[0m - \u001b[1mInput: ['AF347054.1', 'JX042489.1', 'KC844056.1', 'Y17582.1', 'AF190695.1']\u001b[0m\n"
-     ]
-    },
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "ef5548988a3c419cbd5f0dc6719fa4c3",
+       "model_id": "6a3fb35a08714174b353558dedff592c",
        "version_major": 2,
        "version_minor": 0
       },
@@ -339,15 +240,6 @@
      },
      "metadata": {},
      "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\u001b[32m2025-03-14 16:01:58.161\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m403\u001b[0m - \u001b[1mPairwise alignment results: [{'query_id': 'AF190695.1', 'target_id': 'Y17582.1', 'score': 834.0, 'identity': 0.7679057116953762, 'gaps': 245, 'mismatches': 11, 'query_aligned': 'TTCTTGAAGACGAAAGGGCCTCGTGATACGCCTATTTTTATAGGTTAATGTCATGATAATAATGGTTTCTTAGACGTCAGGTGGCACTTTTCGGGGAAATGTGCGCGGAACCCCTATTTGTTTATTTTTCTAAATACATTCAAATATGTATCCGCTCATGAGACAATAACCCTGGTAAATGCTTCAATAATATTGAAAAAGGAAGAGTATGAGTATTCAACATTTTCGTGTCGCCCTTATTCCCTTTTTTGCGGCATTTTGCCTTCCTGTTTTTGCTCACCCAGAAACGCTGGTGAAAGTAAAAGATGCTGAAGATCAGTTGGGTGCACGAGTGGGTTACATCGAACTGGATCTCAACAGCGGTAAGATCCTTGAGAGTTTTCGCCCCGAAGAACGTTTTCCAATGCTGAGCACTTTTAAAGTTCTGCTATGTGGTGCGGTATTATCCCGTGTTGACGCCGGGCAAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCTGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCACAACATGGGGGATCATGTAACTCGCCTTGATCGTTGGGAACCGGAGCTGAATGAAGCCATACCAAACGACGAGCGTGACACCACGATGCCTGCAGCAATGGCAACAACGTTGCGCAAACTATTAACTGGCGAACTACTTACTCTAGCTTCCCGGCAACAATTAATAGACTGGATGGAGGCGGATAAAGTTGCAGGACCACTTCTGCGCTCGGCCCTTCCGGCTGGCTGGTTTATTGCTGATAAATCTGGAGCCGGTGAGCGTGGGTCTAGCGGTATCATTGCAGCACTGGGGCCAGATGGTAAGCCCTCCCGTATCGTAGTTATCTACACGACGGGGAGTCAGGCAACTATGGATGAACGAAATAGACAGATCGCTGAGATAGGTGCCTCACTGATTAAGCATTGGTAACTGTCAGACCAAGTTTACTCATATATACTTTAGA', 'target_aligned': 
'----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ATGAGTATTCAACATTTCCGTGTCGCCCTTATTCCCTTTTTTGCGGCATTTTGCCTTCCTGTTTTTGCTCACCCAGAAACGCTGGTGAAAGTAAAAGATGCTGAAGATAAGTTGGGTGCACGAGTGGGTTACATCGAGCTGGATCTCAACAGCGGTAAGATCCTTGAGAGTTTTCGCCCCGAAGAACGTTTTCCAATGATGAGCACTTTTAAAGTTCTGCTATGTGGTGCGGTATTATCCCGTGTTGACGCCGGGCAAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTAAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCGGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCGCAACATGGGGGATCATGTAACCCGCCTTGATCGTTGGGAACCGGAGCTGAATGAAGCCATACCAAACGACGAGCGTGACACCACGATGCCTGCAGCAATGGCAACAACGTTGCGCAAACTATTAACTGGCGAACTACTTACTCTAGCTTCCCGGCAACAATTAATAGACTGGATGGAGGCGGATAAAGTTGCAGGACCACTTCTGCGCTCGGCCCTTCCGGCTGGCTGGTTTATTGCTGATAAATCTGGAGCCAGTGAGCGTGGATCTCGCGGTATCATTGCAGCACTGGGGCCAGATGGTAAGCCCTCCCGTATCGTAGTTATCTACACGACGGGGAGTCAGGCAACTATGGATGAACGAAATAGACAGATCGCTGAGATAGGTGCCTCACTGATTAAGCATTGG-------------------------------------'}, {'query_id': 'AF190695.1', 'target_id': 'AF347054.1', 'score': 1065.0, 'identity': 0.9737080689029919, 'gaps': 21, 'mismatches': 8, 'query_aligned': 
'TTCTTGAAGACGAAAGGGCCTCGTGATACGCCTATTTTTATAGGTTAATGTCATGATAATAATGGTTTCTTAGACGTCAGGTGGCACTTTTCGGGGAAATGTGCGCGGAACCCCTATTTGTTTATTTTTCTAAATACATTCAAATATGTATCCGCTCATGAGACAATAACCCTGGTAAATGCTTCAATAATATTGAAAAAGGAAGAGTATGAGTATTCAACATTTTCGTGTCGCCCTTATTCCCTTTTTTGCGGCATTTTGCCTTCCTGTTTTTGCTCACCCAGAAACGCTGGTGAAAGTAAAAGATGCTGAAGATCAGTTGGGTGCACGAGTGGGTTACATCGAACTGGATCTCAACAGCGGTAAGATCCTTGAGAGTTTTCGCCCCGAAGAACGTTTTCCAATGCTGAGCACTTTTAAAGTTCTGCTATGTGGTGCGGTATTATCCCGTGTTGACGCCGGGCAAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCTGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCACAACATGGGGGATCATGTAACTCGCCTTGATCGTTGGGAACCGGAGCTGAATGAAGCCATACCAAACGACGAGCGTGACACCACGATGCCTGCAGCAATGGCAACAACGTTGCGCAAACTATTAACTGGCGAACTACTTACTCTAGCTTCCCGGCAACAATTAATAGACTGGATGGAGGCGGATAAAGTTGCAGGACCACTTCTGCGCTCGGCCCTTCCGGCTGGCTGGTTTATTGCTGATAAATCTGGAGCCGGTGAGCGTGGGTCTAGCGGTATCATTGCAGCACTGGGGCCAGATGGTAAGCCCTCCCGTATCGTAGTTATCTACACGACGGGGAGTCAGGCAACTATGGATGAACGAAATAGACAGATCGCTGAGATAGGTGCCTCACTGATTAAGCATTGGTAACTGTCAGACCAAGTTTACTCATATATACTTTAGA', 'target_aligned': 
'TTCTTGAAGACGAAAGGGCCTCGTGATACGCTTATTTTTATAGGTTAATGTCATGATAATAATGGTTTCTTAGACGTCAGGTGGCACTTTTCGGGGAAATGTGCGCGGAACCCCTATTTGTTTATTTTTCTAAATACATTCAAATATGTATCCGCTCATGAGACAATAACCCTGATAAATGCTTCAATAATATTGAAAAAGGAAGAGTATGAGTATTCAACATTTCCGTGTCGCCCTTATTCCCTTTTTTGCGGCATTTTGCCTTCCTGTTTTTGCTCACCCAGAAACGCTGGTGAAAGTAAAAGATGCTGAAGATCAGTTGGGTGCACGAGTGGGTTACATCGAACTGGATCTCAACAGCGGTAAGATCCTTGAGAGTTTTCGCCCCGAAGAACGTTTTCCAATGCTGAGCACTTTTAAAGTTCTGCTATGTGGCGCGGTATTATCCCGTGTTGACGCCGGGCAAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCGTAACCATGAGTGATAACACTGCGGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCACAACATGGGGGATCATGTAACTCGCCTTGATCGTTGGGAACCGGAGCTGAATGAAGCCATACCAAACGACGAGCGTGACACCACGATGCCTGCAGCAATGGCAACAACGTTGCGCAAACTATTAACTGGCGAACTACTTACTCTAGCTTCCCGGCAACAATTAATAGACTGGATGGAGGCGGATAAAGTTGCAGGACCACTTCTGCGCTCGGCCCTTCCGGCTGGCTGGTTTATTGCTGATAAATCTGGAGCCGGTGAGCGTGGGTCTCGCGGTATCATTGCAGCACTGGGGCCAGATGGTAAGCCCTCCCGTATCGTAGTTATCTACACGACGGGGAGTCAGGCAACTATGGATGAACGAGATAGACAGATCGCTGAGATAGGTGCCTCACTGATTAAGCATTGGTAACTGTCAGACCAAG---------------------'}, {'query_id': 'AF190695.1', 'target_id': 'KC844056.1', 'score': 855.0, 'identity': 0.7787851314596554, 'gaps': 242, 'mismatches': 2, 'query_aligned': 
'TTCTTGAAGACGAAAGGGCCTCGTGATACGCCTATTTTTATAGGTTAATGTCATGATAATAATGGTTTCTTAGACGTCAGGTGGCACTTTTCGGGGAAATGTGCGCGGAACCCCTATTTGTTTATTTTTCTAAATACATTCAAATATGTATCCGCTCATGAGACAATAACCCTGGTAAATGCTTCAATAATATTGAAAAAGGAAGAGTATGAGTATTCAACATTTTCGTGTCGCCCTTATTCCCTTTTTTGCGGCATTTTGCCTTCCTGTTTTTGCTCACCCAGAAACGCTGGTGAAAGTAAAAGATGCTGAAGATCAGTTGGGTGCACGAGTGGGTTACATCGAACTGGATCTCAACAGCGGTAAGATCCTTGAGAGTTTTCGCCCCGAAGAACGTTTTCCAATGCTGAGCACTTTTAAAGTTCTGCTATGTGGTGCGGTATTATCCCGTGTTGACGCCGGGCAAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCTGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCACAACATGGGGGATCATGTAACTCGCCTTGATCGTTGGGAACCGGAGCTGAATGAAGCCATACCAAACGACGAGCGTGACACCACGATGCCTGCAGCAATGGCAACAACGTTGCGCAAACTATTAACTGGCGAACTACTTACTCTAGCTTCCCGGCAACAATTAATAGACTGGATGGAGGCGGATAAAGTTGCAGGACCACTTCTGCGCTCGGCCCTTCCGGCTGGCTGGTTTATTGCTGATAAATCTGGAGCCGGTGAGCGTGGGTCTAGCGGTATCATTGCAGCACTGGGGCCAGATGGTAAGCCCTCCCGTATCGTAGTTATCTACACGACGGGGAGTCAGGCAACTATGGATGAACGAAATAGACAGATCGCTGAGATAGGTGCCTCACTGATTAAGCATTGGTAACTGTCAGACCAAGTTTACTCATATATACTTTAGA', 'target_aligned': 
'----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ATGAGTATTCAACATTTTCGTGTCGCCCTTATTCCCTTTTTTGCGGCATTTTGCCTTCCTGTTTTTGCTCACCCAGAAACGCTGGTGAAAGTAAAAGATGCTGAAGATCAGTTGGGTGCACGAGTGGGTTACATCGAACTGGATCTCAACAGCGGTAAGATCCTTGAGAGTTTTCGCCCCGAAGAACGTTTTCCAATGGTGAGCACTTTTAAAGTTCTGCTATGTGGTGCGGTATTATCCCGTGTTGACGCCGGGCAAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCTGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCACAACATGGGGGATCATGTAACTCGCCTTGATCGTTGGGAACCGGAGCTGAATGAAGCCATACCAAACGACGAGCGTGACACCACGATGCCTGCAGCAATGGCAACAACGTTGCGCAAACTATTAACTGGCGAACTACTTACTCTAGCTTCCCGGCAACAATTAATAGACTGGATGGAGGCGGATAAAGTTGCAGGACCACTTCTGCGCTCGGCCCTTCCGGCTGGCTGGTTTATTGCTGATAAATCTGGAGCCGGTGAGCGTGGGTCTCGCGGTATCATTGCAGCACTGGGGCCAGATGGTAAGCCCTCCCGTATCGTAGTTATCTACACGACGGGGAGTCAGGCAACTATGGATGAACGAAATAGACAGATCGCTGAGATAGGTGCCTCACTGATTAAGCATTGGTA----------------------------------A'}, {'query_id': 'AF190695.1', 'target_id': 'JX042489.1', 'score': 974.0, 'identity': 0.8967391304347826, 'gaps': 103, 'mismatches': 11, 'query_aligned': 
'TTCTTGAAGACGAAAGGGCCTCGTGATACGCCTATTTTTATAGGTTAATGTCATGATAATAATGGTTTCTTAGACGTCAGGTGGCACTTTTCGGGGAAATGTGCGCGGAACCCCTATTTGTTTA-TTTTTCTAAATACATTCAAATATGTATCCGCTCATGAGACAATAACCCTGGTAAATGCTTCAATAATATTGAAAAAGGAAGAGTATGAGTATTCAACATTTTCGTGTCGCCCTTATTCCCTTTTTTGCGGCATTTTGCCTTCCTGTTTTTGCTCACCCAGAAACGCTGGTGAAAGTAAAAGATGCTGAAGATCAGTTGGGTGCACGAGTGGGTTACATCGAACTGGATCTCAACAGCGGTAAGATCCTTGAGAGTTTTCGCCCCGAAGAACGTTTTCCAATGCTGAGCACTTTTAAAGTTCTGCTATGTGGTGCGGTATTATCCCGTGTTGACGCCGGGCAAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCTGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCACAACATGGGGGATCATGTAACTCGCCTTGATCGTTGGGAACCGGAGCTGAATGAAGCCATACCAAACGACGAGCGTGACACCACGATGCCTGCAGCAATGGCAACAACGTTGCGCAAACTATTAACTGGCGAACTACTTACTCTAGCTTCCCGGCAACAATTAATAGACTGGATGGAGGCGGATAAAGTTGCAGGACCACTTCTGCGCTCGGCCCTTCCGGCTGGCTGGTTTATTGCTGATAAATCTGGAGCCGGTGAGCGTGGGTCTAGCGGTATCATTGCAGCACTGGGGCCAGATGGTAAGCCCTCCCGTATCGTAGTTATCTACACGACGGGGAGTCAGGCAACTATGGATGAACGAAATAGACAGATCGCTGAGATAGGTGCCTCACTGATTAAGCATTGGTAACTGTCAGACCAAGTTTACTCATATATACTTTAGA', 'target_aligned': 
'------------------------------------------------------------------TTCTTAGACGTCAGGTGGCAC-TTTAGGGGAAATGTGCGCGGAACCCCTATTTGTTTATTTTTTCT-AATACATTCAAATATGTATCCGCTCATGATACAATAACCCTGATAAATGCTTCAATAATATTGAAAAAGGAAGAGTATGAGTATTCAACATTTCCGTGTCGCCCTTATTCCCTTTTTTGCGGCATTTTGCCTTCCTGTTTTTGCTCACCCAGAAACGCTGGTGAAAGTAAAAGATGCTGAAGATCAGTTGGGTGCACGAGTGGGTTACATCGAGCTGGATCTCAACAGCGGTAAGATCCTTGAGAGTTTTCGCCCCGAAGAACGTTTTCCAATGATGAGCACTTTTAAAGTTCTGCTATGTGGTGCGGTATTATCCCGTGTTGACGCCGGGCAAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCGGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCACAACATGGGGGATCATGTAACCCGCCTTGATCGTTGGGAACCGGAGCTGAATGAAGCCATACCAAACGACGAGCGTGACACCACGATGCCTGCAGCAATGGCAACAACGTTGCGCAAACTATTAACTGGCGAACTACTTACTCTAGCTTCCCGGCAACAATTAATAGACTGGATGGAGGCGGATAAAGTTGCAGGACCACTTCTGCGCTCGGCCCTTCCGGCTGGCTGGTTTATTGCTGATAAATCTGGAGCCAGTGAGCGTGGATCTCGCGGTATCATTGCAGCACTGGGGCCAGATGGTAAGCCCTCCCGTATCGTAGTTATCTACACGACGGGGAGTCAGGCAACTATGGATGAACGAAATAGACAGATCGCTGAGATAGGTGCCTCACTGATTAAGCATTGGTA----------------------------------A'}]\u001b[0m\n",
-      "\u001b[32m2025-03-14 16:01:58.163\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m422\u001b[0m - \u001b[1mConverted alignment: 4\u001b[0m\n",
-      "\u001b[32m2025-03-14 16:01:58.169\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.analysis.standard_numbering\u001b[0m:\u001b[36mapply_standard_numbering_pairwise\u001b[0m:\u001b[36m429\u001b[0m - \u001b[1mPositions: {'AF190695.1': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', 
'266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286', '287', '288', '289', '290', '291', '292', '293', '294', '295', '296', '297', '298', '299', '300', '301', '302', '303', '304', '305', '306', '307', '308', '309', '310', '311', '312', '313', '314', '315', '316', '317', '318', '319', '320', '321', '322', '323', '324', '325', '326', '327', '328', '329', '330', '331', '332', '333', '334', '335', '336', '337', '338', '339', '340', '341', '342', '343', '344', '345', '346', '347', '348', '349', '350', '351', '352', '353', '354', '355', '356', '357', '358', '359', '360', '361', '362', '363', '364', '365', '366', '367', '368', '369', '370', '371', '372', '373', '374', '375', '376', '377', '378', '379', '380', '381', '382', '383', '384', '385', '386', '387', '388', '389', '390', '391', '392', '393', '394', '395', '396', '397', '398', '399', '400', '401', '402', '403', '404', '405', '406', '407', '408', '409', '410', '411', '412', '413', '414', '415', '416', '417', '418', '419', '420', '421', '422', '423', '424', '425', '426', '427', '428', '429', '430', '431', '432', '433', '434', '435', '436', '437', '438', '439', '440', '441', '442', '443', '444', '445', '446', '447', '448', '449', '450', '451', '452', '453', '454', '455', '456', '457', '458', '459', '460', '461', '462', '463', '464', '465', '466', '467', '468', '469', '470', '471', '472', '473', '474', '475', '476', '477', '478', '479', '480', '481', '482', '483', '484', '485', '486', '487', '488', '489', '490', '491', '492', '493', '494', '495', '496', '497', '498', '499', '500', '501', '502', '503', '504', '505', '506', '507', '508', '509', '510', '511', '512', '513', '514', '515', '516', '517', '518', '519', '520', '521', '522', '523', '524', '525', '526', '527', '528', '529', '530', '531', '532', '533', '534', '535', '536', '537', '538', '539', '540', '541', '542', '543', '544', '545', '546', '547', '548', '549', '550', 
'551', '552', '553', '554', '555', '556', '557', '558', '559', '560', '561', '562', '563', '564', '565', '566', '567', '568', '569', '570', '571', '572', '573', '574', '575', '576', '577', '578', '579', '580', '581', '582', '583', '584', '585', '586', '587', '588', '589', '590', '591', '592', '593', '594', '595', '596', '597', '598', '599', '600', '601', '602', '603', '604', '605', '606', '607', '608', '609', '610', '611', '612', '613', '614', '615', '616', '617', '618', '619', '620', '621', '622', '623', '624', '625', '626', '627', '628', '629', '630', '631', '632', '633', '634', '635', '636', '637', '638', '639', '640', '641', '642', '643', '644', '645', '646', '647', '648', '649', '650', '651', '652', '653', '654', '655', '656', '657', '658', '659', '660', '661', '662', '663', '664', '665', '666', '667', '668', '669', '670', '671', '672', '673', '674', '675', '676', '677', '678', '679', '680', '681', '682', '683', '684', '685', '686', '687', '688', '689', '690', '691', '692', '693', '694', '695', '696', '697', '698', '699', '700', '701', '702', '703', '704', '705', '706', '707', '708', '709', '710', '711', '712', '713', '714', '715', '716', '717', '718', '719', '720', '721', '722', '723', '724', '725', '726', '727', '728', '729', '730', '731', '732', '733', '734', '735', '736', '737', '738', '739', '740', '741', '742', '743', '744', '745', '746', '747', '748', '749', '750', '751', '752', '753', '754', '755', '756', '757', '758', '759', '760', '761', '762', '763', '764', '765', '766', '767', '768', '769', '770', '771', '772', '773', '774', '775', '776', '777', '778', '779', '780', '781', '782', '783', '784', '785', '786', '787', '788', '789', '790', '791', '792', '793', '794', '795', '796', '797', '798', '799', '800', '801', '802', '803', '804', '805', '806', '807', '808', '809', '810', '811', '812', '813', '814', '815', '816', '817', '818', '819', '820', '821', '822', '823', '824', '825', '826', '827', '828', '829', '830', '831', '832', '833', '834', '835', 
'836', '837', '838', '839', '840', '841', '842', '843', '844', '845', '846', '847', '848', '849', '850', '851', '852', '853', '854', '855', '856', '857', '858', '859', '860', '861', '862', '863', '864', '865', '866', '867', '868', '869', '870', '871', '872', '873', '874', '875', '876', '877', '878', '879', '880', '881', '882', '883', '884', '885', '886', '887', '888', '889', '890', '891', '892', '893', '894', '895', '896', '897', '898', '899', '900', '901', '902', '903', '904', '905', '906', '907', '908', '909', '910', '911', '912', '913', '914', '915', '916', '917', '918', '919', '920', '921', '922', '923', '924', '925', '926', '927', '928', '929', '930', '931', '932', '933', '934', '935', '936', '937', '938', '939', '940', '941', '942', '943', '944', '945', '946', '947', '948', '949', '950', '951', '952', '953', '954', '955', '956', '957', '958', '959', '960', '961', '962', '963', '964', '965', '966', '967', '968', '969', '970', '971', '972', '973', '974', '975', '976', '977', '978', '979', '980', '981', '982', '983', '984', '985', '986', '987', '988', '989', '990', '991', '992', '993', '994', '995', '996', '997', '998', '999', '1000', '1001', '1002', '1003', '1004', '1005', '1006', '1007', '1008', '1009', '1010', '1011', '1012', '1013', '1014', '1015', '1016', '1017', '1018', '1019', '1020', '1021', '1022', '1023', '1024', '1025', '1026', '1027', '1028', '1029', '1030', '1031', '1032', '1033', '1034', '1035', '1036', '1037', '1038', '1039', '1040', '1041', '1042', '1043', '1044', '1045', '1046', '1047', '1048', '1049', '1050', '1051', '1052', '1053', '1054', '1055', '1056', '1057', '1058', '1059', '1060', '1061', '1062', '1063', '1064', '1065', '1066', '1067', '1068', '1069', '1070', '1071', '1072', '1073', '1074', '1075', '1076', '1077', '1078', '1079', '1080', '1081', '1082', '1083', '1084', '1085', '1086', '1087', '1088', '1089', '1090', '1091', '1092', '1093', '1094', '1095', '1096', '1097', '1098', '1099', '1100', '1101', '1102', '1103'], 'Y17582.1': 
['209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286', '287', '288', '289', '290', '291', '292', '293', '294', '295', '296', '297', '298', '299', '300', '301', '302', '303', '304', '305', '306', '307', '308', '309', '310', '311', '312', '313', '314', '315', '316', '317', '318', '319', '320', '321', '322', '323', '324', '325', '326', '327', '328', '329', '330', '331', '332', '333', '334', '335', '336', '337', '338', '339', '340', '341', '342', '343', '344', '345', '346', '347', '348', '349', '350', '351', '352', '353', '354', '355', '356', '357', '358', '359', '360', '361', '362', '363', '364', '365', '366', '367', '368', '369', '370', '371', '372', '373', '374', '375', '376', '377', '378', '379', '380', '381', '382', '383', '384', '385', '386', '387', '388', '389', '390', '391', '392', '393', '394', '395', '396', '397', '398', '399', '400', '401', '402', '403', '404', '405', '406', '407', '408', '409', '410', '411', '412', '413', '414', '415', '416', '417', '418', '419', '420', '421', '422', '423', '424', '425', '426', '427', '428', '429', '430', '431', '432', '433', '434', '435', '436', '437', '438', '439', '440', '441', '442', '443', '444', '445', '446', '447', '448', '449', '450', '451', '452', '453', '454', '455', '456', '457', '458', '459', '460', '461', '462', '463', '464', '465', '466', '467', '468', '469', '470', '471', '472', '473', '474', '475', '476', '477', '478', '479', '480', '481', '482', '483', '484', '485', '486', '487', '488', '489', '490', '491', '492', '493', 
'494', '495', '496', '497', '498', '499', '500', '501', '502', '503', '504', '505', '506', '507', '508', '509', '510', '511', '512', '513', '514', '515', '516', '517', '518', '519', '520', '521', '522', '523', '524', '525', '526', '527', '528', '529', '530', '531', '532', '533', '534', '535', '536', '537', '538', '539', '540', '541', '542', '543', '544', '545', '546', '547', '548', '549', '550', '551', '552', '553', '554', '555', '556', '557', '558', '559', '560', '561', '562', '563', '564', '565', '566', '567', '568', '569', '570', '571', '572', '573', '574', '575', '576', '577', '578', '579', '580', '581', '582', '583', '584', '585', '586', '587', '588', '589', '590', '591', '592', '593', '594', '595', '596', '597', '598', '599', '600', '601', '602', '603', '604', '605', '606', '607', '608', '609', '610', '611', '612', '613', '614', '615', '616', '617', '618', '619', '620', '621', '622', '623', '624', '625', '626', '627', '628', '629', '630', '631', '632', '633', '634', '635', '636', '637', '638', '639', '640', '641', '642', '643', '644', '645', '646', '647', '648', '649', '650', '651', '652', '653', '654', '655', '656', '657', '658', '659', '660', '661', '662', '663', '664', '665', '666', '667', '668', '669', '670', '671', '672', '673', '674', '675', '676', '677', '678', '679', '680', '681', '682', '683', '684', '685', '686', '687', '688', '689', '690', '691', '692', '693', '694', '695', '696', '697', '698', '699', '700', '701', '702', '703', '704', '705', '706', '707', '708', '709', '710', '711', '712', '713', '714', '715', '716', '717', '718', '719', '720', '721', '722', '723', '724', '725', '726', '727', '728', '729', '730', '731', '732', '733', '734', '735', '736', '737', '738', '739', '740', '741', '742', '743', '744', '745', '746', '747', '748', '749', '750', '751', '752', '753', '754', '755', '756', '757', '758', '759', '760', '761', '762', '763', '764', '765', '766', '767', '768', '769', '770', '771', '772', '773', '774', '775', '776', '777', '778', 
'779', '780', '781', '782', '783', '784', '785', '786', '787', '788', '789', '790', '791', '792', '793', '794', '795', '796', '797', '798', '799', '800', '801', '802', '803', '804', '805', '806', '807', '808', '809', '810', '811', '812', '813', '814', '815', '816', '817', '818', '819', '820', '821', '822', '823', '824', '825', '826', '827', '828', '829', '830', '831', '832', '833', '834', '835', '836', '837', '838', '839', '840', '841', '842', '843', '844', '845', '846', '847', '848', '849', '850', '851', '852', '853', '854', '855', '856', '857', '858', '859', '860', '861', '862', '863', '864', '865', '866', '867', '868', '869', '870', '871', '872', '873', '874', '875', '876', '877', '878', '879', '880', '881', '882', '883', '884', '885', '886', '887', '888', '889', '890', '891', '892', '893', '894', '895', '896', '897', '898', '899', '900', '901', '902', '903', '904', '905', '906', '907', '908', '909', '910', '911', '912', '913', '914', '915', '916', '917', '918', '919', '920', '921', '922', '923', '924', '925', '926', '927', '928', '929', '930', '931', '932', '933', '934', '935', '936', '937', '938', '939', '940', '941', '942', '943', '944', '945', '946', '947', '948', '949', '950', '951', '952', '953', '954', '955', '956', '957', '958', '959', '960', '961', '962', '963', '964', '965', '966', '967', '968', '969', '970', '971', '972', '973', '974', '975', '976', '977', '978', '979', '980', '981', '982', '983', '984', '985', '986', '987', '988', '989', '990', '991', '992', '993', '994', '995', '996', '997', '998', '999', '1000', '1001', '1002', '1003', '1004', '1005', '1006', '1007', '1008', '1009', '1010', '1011', '1012', '1013', '1014', '1015', '1016', '1017', '1018', '1019', '1020', '1021', '1022', '1023', '1024', '1025', '1026', '1027', '1028', '1029', '1030', '1031', '1032', '1033', '1034', '1035', '1036', '1037', '1038', '1039', '1040', '1041', '1042', '1043', '1044', '1045', '1046', '1047', '1048', '1049', '1050', '1051', '1052', '1053', '1054', '1055', 
'1056', '1057', '1058', '1059', '1060', '1061', '1062', '1063', '1064', '1065', '1066'], 'AF347054.1': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286', 
'287', '288', '289', '290', '291', '292', '293', '294', '295', '296', '297', '298', '299', '300', '301', '302', '303', '304', '305', '306', '307', '308', '309', '310', '311', '312', '313', '314', '315', '316', '317', '318', '319', '320', '321', '322', '323', '324', '325', '326', '327', '328', '329', '330', '331', '332', '333', '334', '335', '336', '337', '338', '339', '340', '341', '342', '343', '344', '345', '346', '347', '348', '349', '350', '351', '352', '353', '354', '355', '356', '357', '358', '359', '360', '361', '362', '363', '364', '365', '366', '367', '368', '369', '370', '371', '372', '373', '374', '375', '376', '377', '378', '379', '380', '381', '382', '383', '384', '385', '386', '387', '388', '389', '390', '391', '392', '393', '394', '395', '396', '397', '398', '399', '400', '401', '402', '403', '404', '405', '406', '407', '408', '409', '410', '411', '412', '413', '414', '415', '416', '417', '418', '419', '420', '421', '422', '423', '424', '425', '426', '427', '428', '429', '430', '431', '432', '433', '434', '435', '436', '437', '438', '439', '440', '441', '442', '443', '444', '445', '446', '447', '448', '449', '450', '451', '452', '453', '454', '455', '456', '457', '458', '459', '460', '461', '462', '463', '464', '465', '466', '467', '468', '469', '470', '471', '472', '473', '474', '475', '476', '477', '478', '479', '480', '481', '482', '483', '484', '485', '486', '487', '488', '489', '490', '491', '492', '493', '494', '495', '496', '497', '498', '499', '500', '501', '502', '503', '504', '505', '506', '507', '508', '509', '510', '511', '512', '513', '514', '515', '516', '517', '518', '519', '520', '521', '522', '523', '524', '525', '526', '527', '528', '529', '530', '531', '532', '533', '534', '535', '536', '537', '538', '539', '540', '541', '542', '543', '544', '545', '546', '547', '548', '549', '550', '551', '552', '553', '554', '555', '556', '557', '558', '559', '560', '561', '562', '563', '564', '565', '566', '567', '568', '569', '570', '571', 
'572', '573', '574', '575', '576', '577', '578', '579', '580', '581', '582', '583', '584', '585', '586', '587', '588', '589', '590', '591', '592', '593', '594', '595', '596', '597', '598', '599', '600', '601', '602', '603', '604', '605', '606', '607', '608', '609', '610', '611', '612', '613', '614', '615', '616', '617', '618', '619', '620', '621', '622', '623', '624', '625', '626', '627', '628', '629', '630', '631', '632', '633', '634', '635', '636', '637', '638', '639', '640', '641', '642', '643', '644', '645', '646', '647', '648', '649', '650', '651', '652', '653', '654', '655', '656', '657', '658', '659', '660', '661', '662', '663', '664', '665', '666', '667', '668', '669', '670', '671', '672', '673', '674', '675', '676', '677', '678', '679', '680', '681', '682', '683', '684', '685', '686', '687', '688', '689', '690', '691', '692', '693', '694', '695', '696', '697', '698', '699', '700', '701', '702', '703', '704', '705', '706', '707', '708', '709', '710', '711', '712', '713', '714', '715', '716', '717', '718', '719', '720', '721', '722', '723', '724', '725', '726', '727', '728', '729', '730', '731', '732', '733', '734', '735', '736', '737', '738', '739', '740', '741', '742', '743', '744', '745', '746', '747', '748', '749', '750', '751', '752', '753', '754', '755', '756', '757', '758', '759', '760', '761', '762', '763', '764', '765', '766', '767', '768', '769', '770', '771', '772', '773', '774', '775', '776', '777', '778', '779', '780', '781', '782', '783', '784', '785', '786', '787', '788', '789', '790', '791', '792', '793', '794', '795', '796', '797', '798', '799', '800', '801', '802', '803', '804', '805', '806', '807', '808', '809', '810', '811', '812', '813', '814', '815', '816', '817', '818', '819', '820', '821', '822', '823', '824', '825', '826', '827', '828', '829', '830', '831', '832', '833', '834', '835', '836', '837', '838', '839', '840', '841', '842', '843', '844', '845', '846', '847', '848', '849', '850', '851', '852', '853', '854', '855', '856', 
'857', '858', '859', '860', '861', '862', '863', '864', '865', '866', '867', '868', '869', '870', '871', '872', '873', '874', '875', '876', '877', '878', '879', '880', '881', '882', '883', '884', '885', '886', '887', '888', '889', '890', '891', '892', '893', '894', '895', '896', '897', '898', '899', '900', '901', '902', '903', '904', '905', '906', '907', '908', '909', '910', '911', '912', '913', '914', '915', '916', '917', '918', '919', '920', '921', '922', '923', '924', '925', '926', '927', '928', '929', '930', '931', '932', '933', '934', '935', '936', '937', '938', '939', '940', '941', '942', '943', '944', '945', '946', '947', '948', '949', '950', '951', '952', '953', '954', '955', '956', '957', '958', '959', '960', '961', '962', '963', '964', '965', '966', '967', '968', '969', '970', '971', '972', '973', '974', '975', '976', '977', '978', '979', '980', '981', '982', '983', '984', '985', '986', '987', '988', '989', '990', '991', '992', '993', '994', '995', '996', '997', '998', '999', '1000', '1001', '1002', '1003', '1004', '1005', '1006', '1007', '1008', '1009', '1010', '1011', '1012', '1013', '1014', '1015', '1016', '1017', '1018', '1019', '1020', '1021', '1022', '1023', '1024', '1025', '1026', '1027', '1028', '1029', '1030', '1031', '1032', '1033', '1034', '1035', '1036', '1037', '1038', '1039', '1040', '1041', '1042', '1043', '1044', '1045', '1046', '1047', '1048', '1049', '1050', '1051', '1052', '1053', '1054', '1055', '1056', '1057', '1058', '1059', '1060', '1061', '1062', '1063', '1064', '1065', '1066', '1067', '1068', '1069', '1070', '1071', '1072', '1073', '1074', '1075', '1076', '1077', '1078', '1079', '1080', '1081', '1082'], 'KC844056.1': ['209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', 
'254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286', '287', '288', '289', '290', '291', '292', '293', '294', '295', '296', '297', '298', '299', '300', '301', '302', '303', '304', '305', '306', '307', '308', '309', '310', '311', '312', '313', '314', '315', '316', '317', '318', '319', '320', '321', '322', '323', '324', '325', '326', '327', '328', '329', '330', '331', '332', '333', '334', '335', '336', '337', '338', '339', '340', '341', '342', '343', '344', '345', '346', '347', '348', '349', '350', '351', '352', '353', '354', '355', '356', '357', '358', '359', '360', '361', '362', '363', '364', '365', '366', '367', '368', '369', '370', '371', '372', '373', '374', '375', '376', '377', '378', '379', '380', '381', '382', '383', '384', '385', '386', '387', '388', '389', '390', '391', '392', '393', '394', '395', '396', '397', '398', '399', '400', '401', '402', '403', '404', '405', '406', '407', '408', '409', '410', '411', '412', '413', '414', '415', '416', '417', '418', '419', '420', '421', '422', '423', '424', '425', '426', '427', '428', '429', '430', '431', '432', '433', '434', '435', '436', '437', '438', '439', '440', '441', '442', '443', '444', '445', '446', '447', '448', '449', '450', '451', '452', '453', '454', '455', '456', '457', '458', '459', '460', '461', '462', '463', '464', '465', '466', '467', '468', '469', '470', '471', '472', '473', '474', '475', '476', '477', '478', '479', '480', '481', '482', '483', '484', '485', '486', '487', '488', '489', '490', '491', '492', '493', '494', '495', '496', '497', '498', '499', '500', '501', '502', '503', '504', '505', '506', '507', '508', '509', '510', '511', '512', '513', '514', '515', '516', '517', '518', '519', '520', '521', '522', '523', '524', '525', '526', '527', '528', '529', '530', '531', '532', '533', '534', '535', '536', '537', '538', 
'539', '540', '541', '542', '543', '544', '545', '546', '547', '548', '549', '550', '551', '552', '553', '554', '555', '556', '557', '558', '559', '560', '561', '562', '563', '564', '565', '566', '567', '568', '569', '570', '571', '572', '573', '574', '575', '576', '577', '578', '579', '580', '581', '582', '583', '584', '585', '586', '587', '588', '589', '590', '591', '592', '593', '594', '595', '596', '597', '598', '599', '600', '601', '602', '603', '604', '605', '606', '607', '608', '609', '610', '611', '612', '613', '614', '615', '616', '617', '618', '619', '620', '621', '622', '623', '624', '625', '626', '627', '628', '629', '630', '631', '632', '633', '634', '635', '636', '637', '638', '639', '640', '641', '642', '643', '644', '645', '646', '647', '648', '649', '650', '651', '652', '653', '654', '655', '656', '657', '658', '659', '660', '661', '662', '663', '664', '665', '666', '667', '668', '669', '670', '671', '672', '673', '674', '675', '676', '677', '678', '679', '680', '681', '682', '683', '684', '685', '686', '687', '688', '689', '690', '691', '692', '693', '694', '695', '696', '697', '698', '699', '700', '701', '702', '703', '704', '705', '706', '707', '708', '709', '710', '711', '712', '713', '714', '715', '716', '717', '718', '719', '720', '721', '722', '723', '724', '725', '726', '727', '728', '729', '730', '731', '732', '733', '734', '735', '736', '737', '738', '739', '740', '741', '742', '743', '744', '745', '746', '747', '748', '749', '750', '751', '752', '753', '754', '755', '756', '757', '758', '759', '760', '761', '762', '763', '764', '765', '766', '767', '768', '769', '770', '771', '772', '773', '774', '775', '776', '777', '778', '779', '780', '781', '782', '783', '784', '785', '786', '787', '788', '789', '790', '791', '792', '793', '794', '795', '796', '797', '798', '799', '800', '801', '802', '803', '804', '805', '806', '807', '808', '809', '810', '811', '812', '813', '814', '815', '816', '817', '818', '819', '820', '821', '822', '823', 
'824', '825', '826', '827', '828', '829', '830', '831', '832', '833', '834', '835', '836', '837', '838', '839', '840', '841', '842', '843', '844', '845', '846', '847', '848', '849', '850', '851', '852', '853', '854', '855', '856', '857', '858', '859', '860', '861', '862', '863', '864', '865', '866', '867', '868', '869', '870', '871', '872', '873', '874', '875', '876', '877', '878', '879', '880', '881', '882', '883', '884', '885', '886', '887', '888', '889', '890', '891', '892', '893', '894', '895', '896', '897', '898', '899', '900', '901', '902', '903', '904', '905', '906', '907', '908', '909', '910', '911', '912', '913', '914', '915', '916', '917', '918', '919', '920', '921', '922', '923', '924', '925', '926', '927', '928', '929', '930', '931', '932', '933', '934', '935', '936', '937', '938', '939', '940', '941', '942', '943', '944', '945', '946', '947', '948', '949', '950', '951', '952', '953', '954', '955', '956', '957', '958', '959', '960', '961', '962', '963', '964', '965', '966', '967', '968', '969', '970', '971', '972', '973', '974', '975', '976', '977', '978', '979', '980', '981', '982', '983', '984', '985', '986', '987', '988', '989', '990', '991', '992', '993', '994', '995', '996', '997', '998', '999', '1000', '1001', '1002', '1003', '1004', '1005', '1006', '1007', '1008', '1009', '1010', '1011', '1012', '1013', '1014', '1015', '1016', '1017', '1018', '1019', '1020', '1021', '1022', '1023', '1024', '1025', '1026', '1027', '1028', '1029', '1030', '1031', '1032', '1033', '1034', '1035', '1036', '1037', '1038', '1039', '1040', '1041', '1042', '1043', '1044', '1045', '1046', '1047', '1048', '1049', '1050', '1051', '1052', '1053', '1054', '1055', '1056', '1057', '1058', '1059', '1060', '1061', '1062', '1063', '1064', '1065', '1066', '1067', '1068', '1103'], 'JX042489.1': ['67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', 
'100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '124.1', '125', '126', '127', '128', '129', '130', '131', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286', '287', '288', '289', '290', '291', '292', '293', '294', '295', '296', '297', '298', '299', '300', '301', '302', '303', '304', '305', '306', '307', '308', '309', '310', '311', '312', '313', '314', '315', '316', '317', '318', '319', '320', '321', '322', '323', '324', '325', '326', '327', '328', '329', '330', '331', '332', '333', '334', '335', '336', '337', '338', '339', '340', '341', '342', '343', '344', '345', '346', '347', '348', '349', '350', '351', '352', '353', '354', '355', '356', '357', '358', '359', '360', '361', '362', '363', '364', '365', '366', '367', '368', '369', '370', '371', '372', '373', '374', '375', '376', '377', '378', '379', '380', '381', '382', '383', '384', 
'385', '386', '387', '388', '389', '390', '391', '392', '393', '394', '395', '396', '397', '398', '399', '400', '401', '402', '403', '404', '405', '406', '407', '408', '409', '410', '411', '412', '413', '414', '415', '416', '417', '418', '419', '420', '421', '422', '423', '424', '425', '426', '427', '428', '429', '430', '431', '432', '433', '434', '435', '436', '437', '438', '439', '440', '441', '442', '443', '444', '445', '446', '447', '448', '449', '450', '451', '452', '453', '454', '455', '456', '457', '458', '459', '460', '461', '462', '463', '464', '465', '466', '467', '468', '469', '470', '471', '472', '473', '474', '475', '476', '477', '478', '479', '480', '481', '482', '483', '484', '485', '486', '487', '488', '489', '490', '491', '492', '493', '494', '495', '496', '497', '498', '499', '500', '501', '502', '503', '504', '505', '506', '507', '508', '509', '510', '511', '512', '513', '514', '515', '516', '517', '518', '519', '520', '521', '522', '523', '524', '525', '526', '527', '528', '529', '530', '531', '532', '533', '534', '535', '536', '537', '538', '539', '540', '541', '542', '543', '544', '545', '546', '547', '548', '549', '550', '551', '552', '553', '554', '555', '556', '557', '558', '559', '560', '561', '562', '563', '564', '565', '566', '567', '568', '569', '570', '571', '572', '573', '574', '575', '576', '577', '578', '579', '580', '581', '582', '583', '584', '585', '586', '587', '588', '589', '590', '591', '592', '593', '594', '595', '596', '597', '598', '599', '600', '601', '602', '603', '604', '605', '606', '607', '608', '609', '610', '611', '612', '613', '614', '615', '616', '617', '618', '619', '620', '621', '622', '623', '624', '625', '626', '627', '628', '629', '630', '631', '632', '633', '634', '635', '636', '637', '638', '639', '640', '641', '642', '643', '644', '645', '646', '647', '648', '649', '650', '651', '652', '653', '654', '655', '656', '657', '658', '659', '660', '661', '662', '663', '664', '665', '666', '667', '668', '669', 
'670', '671', '672', '673', '674', '675', '676', '677', '678', '679', '680', '681', '682', '683', '684', '685', '686', '687', '688', '689', '690', '691', '692', '693', '694', '695', '696', '697', '698', '699', '700', '701', '702', '703', '704', '705', '706', '707', '708', '709', '710', '711', '712', '713', '714', '715', '716', '717', '718', '719', '720', '721', '722', '723', '724', '725', '726', '727', '728', '729', '730', '731', '732', '733', '734', '735', '736', '737', '738', '739', '740', '741', '742', '743', '744', '745', '746', '747', '748', '749', '750', '751', '752', '753', '754', '755', '756', '757', '758', '759', '760', '761', '762', '763', '764', '765', '766', '767', '768', '769', '770', '771', '772', '773', '774', '775', '776', '777', '778', '779', '780', '781', '782', '783', '784', '785', '786', '787', '788', '789', '790', '791', '792', '793', '794', '795', '796', '797', '798', '799', '800', '801', '802', '803', '804', '805', '806', '807', '808', '809', '810', '811', '812', '813', '814', '815', '816', '817', '818', '819', '820', '821', '822', '823', '824', '825', '826', '827', '828', '829', '830', '831', '832', '833', '834', '835', '836', '837', '838', '839', '840', '841', '842', '843', '844', '845', '846', '847', '848', '849', '850', '851', '852', '853', '854', '855', '856', '857', '858', '859', '860', '861', '862', '863', '864', '865', '866', '867', '868', '869', '870', '871', '872', '873', '874', '875', '876', '877', '878', '879', '880', '881', '882', '883', '884', '885', '886', '887', '888', '889', '890', '891', '892', '893', '894', '895', '896', '897', '898', '899', '900', '901', '902', '903', '904', '905', '906', '907', '908', '909', '910', '911', '912', '913', '914', '915', '916', '917', '918', '919', '920', '921', '922', '923', '924', '925', '926', '927', '928', '929', '930', '931', '932', '933', '934', '935', '936', '937', '938', '939', '940', '941', '942', '943', '944', '945', '946', '947', '948', '949', '950', '951', '952', '953', '954', 
'955', '956', '957', '958', '959', '960', '961', '962', '963', '964', '965', '966', '967', '968', '969', '970', '971', '972', '973', '974', '975', '976', '977', '978', '979', '980', '981', '982', '983', '984', '985', '986', '987', '988', '989', '990', '991', '992', '993', '994', '995', '996', '997', '998', '999', '1000', '1001', '1002', '1003', '1004', '1005', '1006', '1007', '1008', '1009', '1010', '1011', '1012', '1013', '1014', '1015', '1016', '1017', '1018', '1019', '1020', '1021', '1022', '1023', '1024', '1025', '1026', '1027', '1028', '1029', '1030', '1031', '1032', '1033', '1034', '1035', '1036', '1037', '1038', '1039', '1040', '1041', '1042', '1043', '1044', '1045', '1046', '1047', '1048', '1049', '1050', '1051', '1052', '1053', '1054', '1055', '1056', '1057', '1058', '1059', '1060', '1061', '1062', '1063', '1064', '1065', '1066', '1067', '1068', '1103']}\u001b[0m\n"
-     ]
     }
    ],
    "source": [
diff --git a/src/pyeed/tools/alphafold2.py b/src/pyeed/tools/alphafold2.py
deleted file mode 100644
index 87aca3e4..00000000
--- a/src/pyeed/tools/alphafold2.py
+++ /dev/null
@@ -1,121 +0,0 @@
-"""
-AlphaFold2 runner module.
-
-This module provides functionality to run AlphaFold2 predictions on protein sequences.
-AlphaFold2 must be installed in a conda environment named 'alphafold_env' as described in:
-https://github.com/google-deepmind/alphafold/tree/main
-"""
-
-import logging
-import os
-import subprocess
-from pathlib import Path
-
-import torch
-
-logger = logging.getLogger(__name__)
-
-
-class AlphaFoldRunner:
-    """Class to manage and execute AlphaFold2 protein structure predictions."""
-
-    def __init__(self, data_dir: str, output_dir: str) -> None:
-        """
-        Initialize the AlphaFold runner with required directories.
-
-        Args:
-            data_dir: Path to the directory containing AlphaFold model data
-            output_dir: Path where prediction results will be stored
-
-        Raises:
-            FileNotFoundError: If required paths or files are not found
-            EnvironmentError: If no GPU is detected
-        """
-        # Get the base directory of the pyeed project
-        self.base_dir = Path(os.path.dirname(os.path.abspath(__file__)))
-        self.data_dir = Path(data_dir)
-        self.output_dir = Path(output_dir)
-
-        # Set path to the docker run script
-        self.docker_script = self.base_dir / "resources/alphafold/docker_run.py"
-
-        # Validate required paths
-        if not self.docker_script.exists():
-            raise FileNotFoundError(
-                f"Docker run script not found: {self.docker_script}"
-            )
-        if not self.data_dir.exists():
-            raise FileNotFoundError(
-                f"AlphaFold data directory not found: {self.data_dir}"
-            )
-        if not self.output_dir.exists():
-            self.output_dir.mkdir(parents=True)
-
-        # Verify GPU availability
-        if not torch.cuda.is_available():
-            raise EnvironmentError("No GPU detected. AlphaFold requires a GPU to run.")
-
-        logger.info("GPU detected. AlphaFold will run on GPU.")
-
-    def run_alphafold(
-        self, sequence: str, sequence_id: str, max_template_date: str = "2022-01-01"
-    ) -> dict[str, str]:
-        """
-        Run AlphaFold prediction on a given protein sequence.
-
-        Args:
-            sequence: The protein sequence to predict structure for
-            sequence_id: Unique identifier for the sequence
-            max_template_date: Latest date allowed for template structures (YYYY-MM-DD format)
-
-        Returns:
-            dict[str, str]: Contains AlphaFold confidence score and structure path with
-                keys 'confidence_score' and 'structure_path'
-
-        Raises:
-            RuntimeError: If AlphaFold execution fails
-        """
-        # Sanitize sequence ID by replacing dots with underscores
-        sequence_id = sequence_id.replace(".", "_")
-
-        # Create FASTA file for the sequence
-        fasta_path = self.output_dir / f"{sequence_id}.fasta"
-        with open(fasta_path, "w", encoding="utf-8") as fasta_file:
-            fasta_file.write(f">{sequence_id}\n{sequence}")
-        logger.info("Created FASTA file at: %s", fasta_path)
-
-        # Construct AlphaFold command
-        cmd = [
-            "source ~/anaconda3/etc/profile.d/conda.sh && "
-            "conda activate alphafold_env && "
-            f"python {self.docker_script} "
-            f"--fasta_paths={fasta_path} "
-            f"--max_template_date={max_template_date} "
-            f"--data_dir={self.data_dir} "
-            f"--output_dir={self.output_dir}"
-        ]
-
-        logger.info("Running AlphaFold with command: %s", " ".join(cmd))
-
-        # Execute AlphaFold command
-        process = subprocess.run(
-            cmd,
-            capture_output=True,
-            text=True,
-            shell=True,
-            executable="/bin/bash",
-            check=False,
-        )
-
-        # Log output streams
-        if process.stdout:
-            logger.info("AlphaFold stdout:\n%s", process.stdout)
-        if process.stderr:
-            logger.error("AlphaFold stderr:\n%s", process.stderr)
-
-        if process.returncode != 0:
-            error_msg = f"AlphaFold execution failed: {process.stderr}"
-            logger.error(error_msg)
-            raise RuntimeError(error_msg)
-
-        return {}  # TODO: Implement return value with confidence score and structure path

From 6cdf2de4098a46a0af7ac909b5de6ebb20365273 Mon Sep 17 00:00:00 2001
From: Niklas Abraham GPU 
Date: Fri, 14 Mar 2025 17:32:40 +0000
Subject: [PATCH 13/13] update gitignore

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 47b27efc..3eafb82d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,6 +6,7 @@ __pycache__/
 # AlphaFold output
 docs/resources/alphafold/output/*
 src/pyeed/tools/alphafold2.py
+docs/usage/alphafold.ipynb
 
 # C extensions
 *.so