In [1]:
import os
import wandb
import torch
import json
import csv
import subprocess
import pandas as pd
import numpy as np
from tqdm import tqdm
from datasets import Dataset
from transformers import GPT2Tokenizer
from trl import PPOTrainer, PPOConfig, AutoModelForCausalLMWithValueHead, create_reference_model

# Restrict this process to GPU index 3. The reward scripts launched later via
# subprocess.run() receive no explicit env, so they inherit this setting too.
# NOTE(review): this assignment runs after `import torch`; presumably it still
# takes effect because CUDA has not been initialised yet — confirm, or move it
# above the imports.
os.environ["CUDA_VISIBLE_DEVICES"] = "3"


# ---- I/O helpers ----
def load_data(train_path):
    """Read a text file and return its non-empty lines, whitespace-stripped.

    Args:
        train_path: Path of the text file to read (one record per line).

    Returns:
        list[str]: Stripped lines in file order. Whitespace-only lines are
        skipped, so a trailing newline or spacer line does not produce an
        empty record that ``split_line`` would fail to unpack.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(train_path, "r") as f:
        stripped_lines = (raw.strip() for raw in f)
        return [line for line in stripped_lines if line]

def split_line(line):
    """Split an ``"<epitope>$<tcr>"`` record into its two parts.

    Args:
        line: A string containing exactly one ``"$"`` delimiter.

    Returns:
        tuple[str, str]: The epitope with the ``"$"`` re-attached, and the TCR.

    Raises:
        ValueError: If ``line`` does not contain exactly one ``"$"``.
    """
    pieces = line.split("$")
    # Two-target unpacking enforces "exactly one delimiter".
    epitope, receptor = pieces
    return f"{epitope}$", receptor

def save_to_csv_1(epis, tcrs, csv_file_path="tmp_epis_tcrs.csv"):
    """Write paired epitopes and TCRs to a CSV with ``Epitopes,TCRs`` columns.

    Args:
        epis: Iterable of epitope strings.
        tcrs: Iterable of TCR strings, paired positionally with ``epis``.
        csv_file_path: Destination file; overwritten if it already exists.

    Note:
        Pairing uses ``zip``, so surplus items in the longer input are dropped.
    """
    columns = ["Epitopes", "TCRs"]
    with open(csv_file_path, mode="w", newline="") as handle:
        table = csv.DictWriter(handle, fieldnames=columns)
        table.writeheader()
        table.writerows(
            {"Epitopes": epitope, "TCRs": receptor}
            for epitope, receptor in zip(epis, tcrs)
        )

def append_tmp_to_master(tmp_filename="tmp_epis_tcrs.csv", master_filename="all_results.csv"):
    """Append the rows of the per-batch CSV to a cumulative master CSV.

    Args:
        tmp_filename: CSV holding the current batch.
        master_filename: CSV that accumulates all batches; created on first use.

    The header row is written only when the master file does not exist yet.
    """
    # Decide on the header before writing, because appending creates the file.
    needs_header = not os.path.isfile(master_filename)
    batch_frame = pd.read_csv(tmp_filename)
    batch_frame.to_csv(
        master_filename,
        mode="a",
        header=needs_header,
        index=False,
    )

def collator(data):
    """Turn a list of per-sample dicts into one dict of per-key lists.

    Args:
        data: Non-empty list of dicts; the keys of the first element define
            the keys of the result.

    Returns:
        dict: ``{key: [sample[key] for each sample]}``.

    Raises:
        IndexError: If ``data`` is empty.
    """
    batch = {}
    for field in data[0]:
        batch[field] = [sample[field] for sample in data]
    return batch

def shannon_entropy(seq):
    """Shannon entropy (natural log) of the symbol frequencies in ``seq``.

    Args:
        seq: A sized iterable of hashable symbols, e.g. an amino-acid string.

    Returns:
        The entropy in nats; ``0`` for an empty sequence. A ``1e-9`` offset is
        added inside the logarithm, so values are very slightly below the
        exact entropy.
    """
    import math
    from collections import Counter

    terms = []
    for occurrences in Counter(seq).values():
        freq = occurrences / len(seq)
        terms.append(freq * math.log(freq + 1e-9))
    return -sum(terms)

def longest_homopolymer(seq):
    """Length of the longest run of identical consecutive symbols in ``seq``.

    Args:
        seq: An indexable sequence (e.g. a string).

    Returns:
        int: ``0`` for an empty sequence, otherwise the longest run length
        (at least ``1``).
    """
    if not seq:
        return 0
    best = run = 1
    for idx in range(1, len(seq)):
        # Extend the current run on a repeat, otherwise start a new one.
        if seq[idx - 1] == seq[idx]:
            run += 1
        else:
            run = 1
        if run > best:
            best = run
    return best


# ---- Reward backend command builder ----
def build_reward_cmd(backend):
    """Build the shell command that scores ``tmp_epis_tcrs.csv`` with one reward backend.

    Args:
        backend: One of ``"tcrbert"``, ``"catelmo"``, ``"cnn"`` or ``"lstm"``.

    Returns:
        str: A single command line, meant to be executed through a shell.
        ``tcrbert`` and ``catelmo`` run their ``*_bap_infer.py`` script inside
        the ``trl`` conda environment and rewrite the CSV in place; ``cnn``
        and ``lstm`` run ``ensemble_bap.py`` in inference mode with the tf26
        interpreter.

    Raises:
        ValueError: If ``backend`` is not a supported name. The name is
            interpolated into a shell command, so unknown values are rejected
            here instead of being passed through to the shell.
    """
    if backend in ("tcrbert", "catelmo"):
        # Both backends share the same CLI; only script and weights differ.
        return (
            "conda run -n trl "
            f"python rewards/{backend}_bap_infer.py "
            "--input-path tmp_epis_tcrs.csv "
            "--output-path tmp_epis_tcrs.csv "
            f"--model-path rewards/{backend}_bap_head_robust.pth"
        )
    if backend in ("cnn", "lstm"):
        return (
            "/home/hmei7/workspace/.conda/envs/tf26/bin/python "
            "rewards/bap_ensemble/ensemble_bap.py "
            "--mode inference "
            "--testfile tmp_epis_tcrs.csv "
            f"--bap {backend}"
        )
    raise ValueError(
        f"Unknown reward backend {backend!r}; "
        "expected one of 'tcrbert', 'catelmo', 'cnn', 'lstm'"
    )

def run_reward_backend(backend):
    """Run one reward backend as a blocking shell subprocess.

    Args:
        backend: Backend name, forwarded to ``build_reward_cmd``.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status (``check=True``).
    """
    # Appending to the master CSV is left to the caller.
    subprocess.run(build_reward_cmd(backend), shell=True, check=True)

In [2]:
# ---- Run-level constants ----
# Reward backends are chosen in the training loop; the names that
# build_reward_cmd handles are "tcrbert", "catelmo", "cnn" and "lstm".

# Checkpoints go under BASE_CKPT_DIR, one every SAVE_EVERY PPO steps.
BASE_CKPT_DIR = "ckpts/catelmo_ensemble"
SAVE_EVERY = 5
os.makedirs(BASE_CKPT_DIR, exist_ok=True)

# When True, the training loop overrides rewards of degenerate sequences
# according to HEURISTIC_CFG.
INCLUDE_HEURISTIC = False

HEURISTIC_CFG = dict(
    L_MIN=9,                 # shortest accepted sequence length
    L_MAX=21,                # longest accepted sequence length
    ENTROPY_FLOOR=1.76,      # minimum accepted shannon_entropy() value
    RUN_MAX_ALLOWED=3,       # longest accepted homopolymer run (so 'AAAA' is rejected)
    BAD_SEQ_PENALTY=-10.0,   # reward assigned to rejected sequences
    MAX_REWARD=10.0,         # upper clip applied to rewards
)

# ========================
# PPO hyper-parameters
# ========================
config = PPOConfig(
    # -- policy checkpoint and experiment tracking --
    model_name="/home/hmei7/workspace/tcr/models_gen/checkpoint-1600",
    log_with="wandb",
    tracker_project_name="tcr-rlhf-goodhart",
    # -- batching --
    batch_size=128,
    mini_batch_size=32,
    gradient_accumulation_steps=2,
    # -- optimisation --
    ppo_epochs=2,
    learning_rate=1.41e-5,
    max_grad_norm=0.4,
    # -- KL penalty: adaptive control is switched off and target_kl is left unset --
    init_kl_coef=0.1,
    adap_kl_ctrl=False,
)

# ========================
# Load Model and Tokenizer
# ========================
model = AutoModelForCausalLMWithValueHead.from_pretrained(config.model_name)
model_ref = create_reference_model(model)
tokenizer = GPT2Tokenizer.from_pretrained("/home/hmei7/workspace/tcr/models_gen/aa_tokenizer_trained")
# Reuse the EOS token for padding.
tokenizer.pad_token = tokenizer.eos_token


# Each training line is "<epitope>$<tcr>"; only the epitope part (with its
# trailing "$") is used as a PPO prompt.
dataset_lines = load_data('data/epi_training.txt')
epis, tcrs = [], []
for line in dataset_lines:
    epi, tcr = split_line(line)
    epis.append(epi)
    tcrs.append(tcr)
# De-duplicate while keeping first-seen file order. A plain set() would yield
# an order that depends on string hashing (randomised per interpreter run), so
# the prompt order, and therefore the batches, would not be reproducible.
epis = list(dict.fromkeys(epis))
print(f"Loaded {len(epis)} epitopes:")
my_dataset = {"epis": epis}
dataset = Dataset.from_dict(my_dataset)
dataset.set_format("pytorch")


# Tokenization & Query Construction
# Each prompt is encoded on its own and truncated to at most 64 tokens.
dataset = dataset.map(
    lambda x: {"input_ids": tokenizer.encode(x["epis"], return_tensors="pt")[0, :64].to(model.pretrained_model.device)},
    batched=False,
)
# Keep the decoded prompt text next to its token ids.
dataset = dataset.map(
    lambda x: {"query": tokenizer.decode(x["input_ids"])},
    batched=False
)

# ========================
# PPO trainer
# ========================
ppo_trainer = PPOTrainer(
    config,
    model,
    model_ref,
    tokenizer,
    dataset=dataset,
    data_collator=collator,
)


# ========================
# Sampling settings used for every generation call
# ========================
# Generation length is bounded by max_new_tokens rather than a total
# max_length; EOS doubles as the padding token.
generation_kwargs = dict(
    min_length=2,
    top_k=0.0,
    top_p=1.0,
    do_sample=True,
    pad_token_id=tokenizer.eos_token_id,
    eos_token_id=tokenizer.eos_token_id,
    max_new_tokens=42,
)

# ========================
# Training Loop
# ========================
def _apply_heuristic_penalties(reward_values, tcrs, cfg):
    """Overwrite rewards of degenerate sequences and cap the remaining ones.

    A sequence is penalized when it is the ``"WRONGFORMAT"`` sentinel, is
    shorter than 5 characters, has a length outside ``[L_MIN, L_MAX]``, has
    ``shannon_entropy`` below ``ENTROPY_FLOOR``, or contains a homopolymer run
    longer than ``RUN_MAX_ALLOWED``.

    Args:
        reward_values: Per-sequence rewards, aligned with ``tcrs``.
        tcrs: Generated sequences.
        cfg: Mapping with the keys used in ``HEURISTIC_CFG``.

    Returns:
        tuple[list, int]: A new reward list (the input is not mutated) with
        penalties applied and every value capped at ``MAX_REWARD``, and the
        number of penalized sequences.
    """
    adjusted = list(reward_values)
    bad_count = 0
    for i, tcr in enumerate(tcrs):
        rejected = tcr == "WRONGFORMAT" or len(tcr) < 5
        if not rejected:
            rejected = (
                not (cfg["L_MIN"] <= len(tcr) <= cfg["L_MAX"])
                or shannon_entropy(tcr) < cfg["ENTROPY_FLOOR"]
                or longest_homopolymer(tcr) > cfg["RUN_MAX_ALLOWED"]
            )
        if rejected:
            adjusted[i] = cfg["BAD_SEQ_PENALTY"]
            bad_count += 1
    # Clip large rewards
    return [min(v, cfg["MAX_REWARD"]) for v in adjusted], bad_count


# Loop-invariant: any of these markers inside a generated TCR means the
# sample is malformed.
special_tokens = ["<PAD>", "<tcr>", "<eotcr>", "[CLS]", "[BOS]", "[MASK]", "[SEP]", "<epi>", "<eoepi>", "$", "<unk>"]

global_step = 0
for epoch in tqdm(range(10)):
    for batch in ppo_trainer.dataloader:

        # ---- batch generations ----
        query_tensors = batch["input_ids"]                 # List[Tensor], variable lengths

        with torch.no_grad():
            gen_full = ppo_trainer.generate(list(query_tensors), **generation_kwargs)
        # Assumes generate() returns prompt + continuation; slicing off the
        # prompt length leaves only the newly generated tokens.
        prompt_lens = [int(q.shape[-1]) for q in query_tensors]
        response_tensors = [g[p_len:] for g, p_len in zip(gen_full, prompt_lens)]
        batch["response"] = tokenizer.batch_decode(response_tensors, skip_special_tokens=False)

        # ---- extract epi/tcr ----
        # NOTE(review): [:-2] drops the last two decoded characters of the
        # prompt, presumably the "$" delimiter plus one more character.
        # Confirm against the tokenizer's decode output.
        epis = [tokenizer.decode(r.squeeze())[:-2] for r in query_tensors]
        # Keep only the text before the first EOS marker.
        tcrs = [resp.split('<EOS>')[0] for resp in batch["response"]]
        tcrs = ['WRONGFORMAT' if (not s or any(tok in s for tok in special_tokens)) else s for s in tcrs]
        save_to_csv_1(epis, tcrs, 'tmp_epis_tcrs.csv')

        # ---- Ensemble 3 reward backends sequentially ----
        # Every backend is pointed at tmp_epis_tcrs.csv, which is read back below.
        for REWARD_BACKEND in ("catelmo", "cnn", "lstm"):
            run_reward_backend(REWARD_BACKEND)
        append_tmp_to_master()

        # ---- I/O reward values ----
        df_rewards = pd.read_csv("tmp_epis_tcrs.csv")
        bap_cols = [c for c in ["logits", "bap_cnn", "bap_lstm"] if c in df_rewards.columns]
        if not bap_cols:
            raise RuntimeError("No BAP columns found in tmp_epis_tcrs.csv")
        if len(df_rewards) != len(query_tensors):
            # A backend that drops or duplicates rows would silently pair
            # rewards with the wrong sequences.
            raise RuntimeError(
                f"Reward file has {len(df_rewards)} rows but the batch has "
                f"{len(query_tensors)} queries; rewards would be misaligned."
            )

        # simple mean ensemble; NaNs ignored per-row
        reward_values = df_rewards[bap_cols].astype(float).mean(axis=1, skipna=True).tolist()

        if INCLUDE_HEURISTIC:
            reward_values, bad_count = _apply_heuristic_penalties(reward_values, tcrs, HEURISTIC_CFG)
            print(f"[Heuristic] {bad_count}/{len(tcrs)} sequences penalized this batch.")

        # A row whose backends all returned NaN has a NaN mean; feeding that
        # into PPO would corrupt the update, so fail loudly instead.
        non_finite = [i for i, v in enumerate(reward_values) if not np.isfinite(v)]
        if non_finite:
            raise RuntimeError(
                f"Non-finite reward for {len(non_finite)} sequence(s) "
                f"(first row index: {non_finite[0]}); refusing to run a PPO step on NaN/inf rewards."
            )

        rewards_bap = [torch.tensor(float(v), dtype=torch.float32) for v in reward_values]

        # PPO Step
        stats = ppo_trainer.step(query_tensors, response_tensors, rewards_bap)
        ppo_trainer.log_stats(stats, batch, rewards_bap)

        # ---- save checkpoints ----
        global_step += 1
        if global_step % SAVE_EVERY == 0 and ppo_trainer.accelerator.is_main_process:
            ckpt_dir = os.path.join(BASE_CKPT_DIR, f"step_{global_step:04d}")
            os.makedirs(ckpt_dir, exist_ok=True)
            ppo_trainer.save_pretrained(ckpt_dir)
            tokenizer.save_pretrained(ckpt_dir)
            print(f"[Checkpoint] Saved to {ckpt_dir}")

    # Show a sample of the last batch's generations at the end of each epoch.
    print(f'\nEpoch {epoch}:')
    print(tcrs[:20])


  return self.fget.__get__(instance, owner)()


Loaded 786 epitopes:


Map:   0%|          | 0/786 [00:00<?, ? examples/s]

Map:   0%|          | 0/786 [00:00<?, ? examples/s]

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mpengfeiz[0m. Use [1m`wandb login --relogin`[0m to force relogin


  0%|          | 0/10 [00:00<?, ?it/s]

43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 235.46 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 393.21 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.28it/s]
100%|██████████| 1/1 [00:00<00:00,  3.09it/s]

2025-11-19 23:22:35.629903: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:22:35.642320: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:22:35.657025: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 198.36 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 353.53 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.28it/s]
100%|██████████| 1/1 [00:00<00:00,  3.09it/s]

2025-11-19 23:22:54.458729: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:22:54.470903: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:22:54.485756: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 193.97 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 429.11 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.28it/s]
100%|██████████| 1/1 [00:00<00:00,  3.12it/s]

2025-11-19 23:23:12.904566: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:23:12.916947: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:23:12.931597: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/127 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/127 [00:00<00:00, 245.09 examples/s]
Map (num_proc=4): 100%|██████████| 127/127 [00:00<00:00, 408.70 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.33it/s]
100%|██████████| 1/1 [00:00<00:00,  3.15it/s]

2025-11-19 23:23:31.353882: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:23:31.366343: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:23:31.381047: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/127 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/127 [00:00<00:00, 227.05 examples/s]
Map (num_proc=4): 100%|██████████| 127/127 [00:00<00:00, 420.52 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.22it/s]
100%|██████████| 1/1 [00:00<00:00,  3.02it/s]

2025-11-19 23:23:49.996639: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:23:50.009077: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:23:50.023775: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

[Checkpoint] Saved to ckpts/catelmo_ensemble/step_0005
43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/127 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/127 [00:00<00:00, 241.18 examples/s]
Map (num_proc=4): 100%|██████████| 127/127 [00:00<00:00, 473.44 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.31it/s]
100%|██████████| 1/1 [00:00<00:00,  3.12it/s]

2025-11-19 23:24:08.608066: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:24:08.620439: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:24:08.635104: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist


Epoch 0:
['CASSLAGGAYEQYF', 'CASSPQGVDTQYF', 'CASSLGGDTEAFF', 'CASTDEQYF', 'CASSSLGGYEQYF', 'CASRAGAEQYF', 'CASSLGGSYEQYF', 'CASSLQGTYEQYF', 'CASSLAGAGGEQYF', 'CASSLGLYEQFF', 'ASYEQY', 'CASSFGGLGEQYF', 'CASSLGTEAFF', 'ASSDVGRSYEQY', 'CASSLAGGEAFF', 'ASSGLAGAYEQY', 'ASSQGGYGYT', 'CASSLGYEQYF', 'CASSQGNEQYF', 'CASSLEGDEQYF']
43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 238.28 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 416.54 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.24it/s]
100%|██████████| 1/1 [00:00<00:00,  3.04it/s]

2025-11-19 23:24:26.873235: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:24:26.885694: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:24:26.900411: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 230.55 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 433.91 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.27it/s]
100%|██████████| 1/1 [00:00<00:00,  3.09it/s]

2025-11-19 23:24:45.103192: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:24:45.115586: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:24:45.130528: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/127 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/127 [00:00<00:00, 244.89 examples/s]
Map (num_proc=4): 100%|██████████| 127/127 [00:00<00:00, 419.33 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.39it/s]
100%|██████████| 1/1 [00:00<00:00,  3.20it/s]

2025-11-19 23:25:03.209038: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:25:03.221756: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:25:03.236839: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/127 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/127 [00:00<00:00, 240.73 examples/s]
Map (num_proc=4): 100%|██████████| 127/127 [00:00<00:00, 430.76 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.33it/s]
100%|██████████| 1/1 [00:00<00:00,  3.14it/s]

2025-11-19 23:25:21.323034: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:25:21.335288: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:25:21.350146: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

[Checkpoint] Saved to ckpts/catelmo_ensemble/step_0010
25



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 240.14 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 413.56 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.55it/s]
100%|██████████| 1/1 [00:00<00:00,  3.34it/s]

2025-11-19 23:25:39.650288: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:25:39.662712: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:25:39.677357: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 243.06 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 433.97 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.38it/s]
100%|██████████| 1/1 [00:00<00:00,  3.18it/s]

2025-11-19 23:25:57.425962: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:25:57.438413: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:25:57.453173: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist


Epoch 1:
['CASSLGGYEQYF', 'CASSLGYEQYF', 'CASSLAGTGELFF', 'CASSLYNEQFF', 'CASSLGSYEQYF', 'CASSLGAYEQYF', 'CASSLTYEQYF', 'CASSLNTGELFF', 'CASSLPYEQYF', 'CASSLGNEQFF', 'CASSLGGTYEQYF', 'CASSLVYEQYF', 'CASSLGEQFF', 'CASSLAGYEQYF', 'CASSTYELFF', 'CASSLAGSYEQYF', 'CASSLGGNEQFF', 'CASSLAGPYEQYF', 'CASSLGDTQYF', 'CASSLTYEQYF']


  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 202.82 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 373.59 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.26it/s]
100%|██████████| 1/1 [00:00<00:00,  3.09it/s]



43



2025-11-19 23:26:15.518794: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:26:15.531053: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:26:15.546131: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-11-19 23:26:15.550447: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-11-19 23:26:15.561769: I tensorflow/core/platform/cpu_feature_guar

38



2025-11-19 23:26:33.406516: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:26:33.418950: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:26:33.433676: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-11-19 23:26:33.437970: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-11-19 23:26:33.449218: I tensorflow/core/platform/cpu_feature_guar

43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 197.54 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 386.91 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.26it/s]
100%|██████████| 1/1 [00:00<00:00,  3.08it/s]

2025-11-19 23:26:51.414259: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:26:51.426491: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:26:51.441386: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

[Checkpoint] Saved to ckpts/catelmo_ensemble/step_0015
43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 197.72 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 375.97 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.23it/s]
100%|██████████| 1/1 [00:00<00:00,  3.00it/s]

2025-11-19 23:27:09.824573: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:27:09.837070: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:27:09.851859: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 214.10 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 364.86 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.34it/s]
100%|██████████| 1/1 [00:00<00:00,  3.15it/s]

2025-11-19 23:27:27.754988: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:27:27.767215: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:27:27.782134: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 242.72 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 476.10 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.36it/s]
100%|██████████| 1/1 [00:00<00:00,  3.17it/s]

2025-11-19 23:27:45.582442: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:27:45.594923: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:27:45.609641: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist


Epoch 2:
['CASSYEQYF', 'CSVSYSYEQYF', 'CASSLSVYEQYF', 'CASSSYEQYF', 'CASSLAYEQYF', 'CASSLNYEQYF', 'CASSSYEQYF', 'CASSLGYEQYF', 'CASSYGYEQYF', 'CASSLTYEQYF', 'CASSYEQYFG', 'CASSLGYEQYF', 'CASSYGTDTQYF', 'CASSLTYEQYF', 'CASSLAYEQYF', 'CASSLSYEQYF', 'CASSLAYEQYF', 'CASSLAYEQYF', 'CASSLAYEQYF', 'CASVFTEAFF']
43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/127 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/127 [00:00<00:00, 204.32 examples/s]
Map (num_proc=4): 100%|██████████| 127/127 [00:00<00:00, 370.26 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.24it/s]
100%|██████████| 1/1 [00:00<00:00,  3.02it/s]

2025-11-19 23:28:03.598609: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:28:03.611196: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:28:03.625975: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 241.37 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 430.22 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.26it/s]
100%|██████████| 1/1 [00:00<00:00,  3.08it/s]

2025-11-19 23:28:21.532167: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:28:21.544522: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:28:21.559163: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

[Checkpoint] Saved to ckpts/catelmo_ensemble/step_0020
43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/127 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/127 [00:00<00:00, 245.56 examples/s]
Map (num_proc=4): 100%|██████████| 127/127 [00:00<00:00, 441.05 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.31it/s]
100%|██████████| 1/1 [00:00<00:00,  3.11it/s]

2025-11-19 23:28:39.700368: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:28:39.712612: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:28:39.727532: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 204.02 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 380.27 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.24it/s]
100%|██████████| 1/1 [00:00<00:00,  3.05it/s]

2025-11-19 23:28:57.881434: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:28:57.893943: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:28:57.908618: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 241.65 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 455.98 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.32it/s]
100%|██████████| 1/1 [00:00<00:00,  3.14it/s]

2025-11-19 23:29:15.797586: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:29:15.809959: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:29:15.824646: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/127 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/127 [00:00<00:00, 217.73 examples/s]
Map (num_proc=4): 100%|██████████| 127/127 [00:00<00:00, 395.03 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.24it/s]
100%|██████████| 1/1 [00:00<00:00,  3.04it/s]

2025-11-19 23:29:33.859864: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:29:33.872149: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:29:33.887084: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist


Epoch 3:
['CASSLAGYEQYF', 'WRONGFORMAT', 'CASSLSYEQYF', 'CASSLAGYEQYF', 'CASSLAGYEQYF', 'CASSYEQYF', 'ASSSGSYEQY', 'CASSLTYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSIGSYEQYF', 'CASSLSYEQYF', 'CASSSSGAYEQYF', 'CASSQSYEQYF', 'CASSLAYEQYFF', 'CASSLGYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLGYEQYF', 'CASSLSYEQYF']
43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 216.63 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 413.87 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.17it/s]
100%|██████████| 1/1 [00:00<00:00,  2.98it/s]

2025-11-19 23:29:52.179581: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:29:52.191880: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:29:52.206636: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

[Checkpoint] Saved to ckpts/catelmo_ensemble/step_0025
43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 254.82 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 450.29 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.35it/s]
100%|██████████| 1/1 [00:00<00:00,  3.18it/s]

2025-11-19 23:30:10.637941: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:30:10.650460: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:30:10.665270: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 244.86 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 437.34 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.36it/s]
100%|██████████| 1/1 [00:00<00:00,  3.17it/s]

2025-11-19 23:30:28.726930: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:30:28.739337: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:30:28.753996: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

38



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 190.13 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 398.74 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.32it/s]
100%|██████████| 1/1 [00:00<00:00,  3.14it/s]

2025-11-19 23:30:46.595726: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:30:46.607952: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:30:46.622855: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 250.49 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 472.55 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.41it/s]
100%|██████████| 1/1 [00:00<00:00,  3.24it/s]

2025-11-19 23:31:04.404249: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:31:04.416496: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:31:04.431434: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 204.35 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 392.38 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.13it/s]
100%|██████████| 1/1 [00:00<00:00,  2.96it/s]

2025-11-19 23:31:22.405265: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:31:22.417751: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:31:22.432487: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

[Checkpoint] Saved to ckpts/catelmo_ensemble/step_0030

Epoch 4:
['CASSLSYEQYF', 'CASSLSYEQYF', 'CASSSSSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSSSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLAYEQYF', 'CASSLGTEAFF', 'CASSYEQYF', 'CASSLSYEQYF', 'CASSSSYEQYF']
43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 197.30 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 347.55 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.29it/s]
100%|██████████| 1/1 [00:00<00:00,  3.11it/s]

2025-11-19 23:31:40.722609: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:31:40.734978: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:31:40.749619: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 246.97 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 470.18 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.25it/s]
100%|██████████| 1/1 [00:00<00:00,  3.08it/s]

2025-11-19 23:31:58.276936: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:31:58.289317: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:31:58.304008: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 205.90 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 391.36 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.32it/s]
100%|██████████| 1/1 [00:00<00:00,  3.14it/s]

2025-11-19 23:32:16.107958: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:32:16.120364: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:32:16.135009: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/127 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/127 [00:00<00:00, 246.70 examples/s]
Map (num_proc=4): 100%|██████████| 127/127 [00:00<00:00, 439.45 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.44it/s]
100%|██████████| 1/1 [00:00<00:00,  3.21it/s]

2025-11-19 23:32:33.688351: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:32:33.700913: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:32:33.715832: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 240.25 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 438.55 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.25it/s]
100%|██████████| 1/1 [00:00<00:00,  3.07it/s]

2025-11-19 23:32:51.487869: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:32:51.500247: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:32:51.514878: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

[Checkpoint] Saved to ckpts/catelmo_ensemble/step_0035
43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 256.04 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 448.64 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.36it/s]
100%|██████████| 1/1 [00:00<00:00,  3.18it/s]

2025-11-19 23:33:09.551934: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:33:09.564261: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:33:09.579523: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist


Epoch 5:
['CASSLSYEQYF', 'CASSLSYEQYF', 'CASSQGYEQFF', 'CASSLSYEQYF', 'CASSLSYEQFF', 'CASSLSYEQYF', 'CASSLGYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLAYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLAYEQYF', 'CASSLAYEQYF']
43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 254.22 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 434.75 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.37it/s]
100%|██████████| 1/1 [00:00<00:00,  3.20it/s]

2025-11-19 23:33:27.332495: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:33:27.344976: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:33:27.359724: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 206.35 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 396.58 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.28it/s]
100%|██████████| 1/1 [00:00<00:00,  3.10it/s]

2025-11-19 23:33:45.019596: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:33:45.031991: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:33:45.046704: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 207.58 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 421.74 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.25it/s]
100%|██████████| 1/1 [00:00<00:00,  3.06it/s]

2025-11-19 23:34:02.797735: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:34:02.810258: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:34:02.825042: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 207.78 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 398.44 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.30it/s]
100%|██████████| 1/1 [00:00<00:00,  3.12it/s]

2025-11-19 23:34:20.485874: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:34:20.498110: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:34:20.513031: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

[Checkpoint] Saved to ckpts/catelmo_ensemble/step_0040
43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 252.31 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 464.40 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.44it/s]
100%|██████████| 1/1 [00:00<00:00,  3.25it/s]

2025-11-19 23:34:38.569054: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:34:38.581216: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:34:38.595611: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

43



2025-11-19 23:34:56.083924: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:34:56.096319: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:34:56.110989: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-11-19 23:34:56.115267: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-11-19 23:34:56.126479: I tensorflow/core/platform/cpu_feature_guar


Epoch 6:
['CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLYEQYF', 'CASSLSYEQYF', 'CASSLAYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSYEQYF', 'CASSLAYEQYF', 'CASSLSYEQYF', 'CASSLAYEQYF', 'CASSLAYEQYF', 'CASSLAYEQYF']
43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 210.02 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 358.84 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.37it/s]
100%|██████████| 1/1 [00:00<00:00,  3.16it/s]

2025-11-19 23:35:14.156352: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:35:14.168747: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:35:14.183375: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 243.78 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 452.23 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.32it/s]
100%|██████████| 1/1 [00:00<00:00,  3.14it/s]

2025-11-19 23:35:31.787416: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:35:31.799649: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:35:31.814477: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 251.28 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 437.37 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.36it/s]
100%|██████████| 1/1 [00:00<00:00,  3.15it/s]

2025-11-19 23:35:49.328142: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:35:49.340640: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:35:49.355439: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

[Checkpoint] Saved to ckpts/catelmo_ensemble/step_0045
43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 209.24 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 419.12 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.29it/s]
100%|██████████| 1/1 [00:00<00:00,  3.11it/s]

2025-11-19 23:36:07.477216: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:36:07.489459: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:36:07.504265: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 259.27 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 472.60 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.41it/s]
100%|██████████| 1/1 [00:00<00:00,  3.22it/s]

2025-11-19 23:36:25.101485: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:36:25.113976: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:36:25.128698: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 236.92 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 375.65 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.39it/s]
100%|██████████| 1/1 [00:00<00:00,  3.19it/s]

2025-11-19 23:36:43.107810: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:36:43.120165: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:36:43.134782: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist


Epoch 7:
['CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLTEAFF', 'CASSPSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLGYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLGYEQYF', 'CASSLSYEQYF']
43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 203.86 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 400.62 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.29it/s]
100%|██████████| 1/1 [00:00<00:00,  3.12it/s]

2025-11-19 23:37:00.803287: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:37:00.815715: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:37:00.830391: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 245.45 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 455.36 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.25it/s]
100%|██████████| 1/1 [00:00<00:00,  3.06it/s]

2025-11-19 23:37:18.434028: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:37:18.446421: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:37:18.461085: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

[Checkpoint] Saved to ckpts/catelmo_ensemble/step_0050


  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/127 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/127 [00:00<00:00, 256.89 examples/s]
Map (num_proc=4): 100%|██████████| 127/127 [00:00<00:00, 382.84 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.45it/s]
100%|██████████| 1/1 [00:00<00:00,  3.25it/s]



43



2025-11-19 23:37:36.341841: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:37:36.354241: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:37:36.368750: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-11-19 23:37:36.373296: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-11-19 23:37:36.384336: I tensorflow/core/platform/cpu_feature_guar

43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 246.37 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 493.20 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.46it/s]
100%|██████████| 1/1 [00:00<00:00,  3.26it/s]

2025-11-19 23:37:53.872035: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:37:53.884185: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:37:53.898925: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 250.87 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 451.34 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.35it/s]
100%|██████████| 1/1 [00:00<00:00,  3.17it/s]

2025-11-19 23:38:11.545231: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:38:11.557450: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:38:11.572093: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 213.30 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 415.50 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.30it/s]
100%|██████████| 1/1 [00:00<00:00,  3.11it/s]

2025-11-19 23:38:29.265069: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:38:29.277436: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:38:29.292104: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist


Epoch 8:
['CASSLAYEQYF', 'CASSLSYEQYF', 'CASSLAYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF']
43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 251.21 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 461.09 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.34it/s]
100%|██████████| 1/1 [00:00<00:00,  3.14it/s]

2025-11-19 23:38:46.879871: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:38:46.892224: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:38:46.906822: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

[Checkpoint] Saved to ckpts/catelmo_ensemble/step_0055
43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 244.91 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 438.82 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.41it/s]
100%|██████████| 1/1 [00:00<00:00,  3.20it/s]

2025-11-19 23:39:04.875657: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:39:04.887844: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:39:04.902470: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 259.13 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 493.55 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.43it/s]
100%|██████████| 1/1 [00:00<00:00,  3.24it/s]

2025-11-19 23:39:22.437132: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:39:22.449484: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:39:22.464145: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/127 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/127 [00:00<00:00, 251.90 examples/s]
Map (num_proc=4): 100%|██████████| 127/127 [00:00<00:00, 453.92 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.37it/s]
100%|██████████| 1/1 [00:00<00:00,  3.16it/s]

2025-11-19 23:39:40.045795: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:39:40.058084: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:39:40.072854: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 195.20 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 350.63 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.26it/s]
100%|██████████| 1/1 [00:00<00:00,  3.10it/s]

2025-11-19 23:39:57.700783: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:39:57.712989: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:39:57.727376: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

43



  _torch_pytree._register_pytree_node(

Map (num_proc=4):   0%|          | 0/128 [00:00<?, ? examples/s]
Map (num_proc=4):  25%|██▌       | 32/128 [00:00<00:00, 227.80 examples/s]
Map (num_proc=4): 100%|██████████| 128/128 [00:00<00:00, 458.33 examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  3.31it/s]
100%|██████████| 1/1 [00:00<00:00,  3.13it/s]

2025-11-19 23:40:15.429312: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-19 23:40:15.441728: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-19 23:40:15.456766: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to regist

[Checkpoint] Saved to ckpts/catelmo_ensemble/step_0060

Epoch 9:
['CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLAYEQYF', 'ASSSYEQ', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLNTEAFF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSYEQYF', 'CASSLSYEQYF', 'CASSYEQYF', 'CASSLSYEQYF', 'CASSLAYEQYF', 'ASSATEA', 'CASSQGYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF', 'CASSLSYEQYF']



