<a href="https://colab.research.google.com/github/Noonanav/phage_host_set_transformers/blob/main/mean_pooling_optuna_glm_cv.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

"""
# Pooling-Based Architecture with Optuna Hyperparameter Optimization

This notebook implements hyperparameter optimization for a simple pooling-based architecture
for strain-phage interaction prediction using Optuna. It's adapted from the set transformer
optimization workflow but uses a simpler architecture based on mean/max/median pooling.
"""

In [None]:
!pip install optuna pandas numpy torch matplotlib seaborn scikit-learn
!pip install plotly kaleido
!pip install optuna-integration[pytorch_lightning]

Collecting optuna
  Downloading optuna-4.3.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.15.2-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-ma

In [None]:
# Setup and imports
import os
import pickle
import datetime
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import optuna
from optuna.pruners import MedianPruner
from optuna.samplers import TPESampler
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from typing import Dict, List, Tuple, Optional
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    matthews_corrcoef, roc_auc_score, average_precision_score, confusion_matrix,
    roc_curve, precision_recall_curve, auc
)
from tqdm import tqdm
import json

# Set up result directory with timestamp
# Each run writes into its own timestamped directory so that artifacts from
# different runs never overwrite one another.
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
RESULTS_DIR = f"optuna_pooling_results_{timestamp}"
os.makedirs(RESULTS_DIR, exist_ok=True)
os.makedirs(os.path.join(RESULTS_DIR, "plots"), exist_ok=True)
os.makedirs(os.path.join(RESULTS_DIR, "models"), exist_ok=True)
os.makedirs(os.path.join(RESULTS_DIR, "metrics"), exist_ok=True)

# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Mount Google Drive if in Colab
# The import fails outside Colab; the mount itself may also fail. Both cases
# are treated as "not in Colab". Catching Exception (rather than a bare
# except) keeps KeyboardInterrupt / SystemExit working while the mount
# prompt is waiting.
try:
    from google.colab import drive
    drive.mount('/content/drive')
    IN_COLAB = True
except Exception:
    IN_COLAB = False
    print("Not running in Colab, skipping drive mount")

Using device: cuda
Mounted at /content/drive


## Data Loading and Processing Functions

In [None]:
def load_embeddings_flexible(embedding_dir, genome_ids_csv=None, id_column="genome_id"):
    """Load per-genome embeddings from the ``.npy`` files in a directory.

    Two on-disk layouts are recognized:

    * a 2-D array: rows are given generated IDs ``"<stem>_gene_<i>"``
      (1-based);
    * a 0-d object array wrapping a mapping ``{gene_id: vector}``: the
      vectors are stacked in the mapping's key order.

    Any other layout is skipped with a warning.

    Args:
        embedding_dir: Directory that is searched (non-recursively) for
            ``*.npy`` files. The file stem is used as the genome identifier.
        genome_ids_csv: Optional CSV path. When given and readable, only
            files whose stem appears in ``id_column`` are loaded. If the CSV
            cannot be read, or the column is missing, the problem is printed
            and all files are loaded instead.
        id_column: Name of the CSV column holding the genome identifiers.

    Returns:
        dict mapping identifier -> ``(embedding_array, gene_ids)``. Empty if
        the directory contains no ``.npy`` files.
    """
    embedding_dir = Path(embedding_dir)
    embeddings = {}

    # If genome_ids_csv is provided, load specific genome IDs
    genome_ids = None
    if genome_ids_csv:
        try:
            genome_df = pd.read_csv(genome_ids_csv)
            if id_column not in genome_df.columns:
                raise ValueError(f"Column '{id_column}' not found in {genome_ids_csv}")
            # IDs are compared against file stems, which are strings.
            genome_ids = set(genome_df[id_column].astype(str))
            print(f"Loaded {len(genome_ids)} genome IDs from {genome_ids_csv}")
        except Exception as e:
            # Deliberate best effort: a broken ID list must not block loading.
            print(f"Error loading genome IDs: {e}")
            print("Falling back to loading all embeddings")
            genome_ids = None

    # Sorted so the load order (and thus dict order) does not depend on the
    # filesystem's directory-listing order.
    file_paths = sorted(embedding_dir.glob('*.npy'))
    if not file_paths:
        print(f"No .npy files found in {embedding_dir}")
        return embeddings

    # Track which genomes were found
    found_genomes = set()

    for file_path in file_paths:
        identifier = file_path.stem  # filename without extension

        # Skip if not in the list of genome IDs (if a list was provided)
        if genome_ids is not None and identifier not in genome_ids:
            continue

        try:
            # NOTE(security): allow_pickle=True is needed for the dict layout
            # but unpickling can execute arbitrary code; only point this at
            # trusted files.
            embedding_data = np.load(file_path, allow_pickle=True)

            if not isinstance(embedding_data, np.ndarray):
                print(f"Warning: Unrecognized data type for {file_path}")
                continue

            if embedding_data.ndim == 2:
                # Plain matrix: one row per gene, IDs are generated.
                gene_ids = [f"{identifier}_gene_{i+1}" for i in range(embedding_data.shape[0])]
                embeddings[identifier] = (embedding_data, gene_ids)

            # .item() is only valid on single-element arrays, so restrict the
            # dict check to 0-d arrays instead of letting 1-D/3-D inputs raise.
            elif embedding_data.ndim == 0 and hasattr(embedding_data.item(), 'keys'):
                embeddings_dict = embedding_data.item()
                gene_ids = list(embeddings_dict.keys())
                embedding_list = [embeddings_dict[gene_id] for gene_id in gene_ids]

                if not embedding_list:
                    print(f"Warning: No embeddings found in {file_path}")
                    continue

                embeddings[identifier] = (np.stack(embedding_list), gene_ids)
            else:
                print(f"Warning: Unrecognized array format for {file_path}")
                continue

            found_genomes.add(identifier)

        except Exception as e:
            # One unreadable file should not abort the whole load.
            print(f"Error loading {file_path}: {e}")

    # Report on results
    print(f"Successfully loaded {len(embeddings)} embeddings")

    # If genome IDs were provided, report on missing genomes
    if genome_ids is not None:
        missing_genomes = genome_ids - found_genomes
        if missing_genomes:
            print(f"Warning: {len(missing_genomes)} genomes from the CSV were not found")

    return embeddings

def filter_interactions_by_strain(interactions_df, random_state=42, test_size=0.2):
    """Split interactions into train/test sets so that no strain is in both.

    The unique values of the ``'strain'`` column are split with
    ``train_test_split``; every interaction row then follows its strain.

    Args:
        interactions_df: DataFrame with at least a ``'strain'`` column.
        random_state: Seed forwarded to ``train_test_split``.
        test_size: Share (or count) of strains held out for testing,
            forwarded to ``train_test_split``. Defaults to 0.2, the value
            that was previously hard-coded.

    Returns:
        ``(train_df, test_df)``: row subsets of ``interactions_df``.
    """
    unique_strains = interactions_df['strain'].unique()
    train_strains, test_strains = train_test_split(
        unique_strains,
        test_size=test_size,
        random_state=random_state
    )

    train_df = interactions_df[interactions_df['strain'].isin(train_strains)]
    test_df = interactions_df[interactions_df['strain'].isin(test_strains)]

    print(f"Train set: {len(train_df)} interactions, {len(train_strains)} strains")
    print(f"Test set:  {len(test_df)} interactions, {len(test_strains)} strains")
    return train_df, test_df

def calculate_phage_specific_weights(train_df):
    """Derive one positive-class weight per phage from the training labels.

    For a phage with at least one positive interaction the weight is the
    negative/positive count ratio, clamped to the range [1.0, 10.0]. A phage
    without any positive interaction gets the fixed weight 5.0.

    Args:
        train_df: DataFrame with ``'phage'`` and ``'interaction'`` columns.

    Returns:
        dict mapping each phage (in order of first appearance) to its weight.
    """
    weights_by_phage = {}

    for phage_id in train_df['phage'].unique():
        labels = train_df[train_df['phage'] == phage_id]['interaction']
        positives = (labels == 1).sum()
        negatives = (labels == 0).sum()

        if positives > 0:
            # Negative/positive ratio, kept within [1.0, 10.0].
            ratio = negatives / positives
            weights_by_phage[phage_id] = min(max(ratio, 1.0), 10.0)
        else:
            # No positives observed for this phage: fixed fallback weight.
            weights_by_phage[phage_id] = 5.0

    return weights_by_phage

class PrecomputedPooledDataset(torch.utils.data.Dataset):
    """Interaction rows paired with already-pooled strain and phage vectors."""

    def __init__(self, interactions_df, pooled_strains, pooled_phages, phage_weights):
        """Store the lookup tables and verify every interaction is covered.

        Raises:
            ValueError: if any strain or phage in ``interactions_df`` has no
                entry in ``pooled_strains`` / ``pooled_phages``.
        """
        self.interactions = interactions_df.reset_index(drop=True)
        self.pooled_strains = pooled_strains
        self.pooled_phages = pooled_phages
        self.phage_weights = phage_weights

        # Fail early rather than hitting a KeyError in the middle of an epoch.
        unknown_strains = set(self.interactions['strain']).difference(pooled_strains.keys())
        unknown_phages = set(self.interactions['phage']).difference(pooled_phages.keys())
        if unknown_strains or unknown_phages:
            raise ValueError(
                f"Missing embeddings for {len(unknown_strains)} strains "
                f"and {len(unknown_phages)} phages."
            )

    def __len__(self):
        """Number of interaction rows."""
        return len(self.interactions)

    def __getitem__(self, idx):
        """Return ``(strain_vec, phage_vec, label, weight, strain_id, phage_id)``.

        The first four entries are float32 tensors; the weight falls back to
        1.0 for a phage that is absent from ``phage_weights``.
        """
        record = self.interactions.iloc[idx]
        strain_id = record['strain']
        phage_id = record['phage']

        return (
            torch.tensor(self.pooled_strains[strain_id], dtype=torch.float32),
            torch.tensor(self.pooled_phages[phage_id], dtype=torch.float32),
            torch.tensor(record['interaction'], dtype=torch.float32),
            torch.tensor(self.phage_weights.get(phage_id, 1.0), dtype=torch.float32),
            strain_id,
            phage_id,
        )

def create_data_loaders(train_df, test_df, strain_embeddings, phage_embeddings,
                       batch_size=16, use_phage_weights=True, pooling_type=None):
    """Build train/test DataLoaders over pooled strain and phage vectors.

    Args:
        train_df: Training interactions; also the source of the per-phage
            weights.
        test_df: Test interactions.
        strain_embeddings: Mapping passed to ``precompute_pooled_embeddings``.
        phage_embeddings: Mapping passed to ``precompute_pooled_embeddings``.
        batch_size: Batch size for both loaders.
        use_phage_weights: If True, weights come from
            ``calculate_phage_specific_weights``; otherwise every phage in
            ``train_df`` gets weight 1.0.
        pooling_type: Pooling applied to the embeddings (``'mean'``,
            ``'max'`` or ``'median'``). When left as ``None`` the previous
            behavior is kept: use ``best_params['pooling_type']`` if a
            ``best_params`` global exists, else ``'mean'``. Passing it
            explicitly avoids depending on notebook state.

    Returns:
        ``(train_loader, test_loader)``; only the train loader shuffles.
    """
    # Calculate phage-specific weights
    phage_weights = {}
    if train_df is not None and not train_df.empty:
        if use_phage_weights:
            phage_weights = calculate_phage_specific_weights(train_df)
            print(f"Calculated weights for {len(phage_weights)} phages")
        else:
            # Use a default weight of 1.0 for all phages
            phage_weights = {phage: 1.0 for phage in train_df['phage'].unique()}
            print("Using default weight of 1.0 for all phages")

    if pooling_type is None:
        # Backward-compatible fallback to the notebook-level `best_params`;
        # a missing global or missing key now resolves to 'mean'.
        tuned_params = globals().get('best_params') or {}
        pooling_type = tuned_params.get('pooling_type', 'mean')

    # Pool once up front so the datasets only do dictionary lookups.
    pooled_strains, pooled_phages = precompute_pooled_embeddings(
        strain_embeddings, phage_embeddings,
        pooling_type=pooling_type
    )

    # Create datasets
    train_dataset = PrecomputedPooledDataset(train_df, pooled_strains, pooled_phages, phage_weights)
    test_dataset = PrecomputedPooledDataset(test_df, pooled_strains, pooled_phages, phage_weights)

    # Create data loaders
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        collate_fn=collate_pooled_vectors
    )

    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=False,
        collate_fn=collate_pooled_vectors
    )

    return train_loader, test_loader

## Enhanced Pooling Model

In [None]:
_POOLING_TYPES = ('mean', 'max', 'median')


def _pool_embedding(embedding, pooling_type):
    """Reduce one embedding matrix along dim 0 and return a NumPy vector."""
    embedding_tensor = torch.tensor(embedding, dtype=torch.float32)

    if pooling_type == 'mean':
        pooled = torch.mean(embedding_tensor, dim=0)
    elif pooling_type == 'max':
        pooled = torch.max(embedding_tensor, dim=0)[0]
    elif pooling_type == 'median':
        # torch.median returns an existing element (the lower of the two
        # middle values for an even count), not their average.
        pooled = torch.median(embedding_tensor, dim=0)[0]
    else:
        raise ValueError(f"Unknown pooling type: {pooling_type}")

    return pooled.numpy()


def _pool_all(embeddings, pooling_type):
    """Pool every ``(embedding, gene_ids)`` entry of a mapping, keyed by ID."""
    return {
        entity_id: _pool_embedding(embedding, pooling_type)
        for entity_id, (embedding, _) in embeddings.items()
    }


def precompute_pooled_embeddings(strain_embeddings, phage_embeddings, pooling_type='mean'):
    """Precompute pooled embeddings for all strains and phages.

    Args:
        strain_embeddings: Mapping ``strain_id -> (embedding, gene_ids)``;
            ``gene_ids`` is ignored here.
        phage_embeddings: Mapping ``phage_id -> (embedding, gene_ids)``.
        pooling_type: ``'mean'``, ``'max'`` or ``'median'``; the reduction is
            taken over dim 0 of each embedding.

    Returns:
        ``(pooled_strains, pooled_phages)``: dicts mapping each ID to a
        float32 NumPy array.

    Raises:
        ValueError: if ``pooling_type`` is not one of the supported values.
            This is checked up front, so it is raised even when both
            mappings are empty.
    """
    if pooling_type not in _POOLING_TYPES:
        raise ValueError(f"Unknown pooling type: {pooling_type}")

    print(f"Precomputing {pooling_type} pooling for {len(strain_embeddings)} strains...")
    pooled_strains = _pool_all(strain_embeddings, pooling_type)

    print(f"Precomputing {pooling_type} pooling for {len(phage_embeddings)} phages...")
    pooled_phages = _pool_all(phage_embeddings, pooling_type)

    return pooled_strains, pooled_phages

# NOTE(review): this class is also defined, identically, in the data-loading
# cell earlier in the notebook; this later definition is the one in effect
# once both cells have run.
class PrecomputedPooledDataset(torch.utils.data.Dataset):
    """Dataset that serves interactions using precomputed pooled vectors."""

    def __init__(self, interactions_df, pooled_strains, pooled_phages, phage_weights):
        """Keep the interaction table and lookups; reject uncovered IDs.

        Raises:
            ValueError: when some strain or phage in the interaction table
                has no pooled vector.
        """
        self.interactions = interactions_df.reset_index(drop=True)
        self.pooled_strains = pooled_strains
        self.pooled_phages = pooled_phages
        self.phage_weights = phage_weights

        # Every referenced ID must have a pooled vector.
        strains_without_vec = {
            s for s in self.interactions['strain'] if s not in pooled_strains
        }
        phages_without_vec = {
            p for p in self.interactions['phage'] if p not in pooled_phages
        }

        if strains_without_vec or phages_without_vec:
            raise ValueError(
                f"Missing embeddings for {len(strains_without_vec)} strains "
                f"and {len(phages_without_vec)} phages."
            )

    def __len__(self):
        """Return the number of interactions."""
        return len(self.interactions)

    def __getitem__(self, idx):
        """Look up one interaction.

        Returns a 6-tuple: strain vector, phage vector, label and weight as
        float32 tensors, followed by the raw strain and phage identifiers.
        A phage missing from ``phage_weights`` is weighted 1.0.
        """
        entry = self.interactions.iloc[idx]
        strain_key, phage_key = entry['strain'], entry['phage']

        def to_float(value):
            return torch.tensor(value, dtype=torch.float32)

        strain_vec = to_float(self.pooled_strains[strain_key])
        phage_vec = to_float(self.pooled_phages[phage_key])
        label = to_float(entry['interaction'])
        weight = to_float(self.phage_weights.get(phage_key, 1.0))

        return strain_vec, phage_vec, label, weight, strain_key, phage_key

class PooledVectorModel(nn.Module):
    """MLP classifier over a concatenated (strain, phage) pair of pooled vectors.

    Args:
        embedding_dim: Size of each input vector; the first linear layer
            takes ``2 * embedding_dim`` features.
        dropout: Dropout probability applied after every activation.
        ln: If True a LayerNorm follows each hidden linear layer; otherwise
            an Identity placeholder keeps the layer positions the same.
        classifier_hidden_layers: Number of hidden blocks. Values below 1
            still produce the single input block.
        classifier_hidden_dim: Hidden width; defaults to ``embedding_dim``.
        activation_function: ``"relu"``, ``"gelu"`` or ``"silu"``. Any other
            value falls back to ReLU.
    """
    def __init__(self, embedding_dim=384,
                 dropout=0.1,
                 ln=True,
                 classifier_hidden_layers=1,
                 classifier_hidden_dim=None,
                 activation_function="relu"):
        super().__init__()

        hidden_dim = embedding_dim if classifier_hidden_dim is None else classifier_hidden_dim

        # Name -> activation class; unknown names resolve to ReLU.
        activation_classes = {"relu": nn.ReLU, "gelu": nn.GELU, "silu": nn.SiLU}
        self.activation = activation_classes.get(activation_function, nn.ReLU)()

        # Input width of each hidden block: the concatenated pair first,
        # then hidden_dim for every additional block.
        block_inputs = [embedding_dim * 2] + [hidden_dim] * (classifier_hidden_layers - 1)

        layers = []
        for in_features in block_inputs:
            layers.extend([
                nn.Linear(in_features, hidden_dim),
                nn.LayerNorm(hidden_dim) if ln else nn.Identity(),
                self.activation,  # one shared activation module for all blocks
                nn.Dropout(dropout),
            ])

        # Single-logit output head.
        layers.append(nn.Linear(hidden_dim, 1))

        self.classifier = nn.Sequential(*layers)

    def forward(self, strain_vec, phage_vec):
        """Return the classifier logits for the concatenated input pair."""
        # Join along the last dimension, then classify.
        pair = torch.cat([strain_vec, phage_vec], dim=-1)
        return self.classifier(pair)

def collate_pooled_vectors(batch):
    """Collate dataset items into batched tensors plus ID tuples.

    Each item is ``(strain_vec, phage_vec, label, weight, strain_id,
    phage_id)``. The vectors are stacked along a new leading dimension;
    labels and weights become float32 tensors with a trailing dimension of
    size 1; the IDs are returned as tuples.
    """
    strain_vecs, phage_vecs, labels, weights, strain_ids, phage_ids = zip(*batch)

    def as_column(values):
        # Scalars -> float32 tensor with one column.
        return torch.tensor(values, dtype=torch.float32).unsqueeze(1)

    return (
        torch.stack(strain_vecs),
        torch.stack(phage_vecs),
        as_column(labels),
        as_column(weights),
        strain_ids,
        phage_ids,
    )

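## Training and Evaluation Functions

The next code cell defines early stopping, learning-rate scheduling, and the training, validation, and evaluation loops.

## Visualization Functions

The plotting helpers are defined in the code cell that follows the training functions.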

In [None]:
class EarlyStopping:
    """Flag when a monitored metric has stopped improving.

    Call the instance once per epoch with the current metric. It returns the
    best metric seen so far and sets ``early_stop`` once ``patience``
    consecutive calls failed to improve on the best value by ``min_delta``.
    """
    def __init__(self, patience=7, min_delta=0.0, mode='max'):
        self.patience = patience
        self.min_delta = min_delta
        self.mode = mode  # 'max' or 'min'
        self.best_metric = None
        self.counter = 0
        self.early_stop = False

    def __call__(self, current_metric):
        # The first observation only establishes the baseline.
        if self.best_metric is None:
            self.best_metric = current_metric
            return self.best_metric

        if self.mode == 'max':
            # Improvement requires reaching best + min_delta.
            no_improvement = current_metric < (self.best_metric + self.min_delta)
        else:  # 'min' mode
            # Improvement requires dropping to best - min_delta.
            no_improvement = current_metric > (self.best_metric - self.min_delta)

        if no_improvement:
            self.counter += 1
        else:
            self.best_metric = current_metric
            self.counter = 0

        if self.counter >= self.patience:
            self.early_stop = True

        return self.best_metric

def get_scheduler(scheduler_type, optimizer, num_train_steps, warmup_ratio=0.1):
    """Create a learning rate scheduler."""
    if scheduler_type == "one_cycle":
        return torch.optim.lr_scheduler.OneCycleLR(
            optimizer,
            max_lr=optimizer.param_groups[0]['lr'],
            total_steps=num_train_steps,
            pct_start=warmup_ratio
        )
    elif scheduler_type == "cosine_annealing":
        return torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer,
            T_max=num_train_steps
        )
    elif scheduler_type == "reduce_on_plateau":
        return torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            mode='max',
            patience=3,
            factor=0.5
        )
    elif scheduler_type == "linear_warmup_decay":
        # Create a simple linear warmup followed by linear decay
        def lr_lambda(current_step):
            warmup_steps = int(num_train_steps * warmup_ratio)
            if current_step < warmup_steps:
                return float(current_step) / float(max(1, warmup_steps))
            return max(
                0.0,
                float(num_train_steps - current_step) / float(max(1, num_train_steps - warmup_steps))
            )
        return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda)
    else:  # "none" or any other value
        return None

def train_epoch(model, train_loader, optimizer, device, scheduler=None, use_phage_weights=True):
    """Run one training pass over ``train_loader``.

    Per batch: forward pass, BCE-with-logits loss (using the batch's weight
    tensor as ``pos_weight`` when ``use_phage_weights`` is set), backward
    pass, gradient-norm clipping at 1.0 and an optimizer step. Any scheduler
    other than ``ReduceLROnPlateau`` is stepped after every batch.

    Returns:
        ``(epoch_loss, epoch_mcc)``: the mean batch loss and the Matthews
        correlation coefficient of the predictions thresholded at 0.5.
    """
    model.train()
    running_loss = 0
    epoch_probs = []
    epoch_targets = []

    # ReduceLROnPlateau is not stepped here (it is excluded explicitly).
    step_every_batch = (
        scheduler is not None
        and not isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau)
    )

    for batch in train_loader:
        # Only the first four entries are tensors; the rest are ID tuples.
        strain_vec, phage_vec, labels, weights = batch[:4]

        strain_vec = strain_vec.to(device)
        phage_vec = phage_vec.to(device)
        labels = labels.to(device)
        # None disables positive-class weighting in the loss below.
        pos_weight = weights.to(device) if use_phage_weights else None

        optimizer.zero_grad()
        logits = model(strain_vec, phage_vec)

        # The weight tensor is batch-specific, so the criterion is rebuilt
        # for every batch.
        loss = nn.BCEWithLogitsLoss(pos_weight=pos_weight)(logits, labels)

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        if step_every_batch:
            scheduler.step()

        running_loss += loss.item()
        epoch_probs.extend(torch.sigmoid(logits).reshape(-1).detach().cpu().numpy())
        epoch_targets.extend(labels.cpu().numpy())

    epoch_loss = running_loss / len(train_loader)
    binary_preds = (np.array(epoch_probs) > 0.5).astype(int)
    binary_labels = (np.array(epoch_targets) > 0.5).astype(int)
    epoch_mcc = matthews_corrcoef(binary_labels, binary_preds)

    return epoch_loss, epoch_mcc

def validate(model, val_loader, device, use_phage_weights=True):
    """Evaluate the model on ``val_loader`` without gradient tracking.

    The loss is BCE-with-logits, using each batch's weight tensor as
    ``pos_weight`` when ``use_phage_weights`` is set.

    Returns:
        ``(val_loss, val_mcc, binary_preds, binary_labels)``: the mean batch
        loss, the Matthews correlation coefficient, and the predictions and
        labels thresholded at 0.5 as integer arrays. The arrays are built
        from per-row slices and are not flattened here (presumably shape
        ``(N, 1)`` given the collate function; confirm before relying on
        it).
    """
    model.eval()
    running_loss = 0
    collected_probs = []
    collected_targets = []

    with torch.no_grad():
        for batch in val_loader:
            # Only the first four entries are tensors; the rest are ID tuples.
            strain_vec, phage_vec, labels, weights = batch[:4]

            strain_vec = strain_vec.to(device)
            phage_vec = phage_vec.to(device)
            labels = labels.to(device)
            # None disables positive-class weighting in the loss below.
            pos_weight = weights.to(device) if use_phage_weights else None

            logits = model(strain_vec, phage_vec)

            # Batch-specific weights, so the criterion is rebuilt per batch.
            loss = nn.BCEWithLogitsLoss(pos_weight=pos_weight)(logits, labels)

            running_loss += loss.item()
            collected_probs.extend(torch.sigmoid(logits).cpu().numpy())
            collected_targets.extend(labels.cpu().numpy())

    val_loss = running_loss / len(val_loader)
    binary_preds = (np.array(collected_probs) > 0.5).astype(int)
    binary_labels = (np.array(collected_targets) > 0.5).astype(int)
    val_mcc = matthews_corrcoef(binary_labels, binary_preds)

    return val_loss, val_mcc, binary_preds, binary_labels

def evaluate_full(model, data_loader, device, use_phage_weights=True):
    """Compute loss and a full set of classification metrics on a loader.

    Predictions are sigmoid probabilities thresholded at 0.5. The loss is
    BCE-with-logits, using each batch's weight tensor as ``pos_weight`` when
    ``use_phage_weights`` is set.

    Returns:
        dict with keys ``loss`` (mean batch loss), ``accuracy``,
        ``precision``, ``recall``, ``f1``, ``mcc``, ``roc_auc``, ``pr_auc``,
        ``conf_matrix``, ``all_preds`` (probabilities), ``all_labels`` and
        ``binary_preds``. ``roc_auc`` / ``pr_auc`` fall back to 0.5 when the
        corresponding scikit-learn function raises ``ValueError`` (e.g. ROC
        AUC with a single class in the labels).
    """
    model.eval()
    all_preds = []
    all_labels = []
    total_loss = 0

    with torch.no_grad():
        for batch in data_loader:
            # Only the first four entries are tensors; the rest are ID tuples.
            strain_vec, phage_vec, labels, weights = batch[:4]

            strain_vec = strain_vec.to(device)
            phage_vec = phage_vec.to(device)
            labels = labels.to(device)
            # None disables positive-class weighting in the loss below.
            weights = weights.to(device) if use_phage_weights else None

            logits = model(strain_vec, phage_vec)

            # Batch-specific weights, so the criterion is rebuilt per batch.
            criterion = nn.BCEWithLogitsLoss(pos_weight=weights)
            loss = criterion(logits, labels)
            total_loss += loss.item()

            preds = torch.sigmoid(logits).cpu().numpy().flatten()
            all_preds.extend(preds)
            all_labels.extend(labels.cpu().numpy().flatten())

    all_preds = np.array(all_preds)
    all_labels = np.array(all_labels)

    binary_preds = (all_preds > 0.5).astype(int)

    # Threshold-free metrics are undefined for some label distributions;
    # fall back to the chance-level value instead of aborting evaluation.
    try:
        roc_auc = roc_auc_score(all_labels, all_preds)
    except ValueError:
        roc_auc = 0.5  # Default value when one class only
    try:
        pr_auc = average_precision_score(all_labels, all_preds)
    except ValueError:
        pr_auc = 0.5

    # Return the guarded values computed above rather than recomputing them
    # unguarded (which made the fallbacks unreachable).
    return {
        'loss': total_loss / len(data_loader),
        'accuracy': accuracy_score(all_labels, binary_preds),
        'precision': precision_score(all_labels, binary_preds),
        'recall': recall_score(all_labels, binary_preds),
        'f1': f1_score(all_labels, binary_preds),
        'mcc': matthews_corrcoef(all_labels, binary_preds),
        'roc_auc': roc_auc,
        'pr_auc': pr_auc,
        'conf_matrix': confusion_matrix(all_labels, binary_preds),
        'all_preds': all_preds,
        'all_labels': all_labels,
        'binary_preds': binary_preds
    }

def train_model(model, train_loader, val_loader, trial,
                num_epochs=100,
                learning_rate=5e-5,
                patience=7,
                device='cuda',
                use_phage_weights=True,
                scheduler_type="one_cycle",
                warmup_ratio=0.1,
                weight_decay=0.01,
                metrics_dir=None,
                report_to_optuna=True):
    """Train with AdamW, early stopping on validation MCC and Optuna pruning.

    Args:
        model: Model to train; it is updated in place.
        train_loader: Training batches.
        val_loader: Validation batches.
        trial: Optuna trial, or ``None``. Used for reporting/pruning and for
            naming the output files (``"final"`` when ``None``).
        num_epochs: Maximum number of epochs.
        learning_rate: AdamW learning rate.
        patience: Early-stopping patience in epochs.
        device: Device the batches are moved to.
        use_phage_weights: Forwarded to ``train_epoch`` / ``validate``.
        scheduler_type: Schedule name passed to ``get_scheduler``.
        warmup_ratio: Warm-up fraction passed to ``get_scheduler``.
        weight_decay: AdamW weight decay.
        metrics_dir: Directory for the per-epoch metrics CSV; defaults to
            ``<RESULTS_DIR>/metrics``.
        report_to_optuna: If True (and ``trial`` is given), report the
            validation MCC each epoch and honor pruning requests.

    Returns:
        ``(history, best_val_mcc)``. ``history`` maps metric names to
        per-epoch lists. ``best_val_mcc`` is the best validation MCC seen by
        the early stopper, or NaN if no epoch completed.

    Raises:
        optuna.exceptions.TrialPruned: when Optuna prunes the trial. Any
            other exception raised during training is printed and training
            ends with whatever was recorded up to that point.
    """
    optimizer = torch.optim.AdamW(
        model.parameters(),
        lr=learning_rate,
        weight_decay=weight_decay
    )

    # Per-batch schedulers need the total number of optimizer steps.
    num_train_steps = len(train_loader) * num_epochs
    scheduler = get_scheduler(
        scheduler_type,
        optimizer,
        num_train_steps,
        warmup_ratio
    )

    early_stopping = EarlyStopping(patience=patience, mode='max')
    history = {
        'train_loss': [],
        'val_loss': [],
        'train_mcc': [],
        'val_mcc': [],
        'lr': []
    }

    metric_columns = ['epoch', 'train_loss', 'val_loss', 'train_mcc', 'val_mcc', 'lr']
    metrics_records = []  # one dict per completed epoch

    try:
        for epoch in range(num_epochs):
            # Train and validate
            train_loss, train_mcc = train_epoch(
                model, train_loader, optimizer, device,
                scheduler, use_phage_weights
            )
            val_loss, val_mcc, _, _ = validate(
                model, val_loader, device, use_phage_weights
            )
            current_lr = optimizer.param_groups[0]['lr']

            # Update history
            history['train_loss'].append(train_loss)
            history['val_loss'].append(val_loss)
            history['train_mcc'].append(train_mcc)
            history['val_mcc'].append(val_mcc)
            history['lr'].append(current_lr)

            metrics_records.append({
                'epoch': epoch,
                'train_loss': train_loss,
                'val_loss': val_loss,
                'train_mcc': train_mcc,
                'val_mcc': val_mcc,
                'lr': current_lr
            })

            # Print epoch summary
            print(f"Epoch {epoch+1}/{num_epochs} - "
                  f"Train Loss: {train_loss:.4f}, MCC: {train_mcc:.4f} | "
                  f"Val Loss: {val_loss:.4f}, MCC: {val_mcc:.4f} | "
                  f"LR: {current_lr:.6f}")

            # ReduceLROnPlateau is stepped once per epoch on the monitored
            # metric; train_epoch skips it.
            if scheduler_type == "reduce_on_plateau" and scheduler is not None:
                scheduler.step(val_mcc)

            # Report to Optuna for pruning only if enabled
            if report_to_optuna and trial is not None:
                trial.report(val_mcc, epoch)

                # Handle pruning based on val_mcc
                if trial.should_prune():
                    raise optuna.exceptions.TrialPruned()

            # Check early stopping
            best_val_mcc = early_stopping(val_mcc)
            if early_stopping.early_stop:
                print(f"\nEarly stopping triggered at epoch {epoch+1}. "
                      f"Best val MCC: {best_val_mcc:.4f}")
                break

    except optuna.exceptions.TrialPruned:
        # Pruning must reach Optuna so the trial is recorded as pruned.
        print("Trial pruned by Optuna.")
        raise
    except Exception as e:
        # Best effort: keep the metrics gathered so far and save them below.
        print(f"Training error: {e}")

    # With no completed epoch this is an empty frame with the same columns.
    metrics_df = pd.DataFrame(metrics_records, columns=metric_columns)

    # Save metrics to CSV
    trial_id = trial.number if trial is not None else "final"
    metrics_path = os.path.join(RESULTS_DIR, "metrics") if metrics_dir is None else metrics_dir
    os.makedirs(metrics_path, exist_ok=True)
    metrics_df.to_csv(os.path.join(metrics_path, f"trial_{trial_id}_metrics.csv"), index=False)

    # Plot training history
    plot_training_history(history, trial_id)

    # The early stopper holds the best validation MCC. If training failed
    # before the first epoch finished there is none; return NaN instead of
    # referencing variables that were never assigned.
    best_metric = early_stopping.best_metric
    if best_metric is None:
        best_metric = float('nan')

    return history, best_metric

In [None]:
def plot_training_history(history, trial_id):
    """Save loss and MCC curves (PNG) and the underlying values (CSV).

    Both files are written to ``<RESULTS_DIR>/plots`` and named after
    ``trial_id``. ``history`` must provide the keys ``train_loss``,
    ``val_loss``, ``train_mcc``, ``val_mcc`` and ``lr``.
    """
    # Ensure the plots directory exists
    plots_dir = os.path.join(RESULTS_DIR, "plots")
    os.makedirs(plots_dir, exist_ok=True)

    epochs = len(history['train_loss'])
    epoch_axis = range(1, epochs+1)

    fig, (loss_ax, mcc_ax) = plt.subplots(1, 2, figsize=(15, 6))

    # (axes, train key, val key, train label, val label, y label, title)
    panels = [
        (loss_ax, 'train_loss', 'val_loss', 'Train Loss', 'Val Loss', "Loss", "Loss vs. Epochs"),
        (mcc_ax, 'train_mcc', 'val_mcc', 'Train MCC', 'Val MCC', "MCC", "MCC vs. Epochs"),
    ]
    for ax, train_key, val_key, train_label, val_label, y_label, title in panels:
        ax.plot(epoch_axis, history[train_key], label=train_label)
        ax.plot(epoch_axis, history[val_key], label=val_label)
        ax.set_xlabel("Epoch")
        ax.set_ylabel(y_label)
        ax.set_title(title)
        ax.legend()
        ax.grid(alpha=0.3)

    fig.tight_layout()

    # Save the plot
    fig.savefig(os.path.join(plots_dir, f"trial_{trial_id}_history.png"), dpi=300, bbox_inches='tight')
    plt.close(fig)

    # Also save the data as CSV
    columns = {'epoch': list(epoch_axis)}
    for key in ('train_loss', 'val_loss', 'train_mcc', 'val_mcc', 'lr'):
        columns[key] = history[key]
    pd.DataFrame(columns).to_csv(
        os.path.join(plots_dir, f"trial_{trial_id}_history.csv"), index=False
    )

def plot_confusion_matrix(conf_matrix, trial_id):
    """Save the confusion matrix as an annotated heatmap (PNG) and as CSV.

    Both files are written under ``<RESULTS_DIR>/plots`` and named after
    ``trial_id``.
    """
    predicted_names = ['Predicted Negative', 'Predicted Positive']
    actual_names = ['Actual Negative', 'Actual Positive']

    fig, ax = plt.subplots(figsize=(8, 7))
    sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", cbar=False,
                xticklabels=predicted_names,
                yticklabels=actual_names,
                ax=ax)
    ax.set_title("Confusion Matrix")
    ax.set_xlabel("Predicted Label")
    ax.set_ylabel("True Label")

    image_path = os.path.join(RESULTS_DIR, f"plots/trial_{trial_id}_confusion_matrix.png")
    fig.savefig(image_path, dpi=300, bbox_inches='tight')
    plt.close(fig)

    # Save confusion matrix data
    table_path = os.path.join(RESULTS_DIR, f"plots/trial_{trial_id}_confusion_matrix.csv")
    pd.DataFrame(conf_matrix).to_csv(table_path, index=False)

def plot_roc_curve(all_labels, all_preds, trial_id):
    """Compute the ROC curve for the given labels/scores, plot it and save it.

    Writes a PNG of the curve (with its area in the legend) and a CSV with the
    ``fpr``/``tpr`` points under ``RESULTS_DIR``, both named after ``trial_id``.
    """
    false_pos_rate, true_pos_rate, _ = roc_curve(all_labels, all_preds)
    area = auc(false_pos_rate, true_pos_rate)

    image_path = os.path.join(RESULTS_DIR, f"plots/trial_{trial_id}_roc_curve.png")
    table_path = os.path.join(RESULTS_DIR, f"plots/trial_{trial_id}_roc_curve.csv")

    fig, ax = plt.subplots(figsize=(8, 7))
    ax.plot(false_pos_rate, true_pos_rate, color='darkorange', lw=2,
            label=f'ROC curve (area = {area:.3f})')
    # Diagonal reference line
    ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('Receiver Operating Characteristic (ROC) Curve')
    ax.legend(loc="lower right")
    ax.grid(alpha=0.3)

    fig.savefig(image_path, dpi=300, bbox_inches='tight')
    plt.close(fig)

    # Persist the curve points alongside the image
    curve_points = pd.DataFrame({'fpr': false_pos_rate, 'tpr': true_pos_rate})
    curve_points.to_csv(table_path, index=False)

def plot_pr_curve(all_labels, all_preds, trial_id):
    """Compute the precision-recall curve, plot it and save it.

    Writes a PNG of the curve (with its area in the legend) and a CSV with the
    ``recall``/``precision`` points under ``RESULTS_DIR``, named after
    ``trial_id``.
    """
    precisions, recalls, _ = precision_recall_curve(all_labels, all_preds)
    area = auc(recalls, precisions)

    image_path = os.path.join(RESULTS_DIR, f"plots/trial_{trial_id}_pr_curve.png")
    table_path = os.path.join(RESULTS_DIR, f"plots/trial_{trial_id}_pr_curve.csv")

    fig, ax = plt.subplots(figsize=(8, 7))
    ax.plot(recalls, precisions, color='darkorange', lw=2,
            label=f'PR curve (area = {area:.3f})')
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('Recall')
    ax.set_ylabel('Precision')
    ax.set_title('Precision-Recall Curve')
    ax.legend(loc="lower left")
    ax.grid(alpha=0.3)

    fig.savefig(image_path, dpi=300, bbox_inches='tight')
    plt.close(fig)

    # Persist the curve points alongside the image
    curve_points = pd.DataFrame({'recall': recalls, 'precision': precisions})
    curve_points.to_csv(table_path, index=False)

def visualize_optimization_history(study):
    """Export the standard Optuna diagnostic plots and the trials table.

    For each of the optimization-history, parameter-importance,
    parallel-coordinate and slice plots, a PNG is written into
    ``RESULTS_DIR``. Plot generation is best-effort: a failure in one plot is
    reported and the remaining plots are still attempted. The trials
    dataframe is always written to ``study_results.csv``.

    Args:
        study: The Optuna study to visualize.
    """
    # (optuna.visualization function name, output file name)
    plot_specs = (
        ("plot_optimization_history", "optimization_history.png"),
        ("plot_param_importances", "param_importances.png"),
        ("plot_parallel_coordinate", "parallel_coordinate.png"),
        ("plot_slice", "slice_plot.png"),
    )

    for plot_name, file_name in plot_specs:
        try:
            plot_fn = getattr(optuna.visualization, plot_name)
            fig = plot_fn(study)
            fig.write_image(os.path.join(RESULTS_DIR, file_name))
        except ImportError as exc:
            # A missing plotting backend affects every plot, so stop trying.
            print("Error generating Optuna visualizations. Make sure plotly is installed.")
            print("You can install it with: pip install plotly kaleido")
            print(f"  (details: {exc!r})")
            break
        except Exception as exc:
            # Catch Exception (not a bare except) so Ctrl-C still interrupts,
            # and surface the real cause instead of a generic hint.
            print(f"Error generating Optuna visualization '{plot_name}': {exc!r}")

    # Save study values
    study_df = study.trials_dataframe()
    study_df.to_csv(os.path.join(RESULTS_DIR, "study_results.csv"), index=False)

## Optuna Objective Function

In [None]:
def _aggregate_fold_metrics(fold_metrics, reducer, mcc_value):
    """Collapse per-fold metric dicts into one dict using ``reducer`` (e.g. ``np.median``)."""
    summary = {'mcc': mcc_value}
    for key in ('accuracy', 'precision', 'recall', 'f1', 'roc_auc', 'pr_auc'):
        summary[key] = reducer([m[key] for m in fold_metrics])
    return summary


def objective_with_cv(trial, interactions_df, strain_embeddings, phage_embeddings, n_folds=5, random_state=42):
    """Optuna objective: strain-grouped k-fold cross-validation of one configuration.

    Hyperparameters are sampled from ``trial``. The unique strains are split
    with a shuffled ``KFold`` so that all interactions of a strain land in the
    same fold. One model is trained per fold; the MCC value returned by
    ``train_model`` for that fold is reported to Optuna with the fold index as
    the step, and the trial is pruned if Optuna requests it.

    Args:
        trial: Optuna trial used for sampling, reporting and pruning.
        interactions_df: DataFrame with at least ``strain`` and ``phage``
            columns (plus whatever ``PrecomputedPooledDataset`` reads).
        strain_embeddings: Strain embeddings passed to
            ``precompute_pooled_embeddings``.
        phage_embeddings: Phage embeddings passed to
            ``precompute_pooled_embeddings``.
        n_folds: Number of cross-validation folds.
        random_state: Seed for the fold shuffling.

    Returns:
        The median of the per-fold validation MCC values.

    Raises:
        optuna.exceptions.TrialPruned: If the pruner stops the trial after a fold.

    Side effects:
        Prints progress and writes ``trial_<number>_report.pkl`` into
        ``RESULTS_DIR`` for trials that finish all folds.
    """
    from sklearn.model_selection import KFold

    # Sample hyperparameters
    pooling_type = trial.suggest_categorical("pooling_type", ["mean", "max", "median"])

    classifier_hidden_layers = trial.suggest_int("classifier_hidden_layers", 1, 3)
    classifier_hidden_dim = trial.suggest_categorical("classifier_hidden_dim", [256, 384, 512, 768])
    dropout = trial.suggest_float("dropout", 0.05, 0.3)
    ln = trial.suggest_categorical("ln", [True, False])
    activation_function = trial.suggest_categorical("activation_function", ["relu", "gelu", "silu"])
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-4, log=True)
    batch_size = trial.suggest_categorical("batch_size", [128, 256, 512, 1024])
    use_phage_weights = trial.suggest_categorical("use_phage_weights", [True, False])
    weight_decay = trial.suggest_float("weight_decay", 0.001, 0.1, log=True)
    scheduler_type = trial.suggest_categorical(
        "scheduler_type",
        ["one_cycle", "cosine_annealing", "reduce_on_plateau"]
    )
    warmup_ratio = trial.suggest_float("warmup_ratio", 0.05, 0.2)

    # Print trial parameters
    print(f"\n{'='*80}")
    print(f"TRIAL {trial.number}: PARAMETER CONFIGURATION (WITH {n_folds}-FOLD CV)")
    print(f"{'='*80}")
    print("Architecture Parameters:")
    print(f"  - pooling_type: {pooling_type}")
    print(f"  - classifier_hidden_layers: {classifier_hidden_layers}")
    print(f"  - classifier_hidden_dim: {classifier_hidden_dim}")
    print(f"  - dropout: {dropout:.3f}")
    print(f"  - ln: {ln}")
    print(f"  - activation_function: {activation_function}")
    print("Training Parameters:")
    print(f"  - learning_rate: {learning_rate:.6f}")
    print(f"  - batch_size: {batch_size}")
    print(f"  - use_phage_weights: {use_phage_weights}")
    print(f"  - weight_decay: {weight_decay:.4f}")
    print(f"  - scheduler_type: {scheduler_type}")
    print(f"  - warmup_ratio: {warmup_ratio:.3f}")
    print(f"{'='*80}\n")

    # Fixed training budget per fold (kept small because every trial trains
    # n_folds models) and fixed early-stopping patience.
    num_epochs = 75
    patience = 7

    # Folds are formed over unique strains (grouped split, not stratified):
    # every interaction of a strain ends up on the same side of the split.
    unique_strains = interactions_df['strain'].unique()
    kf = KFold(n_splits=n_folds, shuffle=True, random_state=random_state)

    # Pooling depends only on the embeddings and the pooling type, not on the
    # fold, so compute it once per trial instead of once per fold.
    pooled_strains, pooled_phages = precompute_pooled_embeddings(
        strain_embeddings, phage_embeddings, pooling_type
    )

    # Get embedding dimension from the first strain
    first_strain_id = next(iter(pooled_strains))
    embedding_dim = pooled_strains[first_strain_id].shape[0]

    # Per-fold MCC (as returned by train_model) and full metric dicts
    fold_mcc_values = []
    all_val_metrics = []

    for fold_idx, (train_idx, val_idx) in enumerate(kf.split(unique_strains)):
        print(f"\n{'='*80}")
        print(f"FOLD {fold_idx+1}/{n_folds}")
        print(f"{'='*80}")

        # Split strains into train and validation sets
        train_strains = unique_strains[train_idx]
        val_strains = unique_strains[val_idx]

        # Create dataframes based on strain splits
        train_df = interactions_df[interactions_df['strain'].isin(train_strains)]
        val_df = interactions_df[interactions_df['strain'].isin(val_strains)]

        print(f"Train set: {len(train_df)} interactions, {len(train_strains)} strains")
        print(f"Validation set: {len(val_df)} interactions, {len(val_strains)} strains")

        # Phage weights come from the training split only; with weighting
        # disabled every training phage gets weight 1.0.
        # NOTE(review): the same dict is handed to the validation dataset -
        # confirm PrecomputedPooledDataset tolerates phages absent from it.
        if use_phage_weights:
            phage_weights = calculate_phage_specific_weights(train_df)
        else:
            phage_weights = {phage: 1.0 for phage in train_df['phage'].unique()}

        # Create datasets with precomputed vectors
        train_dataset = PrecomputedPooledDataset(train_df, pooled_strains, pooled_phages, phage_weights)
        val_dataset = PrecomputedPooledDataset(val_df, pooled_strains, pooled_phages, phage_weights)

        # Create data loaders
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=batch_size,
            shuffle=True,
            collate_fn=collate_pooled_vectors
        )

        val_loader = torch.utils.data.DataLoader(
            val_dataset,
            batch_size=batch_size,
            shuffle=False,
            collate_fn=collate_pooled_vectors
        )

        # Fresh model for every fold
        model = PooledVectorModel(
            embedding_dim=embedding_dim,
            dropout=dropout,
            ln=ln,
            classifier_hidden_layers=classifier_hidden_layers,
            classifier_hidden_dim=classifier_hidden_dim,
            activation_function=activation_function
        ).to(device)

        # Train model; the training history is not needed here
        _, val_mcc = train_model(
            model,
            train_loader,
            val_loader,
            trial,
            num_epochs=num_epochs,
            learning_rate=learning_rate,
            patience=patience,
            device=device,
            use_phage_weights=use_phage_weights,
            scheduler_type=scheduler_type,
            warmup_ratio=warmup_ratio,
            weight_decay=weight_decay,
            report_to_optuna=False  # Disable epoch-level reporting
        )

        # Evaluate on validation set
        val_metrics = evaluate_full(model, val_loader, device, use_phage_weights)
        all_val_metrics.append(val_metrics)

        # Store MCC value for this fold
        fold_mcc_values.append(val_mcc)

        print(f"Fold {fold_idx+1} - Val MCC: {val_mcc:.4f}")

        # Report to Optuna for pruning at fold level (step == fold index)
        trial.report(val_mcc, fold_idx)

        # Handle pruning based on fold-level performance
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    # Calculate median and also mean/std of MCC across folds for reporting
    median_val_mcc = np.median(fold_mcc_values)
    mean_val_mcc = np.mean(fold_mcc_values)
    std_val_mcc = np.std(fold_mcc_values)

    # Print cross-validation results
    print(f"\n{'='*80}")
    print("CROSS-VALIDATION RESULTS")
    print(f"{'='*80}")
    print(f"Median MCC: {median_val_mcc:.4f}")
    print(f"Mean MCC: {mean_val_mcc:.4f} ± {std_val_mcc:.4f}")
    for fold_idx, fold_mcc in enumerate(fold_mcc_values):
        print(f"Fold {fold_idx+1} MCC: {fold_mcc:.4f}")
    print(f"{'='*80}\n")

    # 'mcc' comes from the train_model values above; the remaining metrics
    # are reduced over the evaluate_full results of each fold.
    median_metrics = _aggregate_fold_metrics(all_val_metrics, np.median, median_val_mcc)
    mean_metrics = _aggregate_fold_metrics(all_val_metrics, np.mean, mean_val_mcc)

    # Log trial results
    trial_report = {
        'trial_number': trial.number,
        'val_mcc_median': median_val_mcc,
        'val_mcc_mean': mean_val_mcc,
        'val_mcc_std': std_val_mcc,
        'val_mcc_per_fold': fold_mcc_values,
        'median_metrics': median_metrics,
        'mean_metrics': mean_metrics,
        'hyperparameters': {
            'pooling_type': pooling_type,
            'dropout': dropout,
            'ln': ln,
            'classifier_hidden_layers': classifier_hidden_layers,
            'classifier_hidden_dim': classifier_hidden_dim,
            'activation_function': activation_function,
            'learning_rate': learning_rate,
            'batch_size': batch_size,
            'use_phage_weights': use_phage_weights,
            'weight_decay': weight_decay,
            'scheduler_type': scheduler_type,
            'warmup_ratio': warmup_ratio
        }
    }

    # Save trial report
    with open(os.path.join(RESULTS_DIR, f"trial_{trial.number}_report.pkl"), 'wb') as f:
        pickle.dump(trial_report, f)

    # Return median MCC as optimization metric
    return median_val_mcc

def train_best_model_with_multiple_seeds(study, interactions_df, strain_embeddings, phage_embeddings,
                                         n_seeds=5, random_state_base=42):
    """Retrain the study's best configuration under several random seeds.

    For seed ``random_state_base + i`` (``i`` in ``range(n_seeds)``) the data
    is split with ``filter_interactions_by_strain``, a fresh model is trained
    and then evaluated with ``evaluate_full`` on the held-out split. Each
    seed's model is saved, plus two summary checkpoints: the model with the
    highest test MCC and the model whose test MCC is closest to the median.

    Args:
        study: Optuna study; ``study.best_trial.params`` supplies the
            hyperparameters.
        interactions_df: Interaction table passed to
            ``filter_interactions_by_strain``.
        strain_embeddings: Strain embeddings for pooling.
        phage_embeddings: Phage embeddings for pooling.
        n_seeds: Number of seeds (and models) to train; must be >= 1.
        random_state_base: First seed; subsequent seeds increment by one.

    Returns:
        ``(best_model, median_model, final_report)`` where ``final_report`` is
        the dict that is also pickled to
        ``final_model_multiple_seeds_report.pkl`` in ``RESULTS_DIR``.

    Raises:
        ValueError: If ``n_seeds`` is smaller than 1.
    """
    if n_seeds < 1:
        # Without this the function fails later with an opaque numpy error.
        raise ValueError(f"n_seeds must be >= 1, got {n_seeds}")

    best_params = study.best_trial.params
    weighted = best_params['use_phage_weights']

    print(f"\n{'='*80}")
    print(f"TRAINING FINAL MODEL WITH {n_seeds} DIFFERENT RANDOM SEEDS")
    print(f"{'='*80}")

    # Pooling depends only on the embeddings and the pooling type, not on the
    # seed, so compute it once up front.
    pooled_strains, pooled_phages = precompute_pooled_embeddings(
        strain_embeddings, phage_embeddings, pooling_type=best_params['pooling_type']
    )

    # Get embedding dimension from the first strain
    first_strain_id = next(iter(pooled_strains))
    embedding_dim = pooled_strains[first_strain_id].shape[0]

    # Store metrics and trained models for each seed
    all_seed_metrics = []
    all_models = []

    for seed_idx in range(n_seeds):
        random_state = random_state_base + seed_idx
        print(f"\n{'='*80}")
        print(f"SEED {seed_idx+1}/{n_seeds} (random_state={random_state})")
        print(f"{'='*80}")

        # Split data with current seed
        train_df, test_df = filter_interactions_by_strain(interactions_df, random_state)

        # Phage weights come from the training split only; with weighting
        # disabled every training phage gets weight 1.0.
        if weighted:
            phage_weights = calculate_phage_specific_weights(train_df)
        else:
            phage_weights = {phage: 1.0 for phage in train_df['phage'].unique()}

        # Create datasets
        train_dataset = PrecomputedPooledDataset(train_df, pooled_strains, pooled_phages, phage_weights)
        test_dataset = PrecomputedPooledDataset(test_df, pooled_strains, pooled_phages, phage_weights)

        # Create dataloaders
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=best_params['batch_size'],
            shuffle=True,
            collate_fn=collate_pooled_vectors
        )

        test_loader = torch.utils.data.DataLoader(
            test_dataset,
            batch_size=best_params['batch_size'],
            shuffle=False,
            collate_fn=collate_pooled_vectors
        )

        # Initialize model with best parameters
        model = PooledVectorModel(
            embedding_dim=embedding_dim,
            dropout=best_params['dropout'],
            ln=best_params['ln'],
            classifier_hidden_layers=best_params['classifier_hidden_layers'],
            classifier_hidden_dim=best_params['classifier_hidden_dim'],
            activation_function=best_params['activation_function']
        ).to(device)

        # NOTE(review): test_loader is passed in the slot objective_with_cv
        # uses for its validation loader. If train_model uses that loader for
        # early stopping / checkpoint selection (confirm), the test metrics
        # below are optimistically biased.
        train_model(
            model,
            train_loader,
            test_loader,
            None,  # No trial object for final training
            num_epochs=150,  # Use more epochs for final model
            learning_rate=best_params['learning_rate'],
            patience=15,  # More patience for final model
            device=device,
            use_phage_weights=weighted,
            scheduler_type=best_params['scheduler_type'],
            warmup_ratio=best_params['warmup_ratio'],
            weight_decay=best_params['weight_decay']
        )

        # Evaluate on test set
        test_metrics = evaluate_full(model, test_loader, device, weighted)
        all_seed_metrics.append(test_metrics)
        all_models.append(model)

        # Save this seed's model
        torch.save({
            'model_state_dict': model.state_dict(),
            'best_params': best_params,
            'test_metrics': test_metrics,
            'seed': random_state,
            'embedding_dim': embedding_dim
        }, os.path.join(RESULTS_DIR, f"models/final_model_seed_{random_state}.pt"))

        # Print seed results
        print(f"\nSeed {seed_idx+1} performance:")
        print(f"Test MCC: {test_metrics['mcc']:.4f}")
        print(f"Test Accuracy: {test_metrics['accuracy']:.4f}")
        print(f"Test F1: {test_metrics['f1']:.4f}")
        print(f"Test ROC AUC: {test_metrics['roc_auc']:.4f}")
        print(f"Test PR AUC: {test_metrics['pr_auc']:.4f}")

    def _across_seeds(reducer, key):
        # Plain float so saved artifacts do not carry numpy scalars here.
        return float(reducer([m[key] for m in all_seed_metrics]))

    summary_keys = ('accuracy', 'f1', 'roc_auc', 'pr_auc')
    mcc_per_seed = np.array([m['mcc'] for m in all_seed_metrics])

    # Median metrics across seeds
    median_mcc = float(np.median(mcc_per_seed))
    median_accuracy = _across_seeds(np.median, 'accuracy')
    median_f1 = _across_seeds(np.median, 'f1')
    median_roc_auc = _across_seeds(np.median, 'roc_auc')
    median_pr_auc = _across_seeds(np.median, 'pr_auc')

    # Also calculate mean and std for comparison
    mean_mcc = float(np.mean(mcc_per_seed))
    std_mcc = float(np.std(mcc_per_seed))

    # Best seed: highest test MCC. int() keeps seed values plain Python ints.
    best_seed_idx = int(np.argmax(mcc_per_seed))
    best_seed = random_state_base + best_seed_idx
    best_model = all_models[best_seed_idx]
    best_metrics = all_seed_metrics[best_seed_idx]

    # Median seed: test MCC closest to the median across seeds
    median_seed_idx = int(np.argmin(np.abs(mcc_per_seed - median_mcc)))
    median_seed = random_state_base + median_seed_idx
    median_model = all_models[median_seed_idx]
    median_seed_metrics = all_seed_metrics[median_seed_idx]

    # Save both the best model and the median model
    # Best model (highest MCC)
    torch.save({
        'model_state_dict': best_model.state_dict(),
        'best_params': best_params,
        'test_metrics': best_metrics,
        'seed': best_seed,
        'embedding_dim': embedding_dim,
        'all_seed_metrics': all_seed_metrics,
        'median_mcc': median_mcc,
        'mean_mcc': mean_mcc,
        'std_mcc': std_mcc,
        'selection_criterion': 'best_mcc'
    }, os.path.join(RESULTS_DIR, "models/final_model_best.pt"))

    # Median model (closest to median MCC)
    torch.save({
        'model_state_dict': median_model.state_dict(),
        'best_params': best_params,
        'test_metrics': median_seed_metrics,
        'seed': median_seed,
        'embedding_dim': embedding_dim,
        'all_seed_metrics': all_seed_metrics,
        'median_mcc': median_mcc,
        'mean_mcc': mean_mcc,
        'std_mcc': std_mcc,
        'selection_criterion': 'median_mcc'
    }, os.path.join(RESULTS_DIR, "models/final_model_median.pt"))

    # Print final results summary
    print(f"\n{'='*80}")
    print(f"FINAL MODEL SUMMARY ({n_seeds} SEEDS)")
    print(f"{'='*80}")
    print(f"Median Test MCC: {median_mcc:.4f}")
    print(f"Mean Test MCC: {mean_mcc:.4f} ± {std_mcc:.4f}")
    print(f"Median Test Accuracy: {median_accuracy:.4f}")
    print(f"Median Test F1: {median_f1:.4f}")
    print(f"Median Test ROC AUC: {median_roc_auc:.4f}")
    print(f"Median Test PR AUC: {median_pr_auc:.4f}")

    print(f"\nBest model (seed {best_seed}):")
    print(f"Test MCC: {best_metrics['mcc']:.4f}")
    print(f"Test Accuracy: {best_metrics['accuracy']:.4f}")
    print(f"Test F1: {best_metrics['f1']:.4f}")

    print(f"\nMedian model (seed {median_seed}):")
    print(f"Test MCC: {median_seed_metrics['mcc']:.4f}")
    print(f"Test Accuracy: {median_seed_metrics['accuracy']:.4f}")
    print(f"Test F1: {median_seed_metrics['f1']:.4f}")

    print(f"{'='*80}")

    # Summary report: same keys as the checkpoints, plus mean/std per metric
    mean_metrics = {'mcc': mean_mcc}
    std_metrics = {'mcc': std_mcc}
    for key in summary_keys:
        mean_metrics[key] = _across_seeds(np.mean, key)
        std_metrics[key] = _across_seeds(np.std, key)

    final_report = {
        'best_params': best_params,
        'all_seed_metrics': all_seed_metrics,
        'median_metrics': {
            'mcc': median_mcc,
            'accuracy': median_accuracy,
            'f1': median_f1,
            'roc_auc': median_roc_auc,
            'pr_auc': median_pr_auc
        },
        'mean_metrics': mean_metrics,
        'std_metrics': std_metrics,
        'best_seed': best_seed,
        'best_metrics': best_metrics,
        'median_seed': median_seed,
        'median_seed_metrics': median_seed_metrics,
        'n_seeds': n_seeds
    }

    with open(os.path.join(RESULTS_DIR, "final_model_multiple_seeds_report.pkl"), 'wb') as f:
        pickle.dump(final_report, f)

    # Return both models and the report
    return best_model, median_model, final_report

## Run Optimization

In [None]:
def run_optimization(interactions_path, strain_embeddings_path, phage_embeddings_path,
                    n_trials=50, random_state=42, study_name=None, output_dir=None,
                    n_folds=5, n_final_seeds=5):
    """Run the cross-validated Optuna search, then retrain the best configuration.

    Args:
        interactions_path: CSV file with the interaction table (``strain`` and
            ``phage`` columns are used for filtering).
        strain_embeddings_path: Location passed to ``load_embeddings_flexible``
            for the strain embeddings.
        phage_embeddings_path: Location passed to ``load_embeddings_flexible``
            for the phage embeddings.
        n_trials: Total number of finished trials the study should reach.
        random_state: Seed for the sampler, the CV split and the first final seed.
        study_name: Name of the Optuna study; defaults to ``cv_study_<timestamp>``.
        output_dir: Results directory; defaults to a timestamped directory.
        n_folds: Number of cross-validation folds per trial.
        n_final_seeds: Number of seeds for the final retraining.

    Returns:
        ``(study, best_model, median_model, final_report)``.

    Raises:
        ValueError: If no interaction has both a strain and a phage embedding.

    Side effects:
        Sets the module-level ``RESULTS_DIR``, creates the results directory
        tree, and writes the config, the SQLite study database, plots, CSVs
        and model checkpoints into it.
    """
    # Set global RESULTS_DIR
    global RESULTS_DIR

    # Always generate a timestamp for record-keeping
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

    # Use custom output_dir if provided, otherwise create timestamped directory
    if output_dir:
        RESULTS_DIR = output_dir
    else:
        RESULTS_DIR = f"optuna_pooling_cv_results_{timestamp}"

    # Create directory structure
    os.makedirs(RESULTS_DIR, exist_ok=True)
    for subdir in ("plots", "models", "metrics"):
        os.makedirs(os.path.join(RESULTS_DIR, subdir), exist_ok=True)

    print("\n" + "="*80)
    print(f"Starting hyperparameter optimization with {n_trials} trials")
    print(f"Using {n_folds}-fold cross-validation")
    print(f"Final model will be trained with {n_final_seeds} different seeds")
    print("="*80)
    print(f"Interactions file: {interactions_path}")
    print(f"Strain embeddings: {strain_embeddings_path}")
    print(f"Phage embeddings: {phage_embeddings_path}")
    print(f"Results directory: {RESULTS_DIR}")
    print("="*80 + "\n")

    # Save configuration
    config = {
        'interactions_path': interactions_path,
        'strain_embeddings_path': strain_embeddings_path,
        'phage_embeddings_path': phage_embeddings_path,
        'n_trials': n_trials,
        'random_state': random_state,
        'study_name': study_name,
        'timestamp': timestamp,
        'results_dir': RESULTS_DIR,
        'n_folds': n_folds,
        'n_final_seeds': n_final_seeds
    }

    with open(os.path.join(RESULTS_DIR, "config.json"), 'w') as f:
        json.dump(config, f, indent=4)

    # 1. Load data
    print("Loading embeddings...")
    strain_embeddings = load_embeddings_flexible(strain_embeddings_path)
    phage_embeddings = load_embeddings_flexible(phage_embeddings_path)

    print("Loading interaction data...")
    interactions_df = pd.read_csv(interactions_path)

    # 2. Filter to ensure we have embeddings for all strains/phages
    strain_keys = set(strain_embeddings.keys())
    phage_keys = set(phage_embeddings.keys())

    filtered_df = interactions_df[
        interactions_df['strain'].isin(strain_keys) &
        interactions_df['phage'].isin(phage_keys)
    ]

    print(f"Original interactions: {len(interactions_df)}")
    print(f"Filtered interactions: {len(filtered_df)}")

    if len(filtered_df) == 0:
        raise ValueError("No interactions match the provided embeddings!")

    # 3. Configure the study with database storage for resumability
    DB_PATH = os.path.join(RESULTS_DIR, "optuna_study_cv.db")
    storage = optuna.storages.RDBStorage(f"sqlite:///{DB_PATH}")
    effective_study_name = study_name if study_name else f"cv_study_{timestamp}"

    # load_if_exists=True returns the stored study when the name is already
    # taken, so a new and a resumed study share the same sampler and pruner
    # and unrelated storage errors are no longer mistaken for "study missing".
    # NOTE(review): objective_with_cv reports one value per fold (steps
    # 0..n_folds-1); with n_warmup_steps=10 the median pruner never fires
    # unless n_folds > 10. Lower it if fold-level pruning is wanted.
    study = optuna.create_study(
        study_name=effective_study_name,
        direction="maximize",
        sampler=TPESampler(seed=random_state),
        pruner=MedianPruner(n_startup_trials=5, n_warmup_steps=10),
        storage=storage,
        load_if_exists=True
    )

    # Only trials that ran to an outcome count against the budget; stale
    # RUNNING or FAILED trials from an interrupted session do not.
    finished_states = (optuna.trial.TrialState.COMPLETE, optuna.trial.TrialState.PRUNED)
    n_finished = len(study.get_trials(deepcopy=False, states=finished_states))

    if len(study.get_trials(deepcopy=False)) > 0:
        print(f"Resuming existing study '{effective_study_name}' with {n_finished} completed trials")
        remaining_trials = max(0, n_trials - n_finished)
    else:
        print(f"Created new study '{effective_study_name}' with {n_folds}-fold cross-validation")
        remaining_trials = n_trials

    # 4. Create objective function with cross-validation
    def objective_func(trial):
        return objective_with_cv(
            trial, filtered_df, strain_embeddings, phage_embeddings, n_folds, random_state
        )

    if remaining_trials > 0:
        # 5. Run optimization
        study.optimize(objective_func, n_trials=remaining_trials)

        # 6. Print study statistics
        print("\nStudy completed!")
        print(f"Number of finished trials: {len(study.trials)}")
        print("Best trial:")
        best_trial = study.best_trial
        print(f"  Value (median MCC): {best_trial.value:.4f}")
        print("  Params:")
        for key, value in best_trial.params.items():
            print(f"    {key}: {value}")

        # 7. Visualize study
        print("\nCreating visualization plots...")
        visualize_optimization_history(study)
    else:
        print("All trials already completed, skipping optimization.")

    # 8. Train the best model with multiple seeds
    print(f"\nTraining the best model with {n_final_seeds} different seeds...")
    best_model, median_model, final_report = train_best_model_with_multiple_seeds(
        study, filtered_df, strain_embeddings, phage_embeddings,
        n_seeds=n_final_seeds, random_state_base=random_state
    )

    # 9. Save the final study results
    trials_df = study.trials_dataframe()
    trials_df.to_csv(os.path.join(RESULTS_DIR, "all_trials_results_cv.csv"), index=False)

    # Save best parameters in a more accessible format
    best_params = study.best_params
    with open(os.path.join(RESULTS_DIR, "best_params_cv.json"), 'w') as f:
        json.dump(best_params, f, indent=4)

    print(f"\nAll study artifacts saved to {RESULTS_DIR}")

    # Print final model performance summary
    print("\nFinal Model Performance Summary:")
    print(f"Best model MCC: {final_report['best_metrics']['mcc']:.4f}")
    print(f"Median model MCC: {final_report['median_seed_metrics']['mcc']:.4f}")
    print(f"Median MCC across all seeds: {final_report['median_metrics']['mcc']:.4f}")
    print(f"Mean MCC across all seeds: {final_report['mean_metrics']['mcc']:.4f} ± {final_report['std_metrics']['mcc']:.4f}")

    return study, best_model, median_model, final_report

## Main Section

In [None]:
# Active dataset: E. coli (edit these paths to point at your own copy of the data)
interactions_path = '/content/drive/MyDrive/Arkin/phage_public_datasets/e_coli/interaction_matrix.csv'
strain_embeddings_path = '/content/drive/MyDrive/Arkin/set_transformer_data/embedding_experiments/ecoli/current_embeddings/strain'
phage_embeddings_path = '/content/drive/MyDrive/Arkin/set_transformer_data/embedding_experiments/ecoli/current_embeddings/phage'
output_dir = "/content/drive/MyDrive/Arkin/set_transformer_data/glm_optuna_pooling_cv"

# Alternative dataset: EDGE (uncomment to use instead of the E. coli paths above)
# interactions_path = '/content/drive/MyDrive/Arkin/set_transformer_data/EDGE_interaction_long_172_no2.csv'
# strain_embeddings_path = '/content/drive/MyDrive/Arkin/set_transformer_data/embedding_sets/strains'
# phage_embeddings_path = '/content/drive/MyDrive/Arkin/set_transformer_data/embedding_sets/phages'

if __name__ == "__main__":
    # Everything the run depends on, gathered in one place
    run_settings = dict(
        interactions_path=interactions_path,
        strain_embeddings_path=strain_embeddings_path,
        phage_embeddings_path=phage_embeddings_path,
        n_trials=250,
        study_name='glm_optuna_pooling_cv',
        output_dir=output_dir,
        n_folds=5,        # cross-validation folds per trial
        n_final_seeds=5,  # seeds used when retraining the best configuration
    )
    study, best_model, median_model, final_report = run_optimization(**run_settings)

    # Short recap of the winning trial
    best_trial = study.best_trial
    print(f"Best trial value (MCC): {best_trial.value:.4f}")
    print("Best parameters:")
    for key, value in best_trial.params.items():
        print(f"  {key}: {value}")


Starting hyperparameter optimization with 250 trials
Using 5-fold cross-validation
Final model will be trained with 5 different seeds
Interactions file: /content/drive/MyDrive/Arkin/phage_public_datasets/e_coli/interaction_matrix.csv
Strain embeddings: /content/drive/MyDrive/Arkin/set_transformer_data/embedding_experiments/ecoli/current_embeddings/strain
Phage embeddings: /content/drive/MyDrive/Arkin/set_transformer_data/embedding_experiments/ecoli/current_embeddings/phage
Results directory: /content/drive/MyDrive/Arkin/set_transformer_data/glm_optuna_pooling_cv

Loading embeddings...
Successfully loaded 177 embeddings
Successfully loaded 97 embeddings
Loading interaction data...
Original interactions: 38592
Filtered interactions: 16992


[I 2025-04-22 05:21:51,684] A new study created in RDB with name: glm_optuna_pooling_cv


Created new study 'glm_optuna_pooling_cv' with 5-fold cross-validation

TRIAL 0: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: max
  - classifier_hidden_layers: 2
  - classifier_hidden_dim: 768
  - dropout: 0.200
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000015
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0019
  - scheduler_type: reduce_on_plateau
  - warmup_ratio: 0.168


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing max pooling for 177 strains...
Precomputing max pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5833, MCC: -0.0057 | Val Loss: 0.5177, MCC: 0.0000 | LR: 0.000015
Epoch 2/75 - Train Loss: 0.5366, MCC: 0.0000 | Val Loss: 0.4997, MCC: 0.0000 | LR: 0.000015
Epoch 3/75 - Train Loss: 0.5274, MCC: 0.0007 | Val Loss: 0.4906, MCC: 0.0000 | LR: 0.000015
Epoch 4/75 - Train Loss: 0.5160, MCC: 0.0260 | Val Loss: 0

[I 2025-04-22 05:26:30,450] Trial 0 finished with value: 0.28451214388165563 and parameters: {'pooling_type': 'max', 'classifier_hidden_layers': 2, 'classifier_hidden_dim': 768, 'dropout': 0.20027875293580222, 'ln': True, 'activation_function': 'relu', 'learning_rate': 1.5199348301309802e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.0019010245319870357, 'scheduler_type': 'reduce_on_plateau', 'warmup_ratio': 0.16777639420895207}. Best is trial 0 with value: 0.28451214388165563.


Fold 5 - Val MCC: 0.2804

CROSS-VALIDATION RESULTS
Median MCC: 0.2845
Mean MCC: 0.2736 ± 0.0241
Fold 1 MCC: 0.2887
Fold 2 MCC: 0.2845
Fold 3 MCC: 0.2257
Fold 4 MCC: 0.2884
Fold 5 MCC: 0.2804


TRIAL 1: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 1
  - classifier_hidden_dim: 768
  - dropout: 0.291
  - ln: True
  - activation_function: gelu
Training Parameters:
  - learning_rate: 0.000013
  - batch_size: 512
  - use_phage_weights: True
  - weight_decay: 0.0110
  - scheduler_type: reduce_on_plateau
  - warmup_ratio: 0.166


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.9944, MCC: 0.0404 | Val Loss: 0.9308, MCC: -0.0141 | LR: 0.000013
Epoch 2/75 - Train Loss: 0.9891, MCC: 0.0562 | Val Loss: 0.9177, MCC: 0.0550 | LR: 0.000013
Epoch 3/75 - Train Lo

[I 2025-04-22 05:30:18,794] Trial 1 finished with value: 0.11792587630596311 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 1, 'classifier_hidden_dim': 768, 'dropout': 0.29140800826863983, 'ln': True, 'activation_function': 'gelu', 'learning_rate': 1.3244581340099339e-05, 'batch_size': 512, 'use_phage_weights': True, 'weight_decay': 0.010968217207529524, 'scheduler_type': 'reduce_on_plateau', 'warmup_ratio': 0.1662699235041672}. Best is trial 0 with value: 0.28451214388165563.


Fold 5 - Val MCC: 0.0792

CROSS-VALIDATION RESULTS
Median MCC: 0.1179
Mean MCC: 0.1514 ± 0.0849
Fold 1 MCC: 0.0550
Fold 2 MCC: 0.1179
Fold 3 MCC: 0.2508
Fold 4 MCC: 0.2541
Fold 5 MCC: 0.0792


TRIAL 2: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 768
  - dropout: 0.147
  - ln: False
  - activation_function: silu
Training Parameters:
  - learning_rate: 0.000014
  - batch_size: 512
  - use_phage_weights: True
  - weight_decay: 0.0427
  - scheduler_type: reduce_on_plateau
  - warmup_ratio: 0.061


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.9861, MCC: 0.0407 | Val Loss: 0.9231, MCC: 0.0000 | LR: 0.000014
Epoch 2/75 - Train Loss: 0.9816, MCC: 0.0474 | Val Loss: 0.9214, MCC: 0.0069 | LR: 0.000014
Epoch 3/75 - Train Loss: 0.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold 3 - Val MCC: 0.0000

FOLD 4/5
Train set: 13632 interactions, 142 strains
Validation set: 3360 interactions, 35 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.9872, MCC: 0.0000 | Val Loss: 1.1689, MCC: 0.0000 | LR: 0.000014
Epoch 2/75 - Train Loss: 0.9819, MCC: 0.0000 | Val Loss: 1.1746, MCC: 0.0000 | LR: 0.000014
Epoch 3/75 - Train Loss: 0.9825, MCC: 0.0000 | Val Loss: 1.1776, MCC: 0.0000 | LR: 0.000014
Epoch 4/75 - Train Loss: 0.9818, MCC: 0.0000 | Val Loss: 1.1752, MCC: 0.0000 | LR: 0.000014
Epoch 5/75 - Train Loss: 0.9808, MCC: 0.0020 | Val Loss: 1.1693, MCC: 0.0000 | LR: 0.000014
Epoch 6/75 - Train Loss: 0.9807, MCC: 0.0080 | Val Loss: 1.1713, MCC: 0.0000 | LR: 0.000007
Epoch 7/75 - Train Loss: 0.9810, MCC: 0.0000 | Val Loss: 1.1724, MCC: 0.0000 | LR: 0.000007
Epoch 8/75 - Train Loss: 0.9805, MCC: 0.0014 | Val Loss: 1.1720, MCC: 0.0000 | LR: 0.000007
Epoch 9/75 - Train Loss: 0.9818, MCC: 0.0282 | Val L

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold 4 - Val MCC: 0.0000

FOLD 5/5
Train set: 13632 interactions, 142 strains
Validation set: 3360 interactions, 35 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.9908, MCC: 0.0117 | Val Loss: 1.0541, MCC: 0.0000 | LR: 0.000014
Epoch 2/75 - Train Loss: 0.9858, MCC: -0.0046 | Val Loss: 1.0586, MCC: 0.0000 | LR: 0.000014
Epoch 3/75 - Train Loss: 0.9824, MCC: 0.0464 | Val Loss: 1.0629, MCC: 0.0423 | LR: 0.000014
Epoch 4/75 - Train Loss: 0.9815, MCC: 0.0664 | Val Loss: 1.0649, MCC: 0.0423 | LR: 0.000014
Epoch 5/75 - Train Loss: 0.9801, MCC: 0.0639 | Val Loss: 1.0669, MCC: 0.0423 | LR: 0.000014
Epoch 6/75 - Train Loss: 0.9805, MCC: 0.0638 | Val Loss: 1.0659, MCC: 0.0423 | LR: 0.000014
Epoch 7/75 - Train Loss: 0.9822, MCC: 0.0661 | Val Loss: 1.0674, MCC: 0.0423 | LR: 0.000014
Epoch 8/75 - Train Loss: 0.9809, MCC: 0.0644 | Val Loss: 1.0668, MCC: 0.0423 | LR: 0.000007
Epoch 9/75 - Train Loss: 0.9808, MCC: 0.0639 | Val 

[I 2025-04-22 05:36:25,396] Trial 2 finished with value: 0.006946699875503751 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 768, 'dropout': 0.14716932242237052, 'ln': False, 'activation_function': 'silu', 'learning_rate': 1.3833249975219966e-05, 'batch_size': 512, 'use_phage_weights': True, 'weight_decay': 0.04274869455295218, 'scheduler_type': 'reduce_on_plateau', 'warmup_ratio': 0.06110669776011356}. Best is trial 0 with value: 0.28451214388165563.


Fold 5 - Val MCC: 0.0517

CROSS-VALIDATION RESULTS
Median MCC: 0.0069
Mean MCC: 0.0492 ± 0.0716
Fold 1 MCC: 0.0069
Fold 2 MCC: 0.1871
Fold 3 MCC: 0.0000
Fold 4 MCC: 0.0000
Fold 5 MCC: 0.0517


TRIAL 3: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 2
  - classifier_hidden_dim: 256
  - dropout: 0.232
  - ln: False
  - activation_function: silu
Training Parameters:
  - learning_rate: 0.000058
  - batch_size: 256
  - use_phage_weights: True
  - weight_decay: 0.0016
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.126


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.9803, MCC: 0.0529 | Val Loss: 0.9259, MCC: -0.0141 | LR: 0.000058
Epoch 2/75 - Train Loss: 0.9780, MCC: 0.0792 | Val Loss: 0.9303, MCC: -0.0141 | LR: 0.000058
Epoch 3/75 - Train L

[I 2025-04-22 05:39:07,320] Trial 3 finished with value: 0.15834435212236878 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 2, 'classifier_hidden_dim': 256, 'dropout': 0.232401544584516, 'ln': False, 'activation_function': 'silu', 'learning_rate': 5.7648106701146705e-05, 'batch_size': 256, 'use_phage_weights': True, 'weight_decay': 0.001643549747511133, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.12628560367470543}. Best is trial 0 with value: 0.28451214388165563.


Fold 5 - Val MCC: 0.1428

CROSS-VALIDATION RESULTS
Median MCC: 0.1583
Mean MCC: 0.1742 ± 0.0630
Fold 1 MCC: 0.0807
Fold 2 MCC: 0.1583
Fold 3 MCC: 0.2446
Fold 4 MCC: 0.2443
Fold 5 MCC: 0.1428


TRIAL 4: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.282
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000078
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0071
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.127


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5541, MCC: -0.0070 | Val Loss: 0.4996, MCC: 0.0000 | LR: 0.000078
Epoch 2/75 - Train Loss: 0.5219, MCC: 0.0299 | Val Loss: 0.4714, MCC: 0.0000 | LR: 0.000078
Epoch 3/75 - Train Loss: 0.

[I 2025-04-22 05:42:32,225] Trial 4 finished with value: 0.31231157091463946 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.28242441308564326, 'ln': True, 'activation_function': 'relu', 'learning_rate': 7.808345085542417e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.007148510793512987, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.12661209538663487}. Best is trial 4 with value: 0.31231157091463946.


Fold 5 - Val MCC: 0.3276

CROSS-VALIDATION RESULTS
Median MCC: 0.3123
Mean MCC: 0.3142 ± 0.0133
Fold 1 MCC: 0.3123
Fold 2 MCC: 0.3277
Fold 3 MCC: 0.3119
Fold 4 MCC: 0.2917
Fold 5 MCC: 0.3276


TRIAL 5: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 2
  - classifier_hidden_dim: 256
  - dropout: 0.141
  - ln: True
  - activation_function: gelu
Training Parameters:
  - learning_rate: 0.000019
  - batch_size: 256
  - use_phage_weights: False
  - weight_decay: 0.0030
  - scheduler_type: reduce_on_plateau
  - warmup_ratio: 0.086


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5517, MCC: -0.0101 | Val Loss: 0.5059, MCC: 0.0000 | LR: 0.000019
Epoch 2/75 - Train Loss: 0.5242, MCC: 0.0000 | Val Loss: 0.4930, MCC: 0.0000 | LR: 0.000019
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 05:46:56,871] Trial 5 finished with value: 0.29777814500630156 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 2, 'classifier_hidden_dim': 256, 'dropout': 0.14090740059482348, 'ln': True, 'activation_function': 'gelu', 'learning_rate': 1.9268171109476203e-05, 'batch_size': 256, 'use_phage_weights': False, 'weight_decay': 0.003013864904679803, 'scheduler_type': 'reduce_on_plateau', 'warmup_ratio': 0.08630829072672508}. Best is trial 4 with value: 0.31231157091463946.


Fold 5 - Val MCC: 0.2976

CROSS-VALIDATION RESULTS
Median MCC: 0.2978
Mean MCC: 0.2807 ± 0.0419
Fold 1 MCC: 0.3095
Fold 2 MCC: 0.2978
Fold 3 MCC: 0.3013
Fold 4 MCC: 0.1973
Fold 5 MCC: 0.2976


TRIAL 6: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: max
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.073
  - ln: True
  - activation_function: silu
Training Parameters:
  - learning_rate: 0.000048
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0059
  - scheduler_type: one_cycle
  - warmup_ratio: 0.067


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing max pooling for 177 strains...
Precomputing max pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6899, MCC: 0.0715 | Val Loss: 0.6078, MCC: -0.0141 | LR: 0.000006
Epoch 2/75 - Train Loss: 0.5759, MCC: 0.0245 | Val Loss: 0.5096, MCC: 0.0000 | LR: 0.000018
Epoch 3/75 - Train Loss: 0.5420, MCC

[I 2025-04-22 05:50:42,338] Trial 6 finished with value: 0.26879371010376896 and parameters: {'pooling_type': 'max', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.07257244251360208, 'ln': True, 'activation_function': 'silu', 'learning_rate': 4.759533213871802e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.005935682912014687, 'scheduler_type': 'one_cycle', 'warmup_ratio': 0.06702102818608836}. Best is trial 4 with value: 0.31231157091463946.


Fold 5 - Val MCC: 0.2637

CROSS-VALIDATION RESULTS
Median MCC: 0.2688
Mean MCC: 0.3032 ± 0.0526
Fold 1 MCC: 0.3827
Fold 2 MCC: 0.3492
Fold 3 MCC: 0.2688
Fold 4 MCC: 0.2517
Fold 5 MCC: 0.2637


TRIAL 7: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 2
  - classifier_hidden_dim: 256
  - dropout: 0.073
  - ln: False
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000053
  - batch_size: 128
  - use_phage_weights: False
  - weight_decay: 0.0627
  - scheduler_type: one_cycle
  - warmup_ratio: 0.150


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6824, MCC: 0.0102 | Val Loss: 0.6598, MCC: 0.0000 | LR: 0.000003
Epoch 2/75 - Train Loss: 0.6366, MCC: 0.0000 | Val Loss: 0.5987, MCC: 0.0000 | LR: 0.000006
Epoch 3/75 - Train Loss: 0.5773, M

[I 2025-04-22 05:56:54,350] Trial 7 finished with value: 0.285772348120874 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 2, 'classifier_hidden_dim': 256, 'dropout': 0.0732756919514748, 'ln': False, 'activation_function': 'relu', 'learning_rate': 5.3205395870154084e-05, 'batch_size': 128, 'use_phage_weights': False, 'weight_decay': 0.06267702540388488, 'scheduler_type': 'one_cycle', 'warmup_ratio': 0.1495252653662084}. Best is trial 4 with value: 0.31231157091463946.


Fold 5 - Val MCC: 0.2442

CROSS-VALIDATION RESULTS
Median MCC: 0.2858
Mean MCC: 0.2738 ± 0.0246
Fold 1 MCC: 0.2939
Fold 2 MCC: 0.3007
Fold 3 MCC: 0.2858
Fold 4 MCC: 0.2442
Fold 5 MCC: 0.2442


TRIAL 8: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.131
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000012
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0183
  - scheduler_type: one_cycle
  - warmup_ratio: 0.124


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.8524, MCC: -0.0064 | Val Loss: 0.8642, MCC: 0.0000 | LR: 0.000001
Epoch 2/75 - Train Loss: 0.8155, MCC: 0.0001 | Val Loss: 0.8002, MCC: 0.0000 | LR: 0.000002
Epoch 3/75 - Train Loss: 0.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold 1 - Val MCC: 0.0132

FOLD 2/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6420, MCC: -0.0082 | Val Loss: 0.6100, MCC: 0.0000 | LR: 0.000001
Epoch 2/75 - Train Loss: 0.6256, MCC: -0.0125 | Val Loss: 0.5815, MCC: 0.0000 | LR: 0.000002
Epoch 3/75 - Train Loss: 0.5931, MCC: -0.0009 | Val Loss: 0.5403, MCC: 0.0000 | LR: 0.000003
Epoch 4/75 - Train Loss: 0.5562, MCC: -0.0094 | Val Loss: 0.5113, MCC: 0.0000 | LR: 0.000005
Epoch 5/75 - Train Loss: 0.5364, MCC: 0.0000 | Val Loss: 0.5095, MCC: 0.0000 | LR: 0.000007
Epoch 6/75 - Train Loss: 0.5372, MCC: 0.0000 | Val Loss: 0.5007, MCC: 0.0000 | LR: 0.000009
Epoch 7/75 - Train Loss: 0.5280, MCC: -0.0047 | Val Loss: 0.4926, MCC: 0.0000 | LR: 0.000011
Epoch 8/75 - Train Loss: 0.5242, MCC: -0.0047 | Val Loss: 0.4847, MCC: 0.0000 | LR: 0.000012
Epoch 9/75 - Train Loss: 0.5179, MCC: 0.00

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold 4 - Val MCC: 0.0602

FOLD 5/5
Train set: 13632 interactions, 142 strains
Validation set: 3360 interactions, 35 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6586, MCC: -0.0295 | Val Loss: 0.6441, MCC: 0.0000 | LR: 0.000001
Epoch 2/75 - Train Loss: 0.6385, MCC: -0.0128 | Val Loss: 0.6120, MCC: 0.0000 | LR: 0.000002
Epoch 3/75 - Train Loss: 0.6050, MCC: -0.0057 | Val Loss: 0.5647, MCC: 0.0000 | LR: 0.000003
Epoch 4/75 - Train Loss: 0.5613, MCC: -0.0086 | Val Loss: 0.5301, MCC: 0.0000 | LR: 0.000005
Epoch 5/75 - Train Loss: 0.5335, MCC: 0.0000 | Val Loss: 0.5386, MCC: 0.0000 | LR: 0.000007
Epoch 6/75 - Train Loss: 0.5317, MCC: 0.0000 | Val Loss: 0.5280, MCC: 0.0000 | LR: 0.000009
Epoch 7/75 - Train Loss: 0.5235, MCC: 0.0000 | Val Loss: 0.5211, MCC: 0.0000 | LR: 0.000011
Epoch 8/75 - Train Loss: 0.5152, MCC: 0.0000 | Val Loss: 0.5180, MCC: 0.0000 | LR: 0.000012
Epoch 9/75 - Train Loss: 0.5109, MCC: 0.0080

[I 2025-04-22 06:01:56,316] Trial 8 finished with value: 0.3136371067840282 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.13134992453981692, 'ln': True, 'activation_function': 'relu', 'learning_rate': 1.2407228136107138e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.01829267641174573, 'scheduler_type': 'one_cycle', 'warmup_ratio': 0.1238776540728296}. Best is trial 8 with value: 0.3136371067840282.


Fold 5 - Val MCC: 0.3152

CROSS-VALIDATION RESULTS
Median MCC: 0.3136
Mean MCC: 0.2060 ± 0.1391
Fold 1 MCC: 0.0132
Fold 2 MCC: 0.3276
Fold 3 MCC: 0.3136
Fold 4 MCC: 0.0602
Fold 5 MCC: 0.3152


TRIAL 9: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: max
  - classifier_hidden_layers: 1
  - classifier_hidden_dim: 768
  - dropout: 0.279
  - ln: True
  - activation_function: silu
Training Parameters:
  - learning_rate: 0.000092
  - batch_size: 128
  - use_phage_weights: True
  - weight_decay: 0.0130
  - scheduler_type: one_cycle
  - warmup_ratio: 0.065


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing max pooling for 177 strains...
Precomputing max pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.9929, MCC: 0.0319 | Val Loss: 0.9250, MCC: -0.0110 | LR: 0.000013
Epoch 2/75 - Train Loss: 0.9817, MCC: 0.0755 | Val Loss: 0.9375, MCC: -0.0141 | LR: 0.000036
Epoch 3/75 - Train Loss: 0.9756, MCC:

[I 2025-04-22 06:04:46,565] Trial 9 finished with value: 0.2501154530209327 and parameters: {'pooling_type': 'max', 'classifier_hidden_layers': 1, 'classifier_hidden_dim': 768, 'dropout': 0.2787160975551121, 'ln': True, 'activation_function': 'silu', 'learning_rate': 9.196444983650752e-05, 'batch_size': 128, 'use_phage_weights': True, 'weight_decay': 0.012989801809330643, 'scheduler_type': 'one_cycle', 'warmup_ratio': 0.06457647406561529}. Best is trial 8 with value: 0.3136371067840282.


Fold 5 - Val MCC: 0.1807

CROSS-VALIDATION RESULTS
Median MCC: 0.2501
Mean MCC: 0.2282 ± 0.0483
Fold 1 MCC: 0.2598
Fold 2 MCC: 0.2501
Fold 3 MCC: 0.1623
Fold 4 MCC: 0.2880
Fold 5 MCC: 0.1807


TRIAL 10: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.112
  - ln: False
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000026
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0300
  - scheduler_type: one_cycle
  - warmup_ratio: 0.192


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6945, MCC: 0.0056 | Val Loss: 0.6931, MCC: 0.0141 | LR: 0.000001
Epoch 2/75 - Train Loss: 0.6923, MCC: 0.0083 | Val Loss: 0.6899, MCC: 0.1877 | LR: 0.000002
Epoch 3/75 - Train Loss: 0

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold 1 - Val MCC: 0.1877

FOLD 2/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6998, MCC: -0.0119 | Val Loss: 0.6999, MCC: 0.0000 | LR: 0.000001
Epoch 2/75 - Train Loss: 0.6979, MCC: -0.0286 | Val Loss: 0.6971, MCC: 0.0000 | LR: 0.000002
Epoch 3/75 - Train Loss: 0.6948, MCC: -0.0200 | Val Loss: 0.6924, MCC: 0.0423 | LR: 0.000004
Epoch 4/75 - Train Loss: 0.6897, MCC: -0.0015 | Val Loss: 0.6848, MCC: 0.0000 | LR: 0.000006
Epoch 5/75 - Train Loss: 0.6822, MCC: 0.0032 | Val Loss: 0.6736, MCC: 0.0000 | LR: 0.000008
Epoch 6/75 - Train Loss: 0.6709, MCC: 0.0000 | Val Loss: 0.6574, MCC: 0.0000 | LR: 0.000011
Epoch 7/75 - Train Loss: 0.6547, MCC: 0.0000 | Val Loss: 0.6333, MCC: 0.0000 | LR: 0.000013
Epoch 8/75 - Train Loss: 0.6283, MCC: 0.0000 | Val Loss: 0.5980, MCC: 0.0000 | LR: 0.000016
Epoch 9/75 - Train Loss: 0.5944, MCC: 0.0000

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold 2 - Val MCC: 0.0423

FOLD 3/5
Train set: 13632 interactions, 142 strains
Validation set: 3360 interactions, 35 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.7007, MCC: -0.0009 | Val Loss: 0.7026, MCC: -0.0843 | LR: 0.000001
Epoch 2/75 - Train Loss: 0.6989, MCC: -0.0031 | Val Loss: 0.6996, MCC: -0.1358 | LR: 0.000002
Epoch 3/75 - Train Loss: 0.6959, MCC: -0.0209 | Val Loss: 0.6946, MCC: -0.1358 | LR: 0.000004
Epoch 4/75 - Train Loss: 0.6908, MCC: -0.0169 | Val Loss: 0.6867, MCC: 0.0135 | LR: 0.000006
Epoch 5/75 - Train Loss: 0.6833, MCC: -0.0051 | Val Loss: 0.6748, MCC: 0.0000 | LR: 0.000008
Epoch 6/75 - Train Loss: 0.6719, MCC: -0.0063 | Val Loss: 0.6571, MCC: 0.0000 | LR: 0.000011
Epoch 7/75 - Train Loss: 0.6550, MCC: 0.0000 | Val Loss: 0.6298, MCC: 0.0000 | LR: 0.000013
Epoch 8/75 - Train Loss: 0.6312, MCC: 0.0000 | Val Loss: 0.5899, MCC: 0.0000 | LR: 0.000016
Epoch 9/75 - Train Loss: 0.6006, MCC: 0

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold 3 - Val MCC: 0.0135

FOLD 4/5
Train set: 13632 interactions, 142 strains
Validation set: 3360 interactions, 35 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6970, MCC: -0.0330 | Val Loss: 0.6942, MCC: -0.0639 | LR: 0.000001
Epoch 2/75 - Train Loss: 0.6949, MCC: -0.0180 | Val Loss: 0.6918, MCC: -0.0639 | LR: 0.000002
Epoch 3/75 - Train Loss: 0.6917, MCC: -0.0112 | Val Loss: 0.6879, MCC: 0.0535 | LR: 0.000004
Epoch 4/75 - Train Loss: 0.6862, MCC: 0.0273 | Val Loss: 0.6818, MCC: 0.0000 | LR: 0.000006
Epoch 5/75 - Train Loss: 0.6782, MCC: 0.0013 | Val Loss: 0.6732, MCC: 0.0000 | LR: 0.000008
Epoch 6/75 - Train Loss: 0.6666, MCC: -0.0045 | Val Loss: 0.6610, MCC: 0.0000 | LR: 0.000011
Epoch 7/75 - Train Loss: 0.6502, MCC: 0.0000 | Val Loss: 0.6439, MCC: 0.0000 | LR: 0.000013
Epoch 8/75 - Train Loss: 0.6258, MCC: 0.0000 | Val Loss: 0.6204, MCC: 0.0000 | LR: 0.000016
Epoch 9/75 - Train Loss: 0.5922, MCC: 0.00

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold 4 - Val MCC: 0.0535

FOLD 5/5
Train set: 13632 interactions, 142 strains
Validation set: 3360 interactions, 35 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6906, MCC: 0.0204 | Val Loss: 0.6885, MCC: 0.0000 | LR: 0.000001
Epoch 2/75 - Train Loss: 0.6885, MCC: 0.0136 | Val Loss: 0.6858, MCC: 0.0000 | LR: 0.000002
Epoch 3/75 - Train Loss: 0.6851, MCC: 0.0174 | Val Loss: 0.6816, MCC: 0.0000 | LR: 0.000004
Epoch 4/75 - Train Loss: 0.6801, MCC: 0.0160 | Val Loss: 0.6750, MCC: 0.0000 | LR: 0.000006
Epoch 5/75 - Train Loss: 0.6721, MCC: 0.0000 | Val Loss: 0.6656, MCC: 0.0000 | LR: 0.000008
Epoch 6/75 - Train Loss: 0.6614, MCC: 0.0000 | Val Loss: 0.6519, MCC: 0.0000 | LR: 0.000011
Epoch 7/75 - Train Loss: 0.6446, MCC: 0.0000 | Val Loss: 0.6310, MCC: 0.0000 | LR: 0.000013
Epoch 8/75 - Train Loss: 0.6194, MCC: 0.0000 | Val Loss: 0.6003, MCC: 0.0000 | LR: 0.000016
Epoch 9/75 - Train Loss: 0.5844, MCC: 0.0000 | V

[I 2025-04-22 06:07:18,175] Trial 10 finished with value: 0.053458443106601486 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.11240633271029737, 'ln': False, 'activation_function': 'relu', 'learning_rate': 2.6469840539598173e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.02997858219123056, 'scheduler_type': 'one_cycle', 'warmup_ratio': 0.191871457996057}. Best is trial 8 with value: 0.3136371067840282.


Fold 5 - Val MCC: 0.0762

CROSS-VALIDATION RESULTS
Median MCC: 0.0535
Mean MCC: 0.0746 ± 0.0600
Fold 1 MCC: 0.1877
Fold 2 MCC: 0.0423
Fold 3 MCC: 0.0135
Fold 4 MCC: 0.0535
Fold 5 MCC: 0.0762


TRIAL 11: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.207
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000033
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0239
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.107


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6034, MCC: 0.0077 | Val Loss: 0.5105, MCC: 0.0000 | LR: 0.000033
Epoch 2/75 - Train Loss: 0.5370, MCC: 0.0317 | Val Loss: 0.4838, MCC: 0.0000 | LR: 0.000033
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 06:12:20,429] Trial 11 finished with value: 0.32038504607547647 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.20656807230814117, 'ln': True, 'activation_function': 'relu', 'learning_rate': 3.311951037496574e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.023937191942813765, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.10662414280782767}. Best is trial 11 with value: 0.32038504607547647.


Fold 5 - Val MCC: 0.3180

CROSS-VALIDATION RESULTS
Median MCC: 0.3204
Mean MCC: 0.3222 ± 0.0111
Fold 1 MCC: 0.3204
Fold 2 MCC: 0.3327
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.3046
Fold 5 MCC: 0.3180


TRIAL 12: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.204
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000029
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0224
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.099


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5974, MCC: 0.0016 | Val Loss: 0.5215, MCC: 0.0000 | LR: 0.000029
Epoch 2/75 - Train Loss: 0.5453, MCC: 0.0036 | Val Loss: 0.4908, MCC: 0.0000 | LR: 0.000029
Epoch 3/75 - Train L

[I 2025-04-22 06:16:33,225] Trial 12 finished with value: 0.3076986362291398 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.20415392005871644, 'ln': True, 'activation_function': 'relu', 'learning_rate': 2.8918427135796117e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.022412809017773185, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.0989007519269346}. Best is trial 11 with value: 0.32038504607547647.


Fold 5 - Val MCC: 0.3289

CROSS-VALIDATION RESULTS
Median MCC: 0.3077
Mean MCC: 0.2913 ± 0.0411
Fold 1 MCC: 0.2135
Fold 2 MCC: 0.3180
Fold 3 MCC: 0.3077
Fold 4 MCC: 0.2883
Fold 5 MCC: 0.3289


TRIAL 13: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.180
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000036
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0847
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.105


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5977, MCC: 0.0074 | Val Loss: 0.4997, MCC: 0.0000 | LR: 0.000036
Epoch 2/75 - Train Loss: 0.5365, MCC: 0.0271 | Val Loss: 0.4881, MCC: 0.0000 | LR: 0.000036
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 06:21:13,357] Trial 13 finished with value: 0.32177801045634175 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.17991028422533337, 'ln': True, 'activation_function': 'relu', 'learning_rate': 3.64298700512498e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.0847432657716344, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.10469868066183283}. Best is trial 13 with value: 0.32177801045634175.


Fold 5 - Val MCC: 0.3155

CROSS-VALIDATION RESULTS
Median MCC: 0.3218
Mean MCC: 0.3202 ± 0.0094
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3218
Fold 3 MCC: 0.3290
Fold 4 MCC: 0.3046
Fold 5 MCC: 0.3155


TRIAL 14: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.243
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000040
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0911
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.101


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5768, MCC: -0.0178 | Val Loss: 0.5035, MCC: 0.0000 | LR: 0.000040
Epoch 2/75 - Train Loss: 0.5465, MCC: -0.0046 | Val Loss: 0.4896, MCC: 0.0000 | LR: 0.000040
Epoch 3/75 - Train Loss:

[I 2025-04-22 06:25:30,676] Trial 14 finished with value: 0.32758298206345926 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.24271166111479706, 'ln': True, 'activation_function': 'relu', 'learning_rate': 3.962044140088257e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.09105273581735306, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.10084978620552135}. Best is trial 14 with value: 0.32758298206345926.


Fold 5 - Val MCC: 0.3276

CROSS-VALIDATION RESULTS
Median MCC: 0.3276
Mean MCC: 0.3153 ± 0.0213
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3327
Fold 3 MCC: 0.3103
Fold 4 MCC: 0.2756
Fold 5 MCC: 0.3276


TRIAL 15: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 2
  - classifier_hidden_dim: 384
  - dropout: 0.244
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000041
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0843
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.083


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5744, MCC: -0.0068 | Val Loss: 0.5047, MCC: 0.0000 | LR: 0.000041
Epoch 2/75 - Train Loss: 0.5434, MCC: 0.0076 | Val Loss: 0.4889, MCC: 0.0000 | LR: 0.000041
Epoch 3/75 - Train Loss: 

[I 2025-04-22 06:29:37,813] Trial 15 finished with value: 0.3106341215176607 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 2, 'classifier_hidden_dim': 384, 'dropout': 0.2444876582342146, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.121356612594157e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.08432367144565799, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.08321865362683883}. Best is trial 14 with value: 0.32758298206345926.


Fold 5 - Val MCC: 0.2791

CROSS-VALIDATION RESULTS
Median MCC: 0.3106
Mean MCC: 0.2975 ± 0.0323
Fold 1 MCC: 0.3179
Fold 2 MCC: 0.3106
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2442
Fold 5 MCC: 0.2791


TRIAL 16: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.174
  - ln: True
  - activation_function: gelu
Training Parameters:
  - learning_rate: 0.000021
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0985
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.110


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6037, MCC: 0.0010 | Val Loss: 0.5225, MCC: 0.0000 | LR: 0.000021
Epoch 2/75 - Train Loss: 0.5398, MCC: -0.0047 | Val Loss: 0.4879, MCC: 0.0000 | LR: 0.000021
Epoch 3/75 - Train Loss: 

[I 2025-04-22 06:34:28,389] Trial 16 finished with value: 0.30516618393184924 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.17416822208646235, 'ln': True, 'activation_function': 'gelu', 'learning_rate': 2.0947694970185327e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.09852147700003912, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.11014493564783741}. Best is trial 14 with value: 0.32758298206345926.


Fold 5 - Val MCC: 0.3052

CROSS-VALIDATION RESULTS
Median MCC: 0.3052
Mean MCC: 0.2779 ± 0.0409
Fold 1 MCC: 0.2189
Fold 2 MCC: 0.3109
Fold 3 MCC: 0.3164
Fold 4 MCC: 0.2382
Fold 5 MCC: 0.3052


TRIAL 17: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 2
  - classifier_hidden_dim: 512
  - dropout: 0.253
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000037
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0498
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.146


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5609, MCC: 0.0091 | Val Loss: 0.4949, MCC: 0.0000 | LR: 0.000037
Epoch 2/75 - Train Loss: 0.5356, MCC: 0.0198 | Val Loss: 0.4767, MCC: 0.0000 | LR: 0.000037
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 06:40:50,943] Trial 17 finished with value: 0.31029583195405824 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 2, 'classifier_hidden_dim': 512, 'dropout': 0.2529893189856378, 'ln': True, 'activation_function': 'relu', 'learning_rate': 3.688007635229907e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.049827311947007874, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.14597638579715463}. Best is trial 14 with value: 0.32758298206345926.


Fold 5 - Val MCC: 0.2958

CROSS-VALIDATION RESULTS
Median MCC: 0.3103
Mean MCC: 0.3079 ± 0.0244
Fold 1 MCC: 0.3252
Fold 2 MCC: 0.3103
Fold 3 MCC: 0.3395
Fold 4 MCC: 0.2687
Fold 5 MCC: 0.2958


TRIAL 18: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 1
  - classifier_hidden_dim: 512
  - dropout: 0.163
  - ln: False
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000069
  - batch_size: 256
  - use_phage_weights: True
  - weight_decay: 0.0387
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.079


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.9807, MCC: 0.0647 | Val Loss: 0.9240, MCC: 0.0584 | LR: 0.000069
Epoch 2/75 - Train Loss: 0.9774, MCC: 0.1032 | Val Loss: 0.9261, MCC: 0.0230 | LR: 0.000069
Epoch 3/75 - Train Loss: 0.

[I 2025-04-22 06:43:45,870] Trial 18 finished with value: 0.21651274917570526 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 1, 'classifier_hidden_dim': 512, 'dropout': 0.1626880881372858, 'ln': False, 'activation_function': 'relu', 'learning_rate': 6.933847381791469e-05, 'batch_size': 256, 'use_phage_weights': True, 'weight_decay': 0.03867529695815856, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.0787124092807928}. Best is trial 14 with value: 0.32758298206345926.


Fold 5 - Val MCC: 0.2165

CROSS-VALIDATION RESULTS
Median MCC: 0.2165
Mean MCC: 0.2258 ± 0.0384
Fold 1 MCC: 0.1847
Fold 2 MCC: 0.1913
Fold 3 MCC: 0.2880
Fold 4 MCC: 0.2486
Fold 5 MCC: 0.2165


TRIAL 19: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.219
  - ln: True
  - activation_function: gelu
Training Parameters:
  - learning_rate: 0.000022
  - batch_size: 128
  - use_phage_weights: False
  - weight_decay: 0.0670
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.095


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5494, MCC: 0.0003 | Val Loss: 0.4960, MCC: 0.0000 | LR: 0.000022
Epoch 2/75 - Train Loss: 0.5176, MCC: 0.0529 | Val Loss: 0.4859, MCC: 0.0000 | LR: 0.000022
Epoch 3/75 - Train Loss: 0.

[I 2025-04-22 06:47:34,081] Trial 19 finished with value: 0.3159185049699115 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.21924643150754913, 'ln': True, 'activation_function': 'gelu', 'learning_rate': 2.1727986992680365e-05, 'batch_size': 128, 'use_phage_weights': False, 'weight_decay': 0.06699297508461977, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.09474101212995176}. Best is trial 14 with value: 0.32758298206345926.


Fold 5 - Val MCC: 0.3107

CROSS-VALIDATION RESULTS
Median MCC: 0.3159
Mean MCC: 0.3094 ± 0.0279
Fold 1 MCC: 0.3285
Fold 2 MCC: 0.3159
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2566
Fold 5 MCC: 0.3107


TRIAL 20: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: max
  - classifier_hidden_layers: 2
  - classifier_hidden_dim: 512
  - dropout: 0.184
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000041
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0979
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.115


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing max pooling for 177 strains...
Precomputing max pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6026, MCC: 0.0032 | Val Loss: 0.5087, MCC: 0.0000 | LR: 0.000041
Epoch 2/75 - Train Loss: 0.5354, MCC: 0.0156 | Val Loss: 0.4848, MCC: 0.0000 | LR: 0.000041
Epoch 3/75 - Train Loss: 0.53

[I 2025-04-22 06:51:55,991] Trial 20 finished with value: 0.2770412202990979 and parameters: {'pooling_type': 'max', 'classifier_hidden_layers': 2, 'classifier_hidden_dim': 512, 'dropout': 0.18391535919352656, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.069940276986857e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.09785654692237442, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.11519873502998214}. Best is trial 14 with value: 0.32758298206345926.


Fold 5 - Val MCC: 0.2514

CROSS-VALIDATION RESULTS
Median MCC: 0.2770
Mean MCC: 0.2958 ± 0.0521
Fold 1 MCC: 0.3612
Fold 2 MCC: 0.3539
Fold 3 MCC: 0.2356
Fold 4 MCC: 0.2770
Fold 5 MCC: 0.2514


TRIAL 21: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.254
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000032
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0260
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.103


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5757, MCC: 0.0119 | Val Loss: 0.5158, MCC: 0.0000 | LR: 0.000032
Epoch 2/75 - Train Loss: 0.5431, MCC: 0.0112 | Val Loss: 0.4879, MCC: 0.0000 | LR: 0.000032
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 06:58:09,059] Trial 21 finished with value: 0.32747708996347763 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.2539398756927209, 'ln': True, 'activation_function': 'relu', 'learning_rate': 3.1715079543588754e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.02601567774239961, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.10309239723470495}. Best is trial 14 with value: 0.32758298206345926.


Fold 5 - Val MCC: 0.3275

CROSS-VALIDATION RESULTS
Median MCC: 0.3275
Mean MCC: 0.3196 ± 0.0230
Fold 1 MCC: 0.3206
Fold 2 MCC: 0.3351
Fold 3 MCC: 0.3395
Fold 4 MCC: 0.2756
Fold 5 MCC: 0.3275


TRIAL 22: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.259
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000029
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0357
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.138


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5830, MCC: 0.0193 | Val Loss: 0.5251, MCC: 0.0000 | LR: 0.000029
Epoch 2/75 - Train Loss: 0.5451, MCC: 0.0171 | Val Loss: 0.4869, MCC: 0.0000 | LR: 0.000029
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 07:03:25,628] Trial 22 finished with value: 0.32747708996347763 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.2586193237829906, 'ln': True, 'activation_function': 'relu', 'learning_rate': 2.9404289770998115e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.03570702441273177, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.13763467966600948}. Best is trial 14 with value: 0.32758298206345926.


Fold 5 - Val MCC: 0.3275

CROSS-VALIDATION RESULTS
Median MCC: 0.3275
Mean MCC: 0.3145 ± 0.0252
Fold 1 MCC: 0.3290
Fold 2 MCC: 0.3146
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2661
Fold 5 MCC: 0.3275


TRIAL 23: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.263
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000027
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0158
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.135


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5817, MCC: -0.0013 | Val Loss: 0.5418, MCC: 0.0000 | LR: 0.000027
Epoch 2/75 - Train Loss: 0.5414, MCC: 0.0132 | Val Loss: 0.4956, MCC: 0.0000 | LR: 0.000027
Epoch 3/75 - Train Loss: 

[I 2025-04-22 07:07:35,047] Trial 23 finished with value: 0.30117648885963016 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.2626519663284967, 'ln': True, 'activation_function': 'relu', 'learning_rate': 2.727801066925477e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.015836892199840177, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.13545218292036107}. Best is trial 14 with value: 0.32758298206345926.


Fold 5 - Val MCC: 0.3167

CROSS-VALIDATION RESULTS
Median MCC: 0.3012
Mean MCC: 0.2880 ± 0.0417
Fold 1 MCC: 0.2136
Fold 2 MCC: 0.3327
Fold 3 MCC: 0.3012
Fold 4 MCC: 0.2756
Fold 5 MCC: 0.3167


TRIAL 24: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.267
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000026
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0352
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.145


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5675, MCC: 0.0158 | Val Loss: 0.5271, MCC: 0.0000 | LR: 0.000026
Epoch 2/75 - Train Loss: 0.5413, MCC: 0.0328 | Val Loss: 0.4917, MCC: 0.0000 | LR: 0.000026
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 07:14:30,959] Trial 24 finished with value: 0.3301495479532531 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.2668456363354913, 'ln': True, 'activation_function': 'relu', 'learning_rate': 2.6070367224411916e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.03522558435977214, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.1447549702146994}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.3265

CROSS-VALIDATION RESULTS
Median MCC: 0.3301
Mean MCC: 0.3185 ± 0.0272
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3327
Fold 3 MCC: 0.3387
Fold 4 MCC: 0.2647
Fold 5 MCC: 0.3265


TRIAL 25: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.299
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000046
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0540
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.167


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5861, MCC: -0.0042 | Val Loss: 0.5092, MCC: 0.0000 | LR: 0.000046
Epoch 2/75 - Train Loss: 0.5421, MCC: 0.0080 | Val Loss: 0.4943, MCC: 0.0000 | LR: 0.000046
Epoch 3/75 - Train Loss: 

[I 2025-04-22 07:18:23,938] Trial 25 finished with value: 0.29959902612137224 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.29916595290267484, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.6223959777278094e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.054020143799111256, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.16653858294881146}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.3286

CROSS-VALIDATION RESULTS
Median MCC: 0.2996
Mean MCC: 0.3026 ± 0.0252
Fold 1 MCC: 0.3278
Fold 2 MCC: 0.2996
Fold 3 MCC: 0.2970
Fold 4 MCC: 0.2598
Fold 5 MCC: 0.3286


TRIAL 26: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.233
  - ln: False
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000024
  - batch_size: 1024
  - use_phage_weights: True
  - weight_decay: 0.0304
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.199


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.9889, MCC: 0.0409 | Val Loss: 0.9159, MCC: 0.1306 | LR: 0.000024
Epoch 2/75 - Train Loss: 0.9861, MCC: 0.0466 | Val Loss: 0.9131, MCC: 0.0288 | LR: 0.000024
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 07:24:39,228] Trial 26 finished with value: 0.1305918475635457 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.23256629097731799, 'ln': False, 'activation_function': 'relu', 'learning_rate': 2.3995507201936212e-05, 'batch_size': 1024, 'use_phage_weights': True, 'weight_decay': 0.03040743830520737, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.19921130167869883}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.0792

CROSS-VALIDATION RESULTS
Median MCC: 0.1306
Mean MCC: 0.1530 ± 0.0623
Fold 1 MCC: 0.1306
Fold 2 MCC: 0.1017
Fold 3 MCC: 0.2274
Fold 4 MCC: 0.2259
Fold 5 MCC: 0.0792


TRIAL 27: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 2
  - classifier_hidden_dim: 384
  - dropout: 0.269
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000017
  - batch_size: 128
  - use_phage_weights: False
  - weight_decay: 0.0074
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.051


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5461, MCC: -0.0094 | Val Loss: 0.5061, MCC: 0.0000 | LR: 0.000017
Epoch 2/75 - Train Loss: 0.5232, MCC: -0.0015 | Val Loss: 0.4788, MCC: 0.0000 | LR: 0.000017
Epoch 3/75 - Train Loss: 

[I 2025-04-22 07:28:46,337] Trial 27 finished with value: 0.2976855921323162 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 2, 'classifier_hidden_dim': 384, 'dropout': 0.26866904756258586, 'ln': True, 'activation_function': 'relu', 'learning_rate': 1.719041601860978e-05, 'batch_size': 128, 'use_phage_weights': False, 'weight_decay': 0.0074132708115519745, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.050634842926343224}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.2822

CROSS-VALIDATION RESULTS
Median MCC: 0.2977
Mean MCC: 0.3001 ± 0.0300
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.2977
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2552
Fold 5 MCC: 0.2822


TRIAL 28: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: max
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 256
  - dropout: 0.237
  - ln: True
  - activation_function: gelu
Training Parameters:
  - learning_rate: 0.000058
  - batch_size: 256
  - use_phage_weights: False
  - weight_decay: 0.0282
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.118


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing max pooling for 177 strains...
Precomputing max pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5534, MCC: 0.0084 | Val Loss: 0.5150, MCC: 0.0000 | LR: 0.000058
Epoch 2/75 - Train Loss: 0.5273, MCC: 0.0293 | Val Loss: 0.4681, MCC: 0.0000 | LR: 0.000058
Epoch 3/75 - Train Loss: 0.508

[I 2025-04-22 07:32:31,785] Trial 28 finished with value: 0.2991439456026531 and parameters: {'pooling_type': 'max', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 256, 'dropout': 0.23733499879985748, 'ln': True, 'activation_function': 'gelu', 'learning_rate': 5.7601989971744805e-05, 'batch_size': 256, 'use_phage_weights': False, 'weight_decay': 0.02819540808699478, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.11801647514465173}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.2991

CROSS-VALIDATION RESULTS
Median MCC: 0.2991
Mean MCC: 0.3226 ± 0.0498
Fold 1 MCC: 0.4064
Fold 2 MCC: 0.3467
Fold 3 MCC: 0.2616
Fold 4 MCC: 0.2991
Fold 5 MCC: 0.2991


TRIAL 29: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 2
  - classifier_hidden_dim: 768
  - dropout: 0.223
  - ln: True
  - activation_function: silu
Training Parameters:
  - learning_rate: 0.000016
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0188
  - scheduler_type: reduce_on_plateau
  - warmup_ratio: 0.177


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5605, MCC: -0.0182 | Val Loss: 0.5072, MCC: 0.0000 | LR: 0.000016
Epoch 2/75 - Train Loss: 0.5274, MCC: 0.0000 | Val Loss: 0.4896, MCC: 0.0000 | LR: 0.000016
Epoch 3/75 - Train Loss: 

[I 2025-04-22 07:40:28,930] Trial 29 finished with value: 0.14190691481932216 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 2, 'classifier_hidden_dim': 768, 'dropout': 0.22271807314152564, 'ln': True, 'activation_function': 'silu', 'learning_rate': 1.6323719851814583e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.018805471114723873, 'scheduler_type': 'reduce_on_plateau', 'warmup_ratio': 0.17711593404412898}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.0650

CROSS-VALIDATION RESULTS
Median MCC: 0.1419
Mean MCC: 0.1432 ± 0.0477
Fold 1 MCC: 0.1606
Fold 2 MCC: 0.1353
Fold 3 MCC: 0.1419
Fold 4 MCC: 0.2131
Fold 5 MCC: 0.0650


TRIAL 30: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: max
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.276
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000011
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0010
  - scheduler_type: reduce_on_plateau
  - warmup_ratio: 0.153


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing max pooling for 177 strains...
Precomputing max pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5847, MCC: -0.0038 | Val Loss: 0.4972, MCC: 0.0000 | LR: 0.000011
Epoch 2/75 - Train Loss: 0.5526, MCC: 0.0156 | Val Loss: 0.5102, MCC: 0.0000 | LR: 0.000011
Epoch 3/75 - Train Loss: 0.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold 1 - Val MCC: 0.0000

FOLD 2/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing max pooling for 177 strains...
Precomputing max pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5621, MCC: 0.0059 | Val Loss: 0.5249, MCC: 0.0000 | LR: 0.000011
Epoch 2/75 - Train Loss: 0.5442, MCC: 0.0037 | Val Loss: 0.5164, MCC: 0.0000 | LR: 0.000011
Epoch 3/75 - Train Loss: 0.5402, MCC: -0.0072 | Val Loss: 0.5099, MCC: 0.0000 | LR: 0.000011
Epoch 4/75 - Train Loss: 0.5396, MCC: 0.0012 | Val Loss: 0.5072, MCC: 0.0000 | LR: 0.000011
Epoch 5/75 - Train Loss: 0.5397, MCC: -0.0115 | Val Loss: 0.5025, MCC: 0.0000 | LR: 0.000011
Epoch 6/75 - Train Loss: 0.5323, MCC: 0.0220 | Val Loss: 0.5002, MCC: 0.0000 | LR: 0.000006
Epoch 7/75 - Train Loss: 0.5377, MCC: 0.0052 | Val Loss: 0.5005, MCC: 0.0000 | LR: 0.000006
Epoch 8/75 - Train Loss: 0.5312, MCC: 0.0155 | Val Loss: 0.4974, MCC: 0.0000 | LR: 0.000006
Epoch 9/75 - Train Loss: 0.5346, MCC: 0.0037 | Val L

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold 2 - Val MCC: 0.0000

FOLD 3/5
Train set: 13632 interactions, 142 strains
Validation set: 3360 interactions, 35 strains
Precomputing max pooling for 177 strains...
Precomputing max pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5822, MCC: 0.0008 | Val Loss: 0.4509, MCC: 0.0000 | LR: 0.000011
Epoch 2/75 - Train Loss: 0.5624, MCC: 0.0152 | Val Loss: 0.4432, MCC: 0.0000 | LR: 0.000011
Epoch 3/75 - Train Loss: 0.5614, MCC: 0.0100 | Val Loss: 0.4413, MCC: 0.0000 | LR: 0.000011
Epoch 4/75 - Train Loss: 0.5565, MCC: 0.0023 | Val Loss: 0.4393, MCC: 0.0000 | LR: 0.000011
Epoch 5/75 - Train Loss: 0.5523, MCC: 0.0103 | Val Loss: 0.4396, MCC: 0.0000 | LR: 0.000011
Epoch 6/75 - Train Loss: 0.5523, MCC: 0.0003 | Val Loss: 0.4383, MCC: 0.0000 | LR: 0.000006
Epoch 7/75 - Train Loss: 0.5492, MCC: 0.0075 | Val Loss: 0.4381, MCC: 0.0000 | LR: 0.000006
Epoch 8/75 - Train Loss: 0.5508, MCC: 0.0146 | Val Loss: 0.4370, MCC: 0.0000 | LR: 0.000006
Epoch 9/75 - Train Loss: 0.5506, MCC: 0.0250 | Val Los

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold 3 - Val MCC: 0.0000

FOLD 4/5
Train set: 13632 interactions, 142 strains
Validation set: 3360 interactions, 35 strains
Precomputing max pooling for 177 strains...
Precomputing max pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5763, MCC: 0.0045 | Val Loss: 0.5968, MCC: 0.0000 | LR: 0.000011
Epoch 2/75 - Train Loss: 0.5295, MCC: 0.0000 | Val Loss: 0.6153, MCC: 0.0000 | LR: 0.000011
Epoch 3/75 - Train Loss: 0.5279, MCC: -0.0045 | Val Loss: 0.5886, MCC: 0.0000 | LR: 0.000011
Epoch 4/75 - Train Loss: 0.5263, MCC: 0.0000 | Val Loss: 0.5814, MCC: 0.0000 | LR: 0.000011
Epoch 5/75 - Train Loss: 0.5257, MCC: -0.0064 | Val Loss: 0.5822, MCC: 0.0000 | LR: 0.000011
Epoch 6/75 - Train Loss: 0.5184, MCC: 0.0163 | Val Loss: 0.5784, MCC: 0.0000 | LR: 0.000006
Epoch 7/75 - Train Loss: 0.5190, MCC: 0.0163 | Val Loss: 0.5783, MCC: 0.0000 | LR: 0.000006
Epoch 8/75 - Train Loss: 0.5202, MCC: 0.0163 | Val Loss: 0.5746, MCC: 0.0000 | LR: 0.000006
Epoch 9/75 - Train Loss: 0.5173, MCC: -0.0045 | Val 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold 4 - Val MCC: 0.0000

FOLD 5/5
Train set: 13632 interactions, 142 strains
Validation set: 3360 interactions, 35 strains
Precomputing max pooling for 177 strains...
Precomputing max pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6452, MCC: -0.0088 | Val Loss: 0.5383, MCC: 0.0000 | LR: 0.000011
Epoch 2/75 - Train Loss: 0.5482, MCC: 0.0035 | Val Loss: 0.5585, MCC: 0.0000 | LR: 0.000011
Epoch 3/75 - Train Loss: 0.5379, MCC: -0.0065 | Val Loss: 0.5488, MCC: 0.0000 | LR: 0.000011
Epoch 4/75 - Train Loss: 0.5380, MCC: 0.0159 | Val Loss: 0.5380, MCC: 0.0000 | LR: 0.000011
Epoch 5/75 - Train Loss: 0.5302, MCC: 0.0010 | Val Loss: 0.5375, MCC: 0.0000 | LR: 0.000011
Epoch 6/75 - Train Loss: 0.5325, MCC: -0.0080 | Val Loss: 0.5380, MCC: 0.0000 | LR: 0.000006
Epoch 7/75 - Train Loss: 0.5315, MCC: -0.0092 | Val Loss: 0.5355, MCC: 0.0000 | LR: 0.000006
Epoch 8/75 - Train Loss: 0.5299, MCC: -0.0046 | Val Loss: 0.5344, MCC: 0.0000 | LR: 0.000006
Epoch 9/75 - Train Loss: 0.5302, MCC: -0.0070 | V

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2025-04-22 07:52:00,900] Trial 30 finished with value: 0.0 and parameters: {'pooling_type': 'max', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.27559634946500206, 'ln': True, 'activation_function': 'relu', 'learning_rate': 1.1021029236920082e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.001018916194339174, 'scheduler_type': 'reduce_on_plateau', 'warmup_ratio': 0.1531161157791225}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.0000

CROSS-VALIDATION RESULTS
Median MCC: 0.0000
Mean MCC: 0.0000 ± 0.0000
Fold 1 MCC: 0.0000
Fold 2 MCC: 0.0000
Fold 3 MCC: 0.0000
Fold 4 MCC: 0.0000
Fold 5 MCC: 0.0000


TRIAL 31: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.259
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000031
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0369
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.138


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5791, MCC: -0.0124 | Val Loss: 0.5302, MCC: 0.0000 | LR: 0.000031
Epoch 2/75 - Train Loss: 0.5427, MCC: 0.0162 | Val Loss: 0.4986, MCC: 0.0000 | LR: 0.000031
Epoch 3/75 - Train Loss: 

[I 2025-04-22 07:56:46,263] Trial 31 finished with value: 0.30561035048760743 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.25914596896350167, 'ln': True, 'activation_function': 'relu', 'learning_rate': 3.122574298128791e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.03688829208472383, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.13795123494979433}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.3056

CROSS-VALIDATION RESULTS
Median MCC: 0.3056
Mean MCC: 0.3094 ± 0.0177
Fold 1 MCC: 0.2957
Fold 2 MCC: 0.3229
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2870
Fold 5 MCC: 0.3056


TRIAL 32: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.246
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000024
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0695
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.156


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5797, MCC: 0.0005 | Val Loss: 0.5281, MCC: 0.0000 | LR: 0.000024
Epoch 2/75 - Train Loss: 0.5484, MCC: 0.0082 | Val Loss: 0.4930, MCC: 0.0000 | LR: 0.000023
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 08:03:40,969] Trial 32 finished with value: 0.32747708996347763 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.2461165295996391, 'ln': True, 'activation_function': 'relu', 'learning_rate': 2.3521515767155747e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.06950193382873888, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.15633889399740405}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.3275

CROSS-VALIDATION RESULTS
Median MCC: 0.3275
Mean MCC: 0.3173 ± 0.0163
Fold 1 MCC: 0.2935
Fold 2 MCC: 0.3276
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.3024
Fold 5 MCC: 0.3275


TRIAL 33: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.289
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000033
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0458
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.132


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5963, MCC: -0.0007 | Val Loss: 0.5304, MCC: 0.0000 | LR: 0.000033
Epoch 2/75 - Train Loss: 0.5456, MCC: 0.0089 | Val Loss: 0.4887, MCC: 0.0000 | LR: 0.000033
Epoch 3/75 - Train Loss: 

[I 2025-04-22 08:09:06,797] Trial 33 finished with value: 0.32091259711259973 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.28916401704758277, 'ln': True, 'activation_function': 'relu', 'learning_rate': 3.28882020134401e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.045803397788628096, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.1322395054545254}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.3209

CROSS-VALIDATION RESULTS
Median MCC: 0.3209
Mean MCC: 0.3152 ± 0.0212
Fold 1 MCC: 0.3009
Fold 2 MCC: 0.3327
Fold 3 MCC: 0.3395
Fold 4 MCC: 0.2818
Fold 5 MCC: 0.3209


TRIAL 34: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 768
  - dropout: 0.220
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000026
  - batch_size: 512
  - use_phage_weights: True
  - weight_decay: 0.0118
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.093


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.9928, MCC: 0.0482 | Val Loss: 0.9300, MCC: -0.0141 | LR: 0.000026
Epoch 2/75 - Train Loss: 0.9901, MCC: 0.0701 | Val Loss: 0.9325, MCC: -0.0141 | LR: 0.000026
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 08:11:20,653] Trial 34 finished with value: 0.11265017516337283 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 768, 'dropout': 0.2197228189438742, 'ln': True, 'activation_function': 'relu', 'learning_rate': 2.5632359866049535e-05, 'batch_size': 512, 'use_phage_weights': True, 'weight_decay': 0.011781484614419522, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.09316683295221773}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.1127

CROSS-VALIDATION RESULTS
Median MCC: 0.1127
Mean MCC: 0.1667 ± 0.0834
Fold 1 MCC: 0.0899
Fold 2 MCC: 0.1017
Fold 3 MCC: 0.3005
Fold 4 MCC: 0.2289
Fold 5 MCC: 0.1127


TRIAL 35: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.258
  - ln: False
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000030
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0362
  - scheduler_type: reduce_on_plateau
  - warmup_ratio: 0.140


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6720, MCC: 0.0013 | Val Loss: 0.6297, MCC: 0.0000 | LR: 0.000030
Epoch 2/75 - Train Loss: 0.6137, MCC: 0.0000 | Val Loss: 0.5580, MCC: 0.0000 | LR: 0.000030
Epoch 3/75 - Train

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold 1 - Val MCC: 0.0000

FOLD 2/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6778, MCC: -0.0110 | Val Loss: 0.6418, MCC: 0.0000 | LR: 0.000030
Epoch 2/75 - Train Loss: 0.6246, MCC: 0.0000 | Val Loss: 0.5815, MCC: 0.0000 | LR: 0.000030
Epoch 3/75 - Train Loss: 0.5671, MCC: 0.0000 | Val Loss: 0.5274, MCC: 0.0000 | LR: 0.000030
Epoch 4/75 - Train Loss: 0.5450, MCC: 0.0000 | Val Loss: 0.5109, MCC: 0.0000 | LR: 0.000030
Epoch 5/75 - Train Loss: 0.5393, MCC: 0.0000 | Val Loss: 0.5104, MCC: 0.0000 | LR: 0.000030
Epoch 6/75 - Train Loss: 0.5308, MCC: 0.0000 | Val Loss: 0.5111, MCC: 0.0000 | LR: 0.000015
Epoch 7/75 - Train Loss: 0.5356, MCC: 0.0000 | Val Loss: 0.5104, MCC: 0.0000 | LR: 0.000015
Epoch 8/75 - Train Loss: 0.5335, MCC: 0.0000 | Val Loss: 0.5100, MCC: 0.0000 | LR: 0.000015
Epoch 9/75 - Train Loss: 0.5311, MCC: 0.0000 | 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold 2 - Val MCC: 0.0000

FOLD 3/5
Train set: 13632 interactions, 142 strains
Validation set: 3360 interactions, 35 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6600, MCC: -0.0112 | Val Loss: 0.6136, MCC: 0.0000 | LR: 0.000030
Epoch 2/75 - Train Loss: 0.6082, MCC: 0.0000 | Val Loss: 0.5398, MCC: 0.0000 | LR: 0.000030
Epoch 3/75 - Train Loss: 0.5669, MCC: 0.0000 | Val Loss: 0.4793, MCC: 0.0000 | LR: 0.000030
Epoch 4/75 - Train Loss: 0.5566, MCC: 0.0000 | Val Loss: 0.4588, MCC: 0.0000 | LR: 0.000030
Epoch 5/75 - Train Loss: 0.5500, MCC: 0.0000 | Val Loss: 0.4610, MCC: 0.0000 | LR: 0.000030
Epoch 6/75 - Train Loss: 0.5492, MCC: 0.0000 | Val Loss: 0.4589, MCC: 0.0000 | LR: 0.000015
Epoch 7/75 - Train Loss: 0.5500, MCC: 0.0000 | Val Loss: 0.4566, MCC: 0.0000 | LR: 0.000015
Epoch 8/75 - Train Loss: 0.5495, MCC: 0.0000 | Val Loss: 0.4566, MCC: 0.0000 | LR: 0.000015
Epoch 9/75 - Train Loss: 0.5473, MCC: 0.0000 | 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold 3 - Val MCC: 0.0000

FOLD 4/5
Train set: 13632 interactions, 142 strains
Validation set: 3360 interactions, 35 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6803, MCC: -0.0148 | Val Loss: 0.6537, MCC: 0.0000 | LR: 0.000030
Epoch 2/75 - Train Loss: 0.6226, MCC: 0.0000 | Val Loss: 0.6065, MCC: 0.0000 | LR: 0.000030
Epoch 3/75 - Train Loss: 0.5616, MCC: 0.0000 | Val Loss: 0.5757, MCC: 0.0000 | LR: 0.000030
Epoch 4/75 - Train Loss: 0.5308, MCC: 0.0000 | Val Loss: 0.5796, MCC: 0.0000 | LR: 0.000030
Epoch 5/75 - Train Loss: 0.5236, MCC: 0.0000 | Val Loss: 0.5725, MCC: 0.0000 | LR: 0.000030
Epoch 6/75 - Train Loss: 0.5199, MCC: 0.0000 | Val Loss: 0.5686, MCC: 0.0000 | LR: 0.000015
Epoch 7/75 - Train Loss: 0.5199, MCC: 0.0000 | Val Loss: 0.5675, MCC: 0.0000 | LR: 0.000015
Epoch 8/75 - Train Loss: 0.5193, MCC: 0.0000 | Val Loss: 0.5674, MCC: 0.0000 | LR: 0.000015
Epoch 9/75 - Train Loss: 0.5162, MCC: 0.0000 | 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold 4 - Val MCC: 0.0000

FOLD 5/5
Train set: 13632 interactions, 142 strains
Validation set: 3360 interactions, 35 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6658, MCC: -0.0011 | Val Loss: 0.6360, MCC: 0.0000 | LR: 0.000030
Epoch 2/75 - Train Loss: 0.6146, MCC: 0.0000 | Val Loss: 0.5842, MCC: 0.0000 | LR: 0.000030
Epoch 3/75 - Train Loss: 0.5641, MCC: 0.0000 | Val Loss: 0.5420, MCC: 0.0000 | LR: 0.000030
Epoch 4/75 - Train Loss: 0.5387, MCC: 0.0000 | Val Loss: 0.5321, MCC: 0.0000 | LR: 0.000030
Epoch 5/75 - Train Loss: 0.5321, MCC: 0.0000 | Val Loss: 0.5303, MCC: 0.0000 | LR: 0.000030
Epoch 6/75 - Train Loss: 0.5326, MCC: 0.0000 | Val Loss: 0.5301, MCC: 0.0000 | LR: 0.000015
Epoch 7/75 - Train Loss: 0.5315, MCC: 0.0000 | Val Loss: 0.5297, MCC: 0.0000 | LR: 0.000015
Epoch 8/75 - Train Loss: 0.5266, MCC: 0.0000 | Val Loss: 0.5293, MCC: 0.0000 | LR: 0.000015
Epoch 9/75 - Train Loss: 0.5260, MCC: 0.0000 | 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2025-04-22 08:22:53,483] Trial 35 finished with value: 0.0 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.25822648311838764, 'ln': False, 'activation_function': 'relu', 'learning_rate': 3.000929045730913e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.0362252028595437, 'scheduler_type': 'reduce_on_plateau', 'warmup_ratio': 0.13983146765769447}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.0000

CROSS-VALIDATION RESULTS
Median MCC: 0.0000
Mean MCC: 0.0000 ± 0.0000
Fold 1 MCC: 0.0000
Fold 2 MCC: 0.0000
Fold 3 MCC: 0.0000
Fold 4 MCC: 0.0000
Fold 5 MCC: 0.0000


TRIAL 36: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 1
  - classifier_hidden_dim: 512
  - dropout: 0.300
  - ln: True
  - activation_function: silu
Training Parameters:
  - learning_rate: 0.000038
  - batch_size: 512
  - use_phage_weights: True
  - weight_decay: 0.0045
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.175


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.9848, MCC: 0.0687 | Val Loss: 0.9269, MCC: -0.0141 | LR: 0.000038
Epoch 2/75 - Train Loss: 0.9876, MCC: 0.0625 | Val Loss: 0.9186, MCC: 0.0305 | LR: 0.000038
Epoch 3/75 - Train Loss: 0.

[I 2025-04-22 08:24:51,953] Trial 36 finished with value: 0.21802630982075924 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 1, 'classifier_hidden_dim': 512, 'dropout': 0.29989513916012767, 'ln': True, 'activation_function': 'silu', 'learning_rate': 3.78207972206178e-05, 'batch_size': 512, 'use_phage_weights': True, 'weight_decay': 0.004458575124546365, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.17542887849594474}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.1174

CROSS-VALIDATION RESULTS
Median MCC: 0.2180
Mean MCC: 0.1844 ± 0.0620
Fold 1 MCC: 0.1016
Fold 2 MCC: 0.2397
Fold 3 MCC: 0.2451
Fold 4 MCC: 0.2180
Fold 5 MCC: 0.1174


TRIAL 37: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 256
  - dropout: 0.194
  - ln: True
  - activation_function: gelu
Training Parameters:
  - learning_rate: 0.000018
  - batch_size: 256
  - use_phage_weights: False
  - weight_decay: 0.0560
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.075


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5690, MCC: -0.0076 | Val Loss: 0.5239, MCC: 0.0000 | LR: 0.000018
Epoch 2/75 - Train Loss: 0.5339, MCC: 0.0396 | Val Loss: 0.4978, MCC: 0.0000 | LR: 0.000018
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 08:29:13,904] Trial 37 finished with value: 0.30516618393184924 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 256, 'dropout': 0.19377741657201095, 'ln': True, 'activation_function': 'gelu', 'learning_rate': 1.822539018697546e-05, 'batch_size': 256, 'use_phage_weights': False, 'weight_decay': 0.05599422492879979, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.0749227656846315}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.3052

CROSS-VALIDATION RESULTS
Median MCC: 0.3052
Mean MCC: 0.2973 ± 0.0244
Fold 1 MCC: 0.2882
Fold 2 MCC: 0.3114
Fold 3 MCC: 0.3267
Fold 4 MCC: 0.2552
Fold 5 MCC: 0.3052


TRIAL 38: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 2
  - classifier_hidden_dim: 768
  - dropout: 0.247
  - ln: False
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000047
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0153
  - scheduler_type: reduce_on_plateau
  - warmup_ratio: 0.158


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6191, MCC: 0.0132 | Val Loss: 0.5182, MCC: 0.0000 | LR: 0.000047
Epoch 2/75 - Train Loss: 0.5461, MCC: 0.0000 | Val Loss: 0.4955, MCC: 0.0000 | LR: 0.000047
Epoch 3/75 - Train Loss:

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold 1 - Val MCC: 0.0000

FOLD 2/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6336, MCC: -0.0004 | Val Loss: 0.5421, MCC: 0.0000 | LR: 0.000047
Epoch 2/75 - Train Loss: 0.5475, MCC: 0.0000 | Val Loss: 0.5112, MCC: 0.0000 | LR: 0.000047
Epoch 3/75 - Train Loss: 0.5371, MCC: 0.0000 | Val Loss: 0.5101, MCC: 0.0000 | LR: 0.000047
Epoch 4/75 - Train Loss: 0.5337, MCC: 0.0000 | Val Loss: 0.5092, MCC: 0.0000 | LR: 0.000047
Epoch 5/75 - Train Loss: 0.5299, MCC: 0.0000 | Val Loss: 0.5034, MCC: 0.0000 | LR: 0.000047
Epoch 6/75 - Train Loss: 0.5271, MCC: 0.0000 | Val Loss: 0.5019, MCC: 0.0000 | LR: 0.000023
Epoch 7/75 - Train Loss: 0.5276, MCC: 0.0000 | Val Loss: 0.5002, MCC: 0.0000 | LR: 0.000023
Epoch 8/75 - Train Loss: 0.5205, MCC: 0.0000 | Val Loss: 0.4983, MCC: 0.0000 | LR: 0.000023
Epoch 9/75 - Train Loss: 0.5221, MCC: 0.0000 | Val 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold 2 - Val MCC: 0.0000

FOLD 3/5
Train set: 13632 interactions, 142 strains
Validation set: 3360 interactions, 35 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6205, MCC: -0.0073 | Val Loss: 0.4944, MCC: 0.0000 | LR: 0.000047
Epoch 2/75 - Train Loss: 0.5562, MCC: 0.0000 | Val Loss: 0.4523, MCC: 0.0000 | LR: 0.000047
Epoch 3/75 - Train Loss: 0.5503, MCC: 0.0000 | Val Loss: 0.4583, MCC: 0.0000 | LR: 0.000047
Epoch 4/75 - Train Loss: 0.5468, MCC: 0.0000 | Val Loss: 0.4539, MCC: 0.0000 | LR: 0.000047
Epoch 5/75 - Train Loss: 0.5396, MCC: 0.0000 | Val Loss: 0.4513, MCC: 0.0000 | LR: 0.000047
Epoch 6/75 - Train Loss: 0.5377, MCC: 0.0000 | Val Loss: 0.4473, MCC: 0.0000 | LR: 0.000023
Epoch 7/75 - Train Loss: 0.5357, MCC: 0.0000 | Val Loss: 0.4466, MCC: 0.0000 | LR: 0.000023
Epoch 8/75 - Train Loss: 0.5317, MCC: 0.0000 | Val Loss: 0.4424, MCC: 0.0000 | LR: 0.000023
Epoch 9/75 - Train Loss: 0.5292, MCC: 0.0000 | Val 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold 3 - Val MCC: 0.0000

FOLD 4/5
Train set: 13632 interactions, 142 strains
Validation set: 3360 interactions, 35 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6025, MCC: 0.0083 | Val Loss: 0.5757, MCC: 0.0000 | LR: 0.000047
Epoch 2/75 - Train Loss: 0.5276, MCC: 0.0000 | Val Loss: 0.5825, MCC: 0.0000 | LR: 0.000047
Epoch 3/75 - Train Loss: 0.5206, MCC: 0.0000 | Val Loss: 0.5672, MCC: 0.0000 | LR: 0.000047
Epoch 4/75 - Train Loss: 0.5173, MCC: 0.0000 | Val Loss: 0.5649, MCC: 0.0000 | LR: 0.000047
Epoch 5/75 - Train Loss: 0.5161, MCC: 0.0000 | Val Loss: 0.5632, MCC: 0.0000 | LR: 0.000047
Epoch 6/75 - Train Loss: 0.5123, MCC: 0.0000 | Val Loss: 0.5609, MCC: 0.0000 | LR: 0.000023
Epoch 7/75 - Train Loss: 0.5083, MCC: 0.0000 | Val Loss: 0.5597, MCC: 0.0000 | LR: 0.000023
Epoch 8/75 - Train Loss: 0.5032, MCC: 0.0000 | Val Loss: 0.5582, MCC: 0.0000 | LR: 0.000023
Epoch 9/75 - Train Loss: 0.5038, MCC: 0.0000 | Val L

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold 4 - Val MCC: 0.0000

FOLD 5/5
Train set: 13632 interactions, 142 strains
Validation set: 3360 interactions, 35 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6117, MCC: -0.0185 | Val Loss: 0.5420, MCC: 0.0000 | LR: 0.000047
Epoch 2/75 - Train Loss: 0.5372, MCC: 0.0000 | Val Loss: 0.5340, MCC: 0.0000 | LR: 0.000047
Epoch 3/75 - Train Loss: 0.5304, MCC: 0.0000 | Val Loss: 0.5298, MCC: 0.0000 | LR: 0.000047
Epoch 4/75 - Train Loss: 0.5287, MCC: 0.0000 | Val Loss: 0.5268, MCC: 0.0000 | LR: 0.000047
Epoch 5/75 - Train Loss: 0.5229, MCC: 0.0000 | Val Loss: 0.5232, MCC: 0.0000 | LR: 0.000047
Epoch 6/75 - Train Loss: 0.5187, MCC: 0.0000 | Val Loss: 0.5217, MCC: 0.0000 | LR: 0.000023
Epoch 7/75 - Train Loss: 0.5193, MCC: 0.0000 | Val Loss: 0.5201, MCC: 0.0000 | LR: 0.000023
Epoch 8/75 - Train Loss: 0.5146, MCC: 0.0000 | Val Loss: 0.5180, MCC: 0.0000 | LR: 0.000023
Epoch 9/75 - Train Loss: 0.5144, MCC: 0.0000 | Val 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2025-04-22 08:40:42,323] Trial 38 finished with value: 0.0 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 2, 'classifier_hidden_dim': 768, 'dropout': 0.24721022716860913, 'ln': False, 'activation_function': 'relu', 'learning_rate': 4.662012457613874e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.015328712685019119, 'scheduler_type': 'reduce_on_plateau', 'warmup_ratio': 0.1584312846745549}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.0000

CROSS-VALIDATION RESULTS
Median MCC: 0.0000
Mean MCC: 0.0000 ± 0.0000
Fold 1 MCC: 0.0000
Fold 2 MCC: 0.0000
Fold 3 MCC: 0.0000
Fold 4 MCC: 0.0000
Fold 5 MCC: 0.0000


TRIAL 39: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.273
  - ln: True
  - activation_function: silu
Training Parameters:
  - learning_rate: 0.000068
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0228
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.121


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5669, MCC: -0.0001 | Val Loss: 0.4870, MCC: 0.0000 | LR: 0.000068
Epoch 2/75 - Train Loss: 0.5354, MCC: 0.0077 | Val Loss: 0.4772, MCC: 0.0000 | LR: 0.000068
Epoch 3/75 - Train Loss: 

[I 2025-04-22 08:44:12,303] Trial 39 finished with value: 0.2957280549248848 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.2725105378525168, 'ln': True, 'activation_function': 'silu', 'learning_rate': 6.773609508437767e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.022797160400279185, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.12110429639096623}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.3064

CROSS-VALIDATION RESULTS
Median MCC: 0.2957
Mean MCC: 0.2793 ± 0.0337
Fold 1 MCC: 0.2957
Fold 2 MCC: 0.2415
Fold 3 MCC: 0.3165
Fold 4 MCC: 0.2362
Fold 5 MCC: 0.3064


TRIAL 40: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 256
  - dropout: 0.229
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000014
  - batch_size: 128
  - use_phage_weights: True
  - weight_decay: 0.0716
  - scheduler_type: one_cycle
  - warmup_ratio: 0.128


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 1.0090, MCC: -0.0050 | Val Loss: 0.9281, MCC: 0.0420 | LR: 0.000001
Epoch 2/75 - Train Loss: 1.0049, MCC: 0.0038 | Val Loss: 0.9251, MCC: -0.0123 | LR: 0.000002
Epoch 3/75 - Train Loss: 0.

[I 2025-04-22 08:47:45,714] Trial 40 finished with value: 0.23969946946708112 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 256, 'dropout': 0.22886053338899537, 'ln': True, 'activation_function': 'relu', 'learning_rate': 1.3934342670579075e-05, 'batch_size': 128, 'use_phage_weights': True, 'weight_decay': 0.07160819109547713, 'scheduler_type': 'one_cycle', 'warmup_ratio': 0.12812597893948557}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.1698

CROSS-VALIDATION RESULTS
Median MCC: 0.2397
Mean MCC: 0.2013 ± 0.0913
Fold 1 MCC: 0.0420
Fold 2 MCC: 0.2397
Fold 3 MCC: 0.3109
Fold 4 MCC: 0.2442
Fold 5 MCC: 0.1698


TRIAL 41: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.245
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000024
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0725
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.158


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5927, MCC: 0.0135 | Val Loss: 0.5152, MCC: 0.0000 | LR: 0.000024
Epoch 2/75 - Train Loss: 0.5453, MCC: 0.0154 | Val Loss: 0.4871, MCC: 0.0000 | LR: 0.000024
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 08:52:55,808] Trial 41 finished with value: 0.3154684566775704 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.24532294780277197, 'ln': True, 'activation_function': 'relu', 'learning_rate': 2.360252743368619e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.07246107659868739, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.1580396672865168}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.3222

CROSS-VALIDATION RESULTS
Median MCC: 0.3155
Mean MCC: 0.3097 ± 0.0220
Fold 1 MCC: 0.2959
Fold 2 MCC: 0.3155
Fold 3 MCC: 0.3395
Fold 4 MCC: 0.2756
Fold 5 MCC: 0.3222


TRIAL 42: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.285
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000020
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0443
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.142


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.7297, MCC: 0.0064 | Val Loss: 0.5045, MCC: 0.0000 | LR: 0.000020
Epoch 2/75 - Train Loss: 0.5521, MCC: -0.0062 | Val Loss: 0.5392, MCC: 0.0000 | LR: 0.000020
Epoch 3/75 - Train Loss: 

[I 2025-04-22 08:58:23,687] Trial 42 finished with value: 0.295858035030785 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.285071654862687, 'ln': True, 'activation_function': 'relu', 'learning_rate': 2.0335701452442247e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.04426860337676487, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.14215316503113928}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.3167

CROSS-VALIDATION RESULTS
Median MCC: 0.2959
Mean MCC: 0.3006 ± 0.0161
Fold 1 MCC: 0.2959
Fold 2 MCC: 0.2923
Fold 3 MCC: 0.3207
Fold 4 MCC: 0.2774
Fold 5 MCC: 0.3167


TRIAL 43: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.213
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000033
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0577
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.176


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6321, MCC: 0.0038 | Val Loss: 0.5248, MCC: 0.0000 | LR: 0.000033
Epoch 2/75 - Train Loss: 0.5413, MCC: 0.0000 | Val Loss: 0.4877, MCC: 0.0000 | LR: 0.000033
Epoch 3/75 - Train Loss: 0

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2025-04-22 09:01:29,335] Trial 43 finished with value: 0.2779372433083666 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.2131056691759089, 'ln': True, 'activation_function': 'relu', 'learning_rate': 3.30502447404093e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.05773292514354146, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.17580080061345335}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.2401

CROSS-VALIDATION RESULTS
Median MCC: 0.2779
Mean MCC: 0.2750 ± 0.0193
Fold 1 MCC: 0.2779
Fold 2 MCC: 0.2909
Fold 3 MCC: 0.2941
Fold 4 MCC: 0.2722
Fold 5 MCC: 0.2401


TRIAL 44: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.057
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000023
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0308
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.154


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5742, MCC: 0.0252 | Val Loss: 0.4924, MCC: 0.0000 | LR: 0.000023
Epoch 2/75 - Train Loss: 0.5293, MCC: 0.0000 | Val Loss: 0.4818, MCC: 0.0000 | LR: 0.000023
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 09:05:25,492] Trial 44 finished with value: 0.30437922400104067 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.056600216065418624, 'ln': True, 'activation_function': 'relu', 'learning_rate': 2.333207471255684e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.030808731716810464, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.1536060746453948}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.2988

CROSS-VALIDATION RESULTS
Median MCC: 0.3044
Mean MCC: 0.3021 ± 0.0272
Fold 1 MCC: 0.3290
Fold 2 MCC: 0.3252
Fold 3 MCC: 0.3044
Fold 4 MCC: 0.2530
Fold 5 MCC: 0.2988


TRIAL 45: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: max
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.237
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000028
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0746
  - scheduler_type: one_cycle
  - warmup_ratio: 0.111


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing max pooling for 177 strains...
Precomputing max pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6536, MCC: -0.0353 | Val Loss: 0.5842, MCC: 0.0000 | LR: 0.000002
Epoch 2/75 - Train Loss: 0.6161, MCC: -0.0206 | Val Loss: 0.5295, MCC: 0.0000 | LR: 0.000005
Epoch 3/75 - Train Loss: 0.5729, M

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold 3 - Val MCC: 0.0061

FOLD 4/5
Train set: 13632 interactions, 142 strains
Validation set: 3360 interactions, 35 strains
Precomputing max pooling for 177 strains...
Precomputing max pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6877, MCC: -0.0264 | Val Loss: 0.6571, MCC: -0.0584 | LR: 0.000002
Epoch 2/75 - Train Loss: 0.6431, MCC: -0.0140 | Val Loss: 0.6034, MCC: 0.0000 | LR: 0.000005
Epoch 3/75 - Train Loss: 0.5814, MCC: -0.0097 | Val Loss: 0.5823, MCC: 0.0000 | LR: 0.000009
Epoch 4/75 - Train Loss: 0.5343, MCC: -0.0090 | Val Loss: 0.6260, MCC: 0.0000 | LR: 0.000014
Epoch 5/75 - Train Loss: 0.5317, MCC: 0.0000 | Val Loss: 0.5793, MCC: 0.0000 | LR: 0.000019
Epoch 6/75 - Train Loss: 0.5231, MCC: 0.0000 | Val Loss: 0.5730, MCC: 0.0000 | LR: 0.000023
Epoch 7/75 - Train Loss: 0.5178, MCC: 0.0000 | Val Loss: 0.5706, MCC: 0.0000 | LR: 0.000026
Epoch 8/75 - Train Loss: 0.5117, MCC: 0.0118 | Val Loss: 0.5585, MCC: 0.0000 | LR: 0.000028
Epoch 9/75 - Train Loss: 0.5023, MCC: 0.0118 | Va

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2025-04-22 09:09:05,824] Trial 45 finished with value: 0.2978584101096407 and parameters: {'pooling_type': 'max', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.23667567461544817, 'ln': True, 'activation_function': 'relu', 'learning_rate': 2.8018254440029083e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.07461425333695584, 'scheduler_type': 'one_cycle', 'warmup_ratio': 0.11129122403011632}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.0405

CROSS-VALIDATION RESULTS
Median MCC: 0.2979
Mean MCC: 0.2035 ± 0.1482
Fold 1 MCC: 0.3415
Fold 2 MCC: 0.3316
Fold 3 MCC: 0.0061
Fold 4 MCC: 0.2979
Fold 5 MCC: 0.0405


TRIAL 46: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.268
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000043
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0089
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.100


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5721, MCC: -0.0037 | Val Loss: 0.5074, MCC: 0.0000 | LR: 0.000043
Epoch 2/75 - Train Loss: 0.5387, MCC: 0.0173 | Val Loss: 0.4845, MCC: 0.0000 | LR: 0.000043
Epoch 3/75 - Train Loss: 

[I 2025-04-22 09:14:41,930] Trial 46 finished with value: 0.32758298206345926 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.26833878527238475, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.2789194037533884e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.008881354840979173, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.10039972044916576}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.3276

CROSS-VALIDATION RESULTS
Median MCC: 0.3276
Mean MCC: 0.3145 ± 0.0231
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.2977
Fold 3 MCC: 0.3395
Fold 4 MCC: 0.2778
Fold 5 MCC: 0.3276


TRIAL 47: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.269
  - ln: False
  - activation_function: silu
Training Parameters:
  - learning_rate: 0.000052
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0089
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.103


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6398, MCC: -0.0004 | Val Loss: 0.5623, MCC: 0.0000 | LR: 0.000052
Epoch 2/75 - Train Loss: 0.5536, MCC: 0.0000 | Val Loss: 0.5153, MCC: 0.0000 | LR: 0.000052
Epoch 3/75 - Train 

[I 2025-04-22 09:19:25,370] Trial 47 finished with value: 0.1723279754715956 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.26851950676395886, 'ln': False, 'activation_function': 'silu', 'learning_rate': 5.222706941218089e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.008922053409815934, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.10281547164973318}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.1197

CROSS-VALIDATION RESULTS
Median MCC: 0.1723
Mean MCC: 0.1653 ± 0.0314
Fold 1 MCC: 0.2083
Fold 2 MCC: 0.1413
Fold 3 MCC: 0.1849
Fold 4 MCC: 0.1723
Fold 5 MCC: 0.1197


TRIAL 48: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.283
  - ln: True
  - activation_function: gelu
Training Parameters:
  - learning_rate: 0.000044
  - batch_size: 256
  - use_phage_weights: False
  - weight_decay: 0.0048
  - scheduler_type: one_cycle
  - warmup_ratio: 0.088


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6246, MCC: 0.0059 | Val Loss: 0.5192, MCC: 0.0000 | LR: 0.000004
Epoch 2/75 - Train Loss: 0.5503, MCC: -0.0149 | Val Loss: 0.5384, MCC: 0.0000 | LR: 0.000011
Epoch 3/75 - Train Loss: 0.5409, 

[I 2025-04-22 09:24:07,992] Trial 48 finished with value: 0.31078683971429405 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.2826153850510788, 'ln': True, 'activation_function': 'gelu', 'learning_rate': 4.3733696763303497e-05, 'batch_size': 256, 'use_phage_weights': False, 'weight_decay': 0.004765399246450276, 'scheduler_type': 'one_cycle', 'warmup_ratio': 0.08844649174195457}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.3275

CROSS-VALIDATION RESULTS
Median MCC: 0.3108
Mean MCC: 0.3136 ± 0.0162
Fold 1 MCC: 0.3108
Fold 2 MCC: 0.3036
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2905
Fold 5 MCC: 0.3275


TRIAL 49: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 2
  - classifier_hidden_dim: 512
  - dropout: 0.254
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000036
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0098
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.098


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5401, MCC: 0.0001 | Val Loss: 0.4858, MCC: 0.0000 | LR: 0.000036
Epoch 2/75 - Train Loss: 0.5279, MCC: 0.0265 | Val Loss: 0.4723, MCC: 0.0000 | LR: 0.000036
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 09:29:40,250] Trial 49 finished with value: 0.31402734906300017 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 2, 'classifier_hidden_dim': 512, 'dropout': 0.25384915343941045, 'ln': True, 'activation_function': 'relu', 'learning_rate': 3.578042482160055e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.009833687621772254, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.09754091738005653}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.3058

CROSS-VALIDATION RESULTS
Median MCC: 0.3140
Mean MCC: 0.3147 ± 0.0149
Fold 1 MCC: 0.3140
Fold 2 MCC: 0.3327
Fold 3 MCC: 0.3288
Fold 4 MCC: 0.2923
Fold 5 MCC: 0.3058


TRIAL 50: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 768
  - dropout: 0.099
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000053
  - batch_size: 1024
  - use_phage_weights: True
  - weight_decay: 0.0146
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.071


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.9976, MCC: 0.0579 | Val Loss: 0.9221, MCC: -0.0141 | LR: 0.000053
Epoch 2/75 - Train Loss: 0.9804, MCC: 0.1061 | Val Loss: 0.9042, MCC: 0.0877 | LR: 0.000053
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 09:32:41,829] Trial 50 finished with value: 0.2473965546509152 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 768, 'dropout': 0.0986295133732647, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.267077224147904e-05, 'batch_size': 1024, 'use_phage_weights': True, 'weight_decay': 0.014625063136029522, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.07109480927805142}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.2137

CROSS-VALIDATION RESULTS
Median MCC: 0.2474
Mean MCC: 0.2221 ± 0.0720
Fold 1 MCC: 0.0877
Fold 2 MCC: 0.2474
Fold 3 MCC: 0.2925
Fold 4 MCC: 0.2693
Fold 5 MCC: 0.2137


TRIAL 51: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.266
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000040
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0188
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.145


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5492, MCC: -0.0101 | Val Loss: 0.4934, MCC: 0.0000 | LR: 0.000040
Epoch 2/75 - Train Loss: 0.5380, MCC: 0.0089 | Val Loss: 0.4734, MCC: 0.0000 | LR: 0.000040
Epoch 3/75 - Train Loss: 

[I 2025-04-22 09:38:08,813] Trial 51 finished with value: 0.3001654296940305 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.26618719203604, 'ln': True, 'activation_function': 'relu', 'learning_rate': 3.993339399568019e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.01882698821731591, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.14514709220469613}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.3275

CROSS-VALIDATION RESULTS
Median MCC: 0.3002
Mean MCC: 0.3084 ± 0.0223
Fold 1 MCC: 0.2980
Fold 2 MCC: 0.3002
Fold 3 MCC: 0.3395
Fold 4 MCC: 0.2771
Fold 5 MCC: 0.3275


TRIAL 52: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.242
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000030
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0030
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.129


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5755, MCC: -0.0196 | Val Loss: 0.5083, MCC: 0.0000 | LR: 0.000030
Epoch 2/75 - Train Loss: 0.5437, MCC: -0.0095 | Val Loss: 0.4897, MCC: 0.0000 | LR: 0.000030
Epoch 3/75 - Train Loss:

[I 2025-04-22 09:42:22,196] Trial 52 finished with value: 0.30516618393184924 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.24217239214101977, 'ln': True, 'activation_function': 'relu', 'learning_rate': 3.0358241026403032e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.002991451571867942, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.128996505067614}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.3052

CROSS-VALIDATION RESULTS
Median MCC: 0.3052
Mean MCC: 0.3084 ± 0.0166
Fold 1 MCC: 0.3250
Fold 2 MCC: 0.2986
Fold 3 MCC: 0.3290
Fold 4 MCC: 0.2845
Fold 5 MCC: 0.3052


TRIAL 53: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.254
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000025
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0267
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.087


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6181, MCC: 0.0282 | Val Loss: 0.5265, MCC: 0.0000 | LR: 0.000025
Epoch 2/75 - Train Loss: 0.5497, MCC: 0.0050 | Val Loss: 0.4933, MCC: 0.0000 | LR: 0.000025
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 09:47:30,883] Trial 53 finished with value: 0.3054500410764348 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.2538560471740708, 'ln': True, 'activation_function': 'relu', 'learning_rate': 2.5097929145812583e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.02667832610981918, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.08686075280082148}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.3055

CROSS-VALIDATION RESULTS
Median MCC: 0.3055
Mean MCC: 0.3095 ± 0.0112
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3103
Fold 3 MCC: 0.2970
Fold 4 MCC: 0.3046
Fold 5 MCC: 0.3055


TRIAL 54: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.275
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000034
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0360
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.113


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6064, MCC: -0.0082 | Val Loss: 0.5274, MCC: 0.0000 | LR: 0.000034
Epoch 2/75 - Train Loss: 0.5449, MCC: 0.0321 | Val Loss: 0.4901, MCC: 0.0000 | LR: 0.000034
Epoch 3/75 - Train Loss: 

[I 2025-04-22 09:54:13,133] Trial 54 finished with value: 0.322585439464479 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.27514072222764097, 'ln': True, 'activation_function': 'relu', 'learning_rate': 3.421880899157402e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.035972309145479706, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.11345241777387677}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.2863

CROSS-VALIDATION RESULTS
Median MCC: 0.3226
Mean MCC: 0.3134 ± 0.0229
Fold 1 MCC: 0.3226
Fold 2 MCC: 0.3328
Fold 3 MCC: 0.3395
Fold 4 MCC: 0.2859
Fold 5 MCC: 0.2863


TRIAL 55: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.293
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000028
  - batch_size: 128
  - use_phage_weights: False
  - weight_decay: 0.0856
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.105


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5434, MCC: 0.0108 | Val Loss: 0.4929, MCC: 0.0000 | LR: 0.000028
Epoch 2/75 - Train Loss: 0.5204, MCC: 0.0376 | Val Loss: 0.4709, MCC: 0.0000 | LR: 0.000028
Epoch 3/75 - Train Loss: 0.

[I 2025-04-22 09:58:47,668] Trial 55 finished with value: 0.3104283063564704 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.2927424749859704, 'ln': True, 'activation_function': 'relu', 'learning_rate': 2.755524913230165e-05, 'batch_size': 128, 'use_phage_weights': False, 'weight_decay': 0.0856043027923665, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.10462597812759726}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.3104

CROSS-VALIDATION RESULTS
Median MCC: 0.3104
Mean MCC: 0.3126 ± 0.0181
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.2994
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2874
Fold 5 MCC: 0.3104


TRIAL 56: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.229
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000022
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0624
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.121


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5647, MCC: 0.0053 | Val Loss: 0.5104, MCC: 0.0000 | LR: 0.000022
Epoch 2/75 - Train Loss: 0.5410, MCC: 0.0050 | Val Loss: 0.4879, MCC: 0.0000 | LR: 0.000022
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 10:04:35,251] Trial 56 finished with value: 0.3145612307993759 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.22925697962604957, 'ln': True, 'activation_function': 'relu', 'learning_rate': 2.2057603680313924e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.06238729375643765, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.12087144740874256}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.3111

CROSS-VALIDATION RESULTS
Median MCC: 0.3146
Mean MCC: 0.3137 ± 0.0189
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3146
Fold 3 MCC: 0.3328
Fold 4 MCC: 0.2798
Fold 5 MCC: 0.3111


TRIAL 57: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: max
  - classifier_hidden_layers: 2
  - classifier_hidden_dim: 256
  - dropout: 0.199
  - ln: False
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000043
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0072
  - scheduler_type: reduce_on_plateau
  - warmup_ratio: 0.092


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing max pooling for 177 strains...
Precomputing max pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6931, MCC: 0.0225 | Val Loss: 0.6357, MCC: 0.0000 | LR: 0.000043
Epoch 2/75 - Train Loss: 0.6157, MCC: 0.0036 | Val Loss: 0.5599, MCC: 0.0000 | LR: 0.000043
Epoch 3/75 - Train Loss: 0.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold 1 - Val MCC: 0.0000

FOLD 2/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing max pooling for 177 strains...
Precomputing max pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6307, MCC: 0.0007 | Val Loss: 0.5725, MCC: 0.0000 | LR: 0.000043
Epoch 2/75 - Train Loss: 0.5605, MCC: 0.0000 | Val Loss: 0.5205, MCC: 0.0000 | LR: 0.000043
Epoch 3/75 - Train Loss: 0.5350, MCC: 0.0000 | Val Loss: 0.5104, MCC: 0.0000 | LR: 0.000043
Epoch 4/75 - Train Loss: 0.5372, MCC: 0.0000 | Val Loss: 0.5099, MCC: 0.0000 | LR: 0.000043
Epoch 5/75 - Train Loss: 0.5368, MCC: 0.0000 | Val Loss: 0.5109, MCC: 0.0000 | LR: 0.000043
Epoch 6/75 - Train Loss: 0.5322, MCC: 0.0000 | Val Loss: 0.5103, MCC: 0.0000 | LR: 0.000021
Epoch 7/75 - Train Loss: 0.5313, MCC: 0.0000 | Val Loss: 0.5086, MCC: 0.0000 | LR: 0.000021
Epoch 8/75 - Train Loss: 0.5335, MCC: 0.0000 | Val Loss: 0.5077, MCC: 0.0000 | LR: 0.000021
Epoch 9/75 - Train Loss: 0.5300, MCC: 0.0000 | Val Los

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold 2 - Val MCC: 0.0000

FOLD 3/5
Train set: 13632 interactions, 142 strains
Validation set: 3360 interactions, 35 strains
Precomputing max pooling for 177 strains...
Precomputing max pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6647, MCC: -0.0085 | Val Loss: 0.5979, MCC: 0.0000 | LR: 0.000043
Epoch 2/75 - Train Loss: 0.5982, MCC: 0.0000 | Val Loss: 0.5141, MCC: 0.0000 | LR: 0.000043
Epoch 3/75 - Train Loss: 0.5632, MCC: 0.0000 | Val Loss: 0.4684, MCC: 0.0000 | LR: 0.000043
Epoch 4/75 - Train Loss: 0.5543, MCC: 0.0000 | Val Loss: 0.4592, MCC: 0.0000 | LR: 0.000043
Epoch 5/75 - Train Loss: 0.5517, MCC: 0.0000 | Val Loss: 0.4613, MCC: 0.0000 | LR: 0.000043
Epoch 6/75 - Train Loss: 0.5497, MCC: 0.0000 | Val Loss: 0.4616, MCC: 0.0000 | LR: 0.000021
Epoch 7/75 - Train Loss: 0.5500, MCC: 0.0000 | Val Loss: 0.4599, MCC: 0.0000 | LR: 0.000021
Epoch 8/75 - Train Loss: 0.5469, MCC: 0.0000 | Val Loss: 0.4594, MCC: 0.0000 | LR: 0.000021
Epoch 9/75 - Train Loss: 0.5494, MCC: 0.0000 | Val Lo

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold 3 - Val MCC: 0.0000

FOLD 4/5
Train set: 13632 interactions, 142 strains
Validation set: 3360 interactions, 35 strains
Precomputing max pooling for 177 strains...
Precomputing max pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6577, MCC: 0.0048 | Val Loss: 0.6229, MCC: 0.0000 | LR: 0.000043
Epoch 2/75 - Train Loss: 0.5781, MCC: 0.0000 | Val Loss: 0.5772, MCC: 0.0000 | LR: 0.000043
Epoch 3/75 - Train Loss: 0.5330, MCC: 0.0000 | Val Loss: 0.5730, MCC: 0.0000 | LR: 0.000043
Epoch 4/75 - Train Loss: 0.5240, MCC: 0.0000 | Val Loss: 0.5779, MCC: 0.0000 | LR: 0.000043
Epoch 5/75 - Train Loss: 0.5251, MCC: 0.0000 | Val Loss: 0.5719, MCC: 0.0000 | LR: 0.000043
Epoch 6/75 - Train Loss: 0.5218, MCC: 0.0000 | Val Loss: 0.5700, MCC: 0.0000 | LR: 0.000021
Epoch 7/75 - Train Loss: 0.5194, MCC: 0.0000 | Val Loss: 0.5695, MCC: 0.0000 | LR: 0.000021
Epoch 8/75 - Train Loss: 0.5182, MCC: 0.0000 | Val Loss: 0.5699, MCC: 0.0000 | LR: 0.000021
Epoch 9/75 - Train Loss: 0.5196, MCC: 0.0000 | Val Los

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold 4 - Val MCC: 0.0000

FOLD 5/5
Train set: 13632 interactions, 142 strains
Validation set: 3360 interactions, 35 strains
Precomputing max pooling for 177 strains...
Precomputing max pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6378, MCC: -0.0019 | Val Loss: 0.5882, MCC: 0.0000 | LR: 0.000043
Epoch 2/75 - Train Loss: 0.5635, MCC: 0.0000 | Val Loss: 0.5388, MCC: 0.0000 | LR: 0.000043
Epoch 3/75 - Train Loss: 0.5336, MCC: 0.0000 | Val Loss: 0.5317, MCC: 0.0000 | LR: 0.000043
Epoch 4/75 - Train Loss: 0.5321, MCC: 0.0000 | Val Loss: 0.5312, MCC: 0.0000 | LR: 0.000043
Epoch 5/75 - Train Loss: 0.5288, MCC: 0.0000 | Val Loss: 0.5296, MCC: 0.0000 | LR: 0.000043
Epoch 6/75 - Train Loss: 0.5297, MCC: 0.0000 | Val Loss: 0.5291, MCC: 0.0000 | LR: 0.000021
Epoch 7/75 - Train Loss: 0.5256, MCC: 0.0000 | Val Loss: 0.5285, MCC: 0.0000 | LR: 0.000021
Epoch 8/75 - Train Loss: 0.5295, MCC: 0.0000 | Val Loss: 0.5278, MCC: 0.0000 | LR: 0.000021
Epoch 9/75 - Train Loss: 0.5278, MCC: 0.0000 | Val Lo

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2025-04-22 10:16:01,001] Trial 57 finished with value: 0.0 and parameters: {'pooling_type': 'max', 'classifier_hidden_layers': 2, 'classifier_hidden_dim': 256, 'dropout': 0.19947552371438734, 'ln': False, 'activation_function': 'relu', 'learning_rate': 4.2723368869705015e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.007223595498138471, 'scheduler_type': 'reduce_on_plateau', 'warmup_ratio': 0.09163472930693835}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.0000

CROSS-VALIDATION RESULTS
Median MCC: 0.0000
Mean MCC: 0.0000 ± 0.0000
Fold 1 MCC: 0.0000
Fold 2 MCC: 0.0000
Fold 3 MCC: 0.0000
Fold 4 MCC: 0.0000
Fold 5 MCC: 0.0000


TRIAL 58: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.251
  - ln: True
  - activation_function: gelu
Training Parameters:
  - learning_rate: 0.000038
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0478
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.081


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5724, MCC: -0.0072 | Val Loss: 0.5219, MCC: 0.0000 | LR: 0.000038
Epoch 2/75 - Train Loss: 0.5385, MCC: 0.0071 | Val Loss: 0.4821, MCC: 0.0000 | LR: 0.000038
Epoch 3/75 - Train 

[I 2025-04-22 10:21:51,598] Trial 58 finished with value: 0.3012832913742416 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.2508906369889522, 'ln': True, 'activation_function': 'gelu', 'learning_rate': 3.80526337045674e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.04782533019460863, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.08142536715559312}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.2401

CROSS-VALIDATION RESULTS
Median MCC: 0.3013
Mean MCC: 0.2968 ± 0.0387
Fold 1 MCC: 0.3013
Fold 2 MCC: 0.3400
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2671
Fold 5 MCC: 0.2401


TRIAL 59: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.157
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000020
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0966
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.164


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5766, MCC: -0.0112 | Val Loss: 0.5161, MCC: 0.0000 | LR: 0.000020
Epoch 2/75 - Train Loss: 0.5411, MCC: -0.0082 | Val Loss: 0.4870, MCC: 0.0000 | LR: 0.000020
Epoch 3/75 - Train Loss:

[I 2025-04-22 10:27:31,959] Trial 59 finished with value: 0.32758298206345926 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.15686128253148454, 'ln': True, 'activation_function': 'relu', 'learning_rate': 1.9976532547969732e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.09658767472014318, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.16389060144412074}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.3276

CROSS-VALIDATION RESULTS
Median MCC: 0.3276
Mean MCC: 0.3168 ± 0.0175
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3332
Fold 3 MCC: 0.3049
Fold 4 MCC: 0.2879
Fold 5 MCC: 0.3276


TRIAL 60: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 1
  - classifier_hidden_dim: 512
  - dropout: 0.154
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000019
  - batch_size: 256
  - use_phage_weights: False
  - weight_decay: 0.0057
  - scheduler_type: one_cycle
  - warmup_ratio: 0.150


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6122, MCC: -0.0245 | Val Loss: 0.5771, MCC: 0.0000 | LR: 0.000001
Epoch 2/75 - Train Loss: 0.5690, MCC: -0.0067 | Val Loss: 0.5300, MCC: 0.0000 | LR: 0.000002
Epoch 3/75 - Train Loss: 0.5414,

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold 3 - Val MCC: 0.0798

FOLD 4/5
Train set: 13632 interactions, 142 strains
Validation set: 3360 interactions, 35 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6370, MCC: 0.0275 | Val Loss: 0.6392, MCC: -0.0005 | LR: 0.000001
Epoch 2/75 - Train Loss: 0.5780, MCC: 0.0040 | Val Loss: 0.5982, MCC: 0.0000 | LR: 0.000002
Epoch 3/75 - Train Loss: 0.5328, MCC: -0.0054 | Val Loss: 0.5945, MCC: 0.0000 | LR: 0.000004
Epoch 4/75 - Train Loss: 0.5181, MCC: 0.0000 | Val Loss: 0.5861, MCC: 0.0000 | LR: 0.000006
Epoch 5/75 - Train Loss: 0.5098, MCC: 0.0000 | Val Loss: 0.5801, MCC: 0.0000 | LR: 0.000008
Epoch 6/75 - Train Loss: 0.5003, MCC: 0.0000 | Val Loss: 0.5750, MCC: 0.0000 | LR: 0.000011
Epoch 7/75 - Train Loss: 0.4927, MCC: 0.0083 | Val Loss: 0.5743, MCC: 0.0000 | LR: 0.000014
Epoch 8/75 - Train Loss: 0.4876, MCC: 0.0119 | Val Loss: 0.5588, MCC: 0.0000 | LR: 0.000016
Epoch 9/75 - Train Loss: 0.4830, MCC: 0.0310 | Val

[I 2025-04-22 10:31:38,846] Trial 60 finished with value: 0.2982532303209996 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 1, 'classifier_hidden_dim': 512, 'dropout': 0.15390494470516727, 'ln': True, 'activation_function': 'relu', 'learning_rate': 1.9316311954459578e-05, 'batch_size': 256, 'use_phage_weights': False, 'weight_decay': 0.005711323225097488, 'scheduler_type': 'one_cycle', 'warmup_ratio': 0.14968858320792244}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.3107

CROSS-VALIDATION RESULTS
Median MCC: 0.2983
Mean MCC: 0.2389 ± 0.0889
Fold 1 MCC: 0.3034
Fold 2 MCC: 0.2983
Fold 3 MCC: 0.0798
Fold 4 MCC: 0.2023
Fold 5 MCC: 0.3107


TRIAL 61: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.135
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000026
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0643
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.161


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5714, MCC: 0.0002 | Val Loss: 0.4985, MCC: 0.0000 | LR: 0.000026
Epoch 2/75 - Train Loss: 0.5347, MCC: 0.0077 | Val Loss: 0.4769, MCC: 0.0000 | LR: 0.000026
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 10:37:53,612] Trial 61 finished with value: 0.32784481446388 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.13490746467540202, 'ln': True, 'activation_function': 'relu', 'learning_rate': 2.6148630310549e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.06431052563934861, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.16097662406326443}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.3262

CROSS-VALIDATION RESULTS
Median MCC: 0.3278
Mean MCC: 0.3229 ± 0.0173
Fold 1 MCC: 0.3278
Fold 2 MCC: 0.3345
Fold 3 MCC: 0.3367
Fold 4 MCC: 0.2892
Fold 5 MCC: 0.3262


TRIAL 62: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.117
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000030
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0973
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.161


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5591, MCC: 0.0152 | Val Loss: 0.4942, MCC: 0.0000 | LR: 0.000030
Epoch 2/75 - Train Loss: 0.5347, MCC: 0.0000 | Val Loss: 0.4820, MCC: 0.0000 | LR: 0.000030
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 10:42:33,128] Trial 62 finished with value: 0.31441287057441913 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.11684240461357337, 'ln': True, 'activation_function': 'relu', 'learning_rate': 2.966600511620501e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.09728590801928308, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.16132922917264303}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.3017

CROSS-VALIDATION RESULTS
Median MCC: 0.3144
Mean MCC: 0.3126 ± 0.0246
Fold 1 MCC: 0.3144
Fold 2 MCC: 0.3450
Fold 3 MCC: 0.3290
Fold 4 MCC: 0.2728
Fold 5 MCC: 0.3017


TRIAL 63: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.134
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000015
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0862
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.169


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6132, MCC: -0.0051 | Val Loss: 0.5067, MCC: 0.0000 | LR: 0.000015
Epoch 2/75 - Train Loss: 0.5502, MCC: 0.0000 | Val Loss: 0.4924, MCC: 0.0000 | LR: 0.000015
Epoch 3/75 - Train Loss: 

[I 2025-04-22 10:47:16,564] Trial 63 finished with value: 0.30749590792606846 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.1340096571988762, 'ln': True, 'activation_function': 'relu', 'learning_rate': 1.4635143801154943e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.08617481938850123, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.16916196290545024}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.3075

CROSS-VALIDATION RESULTS
Median MCC: 0.3075
Mean MCC: 0.2921 ± 0.0332
Fold 1 MCC: 0.3290
Fold 2 MCC: 0.2497
Fold 3 MCC: 0.3193
Fold 4 MCC: 0.2552
Fold 5 MCC: 0.3075


TRIAL 64: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.144
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000026
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0519
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.181


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5583, MCC: -0.0105 | Val Loss: 0.4985, MCC: 0.0000 | LR: 0.000026
Epoch 2/75 - Train Loss: 0.5358, MCC: 0.0000 | Val Loss: 0.4776, MCC: 0.0000 | LR: 0.000026
Epoch 3/75 - Train Loss: 

[I 2025-04-22 10:52:53,122] Trial 64 finished with value: 0.3301495479532531 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.14405918637658785, 'ln': True, 'activation_function': 'relu', 'learning_rate': 2.6082605776089435e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.05193801604836125, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.1811057996508849}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.3186

CROSS-VALIDATION RESULTS
Median MCC: 0.3301
Mean MCC: 0.3197 ± 0.0236
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3394
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2746
Fold 5 MCC: 0.3186


TRIAL 65: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.168
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000026
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0522
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.168


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5919, MCC: -0.0004 | Val Loss: 0.5144, MCC: 0.0000 | LR: 0.000026
Epoch 2/75 - Train Loss: 0.5449, MCC: 0.0000 | Val Loss: 0.4896, MCC: 0.0000 | LR: 0.000026
Epoch 3/75 - Train Loss: 

[I 2025-04-22 10:58:46,724] Trial 65 finished with value: 0.3267004201721205 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.16849916413209842, 'ln': True, 'activation_function': 'relu', 'learning_rate': 2.6097926320766025e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.052169013453267646, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.16808065373725375}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.3240

CROSS-VALIDATION RESULTS
Median MCC: 0.3267
Mean MCC: 0.3200 ± 0.0171
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3327
Fold 3 MCC: 0.3267
Fold 4 MCC: 0.2862
Fold 5 MCC: 0.3240


TRIAL 66: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.150
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000020
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0415
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.190


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5695, MCC: 0.0141 | Val Loss: 0.5100, MCC: 0.0000 | LR: 0.000020
Epoch 2/75 - Train Loss: 0.5419, MCC: -0.0047 | Val Loss: 0.4892, MCC: 0.0000 | LR: 0.000020
Epoch 3/75 - Train Loss: 

[I 2025-04-22 11:04:12,329] Trial 66 finished with value: 0.2923168136419878 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.14974125396068427, 'ln': True, 'activation_function': 'relu', 'learning_rate': 1.9678434981426702e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.04151831753470973, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.19008849082473123}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.3222

CROSS-VALIDATION RESULTS
Median MCC: 0.2923
Mean MCC: 0.3022 ± 0.0260
Fold 1 MCC: 0.2923
Fold 2 MCC: 0.2923
Fold 3 MCC: 0.3395
Fold 4 MCC: 0.2647
Fold 5 MCC: 0.3222


TRIAL 67: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.122
  - ln: True
  - activation_function: silu
Training Parameters:
  - learning_rate: 0.000022
  - batch_size: 128
  - use_phage_weights: False
  - weight_decay: 0.0621
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.181


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5292, MCC: -0.0012 | Val Loss: 0.4845, MCC: 0.0000 | LR: 0.000022
Epoch 2/75 - Train Loss: 0.5038, MCC: 0.0717 | Val Loss: 0.4608, MCC: 0.0000 | LR: 0.000022
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 11:08:14,205] Trial 67 finished with value: 0.321001397496274 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.12172067211253973, 'ln': True, 'activation_function': 'silu', 'learning_rate': 2.15878886238614e-05, 'batch_size': 128, 'use_phage_weights': False, 'weight_decay': 0.06206882575190141, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.1805496786510853}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.3239

CROSS-VALIDATION RESULTS
Median MCC: 0.3210
Mean MCC: 0.3141 ± 0.0207
Fold 1 MCC: 0.3210
Fold 2 MCC: 0.3327
Fold 3 MCC: 0.3189
Fold 4 MCC: 0.2738
Fold 5 MCC: 0.3239


TRIAL 68: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: max
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.140
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000018
  - batch_size: 1024
  - use_phage_weights: True
  - weight_decay: 0.0782
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.163


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing max pooling for 177 strains...
Precomputing max pooling for 97 phages...
Epoch 1/75 - Train Loss: 1.0129, MCC: -0.0059 | Val Loss: 0.9310, MCC: -0.0141 | LR: 0.000018
Epoch 2/75 - Train Loss: 0.9898, MCC: 0.0475 | Val Loss: 0.9251, MCC: -0.0141 | LR: 0.000018
Epoch 3/75 - Train Loss: 0.

[I 2025-04-22 11:10:19,016] Trial 68 finished with value: 0.10066689825997757 and parameters: {'pooling_type': 'max', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.14044573005789826, 'ln': True, 'activation_function': 'relu', 'learning_rate': 1.7652618641491977e-05, 'batch_size': 1024, 'use_phage_weights': True, 'weight_decay': 0.07822674078850994, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.1625786844640676}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.1007

CROSS-VALIDATION RESULTS
Median MCC: 0.1007
Mean MCC: 0.1066 ± 0.0383
Fold 1 MCC: 0.0412
Fold 2 MCC: 0.1004
Fold 3 MCC: 0.1469
Fold 4 MCC: 0.1436
Fold 5 MCC: 0.1007


TRIAL 69: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.156
  - ln: False
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000016
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0513
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.172


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6736, MCC: -0.0056 | Val Loss: 0.6455, MCC: 0.0000 | LR: 0.000016
Epoch 2/75 - Train Loss: 0.6303, MCC: 0.0000 | Val Loss: 0.5939, MCC: 0.0000 | LR: 0.000016
Epoch 3/75 - Train Loss: 

[I 2025-04-22 11:21:49,317] Trial 69 finished with value: 0.09885922294501852 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.1560413751816941, 'ln': False, 'activation_function': 'relu', 'learning_rate': 1.5632329414235793e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.05126979798409692, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.17175479757723605}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.0838

CROSS-VALIDATION RESULTS
Median MCC: 0.0989
Mean MCC: 0.1143 ± 0.0308
Fold 1 MCC: 0.1197
Fold 2 MCC: 0.0977
Fold 3 MCC: 0.1714
Fold 4 MCC: 0.0989
Fold 5 MCC: 0.0838


TRIAL 70: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 768
  - dropout: 0.097
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000035
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0980
  - scheduler_type: reduce_on_plateau
  - warmup_ratio: 0.183


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5547, MCC: 0.0186 | Val Loss: 0.4799, MCC: 0.0000 | LR: 0.000035
Epoch 2/75 - Train Loss: 0.5212, MCC: -0.0067 | Val Loss: 0.4591, MCC: 0.0000 | LR: 0.000035
Epoch 3/75 - Train Loss:

[I 2025-04-22 11:24:18,035] Trial 70 finished with value: 0.3040249940228125 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 768, 'dropout': 0.09732510459528862, 'ln': True, 'activation_function': 'relu', 'learning_rate': 3.5053012231988044e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.09803687509940316, 'scheduler_type': 'reduce_on_plateau', 'warmup_ratio': 0.18294703534750334}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.3099

CROSS-VALIDATION RESULTS
Median MCC: 0.3040
Mean MCC: 0.2921 ± 0.0309
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.2415
Fold 3 MCC: 0.3040
Fold 4 MCC: 0.2750
Fold 5 MCC: 0.3099


TRIAL 71: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.127
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000028
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0394
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.135


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5782, MCC: 0.0023 | Val Loss: 0.4979, MCC: 0.0000 | LR: 0.000028
Epoch 2/75 - Train Loss: 0.5320, MCC: 0.0000 | Val Loss: 0.4778, MCC: 0.0000 | LR: 0.000028
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 11:29:21,976] Trial 71 finished with value: 0.32699384564743905 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.1268685580936218, 'ln': True, 'activation_function': 'relu', 'learning_rate': 2.788774164282311e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.03940677179688147, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.13512326091851762}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.3270

CROSS-VALIDATION RESULTS
Median MCC: 0.3270
Mean MCC: 0.3112 ± 0.0303
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3103
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2530
Fold 5 MCC: 0.3270


TRIAL 72: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.161
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000031
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0348
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.148


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5764, MCC: 0.0127 | Val Loss: 0.4988, MCC: 0.0000 | LR: 0.000031
Epoch 2/75 - Train Loss: 0.5330, MCC: -0.0067 | Val Loss: 0.4858, MCC: 0.0000 | LR: 0.000031
Epoch 3/75 - Train Loss: 

[I 2025-04-22 11:34:22,405] Trial 72 finished with value: 0.3291583663011339 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.16102586692346404, 'ln': True, 'activation_function': 'relu', 'learning_rate': 3.08666365043036e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.03482224378200563, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.1478367909286824}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.3292

CROSS-VALIDATION RESULTS
Median MCC: 0.3292
Mean MCC: 0.3163 ± 0.0191
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.2986
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2883
Fold 5 MCC: 0.3292


TRIAL 73: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.185
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000032
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0255
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.148


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5818, MCC: -0.0095 | Val Loss: 0.5052, MCC: 0.0000 | LR: 0.000032
Epoch 2/75 - Train Loss: 0.5388, MCC: -0.0067 | Val Loss: 0.4835, MCC: 0.0000 | LR: 0.000032
Epoch 3/75 - Train Loss:

[I 2025-04-22 11:39:39,446] Trial 73 finished with value: 0.3189372392952408 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.1847408983538438, 'ln': True, 'activation_function': 'relu', 'learning_rate': 3.234245885194066e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.02552651117036058, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.14812761837294364}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.3275

CROSS-VALIDATION RESULTS
Median MCC: 0.3189
Mean MCC: 0.3098 ± 0.0246
Fold 1 MCC: 0.3189
Fold 2 MCC: 0.2906
Fold 3 MCC: 0.3395
Fold 4 MCC: 0.2726
Fold 5 MCC: 0.3275


TRIAL 74: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.163
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000025
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0203
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.163


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5447, MCC: 0.0221 | Val Loss: 0.4907, MCC: 0.0000 | LR: 0.000025
Epoch 2/75 - Train Loss: 0.5383, MCC: 0.0077 | Val Loss: 0.4785, MCC: 0.0000 | LR: 0.000025
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 11:43:42,832] Trial 74 finished with value: 0.30798746004194694 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.16291229113794978, 'ln': True, 'activation_function': 'relu', 'learning_rate': 2.510962035371887e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.020286779136077997, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.16312524145387633}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.3129

CROSS-VALIDATION RESULTS
Median MCC: 0.3080
Mean MCC: 0.3076 ± 0.0125
Fold 1 MCC: 0.3080
Fold 2 MCC: 0.2936
Fold 3 MCC: 0.3279
Fold 4 MCC: 0.2957
Fold 5 MCC: 0.3129


TRIAL 75: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.140
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000039
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0315
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.186


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5649, MCC: 0.0232 | Val Loss: 0.4919, MCC: 0.0000 | LR: 0.000039
Epoch 2/75 - Train Loss: 0.5313, MCC: 0.0000 | Val Loss: 0.4756, MCC: 0.0000 | LR: 0.000039
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 11:48:24,543] Trial 75 finished with value: 0.3301495479532531 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.14047478635241417, 'ln': True, 'activation_function': 'relu', 'learning_rate': 3.9457649825509786e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.03149500280478163, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18619312714635303}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.3052

CROSS-VALIDATION RESULTS
Median MCC: 0.3301
Mean MCC: 0.3116 ± 0.0305
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3327
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2545
Fold 5 MCC: 0.3052


TRIAL 76: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.108
  - ln: True
  - activation_function: gelu
Training Parameters:
  - learning_rate: 0.000050
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0639
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.187


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5404, MCC: 0.0027 | Val Loss: 0.4684, MCC: 0.0000 | LR: 0.000050
Epoch 2/75 - Train Loss: 0.5158, MCC: 0.0193 | Val Loss: 0.4525, MCC: 0.0000 | LR: 0.000050
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 11:52:38,551] Trial 76 finished with value: 0.321098511923026 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.10795285660483495, 'ln': True, 'activation_function': 'gelu', 'learning_rate': 4.968654296578803e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.06390785718591077, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18744290414217674}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.3269

CROSS-VALIDATION RESULTS
Median MCC: 0.3211
Mean MCC: 0.3188 ± 0.0116
Fold 1 MCC: 0.3211
Fold 2 MCC: 0.3339
Fold 3 MCC: 0.3107
Fold 4 MCC: 0.3014
Fold 5 MCC: 0.3269


TRIAL 77: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 256
  - dropout: 0.142
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000040
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0307
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.182


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5634, MCC: -0.0158 | Val Loss: 0.5039, MCC: 0.0000 | LR: 0.000040
Epoch 2/75 - Train Loss: 0.5424, MCC: 0.0077 | Val Loss: 0.4946, MCC: 0.0000 | LR: 0.000040
Epoch 3/75 - Train 

[I 2025-04-22 11:58:05,972] Trial 77 finished with value: 0.3282359883505621 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 256, 'dropout': 0.14243861713124784, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.0340290537422554e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.030726516108485803, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18171259246928467}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.3282

CROSS-VALIDATION RESULTS
Median MCC: 0.3282
Mean MCC: 0.3194 ± 0.0204
Fold 1 MCC: 0.3067
Fold 2 MCC: 0.3400
Fold 3 MCC: 0.3361
Fold 4 MCC: 0.2857
Fold 5 MCC: 0.3282


TRIAL 78: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 256
  - dropout: 0.141
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000059
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0314
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.198


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5812, MCC: 0.0047 | Val Loss: 0.5024, MCC: 0.0000 | LR: 0.000059
Epoch 2/75 - Train Loss: 0.5436, MCC: 0.0000 | Val Loss: 0.4871, MCC: 0.0000 | LR: 0.000059
Epoch 3/75 - Train L

[I 2025-04-22 12:01:57,530] Trial 78 finished with value: 0.31140091216967924 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 256, 'dropout': 0.141222259694636, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.862952362289927e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.03139706775590797, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.19800008335972416}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.3114

CROSS-VALIDATION RESULTS
Median MCC: 0.3114
Mean MCC: 0.3057 ± 0.0188
Fold 1 MCC: 0.2967
Fold 2 MCC: 0.3276
Fold 3 MCC: 0.3186
Fold 4 MCC: 0.2739
Fold 5 MCC: 0.3114


TRIAL 79: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 256
  - dropout: 0.143
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000040
  - batch_size: 256
  - use_phage_weights: True
  - weight_decay: 0.0329
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.184


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.9875, MCC: 0.0560 | Val Loss: 0.9264, MCC: 0.0143 | LR: 0.000040
Epoch 2/75 - Train Loss: 0.9838, MCC: 0.0813 | Val Loss: 0.9298, MCC: 0.0179 | LR: 0.000040
Epoch 3/75 - Train Los

[I 2025-04-22 12:05:16,617] Trial 79 finished with value: 0.24433417704635016 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 256, 'dropout': 0.14269126430559945, 'ln': True, 'activation_function': 'relu', 'learning_rate': 3.961177197728018e-05, 'batch_size': 256, 'use_phage_weights': True, 'weight_decay': 0.0329260657941287, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18434755345507076}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.1293

CROSS-VALIDATION RESULTS
Median MCC: 0.2443
Mean MCC: 0.2031 ± 0.1034
Fold 1 MCC: 0.0395
Fold 2 MCC: 0.2880
Fold 3 MCC: 0.3142
Fold 4 MCC: 0.2443
Fold 5 MCC: 0.1293


TRIAL 80: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 256
  - dropout: 0.132
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000046
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0131
  - scheduler_type: one_cycle
  - warmup_ratio: 0.196


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6179, MCC: -0.0102 | Val Loss: 0.5749, MCC: 0.0000 | LR: 0.000002
Epoch 2/75 - Train Loss: 0.5981, MCC: 0.0088 | Val Loss: 0.5474, MCC: 0.0000 | LR: 0.000004
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 12:10:14,221] Trial 80 finished with value: 0.3275675187649259 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 256, 'dropout': 0.1320439194760229, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.572440444882197e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.01308860136821071, 'scheduler_type': 'one_cycle', 'warmup_ratio': 0.19562321484176098}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.3116

CROSS-VALIDATION RESULTS
Median MCC: 0.3276
Mean MCC: 0.3151 ± 0.0237
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3276
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2704
Fold 5 MCC: 0.3116


TRIAL 81: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 256
  - dropout: 0.177
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000043
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0433
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.173


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5667, MCC: -0.0170 | Val Loss: 0.5039, MCC: 0.0000 | LR: 0.000043
Epoch 2/75 - Train Loss: 0.5412, MCC: 0.0000 | Val Loss: 0.4918, MCC: 0.0000 | LR: 0.000043
Epoch 3/75 - Train 

[I 2025-04-22 12:14:53,909] Trial 81 finished with value: 0.3249823121896812 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 256, 'dropout': 0.17711187682194202, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.303735455210939e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.04327579024419568, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.17262050795933534}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.3250

CROSS-VALIDATION RESULTS
Median MCC: 0.3250
Mean MCC: 0.3172 ± 0.0207
Fold 1 MCC: 0.3325
Fold 2 MCC: 0.3146
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2783
Fold 5 MCC: 0.3250


TRIAL 82: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.162
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000048
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0082
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.180


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5642, MCC: 0.0158 | Val Loss: 0.4900, MCC: 0.0000 | LR: 0.000048
Epoch 2/75 - Train Loss: 0.5299, MCC: 0.0077 | Val Loss: 0.4745, MCC: 0.0000 | LR: 0.000048
Epoch 3/75 - Train L

[I 2025-04-22 12:19:16,359] Trial 82 finished with value: 0.32927282695339666 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.16153162001941299, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.847968254352484e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.008208387583499252, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.17997288331341862}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.3293

CROSS-VALIDATION RESULTS
Median MCC: 0.3293
Mean MCC: 0.3187 ± 0.0239
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3226
Fold 3 MCC: 0.3394
Fold 4 MCC: 0.2722
Fold 5 MCC: 0.3293


TRIAL 83: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 256
  - dropout: 0.172
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000038
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0081
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.179


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5619, MCC: 0.0201 | Val Loss: 0.4993, MCC: 0.0000 | LR: 0.000037
Epoch 2/75 - Train Loss: 0.5392, MCC: 0.0000 | Val Loss: 0.4872, MCC: 0.0000 | LR: 0.000037
Epoch 3/75 - Train L

[I 2025-04-22 12:23:55,052] Trial 83 finished with value: 0.31848220846429115 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 256, 'dropout': 0.17155839116282504, 'ln': True, 'activation_function': 'relu', 'learning_rate': 3.750276528451529e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.008113896669368787, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.1790222302490865}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.2996

CROSS-VALIDATION RESULTS
Median MCC: 0.3185
Mean MCC: 0.3120 ± 0.0172
Fold 1 MCC: 0.3185
Fold 2 MCC: 0.3188
Fold 3 MCC: 0.3365
Fold 4 MCC: 0.2867
Fold 5 MCC: 0.2996


TRIAL 84: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.147
  - ln: True
  - activation_function: silu
Training Parameters:
  - learning_rate: 0.000058
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0063
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.193


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5622, MCC: -0.0088 | Val Loss: 0.4824, MCC: 0.0000 | LR: 0.000058
Epoch 2/75 - Train Loss: 0.5265, MCC: 0.0108 | Val Loss: 0.4688, MCC: 0.0000 | LR: 0.000058
Epoch 3/75 - Train 

[I 2025-04-22 12:27:46,026] Trial 84 finished with value: 0.3106211148990342 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.14653352064652478, 'ln': True, 'activation_function': 'silu', 'learning_rate': 5.795005103919866e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.0063026120782682825, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.1929601253445852}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.3277

CROSS-VALIDATION RESULTS
Median MCC: 0.3106
Mean MCC: 0.3031 ± 0.0323
Fold 1 MCC: 0.3106
Fold 2 MCC: 0.2975
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2442
Fold 5 MCC: 0.3277


TRIAL 85: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.162
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000061
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0096
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.184


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5575, MCC: -0.0061 | Val Loss: 0.4903, MCC: 0.0000 | LR: 0.000061
Epoch 2/75 - Train Loss: 0.5321, MCC: -0.0047 | Val Loss: 0.4769, MCC: 0.0000 | LR: 0.000061
Epoch 3/75 - Train

[I 2025-04-22 12:33:03,120] Trial 85 finished with value: 0.3301495479532531 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.16248634029831616, 'ln': True, 'activation_function': 'relu', 'learning_rate': 6.149452223054504e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.009585854617794116, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18386561207622276}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.3286

CROSS-VALIDATION RESULTS
Median MCC: 0.3301
Mean MCC: 0.3268 ± 0.0162
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3400
Fold 3 MCC: 0.3396
Fold 4 MCC: 0.2957
Fold 5 MCC: 0.3286


TRIAL 86: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.164
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000089
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0213
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.186


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5460, MCC: -0.0142 | Val Loss: 0.4797, MCC: 0.0000 | LR: 0.000089
Epoch 2/75 - Train Loss: 0.5091, MCC: 0.0619 | Val Loss: 0.4670, MCC: 0.0595 | LR: 0.000089
Epoch 3/75 - Train L

[I 2025-04-22 12:35:48,760] Trial 86 finished with value: 0.32067960918359106 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.16405624445640504, 'ln': True, 'activation_function': 'relu', 'learning_rate': 8.910273464296188e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.021331031453557962, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18554556253717866}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.2896

CROSS-VALIDATION RESULTS
Median MCC: 0.3207
Mean MCC: 0.3072 ± 0.0226
Fold 1 MCC: 0.3210
Fold 2 MCC: 0.3327
Fold 3 MCC: 0.3207
Fold 4 MCC: 0.2722
Fold 5 MCC: 0.2896


TRIAL 87: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 1
  - classifier_hidden_dim: 512
  - dropout: 0.138
  - ln: False
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000064
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0102
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.190


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6177, MCC: 0.0086 | Val Loss: 0.4973, MCC: 0.0000 | LR: 0.000064
Epoch 2/75 - Train Loss: 0.5426, MCC: 0.0000 | Val Loss: 0.4979, MCC: 0.0000 | LR: 0.000064
Epoch 3/75 - Train 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold 4 - Val MCC: 0.0989

FOLD 5/5
Train set: 13632 interactions, 142 strains
Validation set: 3360 interactions, 35 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5923, MCC: -0.0102 | Val Loss: 0.5325, MCC: 0.0000 | LR: 0.000064
Epoch 2/75 - Train Loss: 0.5308, MCC: 0.0000 | Val Loss: 0.5306, MCC: 0.0000 | LR: 0.000064
Epoch 3/75 - Train Loss: 0.5288, MCC: 0.0000 | Val Loss: 0.5273, MCC: 0.0000 | LR: 0.000064
Epoch 4/75 - Train Loss: 0.5230, MCC: 0.0000 | Val Loss: 0.5219, MCC: 0.0000 | LR: 0.000064
Epoch 5/75 - Train Loss: 0.5173, MCC: 0.0000 | Val Loss: 0.5195, MCC: 0.0000 | LR: 0.000063
Epoch 6/75 - Train Loss: 0.5144, MCC: 0.0000 | Val Loss: 0.5176, MCC: 0.0000 | LR: 0.000063
Epoch 7/75 - Train Loss: 0.5142, MCC: 0.0000 | Val Loss: 0.5144, MCC: 0.0000 | LR: 0.000063
Epoch 8/75 - Train Loss: 0.5079, MCC: 0.0000 | Val Loss: 0.5123, MCC: 0.0000 | LR: 0.000062
Epoch 9/75 - Train Loss: 0.5047, MCC: 0.0000 | 

[I 2025-04-22 12:43:05,018] Trial 87 finished with value: 0.07874890777196418 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 1, 'classifier_hidden_dim': 512, 'dropout': 0.13794678193739532, 'ln': False, 'activation_function': 'relu', 'learning_rate': 6.407452503930444e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.010232706694494517, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18951288373507025}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.0651

CROSS-VALIDATION RESULTS
Median MCC: 0.0787
Mean MCC: 0.0793 ± 0.0160
Fold 1 MCC: 0.0954
Fold 2 MCC: 0.0584
Fold 3 MCC: 0.0787
Fold 4 MCC: 0.0989
Fold 5 MCC: 0.0651


TRIAL 88: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 768
  - dropout: 0.189
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000073
  - batch_size: 128
  - use_phage_weights: False
  - weight_decay: 0.0121
  - scheduler_type: reduce_on_plateau
  - warmup_ratio: 0.181


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5272, MCC: 0.0789 | Val Loss: 0.4973, MCC: 0.1883 | LR: 0.000073
Epoch 2/75 - Train Loss: 0.5018, MCC: 0.1080 | Val Loss: 0.4624, MCC: 0.3057 | LR: 0.000073
Epoch 3/75 - Train L

[I 2025-04-22 12:46:03,823] Trial 88 finished with value: 0.30370742532072176 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 768, 'dropout': 0.18885035985858062, 'ln': True, 'activation_function': 'relu', 'learning_rate': 7.261078996016207e-05, 'batch_size': 128, 'use_phage_weights': False, 'weight_decay': 0.012094504535560968, 'scheduler_type': 'reduce_on_plateau', 'warmup_ratio': 0.18127540691745012}. Best is trial 24 with value: 0.3301495479532531.


Fold 5 - Val MCC: 0.3033

CROSS-VALIDATION RESULTS
Median MCC: 0.3037
Mean MCC: 0.3080 ± 0.0146
Fold 1 MCC: 0.3254
Fold 2 MCC: 0.3223
Fold 3 MCC: 0.3037
Fold 4 MCC: 0.2854
Fold 5 MCC: 0.3033


TRIAL 89: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.127
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000050
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0167
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.174


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5505, MCC: -0.0032 | Val Loss: 0.4830, MCC: 0.0000 | LR: 0.000050
Epoch 2/75 - Train Loss: 0.5256, MCC: 0.0000 | Val Loss: 0.4628, MCC: 0.0000 | LR: 0.000050
Epoch 3/75 - Train 

[I 2025-04-22 12:50:01,210] Trial 89 finished with value: 0.33284211902408495 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.1272141027439813, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.0165363768069756e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.01673509545164733, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.1739083459755588}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3276

CROSS-VALIDATION RESULTS
Median MCC: 0.3328
Mean MCC: 0.3214 ± 0.0249
Fold 1 MCC: 0.3328
Fold 2 MCC: 0.3351
Fold 3 MCC: 0.3396
Fold 4 MCC: 0.2722
Fold 5 MCC: 0.3276


TRIAL 90: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.125
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000050
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0171
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.175


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5610, MCC: 0.0043 | Val Loss: 0.4918, MCC: 0.0000 | LR: 0.000050
Epoch 2/75 - Train Loss: 0.5260, MCC: 0.0000 | Val Loss: 0.4700, MCC: 0.0000 | LR: 0.000050
Epoch 3/75 - Train L

[I 2025-04-22 12:53:52,718] Trial 90 finished with value: 0.32561872010324844 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.12454487853761659, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.029247987509508e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.01713077922330472, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.17519799883746748}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3256

CROSS-VALIDATION RESULTS
Median MCC: 0.3256
Mean MCC: 0.3137 ± 0.0248
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3195
Fold 3 MCC: 0.3287
Fold 4 MCC: 0.2647
Fold 5 MCC: 0.3256


TRIAL 91: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.150
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000054
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0339
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.171


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5639, MCC: 0.0164 | Val Loss: 0.4944, MCC: 0.0000 | LR: 0.000054
Epoch 2/75 - Train Loss: 0.5283, MCC: 0.0000 | Val Loss: 0.4760, MCC: 0.0000 | LR: 0.000054
Epoch 3/75 - Train L

[I 2025-04-22 12:57:29,278] Trial 91 finished with value: 0.31565513481491014 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.15023650596057173, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.371732676987035e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.0338771570758709, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.17057285137959702}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3052

CROSS-VALIDATION RESULTS
Median MCC: 0.3157
Mean MCC: 0.3159 ± 0.0144
Fold 1 MCC: 0.3305
Fold 2 MCC: 0.3157
Fold 3 MCC: 0.3329
Fold 4 MCC: 0.2952
Fold 5 MCC: 0.3052


TRIAL 92: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.109
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000049
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0280
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.179


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5842, MCC: 0.0017 | Val Loss: 0.4998, MCC: 0.0000 | LR: 0.000049
Epoch 2/75 - Train Loss: 0.5287, MCC: -0.0047 | Val Loss: 0.4745, MCC: 0.0000 | LR: 0.000049
Epoch 3/75 - Train 

[I 2025-04-22 13:01:10,948] Trial 92 finished with value: 0.321001397496274 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.10905657547545677, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.8947366697904054e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.0280123967304771, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.17905598499854858}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3276

CROSS-VALIDATION RESULTS
Median MCC: 0.3210
Mean MCC: 0.3160 ± 0.0173
Fold 1 MCC: 0.3210
Fold 2 MCC: 0.2964
Fold 3 MCC: 0.3394
Fold 4 MCC: 0.2957
Fold 5 MCC: 0.3276


TRIAL 93: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.167
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000060
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0248
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.154


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5601, MCC: 0.0114 | Val Loss: 0.4851, MCC: 0.0000 | LR: 0.000060
Epoch 2/75 - Train Loss: 0.5296, MCC: 0.0007 | Val Loss: 0.4651, MCC: 0.0000 | LR: 0.000060
Epoch 3/75 - Train L

[I 2025-04-22 13:05:15,226] Trial 93 finished with value: 0.3275675187649259 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.16675433401147985, 'ln': True, 'activation_function': 'relu', 'learning_rate': 6.025143502638822e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.024766824264833117, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.15421120041164904}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3289

CROSS-VALIDATION RESULTS
Median MCC: 0.3276
Mean MCC: 0.3122 ± 0.0246
Fold 1 MCC: 0.2990
Fold 2 MCC: 0.3276
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2700
Fold 5 MCC: 0.3289


TRIAL 94: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.127
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000063
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0567
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.193


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5627, MCC: -0.0080 | Val Loss: 0.4839, MCC: 0.0000 | LR: 0.000063
Epoch 2/75 - Train Loss: 0.5274, MCC: -0.0047 | Val Loss: 0.4688, MCC: 0.0000 | LR: 0.000063
Epoch 3/75 - Train

[I 2025-04-22 13:08:23,545] Trial 94 finished with value: 0.32563779412372773 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.12666255840477852, 'ln': True, 'activation_function': 'relu', 'learning_rate': 6.333943263220375e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.05674180751653817, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.19297844774024567}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3302

CROSS-VALIDATION RESULTS
Median MCC: 0.3256
Mean MCC: 0.3141 ± 0.0179
Fold 1 MCC: 0.2891
Fold 2 MCC: 0.3297
Fold 3 MCC: 0.3256
Fold 4 MCC: 0.2957
Fold 5 MCC: 0.3302


TRIAL 95: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.160
  - ln: True
  - activation_function: gelu
Training Parameters:
  - learning_rate: 0.000045
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0477
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.187


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5484, MCC: 0.0107 | Val Loss: 0.4919, MCC: 0.0000 | LR: 0.000045
Epoch 2/75 - Train Loss: 0.5355, MCC: -0.0048 | Val Loss: 0.4753, MCC: 0.0000 | LR: 0.000045
Epoch 3/75 - Train 

[I 2025-04-22 13:13:25,471] Trial 95 finished with value: 0.3288527352672735 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.16040857999640173, 'ln': True, 'activation_function': 'gelu', 'learning_rate': 4.5382947244198715e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.04767230206743764, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.1870241416921869}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3289

CROSS-VALIDATION RESULTS
Median MCC: 0.3289
Mean MCC: 0.3128 ± 0.0341
Fold 1 MCC: 0.3308
Fold 2 MCC: 0.3327
Fold 3 MCC: 0.3271
Fold 4 MCC: 0.2448
Fold 5 MCC: 0.3289


TRIAL 96: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.158
  - ln: True
  - activation_function: gelu
Training Parameters:
  - learning_rate: 0.000045
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0471
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.173


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5653, MCC: -0.0140 | Val Loss: 0.4886, MCC: 0.0000 | LR: 0.000045
Epoch 2/75 - Train Loss: 0.5292, MCC: -0.0015 | Val Loss: 0.4729, MCC: 0.0000 | LR: 0.000045
Epoch 3/75 - Train

[I 2025-04-22 13:16:39,111] Trial 96 finished with value: 0.2969071949496105 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.15802507536354235, 'ln': True, 'activation_function': 'gelu', 'learning_rate': 4.545978622563012e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.04713993042747924, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.17337554545132494}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.2378

CROSS-VALIDATION RESULTS
Median MCC: 0.2969
Mean MCC: 0.2852 ± 0.0283
Fold 1 MCC: 0.3210
Fold 2 MCC: 0.2980
Fold 3 MCC: 0.2969
Fold 4 MCC: 0.2722
Fold 5 MCC: 0.2378


TRIAL 97: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.145
  - ln: True
  - activation_function: gelu
Training Parameters:
  - learning_rate: 0.000055
  - batch_size: 1024
  - use_phage_weights: True
  - weight_decay: 0.0381
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.186


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 1.0007, MCC: 0.0324 | Val Loss: 0.9123, MCC: -0.0141 | LR: 0.000055
Epoch 2/75 - Train Loss: 0.9918, MCC: 0.0753 | Val Loss: 0.9080, MCC: 0.0305 | LR: 0.000055
Epoch 3/75 - Train L

[I 2025-04-22 13:19:55,788] Trial 97 finished with value: 0.20514920901147332 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.1450577092422337, 'ln': True, 'activation_function': 'gelu', 'learning_rate': 5.527605633309706e-05, 'batch_size': 1024, 'use_phage_weights': True, 'weight_decay': 0.0381319227358499, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18602191995039796}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.1591

CROSS-VALIDATION RESULTS
Median MCC: 0.2051
Mean MCC: 0.1945 ± 0.0682
Fold 1 MCC: 0.0825
Fold 2 MCC: 0.2051
Fold 3 MCC: 0.2608
Fold 4 MCC: 0.2651
Fold 5 MCC: 0.1591


TRIAL 98: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.174
  - ln: True
  - activation_function: gelu
Training Parameters:
  - learning_rate: 0.000034
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0410
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.177


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5864, MCC: 0.0154 | Val Loss: 0.5191, MCC: 0.0000 | LR: 0.000034
Epoch 2/75 - Train Loss: 0.5385, MCC: 0.0000 | Val Loss: 0.4842, MCC: 0.0000 | LR: 0.000034
Epoch 3/75 - Train L

[I 2025-04-22 13:23:14,756] Trial 98 finished with value: 0.24151122252441348 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.17418946672742225, 'ln': True, 'activation_function': 'gelu', 'learning_rate': 3.43715043498711e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.040950663966042924, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.17738345541899106}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.2378

CROSS-VALIDATION RESULTS
Median MCC: 0.2415
Mean MCC: 0.2624 ± 0.0293
Fold 1 MCC: 0.2988
Fold 2 MCC: 0.2415
Fold 3 MCC: 0.2975
Fold 4 MCC: 0.2362
Fold 5 MCC: 0.2378


TRIAL 99: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.136
  - ln: True
  - activation_function: gelu
Training Parameters:
  - learning_rate: 0.000078
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0295
  - scheduler_type: one_cycle
  - warmup_ratio: 0.188


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6675, MCC: 0.0149 | Val Loss: 0.5838, MCC: 0.0000 | LR: 0.000004
Epoch 2/75 - Train Loss: 0.5974, MCC: 0.0038 | Val Loss: 0.5142, MCC: 0.0000 | LR: 0.000007
Epoch 3/75 - Train Loss: 0.

[I 2025-04-22 13:27:48,800] Trial 99 finished with value: 0.323543597716865 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.13609418296453385, 'ln': True, 'activation_function': 'gelu', 'learning_rate': 7.801685621296788e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.029498284408392233, 'scheduler_type': 'one_cycle', 'warmup_ratio': 0.1880623619530118}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3235

CROSS-VALIDATION RESULTS
Median MCC: 0.3235
Mean MCC: 0.3140 ± 0.0253
Fold 1 MCC: 0.3185
Fold 2 MCC: 0.3276
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2647
Fold 5 MCC: 0.3235


TRIAL 100: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.160
  - ln: False
  - activation_function: gelu
Training Parameters:
  - learning_rate: 0.000031
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0235
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.182


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6674, MCC: 0.0000 | Val Loss: 0.6455, MCC: 0.0000 | LR: 0.000031
Epoch 2/75 - Train Loss: 0.6355, MCC: 0.0000 | Val Loss: 0.6016, MCC: 0.0000 | LR: 0.000031
Epoch 3/75 - Train

[I 2025-04-22 13:36:18,518] Trial 100 finished with value: 0.11000202373345777 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.15988945760687434, 'ln': False, 'activation_function': 'gelu', 'learning_rate': 3.122585596726305e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.023545159942620176, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.1822589234248334}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.0925

CROSS-VALIDATION RESULTS
Median MCC: 0.1100
Mean MCC: 0.1170 ± 0.0224
Fold 1 MCC: 0.1100
Fold 2 MCC: 0.1296
Fold 3 MCC: 0.1541
Fold 4 MCC: 0.0989
Fold 5 MCC: 0.0925


TRIAL 101: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.153
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000040
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0090
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.167


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5997, MCC: 0.0088 | Val Loss: 0.4989, MCC: 0.0000 | LR: 0.000040
Epoch 2/75 - Train Loss: 0.5341, MCC: 0.0000 | Val Loss: 0.4868, MCC: 0.0000 | LR: 0.000040
Epoch 3/75 - Train 

[I 2025-04-22 13:40:38,368] Trial 101 finished with value: 0.32166740416902695 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.15293611096874613, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.034831958505506e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.008997580779905581, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.16670956874257137}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3276

CROSS-VALIDATION RESULTS
Median MCC: 0.3217
Mean MCC: 0.3154 ± 0.0180
Fold 1 MCC: 0.3217
Fold 2 MCC: 0.3276
Fold 3 MCC: 0.3201
Fold 4 MCC: 0.2799
Fold 5 MCC: 0.3276


TRIAL 102: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: max
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.118
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000039
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0106
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.194


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing max pooling for 177 strains...
Precomputing max pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5655, MCC: -0.0029 | Val Loss: 0.5062, MCC: 0.0000 | LR: 0.000039
Epoch 2/75 - Train Loss: 0.5379, MCC: 0.0077 | Val Loss: 0.4827, MCC: 0.0000 | LR: 0.000039
Epoch 3/75 - Train Loss: 0.

[I 2025-04-22 13:44:53,909] Trial 102 finished with value: 0.3046741177790295 and parameters: {'pooling_type': 'max', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.11801690808709897, 'ln': True, 'activation_function': 'relu', 'learning_rate': 3.8699497575121523e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.010626799727593762, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.19401255762049083}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.2729

CROSS-VALIDATION RESULTS
Median MCC: 0.3047
Mean MCC: 0.3153 ± 0.0557
Fold 1 MCC: 0.3994
Fold 2 MCC: 0.3548
Fold 3 MCC: 0.2448
Fold 4 MCC: 0.3047
Fold 5 MCC: 0.2729


TRIAL 103: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 256
  - dropout: 0.145
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000048
  - batch_size: 256
  - use_phage_weights: False
  - weight_decay: 0.0802
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.199


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5648, MCC: 0.0121 | Val Loss: 0.5047, MCC: 0.0000 | LR: 0.000048
Epoch 2/75 - Train Loss: 0.5210, MCC: -0.0009 | Val Loss: 0.4686, MCC: 0.0000 | LR: 0.000048
Epoch 3/75 - Train 

[I 2025-04-22 13:48:25,358] Trial 103 finished with value: 0.32473795974427144 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 256, 'dropout': 0.14540577109278, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.787793134893811e-05, 'batch_size': 256, 'use_phage_weights': False, 'weight_decay': 0.08021208038123806, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.1989980433613036}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3200

CROSS-VALIDATION RESULTS
Median MCC: 0.3247
Mean MCC: 0.3156 ± 0.0259
Fold 1 MCC: 0.3247
Fold 2 MCC: 0.3327
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2650
Fold 5 MCC: 0.3200


TRIAL 104: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.132
  - ln: True
  - activation_function: silu
Training Parameters:
  - learning_rate: 0.000029
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0692
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.143


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5695, MCC: -0.0271 | Val Loss: 0.5039, MCC: 0.0000 | LR: 0.000029
Epoch 2/75 - Train Loss: 0.5391, MCC: 0.0000 | Val Loss: 0.4841, MCC: 0.0000 | LR: 0.000029
Epoch 3/75 - Train

[I 2025-04-22 13:52:04,074] Trial 104 finished with value: 0.30107377774599564 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.13172335935065455, 'ln': True, 'activation_function': 'silu', 'learning_rate': 2.896466244611687e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.0692417503140979, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.14289188971688668}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.2378

CROSS-VALIDATION RESULTS
Median MCC: 0.3011
Mean MCC: 0.2830 ± 0.0327
Fold 1 MCC: 0.3047
Fold 2 MCC: 0.3011
Fold 3 MCC: 0.3210
Fold 4 MCC: 0.2503
Fold 5 MCC: 0.2378


TRIAL 105: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.183
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000042
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0138
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.159


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5510, MCC: 0.0150 | Val Loss: 0.4853, MCC: 0.0000 | LR: 0.000042
Epoch 2/75 - Train Loss: 0.5308, MCC: -0.0015 | Val Loss: 0.4717, MCC: 0.0000 | LR: 0.000042
Epoch 3/75 - Train

[I 2025-04-22 13:57:09,784] Trial 105 finished with value: 0.3221006205564047 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.18273342399924053, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.168216786489616e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.013779129922486411, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.1591215275160094}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3070

CROSS-VALIDATION RESULTS
Median MCC: 0.3221
Mean MCC: 0.3173 ± 0.0180
Fold 1 MCC: 0.3290
Fold 2 MCC: 0.3400
Fold 3 MCC: 0.3221
Fold 4 MCC: 0.2883
Fold 5 MCC: 0.3070


TRIAL 106: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.169
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000037
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0062
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.178


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5667, MCC: 0.0097 | Val Loss: 0.4940, MCC: 0.0000 | LR: 0.000037
Epoch 2/75 - Train Loss: 0.5391, MCC: 0.0265 | Val Loss: 0.4748, MCC: 0.0000 | LR: 0.000037
Epoch 3/75 - Train Loss: 

[I 2025-04-22 14:02:32,355] Trial 106 finished with value: 0.3308863014635093 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.16933770736335574, 'ln': True, 'activation_function': 'relu', 'learning_rate': 3.664674056904027e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.0062390227314273045, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.17810184966010062}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3012

CROSS-VALIDATION RESULTS
Median MCC: 0.3309
Mean MCC: 0.3163 ± 0.0227
Fold 1 MCC: 0.3309
Fold 2 MCC: 0.3333
Fold 3 MCC: 0.3372
Fold 4 MCC: 0.2789
Fold 5 MCC: 0.3012


TRIAL 107: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.171
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000037
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0049
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.176


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5858, MCC: 0.0197 | Val Loss: 0.4988, MCC: 0.0000 | LR: 0.000037
Epoch 2/75 - Train Loss: 0.5380, MCC: 0.0076 | Val Loss: 0.4789, MCC: 0.0000 | LR: 0.000036
Epoch 3/75 - Train Loss: 

[I 2025-04-22 14:07:37,825] Trial 107 finished with value: 0.3290082860155867 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.17062542120919605, 'ln': True, 'activation_function': 'relu', 'learning_rate': 3.652645091008301e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.0049270481211439576, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.17614162927083976}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3276

CROSS-VALIDATION RESULTS
Median MCC: 0.3290
Mean MCC: 0.3235 ± 0.0141
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3351
Fold 3 MCC: 0.3290
Fold 4 MCC: 0.2957
Fold 5 MCC: 0.3276


TRIAL 108: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: max
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.170
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000044
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0045
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.185


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing max pooling for 177 strains...
Precomputing max pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5425, MCC: -0.0015 | Val Loss: 0.4953, MCC: 0.0000 | LR: 0.000044
Epoch 2/75 - Train Loss: 0.5234, MCC: 0.0050 | Val Loss: 0.4749, MCC: 0.0000 | LR: 0.000044
Epoch 3/75 - Train Loss: 0.5

[I 2025-04-22 14:11:16,249] Trial 108 finished with value: 0.28434093295511087 and parameters: {'pooling_type': 'max', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.16990739599942095, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.4307970777368066e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.00445302087433954, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18475877877194471}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.2695

CROSS-VALIDATION RESULTS
Median MCC: 0.2843
Mean MCC: 0.3172 ± 0.0522
Fold 1 MCC: 0.4062
Fold 2 MCC: 0.3469
Fold 3 MCC: 0.2843
Fold 4 MCC: 0.2789
Fold 5 MCC: 0.2695


TRIAL 109: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.178
  - ln: True
  - activation_function: gelu
Training Parameters:
  - learning_rate: 0.000036
  - batch_size: 128
  - use_phage_weights: False
  - weight_decay: 0.0050
  - scheduler_type: reduce_on_plateau
  - warmup_ratio: 0.191


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5254, MCC: 0.0289 | Val Loss: 0.4893, MCC: 0.0000 | LR: 0.000036
Epoch 2/75 - Train Loss: 0.5052, MCC: 0.0921 | Val Loss: 0.4747, MCC: 0.0595 | LR: 0.000036
Epoch 3/75 - Train 

[I 2025-04-22 14:14:36,840] Trial 109 finished with value: 0.32369224276885006 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.17803704955699848, 'ln': True, 'activation_function': 'gelu', 'learning_rate': 3.625645499671133e-05, 'batch_size': 128, 'use_phage_weights': False, 'weight_decay': 0.0049954964016693856, 'scheduler_type': 'reduce_on_plateau', 'warmup_ratio': 0.19073642745475536}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3237

CROSS-VALIDATION RESULTS
Median MCC: 0.3237
Mean MCC: 0.3192 ± 0.0216
Fold 1 MCC: 0.3125
Fold 2 MCC: 0.3327
Fold 3 MCC: 0.3454
Fold 4 MCC: 0.2818
Fold 5 MCC: 0.3237


TRIAL 110: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 2
  - classifier_hidden_dim: 384
  - dropout: 0.162
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000033
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0038
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.176


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5531, MCC: -0.0053 | Val Loss: 0.4961, MCC: 0.0000 | LR: 0.000033
Epoch 2/75 - Train Loss: 0.5345, MCC: 0.0000 | Val Loss: 0.4803, MCC: 0.0000 | LR: 0.000033
Epoch 3/75 - Train Loss:

[I 2025-04-22 14:18:41,585] Trial 110 finished with value: 0.30364317924414436 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 2, 'classifier_hidden_dim': 384, 'dropout': 0.16151525612256679, 'ln': True, 'activation_function': 'relu', 'learning_rate': 3.3011235335392476e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.003814142706160399, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.17583722664324172}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3036

CROSS-VALIDATION RESULTS
Median MCC: 0.3036
Mean MCC: 0.2970 ± 0.0258
Fold 1 MCC: 0.3290
Fold 2 MCC: 0.2943
Fold 3 MCC: 0.3075
Fold 4 MCC: 0.2507
Fold 5 MCC: 0.3036


TRIAL 111: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.151
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000031
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0068
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.166


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5705, MCC: 0.0037 | Val Loss: 0.4972, MCC: 0.0000 | LR: 0.000031
Epoch 2/75 - Train Loss: 0.5394, MCC: 0.0036 | Val Loss: 0.4862, MCC: 0.0000 | LR: 0.000031
Epoch 3/75 - Train Loss: 

[I 2025-04-22 14:23:22,867] Trial 111 finished with value: 0.32176123859130573 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.15136166761491607, 'ln': True, 'activation_function': 'relu', 'learning_rate': 3.064020996785425e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.006825649135438651, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.16559501862911757}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3275

CROSS-VALIDATION RESULTS
Median MCC: 0.3218
Mean MCC: 0.3186 ± 0.0137
Fold 1 MCC: 0.3199
Fold 2 MCC: 0.3218
Fold 3 MCC: 0.3313
Fold 4 MCC: 0.2924
Fold 5 MCC: 0.3275


TRIAL 112: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.166
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000027
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0054
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.180


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5531, MCC: 0.0118 | Val Loss: 0.4872, MCC: 0.0000 | LR: 0.000027
Epoch 2/75 - Train Loss: 0.5369, MCC: -0.0067 | Val Loss: 0.4839, MCC: 0.0000 | LR: 0.000027
Epoch 3/75 - Train Loss:

[I 2025-04-22 14:29:13,599] Trial 112 finished with value: 0.3167103185618868 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.16628186175476933, 'ln': True, 'activation_function': 'relu', 'learning_rate': 2.7097936513180202e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.005387758082684184, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.17961987438733298}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3167

CROSS-VALIDATION RESULTS
Median MCC: 0.3167
Mean MCC: 0.3089 ± 0.0269
Fold 1 MCC: 0.3305
Fold 2 MCC: 0.2923
Fold 3 MCC: 0.3395
Fold 4 MCC: 0.2654
Fold 5 MCC: 0.3167


TRIAL 113: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.155
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000037
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0035
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.175


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5575, MCC: 0.0251 | Val Loss: 0.4867, MCC: 0.0000 | LR: 0.000037
Epoch 2/75 - Train Loss: 0.5341, MCC: -0.0082 | Val Loss: 0.4748, MCC: 0.0000 | LR: 0.000036
Epoch 3/75 - Train Loss:

[I 2025-04-22 14:33:29,131] Trial 113 finished with value: 0.3275675187649259 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.15514492208577932, 'ln': True, 'activation_function': 'relu', 'learning_rate': 3.654316757368959e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.0035017311006947517, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.17455175710946153}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.2996

CROSS-VALIDATION RESULTS
Median MCC: 0.3276
Mean MCC: 0.3126 ± 0.0246
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3276
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2701
Fold 5 MCC: 0.2996


TRIAL 114: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.148
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000025
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0061
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.171


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5895, MCC: 0.0012 | Val Loss: 0.5109, MCC: 0.0000 | LR: 0.000025
Epoch 2/75 - Train Loss: 0.5435, MCC: -0.0047 | Val Loss: 0.4907, MCC: 0.0000 | LR: 0.000025
Epoch 3/75 - Train Loss:

[I 2025-04-22 14:37:48,917] Trial 114 finished with value: 0.3229755171182271 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.14822892331827336, 'ln': True, 'activation_function': 'relu', 'learning_rate': 2.5023433418179182e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.0061268405314186215, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.17067894925495372}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3230

CROSS-VALIDATION RESULTS
Median MCC: 0.3230
Mean MCC: 0.3172 ± 0.0152
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3261
Fold 3 MCC: 0.3191
Fold 4 MCC: 0.2878
Fold 5 MCC: 0.3230


TRIAL 115: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 768
  - dropout: 0.137
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000022
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0080
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.178


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5936, MCC: 0.0100 | Val Loss: 0.4923, MCC: 0.0000 | LR: 0.000022
Epoch 2/75 - Train Loss: 0.5327, MCC: 0.0000 | Val Loss: 0.4768, MCC: 0.0000 | LR: 0.000022
Epoch 3/75 - Train Loss: 

[I 2025-04-22 14:43:14,831] Trial 115 finished with value: 0.32306426424832224 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 768, 'dropout': 0.13707480083642287, 'ln': True, 'activation_function': 'relu', 'learning_rate': 2.2308240946632828e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.008041356273544742, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.17815871895462868}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3231

CROSS-VALIDATION RESULTS
Median MCC: 0.3231
Mean MCC: 0.3175 ± 0.0131
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3095
Fold 3 MCC: 0.3290
Fold 4 MCC: 0.2957
Fold 5 MCC: 0.3231


TRIAL 116: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.198
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000042
  - batch_size: 1024
  - use_phage_weights: True
  - weight_decay: 0.0463
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.183


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.9952, MCC: 0.0528 | Val Loss: 0.9073, MCC: 0.0305 | LR: 0.000042
Epoch 2/75 - Train Loss: 0.9963, MCC: 0.0640 | Val Loss: 0.9132, MCC: -0.0037 | LR: 0.000042
Epoch 3/75 - Train Loss: 

[I 2025-04-22 14:46:36,386] Trial 116 finished with value: 0.1978711495541378 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.19812498665693987, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.167452059494624e-05, 'batch_size': 1024, 'use_phage_weights': True, 'weight_decay': 0.046285942012668266, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18257087873785707}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.1979

CROSS-VALIDATION RESULTS
Median MCC: 0.1979
Mean MCC: 0.1734 ± 0.0738
Fold 1 MCC: 0.0390
Fold 2 MCC: 0.1572
Fold 3 MCC: 0.2497
Fold 4 MCC: 0.2233
Fold 5 MCC: 0.1979


TRIAL 117: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 256
  - dropout: 0.189
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000035
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0349
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.151


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6375, MCC: 0.0127 | Val Loss: 0.5068, MCC: 0.0000 | LR: 0.000035
Epoch 2/75 - Train Loss: 0.5480, MCC: 0.0000 | Val Loss: 0.5071, MCC: 0.0000 | LR: 0.000035
Epoch 3/75 - Train Loss: 

[I 2025-04-22 14:52:20,564] Trial 117 finished with value: 0.3159075856289391 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 256, 'dropout': 0.1888346616949403, 'ln': True, 'activation_function': 'relu', 'learning_rate': 3.495835446466224e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.03487481819714423, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.15105194060261962}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3159

CROSS-VALIDATION RESULTS
Median MCC: 0.3159
Mean MCC: 0.3148 ± 0.0158
Fold 1 MCC: 0.3111
Fold 2 MCC: 0.3327
Fold 3 MCC: 0.3272
Fold 4 MCC: 0.2873
Fold 5 MCC: 0.3159


TRIAL 118: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.173
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000028
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0075
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.188


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5811, MCC: 0.0007 | Val Loss: 0.5038, MCC: 0.0000 | LR: 0.000028
Epoch 2/75 - Train Loss: 0.5366, MCC: 0.0156 | Val Loss: 0.4849, MCC: 0.0000 | LR: 0.000028
Epoch 3/75 - Train Loss: 

[I 2025-04-22 14:57:26,799] Trial 118 finished with value: 0.3275675187649259 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.17271884233893267, 'ln': True, 'activation_function': 'relu', 'learning_rate': 2.8454169446517902e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.007533386260149904, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18780548598664895}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3070

CROSS-VALIDATION RESULTS
Median MCC: 0.3276
Mean MCC: 0.3174 ± 0.0204
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3276
Fold 3 MCC: 0.3395
Fold 4 MCC: 0.2826
Fold 5 MCC: 0.3070


TRIAL 119: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.160
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000026
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0021
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.195


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5520, MCC: -0.0004 | Val Loss: 0.4910, MCC: 0.0000 | LR: 0.000026
Epoch 2/75 - Train Loss: 0.5343, MCC: 0.0000 | Val Loss: 0.4798, MCC: 0.0000 | LR: 0.000026
Epoch 3/75 - Train

[I 2025-04-22 15:00:54,577] Trial 119 finished with value: 0.3127525385803911 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.1599979391688792, 'ln': True, 'activation_function': 'relu', 'learning_rate': 2.649323504498301e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.002064422700012283, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.1951259858369773}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3142

CROSS-VALIDATION RESULTS
Median MCC: 0.3128
Mean MCC: 0.2993 ± 0.0287
Fold 1 MCC: 0.2968
Fold 2 MCC: 0.3276
Fold 3 MCC: 0.3128
Fold 4 MCC: 0.2454
Fold 5 MCC: 0.3142


TRIAL 120: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.142
  - ln: False
  - activation_function: silu
Training Parameters:
  - learning_rate: 0.000024
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0066
  - scheduler_type: one_cycle
  - warmup_ratio: 0.169


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6969, MCC: -0.0073 | Val Loss: 0.6965, MCC: 0.0000 | LR: 0.000001
Epoch 2/75 - Train Loss: 0.6953, MCC: 0.0033 | Val Loss: 0.6940, MCC: 0.0000 | LR: 0.000002
Epoch 3/75 - Train Loss: 0.692

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold 4 - Val MCC: 0.0238

FOLD 5/5
Train set: 13632 interactions, 142 strains
Validation set: 3360 interactions, 35 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6901, MCC: -0.0021 | Val Loss: 0.6892, MCC: 0.0000 | LR: 0.000001
Epoch 2/75 - Train Loss: 0.6883, MCC: -0.0170 | Val Loss: 0.6869, MCC: 0.0000 | LR: 0.000002
Epoch 3/75 - Train Loss: 0.6853, MCC: 0.0077 | Val Loss: 0.6829, MCC: 0.0000 | LR: 0.000004
Epoch 4/75 - Train Loss: 0.6801, MCC: 0.0000 | Val Loss: 0.6765, MCC: 0.0000 | LR: 0.000006
Epoch 5/75 - Train Loss: 0.6722, MCC: 0.0000 | Val Loss: 0.6663, MCC: 0.0000 | LR: 0.000009
Epoch 6/75 - Train Loss: 0.6592, MCC: 0.0000 | Val Loss: 0.6504, MCC: 0.0000 | LR: 0.000012
Epoch 7/75 - Train Loss: 0.6391, MCC: 0.0000 | Val Loss: 0.6248, MCC: 0.0000 | LR: 0.000015
Epoch 8/75 - Train Loss: 0.6084, MCC: 0.0000 | Val Loss: 0.5871, MCC: 0.0000 | LR: 0.000017
Epoch 9/75 - Train Loss: 0.5683, MCC: 0.0000 | Val

[I 2025-04-22 15:08:44,886] Trial 120 finished with value: 0.06790542272678256 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.14241827202053808, 'ln': False, 'activation_function': 'silu', 'learning_rate': 2.4116466296120366e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.006603452983567849, 'scheduler_type': 'one_cycle', 'warmup_ratio': 0.16889620202936342}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.0650

CROSS-VALIDATION RESULTS
Median MCC: 0.0679
Mean MCC: 0.0677 ± 0.0247
Fold 1 MCC: 0.0954
Fold 2 MCC: 0.0679
Fold 3 MCC: 0.0864
Fold 4 MCC: 0.0238
Fold 5 MCC: 0.0650


TRIAL 121: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.164
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000038
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0614
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.184


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5661, MCC: -0.0170 | Val Loss: 0.5007, MCC: 0.0000 | LR: 0.000038
Epoch 2/75 - Train Loss: 0.5347, MCC: 0.0109 | Val Loss: 0.4767, MCC: 0.0000 | LR: 0.000038
Epoch 3/75 - Train Loss:

[I 2025-04-22 15:12:42,847] Trial 121 finished with value: 0.3004334405840663 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.16447580134975517, 'ln': True, 'activation_function': 'relu', 'learning_rate': 3.846833644035676e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.06140304342195464, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18396245057126714}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3275

CROSS-VALIDATION RESULTS
Median MCC: 0.3004
Mean MCC: 0.3038 ± 0.0212
Fold 1 MCC: 0.3004
Fold 2 MCC: 0.2802
Fold 3 MCC: 0.3290
Fold 4 MCC: 0.2818
Fold 5 MCC: 0.3275


TRIAL 122: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.177
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000052
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0428
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.181


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5484, MCC: 0.0055 | Val Loss: 0.4834, MCC: 0.0000 | LR: 0.000052
Epoch 2/75 - Train Loss: 0.5321, MCC: 0.0077 | Val Loss: 0.4699, MCC: 0.0000 | LR: 0.000052
Epoch 3/75 - Train Loss: 

[I 2025-04-22 15:17:52,055] Trial 122 finished with value: 0.32816999258876195 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.17710736801136112, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.167033747199105e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.042849269434636086, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18094967038261067}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3275

CROSS-VALIDATION RESULTS
Median MCC: 0.3282
Mean MCC: 0.3207 ± 0.0232
Fold 1 MCC: 0.3282
Fold 2 MCC: 0.3359
Fold 3 MCC: 0.3372
Fold 4 MCC: 0.2749
Fold 5 MCC: 0.3275


TRIAL 123: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.182
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000051
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0540
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.181


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5602, MCC: -0.0101 | Val Loss: 0.4885, MCC: 0.0000 | LR: 0.000051
Epoch 2/75 - Train Loss: 0.5315, MCC: 0.0077 | Val Loss: 0.4742, MCC: 0.0000 | LR: 0.000051
Epoch 3/75 - Train Loss:

[I 2025-04-22 15:21:48,773] Trial 123 finished with value: 0.3185320904193146 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.18188637605010918, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.103331539860093e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.05397410938069154, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18084797541175912}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3185

CROSS-VALIDATION RESULTS
Median MCC: 0.3185
Mean MCC: 0.3153 ± 0.0207
Fold 1 MCC: 0.2985
Fold 2 MCC: 0.3327
Fold 3 MCC: 0.3411
Fold 4 MCC: 0.2856
Fold 5 MCC: 0.3185


TRIAL 124: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.129
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000055
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0433
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.174


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6124, MCC: -0.0023 | Val Loss: 0.4948, MCC: 0.0000 | LR: 0.000055
Epoch 2/75 - Train Loss: 0.5353, MCC: 0.0156 | Val Loss: 0.4770, MCC: 0.0000 | LR: 0.000055
Epoch 3/75 - Train Loss:

[I 2025-04-22 15:25:13,127] Trial 124 finished with value: 0.31965264398322396 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.12879143774744478, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.503009327684214e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.04332135146330826, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.17384897182150458}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3181

CROSS-VALIDATION RESULTS
Median MCC: 0.3197
Mean MCC: 0.3074 ± 0.0319
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3248
Fold 3 MCC: 0.3197
Fold 4 MCC: 0.2442
Fold 5 MCC: 0.3181


TRIAL 125: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.120
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000046
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0115
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.190


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5608, MCC: -0.0160 | Val Loss: 0.4954, MCC: 0.0000 | LR: 0.000046
Epoch 2/75 - Train Loss: 0.5307, MCC: 0.0156 | Val Loss: 0.4706, MCC: 0.0000 | LR: 0.000045
Epoch 3/75 - Train Loss:

[I 2025-04-22 15:28:51,901] Trial 125 finished with value: 0.3275675187649259 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.11995896114372516, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.5564154409036345e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.011462328971445503, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.1895142623524334}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3079

CROSS-VALIDATION RESULTS
Median MCC: 0.3276
Mean MCC: 0.3151 ± 0.0234
Fold 1 MCC: 0.3302
Fold 2 MCC: 0.3276
Fold 3 MCC: 0.3372
Fold 4 MCC: 0.2726
Fold 5 MCC: 0.3079


TRIAL 126: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.090
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000049
  - batch_size: 256
  - use_phage_weights: False
  - weight_decay: 0.0383
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.178


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5294, MCC: 0.0156 | Val Loss: 0.4662, MCC: 0.0000 | LR: 0.000049
Epoch 2/75 - Train Loss: 0.5083, MCC: 0.1042 | Val Loss: 0.4629, MCC: 0.0595 | LR: 0.000048
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 15:32:28,425] Trial 126 finished with value: 0.3276922507970427 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.08983825159061701, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.854473201445075e-05, 'batch_size': 256, 'use_phage_weights': False, 'weight_decay': 0.03832558482104203, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.1777712840958016}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3275

CROSS-VALIDATION RESULTS
Median MCC: 0.3277
Mean MCC: 0.3183 ± 0.0232
Fold 1 MCC: 0.3277
Fold 2 MCC: 0.3351
Fold 3 MCC: 0.3290
Fold 4 MCC: 0.2723
Fold 5 MCC: 0.3275


TRIAL 127: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.153
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000062
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0503
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.187


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5634, MCC: -0.0201 | Val Loss: 0.4852, MCC: 0.0000 | LR: 0.000062
Epoch 2/75 - Train Loss: 0.5276, MCC: 0.0077 | Val Loss: 0.4731, MCC: 0.0000 | LR: 0.000062
Epoch 3/75 - Train

[I 2025-04-22 15:36:57,823] Trial 127 finished with value: 0.3289868039032808 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.15340495922552747, 'ln': True, 'activation_function': 'relu', 'learning_rate': 6.244632933564796e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.05028558568283963, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18720576371219197}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3276

CROSS-VALIDATION RESULTS
Median MCC: 0.3290
Mean MCC: 0.3242 ± 0.0139
Fold 1 MCC: 0.3290
Fold 2 MCC: 0.3327
Fold 3 MCC: 0.3346
Fold 4 MCC: 0.2969
Fold 5 MCC: 0.3276


TRIAL 128: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 256
  - dropout: 0.156
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000063
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0493
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.187


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5699, MCC: -0.0057 | Val Loss: 0.4987, MCC: 0.0000 | LR: 0.000063
Epoch 2/75 - Train Loss: 0.5360, MCC: -0.0047 | Val Loss: 0.4842, MCC: 0.0000 | LR: 0.000063
Epoch 3/75 - Trai

[I 2025-04-22 15:41:42,883] Trial 128 finished with value: 0.3274712895371589 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 256, 'dropout': 0.15578746670530785, 'ln': True, 'activation_function': 'relu', 'learning_rate': 6.269357678976324e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.049329933120043544, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18681917830441364}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3107

CROSS-VALIDATION RESULTS
Median MCC: 0.3275
Mean MCC: 0.3138 ± 0.0220
Fold 1 MCC: 0.3275
Fold 2 MCC: 0.3297
Fold 3 MCC: 0.3290
Fold 4 MCC: 0.2722
Fold 5 MCC: 0.3107


TRIAL 129: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.170
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000053
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0280
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.191


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5761, MCC: -0.0051 | Val Loss: 0.4939, MCC: 0.0000 | LR: 0.000053
Epoch 2/75 - Train Loss: 0.5311, MCC: -0.0047 | Val Loss: 0.4723, MCC: 0.0000 | LR: 0.000053
Epoch 3/75 - Trai

[I 2025-04-22 15:46:07,069] Trial 129 finished with value: 0.31712666185964217 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.16977692369991837, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.3212946376774945e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.027950506320568246, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.19102465567581367}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3276

CROSS-VALIDATION RESULTS
Median MCC: 0.3171
Mean MCC: 0.3175 ± 0.0127
Fold 1 MCC: 0.3027
Fold 2 MCC: 0.3171
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.3046
Fold 5 MCC: 0.3276


TRIAL 130: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.176
  - ln: True
  - activation_function: gelu
Training Parameters:
  - learning_rate: 0.000071
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0055
  - scheduler_type: reduce_on_plateau
  - warmup_ratio: 0.183


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5656, MCC: 0.0134 | Val Loss: 0.4851, MCC: 0.0000 | LR: 0.000071
Epoch 2/75 - Train Loss: 0.5251, MCC: -0.0047 | Val Loss: 0.4760, MCC: 0.0000 | LR: 0.000071
Epoch 3/75 - Trai

[I 2025-04-22 15:49:50,099] Trial 130 finished with value: 0.3142841789430215 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.17624312590226598, 'ln': True, 'activation_function': 'gelu', 'learning_rate': 7.10991860035869e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.005471221568406243, 'scheduler_type': 'reduce_on_plateau', 'warmup_ratio': 0.18284465570522215}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3107

CROSS-VALIDATION RESULTS
Median MCC: 0.3143
Mean MCC: 0.3056 ± 0.0315
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3143
Fold 3 MCC: 0.3283
Fold 4 MCC: 0.2443
Fold 5 MCC: 0.3107


TRIAL 131: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.151
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000056
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0320
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.186


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5571, MCC: -0.0043 | Val Loss: 0.4884, MCC: 0.0000 | LR: 0.000056
Epoch 2/75 - Train Loss: 0.5323, MCC: 0.0134 | Val Loss: 0.4725, MCC: 0.0000 | LR: 0.000056
Epoch 3/75 - Train

[I 2025-04-22 15:54:48,428] Trial 131 finished with value: 0.3301495479532531 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.15145351904110854, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.643072304471834e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.03199483517529958, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18584193032018695}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3275

CROSS-VALIDATION RESULTS
Median MCC: 0.3301
Mean MCC: 0.3199 ± 0.0233
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3327
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2735
Fold 5 MCC: 0.3275


TRIAL 132: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.149
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000060
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0324
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.196


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5869, MCC: -0.0110 | Val Loss: 0.4949, MCC: 0.0000 | LR: 0.000060
Epoch 2/75 - Train Loss: 0.5349, MCC: 0.0156 | Val Loss: 0.4756, MCC: 0.0000 | LR: 0.000060
Epoch 3/75 - Train

[I 2025-04-22 15:59:04,527] Trial 132 finished with value: 0.3275675187649259 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.1486121335351192, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.9639275681366186e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.03240320034946367, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.19585344404180277}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3289

CROSS-VALIDATION RESULTS
Median MCC: 0.3276
Mean MCC: 0.3150 ± 0.0237
Fold 1 MCC: 0.3066
Fold 2 MCC: 0.3276
Fold 3 MCC: 0.3395
Fold 4 MCC: 0.2726
Fold 5 MCC: 0.3289


TRIAL 133: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.207
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000057
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0093
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.186


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5609, MCC: 0.0241 | Val Loss: 0.4936, MCC: 0.0000 | LR: 0.000056
Epoch 2/75 - Train Loss: 0.5354, MCC: -0.0067 | Val Loss: 0.4772, MCC: 0.0000 | LR: 0.000056
Epoch 3/75 - Train

[I 2025-04-22 16:03:12,738] Trial 133 finished with value: 0.32166740416902695 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.20728610283144028, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.651932493336866e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.009277191713745917, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.1858472181821723}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.2996

CROSS-VALIDATION RESULTS
Median MCC: 0.3217
Mean MCC: 0.3203 ± 0.0170
Fold 1 MCC: 0.3217
Fold 2 MCC: 0.3400
Fold 3 MCC: 0.3378
Fold 4 MCC: 0.3023
Fold 5 MCC: 0.2996


TRIAL 134: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.161
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000047
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0356
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.180


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5508, MCC: 0.0210 | Val Loss: 0.4880, MCC: 0.0000 | LR: 0.000047
Epoch 2/75 - Train Loss: 0.5280, MCC: -0.0047 | Val Loss: 0.4697, MCC: 0.0000 | LR: 0.000047
Epoch 3/75 - Train

[I 2025-04-22 16:07:18,136] Trial 134 finished with value: 0.32758298206345926 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.16076315104627692, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.71311522537273e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.035607766285427514, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.1804501449829666}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3276

CROSS-VALIDATION RESULTS
Median MCC: 0.3276
Mean MCC: 0.3173 ± 0.0220
Fold 1 MCC: 0.3290
Fold 2 MCC: 0.3406
Fold 3 MCC: 0.3123
Fold 4 MCC: 0.2771
Fold 5 MCC: 0.3276


TRIAL 135: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.154
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000066
  - batch_size: 1024
  - use_phage_weights: False
  - weight_decay: 0.0417
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.177


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5633, MCC: 0.0072 | Val Loss: 0.4896, MCC: 0.0000 | LR: 0.000066
Epoch 2/75 - Train Loss: 0.5298, MCC: 0.0000 | Val Loss: 0.4726, MCC: 0.0000 | LR: 0.000066
Epoch 3/75 - Train 

[I 2025-04-22 16:11:10,335] Trial 135 finished with value: 0.32758298206345926 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.15356051147654814, 'ln': True, 'activation_function': 'relu', 'learning_rate': 6.630389772374473e-05, 'batch_size': 1024, 'use_phage_weights': False, 'weight_decay': 0.04166176225725361, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.1766843589538438}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3276

CROSS-VALIDATION RESULTS
Median MCC: 0.3276
Mean MCC: 0.3221 ± 0.0103
Fold 1 MCC: 0.3290
Fold 2 MCC: 0.3327
Fold 3 MCC: 0.3165
Fold 4 MCC: 0.3046
Fold 5 MCC: 0.3276


TRIAL 136: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.140
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000051
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0312
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.191


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5597, MCC: -0.0037 | Val Loss: 0.4974, MCC: 0.0000 | LR: 0.000051
Epoch 2/75 - Train Loss: 0.5219, MCC: 0.0237 | Val Loss: 0.4781, MCC: 0.0000 | LR: 0.000051
Epoch 3/75 - Train 

[I 2025-04-22 16:15:06,265] Trial 136 finished with value: 0.3301495479532531 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.13991606564019596, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.124916805273115e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.031240877728870225, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.1913573008029656}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3167

CROSS-VALIDATION RESULTS
Median MCC: 0.3301
Mean MCC: 0.3208 ± 0.0175
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3327
Fold 3 MCC: 0.3361
Fold 4 MCC: 0.2883
Fold 5 MCC: 0.3167


TRIAL 137: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.140
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000044
  - batch_size: 512
  - use_phage_weights: True
  - weight_decay: 0.0294
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.191


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.9913, MCC: 0.0282 | Val Loss: 0.9320, MCC: -0.0141 | LR: 0.000044
Epoch 2/75 - Train Loss: 0.9864, MCC: 0.0709 | Val Loss: 0.9314, MCC: -0.0141 | LR: 0.000044
Epoch 3/75 - Train 

[I 2025-04-22 16:19:04,464] Trial 137 finished with value: 0.20324126444326776 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.14002744590543587, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.4198966400178285e-05, 'batch_size': 512, 'use_phage_weights': True, 'weight_decay': 0.029387941647242826, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.19104268501007773}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.1879

CROSS-VALIDATION RESULTS
Median MCC: 0.2032
Mean MCC: 0.2009 ± 0.0738
Fold 1 MCC: 0.0729
Fold 2 MCC: 0.2032
Fold 3 MCC: 0.2929
Fold 4 MCC: 0.2476
Fold 5 MCC: 0.1879


TRIAL 138: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.146
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000061
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0255
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.188


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5361, MCC: -0.0030 | Val Loss: 0.4847, MCC: 0.0000 | LR: 0.000061
Epoch 2/75 - Train Loss: 0.5142, MCC: 0.0377 | Val Loss: 0.4593, MCC: 0.0000 | LR: 0.000061
Epoch 3/75 - Train 

[I 2025-04-22 16:23:53,922] Trial 138 finished with value: 0.3301495479532531 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.14550158293425666, 'ln': True, 'activation_function': 'relu', 'learning_rate': 6.0720310169879684e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.025508623132595207, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.1876150091127191}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3198

CROSS-VALIDATION RESULTS
Median MCC: 0.3301
Mean MCC: 0.3207 ± 0.0203
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3361
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2818
Fold 5 MCC: 0.3198


TRIAL 139: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.146
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000063
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0256
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.197


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5489, MCC: -0.0149 | Val Loss: 0.5025, MCC: 0.0000 | LR: 0.000063
Epoch 2/75 - Train Loss: 0.5236, MCC: 0.0211 | Val Loss: 0.4675, MCC: 0.0000 | LR: 0.000063
Epoch 3/75 - Train 

[I 2025-04-22 16:27:33,559] Trial 139 finished with value: 0.3029822059092262 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.14639275168106203, 'ln': True, 'activation_function': 'relu', 'learning_rate': 6.330471441157539e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.025643222427342585, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.19672322467037745}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3277

CROSS-VALIDATION RESULTS
Median MCC: 0.3030
Mean MCC: 0.3048 ± 0.0281
Fold 1 MCC: 0.2946
Fold 2 MCC: 0.3030
Fold 3 MCC: 0.3396
Fold 4 MCC: 0.2590
Fold 5 MCC: 0.3277


TRIAL 140: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 1
  - classifier_hidden_dim: 384
  - dropout: 0.165
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000068
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0191
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.188


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5587, MCC: 0.0027 | Val Loss: 0.4950, MCC: 0.0000 | LR: 0.000068
Epoch 2/75 - Train Loss: 0.5205, MCC: -0.0067 | Val Loss: 0.4799, MCC: 0.0000 | LR: 0.000068
Epoch 3/75 - Train 

[I 2025-04-22 16:31:30,243] Trial 140 finished with value: 0.30529829690577526 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 1, 'classifier_hidden_dim': 384, 'dropout': 0.16535731608465995, 'ln': True, 'activation_function': 'relu', 'learning_rate': 6.766641563922487e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.019117700651828877, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18773798439141326}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3053

CROSS-VALIDATION RESULTS
Median MCC: 0.3053
Mean MCC: 0.3005 ± 0.0172
Fold 1 MCC: 0.2946
Fold 2 MCC: 0.3171
Fold 3 MCC: 0.3155
Fold 4 MCC: 0.2701
Fold 5 MCC: 0.3053


TRIAL 141: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.131
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000058
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0227
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.193


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5421, MCC: -0.0028 | Val Loss: 0.4860, MCC: 0.0000 | LR: 0.000058
Epoch 2/75 - Train Loss: 0.5135, MCC: 0.0429 | Val Loss: 0.4638, MCC: 0.0595 | LR: 0.000058
Epoch 3/75 - Train 

[I 2025-04-22 16:34:55,092] Trial 141 finished with value: 0.32747708996347763 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.13090838729652715, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.772716947855299e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.02267108564046067, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.1925770564836464}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3275

CROSS-VALIDATION RESULTS
Median MCC: 0.3275
Mean MCC: 0.3155 ± 0.0263
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3384
Fold 3 MCC: 0.3168
Fold 4 MCC: 0.2647
Fold 5 MCC: 0.3275


TRIAL 142: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.138
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000055
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0316
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.185


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5538, MCC: -0.0004 | Val Loss: 0.4990, MCC: 0.0000 | LR: 0.000055
Epoch 2/75 - Train Loss: 0.5220, MCC: 0.0101 | Val Loss: 0.4675, MCC: 0.0000 | LR: 0.000055
Epoch 3/75 - Train 

[I 2025-04-22 16:38:27,242] Trial 142 finished with value: 0.315916718100461 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.13789129331174066, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.5498072903899256e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.031633885104569694, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18477243701712215}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3104

CROSS-VALIDATION RESULTS
Median MCC: 0.3159
Mean MCC: 0.3124 ± 0.0241
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3159
Fold 3 MCC: 0.3372
Fold 4 MCC: 0.2681
Fold 5 MCC: 0.3104


TRIAL 143: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.150
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000041
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0161
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.189


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5448, MCC: -0.0023 | Val Loss: 0.4984, MCC: 0.0000 | LR: 0.000041
Epoch 2/75 - Train Loss: 0.5222, MCC: 0.0217 | Val Loss: 0.4818, MCC: 0.0000 | LR: 0.000041
Epoch 3/75 - Train 

[I 2025-04-22 16:43:07,438] Trial 143 finished with value: 0.3199879226606935 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.1504245569469297, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.066698704558264e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.016094117932004222, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18909431428623735}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3200

CROSS-VALIDATION RESULTS
Median MCC: 0.3200
Mean MCC: 0.3127 ± 0.0244
Fold 1 MCC: 0.3027
Fold 2 MCC: 0.3351
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2702
Fold 5 MCC: 0.3200


TRIAL 144: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.146
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000050
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0376
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.183


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5417, MCC: -0.0116 | Val Loss: 0.4967, MCC: 0.0000 | LR: 0.000050
Epoch 2/75 - Train Loss: 0.5181, MCC: 0.0167 | Val Loss: 0.4746, MCC: 0.0000 | LR: 0.000050
Epoch 3/75 - Train 

[I 2025-04-22 16:47:05,272] Trial 144 finished with value: 0.33148512545071257 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.14555006905989956, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.965816993604099e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.03757686010955305, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18304224002986716}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3112

CROSS-VALIDATION RESULTS
Median MCC: 0.3315
Mean MCC: 0.3166 ± 0.0238
Fold 1 MCC: 0.3315
Fold 2 MCC: 0.3327
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2722
Fold 5 MCC: 0.3112


TRIAL 145: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.157
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000075
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0379
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.186


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5398, MCC: 0.0078 | Val Loss: 0.4845, MCC: 0.0000 | LR: 0.000075
Epoch 2/75 - Train Loss: 0.5134, MCC: 0.0243 | Val Loss: 0.4642, MCC: 0.0595 | LR: 0.000074
Epoch 3/75 - Train L

[I 2025-04-22 16:50:15,704] Trial 145 finished with value: 0.3255065901812532 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.15671650607638957, 'ln': True, 'activation_function': 'relu', 'learning_rate': 7.455271600697767e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.03789964944167349, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18559223299399263}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3308

CROSS-VALIDATION RESULTS
Median MCC: 0.3255
Mean MCC: 0.3189 ± 0.0181
Fold 1 MCC: 0.3108
Fold 2 MCC: 0.3255
Fold 3 MCC: 0.3395
Fold 4 MCC: 0.2878
Fold 5 MCC: 0.3308


TRIAL 146: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.159
  - ln: True
  - activation_function: gelu
Training Parameters:
  - learning_rate: 0.000062
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0337
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.131


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5552, MCC: -0.0134 | Val Loss: 0.4931, MCC: 0.0000 | LR: 0.000061
Epoch 2/75 - Train Loss: 0.5152, MCC: 0.0062 | Val Loss: 0.4682, MCC: 0.0594 | LR: 0.000061
Epoch 3/75 - Train 

[I 2025-04-22 16:54:46,372] Trial 146 finished with value: 0.3288527352672735 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.1594630748703393, 'ln': True, 'activation_function': 'gelu', 'learning_rate': 6.150648824732101e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.03365958160129503, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.13095308672934916}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3289

CROSS-VALIDATION RESULTS
Median MCC: 0.3289
Mean MCC: 0.3127 ± 0.0258
Fold 1 MCC: 0.2888
Fold 2 MCC: 0.3356
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2746
Fold 5 MCC: 0.3289


TRIAL 147: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.144
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000050
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0274
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.194


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5624, MCC: -0.0103 | Val Loss: 0.4971, MCC: 0.0000 | LR: 0.000050
Epoch 2/75 - Train Loss: 0.5204, MCC: -0.0033 | Val Loss: 0.4790, MCC: 0.0000 | LR: 0.000050
Epoch 3/75 - Train

[I 2025-04-22 16:58:52,476] Trial 147 finished with value: 0.3301495479532531 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.14382470430263616, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.009657095680042e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.027441676808012436, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.19351798657264388}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3284

CROSS-VALIDATION RESULTS
Median MCC: 0.3301
Mean MCC: 0.3294 ± 0.0211
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3589
Fold 3 MCC: 0.3362
Fold 4 MCC: 0.2931
Fold 5 MCC: 0.3284


TRIAL 148: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.135
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000051
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0203
  - scheduler_type: one_cycle
  - warmup_ratio: 0.200


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6233, MCC: 0.0331 | Val Loss: 0.5733, MCC: 0.0000 | LR: 0.000003
Epoch 2/75 - Train Loss: 0.5671, MCC: 0.0052 | Val Loss: 0.5225, MCC: 0.0000 | LR: 0.000004
Epoch 3/75 - Train Loss: 0.

[I 2025-04-22 17:02:54,075] Trial 148 finished with value: 0.33105994456521237 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.1352440292056379, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.138440963823939e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.020333337135039115, 'scheduler_type': 'one_cycle', 'warmup_ratio': 0.19965376503864835}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3422

CROSS-VALIDATION RESULTS
Median MCC: 0.3311
Mean MCC: 0.3356 ± 0.0297
Fold 1 MCC: 0.3874
Fold 2 MCC: 0.3311
Fold 3 MCC: 0.3189
Fold 4 MCC: 0.2983
Fold 5 MCC: 0.3422


TRIAL 149: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: max
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.135
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000051
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0237
  - scheduler_type: one_cycle
  - warmup_ratio: 0.199


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing max pooling for 177 strains...
Precomputing max pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.8070, MCC: -0.0054 | Val Loss: 0.7224, MCC: -0.0143 | LR: 0.000003
Epoch 2/75 - Train Loss: 0.6760, MCC: -0.0040 | Val Loss: 0.5672, MCC: 0.0000 | LR: 0.000004
Epoch 3/75 - Train Loss: 0.5692, 

[I 2025-04-22 17:07:15,325] Trial 149 finished with value: 0.32185248622397344 and parameters: {'pooling_type': 'max', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.134662758858213, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.115519696789907e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.02373179733633372, 'scheduler_type': 'one_cycle', 'warmup_ratio': 0.19922893167904845}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.2906

CROSS-VALIDATION RESULTS
Median MCC: 0.3219
Mean MCC: 0.3295 ± 0.0420
Fold 1 MCC: 0.3879
Fold 2 MCC: 0.3669
Fold 3 MCC: 0.2803
Fold 4 MCC: 0.3219
Fold 5 MCC: 0.2906


TRIAL 150: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.143
  - ln: False
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000052
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0257
  - scheduler_type: one_cycle
  - warmup_ratio: 0.193


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6778, MCC: 0.0224 | Val Loss: 0.6733, MCC: 0.0000 | LR: 0.000003
Epoch 2/75 - Train Loss: 0.6710, MCC: -0.0006 | Val Loss: 0.6634, MCC: 0.0000 | LR: 0.000004
Epoch 3/75 - Train Loss: 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold 2 - Val MCC: 0.0397

FOLD 3/5
Train set: 13632 interactions, 142 strains
Validation set: 3360 interactions, 35 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6912, MCC: -0.0184 | Val Loss: 0.6884, MCC: 0.0000 | LR: 0.000003
Epoch 2/75 - Train Loss: 0.6846, MCC: -0.0052 | Val Loss: 0.6775, MCC: 0.0000 | LR: 0.000004
Epoch 3/75 - Train Loss: 0.6731, MCC: 0.0100 | Val Loss: 0.6573, MCC: 0.0000 | LR: 0.000007
Epoch 4/75 - Train Loss: 0.6513, MCC: 0.0000 | Val Loss: 0.6201, MCC: 0.0000 | LR: 0.000011
Epoch 5/75 - Train Loss: 0.6146, MCC: 0.0000 | Val Loss: 0.5552, MCC: 0.0000 | LR: 0.000015
Epoch 6/75 - Train Loss: 0.5691, MCC: 0.0000 | Val Loss: 0.4840, MCC: 0.0000 | LR: 0.000021
Epoch 7/75 - Train Loss: 0.5533, MCC: 0.0000 | Val Loss: 0.4687, MCC: 0.0000 | LR: 0.000026
Epoch 8/75 - Train Loss: 0.5483, MCC: 0.0000 | Val Loss: 0.4661, MCC: 0.0000 | LR: 0.000031
Epoch 9/75 - Train Loss: 0.5458, MCC: 0.0000 |

[I 2025-04-22 17:12:07,351] Trial 150 finished with value: 0.24433417704635016 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.14345068799007182, 'ln': False, 'activation_function': 'relu', 'learning_rate': 5.2010417845436404e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.025666316467073957, 'scheduler_type': 'one_cycle', 'warmup_ratio': 0.19279175140685512}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.2378

CROSS-VALIDATION RESULTS
Median MCC: 0.2443
Mean MCC: 0.2078 ± 0.0846
Fold 1 MCC: 0.2506
Fold 2 MCC: 0.0397
Fold 3 MCC: 0.2664
Fold 4 MCC: 0.2443
Fold 5 MCC: 0.2378


TRIAL 151: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.124
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000049
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0211
  - scheduler_type: one_cycle
  - warmup_ratio: 0.200


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.7230, MCC: 0.0115 | Val Loss: 0.6434, MCC: 0.0000 | LR: 0.000002
Epoch 2/75 - Train Loss: 0.6181, MCC: -0.0012 | Val Loss: 0.5329, MCC: 0.0000 | LR: 0.000004
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 17:15:36,491] Trial 151 finished with value: 0.3289868039032808 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.12431900400690603, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.864001502344009e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.02114370464403673, 'scheduler_type': 'one_cycle', 'warmup_ratio': 0.19953288962032154}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3345

CROSS-VALIDATION RESULTS
Median MCC: 0.3290
Mean MCC: 0.3130 ± 0.0307
Fold 1 MCC: 0.3290
Fold 2 MCC: 0.3171
Fold 3 MCC: 0.3317
Fold 4 MCC: 0.2528
Fold 5 MCC: 0.3345


TRIAL 152: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.115
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000054
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0284
  - scheduler_type: one_cycle
  - warmup_ratio: 0.195


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.8413, MCC: -0.0185 | Val Loss: 0.7672, MCC: 0.0000 | LR: 0.000003
Epoch 2/75 - Train Loss: 0.6937, MCC: -0.0129 | Val Loss: 0.5859, MCC: 0.0000 | LR: 0.000005
Epoch 3/75 - Train Loss: 

[I 2025-04-22 17:19:36,949] Trial 152 finished with value: 0.32233698481887596 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.11501915548495042, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.436478250212217e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.028411717133535, 'scheduler_type': 'one_cycle', 'warmup_ratio': 0.19487777461168831}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3275

CROSS-VALIDATION RESULTS
Median MCC: 0.3223
Mean MCC: 0.3171 ± 0.0235
Fold 1 MCC: 0.3210
Fold 2 MCC: 0.3223
Fold 3 MCC: 0.3420
Fold 4 MCC: 0.2726
Fold 5 MCC: 0.3275


TRIAL 153: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.151
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000058
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0170
  - scheduler_type: one_cycle
  - warmup_ratio: 0.196


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6249, MCC: -0.0051 | Val Loss: 0.5568, MCC: 0.0000 | LR: 0.000003
Epoch 2/75 - Train Loss: 0.5644, MCC: 0.0170 | Val Loss: 0.5168, MCC: 0.0000 | LR: 0.000005
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 17:23:16,497] Trial 153 finished with value: 0.31449933006104036 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.15077605590864762, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.78300709442503e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.017020405494442815, 'scheduler_type': 'one_cycle', 'warmup_ratio': 0.19588076534446505}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3145

CROSS-VALIDATION RESULTS
Median MCC: 0.3145
Mean MCC: 0.3136 ± 0.0166
Fold 1 MCC: 0.3210
Fold 2 MCC: 0.3363
Fold 3 MCC: 0.3109
Fold 4 MCC: 0.2854
Fold 5 MCC: 0.3145


TRIAL 154: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.146
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000060
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0198
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.190


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5467, MCC: -0.0009 | Val Loss: 0.4987, MCC: 0.0000 | LR: 0.000060
Epoch 2/75 - Train Loss: 0.5197, MCC: 0.0130 | Val Loss: 0.4761, MCC: 0.0000 | LR: 0.000060
Epoch 3/75 - Train 

[I 2025-04-22 17:27:16,093] Trial 154 finished with value: 0.3266653323735915 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.14585958854650774, 'ln': True, 'activation_function': 'relu', 'learning_rate': 6.018997896690787e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.01983052644698643, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.1902774491443141}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3234

CROSS-VALIDATION RESULTS
Median MCC: 0.3267
Mean MCC: 0.3139 ± 0.0301
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3351
Fold 3 MCC: 0.3267
Fold 4 MCC: 0.2541
Fold 5 MCC: 0.3234


TRIAL 155: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.137
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000047
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0080
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.178


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5433, MCC: 0.0032 | Val Loss: 0.4936, MCC: 0.0000 | LR: 0.000047
Epoch 2/75 - Train Loss: 0.5184, MCC: -0.0142 | Val Loss: 0.4657, MCC: 0.0000 | LR: 0.000047
Epoch 3/75 - Train 

[I 2025-04-22 17:33:17,091] Trial 155 finished with value: 0.3301495479532531 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.1373081712360415, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.73433193578524e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.008027457191064118, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.17792006443671327}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3238

CROSS-VALIDATION RESULTS
Median MCC: 0.3301
Mean MCC: 0.3250 ± 0.0161
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3406
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2947
Fold 5 MCC: 0.3238


TRIAL 156: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.129
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000049
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0082
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.172


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5500, MCC: -0.0071 | Val Loss: 0.5018, MCC: 0.0000 | LR: 0.000049
Epoch 2/75 - Train Loss: 0.5228, MCC: 0.0217 | Val Loss: 0.4754, MCC: 0.0000 | LR: 0.000049
Epoch 3/75 - Train 

[I 2025-04-22 17:36:43,454] Trial 156 finished with value: 0.308258205700542 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.12923175428714914, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.9247223998409446e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.008226559409771695, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.17169219512577263}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3083

CROSS-VALIDATION RESULTS
Median MCC: 0.3083
Mean MCC: 0.3128 ± 0.0191
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.2906
Fold 3 MCC: 0.3395
Fold 4 MCC: 0.2957
Fold 5 MCC: 0.3083


TRIAL 157: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.138
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000048
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0100
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.053


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5728, MCC: 0.0078 | Val Loss: 0.5106, MCC: 0.0000 | LR: 0.000048
Epoch 2/75 - Train Loss: 0.5294, MCC: -0.0047 | Val Loss: 0.4869, MCC: 0.0000 | LR: 0.000048
Epoch 3/75 - Train 

[I 2025-04-22 17:40:07,214] Trial 157 finished with value: 0.31421331110654843 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.13793734210433972, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.7659249570286006e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.010036761126705419, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.05319461408759951}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3275

CROSS-VALIDATION RESULTS
Median MCC: 0.3142
Mean MCC: 0.3017 ± 0.0392
Fold 1 MCC: 0.3056
Fold 2 MCC: 0.3351
Fold 3 MCC: 0.3142
Fold 4 MCC: 0.2259
Fold 5 MCC: 0.3275


TRIAL 158: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.135
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000043
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0070
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.125


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5437, MCC: 0.0068 | Val Loss: 0.4988, MCC: 0.0000 | LR: 0.000043
Epoch 2/75 - Train Loss: 0.5224, MCC: 0.0000 | Val Loss: 0.4717, MCC: 0.0000 | LR: 0.000043
Epoch 3/75 - Train L

[I 2025-04-22 17:44:35,989] Trial 158 finished with value: 0.32747708996347763 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.1346861329576024, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.271037201133731e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.007042705814112639, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.12523896183664462}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3275

CROSS-VALIDATION RESULTS
Median MCC: 0.3275
Mean MCC: 0.3236 ± 0.0135
Fold 1 MCC: 0.3114
Fold 2 MCC: 0.3351
Fold 3 MCC: 0.3396
Fold 4 MCC: 0.3046
Fold 5 MCC: 0.3275


TRIAL 159: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.141
  - ln: True
  - activation_function: silu
Training Parameters:
  - learning_rate: 0.000054
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0078
  - scheduler_type: reduce_on_plateau
  - warmup_ratio: 0.178


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5467, MCC: -0.0090 | Val Loss: 0.4943, MCC: 0.0000 | LR: 0.000054
Epoch 2/75 - Train Loss: 0.5131, MCC: 0.0521 | Val Loss: 0.4769, MCC: 0.0000 | LR: 0.000054
Epoch 3/75 - Train

[I 2025-04-22 17:48:09,253] Trial 159 finished with value: 0.317972682451475 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.1409783844293628, 'ln': True, 'activation_function': 'silu', 'learning_rate': 5.380932925873503e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.00776276835450127, 'scheduler_type': 'reduce_on_plateau', 'warmup_ratio': 0.17761732664272967}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3199

CROSS-VALIDATION RESULTS
Median MCC: 0.3180
Mean MCC: 0.3088 ± 0.0239
Fold 1 MCC: 0.3180
Fold 2 MCC: 0.3063
Fold 3 MCC: 0.3353
Fold 4 MCC: 0.2647
Fold 5 MCC: 0.3199


TRIAL 160: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 768
  - dropout: 0.121
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000051
  - batch_size: 512
  - use_phage_weights: True
  - weight_decay: 0.0268
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.183


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.9893, MCC: 0.0561 | Val Loss: 0.9568, MCC: 0.0905 | LR: 0.000051
Epoch 2/75 - Train Loss: 0.9822, MCC: 0.0907 | Val Loss: 0.9428, MCC: -0.0141 | LR: 0.000051
Epoch 3/75 - Train L

[I 2025-04-22 17:51:31,654] Trial 160 finished with value: 0.2348691264969246 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 768, 'dropout': 0.12110107275097635, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.143864786898027e-05, 'batch_size': 512, 'use_phage_weights': True, 'weight_decay': 0.0267921092713783, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18317451039358423}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.2349

CROSS-VALIDATION RESULTS
Median MCC: 0.2349
Mean MCC: 0.2200 ± 0.0670
Fold 1 MCC: 0.0905
Fold 2 MCC: 0.2333
Fold 3 MCC: 0.2783
Fold 4 MCC: 0.2630
Fold 5 MCC: 0.2349


TRIAL 161: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.152
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000056
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0355
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.174


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5752, MCC: -0.0055 | Val Loss: 0.5123, MCC: 0.0000 | LR: 0.000056
Epoch 2/75 - Train Loss: 0.5251, MCC: 0.0221 | Val Loss: 0.4834, MCC: 0.0000 | LR: 0.000056
Epoch 3/75 - Train 

[I 2025-04-22 17:56:39,874] Trial 161 finished with value: 0.3289868039032808 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.15218755228251446, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.629362975502173e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.03546043129085931, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.17443305365532186}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3283

CROSS-VALIDATION RESULTS
Median MCC: 0.3290
Mean MCC: 0.3246 ± 0.0223
Fold 1 MCC: 0.3290
Fold 2 MCC: 0.3444
Fold 3 MCC: 0.3396
Fold 4 MCC: 0.2818
Fold 5 MCC: 0.3283


TRIAL 162: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.144
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000067
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0307
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.179


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5437, MCC: -0.0209 | Val Loss: 0.4919, MCC: 0.0000 | LR: 0.000067
Epoch 2/75 - Train Loss: 0.5161, MCC: 0.0237 | Val Loss: 0.4710, MCC: 0.0000 | LR: 0.000067
Epoch 3/75 - Train 

[I 2025-04-22 17:59:42,741] Trial 162 finished with value: 0.3192715366821864 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.14381013492562175, 'ln': True, 'activation_function': 'relu', 'learning_rate': 6.692211263380915e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.030656710878046868, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.17917912624367907}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3167

CROSS-VALIDATION RESULTS
Median MCC: 0.3193
Mean MCC: 0.3164 ± 0.0153
Fold 1 MCC: 0.3193
Fold 2 MCC: 0.3252
Fold 3 MCC: 0.3331
Fold 4 MCC: 0.2879
Fold 5 MCC: 0.3167


TRIAL 163: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.131
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000050
  - batch_size: 128
  - use_phage_weights: False
  - weight_decay: 0.0060
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.184


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5288, MCC: 0.0339 | Val Loss: 0.4693, MCC: 0.0595 | LR: 0.000050
Epoch 2/75 - Train Loss: 0.4994, MCC: 0.1416 | Val Loss: 0.4681, MCC: 0.1973 | LR: 0.000050
Epoch 3/75 - Train L

[I 2025-04-22 18:03:42,369] Trial 163 finished with value: 0.3301495479532531 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.13116404177871174, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.024412185294499e-05, 'batch_size': 128, 'use_phage_weights': False, 'weight_decay': 0.006044295945404202, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18362389512391677}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3198

CROSS-VALIDATION RESULTS
Median MCC: 0.3301
Mean MCC: 0.3276 ± 0.0246
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3643
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2883
Fold 5 MCC: 0.3198


TRIAL 164: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.128
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000047
  - batch_size: 128
  - use_phage_weights: False
  - weight_decay: 0.0051
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.184


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5268, MCC: 0.0213 | Val Loss: 0.4666, MCC: 0.0595 | LR: 0.000047
Epoch 2/75 - Train Loss: 0.4999, MCC: 0.1376 | Val Loss: 0.4716, MCC: 0.0000 | LR: 0.000047
Epoch 3/75 - Train L

[I 2025-04-22 18:06:14,526] Trial 164 finished with value: 0.31060170896909545 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.12755747147497354, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.677197637321395e-05, 'batch_size': 128, 'use_phage_weights': False, 'weight_decay': 0.005122958295863312, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18395033172732594}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3106

CROSS-VALIDATION RESULTS
Median MCC: 0.3106
Mean MCC: 0.3116 ± 0.0166
Fold 1 MCC: 0.3278
Fold 2 MCC: 0.3102
Fold 3 MCC: 0.3273
Fold 4 MCC: 0.2822
Fold 5 MCC: 0.3106


TRIAL 165: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.133
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000053
  - batch_size: 128
  - use_phage_weights: False
  - weight_decay: 0.0066
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.193


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5254, MCC: 0.0347 | Val Loss: 0.4740, MCC: 0.0595 | LR: 0.000053
Epoch 2/75 - Train Loss: 0.5002, MCC: 0.1178 | Val Loss: 0.4574, MCC: 0.0000 | LR: 0.000053
Epoch 3/75 - Train L

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2025-04-22 18:09:47,949] Trial 165 finished with value: 0.3221670268152889 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.13273902004967345, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.3115809684121395e-05, 'batch_size': 128, 'use_phage_weights': False, 'weight_decay': 0.006634436027440528, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.1933769066351599}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3222

CROSS-VALIDATION RESULTS
Median MCC: 0.3222
Mean MCC: 0.3137 ± 0.0253
Fold 1 MCC: 0.2848
Fold 2 MCC: 0.3327
Fold 3 MCC: 0.3454
Fold 4 MCC: 0.2833
Fold 5 MCC: 0.3222


TRIAL 166: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.139
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000050
  - batch_size: 128
  - use_phage_weights: False
  - weight_decay: 0.0059
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.180


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5233, MCC: 0.0693 | Val Loss: 0.4707, MCC: 0.0000 | LR: 0.000050
Epoch 2/75 - Train Loss: 0.5030, MCC: 0.1108 | Val Loss: 0.4618, MCC: 0.2068 | LR: 0.000050
Epoch 3/75 - Train L

[I 2025-04-22 18:12:57,669] Trial 166 finished with value: 0.3193718278022875 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.13943668191909728, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.9974119259228516e-05, 'batch_size': 128, 'use_phage_weights': False, 'weight_decay': 0.005946708169391735, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.1797209151179951}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3194

CROSS-VALIDATION RESULTS
Median MCC: 0.3194
Mean MCC: 0.3137 ± 0.0154
Fold 1 MCC: 0.3057
Fold 2 MCC: 0.3327
Fold 3 MCC: 0.3223
Fold 4 MCC: 0.2883
Fold 5 MCC: 0.3194


TRIAL 167: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: max
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.168
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000045
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0045
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.175


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing max pooling for 177 strains...
Precomputing max pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5369, MCC: 0.0106 | Val Loss: 0.4903, MCC: 0.0000 | LR: 0.000045
Epoch 2/75 - Train Loss: 0.5201, MCC: 0.0403 | Val Loss: 0.4746, MCC: 0.1407 | LR: 0.000045
Epoch 3/75 - Train Loss: 0.50

[I 2025-04-22 18:16:29,339] Trial 167 finished with value: 0.3078416180622452 and parameters: {'pooling_type': 'max', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.1683319646087764, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.4670934502362955e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.004483358400514337, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.17510843299824175}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.2791

CROSS-VALIDATION RESULTS
Median MCC: 0.3078
Mean MCC: 0.3098 ± 0.0396
Fold 1 MCC: 0.3537
Fold 2 MCC: 0.3538
Fold 3 MCC: 0.2546
Fold 4 MCC: 0.3078
Fold 5 MCC: 0.2791


TRIAL 168: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.147
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000011
  - batch_size: 128
  - use_phage_weights: False
  - weight_decay: 0.0088
  - scheduler_type: one_cycle
  - warmup_ratio: 0.190


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6293, MCC: -0.0264 | Val Loss: 0.5784, MCC: 0.0000 | LR: 0.000001
Epoch 2/75 - Train Loss: 0.5835, MCC: 0.0003 | Val Loss: 0.5279, MCC: 0.0000 | LR: 0.000001
Epoch 3/75 - Train Loss: 0

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold 2 - Val MCC: 0.0661

FOLD 3/5
Train set: 13632 interactions, 142 strains
Validation set: 3360 interactions, 35 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6781, MCC: 0.0337 | Val Loss: 0.5856, MCC: 0.0000 | LR: 0.000001
Epoch 2/75 - Train Loss: 0.6161, MCC: 0.0111 | Val Loss: 0.5023, MCC: 0.0000 | LR: 0.000001
Epoch 3/75 - Train Loss: 0.5670, MCC: 0.0026 | Val Loss: 0.4613, MCC: 0.0000 | LR: 0.000002
Epoch 4/75 - Train Loss: 0.5515, MCC: 0.0032 | Val Loss: 0.4542, MCC: 0.0000 | LR: 0.000002
Epoch 5/75 - Train Loss: 0.5468, MCC: 0.0152 | Val Loss: 0.4487, MCC: 0.0000 | LR: 0.000003
Epoch 6/75 - Train Loss: 0.5397, MCC: 0.0000 | Val Loss: 0.4406, MCC: 0.0000 | LR: 0.000004
Epoch 7/75 - Train Loss: 0.5327, MCC: -0.0003 | Val Loss: 0.4328, MCC: 0.0000 | LR: 0.000005
Epoch 8/75 - Train Loss: 0.5221, MCC: 0.0382 | Val Loss: 0.4325, MCC: 0.0000 | LR: 0.000007
Epoch 9/75 - Train Loss: 0.5167, MCC: 0.0718 | 

[I 2025-04-22 18:21:47,270] Trial 168 finished with value: 0.32653959087008144 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.1473639837347946, 'ln': True, 'activation_function': 'relu', 'learning_rate': 1.073706413686741e-05, 'batch_size': 128, 'use_phage_weights': False, 'weight_decay': 0.008810341127081673, 'scheduler_type': 'one_cycle', 'warmup_ratio': 0.1899697434783147}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3276

CROSS-VALIDATION RESULTS
Median MCC: 0.3265
Mean MCC: 0.2663 ± 0.1018
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.0661
Fold 3 MCC: 0.3265
Fold 4 MCC: 0.2812
Fold 5 MCC: 0.3276


TRIAL 169: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.125
  - ln: False
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000033
  - batch_size: 128
  - use_phage_weights: False
  - weight_decay: 0.0226
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.182


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5640, MCC: 0.0027 | Val Loss: 0.5191, MCC: 0.0000 | LR: 0.000033
Epoch 2/75 - Train Loss: 0.5285, MCC: 0.0000 | Val Loss: 0.5056, MCC: 0.0000 | LR: 0.000033
Epoch 3/75 - Train 

[I 2025-04-22 18:26:45,260] Trial 169 finished with value: 0.2957280549248848 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.1252057545355475, 'ln': False, 'activation_function': 'relu', 'learning_rate': 3.292235875148214e-05, 'batch_size': 128, 'use_phage_weights': False, 'weight_decay': 0.02260203568076228, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18164602036078684}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.2505

CROSS-VALIDATION RESULTS
Median MCC: 0.2957
Mean MCC: 0.2911 ± 0.0382
Fold 1 MCC: 0.2957
Fold 2 MCC: 0.3295
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2443
Fold 5 MCC: 0.2505


TRIAL 170: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.133
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000048
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0125
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.185


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5399, MCC: -0.0081 | Val Loss: 0.4912, MCC: 0.0000 | LR: 0.000048
Epoch 2/75 - Train Loss: 0.5127, MCC: 0.0344 | Val Loss: 0.4674, MCC: 0.0717 | LR: 0.000048
Epoch 3/75 - Train Loss: 

[I 2025-04-22 18:30:16,897] Trial 170 finished with value: 0.3301495479532531 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.13304059856145292, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.819216263563333e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.012462187701033606, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18486187831882717}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.2986

CROSS-VALIDATION RESULTS
Median MCC: 0.3301
Mean MCC: 0.3194 ± 0.0155
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3327
Fold 3 MCC: 0.3331
Fold 4 MCC: 0.3024
Fold 5 MCC: 0.2986


TRIAL 171: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.133
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000050
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0125
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.184


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5418, MCC: 0.0045 | Val Loss: 0.4843, MCC: 0.0000 | LR: 0.000050
Epoch 2/75 - Train Loss: 0.5106, MCC: 0.0404 | Val Loss: 0.4632, MCC: 0.0990 | LR: 0.000050
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 18:35:02,876] Trial 171 finished with value: 0.3301495479532531 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.13306390799820073, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.972159228132369e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.01250755177110287, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18437599040993638}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3259

CROSS-VALIDATION RESULTS
Median MCC: 0.3301
Mean MCC: 0.3239 ± 0.0211
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3444
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2836
Fold 5 MCC: 0.3259


TRIAL 172: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.112
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000047
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0127
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.186


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5528, MCC: -0.0017 | Val Loss: 0.5028, MCC: 0.0000 | LR: 0.000047
Epoch 2/75 - Train Loss: 0.5217, MCC: 0.0077 | Val Loss: 0.4701, MCC: 0.0000 | LR: 0.000047
Epoch 3/75 - Train Loss: 

[I 2025-04-22 18:40:01,045] Trial 172 finished with value: 0.32747708996347763 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.11166318759167966, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.689914209089782e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.012675980061796675, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18564343654903545}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3275

CROSS-VALIDATION RESULTS
Median MCC: 0.3275
Mean MCC: 0.3245 ± 0.0142
Fold 1 MCC: 0.3358
Fold 2 MCC: 0.3356
Fold 3 MCC: 0.3265
Fold 4 MCC: 0.2971
Fold 5 MCC: 0.3275


TRIAL 173: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.133
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000049
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0098
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.188


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5458, MCC: 0.0034 | Val Loss: 0.4913, MCC: 0.0000 | LR: 0.000049
Epoch 2/75 - Train Loss: 0.5138, MCC: 0.0280 | Val Loss: 0.4715, MCC: 0.0595 | LR: 0.000049
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 18:44:36,795] Trial 173 finished with value: 0.3301495479532531 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.13299477200262597, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.893987671285271e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.009771502372429087, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18819456688375077}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3052

CROSS-VALIDATION RESULTS
Median MCC: 0.3301
Mean MCC: 0.3167 ± 0.0251
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3383
Fold 3 MCC: 0.3372
Fold 4 MCC: 0.2726
Fold 5 MCC: 0.3052


TRIAL 174: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.132
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000050
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0097
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.189


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5611, MCC: -0.0023 | Val Loss: 0.5064, MCC: 0.0000 | LR: 0.000050
Epoch 2/75 - Train Loss: 0.5232, MCC: 0.0167 | Val Loss: 0.4814, MCC: 0.0000 | LR: 0.000050
Epoch 3/75 - Train Loss: 

[I 2025-04-22 18:47:35,403] Trial 174 finished with value: 0.32758298206345926 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.1321554440805043, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.973987156353158e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.009712785433626447, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18869826370757334}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3276

CROSS-VALIDATION RESULTS
Median MCC: 0.3276
Mean MCC: 0.3183 ± 0.0251
Fold 1 MCC: 0.3290
Fold 2 MCC: 0.3520
Fold 3 MCC: 0.3049
Fold 4 MCC: 0.2780
Fold 5 MCC: 0.3276


TRIAL 175: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.137
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000053
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0111
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.183


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5579, MCC: -0.0128 | Val Loss: 0.4919, MCC: 0.0000 | LR: 0.000053
Epoch 2/75 - Train Loss: 0.5127, MCC: 0.0479 | Val Loss: 0.4681, MCC: 0.0000 | LR: 0.000053
Epoch 3/75 - Train Loss: 

[I 2025-04-22 18:50:52,857] Trial 175 finished with value: 0.3289868039032808 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.13656450150747032, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.309989358956947e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.011098738018907749, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18345918468876493}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3275

CROSS-VALIDATION RESULTS
Median MCC: 0.3290
Mean MCC: 0.3213 ± 0.0196
Fold 1 MCC: 0.3290
Fold 2 MCC: 0.3327
Fold 3 MCC: 0.3350
Fold 4 MCC: 0.2824
Fold 5 MCC: 0.3275


TRIAL 176: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.122
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000048
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0139
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.192


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5346, MCC: 0.0109 | Val Loss: 0.4831, MCC: 0.0000 | LR: 0.000048
Epoch 2/75 - Train Loss: 0.5117, MCC: 0.0480 | Val Loss: 0.4655, MCC: 0.1296 | LR: 0.000048
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 18:54:09,169] Trial 176 finished with value: 0.3276680045549857 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.12197092071127812, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.7783788824617835e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.013904872646557542, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.1915431201611132}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3277

CROSS-VALIDATION RESULTS
Median MCC: 0.3277
Mean MCC: 0.3183 ± 0.0240
Fold 1 MCC: 0.3302
Fold 2 MCC: 0.3276
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2706
Fold 5 MCC: 0.3277


TRIAL 177: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.128
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000050
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0122
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.187


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5569, MCC: 0.0046 | Val Loss: 0.4939, MCC: 0.0000 | LR: 0.000050
Epoch 2/75 - Train Loss: 0.5144, MCC: 0.0058 | Val Loss: 0.4685, MCC: 0.0000 | LR: 0.000050
Epoch 3/75 - Train L

[I 2025-04-22 18:57:44,098] Trial 177 finished with value: 0.30675355362851786 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.12795682020397564, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.0148115545144275e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.012193676884571177, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18704317619880978}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3068

CROSS-VALIDATION RESULTS
Median MCC: 0.3068
Mean MCC: 0.3166 ± 0.0134
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3062
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.3046
Fold 5 MCC: 0.3068


TRIAL 178: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 768
  - dropout: 0.141
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000012
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0110
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.196


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6036, MCC: -0.0187 | Val Loss: 0.5132, MCC: 0.0000 | LR: 0.000012
Epoch 2/75 - Train Loss: 0.5331, MCC: 0.0076 | Val Loss: 0.4918, MCC: 0.0000 | LR: 0.000012
Epoch 3/75 - Train Loss: 

[I 2025-04-22 19:03:27,527] Trial 178 finished with value: 0.3145612307993759 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 768, 'dropout': 0.1405103337771049, 'ln': True, 'activation_function': 'relu', 'learning_rate': 1.2268026563266925e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.011015576720372407, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.19645002819875546}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3233

CROSS-VALIDATION RESULTS
Median MCC: 0.3146
Mean MCC: 0.3154 ± 0.0135
Fold 1 MCC: 0.3079
Fold 2 MCC: 0.3146
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2957
Fold 5 MCC: 0.3233


TRIAL 179: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.134
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000044
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0151
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.182


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5513, MCC: 0.0040 | Val Loss: 0.5048, MCC: 0.0000 | LR: 0.000044
Epoch 2/75 - Train Loss: 0.5193, MCC: 0.0237 | Val Loss: 0.4702, MCC: 0.0717 | LR: 0.000044
Epoch 3/75 - Train L

[I 2025-04-22 19:07:01,327] Trial 179 finished with value: 0.3284918010954441 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.13417638837894594, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.410630239731953e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.01514599903280424, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18154271887598566}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3285

CROSS-VALIDATION RESULTS
Median MCC: 0.3285
Mean MCC: 0.3160 ± 0.0314
Fold 1 MCC: 0.3240
Fold 2 MCC: 0.3442
Fold 3 MCC: 0.3288
Fold 4 MCC: 0.2547
Fold 5 MCC: 0.3285


TRIAL 180: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.129
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000056
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0185
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.178


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5522, MCC: -0.0149 | Val Loss: 0.4992, MCC: 0.0000 | LR: 0.000056
Epoch 2/75 - Train Loss: 0.5205, MCC: 0.0039 | Val Loss: 0.4724, MCC: 0.0000 | LR: 0.000056
Epoch 3/75 - Train Loss: 

[I 2025-04-22 19:10:07,988] Trial 180 finished with value: 0.3175271425995995 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.1294109401005342, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.579081856664111e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.018529945808918365, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.17846086569543984}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.2555

CROSS-VALIDATION RESULTS
Median MCC: 0.3175
Mean MCC: 0.2978 ± 0.0391
Fold 1 MCC: 0.3175
Fold 2 MCC: 0.3327
Fold 3 MCC: 0.3372
Fold 4 MCC: 0.2458
Fold 5 MCC: 0.2555


TRIAL 181: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.148
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000051
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0085
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.186


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5510, MCC: 0.0094 | Val Loss: 0.4959, MCC: 0.0000 | LR: 0.000051
Epoch 2/75 - Train Loss: 0.5162, MCC: 0.0113 | Val Loss: 0.4666, MCC: 0.0000 | LR: 0.000050
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 19:13:49,173] Trial 181 finished with value: 0.3289868039032808 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.14755927078036082, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.058541125273829e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.008508193367042559, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18556060227038543}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3276

CROSS-VALIDATION RESULTS
Median MCC: 0.3290
Mean MCC: 0.3281 ± 0.0085
Fold 1 MCC: 0.3290
Fold 2 MCC: 0.3327
Fold 3 MCC: 0.3383
Fold 4 MCC: 0.3127
Fold 5 MCC: 0.3276


TRIAL 182: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.143
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000047
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0074
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.193


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5404, MCC: 0.0063 | Val Loss: 0.4957, MCC: 0.0000 | LR: 0.000047
Epoch 2/75 - Train Loss: 0.5192, MCC: 0.0272 | Val Loss: 0.4636, MCC: 0.0000 | LR: 0.000047
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 19:16:51,734] Trial 182 finished with value: 0.3172379151064007 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.1434880422495062, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.734005637348251e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.0073569533559880666, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.1925311377038079}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3212

CROSS-VALIDATION RESULTS
Median MCC: 0.3172
Mean MCC: 0.3099 ± 0.0113
Fold 1 MCC: 0.2992
Fold 2 MCC: 0.3183
Fold 3 MCC: 0.3172
Fold 4 MCC: 0.2935
Fold 5 MCC: 0.3212


TRIAL 183: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.136
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000054
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0106
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.189


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5484, MCC: -0.0035 | Val Loss: 0.4929, MCC: 0.0000 | LR: 0.000054
Epoch 2/75 - Train Loss: 0.5157, MCC: 0.0343 | Val Loss: 0.4703, MCC: 0.0000 | LR: 0.000054
Epoch 3/75 - Train Loss: 

[I 2025-04-22 19:19:43,370] Trial 183 finished with value: 0.30553462164543466 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.13569356778243674, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.409357198239311e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.01058418739452668, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18873579083846226}. Best is trial 89 with value: 0.33284211902408495.


Fold 5 - Val MCC: 0.3056

CROSS-VALIDATION RESULTS
Median MCC: 0.3055
Mean MCC: 0.2989 ± 0.0268
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3043
Fold 3 MCC: 0.3055
Fold 4 MCC: 0.2490
Fold 5 MCC: 0.3056


TRIAL 184: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.140
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000059
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0337
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.184


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5461, MCC: -0.0062 | Val Loss: 0.4896, MCC: 0.0000 | LR: 0.000059
Epoch 2/75 - Train Loss: 0.5138, MCC: 0.0372 | Val Loss: 0.4676, MCC: 0.0595 | LR: 0.000059
Epoch 3/75 - Train Loss: 

[I 2025-04-22 19:23:08,993] Trial 184 finished with value: 0.3355107650026586 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.1399261039732264, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.924978444690761e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.033737044834691504, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18417067616941543}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3297

CROSS-VALIDATION RESULTS
Median MCC: 0.3355
Mean MCC: 0.3242 ± 0.0264
Fold 1 MCC: 0.3426
Fold 2 MCC: 0.3411
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2722
Fold 5 MCC: 0.3297


TRIAL 185: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.139
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000057
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0279
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.183


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5427, MCC: -0.0049 | Val Loss: 0.4969, MCC: 0.0000 | LR: 0.000056
Epoch 2/75 - Train Loss: 0.5160, MCC: 0.0369 | Val Loss: 0.4622, MCC: 0.0717 | LR: 0.000056
Epoch 3/75 - Train Loss: 

[I 2025-04-22 19:27:36,820] Trial 185 finished with value: 0.32747708996347763 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.13928248213099922, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.6504689565358976e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.027854045125398377, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18334943211867216}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3275

CROSS-VALIDATION RESULTS
Median MCC: 0.3275
Mean MCC: 0.3237 ± 0.0099
Fold 1 MCC: 0.3248
Fold 2 MCC: 0.3327
Fold 3 MCC: 0.3288
Fold 4 MCC: 0.3046
Fold 5 MCC: 0.3275


TRIAL 186: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.125
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000060
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0171
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.179


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5350, MCC: -0.0019 | Val Loss: 0.4846, MCC: 0.0000 | LR: 0.000060
Epoch 2/75 - Train Loss: 0.5109, MCC: 0.0347 | Val Loss: 0.4613, MCC: 0.0595 | LR: 0.000060
Epoch 3/75 - Train 

[I 2025-04-22 19:30:59,907] Trial 186 finished with value: 0.3258042327775716 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.12523932193404544, 'ln': True, 'activation_function': 'relu', 'learning_rate': 6.039511119593481e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.01708761952799815, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.17875846875465334}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3226

CROSS-VALIDATION RESULTS
Median MCC: 0.3258
Mean MCC: 0.3180 ± 0.0194
Fold 1 MCC: 0.3258
Fold 2 MCC: 0.3327
Fold 3 MCC: 0.3290
Fold 4 MCC: 0.2798
Fold 5 MCC: 0.3226


TRIAL 187: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 2
  - classifier_hidden_dim: 512
  - dropout: 0.132
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000046
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0098
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.185


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5457, MCC: -0.0161 | Val Loss: 0.4954, MCC: 0.0000 | LR: 0.000046
Epoch 2/75 - Train Loss: 0.5151, MCC: 0.0185 | Val Loss: 0.4720, MCC: 0.0000 | LR: 0.000045
Epoch 3/75 - Train Loss: 

[I 2025-04-22 19:34:33,059] Trial 187 finished with value: 0.3093253109565124 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 2, 'classifier_hidden_dim': 512, 'dropout': 0.13161330884719305, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.55440999816399e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.00984318123111114, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18458074805085176}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3093

CROSS-VALIDATION RESULTS
Median MCC: 0.3093
Mean MCC: 0.3042 ± 0.0236
Fold 1 MCC: 0.2668
Fold 2 MCC: 0.3351
Fold 3 MCC: 0.3189
Fold 4 MCC: 0.2907
Fold 5 MCC: 0.3093


TRIAL 188: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.118
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000058
  - batch_size: 256
  - use_phage_weights: False
  - weight_decay: 0.0130
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.188


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5325, MCC: 0.0041 | Val Loss: 0.4737, MCC: 0.0000 | LR: 0.000058
Epoch 2/75 - Train Loss: 0.5037, MCC: 0.0941 | Val Loss: 0.4576, MCC: 0.0595 | LR: 0.000058
Epoch 3/75 - Train L

[I 2025-04-22 19:37:55,454] Trial 188 finished with value: 0.3235013214010312 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.11772924836576082, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.845570626642109e-05, 'batch_size': 256, 'use_phage_weights': False, 'weight_decay': 0.013031812284480155, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18827840929277043}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3145

CROSS-VALIDATION RESULTS
Median MCC: 0.3235
Mean MCC: 0.3185 ± 0.0189
Fold 1 MCC: 0.3235
Fold 2 MCC: 0.3351
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2841
Fold 5 MCC: 0.3145


TRIAL 189: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.145
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000053
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0320
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.181


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5514, MCC: 0.0019 | Val Loss: 0.4949, MCC: 0.0000 | LR: 0.000053
Epoch 2/75 - Train Loss: 0.5219, MCC: 0.0173 | Val Loss: 0.4726, MCC: 0.0368 | LR: 0.000053
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 19:41:44,007] Trial 189 finished with value: 0.3267564942308566 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.14465442661740557, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.2728903559451225e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.03202779560107646, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18122020415599144}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3275

CROSS-VALIDATION RESULTS
Median MCC: 0.3268
Mean MCC: 0.3064 ± 0.0291
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.2923
Fold 3 MCC: 0.3268
Fold 4 MCC: 0.2552
Fold 5 MCC: 0.3275


TRIAL 190: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.150
  - ln: True
  - activation_function: silu
Training Parameters:
  - learning_rate: 0.000049
  - batch_size: 512
  - use_phage_weights: True
  - weight_decay: 0.0256
  - scheduler_type: one_cycle
  - warmup_ratio: 0.194


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 1.0051, MCC: 0.0263 | Val Loss: 0.9365, MCC: -0.0141 | LR: 0.000002
Epoch 2/75 - Train Loss: 0.9948, MCC: 0.0148 | Val Loss: 0.9320, MCC: -0.0269 | LR: 0.000004
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 19:45:27,659] Trial 190 finished with value: 0.2292315328767053 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.15020932076592522, 'ln': True, 'activation_function': 'silu', 'learning_rate': 4.860767861889679e-05, 'batch_size': 512, 'use_phage_weights': True, 'weight_decay': 0.02564505081984448, 'scheduler_type': 'one_cycle', 'warmup_ratio': 0.1943196438041037}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.1879

CROSS-VALIDATION RESULTS
Median MCC: 0.2292
Mean MCC: 0.2307 ± 0.0335
Fold 1 MCC: 0.2015
Fold 2 MCC: 0.2292
Fold 3 MCC: 0.2783
Fold 4 MCC: 0.2564
Fold 5 MCC: 0.1879


TRIAL 191: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.156
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000052
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0382
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.140


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5423, MCC: -0.0022 | Val Loss: 0.4924, MCC: 0.0000 | LR: 0.000052
Epoch 2/75 - Train Loss: 0.5175, MCC: 0.0187 | Val Loss: 0.4621, MCC: 0.0000 | LR: 0.000052
Epoch 3/75 - Train Loss: 

[I 2025-04-22 19:48:42,246] Trial 191 finished with value: 0.321001397496274 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.15563644318440858, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.16638573142617e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.038150194532769494, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.13966068920794467}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3288

CROSS-VALIDATION RESULTS
Median MCC: 0.3210
Mean MCC: 0.3143 ± 0.0179
Fold 1 MCC: 0.3210
Fold 2 MCC: 0.2923
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2941
Fold 5 MCC: 0.3288


TRIAL 192: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.139
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000046
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0342
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.191


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5442, MCC: 0.0075 | Val Loss: 0.4876, MCC: 0.0000 | LR: 0.000046
Epoch 2/75 - Train Loss: 0.5181, MCC: 0.0247 | Val Loss: 0.4723, MCC: 0.0595 | LR: 0.000046
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 19:51:55,415] Trial 192 finished with value: 0.3222323159082517 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.13856948430542745, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.6171175289930604e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.034169979417711244, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.190937876448526}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3265

CROSS-VALIDATION RESULTS
Median MCC: 0.3222
Mean MCC: 0.3168 ± 0.0130
Fold 1 MCC: 0.3089
Fold 2 MCC: 0.3310
Fold 3 MCC: 0.3222
Fold 4 MCC: 0.2953
Fold 5 MCC: 0.3265


TRIAL 193: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.146
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000065
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0315
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.134


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5442, MCC: 0.0006 | Val Loss: 0.4894, MCC: 0.0000 | LR: 0.000065
Epoch 2/75 - Train Loss: 0.5136, MCC: 0.0475 | Val Loss: 0.4635, MCC: 0.0717 | LR: 0.000065
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 19:55:17,080] Trial 193 finished with value: 0.31997192534453367 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.1460528003346781, 'ln': True, 'activation_function': 'relu', 'learning_rate': 6.546247519203274e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.0314881340108479, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.13444963092787385}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3200

CROSS-VALIDATION RESULTS
Median MCC: 0.3200
Mean MCC: 0.3153 ± 0.0155
Fold 1 MCC: 0.3025
Fold 2 MCC: 0.3327
Fold 3 MCC: 0.3288
Fold 4 MCC: 0.2924
Fold 5 MCC: 0.3200


TRIAL 194: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.130
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000055
  - batch_size: 128
  - use_phage_weights: False
  - weight_decay: 0.0093
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.186


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5237, MCC: 0.0197 | Val Loss: 0.4677, MCC: 0.1338 | LR: 0.000055
Epoch 2/75 - Train Loss: 0.4997, MCC: 0.1126 | Val Loss: 0.4609, MCC: 0.0000 | LR: 0.000055
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 19:58:07,110] Trial 194 finished with value: 0.31994315005177065 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.13037737520902198, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.4799148162361376e-05, 'batch_size': 128, 'use_phage_weights': False, 'weight_decay': 0.009274741533685422, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18563632835245994}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3199

CROSS-VALIDATION RESULTS
Median MCC: 0.3199
Mean MCC: 0.3215 ± 0.0326
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3745
Fold 3 MCC: 0.3090
Fold 4 MCC: 0.2739
Fold 5 MCC: 0.3199


TRIAL 195: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.163
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000050
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0292
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.146


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5552, MCC: -0.0009 | Val Loss: 0.5023, MCC: 0.0000 | LR: 0.000050
Epoch 2/75 - Train Loss: 0.5259, MCC: 0.0000 | Val Loss: 0.4722, MCC: 0.0000 | LR: 0.000049
Epoch 3/75 - Train Loss: 

[I 2025-04-22 20:02:00,514] Trial 195 finished with value: 0.3327139421780493 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.16320069528054545, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.956286916804828e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.02921491843184908, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.14585731223728943}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3275

CROSS-VALIDATION RESULTS
Median MCC: 0.3327
Mean MCC: 0.3242 ± 0.0273
Fold 1 MCC: 0.3529
Fold 2 MCC: 0.3327
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2722
Fold 5 MCC: 0.3275


TRIAL 196: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.142
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000050
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0245
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.173


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5480, MCC: 0.0086 | Val Loss: 0.5038, MCC: 0.0000 | LR: 0.000050
Epoch 2/75 - Train Loss: 0.5226, MCC: 0.0156 | Val Loss: 0.4702, MCC: 0.0000 | LR: 0.000050
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 20:06:07,592] Trial 196 finished with value: 0.3301495479532531 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.14207674054690372, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.9706997690183705e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.024456497896886698, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.17252994154399034}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3240

CROSS-VALIDATION RESULTS
Median MCC: 0.3301
Mean MCC: 0.3181 ± 0.0271
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3332
Fold 3 MCC: 0.3385
Fold 4 MCC: 0.2647
Fold 5 MCC: 0.3240


TRIAL 197: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.141
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000051
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0237
  - scheduler_type: reduce_on_plateau
  - warmup_ratio: 0.121


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5485, MCC: -0.0123 | Val Loss: 0.4949, MCC: 0.0000 | LR: 0.000051
Epoch 2/75 - Train Loss: 0.5181, MCC: -0.0014 | Val Loss: 0.4685, MCC: 0.0000 | LR: 0.000051
Epoch 3/75 - Train Loss

[I 2025-04-22 20:09:37,180] Trial 197 finished with value: 0.32524215575680165 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.14108822456834721, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.0526861687496276e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.023728947256265065, 'scheduler_type': 'reduce_on_plateau', 'warmup_ratio': 0.1210942329201652}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3188

CROSS-VALIDATION RESULTS
Median MCC: 0.3252
Mean MCC: 0.3167 ± 0.0230
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3252
Fold 3 MCC: 0.3371
Fold 4 MCC: 0.2722
Fold 5 MCC: 0.3188


TRIAL 198: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.135
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000044
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0222
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.172


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5586, MCC: -0.0071 | Val Loss: 0.5020, MCC: 0.0000 | LR: 0.000044
Epoch 2/75 - Train Loss: 0.5273, MCC: 0.0036 | Val Loss: 0.4820, MCC: 0.0000 | LR: 0.000044
Epoch 3/75 - Train Loss: 

[I 2025-04-22 20:14:13,539] Trial 198 finished with value: 0.3301495479532531 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.13549281538358054, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.360016187205866e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.022205466144634683, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.17245037313817882}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3275

CROSS-VALIDATION RESULTS
Median MCC: 0.3301
Mean MCC: 0.3261 ± 0.0111
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3327
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.3046
Fold 5 MCC: 0.3275


TRIAL 199: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.149
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000058
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0282
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.170


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5455, MCC: 0.0004 | Val Loss: 0.4950, MCC: 0.0000 | LR: 0.000058
Epoch 2/75 - Train Loss: 0.5158, MCC: 0.0116 | Val Loss: 0.4636, MCC: 0.0617 | LR: 0.000057
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 20:18:21,926] Trial 199 finished with value: 0.3301495479532531 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.14931433729054788, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.7551331159998565e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.028227212638989044, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.16964569780275307}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3222

CROSS-VALIDATION RESULTS
Median MCC: 0.3301
Mean MCC: 0.3191 ± 0.0237
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3351
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2726
Fold 5 MCC: 0.3222


TRIAL 200: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.124
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000052
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0201
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.175


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5730, MCC: 0.0003 | Val Loss: 0.5054, MCC: 0.0000 | LR: 0.000052
Epoch 2/75 - Train Loss: 0.5289, MCC: 0.0000 | Val Loss: 0.4937, MCC: 0.0000 | LR: 0.000052
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 20:22:36,411] Trial 200 finished with value: 0.32448040461802896 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.12398624347765094, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.171764777525711e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.020073789672876742, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.1754829391889653}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3275

CROSS-VALIDATION RESULTS
Median MCC: 0.3245
Mean MCC: 0.3220 ± 0.0105
Fold 1 MCC: 0.3245
Fold 2 MCC: 0.3357
Fold 3 MCC: 0.3177
Fold 4 MCC: 0.3046
Fold 5 MCC: 0.3275


TRIAL 201: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.135
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000043
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0210
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.166


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5512, MCC: 0.0126 | Val Loss: 0.4989, MCC: 0.0000 | LR: 0.000043
Epoch 2/75 - Train Loss: 0.5234, MCC: 0.0050 | Val Loss: 0.4671, MCC: 0.0000 | LR: 0.000043
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 20:26:35,593] Trial 201 finished with value: 0.31786900759419706 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.13497849163317366, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.2666199018960276e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.021038069693564268, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.16589978664312577}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3179

CROSS-VALIDATION RESULTS
Median MCC: 0.3179
Mean MCC: 0.3154 ± 0.0162
Fold 1 MCC: 0.3082
Fold 2 MCC: 0.3327
Fold 3 MCC: 0.3301
Fold 4 MCC: 0.2883
Fold 5 MCC: 0.3179


TRIAL 202: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.135
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000045
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0218
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.170


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5786, MCC: 0.0058 | Val Loss: 0.5083, MCC: 0.0000 | LR: 0.000045
Epoch 2/75 - Train Loss: 0.5290, MCC: 0.0156 | Val Loss: 0.4856, MCC: 0.0000 | LR: 0.000045
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 20:30:15,184] Trial 202 finished with value: 0.3228485967594461 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.13548476458569908, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.474710557710767e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.021837061641512067, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.17046072098755766}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3199

CROSS-VALIDATION RESULTS
Median MCC: 0.3228
Mean MCC: 0.3154 ± 0.0261
Fold 1 MCC: 0.3228
Fold 2 MCC: 0.3327
Fold 3 MCC: 0.3370
Fold 4 MCC: 0.2647
Fold 5 MCC: 0.3199


TRIAL 203: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.143
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000048
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0248
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.172


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5657, MCC: 0.0087 | Val Loss: 0.5093, MCC: 0.0000 | LR: 0.000048
Epoch 2/75 - Train Loss: 0.5269, MCC: -0.0067 | Val Loss: 0.4808, MCC: 0.0000 | LR: 0.000048
Epoch 3/75 - Train Loss: 

[I 2025-04-22 20:33:28,711] Trial 203 finished with value: 0.3274712895371589 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.14305710092210944, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.821729488806018e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.024791255590193958, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.17175527651365302}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3284

CROSS-VALIDATION RESULTS
Median MCC: 0.3275
Mean MCC: 0.3086 ± 0.0369
Fold 1 MCC: 0.3275
Fold 2 MCC: 0.3142
Fold 3 MCC: 0.3366
Fold 4 MCC: 0.2362
Fold 5 MCC: 0.3284


TRIAL 204: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.129
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000042
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0294
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.177


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5526, MCC: 0.0016 | Val Loss: 0.5015, MCC: 0.0000 | LR: 0.000042
Epoch 2/75 - Train Loss: 0.5248, MCC: -0.0047 | Val Loss: 0.4701, MCC: 0.0000 | LR: 0.000042
Epoch 3/75 - Train Loss: 

[I 2025-04-22 20:36:49,968] Trial 204 finished with value: 0.322256017655474 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.1294073646576871, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.209510726697981e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.029411623820497067, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.17718947751999692}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3238

CROSS-VALIDATION RESULTS
Median MCC: 0.3223
Mean MCC: 0.3079 ± 0.0278
Fold 1 MCC: 0.3058
Fold 2 MCC: 0.3327
Fold 3 MCC: 0.3223
Fold 4 MCC: 0.2552
Fold 5 MCC: 0.3238


TRIAL 205: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.139
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000046
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0263
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.182


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5830, MCC: -0.0098 | Val Loss: 0.5070, MCC: 0.0000 | LR: 0.000046
Epoch 2/75 - Train Loss: 0.5256, MCC: 0.0000 | Val Loss: 0.4903, MCC: 0.0000 | LR: 0.000046
Epoch 3/75 - Train Loss: 

[I 2025-04-22 20:40:16,337] Trial 205 finished with value: 0.33017618849749797 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.13906809413317492, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.62283093352821e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.02627832242805711, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.1818699579419487}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3302

CROSS-VALIDATION RESULTS
Median MCC: 0.3302
Mean MCC: 0.3148 ± 0.0221
Fold 1 MCC: 0.2875
Fold 2 MCC: 0.3327
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2883
Fold 5 MCC: 0.3302


TRIAL 206: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.143
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000050
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0266
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.182


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5578, MCC: -0.0095 | Val Loss: 0.5063, MCC: 0.0000 | LR: 0.000050
Epoch 2/75 - Train Loss: 0.5270, MCC: -0.0047 | Val Loss: 0.4767, MCC: 0.0000 | LR: 0.000050
Epoch 3/75 - Train Loss:

[I 2025-04-22 20:43:11,324] Trial 206 finished with value: 0.3172550130418551 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.14294346099069757, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.016803176079798e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.026594432964707017, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18232099209400482}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3209

CROSS-VALIDATION RESULTS
Median MCC: 0.3173
Mean MCC: 0.3106 ± 0.0312
Fold 1 MCC: 0.3572
Fold 2 MCC: 0.2949
Fold 3 MCC: 0.3173
Fold 4 MCC: 0.2625
Fold 5 MCC: 0.3209


TRIAL 207: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.139
  - ln: False
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000053
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0303
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.187


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6195, MCC: 0.0082 | Val Loss: 0.5334, MCC: 0.0000 | LR: 0.000053
Epoch 2/75 - Train Loss: 0.5433, MCC: 0.0000 | Val Loss: 0.5179, MCC: 0.0000 | LR: 0.000053
Epoch 3/75 - Train Loss: 

[I 2025-04-22 20:49:43,621] Trial 207 finished with value: 0.24433417704635016 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.13923592666316637, 'ln': False, 'activation_function': 'relu', 'learning_rate': 5.308641946011967e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.030314931743961736, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.1871241432955061}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.2378

CROSS-VALIDATION RESULTS
Median MCC: 0.2443
Mean MCC: 0.2492 ± 0.0286
Fold 1 MCC: 0.2083
Fold 2 MCC: 0.2956
Fold 3 MCC: 0.2597
Fold 4 MCC: 0.2443
Fold 5 MCC: 0.2378


TRIAL 208: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: max
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.154
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000047
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0270
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.200


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing max pooling for 177 strains...
Precomputing max pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5759, MCC: -0.0079 | Val Loss: 0.5147, MCC: 0.0000 | LR: 0.000047
Epoch 2/75 - Train Loss: 0.5362, MCC: 0.0000 | Val Loss: 0.5001, MCC: 0.0000 | LR: 0.000047
Epoch 3/75 - Train Loss: 0.5

[I 2025-04-22 20:52:39,217] Trial 208 finished with value: 0.2903635347910742 and parameters: {'pooling_type': 'max', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.1542116453791122, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.7487816490972036e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.026961350261161858, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.1998035099600722}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.2784

CROSS-VALIDATION RESULTS
Median MCC: 0.2904
Mean MCC: 0.3022 ± 0.0389
Fold 1 MCC: 0.3442
Fold 2 MCC: 0.3495
Fold 3 MCC: 0.2486
Fold 4 MCC: 0.2904
Fold 5 MCC: 0.2784


TRIAL 209: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.148
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000060
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0332
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.183


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5596, MCC: 0.0076 | Val Loss: 0.5091, MCC: 0.0000 | LR: 0.000060
Epoch 2/75 - Train Loss: 0.5284, MCC: 0.0077 | Val Loss: 0.4805, MCC: 0.0000 | LR: 0.000060
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 20:56:38,549] Trial 209 finished with value: 0.3238996878167865 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.14790822006916382, 'ln': True, 'activation_function': 'relu', 'learning_rate': 6.038516877972526e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.03317451582504823, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.1833159408934139}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3239

CROSS-VALIDATION RESULTS
Median MCC: 0.3239
Mean MCC: 0.3184 ± 0.0196
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3221
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2804
Fold 5 MCC: 0.3239


TRIAL 210: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.133
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000055
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0246
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.109


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5814, MCC: 0.0162 | Val Loss: 0.5061, MCC: 0.0000 | LR: 0.000055
Epoch 2/75 - Train Loss: 0.5279, MCC: 0.0000 | Val Loss: 0.4801, MCC: 0.0000 | LR: 0.000055
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 21:00:10,243] Trial 210 finished with value: 0.29629228614891623 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.13337492773159082, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.512216769291648e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.024595078142097833, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.10872373768353076}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.2963

CROSS-VALIDATION RESULTS
Median MCC: 0.2963
Mean MCC: 0.3107 ± 0.0192
Fold 1 MCC: 0.2957
Fold 2 MCC: 0.3327
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2931
Fold 5 MCC: 0.2963


TRIAL 211: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.137
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000046
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0228
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.180


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5521, MCC: 0.0064 | Val Loss: 0.5050, MCC: 0.0000 | LR: 0.000046
Epoch 2/75 - Train Loss: 0.5222, MCC: 0.0000 | Val Loss: 0.4746, MCC: 0.0000 | LR: 0.000046
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 21:03:49,580] Trial 211 finished with value: 0.3301495479532531 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.13724220123266656, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.613401454775221e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.022780760476872153, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.17964370145821104}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3275

CROSS-VALIDATION RESULTS
Median MCC: 0.3301
Mean MCC: 0.3286 ± 0.0189
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3541
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2957
Fold 5 MCC: 0.3275


TRIAL 212: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.133
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000044
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0293
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.174


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5465, MCC: -0.0005 | Val Loss: 0.4922, MCC: 0.0000 | LR: 0.000044
Epoch 2/75 - Train Loss: 0.5200, MCC: 0.0055 | Val Loss: 0.4675, MCC: 0.0000 | LR: 0.000044
Epoch 3/75 - Train Loss: 

[I 2025-04-22 21:07:53,075] Trial 212 finished with value: 0.326303490698575 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.13294315913843066, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.412831164071234e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.029299853223816744, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.17374052235897774}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3275

CROSS-VALIDATION RESULTS
Median MCC: 0.3263
Mean MCC: 0.3188 ± 0.0134
Fold 1 MCC: 0.3309
Fold 2 MCC: 0.3155
Fold 3 MCC: 0.3263
Fold 4 MCC: 0.2940
Fold 5 MCC: 0.3275


TRIAL 213: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.141
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000049
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0175
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.116


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5488, MCC: 0.0073 | Val Loss: 0.5054, MCC: 0.0000 | LR: 0.000049
Epoch 2/75 - Train Loss: 0.5250, MCC: -0.0047 | Val Loss: 0.4764, MCC: 0.0000 | LR: 0.000049
Epoch 3/75 - Train Loss: 

[I 2025-04-22 21:10:33,871] Trial 213 finished with value: 0.29856432010177025 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.14055630545179623, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.892365697932974e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.01751693933437306, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.11585631344325084}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3052

CROSS-VALIDATION RESULTS
Median MCC: 0.2986
Mean MCC: 0.2984 ± 0.0113
Fold 1 MCC: 0.3126
Fold 2 MCC: 0.2986
Fold 3 MCC: 0.2969
Fold 4 MCC: 0.2788
Fold 5 MCC: 0.3052


TRIAL 214: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.128
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000040
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0368
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.191


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5784, MCC: 0.0010 | Val Loss: 0.5077, MCC: 0.0000 | LR: 0.000040
Epoch 2/75 - Train Loss: 0.5272, MCC: 0.0000 | Val Loss: 0.4813, MCC: 0.0000 | LR: 0.000039
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 21:14:12,117] Trial 214 finished with value: 0.31565513481491014 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.12814128303562666, 'ln': True, 'activation_function': 'relu', 'learning_rate': 3.9547992178480535e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.036764511552823405, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.19061865621926846}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.2991

CROSS-VALIDATION RESULTS
Median MCC: 0.3157
Mean MCC: 0.3171 ± 0.0142
Fold 1 MCC: 0.3308
Fold 2 MCC: 0.3157
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.3046
Fold 5 MCC: 0.2991


TRIAL 215: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.145
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000049
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0248
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.185


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5639, MCC: 0.0204 | Val Loss: 0.5043, MCC: 0.0000 | LR: 0.000049
Epoch 2/75 - Train Loss: 0.5305, MCC: 0.0000 | Val Loss: 0.4829, MCC: 0.0000 | LR: 0.000049
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 21:18:10,068] Trial 215 finished with value: 0.3275675187649259 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.14505073701856505, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.94850522846566e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.02480517174864733, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.1854165114145136}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3275

CROSS-VALIDATION RESULTS
Median MCC: 0.3276
Mean MCC: 0.3182 ± 0.0241
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3276
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2704
Fold 5 MCC: 0.3275


TRIAL 216: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.137
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000052
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0143
  - scheduler_type: one_cycle
  - warmup_ratio: 0.177


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6585, MCC: -0.0078 | Val Loss: 0.5891, MCC: 0.0000 | LR: 0.000003
Epoch 2/75 - Train Loss: 0.5856, MCC: 0.0081 | Val Loss: 0.5185, MCC: 0.0000 | LR: 0.000005
Epoch 3/75 - Train Loss: 0.5447,

[I 2025-04-22 21:22:59,409] Trial 216 finished with value: 0.3301495479532531 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.13731603029720627, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.2106158617054176e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.014305196012920926, 'scheduler_type': 'one_cycle', 'warmup_ratio': 0.17736345165617412}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3202

CROSS-VALIDATION RESULTS
Median MCC: 0.3301
Mean MCC: 0.3252 ± 0.0115
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3351
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.3051
Fold 5 MCC: 0.3202


TRIAL 217: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.122
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000046
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0211
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.188


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5418, MCC: 0.0022 | Val Loss: 0.4959, MCC: 0.0000 | LR: 0.000046
Epoch 2/75 - Train Loss: 0.5172, MCC: -0.0057 | Val Loss: 0.4695, MCC: 0.0000 | LR: 0.000046
Epoch 3/75 - Train 

[I 2025-04-22 21:26:15,427] Trial 217 finished with value: 0.3289868039032808 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.12204816973784545, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.612340013942949e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.021059528293268112, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18796968722002735}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3336

CROSS-VALIDATION RESULTS
Median MCC: 0.3290
Mean MCC: 0.3182 ± 0.0169
Fold 1 MCC: 0.3290
Fold 2 MCC: 0.3331
Fold 3 MCC: 0.2997
Fold 4 MCC: 0.2957
Fold 5 MCC: 0.3336


TRIAL 218: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.056
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000043
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0117
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.181


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5642, MCC: 0.0058 | Val Loss: 0.5016, MCC: 0.0000 | LR: 0.000043
Epoch 2/75 - Train Loss: 0.5186, MCC: 0.0000 | Val Loss: 0.4812, MCC: 0.0000 | LR: 0.000043
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 21:29:27,058] Trial 218 finished with value: 0.3238996878167865 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.05644565754051592, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.306839181326147e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.011730114619938217, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.1809494121543936}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3239

CROSS-VALIDATION RESULTS
Median MCC: 0.3239
Mean MCC: 0.3096 ± 0.0330
Fold 1 MCC: 0.3183
Fold 2 MCC: 0.3327
Fold 3 MCC: 0.3290
Fold 4 MCC: 0.2443
Fold 5 MCC: 0.3239


TRIAL 219: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 768
  - dropout: 0.148
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000047
  - batch_size: 256
  - use_phage_weights: False
  - weight_decay: 0.0273
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.193


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5320, MCC: 0.0013 | Val Loss: 0.4669, MCC: 0.0000 | LR: 0.000047
Epoch 2/75 - Train Loss: 0.5012, MCC: 0.0959 | Val Loss: 0.4631, MCC: 0.2068 | LR: 0.000047
Epoch 3/75 - Train L

[I 2025-04-22 21:33:19,978] Trial 219 finished with value: 0.32921448976251966 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 768, 'dropout': 0.1481079948020802, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.745120984561484e-05, 'batch_size': 256, 'use_phage_weights': False, 'weight_decay': 0.027250299509557933, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.19288700910826817}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3292

CROSS-VALIDATION RESULTS
Median MCC: 0.3292
Mean MCC: 0.3170 ± 0.0214
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.2948
Fold 3 MCC: 0.3425
Fold 4 MCC: 0.2885
Fold 5 MCC: 0.3292


TRIAL 220: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.131
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000041
  - batch_size: 128
  - use_phage_weights: False
  - weight_decay: 0.0397
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.174


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5415, MCC: -0.0058 | Val Loss: 0.4725, MCC: 0.0000 | LR: 0.000041
Epoch 2/75 - Train Loss: 0.5053, MCC: 0.0960 | Val Loss: 0.4605, MCC: 0.1819 | LR: 0.000041
Epoch 3/75 - Train Loss: 

[I 2025-04-22 21:36:56,961] Trial 220 finished with value: 0.32293183958610977 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.13060713107815797, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.123061334665283e-05, 'batch_size': 128, 'use_phage_weights': False, 'weight_decay': 0.03973899922974933, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.17375355116337593}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3229

CROSS-VALIDATION RESULTS
Median MCC: 0.3229
Mean MCC: 0.3176 ± 0.0267
Fold 1 MCC: 0.2933
Fold 2 MCC: 0.3541
Fold 3 MCC: 0.3361
Fold 4 MCC: 0.2818
Fold 5 MCC: 0.3229


TRIAL 221: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.151
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000055
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0292
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.169


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5648, MCC: 0.0067 | Val Loss: 0.5018, MCC: 0.0000 | LR: 0.000055
Epoch 2/75 - Train Loss: 0.5248, MCC: 0.0050 | Val Loss: 0.4728, MCC: 0.0000 | LR: 0.000055
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 21:40:45,157] Trial 221 finished with value: 0.3301495479532531 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.1512327634449938, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.5233078624899944e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.029179980805338675, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.16896097996484022}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3198

CROSS-VALIDATION RESULTS
Median MCC: 0.3301
Mean MCC: 0.3244 ± 0.0151
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3394
Fold 3 MCC: 0.3353
Fold 4 MCC: 0.2972
Fold 5 MCC: 0.3198


TRIAL 222: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.142
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000059
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0331
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.184


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5544, MCC: 0.0033 | Val Loss: 0.5076, MCC: 0.0000 | LR: 0.000059
Epoch 2/75 - Train Loss: 0.5242, MCC: -0.0047 | Val Loss: 0.4748, MCC: 0.0000 | LR: 0.000059
Epoch 3/75 - Train Loss: 

[I 2025-04-22 21:44:14,820] Trial 222 finished with value: 0.3100772582750694 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.14228115406514927, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.860443661975496e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.033108415234379865, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18392114138046725}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3051

CROSS-VALIDATION RESULTS
Median MCC: 0.3101
Mean MCC: 0.3070 ± 0.0191
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3171
Fold 3 MCC: 0.3101
Fold 4 MCC: 0.2727
Fold 5 MCC: 0.3051


TRIAL 223: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.158
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000058
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0276
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.177


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5539, MCC: 0.0084 | Val Loss: 0.5058, MCC: 0.0000 | LR: 0.000058
Epoch 2/75 - Train Loss: 0.5259, MCC: 0.0055 | Val Loss: 0.4720, MCC: 0.0000 | LR: 0.000058
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 21:48:05,998] Trial 223 finished with value: 0.32747708996347763 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.1582460013956259, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.793769680418866e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.02763469594063926, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.1769315374795254}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3275

CROSS-VALIDATION RESULTS
Median MCC: 0.3275
Mean MCC: 0.3249 ± 0.0147
Fold 1 MCC: 0.3221
Fold 2 MCC: 0.3409
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2986
Fold 5 MCC: 0.3275


TRIAL 224: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.151
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000051
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0243
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.144


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5447, MCC: 0.0003 | Val Loss: 0.4925, MCC: 0.0000 | LR: 0.000051
Epoch 2/75 - Train Loss: 0.5222, MCC: 0.0151 | Val Loss: 0.4793, MCC: 0.0000 | LR: 0.000051
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 21:51:39,954] Trial 224 finished with value: 0.32758298206345926 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.15066511111312528, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.082148412526062e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.024302821404044234, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.14407845001791195}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3276

CROSS-VALIDATION RESULTS
Median MCC: 0.3276
Mean MCC: 0.3220 ± 0.0146
Fold 1 MCC: 0.3327
Fold 2 MCC: 0.3276
Fold 3 MCC: 0.3290
Fold 4 MCC: 0.2931
Fold 5 MCC: 0.3276


TRIAL 225: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 1
  - classifier_hidden_dim: 384
  - dropout: 0.136
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000062
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0300
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.168


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5378, MCC: 0.0153 | Val Loss: 0.4901, MCC: 0.0000 | LR: 0.000062
Epoch 2/75 - Train Loss: 0.5128, MCC: 0.0212 | Val Loss: 0.4720, MCC: 0.0000 | LR: 0.000062
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 21:55:24,656] Trial 225 finished with value: 0.30978622713210424 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 1, 'classifier_hidden_dim': 384, 'dropout': 0.13577319818446953, 'ln': True, 'activation_function': 'relu', 'learning_rate': 6.238776153198187e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.030041957782295504, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.16797729420590127}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3098

CROSS-VALIDATION RESULTS
Median MCC: 0.3098
Mean MCC: 0.3034 ± 0.0243
Fold 1 MCC: 0.3157
Fold 2 MCC: 0.2935
Fold 3 MCC: 0.3353
Fold 4 MCC: 0.2628
Fold 5 MCC: 0.3098


TRIAL 226: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.146
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000053
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0160
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.157


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5416, MCC: 0.0101 | Val Loss: 0.4920, MCC: 0.0000 | LR: 0.000053
Epoch 2/75 - Train Loss: 0.5188, MCC: 0.0309 | Val Loss: 0.4703, MCC: 0.0000 | LR: 0.000053
Epoch 3/75 - Train L

[I 2025-04-22 21:58:39,247] Trial 226 finished with value: 0.3084828390481911 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.14590223700659605, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.330778873223599e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.015956652223920453, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.15703434052670753}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3289

CROSS-VALIDATION RESULTS
Median MCC: 0.3085
Mean MCC: 0.3114 ± 0.0160
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3006
Fold 3 MCC: 0.3085
Fold 4 MCC: 0.2891
Fold 5 MCC: 0.3289


TRIAL 227: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.140
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000049
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0352
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.173


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5451, MCC: -0.0077 | Val Loss: 0.4937, MCC: 0.0000 | LR: 0.000049
Epoch 2/75 - Train Loss: 0.5143, MCC: 0.0087 | Val Loss: 0.4766, MCC: 0.0000 | LR: 0.000049
Epoch 3/75 - Train Loss: 

[I 2025-04-22 22:01:54,276] Trial 227 finished with value: 0.32747708996347763 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.1398259175366458, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.9308668277470406e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.03518795617855101, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.1730375591990804}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3275

CROSS-VALIDATION RESULTS
Median MCC: 0.3275
Mean MCC: 0.3311 ± 0.0263
Fold 1 MCC: 0.3795
Fold 2 MCC: 0.3327
Fold 3 MCC: 0.3112
Fold 4 MCC: 0.3046
Fold 5 MCC: 0.3275


TRIAL 228: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.152
  - ln: True
  - activation_function: silu
Training Parameters:
  - learning_rate: 0.000057
  - batch_size: 512
  - use_phage_weights: True
  - weight_decay: 0.0227
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.181


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.9869, MCC: 0.0619 | Val Loss: 0.9259, MCC: -0.0107 | LR: 0.000057
Epoch 2/75 - Train Loss: 0.9798, MCC: 0.0973 | Val Loss: 0.9185, MCC: 0.0305 | LR: 0.000056
Epoch 3/75 - Train L

[I 2025-04-22 22:05:25,932] Trial 228 finished with value: 0.23969946946708112 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.15247481598441104, 'ln': True, 'activation_function': 'silu', 'learning_rate': 5.659311371086702e-05, 'batch_size': 512, 'use_phage_weights': True, 'weight_decay': 0.0226914202434011, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18074139506348155}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.2100

CROSS-VALIDATION RESULTS
Median MCC: 0.2397
Mean MCC: 0.2408 ± 0.0289
Fold 1 MCC: 0.2083
Fold 2 MCC: 0.2397
Fold 3 MCC: 0.2797
Fold 4 MCC: 0.2665
Fold 5 MCC: 0.2100


TRIAL 229: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.125
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000045
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0063
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.189


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5431, MCC: 0.0141 | Val Loss: 0.4884, MCC: 0.0000 | LR: 0.000045
Epoch 2/75 - Train Loss: 0.5181, MCC: 0.0062 | Val Loss: 0.4706, MCC: 0.0717 | LR: 0.000045
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 22:09:53,745] Trial 229 finished with value: 0.3301495479532531 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.124764636984542, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.4894372688782546e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.00631291266125801, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18873450297591915}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3240

CROSS-VALIDATION RESULTS
Median MCC: 0.3301
Mean MCC: 0.3254 ± 0.0162
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3421
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2952
Fold 5 MCC: 0.3240


TRIAL 230: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.165
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000047
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0268
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.184


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5478, MCC: 0.0032 | Val Loss: 0.5055, MCC: 0.0000 | LR: 0.000047
Epoch 2/75 - Train Loss: 0.5270, MCC: -0.0047 | Val Loss: 0.4748, MCC: 0.0000 | LR: 0.000047
Epoch 3/75 - Train 

[I 2025-04-22 22:13:44,367] Trial 230 finished with value: 0.32758298206345926 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.16494230359196366, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.722672123709076e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.026820358063204302, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.1838580322018597}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3276

CROSS-VALIDATION RESULTS
Median MCC: 0.3276
Mean MCC: 0.3148 ± 0.0251
Fold 1 MCC: 0.3065
Fold 2 MCC: 0.3351
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2692
Fold 5 MCC: 0.3276


TRIAL 231: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.135
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000046
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0192
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.180


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5562, MCC: 0.0066 | Val Loss: 0.5049, MCC: 0.0000 | LR: 0.000046
Epoch 2/75 - Train Loss: 0.5248, MCC: -0.0095 | Val Loss: 0.4717, MCC: 0.0000 | LR: 0.000046
Epoch 3/75 - Train Loss: 

[I 2025-04-22 22:17:23,413] Trial 231 finished with value: 0.3289868039032808 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.13454797649558387, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.642757383790365e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.019199589870906777, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18001471199867983}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3291

CROSS-VALIDATION RESULTS
Median MCC: 0.3290
Mean MCC: 0.3123 ± 0.0256
Fold 1 MCC: 0.3290
Fold 2 MCC: 0.3327
Fold 3 MCC: 0.3062
Fold 4 MCC: 0.2647
Fold 5 MCC: 0.3291


TRIAL 232: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.137
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000051
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0218
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.179


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5440, MCC: -0.0070 | Val Loss: 0.4984, MCC: 0.0000 | LR: 0.000051
Epoch 2/75 - Train Loss: 0.5179, MCC: 0.0424 | Val Loss: 0.4719, MCC: 0.0000 | LR: 0.000051
Epoch 3/75 - Train Loss: 

[I 2025-04-22 22:21:17,374] Trial 232 finished with value: 0.32483886124438593 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.13650136133914678, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.0697564239717814e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.021826586270323148, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.1787691115908668}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3186

CROSS-VALIDATION RESULTS
Median MCC: 0.3248
Mean MCC: 0.3207 ± 0.0147
Fold 1 MCC: 0.3248
Fold 2 MCC: 0.3351
Fold 3 MCC: 0.3315
Fold 4 MCC: 0.2935
Fold 5 MCC: 0.3186


TRIAL 233: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.140
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000046
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0320
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.176


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5555, MCC: 0.0067 | Val Loss: 0.4992, MCC: 0.0000 | LR: 0.000045
Epoch 2/75 - Train Loss: 0.5220, MCC: 0.0153 | Val Loss: 0.4774, MCC: 0.0000 | LR: 0.000045
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 22:25:10,788] Trial 233 finished with value: 0.3194100119908715 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.13969014381118752, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.551889538083933e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.03197951996560897, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.17590453718937232}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3107

CROSS-VALIDATION RESULTS
Median MCC: 0.3194
Mean MCC: 0.3191 ± 0.0104
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3194
Fold 3 MCC: 0.3308
Fold 4 MCC: 0.3046
Fold 5 MCC: 0.3107


TRIAL 234: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.144
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000049
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0239
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.182


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5765, MCC: 0.0056 | Val Loss: 0.5044, MCC: 0.0000 | LR: 0.000049
Epoch 2/75 - Train Loss: 0.5274, MCC: 0.0000 | Val Loss: 0.4863, MCC: 0.0000 | LR: 0.000049
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 22:28:39,992] Trial 234 finished with value: 0.2923104636720179 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.14386553920743295, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.913009734441927e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.02388003145384092, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18175794522929262}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3090

CROSS-VALIDATION RESULTS
Median MCC: 0.2923
Mean MCC: 0.2854 ± 0.0420
Fold 1 MCC: 0.2138
Fold 2 MCC: 0.2923
Fold 3 MCC: 0.3395
Fold 4 MCC: 0.2722
Fold 5 MCC: 0.3090


TRIAL 235: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.130
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000052
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0231
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.186


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5604, MCC: 0.0030 | Val Loss: 0.5131, MCC: 0.0000 | LR: 0.000052
Epoch 2/75 - Train Loss: 0.5227, MCC: 0.0156 | Val Loss: 0.4736, MCC: 0.0496 | LR: 0.000052
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 22:32:02,468] Trial 235 finished with value: 0.3275675187649259 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.1300553996580408, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.231259299277492e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.023110429890953122, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.1861172531991755}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3290

CROSS-VALIDATION RESULTS
Median MCC: 0.3276
Mean MCC: 0.3179 ± 0.0144
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3276
Fold 3 MCC: 0.3090
Fold 4 MCC: 0.2935
Fold 5 MCC: 0.3290


TRIAL 236: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.139
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000043
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0286
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.179


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5720, MCC: 0.0126 | Val Loss: 0.5089, MCC: 0.0000 | LR: 0.000043
Epoch 2/75 - Train Loss: 0.5280, MCC: 0.0221 | Val Loss: 0.4782, MCC: 0.0000 | LR: 0.000043
Epoch 3/75 - Train Loss: 0

[I 2025-04-22 22:35:18,156] Trial 236 finished with value: 0.32643515991587735 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.13866790860748093, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.338967587386275e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.028607068728145134, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.17915391950097928}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3275

CROSS-VALIDATION RESULTS
Median MCC: 0.3264
Mean MCC: 0.3116 ± 0.0228
Fold 1 MCC: 0.2987
Fold 2 MCC: 0.3327
Fold 3 MCC: 0.3264
Fold 4 MCC: 0.2726
Fold 5 MCC: 0.3275


TRIAL 237: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.148
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000025
  - batch_size: 128
  - use_phage_weights: False
  - weight_decay: 0.0256
  - scheduler_type: reduce_on_plateau
  - warmup_ratio: 0.172


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5382, MCC: -0.0025 | Val Loss: 0.4932, MCC: 0.0000 | LR: 0.000025
Epoch 2/75 - Train Loss: 0.5049, MCC: 0.0864 | Val Loss: 0.4575, MCC: 0.0000 | LR: 0.000025
Epoch 3/75 - Train

[I 2025-04-22 22:39:22,609] Trial 237 finished with value: 0.32927282695339666 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.14770038400230154, 'ln': True, 'activation_function': 'relu', 'learning_rate': 2.4633867488305706e-05, 'batch_size': 128, 'use_phage_weights': False, 'weight_decay': 0.025605792031677777, 'scheduler_type': 'reduce_on_plateau', 'warmup_ratio': 0.17211533415788907}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3293

CROSS-VALIDATION RESULTS
Median MCC: 0.3293
Mean MCC: 0.3210 ± 0.0194
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3276
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2825
Fold 5 MCC: 0.3293


TRIAL 238: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.134
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000048
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0013
  - scheduler_type: one_cycle
  - warmup_ratio: 0.197


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.7873, MCC: 0.0101 | Val Loss: 0.7142, MCC: -0.0325 | LR: 0.000002
Epoch 2/75 - Train Loss: 0.6811, MCC: -0.0063 | Val Loss: 0.5846, MCC: 0.0000 | LR: 0.000004
Epoch 3/75 - Train Loss: 0.5819

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2025-04-22 22:42:54,224] Trial 238 finished with value: 0.31712666185964217 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.13442034276913636, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.79810275504767e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.0012824523195736183, 'scheduler_type': 'one_cycle', 'warmup_ratio': 0.19733074218219218}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.0362

CROSS-VALIDATION RESULTS
Median MCC: 0.3171
Mean MCC: 0.2611 ± 0.1132
Fold 1 MCC: 0.3210
Fold 2 MCC: 0.3171
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2957
Fold 5 MCC: 0.0362


TRIAL 239: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.143
  - ln: False
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000054
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0318
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.184


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6140, MCC: 0.0023 | Val Loss: 0.5182, MCC: 0.0000 | LR: 0.000054
Epoch 2/75 - Train Loss: 0.5411, MCC: 0.0000 | Val Loss: 0.5167, MCC: 0.0000 | LR: 0.000054
Epoch 3/75 - Train 

[I 2025-04-22 22:49:35,553] Trial 239 finished with value: 0.2908005910510183 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.14299878374297062, 'ln': False, 'activation_function': 'relu', 'learning_rate': 5.3650464745870046e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.031786376127713935, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.1839528828694926}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.2447

CROSS-VALIDATION RESULTS
Median MCC: 0.2908
Mean MCC: 0.2758 ± 0.0294
Fold 1 MCC: 0.2908
Fold 2 MCC: 0.3058
Fold 3 MCC: 0.3016
Fold 4 MCC: 0.2362
Fold 5 MCC: 0.2447


TRIAL 240: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.126
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000099
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0057
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.190


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5617, MCC: -0.0075 | Val Loss: 0.5042, MCC: 0.0000 | LR: 0.000099
Epoch 2/75 - Train Loss: 0.5195, MCC: 0.0309 | Val Loss: 0.4698, MCC: 0.0919 | LR: 0.000099
Epoch 3/75 - Train Loss: 

[I 2025-04-22 22:52:46,643] Trial 240 finished with value: 0.3246688573386802 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.12627181850424732, 'ln': True, 'activation_function': 'relu', 'learning_rate': 9.895241766790986e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.0056620271733283595, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18952458525646526}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3239

CROSS-VALIDATION RESULTS
Median MCC: 0.3247
Mean MCC: 0.3224 ± 0.0117
Fold 1 MCC: 0.3275
Fold 2 MCC: 0.3247
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.3005
Fold 5 MCC: 0.3239


TRIAL 241: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.138
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000052
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0142
  - scheduler_type: one_cycle
  - warmup_ratio: 0.178


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.6821, MCC: 0.0420 | Val Loss: 0.6056, MCC: 0.0000 | LR: 0.000003
Epoch 2/75 - Train Loss: 0.6002, MCC: 0.0265 | Val Loss: 0.5229, MCC: 0.0000 | LR: 0.000005
Epoch 3/75 - Train Loss: 0.5468, 

[I 2025-04-22 22:56:59,042] Trial 241 finished with value: 0.32637647366065015 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.13803465357475794, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.238959263666423e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.014222071513774977, 'scheduler_type': 'one_cycle', 'warmup_ratio': 0.1778302309378577}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3275

CROSS-VALIDATION RESULTS
Median MCC: 0.3264
Mean MCC: 0.3203 ± 0.0155
Fold 1 MCC: 0.3216
Fold 2 MCC: 0.3264
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2906
Fold 5 MCC: 0.3275


TRIAL 242: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.131
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000052
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0152
  - scheduler_type: one_cycle
  - warmup_ratio: 0.176


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5734, MCC: 0.0102 | Val Loss: 0.5268, MCC: 0.0000 | LR: 0.000003
Epoch 2/75 - Train Loss: 0.5458, MCC: 0.0077 | Val Loss: 0.5151, MCC: 0.0000 | LR: 0.000005
Epoch 3/75 - Train Loss: 0.5408, 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2025-04-22 23:01:12,380] Trial 242 finished with value: 0.3327139421780493 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.13093404098520567, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.165346124813983e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.015163907494231675, 'scheduler_type': 'one_cycle', 'warmup_ratio': 0.1757253826512978}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.0235

CROSS-VALIDATION RESULTS
Median MCC: 0.3327
Mean MCC: 0.2694 ± 0.1279
Fold 1 MCC: 0.3830
Fold 2 MCC: 0.3327
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2726
Fold 5 MCC: 0.0235


TRIAL 243: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.129
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000050
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0191
  - scheduler_type: one_cycle
  - warmup_ratio: 0.164


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.7304, MCC: -0.0303 | Val Loss: 0.6536, MCC: 0.0000 | LR: 0.000003
Epoch 2/75 - Train Loss: 0.6255, MCC: -0.0080 | Val Loss: 0.5397, MCC: 0.0000 | LR: 0.000005
Epoch 3/75 - Train Loss: 0.5529

[I 2025-04-22 23:05:38,167] Trial 243 finished with value: 0.32758298206345926 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.12888505838384795, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.041107616503613e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.019071944191802907, 'scheduler_type': 'one_cycle', 'warmup_ratio': 0.16387267045488124}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3276

CROSS-VALIDATION RESULTS
Median MCC: 0.3276
Mean MCC: 0.3225 ± 0.0149
Fold 1 MCC: 0.3189
Fold 2 MCC: 0.3351
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2952
Fold 5 MCC: 0.3276


TRIAL 244: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.132
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000046
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0159
  - scheduler_type: one_cycle
  - warmup_ratio: 0.170


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5551, MCC: -0.0015 | Val Loss: 0.5159, MCC: 0.0000 | LR: 0.000003
Epoch 2/75 - Train Loss: 0.5404, MCC: -0.0047 | Val Loss: 0.5139, MCC: 0.0000 | LR: 0.000005
Epoch 3/75 - Train Loss: 0.5366

[I 2025-04-22 23:11:48,419] Trial 244 finished with value: 0.3301495479532531 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.13193495865620034, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.6105161494623053e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.015887578348157887, 'scheduler_type': 'one_cycle', 'warmup_ratio': 0.16971362843978016}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3251

CROSS-VALIDATION RESULTS
Median MCC: 0.3301
Mean MCC: 0.3271 ± 0.0123
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3400
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.3046
Fold 5 MCC: 0.3251


TRIAL 245: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 2
  - classifier_hidden_dim: 384
  - dropout: 0.144
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000061
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0117
  - scheduler_type: one_cycle
  - warmup_ratio: 0.173


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.8077, MCC: 0.0102 | Val Loss: 0.7136, MCC: 0.0575 | LR: 0.000003
Epoch 2/75 - Train Loss: 0.6599, MCC: 0.0107 | Val Loss: 0.5562, MCC: 0.0000 | LR: 0.000006
Epoch 3/75 - Train Loss: 0.5528, 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold 1 - Val MCC: 0.0575

FOLD 2/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.8272, MCC: -0.0034 | Val Loss: 0.7522, MCC: 0.0604 | LR: 0.000003
Epoch 2/75 - Train Loss: 0.6857, MCC: 0.0338 | Val Loss: 0.5978, MCC: 0.0000 | LR: 0.000006
Epoch 3/75 - Train Loss: 0.5646, MCC: 0.0165 | Val Loss: 0.5309, MCC: 0.0000 | LR: 0.000010
Epoch 4/75 - Train Loss: 0.5359, MCC: 0.0000 | Val Loss: 0.5207, MCC: 0.0000 | LR: 0.000015
Epoch 5/75 - Train Loss: 0.5233, MCC: 0.0000 | Val Loss: 0.5097, MCC: 0.0000 | LR: 0.000021
Epoch 6/75 - Train Loss: 0.5119, MCC: 0.0000 | Val Loss: 0.4910, MCC: 0.0000 | LR: 0.000028
Epoch 7/75 - Train Loss: 0.5037, MCC: 0.0366 | Val Loss: 0.4790, MCC: 0.0000 | LR: 0.000035
Epoch 8/75 - Train Loss: 0.4929, MCC: 0.1015 | Val Loss: 0.4769, MCC: 0.0000 | LR: 0.000042

Early stopping triggered at epoch 8. Best val MCC:

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold 2 - Val MCC: 0.0604

FOLD 3/5
Train set: 13632 interactions, 142 strains
Validation set: 3360 interactions, 35 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.7086, MCC: -0.0124 | Val Loss: 0.6430, MCC: 0.0000 | LR: 0.000003
Epoch 2/75 - Train Loss: 0.6140, MCC: -0.0068 | Val Loss: 0.5034, MCC: 0.0000 | LR: 0.000006
Epoch 3/75 - Train Loss: 0.5556, MCC: 0.0000 | Val Loss: 0.4510, MCC: 0.0000 | LR: 0.000010
Epoch 4/75 - Train Loss: 0.5477, MCC: 0.0000 | Val Loss: 0.4576, MCC: 0.0000 | LR: 0.000015
Epoch 5/75 - Train Loss: 0.5363, MCC: 0.0000 | Val Loss: 0.4517, MCC: 0.0000 | LR: 0.000021
Epoch 6/75 - Train Loss: 0.5243, MCC: 0.0100 | Val Loss: 0.4336, MCC: 0.0000 | LR: 0.000028
Epoch 7/75 - Train Loss: 0.5142, MCC: 0.0372 | Val Loss: 0.4348, MCC: 0.0922 | LR: 0.000035
Epoch 8/75 - Train Loss: 0.5061, MCC: 0.1181 | Val Loss: 0.4222, MCC: 0.0000 | LR: 0.000042
Epoch 9/75 - Train Loss: 0.4984, MCC: 0.1432 | Val

[I 2025-04-22 23:14:36,928] Trial 245 finished with value: 0.2796270802455048 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 2, 'classifier_hidden_dim': 384, 'dropout': 0.14361463173228922, 'ln': True, 'activation_function': 'relu', 'learning_rate': 6.054846049902681e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.011684280341686594, 'scheduler_type': 'one_cycle', 'warmup_ratio': 0.17346208537565846}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3186

CROSS-VALIDATION RESULTS
Median MCC: 0.2796
Mean MCC: 0.2075 ± 0.1222
Fold 1 MCC: 0.0575
Fold 2 MCC: 0.0604
Fold 3 MCC: 0.3212
Fold 4 MCC: 0.2796
Fold 5 MCC: 0.3186


TRIAL 246: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: max
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.135
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000056
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0176
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.183


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing max pooling for 177 strains...
Precomputing max pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5427, MCC: -0.0055 | Val Loss: 0.4917, MCC: 0.0000 | LR: 0.000056
Epoch 2/75 - Train Loss: 0.5138, MCC: 0.0228 | Val Loss: 0.4622, MCC: 0.0000 | LR: 0.000056
Epoch 3/75 - Train Loss: 0.4

[I 2025-04-22 23:18:14,662] Trial 246 finished with value: 0.3078881183284952 and parameters: {'pooling_type': 'max', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.1352258395965644, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.595306868358197e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.017591525935740395, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18304830220655186}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.2943

CROSS-VALIDATION RESULTS
Median MCC: 0.3079
Mean MCC: 0.3257 ± 0.0552
Fold 1 MCC: 0.4228
Fold 2 MCC: 0.3427
Fold 3 MCC: 0.2607
Fold 4 MCC: 0.3079
Fold 5 MCC: 0.2943


TRIAL 247: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: median
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.155
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000048
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0208
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.176


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing median pooling for 177 strains...
Precomputing median pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5532, MCC: -0.0091 | Val Loss: 0.5076, MCC: 0.0000 | LR: 0.000048
Epoch 2/75 - Train Loss: 0.5217, MCC: 0.0112 | Val Loss: 0.4730, MCC: 0.0000 | LR: 0.000048
Epoch 3/75 - Train 

[I 2025-04-22 23:22:26,489] Trial 247 finished with value: 0.3269384713823051 and parameters: {'pooling_type': 'median', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.1548542379648722, 'ln': True, 'activation_function': 'relu', 'learning_rate': 4.8330061336439664e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.020840827798345944, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.17626828424361807}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3258

CROSS-VALIDATION RESULTS
Median MCC: 0.3269
Mean MCC: 0.3143 ± 0.0309
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3269
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2528
Fold 5 MCC: 0.3258


TRIAL 248: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 512
  - dropout: 0.122
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000051
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0136
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.186


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5360, MCC: -0.0020 | Val Loss: 0.4773, MCC: 0.0000 | LR: 0.000051
Epoch 2/75 - Train Loss: 0.5131, MCC: 0.0416 | Val Loss: 0.4659, MCC: 0.0000 | LR: 0.000051
Epoch 3/75 - Train Loss: 

[I 2025-04-22 23:27:22,649] Trial 248 finished with value: 0.3301495479532531 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 512, 'dropout': 0.12202233244434393, 'ln': True, 'activation_function': 'relu', 'learning_rate': 5.070255956347065e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.013610580312042764, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18639925287866338}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3276

CROSS-VALIDATION RESULTS
Median MCC: 0.3301
Mean MCC: 0.3243 ± 0.0185
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3400
Fold 3 MCC: 0.3355
Fold 4 MCC: 0.2883
Fold 5 MCC: 0.3276


TRIAL 249: PARAMETER CONFIGURATION (WITH 5-FOLD CV)
Architecture Parameters:
  - pooling_type: mean
  - classifier_hidden_layers: 3
  - classifier_hidden_dim: 384
  - dropout: 0.149
  - ln: True
  - activation_function: relu
Training Parameters:
  - learning_rate: 0.000065
  - batch_size: 512
  - use_phage_weights: False
  - weight_decay: 0.0287
  - scheduler_type: cosine_annealing
  - warmup_ratio: 0.181


FOLD 1/5
Train set: 13536 interactions, 141 strains
Validation set: 3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1/75 - Train Loss: 0.5511, MCC: -0.0014 | Val Loss: 0.5003, MCC: 0.0000 | LR: 0.000065
Epoch 2/75 - Train Loss: 0.5198, MCC: 0.0036 | Val Loss: 0.4702, MCC: 0.0000 | LR: 0.000065
Epoch 3/75 - Train Loss: 

[I 2025-04-22 23:30:25,437] Trial 249 finished with value: 0.3167103185618868 and parameters: {'pooling_type': 'mean', 'classifier_hidden_layers': 3, 'classifier_hidden_dim': 384, 'dropout': 0.14942668504255233, 'ln': True, 'activation_function': 'relu', 'learning_rate': 6.461472911895777e-05, 'batch_size': 512, 'use_phage_weights': False, 'weight_decay': 0.028664522261363165, 'scheduler_type': 'cosine_annealing', 'warmup_ratio': 0.18124751752257026}. Best is trial 184 with value: 0.3355107650026586.


Fold 5 - Val MCC: 0.3167

CROSS-VALIDATION RESULTS
Median MCC: 0.3167
Mean MCC: 0.3087 ± 0.0228
Fold 1 MCC: 0.3301
Fold 2 MCC: 0.3276
Fold 3 MCC: 0.3014
Fold 4 MCC: 0.2679
Fold 5 MCC: 0.3167


Study completed!
Number of finished trials: 250
Best trial:
  Value (median MCC): 0.3355
  Params:
    pooling_type: mean
    classifier_hidden_layers: 3
    classifier_hidden_dim: 512
    dropout: 0.1399261039732264
    ln: True
    activation_function: relu
    learning_rate: 5.924978444690761e-05
    batch_size: 512
    use_phage_weights: False
    weight_decay: 0.033737044834691504
    scheduler_type: cosine_annealing
    warmup_ratio: 0.18417067616941543

Creating visualization plots...

Training the best model with 5 different seeds...

TRAINING FINAL MODEL WITH 5 DIFFERENT RANDOM SEEDS

SEED 1/5 (random_state=42)
Train set: 13536 interactions, 141 strains
Test set:  3456 interactions, 36 strains
Precomputing mean pooling for 177 strains...
Precomputing mean pooling for 97 phages...
Epoch 1

In [None]:
def generate_plots_for_saved_model(
    results_dir,
    model_name,
    interactions_path,
    strain_embeddings_path,
    phage_embeddings_path,
    output_dir=None
):
    """
    Generate confusion matrix, ROC curve, and PR curve for a saved model.

    The checkpoint is loaded from ``<results_dir>/models/``, the train/test
    split is re-created with the seed stored in the checkpoint, and the model
    is scored on the resulting test split. Plots, curve data, per-interaction
    predictions and summary metrics are written to ``output_dir``.

    Args:
        results_dir: Base directory where models are stored
        model_name: Name of model: 'best', 'median', a seed given as a digit
            string or an int (e.g. 42 or '42'), or a full checkpoint filename
        interactions_path: Path to interactions CSV
        strain_embeddings_path: Path to strain embeddings directory
        phage_embeddings_path: Path to phage embeddings directory
        output_dir: Directory to save the plots; defaults to
            ``<results_dir>/plots/model_<model_name>``

    Returns:
        Dictionary with the scalar metrics ('mcc', 'accuracy', 'precision',
        'recall', 'f1', 'roc_auc', 'pr_auc'), the 2x2 'confusion_matrix' and
        the per-interaction 'predictions_df'.

    Raises:
        ValueError: If the resolved checkpoint file does not exist.
    """
    import os
    import numpy as np
    import pandas as pd
    import torch
    import matplotlib.pyplot as plt
    import seaborn as sns
    from sklearn.metrics import (
        confusion_matrix, roc_curve, precision_recall_curve, auc,
        accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef
    )

    # Integer seeds are documented as valid input; normalise to str up front so
    # the string methods used below (and in the file names) work for them too.
    model_name = str(model_name)

    # Set up output directory
    if output_dir is None:
        output_dir = os.path.join(results_dir, "plots", f"model_{model_name}")
    os.makedirs(output_dir, exist_ok=True)

    # Format model name properly
    name_key = model_name.lower()
    if name_key in ('best', 'median'):
        model_filename = f"final_model_{name_key}.pt"
    elif model_name.isdigit():
        model_filename = f"final_model_seed_{model_name}.pt"
    else:
        model_filename = model_name  # In case the full filename was provided

    # Construct model path
    model_path = os.path.join(results_dir, "models", model_filename)

    # Check if model exists
    if not os.path.exists(model_path):
        raise ValueError(f"Model not found at {model_path}")

    # Load the checkpoint. The default load is attempted first; if it fails
    # (e.g. the weights-only unpickler rejects non-tensor objects stored in the
    # checkpoint) the load is retried with weights_only=False.
    # CAUTION: weights_only=False unpickles arbitrary objects and can execute
    # code embedded in the file -- only use this on checkpoints you created.
    print(f"Loading model from {model_path}")
    try:
        # First try loading normally
        checkpoint = torch.load(model_path, map_location='cpu')
    except Exception as e:
        print(f"Error loading model: {e}")
        print("Trying with weights_only=False...")
        # Try with weights_only=False
        checkpoint = torch.load(model_path, map_location='cpu', weights_only=False)

    # Get random state and parameters from the checkpoint
    random_state = checkpoint.get('seed', 42)
    params = checkpoint.get('best_params', {})
    pooling_type = params.get('pooling_type', 'mean')
    print(f"Using seed {random_state} and {pooling_type} pooling")

    # load_embeddings_flexible, filter_interactions_by_strain,
    # precompute_pooled_embeddings and PooledVectorModel are expected to be
    # defined earlier in the notebook.

    # Load the data
    print("Loading embeddings and interaction data...")
    strain_embeddings = load_embeddings_flexible(strain_embeddings_path)
    phage_embeddings = load_embeddings_flexible(phage_embeddings_path)
    interactions_df = pd.read_csv(interactions_path)

    # Filter to ensure we have embeddings for all strains/phages
    strain_keys = set(strain_embeddings.keys())
    phage_keys = set(phage_embeddings.keys())

    filtered_df = interactions_df[
        interactions_df['strain'].isin(strain_keys) &
        interactions_df['phage'].isin(phage_keys)
    ]

    print(f"Using {len(filtered_df)} interactions")

    # Split data using same random state as original
    train_df, test_df = filter_interactions_by_strain(filtered_df, random_state)
    print(f"Test set size: {len(test_df)} interactions")

    # Precompute pooled vectors using the same pooling method
    pooled_strains, pooled_phages = precompute_pooled_embeddings(
        strain_embeddings, phage_embeddings, pooling_type
    )

    # Reconstruct the model with the saved architecture
    first_strain_id = next(iter(pooled_strains))
    embedding_dim = pooled_strains[first_strain_id].shape[0]
    model = PooledVectorModel(
        embedding_dim=embedding_dim,
        dropout=params.get('dropout', 0.1),
        ln=params.get('ln', True),
        classifier_hidden_layers=params.get('classifier_hidden_layers', 1),
        classifier_hidden_dim=params.get('classifier_hidden_dim', None),
        activation_function=params.get('activation_function', "relu")
    )

    # Load the model weights
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()

    # Generate predictions
    print("Generating predictions on test data...")
    all_preds = []
    all_labels = []
    all_strains = []
    all_phages = []

    # Score the test interactions one pair at a time. Gradient tracking is
    # disabled once for the whole loop rather than per row.
    with torch.no_grad():
        for strain_id, phage_id, label in zip(
            test_df['strain'], test_df['phage'], test_df['interaction']
        ):
            # Get pooled vectors (a leading batch dimension of 1 is added)
            strain_vec = torch.tensor(pooled_strains[strain_id], dtype=torch.float32).unsqueeze(0)
            phage_vec = torch.tensor(pooled_phages[phage_id], dtype=torch.float32).unsqueeze(0)

            # Get prediction
            logit = model(strain_vec, phage_vec)
            pred = torch.sigmoid(logit).item()

            all_preds.append(pred)
            all_labels.append(label)
            all_strains.append(strain_id)
            all_phages.append(phage_id)

    # Convert to numpy arrays
    all_preds = np.array(all_preds)
    all_labels = np.array(all_labels)

    # Generate binary predictions
    binary_preds = (all_preds > 0.5).astype(int)

    # Calculate confusion matrix. labels=[0, 1] pins the shape to 2x2 even when
    # one class is missing from both the labels and the predictions; otherwise
    # the 4-way unpack and the labelled DataFrame below would fail.
    conf_matrix = confusion_matrix(all_labels, binary_preds, labels=[0, 1])

    # 1. Plot confusion matrix
    plt.figure(figsize=(8, 7))
    sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", cbar=False,
                xticklabels=['Predicted Negative', 'Predicted Positive'],
                yticklabels=['Actual Negative', 'Actual Positive'])
    plt.title(f"Confusion Matrix (Model: {model_name})")
    plt.xlabel("Predicted Label")
    plt.ylabel("True Label")
    plt.savefig(os.path.join(output_dir, f"model_{model_name}_confusion_matrix.png"),
                dpi=300, bbox_inches='tight')
    plt.close()

    # Save confusion matrix data
    pd.DataFrame(conf_matrix,
                 columns=['Predicted Negative', 'Predicted Positive'],
                 index=['Actual Negative', 'Actual Positive']).to_csv(
        os.path.join(output_dir, f"model_{model_name}_confusion_matrix.csv")
    )

    # 2. Plot ROC curve
    fpr, tpr, _ = roc_curve(all_labels, all_preds)
    roc_auc = auc(fpr, tpr)

    plt.figure(figsize=(8, 7))
    plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.3f})')
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(f'ROC Curve (Model: {model_name})')
    plt.legend(loc="lower right")
    plt.grid(alpha=0.3)
    plt.savefig(os.path.join(output_dir, f"model_{model_name}_roc_curve.png"),
                dpi=300, bbox_inches='tight')
    plt.close()

    # Save ROC curve data
    pd.DataFrame({'fpr': fpr, 'tpr': tpr}).to_csv(
        os.path.join(output_dir, f"model_{model_name}_roc_curve.csv"),
        index=False
    )

    # 3. Plot PR curve (area computed by trapezoidal integration over recall)
    precision_curve, recall_curve, _ = precision_recall_curve(all_labels, all_preds)
    pr_auc = auc(recall_curve, precision_curve)

    plt.figure(figsize=(8, 7))
    plt.plot(recall_curve, precision_curve, color='darkorange', lw=2,
             label=f'PR curve (area = {pr_auc:.3f})')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title(f'Precision-Recall Curve (Model: {model_name})')
    plt.legend(loc="lower left")
    plt.grid(alpha=0.3)
    plt.savefig(os.path.join(output_dir, f"model_{model_name}_pr_curve.png"),
                dpi=300, bbox_inches='tight')
    plt.close()

    # Save PR curve data
    pd.DataFrame({'recall': recall_curve, 'precision': precision_curve}).to_csv(
        os.path.join(output_dir, f"model_{model_name}_pr_curve.csv"),
        index=False
    )

    # Calculate metrics
    mcc = matthews_corrcoef(all_labels, binary_preds)
    accuracy = accuracy_score(all_labels, binary_preds)
    precision = precision_score(all_labels, binary_preds)
    recall = recall_score(all_labels, binary_preds)
    f1 = f1_score(all_labels, binary_preds)

    # Print summary
    tn, fp, fn, tp = conf_matrix.ravel()

    print(f"\n{'='*80}")
    print(f"EVALUATION SUMMARY FOR MODEL {model_name}")
    print(f"{'='*80}")
    print(f"MCC:        {mcc:.4f}")
    print(f"Accuracy:   {accuracy:.4f}")
    print(f"Precision:  {precision:.4f}")
    print(f"Recall:     {recall:.4f}")
    print(f"F1 Score:   {f1:.4f}")
    print(f"ROC AUC:    {roc_auc:.4f}")
    print(f"PR AUC:     {pr_auc:.4f}")

    print("\nConfusion Matrix:")
    print(f"  TN: {tn}, FP: {fp}")
    print(f"  FN: {fn}, TP: {tp}")

    n_pos = fn + tp
    n_neg = tn + fp
    print(f"Class balance: {n_pos} positive, {n_neg} negative ({n_pos/(n_pos+n_neg):.2%} positive)")
    print(f"{'='*80}\n")

    print(f"Plots saved to {output_dir}")

    # Save predictions and metrics
    predictions_df = pd.DataFrame({
        'strain': all_strains,
        'phage': all_phages,
        'true_label': all_labels,
        'prediction_prob': all_preds,
        'prediction_binary': binary_preds
    })

    predictions_df.to_csv(os.path.join(output_dir, f"model_{model_name}_predictions.csv"), index=False)

    metrics_df = pd.DataFrame({
        'metric': ['mcc', 'accuracy', 'precision', 'recall', 'f1', 'roc_auc', 'pr_auc'],
        'value': [mcc, accuracy, precision, recall, f1, roc_auc, pr_auc]
    })

    metrics_df.to_csv(os.path.join(output_dir, f"model_{model_name}_metrics.csv"), index=False)

    return {
        'mcc': mcc,
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'roc_auc': roc_auc,
        'pr_auc': pr_auc,
        'confusion_matrix': conf_matrix,
        'predictions_df': predictions_df
    }

# Google Drive locations of the Optuna run output and of the input data.
results_dir = '/content/drive/MyDrive/Arkin/set_transformer_data/glm_optuna_pooling_cv'
interactions_path = '/content/drive/MyDrive/Arkin/phage_public_datasets/e_coli/interaction_matrix.csv'
strain_embeddings_path = '/content/drive/MyDrive/Arkin/set_transformer_data/embedding_experiments/ecoli/current_embeddings/strain'
phage_embeddings_path = '/content/drive/MyDrive/Arkin/set_transformer_data/embedding_experiments/ecoli/current_embeddings/phage'

# Evaluate the 'best' checkpoint; as the last expression of the cell, the
# returned metrics dictionary is displayed as the cell output.
generate_plots_for_saved_model(
    results_dir,
    'best',
    interactions_path,
    strain_embeddings_path,
    phage_embeddings_path,
)

Loading model from /content/drive/MyDrive/Arkin/set_transformer_data/glm_optuna_pooling_cv/models/final_model_best.pt
Error loading model: Weights only load failed. This file can still be loaded, to do so you have two options, [1mdo those steps only if you trust the source of the checkpoint[0m. 
	(1) In PyTorch 2.6, we changed the default value of the `weights_only` argument in `torch.load` from `False` to `True`. Re-running `torch.load` with `weights_only` set to `False` will likely succeed, but it can result in arbitrary code execution. Do it only if you got the file from a trusted source.
	(2) Alternatively, to load with `weights_only=True` please check the recommended steps in the following error message.
	WeightsUnpickler error: Unsupported global: GLOBAL numpy._core.multiarray.scalar was not an allowed global by default. Please use `torch.serialization.add_safe_globals([scalar])` or the `torch.serialization.safe_globals([scalar])` context manager to allowlist this global if you

{'mcc': np.float64(0.3441049541532808),
 'accuracy': 0.7884837962962963,
 'precision': 0.5310457516339869,
 'recall': 0.4226267880364109,
 'f1': 0.4706734250543085,
 'roc_auc': np.float64(0.7546492455365936),
 'pr_auc': np.float64(0.46181844856983756),
 'confusion_matrix': array([[2400,  287],
        [ 444,  325]]),
 'predictions_df':       strain     phage  true_label  prediction_prob  prediction_binary
 0     ECOR54  55989_P2           0         0.517637                  1
 1     ECOR52  55989_P2           0         0.517607                  1
 2     ECOR51  55989_P2           1         0.517616                  1
 3     NILS72  55989_P2           1         0.567400                  1
 4     NILS29  55989_P2           1         0.569938                  1
 ...      ...       ...         ...              ...                ...
 3451  ECOR17   LM40_P3           0         0.116181                  0
 3452  NILS04   LM40_P3           0         0.155521                  0
 3453  NILS05  

## Best Parameter Cross-Validation

In [None]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from pathlib import Path
from tqdm import tqdm
from sklearn.model_selection import KFold
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    matthews_corrcoef, roc_auc_score, average_precision_score, confusion_matrix,
    roc_curve, precision_recall_curve, auc
)
import json

def _pool_embeddings(embeddings, pooling_type, desc):
    """
    Reduce every entry of an embeddings dict to a single pooled vector.

    Args:
        embeddings: Dict mapping an id to a 2-tuple whose first element is the
            embedding array; the second element is ignored.
            (assumes the array has one row per item so that dim 0 is the axis
            to pool over -- TODO confirm against load_embeddings_flexible)
        pooling_type: 'mean', 'max' or 'median'
        desc: Label shown on the tqdm progress bar

    Returns:
        Dict mapping each id to the pooled vector as a numpy array.

    Raises:
        ValueError: If pooling_type is not one of the supported names.
    """
    reducers = {
        'mean': lambda t: torch.mean(t, dim=0),
        # torch.max / torch.median along a dim return (values, indices)
        'max': lambda t: torch.max(t, dim=0)[0],
        'median': lambda t: torch.median(t, dim=0)[0],
    }
    if pooling_type not in reducers:
        raise ValueError(f"Unknown pooling type: {pooling_type}")
    reduce_fn = reducers[pooling_type]

    pooled = {}
    for entity_id, (embedding, _) in tqdm(embeddings.items(), desc=desc):
        embedding_tensor = torch.tensor(embedding, dtype=torch.float32)
        pooled[entity_id] = reduce_fn(embedding_tensor).numpy()
    return pooled


def run_ten_fold_cv_with_best_params(
    optuna_results_dir,
    interactions_path,
    strain_embeddings_path,
    phage_embeddings_path,
    n_folds=10,
    random_state=42,
    output_dir=None
):
    """
    Run strain-grouped k-fold cross-validation (10 folds by default) with the
    best parameters from Optuna optimization.

    Folds are formed over unique strains, so all interactions of a strain fall
    in the same fold. For every fold a fresh model is trained, evaluated on the
    held-out strains, and its checkpoint, predictions, plots and metrics are
    written under ``output_dir``.

    Args:
        optuna_results_dir: Directory containing the Optuna results to read
            the best parameters from
        interactions_path: Path to interactions CSV (needs 'strain' and
            'phage' columns)
        strain_embeddings_path: Path to strain embeddings directory
        phage_embeddings_path: Path to phage embeddings directory
        n_folds: Number of cross-validation folds
        random_state: Seed for the shuffled KFold split
        output_dir: Directory for all results; defaults to
            ``ten_fold_cv_results_<timestamp>`` in the working directory

    Returns:
        Dictionary with 'metrics_df' (one row per fold), 'combined_preds_df'
        (all held-out predictions), 'combined_results' and 'output_dir'.

    Raises:
        ValueError: If no interaction has both a strain and a phage embedding,
            or if the pooling type from the best parameters is unknown.
    """
    # Set up timestamp and results directory
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    if output_dir is None:
        output_dir = f"ten_fold_cv_results_{timestamp}"

    # Create directory structure
    os.makedirs(output_dir, exist_ok=True)
    for subdir in ("fold_results", "fold_plots", "combined_results"):
        os.makedirs(os.path.join(output_dir, subdir), exist_ok=True)

    # Load best parameters from Optuna results
    best_params = load_best_params(optuna_results_dir)

    # Extract model and training parameters. Defaults are applied here once;
    # the dicts are complete afterwards and are indexed directly below.
    model_params = {
        'dropout': best_params.get('dropout', 0.1),
        'ln': best_params.get('ln', True),
        'classifier_hidden_layers': best_params.get('classifier_hidden_layers', 1),
        'classifier_hidden_dim': best_params.get('classifier_hidden_dim', None),
        'activation_function': best_params.get('activation_function', 'relu')
    }

    train_params = {
        'num_epochs': 75,  # Set a reasonable default
        'learning_rate': best_params.get('learning_rate', 5e-5),
        'batch_size': best_params.get('batch_size', 512),
        'patience': 10,  # Set a reasonable default
        'use_phage_weights': best_params.get('use_phage_weights', True),
        'scheduler_type': best_params.get('scheduler_type', 'one_cycle'),
        'warmup_ratio': best_params.get('warmup_ratio', 0.1),
        'weight_decay': best_params.get('weight_decay', 0.01)
    }
    use_phage_weights = train_params['use_phage_weights']

    # Get pooling type from best parameters
    pooling_type = best_params.get('pooling_type', 'mean')

    # Save configuration
    config = {
        'best_params': best_params,
        'model_params': model_params,
        'train_params': train_params,
        'interactions_path': interactions_path,
        'strain_embeddings_path': strain_embeddings_path,
        'phage_embeddings_path': phage_embeddings_path,
        'pooling_type': pooling_type,
        'n_folds': n_folds,
        'random_state': random_state,
        'timestamp': timestamp,
        'optuna_results_dir': optuna_results_dir
    }

    with open(os.path.join(output_dir, "config.json"), 'w') as f:
        json.dump(config, f, indent=4)

    # Get device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")

    print(f"\n{'='*80}")
    print(f"STARTING {n_folds}-FOLD CROSS-VALIDATION WITH BEST PARAMETERS")
    print(f"{'='*80}")
    print(f"Optuna results directory: {optuna_results_dir}")
    print(f"Pooling type: {pooling_type}")
    print(f"Results directory: {output_dir}")
    print("\nBest parameters from Optuna:")
    for key, value in best_params.items():
        print(f"  {key}: {value}")
    print(f"{'='*80}")

    # Load data
    print("Loading embeddings...")
    strain_embeddings = load_embeddings_flexible(strain_embeddings_path)
    phage_embeddings = load_embeddings_flexible(phage_embeddings_path)

    print("Loading interaction data...")
    interactions_df = pd.read_csv(interactions_path)

    # Filter to ensure we have embeddings for all strains/phages
    strain_keys = set(strain_embeddings.keys())
    phage_keys = set(phage_embeddings.keys())

    filtered_df = interactions_df[
        interactions_df['strain'].isin(strain_keys) &
        interactions_df['phage'].isin(phage_keys)
    ]

    print(f"Original interactions: {len(interactions_df)}")
    print(f"Filtered interactions: {len(filtered_df)}")

    if len(filtered_df) == 0:
        raise ValueError("No interactions match the provided embeddings!")

    # Get unique strains for fold splitting
    unique_strains = filtered_df['strain'].unique()

    # Create KFold object for splitting by strain
    kf = KFold(n_splits=n_folds, shuffle=True, random_state=random_state)

    # Track metrics and predictions across folds
    fold_metrics = []
    all_predictions = []

    # Pooling is done locally (not via precompute_pooled_embeddings); it does
    # not depend on the fold, so it is computed once before the loop.
    print(f"Directly implementing {pooling_type} pooling...")
    pooled_strains = _pool_embeddings(strain_embeddings, pooling_type, "Pooling strains")
    pooled_phages = _pool_embeddings(phage_embeddings, pooling_type, "Pooling phages")

    print(f"Completed pooling for {len(pooled_strains)} strains and {len(pooled_phages)} phages")

    # Get embedding dimension
    first_strain_id = next(iter(pooled_strains))
    embedding_dim = pooled_strains[first_strain_id].shape[0]
    print(f"Embedding dimension: {embedding_dim}")

    # Run cross-validation
    for fold_idx, (train_idx, val_idx) in enumerate(kf.split(unique_strains)):
        fold_num = fold_idx + 1
        print(f"\n{'='*80}")
        print(f"FOLD {fold_num}/{n_folds}")
        print(f"{'='*80}")

        # Create fold-specific directories
        fold_dir = os.path.join(output_dir, "fold_results", f"fold_{fold_num}")
        os.makedirs(fold_dir, exist_ok=True)

        # Split strains into train and validation sets
        train_strains = unique_strains[train_idx]
        val_strains = unique_strains[val_idx]

        # Create dataframes based on strain splits
        train_df = filtered_df[filtered_df['strain'].isin(train_strains)]
        val_df = filtered_df[filtered_df['strain'].isin(val_strains)]

        print(f"Train set: {len(train_df)} interactions, {len(train_strains)} strains")
        print(f"Validation set: {len(val_df)} interactions, {len(val_strains)} strains")

        # Calculate phage-specific weights if needed (from the training split
        # only; the same mapping is handed to the validation dataset)
        if use_phage_weights:
            phage_weights = calculate_phage_specific_weights(train_df)
            print(f"Using phage-specific weights for {len(phage_weights)} phages")
        else:
            # Use a default weight of 1.0 for all phages
            phage_weights = {phage: 1.0 for phage in train_df['phage'].unique()}
            print("Using default weight of 1.0 for all phages")

        # Create datasets
        train_dataset = PrecomputedPooledDataset(train_df, pooled_strains, pooled_phages, phage_weights)
        val_dataset = PrecomputedPooledDataset(val_df, pooled_strains, pooled_phages, phage_weights)

        # Create dataloaders
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=train_params['batch_size'],
            shuffle=True,
            collate_fn=collate_pooled_vectors
        )

        val_loader = torch.utils.data.DataLoader(
            val_dataset,
            batch_size=train_params['batch_size'],
            shuffle=False,
            collate_fn=collate_pooled_vectors
        )

        # Initialize model (model_params holds exactly the architecture kwargs)
        model = PooledVectorModel(embedding_dim=embedding_dim, **model_params).to(device)

        # Train the model
        # NOTE(review): metrics_dir is the shared "fold_results" directory for
        # every fold rather than fold_dir -- confirm train_model does not
        # overwrite files from a previous fold there.
        history, _ = train_model(
            model,
            train_loader,
            val_loader,
            None,  # No trial object
            num_epochs=train_params['num_epochs'],
            learning_rate=train_params['learning_rate'],
            patience=train_params['patience'],
            device=device,
            use_phage_weights=use_phage_weights,
            scheduler_type=train_params['scheduler_type'],
            warmup_ratio=train_params['warmup_ratio'],
            weight_decay=train_params['weight_decay'],
            metrics_dir=os.path.join(output_dir, "fold_results")
        )

        # Save training history plot
        save_training_history_plot(history, fold_dir, fold_num)

        # Evaluate on validation set
        val_metrics = evaluate_full(model, val_loader, device, use_phage_weights)

        # Save model
        torch.save({
            'model_state_dict': model.state_dict(),
            'model_params': model_params,
            'train_params': train_params,
            'val_metrics': val_metrics,
            'fold': fold_num,
            'embedding_dim': embedding_dim
        }, os.path.join(fold_dir, f"model_fold_{fold_num}.pt"))

        # Save prediction results
        val_results = save_predictions(model, val_loader, device, fold_dir, use_phage_weights)

        # Add fold information
        val_results['fold'] = fold_num

        # Append to all predictions for combined analysis
        all_predictions.append(val_results)

        # Generate fold-specific plots
        generate_fold_plots(val_metrics, fold_num, output_dir)

        # Save detailed metrics (confusion matrix is indexed [actual, predicted])
        conf = val_metrics['conf_matrix']
        fold_metrics.append({
            'fold': fold_num,
            'accuracy': val_metrics['accuracy'],
            'precision': val_metrics['precision'],
            'recall': val_metrics['recall'],
            'f1': val_metrics['f1'],
            'mcc': val_metrics['mcc'],
            'roc_auc': val_metrics['roc_auc'],
            'pr_auc': val_metrics['pr_auc'],
            'tn': conf[0, 0],
            'fp': conf[0, 1],
            'fn': conf[1, 0],
            'tp': conf[1, 1]
        })

        # Print fold results
        print(f"\nFold {fold_num} Results:")
        print(f"Accuracy: {val_metrics['accuracy']:.4f}")
        print(f"Precision: {val_metrics['precision']:.4f}")
        print(f"Recall: {val_metrics['recall']:.4f}")
        print(f"F1: {val_metrics['f1']:.4f}")
        print(f"MCC: {val_metrics['mcc']:.4f}")
        print(f"ROC AUC: {val_metrics['roc_auc']:.4f}")
        print(f"PR AUC: {val_metrics['pr_auc']:.4f}")

    # Create metrics DataFrame
    metrics_df = pd.DataFrame(fold_metrics)

    # Combine all predictions
    combined_preds_df = pd.concat(all_predictions, ignore_index=True)
    combined_preds_df.to_csv(os.path.join(output_dir, "combined_results", "all_predictions.csv"), index=False)

    # Generate combined plots and statistics
    combined_results = generate_combined_results(metrics_df, combined_preds_df, output_dir)

    # Save fold metrics
    metrics_df.to_csv(os.path.join(output_dir, "combined_results", "fold_metrics.csv"), index=False)

    # Print final summary
    print_final_summary(combined_results, output_dir)

    # Return all results
    return {
        'metrics_df': metrics_df,
        'combined_preds_df': combined_preds_df,
        'combined_results': combined_results,
        'output_dir': output_dir
    }

def load_best_params(optuna_results_dir):
    """
    Locate and return the best hyperparameters of an Optuna run.

    Lookup order inside ``optuna_results_dir``:
      1. ``best_params.json``
      2. ``best_params_cv.json``
      3. ``study.pkl`` (a pickled object exposing ``best_params``)

    Args:
        optuna_results_dir: Directory containing Optuna results

    Returns:
        Dictionary with best parameters

    Raises:
        ValueError: If none of the files above exists in the directory.
    """
    json_candidates = (
        os.path.join(optuna_results_dir, filename)
        for filename in ("best_params.json", "best_params_cv.json")
    )
    json_path = next((p for p in json_candidates if os.path.exists(p)), None)

    if json_path is not None:
        print(f"Loading best parameters from {json_path}")
        with open(json_path, 'r') as handle:
            return json.load(handle)

    # No JSON export available: fall back to the pickled study.
    study_path = os.path.join(optuna_results_dir, "study.pkl")
    if not os.path.exists(study_path):
        raise ValueError(f"Could not find best parameters in {optuna_results_dir}")

    import pickle
    print(f"Loading study from {study_path}")
    # CAUTION: unpickling executes code embedded in the file; only point this
    # at study files produced by your own runs.
    with open(study_path, 'rb') as handle:
        study = pickle.load(handle)
    return study.best_params

def save_predictions(model, data_loader, device, output_dir, use_phage_weights=True):
    """
    Score every batch of ``data_loader`` and write the per-pair predictions
    to ``<output_dir>/predictions.csv``.

    Args:
        model: Trained model, called as ``model(strain_vec, phage_vec)`` and
            returning logits
        data_loader: Iterable of 6-tuples
            ``(strain_vec, phage_vec, labels, weights, strain_ids, phage_ids)``
        device: Device to run evaluation on
        output_dir: Directory to save results
        use_phage_weights: Accepted for signature compatibility; not used here

    Returns:
        DataFrame with columns 'strain', 'phage', 'true_label', 'confidence'
        (sigmoid probability) and 'predicted_label' (1 where confidence > 0.5)
    """
    model.eval()

    # One accumulator per output column, in the order they appear in the CSV.
    columns = {
        'strain': [],
        'phage': [],
        'true_label': [],
        'confidence': [],
        'predicted_label': [],
    }

    with torch.no_grad():
        for strain_batch, phage_batch, label_batch, _weights, strain_names, phage_names in data_loader:
            logits = model(strain_batch.to(device), phage_batch.to(device))
            batch_probs = torch.sigmoid(logits).cpu().numpy().flatten()

            columns['strain'].extend(strain_names)
            columns['phage'].extend(phage_names)
            columns['true_label'].extend(label_batch.cpu().numpy().flatten())
            columns['confidence'].extend(batch_probs)
            # Strict threshold: a probability of exactly 0.5 maps to class 0.
            columns['predicted_label'].extend((batch_probs > 0.5).astype(int))

    results_df = pd.DataFrame(columns)
    results_df.to_csv(os.path.join(output_dir, "predictions.csv"), index=False)
    return results_df

def generate_fold_plots(metrics, fold_num, output_dir):
    """
    Generate and save fold-specific plots and their underlying data.

    PNG figures (ROC curve, precision-recall curve, confusion matrix) go to
    ``<output_dir>/fold_plots``; the matching CSV files go to
    ``<output_dir>/fold_results/fold_<fold_num>``. Both directories are created
    if they do not exist.

    Args:
        metrics: Dictionary with evaluation metrics. Must contain 'roc_auc',
            'pr_auc' and 'conf_matrix'. Curve points are taken from
            'fpr'/'tpr' and 'precision_curve'/'recall_curve' when present and
            are otherwise computed from 'all_labels' and 'all_preds'.
        fold_num: Fold number
        output_dir: Base output directory
    """
    plots_dir = os.path.join(output_dir, "fold_plots")
    fold_dir = os.path.join(output_dir, "fold_results", f"fold_{fold_num}")
    os.makedirs(plots_dir, exist_ok=True)
    # The CSVs below are written here; create it so the function also works
    # when the caller has not prepared the per-fold directory.
    os.makedirs(fold_dir, exist_ok=True)

    # The AUC values are always the pre-calculated ones from `metrics`.
    roc_auc = metrics['roc_auc']
    pr_auc = metrics['pr_auc']

    # Calculate ROC curve if not already in metrics
    if 'fpr' in metrics and 'tpr' in metrics:
        fpr, tpr = metrics['fpr'], metrics['tpr']
    else:
        fpr, tpr, _ = roc_curve(metrics['all_labels'], metrics['all_preds'])

    # 1. ROC Curve
    fig, ax = plt.subplots(figsize=(8, 7))
    ax.plot(fpr, tpr, color='darkorange', lw=2,
            label=f'ROC curve (area = {roc_auc:.3f})')
    ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')  # chance line
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title(f'ROC Curve - Fold {fold_num}')
    ax.legend(loc="lower right")
    ax.grid(alpha=0.3)
    fig.savefig(os.path.join(plots_dir, f"fold_{fold_num}_roc_curve.png"), dpi=300, bbox_inches='tight')
    plt.close(fig)

    # Save ROC data (the scalar AUC is repeated on every row)
    roc_df = pd.DataFrame({'fpr': fpr, 'tpr': tpr, 'auc': roc_auc})
    roc_df.to_csv(os.path.join(fold_dir, f"fold_{fold_num}_roc_curve.csv"), index=False)

    # Calculate PR curve if not already in metrics
    if 'precision_curve' in metrics and 'recall_curve' in metrics:
        precision, recall = metrics['precision_curve'], metrics['recall_curve']
    else:
        precision, recall, _ = precision_recall_curve(metrics['all_labels'], metrics['all_preds'])

    # 2. Precision-Recall Curve
    fig, ax = plt.subplots(figsize=(8, 7))
    ax.plot(recall, precision, color='darkorange', lw=2,
            label=f'PR curve (area = {pr_auc:.3f})')
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('Recall')
    ax.set_ylabel('Precision')
    ax.set_title(f'Precision-Recall Curve - Fold {fold_num}')
    ax.legend(loc="lower left")
    ax.grid(alpha=0.3)
    fig.savefig(os.path.join(plots_dir, f"fold_{fold_num}_pr_curve.png"), dpi=300, bbox_inches='tight')
    plt.close(fig)

    # Save PR data (the scalar AUC is repeated on every row)
    pr_df = pd.DataFrame({'recall': recall, 'precision': precision, 'auc': pr_auc})
    pr_df.to_csv(os.path.join(fold_dir, f"fold_{fold_num}_pr_curve.csv"), index=False)

    # 3. Confusion Matrix
    cm = metrics['conf_matrix']

    fig, ax = plt.subplots(figsize=(8, 7))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", cbar=False,
                xticklabels=['Predicted Negative', 'Predicted Positive'],
                yticklabels=['Actual Negative', 'Actual Positive'],
                ax=ax)
    ax.set_title(f"Confusion Matrix - Fold {fold_num}")
    ax.set_xlabel("Predicted Label")
    ax.set_ylabel("True Label")
    fig.savefig(os.path.join(plots_dir, f"fold_{fold_num}_confusion_matrix.png"), dpi=300, bbox_inches='tight')
    plt.close(fig)

    # Save confusion matrix data
    cm_df = pd.DataFrame(cm,
                         columns=['Predicted Negative', 'Predicted Positive'],
                         index=['Actual Negative', 'Actual Positive'])
    cm_df.to_csv(os.path.join(fold_dir, f"fold_{fold_num}_confusion_matrix.csv"))

def generate_combined_results(metrics_df, predictions_df, output_dir):
    """
    Generate combined plots and metrics across all folds.

    Predictions from every fold are pooled and scored at a fixed 0.5
    threshold. The following files are written to
    ``<output_dir>/combined_results``: ``summary_metrics.csv``,
    ``combined_confusion_matrix.png`` / ``.csv``, ``combined_metrics.csv``,
    ``all_folds_roc_curve.png``, ``combined_roc_curve.csv``,
    ``all_folds_pr_curve.png``, ``combined_pr_curve.csv`` and
    ``metrics_by_fold.png``.

    Args:
        metrics_df: DataFrame with metrics for each fold. Must contain a
            'fold' column and the columns 'accuracy', 'precision', 'recall',
            'f1', 'mcc', 'roc_auc' and 'pr_auc'.
        predictions_df: DataFrame with all predictions. Must contain the
            columns 'true_label', 'confidence' and 'fold'.
        output_dir: Base output directory

    Returns:
        Dictionary with combined metrics: the pooled confusion matrix and
        scores, the pooled ROC / PR curve arrays, the per-metric aggregate
        statistics across folds ('agg_metrics') and the same statistics as a
        DataFrame ('summary_df').
    """
    combined_dir = os.path.join(output_dir, "combined_results")
    os.makedirs(combined_dir, exist_ok=True)

    metric_names = ['accuracy', 'precision', 'recall', 'f1', 'mcc', 'roc_auc', 'pr_auc']
    predicted_axis = ['Predicted Negative', 'Predicted Positive']
    actual_axis = ['Actual Negative', 'Actual Positive']

    # Extract all true labels and predictions
    y_true = predictions_df['true_label'].values
    y_pred_proba = predictions_df['confidence'].values
    y_pred = (y_pred_proba >= 0.5).astype(int)

    # Calculate combined confusion matrix. Pinning labels=[0, 1] guarantees a
    # 2x2 matrix even when only one class shows up in the labels/predictions,
    # so the four-way unpacking and the 2x2 axis labels below always fit.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    tn, fp, fn, tp = cm.ravel()

    # Calculate combined metrics. zero_division=0 yields the same value (0)
    # sklearn would return for an undefined score, without the warning.
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, zero_division=0)
    recall = recall_score(y_true, y_pred, zero_division=0)
    f1 = f1_score(y_true, y_pred, zero_division=0)
    mcc = matthews_corrcoef(y_true, y_pred)
    roc_auc = roc_auc_score(y_true, y_pred_proba)
    pr_auc = average_precision_score(y_true, y_pred_proba)

    # Calculate ROC and PR curves on the pooled predictions
    fpr, tpr, _ = roc_curve(y_true, y_pred_proba)
    precision_curve, recall_curve, _ = precision_recall_curve(y_true, y_pred_proba)

    # Calculate aggregate statistics across folds
    agg_metrics = {
        metric: {
            'mean': metrics_df[metric].mean(),
            'std': metrics_df[metric].std(),
            'median': metrics_df[metric].median(),
            'min': metrics_df[metric].min(),
            'max': metrics_df[metric].max()
        }
        for metric in metric_names
    }

    # Create summary DataFrame
    summary_df = pd.DataFrame([
        {
            'Metric': metric,
            'Mean': values['mean'],
            'Std': values['std'],
            'Median': values['median'],
            'Min': values['min'],
            'Max': values['max']
        }
        for metric, values in agg_metrics.items()
    ])
    summary_df.to_csv(os.path.join(combined_dir, "summary_metrics.csv"), index=False)

    # Save combined confusion matrix
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", cbar=False,
                xticklabels=predicted_axis,
                yticklabels=actual_axis)
    plt.title("Combined Confusion Matrix (All Folds)")
    plt.xlabel("Predicted Label")
    plt.ylabel("True Label")
    plt.savefig(os.path.join(combined_dir, "combined_confusion_matrix.png"), dpi=300, bbox_inches='tight')
    plt.close()

    # Save combined confusion matrix data
    cm_df = pd.DataFrame(cm, columns=predicted_axis, index=actual_axis)
    cm_df.to_csv(os.path.join(combined_dir, "combined_confusion_matrix.csv"))

    # Save combined metrics
    combined_metrics_df = pd.DataFrame([{
        'metric': 'Combined Results',
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'mcc': mcc,
        'roc_auc': roc_auc,
        'pr_auc': pr_auc,
        'tp': tp,
        'fp': fp,
        'tn': tn,
        'fn': fn
    }])
    combined_metrics_df.to_csv(os.path.join(combined_dir, "combined_metrics.csv"), index=False)

    fold_ids = sorted(predictions_df['fold'].unique())

    # Generate ROC curves for all folds on the same plot
    plt.figure(figsize=(12, 10))

    # Get ROC curves for individual folds
    for fold in fold_ids:
        fold_data = predictions_df[predictions_df['fold'] == fold]
        fold_fpr, fold_tpr, _ = roc_curve(fold_data['true_label'], fold_data['confidence'])
        fold_auc = auc(fold_fpr, fold_tpr)
        plt.plot(fold_fpr, fold_tpr, lw=1, alpha=0.6, label=f'Fold {fold} (AUC = {fold_auc:.3f})')

    # Add combined ROC curve
    plt.plot(fpr, tpr, color='blue', lw=2, alpha=0.9,
             label=f'Combined (AUC = {roc_auc:.3f})')

    plt.plot([0, 1], [0, 1], 'k--', lw=2)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curves for All Folds')
    plt.legend(loc="lower right")
    plt.grid(alpha=0.3)
    plt.savefig(os.path.join(combined_dir, "all_folds_roc_curve.png"), dpi=300, bbox_inches='tight')
    plt.close()

    # Save the combined (pooled) ROC data; the scalar AUC is repeated on every row
    roc_df = pd.DataFrame({'fpr': fpr, 'tpr': tpr, 'auc': roc_auc})
    roc_df.to_csv(os.path.join(combined_dir, "combined_roc_curve.csv"), index=False)

    # Generate PR curves for all folds on the same plot
    plt.figure(figsize=(12, 10))

    # Get PR curves for individual folds
    for fold in fold_ids:
        fold_data = predictions_df[predictions_df['fold'] == fold]
        fold_precision, fold_recall, _ = precision_recall_curve(fold_data['true_label'], fold_data['confidence'])
        # Use average precision, the same estimator as the combined curve
        # below, so every number in the legend is directly comparable.
        fold_auc = average_precision_score(fold_data['true_label'], fold_data['confidence'])
        plt.plot(fold_recall, fold_precision, lw=1, alpha=0.6, label=f'Fold {fold} (AUC = {fold_auc:.3f})')

    # Add combined PR curve
    plt.plot(recall_curve, precision_curve, color='blue', lw=2, alpha=0.9,
             label=f'Combined (AUC = {pr_auc:.3f})')

    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Precision-Recall Curves for All Folds')
    plt.legend(loc="lower left")
    plt.grid(alpha=0.3)
    plt.savefig(os.path.join(combined_dir, "all_folds_pr_curve.png"), dpi=300, bbox_inches='tight')
    plt.close()

    # Save combined PR data; the scalar average precision is repeated on every row
    pr_df = pd.DataFrame({'recall': recall_curve, 'precision': precision_curve, 'auc': pr_auc})
    pr_df.to_csv(os.path.join(combined_dir, "combined_pr_curve.csv"), index=False)

    # Plot metrics across folds: one bar-chart panel per metric on a 3x3 grid
    plt.figure(figsize=(15, 12))

    for i, metric in enumerate(metric_names):
        fold_mean = metrics_df[metric].mean()
        plt.subplot(3, 3, i+1)
        sns.barplot(x='fold', y=metric, data=metrics_df, color='skyblue')
        plt.axhline(y=fold_mean, color='red', linestyle='-',
                    label=f'Mean: {fold_mean:.3f}')
        plt.xlabel('Fold')
        plt.ylabel(metric.capitalize())
        plt.title(f'{metric.capitalize()} by Fold')
        plt.legend()
        plt.grid(alpha=0.3)

    plt.tight_layout()
    plt.savefig(os.path.join(combined_dir, "metrics_by_fold.png"), dpi=300, bbox_inches='tight')
    plt.close()

    # Return combined results
    return {
        'confusion_matrix': cm,
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'mcc': mcc,
        'roc_auc': roc_auc,
        'pr_auc': pr_auc,
        'tp': int(tp),
        'fp': int(fp),
        'tn': int(tn),
        'fn': int(fn),
        'fpr': fpr,
        'tpr': tpr,
        'precision_curve': precision_curve,
        'recall_curve': recall_curve,
        'agg_metrics': agg_metrics,
        'summary_df': summary_df
    }

def print_final_summary(combined_results, output_dir):
    """
    Print final summary of cross-validation results.

    Three sections are printed: mean ± std of each metric across folds
    (read from ``combined_results['agg_metrics']``), the metrics computed on
    the pooled predictions, and the pooled confusion-matrix counts.

    Args:
        combined_results: Dictionary with combined metrics
        output_dir: Output directory
    """
    # (display label, dictionary key) pairs, in print order
    labelled_metrics = (
        ('Accuracy', 'accuracy'),
        ('Precision', 'precision'),
        ('Recall', 'recall'),
        ('F1', 'f1'),
        ('MCC', 'mcc'),
        ('ROC AUC', 'roc_auc'),
        ('PR AUC', 'pr_auc'),
    )
    per_fold_stats = combined_results['agg_metrics']
    rule = '=' * 80

    print(f"\n{rule}")
    print("CROSS-VALIDATION RESULTS SUMMARY")
    print(rule)
    for label, key in labelled_metrics:
        stats = per_fold_stats[key]
        print(f"Mean {label}: {stats['mean']:.4f} ± {stats['std']:.4f}")

    print("\nCombined metrics (across all fold predictions):")
    for label, key in labelled_metrics:
        print(f"Combined {label}: {combined_results[key]:.4f}")

    print("\nConfusion Matrix:")
    print(f"TN: {combined_results['tn']}, FP: {combined_results['fp']}")
    print(f"FN: {combined_results['fn']}, TP: {combined_results['tp']}")

    print(rule)
    print(f"All results saved to: {output_dir}")

def save_training_history_plot(history, output_dir, fold_num):
    """
    Save training history plots (loss and metrics over epochs).

    Writes ``history_fold_<fold_num>.png`` (loss and MCC side by side) and
    ``history_fold_<fold_num>.csv`` into ``output_dir``. When ``history`` has
    an 'lr' entry with at least one non-None value, a learning-rate plot
    ``lr_schedule_fold_<fold_num>.png`` is written as well.

    Args:
        history: Dictionary containing training history; read keys are
            'train_loss', 'val_loss', 'train_mcc', 'val_mcc' and optionally 'lr'
        output_dir: Directory to save plots
        fold_num: Fold number for filename
    """
    def draw_panel(position, train_key, val_key, quantity):
        # One subplot: training curve in blue, validation curve in red,
        # both plotted against 1-based epoch numbers.
        plt.subplot(1, 2, position)
        curves = ((train_key, 'b-', 'Training'), (val_key, 'r-', 'Validation'))
        for key, line_style, split_name in curves:
            series = history[key]
            epochs = range(1, len(series) + 1)
            plt.plot(epochs, series, line_style, label=f'{split_name} {quantity}')
        plt.xlabel('Epoch')
        plt.ylabel(quantity)
        plt.title(f'Fold {fold_num}: {quantity} vs. Epochs')
        plt.legend()
        plt.grid(alpha=0.3)

    plt.figure(figsize=(15, 6))
    draw_panel(1, 'train_loss', 'val_loss', 'Loss')
    draw_panel(2, 'train_mcc', 'val_mcc', 'MCC')

    # Save the figure
    plt.tight_layout()
    figure_path = os.path.join(output_dir, f"history_fold_{fold_num}.png")
    plt.savefig(figure_path, dpi=300, bbox_inches='tight')
    plt.close()

    # Save history data as CSV; the epoch count is taken from the training loss
    n_epochs = len(history['train_loss'])
    columns = {'epoch': range(1, n_epochs + 1)}
    for key in ('train_loss', 'val_loss', 'train_mcc', 'val_mcc'):
        columns[key] = history[key]
    # Include learning rate if available, otherwise a column of None
    columns['lr'] = history.get('lr', [None] * n_epochs)

    csv_path = os.path.join(output_dir, f"history_fold_{fold_num}.csv")
    pd.DataFrame(columns).to_csv(csv_path, index=False)

    # Also save learning rate plot if available
    has_lr_values = 'lr' in history and any(value is not None for value in history['lr'])
    if has_lr_values:
        lr_series = history['lr']
        plt.figure(figsize=(8, 5))
        plt.plot(range(1, len(lr_series) + 1), lr_series, 'g-')
        plt.xlabel('Epoch')
        plt.ylabel('Learning Rate')
        plt.title(f'Fold {fold_num}: Learning Rate Schedule')
        plt.grid(alpha=0.3)
        plt.savefig(os.path.join(output_dir, f"lr_schedule_fold_{fold_num}.png"), dpi=300, bbox_inches='tight')
        plt.close()

In [None]:
def _group_performance(preds_df, group_col, threshold=0.5):
    """Return one row per `group_col` value with its count, accuracy and MCC at `threshold`."""
    records = []
    for group_value, group_df in preds_df.groupby(group_col):
        true_labels = group_df['true_label']
        hard_preds = (group_df['confidence'] >= threshold).astype(int)
        records.append({
            group_col: group_value,
            'count': len(group_df),
            'accuracy': accuracy_score(true_labels, hard_preds),
            # Groups whose true labels hold a single class are reported as
            # MCC 0 without calling matthews_corrcoef.
            'mcc': matthews_corrcoef(true_labels, hard_preds)
                   if len(set(true_labels)) > 1 else 0
        })
    # Explicit columns keep the frame well-formed even when there are no groups
    return pd.DataFrame(records, columns=[group_col, 'count', 'accuracy', 'mcc'])


def _plot_top_groups_by_mcc(perf_df, group_col, axis_label, out_path, min_count=10, top_n=20):
    """Bar-plot MCC for the `top_n` best groups that have at least `min_count` predictions."""
    top_groups = (
        perf_df[perf_df['count'] >= min_count]
        .sort_values('mcc', ascending=False)
        .head(top_n)  # Limit for readability
    )

    plt.figure(figsize=(12, 8))
    sns.barplot(x=group_col, y='mcc', data=top_groups)
    plt.xticks(rotation=90)
    plt.xlabel(axis_label)
    plt.ylabel('MCC')
    plt.title(f'Performance by {axis_label} (MCC)')
    plt.tight_layout()
    plt.savefig(out_path, dpi=300, bbox_inches='tight')
    plt.close()


def analyze_cv_results(cv_results_dir):
    """
    Analyze cross-validation results to generate additional insights.

    Reads ``<cv_results_dir>/combined_results/all_predictions.csv`` (columns
    'true_label' and 'confidence'; optionally 'strain' and 'phage'), sweeps
    the classification threshold from 0.1 to 0.9, re-scores the predictions
    at the MCC-optimal threshold and, when strain/phage columns are present,
    breaks performance down per strain and per phage at a 0.5 threshold.
    All artefacts are written to ``<cv_results_dir>/analysis``.

    Args:
        cv_results_dir: Directory containing CV results

    Returns:
        Dictionary with additional analysis: 'optimal_thresholds' and
        'optimal_values' (per metric), 'optimal_metrics' (scores and
        confusion-matrix counts at the MCC-optimal threshold) and
        'analysis_dir'.

    Raises:
        ValueError: If the combined predictions file does not exist.
    """
    print(f"Analyzing results in: {cv_results_dir}")

    # Create directory for additional analysis
    analysis_dir = os.path.join(cv_results_dir, "analysis")
    os.makedirs(analysis_dir, exist_ok=True)

    # Load combined predictions
    predictions_path = os.path.join(cv_results_dir, "combined_results", "all_predictions.csv")
    if not os.path.exists(predictions_path):
        raise ValueError(f"Predictions file not found: {predictions_path}")

    all_preds_df = pd.read_csv(predictions_path)
    print(f"Loaded {len(all_preds_df)} predictions across all folds")

    # Extract true labels and predictions
    y_true = all_preds_df['true_label'].values
    y_pred_proba = all_preds_df['confidence'].values

    # Find optimal threshold using different metrics
    thresholds = np.linspace(0.1, 0.9, 81)  # Check thresholds from 0.1 to 0.9 in steps of 0.01

    # Arrays to store metrics for each threshold
    accuracies = []
    precisions = []
    recalls = []
    f1_scores = []
    mccs = []

    print("Finding optimal thresholds for different metrics...")
    for threshold in thresholds:
        y_pred = (y_pred_proba >= threshold).astype(int)

        # zero_division=0 gives the value sklearn already falls back to for an
        # undefined score, but without a warning at every extreme threshold.
        accuracies.append(accuracy_score(y_true, y_pred))
        precisions.append(precision_score(y_true, y_pred, zero_division=0))
        recalls.append(recall_score(y_true, y_pred, zero_division=0))
        f1_scores.append(f1_score(y_true, y_pred, zero_division=0))
        mccs.append(matthews_corrcoef(y_true, y_pred))

    metric_curves = {
        'accuracy': accuracies,
        'precision': precisions,
        'recall': recalls,
        'f1': f1_scores,
        'mcc': mccs
    }

    # Find optimal thresholds (first threshold reaching the maximum) and values
    optimal_thresholds = {
        metric: thresholds[np.argmax(values)] for metric, values in metric_curves.items()
    }
    optimal_values = {
        metric: np.max(values) for metric, values in metric_curves.items()
    }

    # Save thresholds
    thresholds_df = pd.DataFrame({'threshold': thresholds, **metric_curves})
    thresholds_df.to_csv(os.path.join(analysis_dir, "threshold_analysis.csv"), index=False)

    # Plot threshold vs metrics
    plt.figure(figsize=(12, 8))
    plt.plot(thresholds, accuracies, label='Accuracy')
    plt.plot(thresholds, precisions, label='Precision')
    plt.plot(thresholds, recalls, label='Recall')
    plt.plot(thresholds, f1_scores, label='F1')
    plt.plot(thresholds, mccs, label='MCC')

    # Mark optimal thresholds
    for metric, threshold in optimal_thresholds.items():
        plt.axvline(x=threshold, color='gray', linestyle='--', alpha=0.5)
        plt.text(threshold, 0.4, f"{metric}: {threshold:.2f}", rotation=90, alpha=0.7)

    plt.xlabel('Threshold')
    plt.ylabel('Metric Value')
    plt.title('Effect of Classification Threshold on Metrics')
    plt.legend()
    plt.grid(alpha=0.3)
    plt.savefig(os.path.join(analysis_dir, "threshold_metrics.png"), dpi=300, bbox_inches='tight')
    plt.close()

    # Apply the optimal MCC threshold and recalculate metrics
    optimal_threshold = optimal_thresholds['mcc']
    y_pred_optimal = (y_pred_proba >= optimal_threshold).astype(int)

    # Recalculate confusion matrix and metrics. labels=[0, 1] guarantees a 2x2
    # matrix so the [row, col] lookups below are always valid.
    cm_optimal = confusion_matrix(y_true, y_pred_optimal, labels=[0, 1])

    # Cast to built-in float/int: json.dump cannot serialise numpy integers
    # such as the entries of the confusion matrix.
    optimal_metrics = {
        'threshold': float(optimal_threshold),
        'accuracy': float(accuracy_score(y_true, y_pred_optimal)),
        'precision': float(precision_score(y_true, y_pred_optimal, zero_division=0)),
        'recall': float(recall_score(y_true, y_pred_optimal, zero_division=0)),
        'f1': float(f1_score(y_true, y_pred_optimal, zero_division=0)),
        'mcc': float(matthews_corrcoef(y_true, y_pred_optimal)),
        'tn': int(cm_optimal[0, 0]),
        'fp': int(cm_optimal[0, 1]),
        'fn': int(cm_optimal[1, 0]),
        'tp': int(cm_optimal[1, 1])
    }

    # Save optimal metrics
    with open(os.path.join(analysis_dir, "optimal_threshold_metrics.json"), 'w') as f:
        json.dump(optimal_metrics, f, indent=4)

    # Generate confusion matrix with optimal threshold
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm_optimal, annot=True, fmt="d", cmap="Blues", cbar=False,
                xticklabels=['Predicted Negative', 'Predicted Positive'],
                yticklabels=['Actual Negative', 'Actual Positive'])
    plt.title(f"Confusion Matrix with Optimal Threshold ({optimal_threshold:.2f})")
    plt.xlabel("Predicted Label")
    plt.ylabel("True Label")
    plt.savefig(os.path.join(analysis_dir, "optimal_threshold_confusion_matrix.png"), dpi=300, bbox_inches='tight')
    plt.close()

    # Analysis by strain and phage (scored at the default 0.5 threshold)
    if 'strain' in all_preds_df.columns and 'phage' in all_preds_df.columns:
        print("Analyzing performance by strain and phage...")

        strain_metrics_df = _group_performance(all_preds_df, 'strain')
        strain_metrics_df.to_csv(os.path.join(analysis_dir, "strain_performance.csv"), index=False)

        phage_metrics_df = _group_performance(all_preds_df, 'phage')
        phage_metrics_df.to_csv(os.path.join(analysis_dir, "phage_performance.csv"), index=False)

        _plot_top_groups_by_mcc(
            strain_metrics_df, 'strain', 'Strain',
            os.path.join(analysis_dir, "strain_performance.png")
        )
        _plot_top_groups_by_mcc(
            phage_metrics_df, 'phage', 'Phage',
            os.path.join(analysis_dir, "phage_performance.png")
        )

    # Print summary of analysis
    print(f"\n{'='*80}")
    print("ANALYSIS SUMMARY")
    print(f"{'='*80}")
    print("Optimal thresholds:")
    for metric, threshold in optimal_thresholds.items():
        print(f"  {metric}: {threshold:.3f} (value: {optimal_values[metric]:.4f})")
    print(f"\nRecalculated metrics with optimal threshold ({optimal_threshold:.3f}):")
    print(f"  Accuracy:  {optimal_metrics['accuracy']:.4f}")
    print(f"  Precision: {optimal_metrics['precision']:.4f}")
    print(f"  Recall:    {optimal_metrics['recall']:.4f}")
    print(f"  F1 Score:  {optimal_metrics['f1']:.4f}")
    print(f"  MCC:       {optimal_metrics['mcc']:.4f}")
    print(f"{'='*80}")
    print(f"Analysis results saved to: {analysis_dir}")

    return {
        'optimal_thresholds': optimal_thresholds,
        'optimal_values': optimal_values,
        'optimal_metrics': optimal_metrics,
        'analysis_dir': analysis_dir
    }

In [None]:
def main_cv_workflow(
    interactions_path,
    strain_embeddings_path,
    phage_embeddings_path,
    pooling_type='mean',
    n_folds=10,
    random_state=42,
    output_dir=None,
    model_params=None,
    train_params=None
):
    """
    Run the complete k-fold cross-validation workflow (10 folds by default).

    Calls ``run_ten_fold_cv`` with the model/training parameters below and
    then ``analyze_cv_results`` on the same output directory.

    Args:
        interactions_path: Path to CSV with interaction data
        strain_embeddings_path: Path to directory containing strain embeddings
        phage_embeddings_path: Path to directory containing phage embeddings
        pooling_type: Type of pooling to use ('mean', 'max', or 'median')
        n_folds: Number of folds for cross-validation
        random_state: Random seed for reproducibility
        output_dir: Output directory for results. When None, a directory
            named ``cv_results_<pooling_type>_<timestamp>`` is used.
        model_params: Optional dict of overrides merged on top of the default
            model parameters defined in this function
        train_params: Optional dict of overrides merged on top of the default
            training parameters defined in this function

    Returns:
        Dictionary with results: 'cv_results', 'analysis_results' and
        'output_dir'.
    """
    if output_dir is None:
        # Imported locally so the call works whether the notebook-level name
        # `datetime` is bound to the module (`import datetime`) or the class.
        from datetime import datetime

        # Set timestamp for results directory
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_dir = f"cv_results_{pooling_type}_{timestamp}"

    print(f"\n{'='*80}")
    print(f"STARTING {n_folds}-FOLD CROSS-VALIDATION WORKFLOW")
    print(f"{'='*80}")
    print(f"Interactions file: {interactions_path}")
    print(f"Strain embeddings: {strain_embeddings_path}")
    print(f"Phage embeddings: {phage_embeddings_path}")
    print(f"Pooling type: {pooling_type}")
    print(f"Output directory: {output_dir}")
    print(f"{'='*80}\n")

    # Model parameters (defaults; caller-supplied entries take precedence)
    default_model_params = {
        'dropout': 0.1,
        'ln': True,
        'classifier_hidden_layers': 1,
        'classifier_hidden_dim': None,  # Will be set to embedding_dim if None
        'activation_function': 'relu'
    }
    model_params = {**default_model_params, **(model_params or {})}

    # Training parameters (defaults; caller-supplied entries take precedence)
    default_train_params = {
        'num_epochs': 75,
        'learning_rate': 5e-5,
        'batch_size': 512,
        'patience': 10,
        'use_phage_weights': True,
        'scheduler_type': 'one_cycle',
        'warmup_ratio': 0.1,
        'weight_decay': 0.01
    }
    train_params = {**default_train_params, **(train_params or {})}

    # Run cross-validation
    cv_results = run_ten_fold_cv(
        model_params=model_params,
        train_params=train_params,
        interactions_path=interactions_path,
        strain_embeddings_path=strain_embeddings_path,
        phage_embeddings_path=phage_embeddings_path,
        pooling_type=pooling_type,
        n_folds=n_folds,
        random_state=random_state,
        output_dir=output_dir
    )

    # Run additional analysis
    analysis_results = analyze_cv_results(output_dir)

    # Return combined results
    return {
        'cv_results': cv_results,
        'analysis_results': analysis_results,
        'output_dir': output_dir
    }

In [None]:
# Run 10-fold cross-validation through run_ten_fold_cv_with_best_params, pointing
# it at the Optuna results directory, the interaction matrix CSV and the
# strain / phage embedding directories. The return value is kept in the
# notebook-level name `results`.
# NOTE(review): all paths are absolute locations under /content/drive/MyDrive;
# presumably Google Drive must be mounted before this cell runs — confirm.
results = run_ten_fold_cv_with_best_params(
    optuna_results_dir='/content/drive/MyDrive/Arkin/set_transformer_data/glm_optuna_pooling_cv',
    interactions_path = '/content/drive/MyDrive/Arkin/phage_public_datasets/e_coli/interaction_matrix.csv',
    strain_embeddings_path = '/content/drive/MyDrive/Arkin/set_transformer_data/embedding_experiments/ecoli/current_embeddings/strain',
    phage_embeddings_path = '/content/drive/MyDrive/Arkin/set_transformer_data/embedding_experiments/ecoli/current_embeddings/phage',
    n_folds=10,
    output_dir="/content/drive/MyDrive/Arkin/set_transformer_data/glm_optuna_pooling_cv/ten_fold_cv_results"
)

Loading best parameters from /content/drive/MyDrive/Arkin/set_transformer_data/glm_optuna_pooling_cv/best_params_cv.json
Using device: cuda

STARTING 10-FOLD CROSS-VALIDATION WITH BEST PARAMETERS
Optuna results directory: /content/drive/MyDrive/Arkin/set_transformer_data/glm_optuna_pooling_cv
Pooling type: mean
Results directory: /content/drive/MyDrive/Arkin/set_transformer_data/glm_optuna_pooling_cv/ten_fold_cv_results

Best parameters from Optuna:
  pooling_type: mean
  classifier_hidden_layers: 3
  classifier_hidden_dim: 512
  dropout: 0.1399261039732264
  ln: True
  activation_function: relu
  learning_rate: 5.924978444690761e-05
  batch_size: 512
  use_phage_weights: False
  weight_decay: 0.033737044834691504
  scheduler_type: cosine_annealing
  warmup_ratio: 0.18417067616941543
Loading embeddings...
Successfully loaded 177 embeddings
Successfully loaded 97 embeddings
Loading interaction data...
Original interactions: 38592
Filtered interactions: 16992
Directly implementing mean p

Pooling strains: 100%|██████████| 177/177 [00:00<00:00, 4399.83it/s]
Pooling phages: 100%|██████████| 97/97 [00:00<00:00, 24924.80it/s]

Completed pooling for 177 strains and 97 phages
Embedding dimension: 384

FOLD 1/10
Train set: 15264 interactions, 159 strains
Validation set: 1728 interactions, 18 strains
Using default weight of 1.0 for all phages





Epoch 1/75 - Train Loss: 0.5342, MCC: -0.0019 | Val Loss: 0.4655, MCC: 0.0000 | LR: 0.000059
Epoch 2/75 - Train Loss: 0.5022, MCC: 0.0710 | Val Loss: 0.4525, MCC: 0.0639 | LR: 0.000059
Epoch 3/75 - Train Loss: 0.4929, MCC: 0.1214 | Val Loss: 0.4386, MCC: 0.0639 | LR: 0.000059
Epoch 4/75 - Train Loss: 0.4902, MCC: 0.1314 | Val Loss: 0.4525, MCC: 0.2317 | LR: 0.000059
Epoch 5/75 - Train Loss: 0.4903, MCC: 0.1575 | Val Loss: 0.4380, MCC: 0.1395 | LR: 0.000059
Epoch 6/75 - Train Loss: 0.4878, MCC: 0.1618 | Val Loss: 0.4394, MCC: 0.2317 | LR: 0.000058
Epoch 7/75 - Train Loss: 0.4845, MCC: 0.1550 | Val Loss: 0.4282, MCC: 0.2553 | LR: 0.000058
Epoch 8/75 - Train Loss: 0.4817, MCC: 0.1811 | Val Loss: 0.4239, MCC: 0.3579 | LR: 0.000058
Epoch 9/75 - Train Loss: 0.4792, MCC: 0.1917 | Val Loss: 0.4217, MCC: 0.2772 | LR: 0.000057
Epoch 10/75 - Train Loss: 0.4773, MCC: 0.2069 | Val Loss: 0.4234, MCC: 0.2509 | LR: 0.000057
Epoch 11/75 - Train Loss: 0.4766, MCC: 0.2073 | Val Loss: 0.4262, MCC: 0.3579 

In [None]:
def display_study_results(study):
    """
    Display the results of an Optuna study in a notebook-friendly format.

    Shows a summary table (best value, number of trials, study duration), the
    best trial's parameters, three Optuna plotly visualizations when they can
    be rendered, and the five trials with the highest objective value.

    Args:
        study: Optuna study to summarise. ``study.best_trial`` and
            ``study.trials`` are read.
    """
    import pandas as pd
    from IPython.display import display

    # Get best trial
    best_trial = study.best_trial

    # Study duration spans the earliest trial start to the latest trial
    # completion. Trials without a timestamp are ignored.
    start_times = [t.datetime_start for t in study.trials if t.datetime_start is not None]
    end_times = [t.datetime_complete for t in study.trials if t.datetime_complete is not None]
    if start_times and end_times:
        elapsed_hours = (max(end_times) - min(start_times)).total_seconds() / 3600
        duration_text = f"{elapsed_hours:.1f} hours"
    else:
        duration_text = "n/a"

    # Create summary dataframe
    summary = pd.DataFrame({
        'Metric': ['Best Value (MCC)', 'Number of Trials', 'Study Duration'],
        'Value': [
            f"{best_trial.value:.4f}",
            len(study.trials),
            duration_text
        ]
    })

    print("Study Summary:")
    display(summary)

    # Display best parameters
    params_df = pd.DataFrame({
        'Parameter': list(best_trial.params.keys()),
        'Value': list(best_trial.params.values())
    })

    print("\nBest Parameters:")
    display(params_df)

    # Display interactive plots if plotly is available
    try:
        import plotly.io as pio
        pio.renderers.default = 'notebook'

        print("\nOptimization History:")
        fig = optuna.visualization.plot_optimization_history(study)
        fig.show()

        print("\nParameter Importances:")
        fig = optuna.visualization.plot_param_importances(study)
        fig.show()

        print("\nParameter Relationships:")
        fig = optuna.visualization.plot_parallel_coordinate(study)
        fig.show()
    except ImportError:
        print("\nInstall plotly to see interactive visualizations: pip install plotly")
    except Exception as exc:
        # Plots are best-effort, but report the real reason instead of
        # blaming a missing plotly installation.
        print(f"\nCould not render interactive visualizations: {exc}")

    # Display top trials: highest objective value first, value-less trials last
    ranked_trials = sorted(
        study.trials,
        key=lambda t: t.value if t.value is not None else float('-inf'),
        reverse=True
    )
    top_trials_data = [
        {'Trial': t.number, 'Value': t.value, **t.params}
        for t in ranked_trials[:5]
    ]

    print("\nTop 5 Trials:")
    display(pd.DataFrame(top_trials_data))

def print_final_summary(combined_results, output_dir):
    """
    Print final summary of cross-validation results.

    Three sections are printed: mean ± std of each metric across folds
    (read from ``combined_results['agg_metrics']``), the metrics computed on
    the pooled predictions, and the pooled confusion-matrix counts.

    NOTE(review): a function with this name is also defined in an earlier
    cell of this notebook; this definition replaces it once the cell runs.

    Args:
        combined_results: Dictionary with combined metrics
        output_dir: Output directory
    """
    # (display label, dictionary key) pairs, in print order
    labelled_metrics = (
        ('Accuracy', 'accuracy'),
        ('Precision', 'precision'),
        ('Recall', 'recall'),
        ('F1', 'f1'),
        ('MCC', 'mcc'),
        ('ROC AUC', 'roc_auc'),
        ('PR AUC', 'pr_auc'),
    )
    per_fold_stats = combined_results['agg_metrics']
    rule = '=' * 80

    print(f"\n{rule}")
    print("CROSS-VALIDATION RESULTS SUMMARY")
    print(rule)
    for label, key in labelled_metrics:
        stats = per_fold_stats[key]
        print(f"Mean {label}: {stats['mean']:.4f} ± {stats['std']:.4f}")

    print("\nCombined metrics (across all fold predictions):")
    for label, key in labelled_metrics:
        print(f"Combined {label}: {combined_results[key]:.4f}")

    print("\nConfusion Matrix:")
    print(f"TN: {combined_results['tn']}, FP: {combined_results['fp']}")
    print(f"FN: {combined_results['fn']}, TP: {combined_results['tp']}")

    print(rule)
    print(f"All results saved to: {output_dir}")

def _group_performance(preds_df, group_col, decision_threshold=0.5):
    """Return one row per value of `group_col` with its count, accuracy and MCC.

    Predictions are obtained by thresholding the 'confidence' column at
    `decision_threshold` and are compared against the 'true_label' column.
    """
    records = []
    for group_name, group_df in preds_df.groupby(group_col):
        labels = group_df['true_label']
        predicted = (group_df['confidence'] >= decision_threshold).astype(int)
        # When a group contains a single true class, report MCC as 0 instead
        # of calling matthews_corrcoef on it.
        has_both_classes = len(set(labels)) > 1
        records.append({
            group_col: group_name,
            'count': len(group_df),
            'accuracy': accuracy_score(labels, predicted),
            'mcc': matthews_corrcoef(labels, predicted) if has_both_classes else 0,
        })
    # Explicit columns keep the frame well-formed even when there are no groups.
    return pd.DataFrame(records, columns=[group_col, 'count', 'accuracy', 'mcc'])


def _plot_group_mcc(metrics_df, group_col, axis_label, output_path, min_count=10, top_n=20):
    """Save a bar plot of MCC for the `top_n` best groups having at least `min_count` rows."""
    plt.figure(figsize=(12, 8))

    ranked = (
        metrics_df[metrics_df['count'] >= min_count]
        .sort_values('mcc', ascending=False)
        .head(top_n)  # Limit the number of bars for readability
    )

    sns.barplot(x=group_col, y='mcc', data=ranked)
    plt.xticks(rotation=90)
    plt.xlabel(axis_label)
    plt.ylabel('MCC')
    plt.title(f'Performance by {axis_label} (MCC)')
    plt.tight_layout()
    plt.savefig(output_path, dpi=300, bbox_inches='tight')
    plt.close()


def analyze_cv_results(cv_results_dir):
    """
    Analyze cross-validation results to generate additional insights.

    Reads `<cv_results_dir>/combined_results/all_predictions.csv`, sweeps the
    classification threshold from 0.1 to 0.9, records the threshold that
    maximizes each metric, recomputes all metrics at the MCC-optimal threshold
    and, when 'strain' and 'phage' columns are present, reports per-strain and
    per-phage performance at a fixed 0.5 threshold. All artifacts (CSV, JSON,
    PNG) are written to `<cv_results_dir>/analysis`.

    Note: the predictions file must provide 'true_label' and 'confidence'
    columns; labels are assumed to be encoded as 0/1 (predictions are produced
    as 0/1 integers by thresholding 'confidence').

    Args:
        cv_results_dir: Directory containing CV results

    Returns:
        Dictionary with additional analysis:
            'optimal_thresholds': metric name -> threshold maximizing it
            'optimal_values': metric name -> best value found in the sweep
            'optimal_metrics': metrics and confusion counts at the
                MCC-optimal threshold (plain Python numbers)
            'analysis_dir': directory the artifacts were written to

    Raises:
        ValueError: If the combined predictions file does not exist.
    """
    print(f"Analyzing results in: {cv_results_dir}")

    # Create directory for additional analysis
    analysis_dir = os.path.join(cv_results_dir, "analysis")
    os.makedirs(analysis_dir, exist_ok=True)

    # Load combined predictions
    predictions_path = os.path.join(cv_results_dir, "combined_results", "all_predictions.csv")
    if not os.path.exists(predictions_path):
        raise ValueError(f"Predictions file not found: {predictions_path}")

    all_preds_df = pd.read_csv(predictions_path)
    print(f"Loaded {len(all_preds_df)} predictions across all folds")

    # Extract true labels and predictions
    y_true = all_preds_df['true_label'].values
    y_pred_proba = all_preds_df['confidence'].values

    # Check thresholds from 0.1 to 0.9 in steps of 0.01
    thresholds = np.linspace(0.1, 0.9, 81)

    # Metric values for each threshold, keyed by metric name (insertion order
    # also fixes the column order of the CSV written below).
    sweep = {'accuracy': [], 'precision': [], 'recall': [], 'f1': [], 'mcc': []}

    print("Finding optimal thresholds for different metrics...")
    for threshold in thresholds:
        y_pred = (y_pred_proba >= threshold).astype(int)

        sweep['accuracy'].append(accuracy_score(y_true, y_pred))
        # zero_division=0 yields the same 0.0 score as the default but without
        # a warning at every threshold that produces no positive predictions.
        sweep['precision'].append(precision_score(y_true, y_pred, zero_division=0))
        sweep['recall'].append(recall_score(y_true, y_pred, zero_division=0))
        sweep['f1'].append(f1_score(y_true, y_pred, zero_division=0))
        sweep['mcc'].append(matthews_corrcoef(y_true, y_pred))

    # Best threshold / best value per metric (first maximum wins on ties)
    optimal_thresholds = {
        metric: float(thresholds[int(np.argmax(values))])
        for metric, values in sweep.items()
    }
    optimal_values = {metric: float(np.max(values)) for metric, values in sweep.items()}

    # Save thresholds
    thresholds_df = pd.DataFrame({'threshold': thresholds, **sweep})
    thresholds_df.to_csv(os.path.join(analysis_dir, "threshold_analysis.csv"), index=False)

    # Plot threshold vs metrics
    plt.figure(figsize=(12, 8))
    curve_labels = {
        'accuracy': 'Accuracy',
        'precision': 'Precision',
        'recall': 'Recall',
        'f1': 'F1',
        'mcc': 'MCC',
    }
    for metric, values in sweep.items():
        plt.plot(thresholds, values, label=curve_labels[metric])

    # Mark optimal thresholds
    for metric, threshold in optimal_thresholds.items():
        plt.axvline(x=threshold, color='gray', linestyle='--', alpha=0.5)
        plt.text(threshold, 0.4, f"{metric}: {threshold:.2f}", rotation=90, alpha=0.7)

    plt.xlabel('Threshold')
    plt.ylabel('Metric Value')
    plt.title('Effect of Classification Threshold on Metrics')
    plt.legend()
    plt.grid(alpha=0.3)
    plt.savefig(os.path.join(analysis_dir, "threshold_metrics.png"), dpi=300, bbox_inches='tight')
    plt.close()

    # Apply the optimal MCC threshold and recalculate metrics
    optimal_threshold = optimal_thresholds['mcc']
    y_pred_optimal = (y_pred_proba >= optimal_threshold).astype(int)

    # labels=[0, 1] forces a 2x2 matrix even if only one class occurs, so the
    # tn/fp/fn/tp unpacking below cannot fail on degenerate data.
    cm_optimal = confusion_matrix(y_true, y_pred_optimal, labels=[0, 1])
    tn, fp, fn, tp = (int(cell) for cell in cm_optimal.ravel())

    # Everything is converted to built-in int/float: json.dump cannot
    # serialize NumPy integer scalars such as the confusion-matrix cells.
    optimal_metrics = {
        'threshold': float(optimal_threshold),
        'accuracy': float(accuracy_score(y_true, y_pred_optimal)),
        'precision': float(precision_score(y_true, y_pred_optimal, zero_division=0)),
        'recall': float(recall_score(y_true, y_pred_optimal, zero_division=0)),
        'f1': float(f1_score(y_true, y_pred_optimal, zero_division=0)),
        'mcc': float(matthews_corrcoef(y_true, y_pred_optimal)),
        'tn': tn,
        'fp': fp,
        'fn': fn,
        'tp': tp
    }

    # Save optimal metrics
    with open(os.path.join(analysis_dir, "optimal_threshold_metrics.json"), 'w') as f:
        json.dump(optimal_metrics, f, indent=4)

    # Generate confusion matrix with optimal threshold
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm_optimal, annot=True, fmt="d", cmap="Blues", cbar=False,
                xticklabels=['Predicted Negative', 'Predicted Positive'],
                yticklabels=['Actual Negative', 'Actual Positive'])
    plt.title(f"Confusion Matrix with Optimal Threshold ({optimal_threshold:.2f})")
    plt.xlabel("Predicted Label")
    plt.ylabel("True Label")
    plt.savefig(os.path.join(analysis_dir, "optimal_threshold_confusion_matrix.png"), dpi=300, bbox_inches='tight')
    plt.close()

    # Analysis by strain and phage
    if 'strain' in all_preds_df.columns and 'phage' in all_preds_df.columns:
        print("Analyzing performance by strain and phage...")

        # Per-strain metrics
        strain_metrics_df = _group_performance(all_preds_df, 'strain')
        strain_metrics_df.to_csv(os.path.join(analysis_dir, "strain_performance.csv"), index=False)

        # Per-phage metrics
        phage_metrics_df = _group_performance(all_preds_df, 'phage')
        phage_metrics_df.to_csv(os.path.join(analysis_dir, "phage_performance.csv"), index=False)

        # Plot strain performance (strains with sufficient data only)
        _plot_group_mcc(strain_metrics_df, 'strain', 'Strain',
                        os.path.join(analysis_dir, "strain_performance.png"))

        # Plot phage performance (phages with sufficient data only)
        _plot_group_mcc(phage_metrics_df, 'phage', 'Phage',
                        os.path.join(analysis_dir, "phage_performance.png"))

    # Print summary of analysis
    print(f"\n{'='*80}")
    print("ANALYSIS SUMMARY")
    print(f"{'='*80}")
    print("Optimal thresholds:")
    for metric, threshold in optimal_thresholds.items():
        print(f"  {metric}: {threshold:.3f} (value: {optimal_values[metric]:.4f})")
    print(f"\nRecalculated metrics with optimal threshold ({optimal_threshold:.3f}):")
    print(f"  Accuracy:  {optimal_metrics['accuracy']:.4f}")
    print(f"  Precision: {optimal_metrics['precision']:.4f}")
    print(f"  Recall:    {optimal_metrics['recall']:.4f}")
    print(f"  F1 Score:  {optimal_metrics['f1']:.4f}")
    print(f"  MCC:       {optimal_metrics['mcc']:.4f}")
    print(f"{'='*80}")
    print(f"Analysis results saved to: {analysis_dir}")

    return {
        'optimal_thresholds': optimal_thresholds,
        'optimal_values': optimal_values,
        'optimal_metrics': optimal_metrics,
        'analysis_dir': analysis_dir
    }