# Hyperautomatic Barrel Batching System

This notebook automates the process of running multiple user defined experiments with different datsets and settings sequentially. Each experiment runs a user defined number of trials for different user defined model configurations (baseline, infused-fixed, infused-trainable) to evaluate and compare their performance systematically. It collects validation metrics for each run, aggregates them, and saves the results.

If you just want to run one experiment, try the automated_barrel_batching_system

### ||RUN ON RESTART||

In [4]:
# Load dependencies

from utils import build_multilabel_dataset, multilabel_split, prep_infused_sweetnet, seed_everything

import os
import pickle
import torch
import yaml

from glycowork.ml.processing import split_data_to_train, dataset_to_dataloader
from glycowork.ml import model_training


In [5]:
# Load embeddings

pickle_file_path = 'glm_embeddings_1.pkl'

# --- Load the Pickle File ---
if os.path.exists(pickle_file_path):
    print(f"Loading embeddings from: {pickle_file_path}")
    try:
        # Open the file in binary read mode ('rb')
        with open(pickle_file_path, 'rb') as file_handle:
            # Load the object(s) from the pickle file
            glm_embeddings = pickle.load(file_handle)

        print("Embeddings loaded successfully!")        

    except Exception as e:
        print(f"An error occurred while loading the pickle file: {e}")
else:
    print(f"Error: File not found at '{pickle_file_path}'. Please check the filename and path.")

Loading embeddings from: glm_embeddings_1.pkl
Embeddings loaded successfully!


## Experimental setup
Change parameters here to define each Experiment.


In [8]:
# --- immutable parameters ---
BASE_RANDOM_STATE = 42  # Initial seed for reproducibility of the entire sequence of experiments
DATA_DIR = "Datasets"   # Directory where the datasets are stored
os.makedirs(DATA_DIR, exist_ok=True) # Create the directory if it doesn't exist

# Mutable parameters that can be changed for each experiment below
settings = {
    "glycan_dataset":    'df_disease', # The glycowork dataset to use
    "glycan_class":      'disease_association', # The class to predict from the chosen dataset
    "num_runs":          5,  # Number of trials per configuration (e.g., 5 or 10)
    "epochs":            5, # Number of training epochs per run
    "batch_size":        128, # 32 or 128 seems to work well
    "train_size":        0.7, # Fraction of data to use for training (0.7 = 70% train, 15% val, 15% test)
    "learning_rate":     0.005, # Learning rate for the optimizer
    "drop_last":         False, # Whether to drop the last batch if it's smaller than the batch size
    "augment_prob":      0.0,  # Adjust if you want augmentation for training
    "generaliz_prob":    0.2,  # Adjust if you want generalization for training
    "patience":          25, # number of epochs without improvement before EarlyStop kicks in 
}
# ---- Datasets and num_classes within them ------
#  'df_species': 'Species', 'Genus', 'Family', 'Order', 'Class', 'Phylum', 'Kingdom', 'Domain', 'ref'
#  'df_tissue': 'tissue_sample', 'tissue_species', 'tissue_id', 'tissue_ref'
#  'df_disease': 'disease_association', 'disease_sample', 'disease_direction', 'disease_species', 'disease_id', 'disease_ref'

# --- Define Experiments ---
# This list defines all sets of parameters you want to change for each experiment, they override the default parameters
# Can be used to test different datasets and num_classes
# Or for crude hyperparameter tuning
experiments = [
    {
        "name": "Test",
        "glycan_dataset": "df_species", 
        "glycan_class": "Kingdom"
    },
    {
        "name": "test2",
        "glycan_dataset": "df_tissue", 
        "glycan_class": "tissue_sample"
    }
    # add more experiments as needed
]

# --- Define Experiment Configurations for each run of the experiment ---
# This list defines all sets of parameters you want to test for all the runs of each experiment
experiment_configs = [
    {
        "name": "baseline_trainable",
        "initialization_method": "random", 
        "trainable_embeddings": True
    },
    {
        "name": "baseline_fixed",
        "initialization_method": "random", 
        "trainable_embeddings": False
    },
    {
        "name": "infused_trainable",
        "initialization_method": "external", 
        "trainable_embeddings": True
    },
    {
        "name": "infused_fixed",
        "initialization_method": "external", 
        "trainable_embeddings": False
    }  
    # add more configurations as needed     
]


# Set the random seed for reproducibility
seed_everything(BASE_RANDOM_STATE)

# Initialize lists/dicts to store results globally for the notebook session
all_run_summary_results = {} # For storing best metrics from each run
all_run_epoch_histories = {} # For storing full epoch-wise histories



print()
print(f"A batch of {len(experiments)} experiments with {len(experiment_configs)} configurations have been set up")

All random seeds set to: 42

A batch of 2 experiments with 4 configurations have been set up


## Run Experiment Loop

In [None]:
# --- Hyperautomation loop ---



# Loop through each experiment
for experiment in experiments:
    
    # load the settings for this experiment
    for setting in settings:
        if setting in experiment:
            settings[setting] = experiment[setting]
       
    # get name of the current experiment
    experiment_name = experiment["name"]
    num_runs = settings['num_runs']

    print()
    print(f"Initializing {experiment_name} experiment with {len(experiment_configs)} configurations and {num_runs} runs per configuration.")
    print()

    # generate the paths for saving results
    results_summary_path =      os.path.join(DATA_DIR,f"summary_{experiment_name}.csv")
    epoch_data_path =           os.path.join(DATA_DIR,f"epoch_data_{experiment_name}.pkl")
    test_set_path =             os.path.join(DATA_DIR,f"test_set_{experiment_name}.pkl")
    saved_models_dir =          os.path.join(DATA_DIR,f"saved_models_{experiment_name}")
    experiment_settings_path =  os.path.join(DATA_DIR,f"experiment_settings_{experiment_name}.yaml")

    # generate yaml file to save the settings for this experiment
    experiment_log_data = {
        "experiment_name": experiment_name,
        "global_parameters_for_this_experiment": settings, # This holds updated settings
        "experiment_specific_configs": experiment, # The specific entry from the 'experiments' list
        "all_model_configurations_tested": experiment_configs, # All configs tested within this experiment
        "base_random_state_for_experiment_batch": BASE_RANDOM_STATE # Log the base seed
    }
    try:
        with open(experiment_settings_path, 'w') as f:
            yaml.dump(experiment_log_data, f, default_flow_style=False, sort_keys=False) # sort_keys=False to preserve order

        print(f"--- Settings for experiment '{experiment_name}' saved to {experiment_settings_path} ---")

    except Exception as e:
        print(f"Error saving settings for experiment '{experiment_name}' to YAML: {e}")

    # Set the initial random seed for this experiment
    seed_everything(BASE_RANDOM_STATE)

    # Load dataset for the experiment
    glycans, labels, label_names = build_multilabel_dataset(glycan_dataset = 'df_disease', 
                                                        glycan_class = 'disease_association', 
                                                        min_class_size = 2)

    
    num_classes = len(labels[0]) # Number of classes in the dataset

    # calculate the split ratio for train/val/test
    test_split_ratio = 1 - ((1 - settings['train_size'])/2)
    val_split_ratio = 1- ((1 - settings['train_size']) / (1 + settings['train_size']))

    # split out the test set outside of the loop to stop data leakage
    temp_glycans, test_glycans, _, temp_labels, test_labels, _ = multilabel_split(glycans, labels, train_size = test_split_ratio, 
                                                                random_state = BASE_RANDOM_STATE, no_test = True)

    # load the test set into a dataloader
    test_dataloader = dataset_to_dataloader(glycan_list = test_glycans, 
                                            labels = test_labels,
                                            batch_size = settings['batch_size'],
                                            drop_last = settings['drop_last'],
                                            augment_prob = settings['augment_prob'], 
                                            generalization_prob = settings['generaliz_prob']
    )
    # Save the test set to a pickle file
    with open(test_set_path, 'wb') as f:
        pickle.dump(test_dataloader, f) 
    print(f"Saved test set to {test_set_path}")

    for i in range(num_runs):

        # Print the current run number
        print("----------------")
        print(f"Run {i+1}/{num_runs}")
        print()

        # Increment the random state for each run
        random_state = BASE_RANDOM_STATE + i

        # Set the random seed for reproducibility for this run
        seed_everything(random_state)    

        # --- split data for this run outside of core loop for efficiency ---
        # Split the remainding dataset into training and validation
        train_glycans, val_glycans, _, train_labels, val_labels, _ = multilabel_split(temp_glycans, temp_labels, train_size=val_split_ratio, 
                                                                    random_state=random_state, no_test = True)

        # Load into dataloders for training and validation
        dataloaders = split_data_to_train(
            glycan_list_train = train_glycans, glycan_list_val = val_glycans, labels_train = train_labels, labels_val = val_labels,
            batch_size = settings['batch_size'],
            drop_last = settings['drop_last'],
            augment_prob = settings['augment_prob'], 
            generalization_prob = settings['generaliz_prob']
        )
        
        # --- Loop through each configuration & train model ---
        for config in experiment_configs:

            # Extract the configuration parameters
            config_name = config["name"]
            initialization_method = config["initialization_method"]
            trainable_embeddings = config["trainable_embeddings"]

            # Print the current configuration being used
            print("----------------")
            print(f"Running configuration: {config_name}")
            print()

            # --- Model Training ---
            # Initialize the model with the specified parameters
            model =  prep_infused_sweetnet(
                        initialization_method = initialization_method,
                        num_classes = num_classes,
                        embeddings_dict = glm_embeddings, 
                        trainable_embeddings = trainable_embeddings
                        ) 
            
            print()
            
            # Run the training setup function to prepare the model for training
            optimizer, scheduler, criterion = model_training.training_setup(model, settings['learning_rate'], num_classes = num_classes)

            # Run the training process
            model_ft, current_run_metrics = model_training.train_model(model, dataloaders, criterion, optimizer, scheduler,
                            num_epochs = settings['epochs'], mode = 'multilabel', return_metrics = True, patience = settings['patience'])
            
            print()
            
            # Collect the epoch-wise metrics from this configuration
            config_run_identifier = f"{config_name}_{i+1}"
            all_run_epoch_histories[config_run_identifier] = current_run_metrics

            # Save the best model to a file
            os.makedirs(saved_models_dir, exist_ok=True) # make sure the directory exists

            model_filename = f"{config_name}_run_{i+1}_state_dict.pth"
            model_filepath = os.path.join(saved_models_dir, model_filename)
            torch.save(model_ft.state_dict(), model_filepath)
            print(f"Saved model to {model_filepath}")
        
            # --- Save the best metrics from this run to the summary results dictionary ---
            # I should add a way to turn certain metrics off for some runs, when doing hyperparameter optimization
            # generate keys for the summary results
            loss_key = f"{config_name}_loss"
            lrap_key = f"{config_name}_lrap"
            ndcg_key = f"{config_name}_ndcg"
            metric_keys = [loss_key, lrap_key, ndcg_key]
            
            # add keys to the summary results dictionary if they don't exist
            for key in metric_keys:
                if key not in all_run_summary_results:
                    all_run_summary_results[key] = []

            # Find the best metrics from the current run
            best_loss = min(current_run_metrics['val']['loss'])
            best_lrap = max(current_run_metrics['val']['lrap'])
            best_ndcg = max(current_run_metrics['val']['ndcg'])
            best_metrics = [best_loss, best_lrap, best_ndcg]

            for key, metric in zip(metric_keys, best_metrics):
                # Append the best metric to the summary results dictionary
                all_run_summary_results[key].append(metric)
            
        # --- Export metrics at end of each run, in case of early termination ---

        # Save the epoch-wise metrics for this run to a pickle file
        with open(epoch_data_path, 'wb') as f:
            pickle.dump(all_run_epoch_histories, f)
        print(f"Saved training histories to {epoch_data_path}")

        # Save the summary results to a CSV file
        with open(results_summary_path, 'w') as f:
            # Write the header
            f.write(','.join(all_run_summary_results.keys()) + '\n')
            
            # Write the data
            any_key = next(iter(all_run_summary_results.keys()))
            num_runs_done = len(all_run_summary_results[any_key])
            for i in range(num_runs_done):
                row = [str(all_run_summary_results[key][i]) for key in all_run_summary_results.keys()]
                f.write(','.join(row) + '\n')
        print(f"Saved summary results to {results_summary_path}")

print("All experiments completed successfully!")


Initializing Test experiment with 4 configurations and 5 runs per configuration.

--- Settings for experiment 'Test' saved to experiment_settings_Test.yaml ---
All random seeds set to: 42
Found 60 unique individual classes/labels.
Number of unique glycans left after filtering rare classes (size >= 2): 1557/1648
Number of unique labels left after filtering: 39
Split complete!
Train set size: 1324
Validation set size: 233
Test set size: 0
Saved test set to test_set_Test.pkl
----------------
Run 1/5

All random seeds set to: 42
Split complete!
Train set size: 1091
Validation set size: 233
Test set size: 0
----------------
Running configuration: baseline_trainable

SweetNet model instantiated with lib_size=2565, num_classes=39, hidden_dim=320.
Handling 'random' initialization method (training from scratch).
SweetNet item_embedding layer set to trainable: True.

Epoch 0/4
----------
train Loss: 3.9591 LRAP: 0.0064 NDCG: 0.3326
val Loss: 13.5645 LRAP: 0.0086 NDCG: 0.4868
Validation loss dec

  return 1 / (1 + np.exp(-x))
  return 1 / (1 + np.exp(-x))


train Loss: 2.5109 LRAP: 0.2246 NDCG: 0.5581
val Loss: 38.9977 LRAP: 0.6609 NDCG: 0.6611
Validation loss decreased (75.913753 --> 38.997691).  Saving model ...

Epoch 2/4
----------


  return 1 / (1 + np.exp(-x))


train Loss: 2.3422 LRAP: 0.0587 NDCG: 0.5202
val Loss: 13.5173 LRAP: 0.0043 NDCG: 0.4498
Validation loss decreased (38.997691 --> 13.517264).  Saving model ...

Epoch 3/4
----------
train Loss: 2.2419 LRAP: 0.0055 NDCG: 0.4786
val Loss: 3.7969 LRAP: 0.0043 NDCG: 0.4745
Validation loss decreased (13.517264 --> 3.796949).  Saving model ...

Epoch 4/4
----------
train Loss: 2.1693 LRAP: 0.1036 NDCG: 0.5257
val Loss: 2.9900 LRAP: 0.4678 NDCG: 0.5846
Validation loss decreased (3.796949 --> 2.989996).  Saving model ...

Training complete in 0m 3s
Best val loss: 2.989996, best LRAP score: 0.7643

Saved model to saved_models_Test\infused_trainable_run_1_state_dict.pth
----------------
Running configuration: infused_fixed

SweetNet model instantiated with lib_size=2565, num_classes=39, hidden_dim=320.
Handling 'external' initialization method.
SweetNet item_embedding layer set to trainable: False.

Epoch 0/4
----------
train Loss: 3.8570 LRAP: 0.0467 NDCG: 0.3946
val Loss: 52.8286 LRAP: 0.6652 

  return 1 / (1 + np.exp(-x))
  return 1 / (1 + np.exp(-x))


train Loss: 2.4187 LRAP: 0.2640 NDCG: 0.5635
val Loss: 23.4637 LRAP: 0.0043 NDCG: 0.4703
Validation loss decreased (52.828616 --> 23.463663).  Saving model ...

Epoch 2/4
----------
train Loss: 2.2789 LRAP: 0.0724 NDCG: 0.5322
val Loss: 7.8655 LRAP: 0.0472 NDCG: 0.4917
Validation loss decreased (23.463663 --> 7.865460).  Saving model ...

Epoch 3/4
----------
train Loss: 2.2118 LRAP: 0.0229 NDCG: 0.5038
val Loss: 4.0499 LRAP: 0.0043 NDCG: 0.4357
Validation loss decreased (7.865460 --> 4.049874).  Saving model ...

Epoch 4/4
----------
train Loss: 2.1469 LRAP: 0.0559 NDCG: 0.5227
val Loss: 3.3547 LRAP: 0.0086 NDCG: 0.4462
Validation loss decreased (4.049874 --> 3.354724).  Saving model ...

Training complete in 0m 3s
Best val loss: 3.354724, best LRAP score: 0.7505

Saved model to saved_models_Test\infused_fixed_run_1_state_dict.pth
Saved training histories to epoch_data_Test.pkl
Saved summary results to summary_Test.csv
----------------
Run 2/5

All random seeds set to: 43
Split comple

  return 1 / (1 + np.exp(-x))
  return 1 / (1 + np.exp(-x))


train Loss: 2.4542 LRAP: 0.2631 NDCG: 0.5764
val Loss: 28.3373 LRAP: 0.0086 NDCG: 0.4690
Validation loss decreased (109.575613 --> 28.337295).  Saving model ...

Epoch 2/4
----------
train Loss: 2.3931 LRAP: 0.1797 NDCG: 0.5492
val Loss: 8.0485 LRAP: 0.0000 NDCG: 0.4687
Validation loss decreased (28.337295 --> 8.048489).  Saving model ...

Epoch 3/4
----------
train Loss: 2.3006 LRAP: 0.0037 NDCG: 0.4938
val Loss: 5.4725 LRAP: 0.0000 NDCG: 0.4689
Validation loss decreased (8.048489 --> 5.472538).  Saving model ...

Epoch 4/4
----------
train Loss: 2.2210 LRAP: 0.0156 NDCG: 0.5102
val Loss: 2.4789 LRAP: 0.0815 NDCG: 0.5267
Validation loss decreased (5.472538 --> 2.478862).  Saving model ...

Training complete in 0m 3s
Best val loss: 2.478862, best LRAP score: 0.7718

Saved model to saved_models_Test\infused_trainable_run_2_state_dict.pth
----------------
Running configuration: infused_fixed

SweetNet model instantiated with lib_size=2565, num_classes=39, hidden_dim=320.
Handling 'extern

  return 1 / (1 + np.exp(-x))
  return 1 / (1 + np.exp(-x))


train Loss: 2.5543 LRAP: 0.2475 NDCG: 0.5711
val Loss: 32.9177 LRAP: 0.0086 NDCG: 0.4702
Validation loss decreased (110.861168 --> 32.917721).  Saving model ...

Epoch 2/4
----------
train Loss: 2.3071 LRAP: 0.0981 NDCG: 0.5318
val Loss: 5.4925 LRAP: 0.0300 NDCG: 0.4749
Validation loss decreased (32.917721 --> 5.492537).  Saving model ...

Epoch 3/4
----------
train Loss: 2.2410 LRAP: 0.0128 NDCG: 0.4878
val Loss: 3.2291 LRAP: 0.0343 NDCG: 0.4739
Validation loss decreased (5.492537 --> 3.229100).  Saving model ...

Epoch 4/4
----------
train Loss: 2.1830 LRAP: 0.0412 NDCG: 0.5141
val Loss: 3.2528 LRAP: 0.0515 NDCG: 0.4667
EarlyStopping counter: 1 out of 25

Training complete in 0m 3s
Best val loss: 3.229100, best LRAP score: 0.7438

Saved model to saved_models_Test\infused_fixed_run_2_state_dict.pth
Saved training histories to epoch_data_Test.pkl
Saved summary results to summary_Test.csv
----------------
Run 3/5

All random seeds set to: 44
Split complete!
Train set size: 1091
Validati

  return 1 / (1 + np.exp(-x))
  return 1 / (1 + np.exp(-x))


train Loss: 2.5527 LRAP: 0.1109 NDCG: 0.5360
val Loss: 29.9915 LRAP: 0.6652 NDCG: 0.6655
Validation loss decreased (52.404226 --> 29.991530).  Saving model ...

Epoch 2/4
----------
train Loss: 2.3426 LRAP: 0.0907 NDCG: 0.5335
val Loss: 9.1634 LRAP: 0.0000 NDCG: 0.4711
Validation loss decreased (29.991530 --> 9.163360).  Saving model ...

Epoch 3/4
----------
train Loss: 2.3182 LRAP: 0.0101 NDCG: 0.5020
val Loss: 3.1061 LRAP: 0.0343 NDCG: 0.4834
Validation loss decreased (9.163360 --> 3.106084).  Saving model ...

Epoch 4/4
----------
train Loss: 2.2480 LRAP: 0.0302 NDCG: 0.5128
val Loss: 2.5404 LRAP: 0.0944 NDCG: 0.5176
Validation loss decreased (3.106084 --> 2.540430).  Saving model ...

Training complete in 0m 3s
Best val loss: 2.540430, best LRAP score: 0.7757

Saved model to saved_models_Test\infused_fixed_run_3_state_dict.pth
Saved training histories to epoch_data_Test.pkl
Saved summary results to summary_Test.csv
----------------
Run 4/5

All random seeds set to: 45
Split comple

  return 1 / (1 + np.exp(-x))
  return 1 / (1 + np.exp(-x))


train Loss: 2.4622 LRAP: 0.2484 NDCG: 0.5613
val Loss: 31.3160 LRAP: 0.0043 NDCG: 0.4723
Validation loss decreased (49.898371 --> 31.315996).  Saving model ...

Epoch 2/4
----------
train Loss: 2.3181 LRAP: 0.0907 NDCG: 0.5239
val Loss: 11.6687 LRAP: 0.0000 NDCG: 0.3865
Validation loss decreased (31.315996 --> 11.668721).  Saving model ...

Epoch 3/4
----------
train Loss: 2.1919 LRAP: 0.0073 NDCG: 0.4861
val Loss: 4.3108 LRAP: 0.0258 NDCG: 0.4879
Validation loss decreased (11.668721 --> 4.310824).  Saving model ...

Epoch 4/4
----------
train Loss: 2.1333 LRAP: 0.1146 NDCG: 0.5486
val Loss: 2.7420 LRAP: 0.1717 NDCG: 0.5539
Validation loss decreased (4.310824 --> 2.741963).  Saving model ...

Training complete in 0m 3s
Best val loss: 2.741963, best LRAP score: 0.7775

Saved model to saved_models_Test\infused_trainable_run_4_state_dict.pth
----------------
Running configuration: infused_fixed

SweetNet model instantiated with lib_size=2565, num_classes=39, hidden_dim=320.
Handling 'exte

  return 1 / (1 + np.exp(-x))
  return 1 / (1 + np.exp(-x))


train Loss: 2.4465 LRAP: 0.2062 NDCG: 0.5544
val Loss: 23.8241 LRAP: 0.0086 NDCG: 0.4729
Validation loss decreased (66.708196 --> 23.824121).  Saving model ...

Epoch 2/4
----------
train Loss: 2.3275 LRAP: 0.0880 NDCG: 0.5239
val Loss: 8.5329 LRAP: 0.0000 NDCG: 0.3830
Validation loss decreased (23.824121 --> 8.532924).  Saving model ...

Epoch 3/4
----------
train Loss: 2.2590 LRAP: 0.0082 NDCG: 0.4753
val Loss: 2.7344 LRAP: 0.0858 NDCG: 0.4822
Validation loss decreased (8.532924 --> 2.734387).  Saving model ...

Epoch 4/4
----------
train Loss: 2.2080 LRAP: 0.0797 NDCG: 0.5221
val Loss: 3.2725 LRAP: 0.0086 NDCG: 0.4689
EarlyStopping counter: 1 out of 25

Training complete in 0m 3s
Best val loss: 2.734387, best LRAP score: 0.7595

Saved model to saved_models_Test\infused_fixed_run_4_state_dict.pth
Saved training histories to epoch_data_Test.pkl
Saved summary results to summary_Test.csv
----------------
Run 5/5

All random seeds set to: 46
Split complete!
Train set size: 1091
Validatio

  return 1 / (1 + np.exp(-x))
  return 1 / (1 + np.exp(-x))


train Loss: 2.5048 LRAP: 0.2145 NDCG: 0.5551
val Loss: 31.7191 LRAP: 0.6652 NDCG: 0.6600
Validation loss decreased (72.378241 --> 31.719055).  Saving model ...

Epoch 2/4
----------
train Loss: 2.4217 LRAP: 0.3437 NDCG: 0.5883
val Loss: 8.5764 LRAP: 0.0043 NDCG: 0.4684
Validation loss decreased (31.719055 --> 8.576428).  Saving model ...

Epoch 3/4
----------
train Loss: 2.3631 LRAP: 0.1036 NDCG: 0.5298
val Loss: 6.6880 LRAP: 0.0000 NDCG: 0.4667
Validation loss decreased (8.576428 --> 6.687963).  Saving model ...

Epoch 4/4
----------
train Loss: 2.2586 LRAP: 0.0651 NDCG: 0.5211
val Loss: 2.8065 LRAP: 0.1073 NDCG: 0.5058
Validation loss decreased (6.687963 --> 2.806456).  Saving model ...

Training complete in 0m 3s
Best val loss: 2.806456, best LRAP score: 0.7587

Saved model to saved_models_Test\infused_trainable_run_5_state_dict.pth
----------------
Running configuration: infused_fixed

SweetNet model instantiated with lib_size=2565, num_classes=39, hidden_dim=320.
Handling 'externa

  return 1 / (1 + np.exp(-x))
  return 1 / (1 + np.exp(-x))


train Loss: 2.5147 LRAP: 0.3006 NDCG: 0.5779
val Loss: 17.8310 LRAP: 0.0000 NDCG: 0.4679
Validation loss decreased (76.200955 --> 17.830959).  Saving model ...

Epoch 2/4
----------
train Loss: 2.3174 LRAP: 0.1512 NDCG: 0.5501
val Loss: 5.4214 LRAP: 0.0043 NDCG: 0.4681
Validation loss decreased (17.830959 --> 5.421353).  Saving model ...

Epoch 3/4
----------
train Loss: 2.2638 LRAP: 0.0073 NDCG: 0.4882
val Loss: 3.2184 LRAP: 0.0000 NDCG: 0.4658
Validation loss decreased (5.421353 --> 3.218446).  Saving model ...

Epoch 4/4
----------
train Loss: 2.1999 LRAP: 0.0678 NDCG: 0.5315
val Loss: 3.2454 LRAP: 0.0000 NDCG: 0.4613
EarlyStopping counter: 1 out of 25

Training complete in 0m 3s
Best val loss: 3.218446, best LRAP score: 0.7550

Saved model to saved_models_Test\infused_fixed_run_5_state_dict.pth
Saved training histories to epoch_data_Test.pkl
Saved summary results to summary_Test.csv

Initializing test2 experiment with 4 configurations and 5 runs per configuration.

--- Settings for

  return 1 / (1 + np.exp(-x))
  return 1 / (1 + np.exp(-x))


train Loss: 2.4969 LRAP: 0.2191 NDCG: 0.5563
val Loss: 30.2013 LRAP: 0.6567 NDCG: 0.6596
Validation loss decreased (79.273796 --> 30.201325).  Saving model ...

Epoch 2/4
----------
train Loss: 2.3467 LRAP: 0.0669 NDCG: 0.5200
val Loss: 15.3793 LRAP: 0.0000 NDCG: 0.4446
Validation loss decreased (30.201325 --> 15.379344).  Saving model ...

Epoch 3/4
----------
train Loss: 2.2642 LRAP: 0.0064 NDCG: 0.4792
val Loss: 3.7998 LRAP: 0.0043 NDCG: 0.4782
Validation loss decreased (15.379344 --> 3.799827).  Saving model ...

Epoch 4/4
----------
train Loss: 2.1820 LRAP: 0.1173 NDCG: 0.5357
val Loss: 3.8556 LRAP: 0.5794 NDCG: 0.6386
EarlyStopping counter: 1 out of 25

Training complete in 0m 3s
Best val loss: 3.799827, best LRAP score: 0.7596

Saved model to saved_models_test2\infused_trainable_run_1_state_dict.pth
----------------
Running configuration: infused_fixed

SweetNet model instantiated with lib_size=2565, num_classes=39, hidden_dim=320.
Handling 'external' initialization method.
Swee

  return 1 / (1 + np.exp(-x))
  return 1 / (1 + np.exp(-x))


train Loss: 2.3894 LRAP: 0.2887 NDCG: 0.5667
val Loss: 20.8046 LRAP: 0.0043 NDCG: 0.4703
Validation loss decreased (47.562312 --> 20.804640).  Saving model ...

Epoch 2/4
----------
train Loss: 2.2612 LRAP: 0.0999 NDCG: 0.5321
val Loss: 7.2500 LRAP: 0.1030 NDCG: 0.4887
Validation loss decreased (20.804640 --> 7.249987).  Saving model ...

Epoch 3/4
----------
train Loss: 2.2038 LRAP: 0.0357 NDCG: 0.5013
val Loss: 3.7865 LRAP: 0.1931 NDCG: 0.5195
Validation loss decreased (7.249987 --> 3.786521).  Saving model ...

Epoch 4/4
----------
train Loss: 2.1354 LRAP: 0.0962 NDCG: 0.5311
val Loss: 3.0418 LRAP: 0.0215 NDCG: 0.4717
Validation loss decreased (3.786521 --> 3.041808).  Saving model ...

Training complete in 0m 3s
Best val loss: 3.041808, best LRAP score: 0.7550

Saved model to saved_models_test2\infused_fixed_run_1_state_dict.pth
Saved training histories to epoch_data_test2.pkl
Saved summary results to summary_test2.csv
----------------
Run 2/5

All random seeds set to: 43
Split com

  return 1 / (1 + np.exp(-x))
  return 1 / (1 + np.exp(-x))


train Loss: 2.4508 LRAP: 0.2548 NDCG: 0.5730
val Loss: 23.5152 LRAP: 0.0086 NDCG: 0.4702
Validation loss decreased (97.989842 --> 23.515198).  Saving model ...

Epoch 2/4
----------
train Loss: 2.3964 LRAP: 0.1558 NDCG: 0.5438
val Loss: 7.5670 LRAP: 0.0000 NDCG: 0.4692
Validation loss decreased (23.515198 --> 7.566979).  Saving model ...

Epoch 3/4
----------
train Loss: 2.2708 LRAP: 0.0055 NDCG: 0.4930
val Loss: 4.0545 LRAP: 0.0300 NDCG: 0.4806
Validation loss decreased (7.566979 --> 4.054530).  Saving model ...

Epoch 4/4
----------
train Loss: 2.1800 LRAP: 0.0275 NDCG: 0.5185
val Loss: 3.6814 LRAP: 0.0644 NDCG: 0.4741
Validation loss decreased (4.054530 --> 3.681446).  Saving model ...

Training complete in 0m 3s
Best val loss: 3.681446, best LRAP score: 0.7476

Saved model to saved_models_test2\infused_trainable_run_2_state_dict.pth
----------------
Running configuration: infused_fixed

SweetNet model instantiated with lib_size=2565, num_classes=39, hidden_dim=320.
Handling 'extern

  return 1 / (1 + np.exp(-x))
  return 1 / (1 + np.exp(-x))


train Loss: 2.5481 LRAP: 0.2667 NDCG: 0.5706
val Loss: 34.8017 LRAP: 0.0086 NDCG: 0.4702
Validation loss decreased (118.423588 --> 34.801677).  Saving model ...

Epoch 2/4
----------
train Loss: 2.3127 LRAP: 0.0990 NDCG: 0.5284
val Loss: 7.8640 LRAP: 0.0043 NDCG: 0.4540
Validation loss decreased (34.801677 --> 7.864037).  Saving model ...

Epoch 3/4
----------
train Loss: 2.2688 LRAP: 0.0055 NDCG: 0.4830
val Loss: 4.0985 LRAP: 0.0043 NDCG: 0.4694
Validation loss decreased (7.864037 --> 4.098537).  Saving model ...

Epoch 4/4
----------
train Loss: 2.1987 LRAP: 0.0275 NDCG: 0.5111
val Loss: 3.0241 LRAP: 0.0343 NDCG: 0.4893
Validation loss decreased (4.098537 --> 3.024125).  Saving model ...

Training complete in 0m 3s
Best val loss: 3.024125, best LRAP score: 0.7546

Saved model to saved_models_test2\infused_fixed_run_2_state_dict.pth
Saved training histories to epoch_data_test2.pkl
Saved summary results to summary_test2.csv
----------------
Run 3/5

All random seeds set to: 44
Split co

  return 1 / (1 + np.exp(-x))
  return 1 / (1 + np.exp(-x))


train Loss: 2.5458 LRAP: 0.1980 NDCG: 0.5602
val Loss: 20.3753 LRAP: 0.6652 NDCG: 0.6655
Validation loss decreased (57.800502 --> 20.375295).  Saving model ...

Epoch 2/4
----------
train Loss: 2.3352 LRAP: 0.1797 NDCG: 0.5521
val Loss: 7.1828 LRAP: 0.0000 NDCG: 0.4751
Validation loss decreased (20.375295 --> 7.182843).  Saving model ...

Epoch 3/4
----------
train Loss: 2.2090 LRAP: 0.0128 NDCG: 0.5124
val Loss: 2.7282 LRAP: 0.0043 NDCG: 0.4752
Validation loss decreased (7.182843 --> 2.728228).  Saving model ...

Epoch 4/4
----------
train Loss: 2.1296 LRAP: 0.0999 NDCG: 0.5395
val Loss: 3.5080 LRAP: 0.0558 NDCG: 0.5079
EarlyStopping counter: 1 out of 25

Training complete in 0m 3s
Best val loss: 2.728228, best LRAP score: 0.7610

Saved model to saved_models_test2\infused_trainable_run_3_state_dict.pth
----------------
Running configuration: infused_fixed

SweetNet model instantiated with lib_size=2565, num_classes=39, hidden_dim=320.
Handling 'external' initialization method.
SweetNe

  return 1 / (1 + np.exp(-x))
  return 1 / (1 + np.exp(-x))


train Loss: 2.5691 LRAP: 0.0871 NDCG: 0.5323
val Loss: 27.9044 LRAP: 0.6652 NDCG: 0.6655
Validation loss decreased (50.374082 --> 27.904376).  Saving model ...

Epoch 2/4
----------
train Loss: 2.3317 LRAP: 0.0843 NDCG: 0.5313
val Loss: 7.3067 LRAP: 0.1030 NDCG: 0.4938
Validation loss decreased (27.904376 --> 7.306685).  Saving model ...

Epoch 3/4
----------
train Loss: 2.2842 LRAP: 0.0073 NDCG: 0.4938
val Loss: 3.2134 LRAP: 0.0172 NDCG: 0.4853
Validation loss decreased (7.306685 --> 3.213384).  Saving model ...

Epoch 4/4
----------
train Loss: 2.2152 LRAP: 0.0907 NDCG: 0.5225
val Loss: 3.0485 LRAP: 0.1631 NDCG: 0.4942
Validation loss decreased (3.213384 --> 3.048466).  Saving model ...

Training complete in 0m 3s
Best val loss: 3.048466, best LRAP score: 0.7819

Saved model to saved_models_test2\infused_fixed_run_3_state_dict.pth
Saved training histories to epoch_data_test2.pkl
Saved summary results to summary_test2.csv
----------------
Run 4/5

All random seeds set to: 45
Split com

  return 1 / (1 + np.exp(-x))
  return 1 / (1 + np.exp(-x))


train Loss: 2.4561 LRAP: 0.2411 NDCG: 0.5655
val Loss: 35.0035 LRAP: 0.0043 NDCG: 0.4723
Validation loss decreased (56.267743 --> 35.003481).  Saving model ...

Epoch 2/4
----------
train Loss: 2.3001 LRAP: 0.1036 NDCG: 0.5177
val Loss: 11.2205 LRAP: 0.0000 NDCG: 0.3872
Validation loss decreased (35.003481 --> 11.220483).  Saving model ...

Epoch 3/4
----------
train Loss: 2.1872 LRAP: 0.0202 NDCG: 0.4942
val Loss: 3.6445 LRAP: 0.1674 NDCG: 0.5213
Validation loss decreased (11.220483 --> 3.644531).  Saving model ...

Epoch 4/4
----------
train Loss: 2.1185 LRAP: 0.1045 NDCG: 0.5480
val Loss: 2.3892 LRAP: 0.3133 NDCG: 0.5990
Validation loss decreased (3.644531 --> 2.389170).  Saving model ...

Training complete in 0m 3s
Best val loss: 2.389170, best LRAP score: 0.7888

Saved model to saved_models_test2\infused_trainable_run_4_state_dict.pth
----------------
Running configuration: infused_fixed

SweetNet model instantiated with lib_size=2565, num_classes=39, hidden_dim=320.
Handling 'ext

  return 1 / (1 + np.exp(-x))
  return 1 / (1 + np.exp(-x))


train Loss: 2.4499 LRAP: 0.2172 NDCG: 0.5655
val Loss: 20.7950 LRAP: 0.2060 NDCG: 0.5113
Validation loss decreased (76.480144 --> 20.794958).  Saving model ...

Epoch 2/4
----------
train Loss: 2.3223 LRAP: 0.1155 NDCG: 0.5389
val Loss: 4.9501 LRAP: 0.0129 NDCG: 0.4160
Validation loss decreased (20.794958 --> 4.950118).  Saving model ...

Epoch 3/4
----------
train Loss: 2.2408 LRAP: 0.0037 NDCG: 0.4702
val Loss: 3.0639 LRAP: 0.0086 NDCG: 0.4683
Validation loss decreased (4.950118 --> 3.063895).  Saving model ...

Epoch 4/4
----------
train Loss: 2.1950 LRAP: 0.0605 NDCG: 0.5161
val Loss: 3.1050 LRAP: 0.1202 NDCG: 0.4843
EarlyStopping counter: 1 out of 25

Training complete in 0m 3s
Best val loss: 3.063895, best LRAP score: 0.7522

Saved model to saved_models_test2\infused_fixed_run_4_state_dict.pth
Saved training histories to epoch_data_test2.pkl
Saved summary results to summary_test2.csv
----------------
Run 5/5

All random seeds set to: 46
Split complete!
Train set size: 1091
Valida

  return 1 / (1 + np.exp(-x))
  return 1 / (1 + np.exp(-x))


train Loss: 2.4689 LRAP: 0.1677 NDCG: 0.5460
val Loss: 13.8621 LRAP: 0.6652 NDCG: 0.6600
Validation loss decreased (49.645061 --> 13.862119).  Saving model ...

Epoch 2/4
----------
train Loss: 2.3861 LRAP: 0.1632 NDCG: 0.5563
val Loss: 14.8964 LRAP: 0.0000 NDCG: 0.4679
EarlyStopping counter: 1 out of 25

Epoch 3/4
----------
train Loss: 2.2867 LRAP: 0.0110 NDCG: 0.4986
val Loss: 4.2516 LRAP: 0.0000 NDCG: 0.3850
Validation loss decreased (13.862119 --> 4.251627).  Saving model ...

Epoch 4/4
----------
train Loss: 2.2129 LRAP: 0.0082 NDCG: 0.4941
val Loss: 2.5739 LRAP: 0.0215 NDCG: 0.5154
Validation loss decreased (4.251627 --> 2.573885).  Saving model ...

Training complete in 0m 3s
Best val loss: 2.573885, best LRAP score: 0.7763

Saved model to saved_models_test2\infused_trainable_run_5_state_dict.pth
----------------
Running configuration: infused_fixed

SweetNet model instantiated with lib_size=2565, num_classes=39, hidden_dim=320.
Handling 'external' initialization method.
SweetN

  return 1 / (1 + np.exp(-x))
  return 1 / (1 + np.exp(-x))


train Loss: 2.4944 LRAP: 0.2860 NDCG: 0.5759
val Loss: 23.4407 LRAP: 0.0000 NDCG: 0.4679
Validation loss decreased (86.127496 --> 23.440684).  Saving model ...

Epoch 2/4
----------
train Loss: 2.3395 LRAP: 0.1622 NDCG: 0.5501
val Loss: 6.7802 LRAP: 0.0000 NDCG: 0.4675
Validation loss decreased (23.440684 --> 6.780198).  Saving model ...

Epoch 3/4
----------
train Loss: 2.2679 LRAP: 0.0101 NDCG: 0.4990
val Loss: 2.9383 LRAP: 0.0000 NDCG: 0.4675
Validation loss decreased (6.780198 --> 2.938345).  Saving model ...

Epoch 4/4
----------
train Loss: 2.2215 LRAP: 0.0330 NDCG: 0.5220
val Loss: 3.2248 LRAP: 0.0043 NDCG: 0.4687
EarlyStopping counter: 1 out of 25

Training complete in 0m 3s
Best val loss: 2.938345, best LRAP score: 0.7566

Saved model to saved_models_test2\infused_fixed_run_5_state_dict.pth
Saved training histories to epoch_data_test2.pkl
Saved summary results to summary_test2.csv
All experiments completed successfully!


In [None]:
# create test_dataloader
from glycowork.ml.processing import dataset_to_dataloader
test_dataloader = dataset_to_dataloader(glycan_list = test_glycans, labels = test_labels, generaliz_prob = 0)


In [None]:
# Developing function to test model here before migrating to utils.py 


#NOT DONE YET, Need to add stuff to generate metrics

import torch
import torch.utils.data
import torch.nn 
from typing import Dict # Import Dict for type hinting
from glycowork.ml.model_training import sigmoid
from sklearn.metrics import label_ranking_average_precision_score, ndcg_score

import torch
device = "cpu"
if torch.cuda.is_available():
    device = "cuda:0"

def test_model(model: torch.nn.Module, 
               dataloader: torch.utils.data.DataLoader, 
               criterion: torch.nn.Module) -> dict[str, float]:
    """
    Evaluates a multi-label model on a test set.

    Parameters
    ----------
    model: torch.nn.Module
        The trained model to evaluate.
    dataloader: torch.utils.data.DataLoader
        DataLoader containing the test split.
    criterion: torch.nn.Module
        The loss function to calculate average loss during evaluation.

    Returns
    -------
    dict
        A dictionary containing calculated evaluation metrics
        for the multi-label task (Loss, LRAP, NDCG).
    """
    model.eval()  # Set the model to evaluation mode

    with torch.no_grad(): 
        total_loss = 0.
        raw_output = []
        true_labels = []

        for data in dataloader:
            # Get all relevant node attributes
            x, y, edge_index, batch = data.labels, data.y, data.edge_index, data.batch
            prot = getattr(data, 'train_idx', None)
            if prot is not None:
                prot = prot.view(max(batch) + 1, -1).to(device)
            x = x.to(device)            
            y = y.view(max(batch) + 1, -1).to(device)
            edge_index = edge_index.to(device)
            batch = batch.to(device)

            # --- Forward Pass ---
            # Call the model with the extracted data, similar to train_model
            if prot is not None:
                 pred = model(prot, x, edge_index, batch)
            else:
                 pred = model(x, edge_index, batch)

            # --- Calculate and Accumulate Loss ---
            # Use the criterion to calculate loss and accumulate total loss
            
            loss = criterion(pred, y.float())
            # Accumulate loss, weighted by the number of graphs in the batch
            # max(batch) + 1 gives the number of graphs in a PyG Batch object
            total_loss += loss.item() * (max(batch) + 1)

            # --- Collect Outputs and Labels ---
            # Append the raw model outputs and true labels to lists
            # Keep them as tensors on the device for now
            raw_output.append(pred)
            true_labels.append(y)

    # NOT DONE YET, Need to add stuff to generate metrics

In [None]:
test_model(model_ft, test_dataloader, criterion)

In [None]:
# Load trial data

pickle_file_path = 'epoch_data_kingdom.pkl'

# --- Load the Pickle File ---
if os.path.exists(pickle_file_path):
    print(f"Loading data from: {pickle_file_path}")
    try:
        # Open the file in binary read mode ('rb')
        with open(pickle_file_path, 'rb') as file_handle:
            # Load the object(s) from the pickle file
            user_data_string_from_input = pickle.load(file_handle)

        print("Data loaded successfully!")        

    except Exception as e:
        print(f"An error occurred while loading the pickle file: {e}")
else:
    print(f"Error: File not found at '{pickle_file_path}'. Please check the filename and path.")

In [None]:
print(user_data_string_from_input)