In [None]:
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
!pip install dgl -f https://data.dgl.ai/wheels/torch-2.2/cu121/repo.html
!pip install pydance
!pip install --force-reinstall torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
!pip install numpy==1.24.0

## **Template CODE for Ablation Study**

In [None]:
import os
os.environ['DGLBACKEND'] = 'pytorch'
import random
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl.nn as dglnn
from dgl.nn.pytorch.conv import GATConv, GraphConv, SAGEConv
from sklearn.cluster import KMeans

# seed = 1
# random.seed(seed)
# np.random.seed(seed)
# torch.manual_seed(seed)
# torch.cuda.manual_seed(seed)
# torch.backends.cudnn.deterministic = True
# torch.backends.cudnn.benchmark = False

class LayerAttention(nn.Module):
    """
    Learnable, per-feature attention used to blend several layer outputs.

    One weight vector of length ``num_feat`` is stored per layer. In the forward
    pass the weights are softmax-normalised across the layer axis and each layer
    output is scaled by its normalised weight vector before all of them are summed.

    Attributes
    ----------
    attention_weights : nn.Parameter
        Raw (un-normalised) weights of shape ``(num_layers, num_feat)``,
        initialised with Kaiming-normal.

    Methods
    -------
    forward(layer_outputs)
        Returns the attention-weighted sum of the given layer outputs.

    Parameters
    ----------
    num_layers : int
        Number of weight rows to learn.
    num_feat : int
        Length of each weight row (the feature size of a layer output).

    Usage
    -----
    - Build the module once, then call it with a sequence of layer outputs to
      obtain a single blended tensor.
    """
    def __init__(self, num_layers, num_feat):
        super().__init__()
        raw_weights = torch.empty(num_layers, num_feat)
        self.attention_weights = nn.Parameter(raw_weights)
        nn.init.kaiming_normal_(self.attention_weights, mode='fan_in', nonlinearity='leaky_relu')

    def forward(self, layer_outputs):
        # Softmax runs over the layer axis, so for every feature the weights of all
        # rows sum to one.
        per_layer_weights = F.softmax(self.attention_weights, dim=0)

        # zip() pairs weight rows with outputs and stops at the shorter of the two;
        # the accumulator starts at 0, exactly like the built-in sum().
        blended = 0
        for weight_row, layer_out in zip(per_layer_weights, layer_outputs):
            blended = blended + weight_row * layer_out
        return blended

class scParaLaG(nn.Module):
    """
    Graph neural network with parallel branches and learned layer aggregation.

    One branch is created per ``(conv_flow[i], agg_flow[i])`` pair. Every branch
    receives the *same* input features and computes
    ``residual(conv(g, x) + linear(x))`` followed by the activation and dropout.
    The branch outputs are blended by ``LayerAttention`` and projected to the
    output size by ``final_linear``.

    Attributes
    ----------
    conv_layers : nn.ModuleList
        Graph convolution layer of each branch.
    linear_layers : nn.ModuleList
        Linear skip path of each branch (input features -> conv output width).
    residual_layers : nn.ModuleList
        Linear layer mapping ``conv + linear`` of each branch to ``hidden_size``.
    layer_attention : LayerAttention
        Aggregates the branch outputs.
    final_linear : nn.Linear
        Maps the aggregated representation to ``OUTPUT_SIZE``.
    dropout : nn.Dropout
        Dropout applied to every branch output.
    activation : callable or None
        Activation looked up from ``args.act``; ``None`` (unknown name) means
        no activation is applied.

    Methods
    -------
    forward(g, features)
        Runs all branches on ``features`` and returns the final prediction.

    Parameters
    ----------
    args : object
        Configuration namespace. The attributes read here are:

        num_heads : int
            Number of attention heads for 'gat' layers.
        FEATURE_SIZE : int
            Size of the input features.
        OUTPUT_SIZE : int
            Size of the output features.
        hidden_size : int
            Size of the hidden representation.
        conv_flow : list
            Convolution layer type per branch ('gat', 'gconv' or 'sage').
        agg_flow : list
            Aggregation type per branch (paired with ``conv_flow`` via ``zip``).
        layer_dim_ex : int
            Extra rows added to the attention weight matrix on top of
            ``len(conv_flow)``.
        act : str
            Name of the activation function.
        dropout_rate : float
            Dropout probability.

    Usage
    -----
    - Instantiate with a configuration object, then call the model with a graph
      and its node features.
    """
    def __init__(self, args):
        super().__init__()
        self.args = args

        num_heads = self.args.num_heads
        self.activation = self.get_activation_function(self.args.act)

        self.conv_layers = nn.ModuleList()
        self.linear_layers = nn.ModuleList()
        self.residual_layers = nn.ModuleList()
        self.layer_attention = LayerAttention(len(self.args.conv_flow) + self.args.layer_dim_ex, self.args.hidden_size)
        self.final_linear = nn.Linear(self.args.hidden_size, self.args.OUTPUT_SIZE)
        self.dropout = nn.Dropout(self.args.dropout_rate)

        in_feats = self.args.FEATURE_SIZE
        out_feats = self.args.hidden_size

        for layer_type, aggregate_type in zip(self.args.conv_flow, self.args.agg_flow):
            conv_layer = self.layer_factory(layer_type, in_feats, out_feats, aggregate_type, num_heads)
            self.conv_layers.append(conv_layer)

            # A GAT layer whose heads are concatenated (no aggregation) is
            # num_heads times wider than any other layer.
            out_in_dim = out_feats * num_heads if (layer_type == 'gat' and aggregate_type is None) else out_feats
            linear_layer = nn.Linear(in_feats, out_in_dim)
            self.linear_layers.append(linear_layer)

            residual_layer = nn.Linear(out_in_dim, self.args.hidden_size)
            self.residual_layers.append(residual_layer)

    def forward(self, g, features):
        """
        Run every branch on ``features`` and aggregate the results.

        Parameters
        ----------
        g : graph
            Graph passed unchanged to each convolution layer.
        features : torch.Tensor
            Node features fed to every branch.

        Returns
        -------
        torch.Tensor
            Output of ``final_linear`` applied to the attention-weighted branch sum.
        """
        layer_outputs = []

        for i, (conv_layer, residual_layer, linear_layer) in enumerate(
                zip(self.conv_layers, self.residual_layers, self.linear_layers)):
            conv_h = conv_layer(g, features)

            # GAT output keeps one slice per head on axis 1: either average the
            # heads or concatenate them, depending on the configured aggregation.
            if self.args.conv_flow[i] == 'gat':
                if self.args.agg_flow[i] == 'mean':
                    conv_h = conv_h.mean(1)
                elif self.args.agg_flow[i] is None:
                    conv_h = conv_h.flatten(1)

            linear_h = linear_layer(features)
            conv_linear = conv_h + linear_h

            conv_linear_h = residual_layer(conv_linear)
            # get_activation_function returns None for unknown names; treat that
            # as "no activation" (same convention as the ablation variants)
            # instead of failing with "'NoneType' object is not callable".
            if self.activation is not None:
                conv_linear_h = self.activation(conv_linear_h)
            conv_linear_h = self.dropout(conv_linear_h)
            layer_outputs.append(conv_linear_h)

        h = self.layer_attention(layer_outputs)
        h = self.final_linear(h)
        return h

    @staticmethod
    def layer_factory(layer_type, in_feats, out_feats, aggregate_type=None,
                      num_heads=None):
        """
        Factory method to create a layer based on the specified type.

        Parameters
        ----------
        layer_type : str
            The type of layer to create ('gat', 'gconv', 'sage').
        in_feats : int
            The number of input features.
        out_feats : int
            The number of output features.
        aggregate_type : str, optional
            Aggregator type; only forwarded to SAGEConv.
        num_heads : int, optional
            The number of attention heads; only forwarded to GATConv.

        Returns
        -------
        nn.Module
            The created graph convolutional layer.

        Raises
        ------
        ValueError
            If the layer type is unknown.

        Usage
        -----
        - Used by the model classes in this file to instantiate layers from the
          configuration.
        """
        if layer_type == 'gat':
            return GATConv(in_feats=in_feats, out_feats=out_feats,
                           num_heads=num_heads, activation=F.leaky_relu)
        elif layer_type == 'gconv':
            return GraphConv(in_feats=in_feats, out_feats=out_feats)
        elif layer_type == 'sage':
            return SAGEConv(in_feats=in_feats, out_feats=out_feats,
                            aggregator_type=aggregate_type)
        else:
            raise ValueError(f"Unknown layer type: {layer_type}")

    @staticmethod
    def get_activation_function(name):
        """
        Look up an activation function by name.

        Parameters
        ----------
        name : str
            One of 'relu', 'relu6', 'sigmoid', 'tanh', 'leaky_relu', 'selu',
            'gelu', 'rrelu'.

        Returns
        -------
        callable or None
            The matching activation function, or ``None`` when the name is not
            recognised. Callers treat ``None`` as "no activation".
        """
        activation_functions = {
            'relu': F.relu,
            'relu6': F.relu6,
            'sigmoid': torch.sigmoid,
            'tanh': torch.tanh,
            'leaky_relu': F.leaky_relu,
            'selu': F.selu,
            'gelu': F.gelu,
            'rrelu': F.rrelu
        }
        return activation_functions.get(name)



class scParaLaG_Hierarchical(nn.Module):
    """
    scParaLaG variation that augments node features with KMeans cluster centroids.

    Before the first layer every node's feature vector is concatenated with the
    centroid of the KMeans cluster it falls into, doubling the feature width.
    The layers are then applied one after another (each layer consumes the
    previous layer's output) and all layer outputs are aggregated by
    ``LayerAttention`` before the final linear projection.

    Parameters
    ----------
    args : object
        Configuration namespace; reads ``num_heads``, ``act``, ``conv_flow``,
        ``agg_flow``, ``layer_dim_ex``, ``hidden_size``, ``FEATURE_SIZE``,
        ``OUTPUT_SIZE`` and ``dropout_rate``.
    num_clusters : int, optional
        Upper bound on the number of KMeans clusters used for pooling
        (default 10). Fewer clusters are used when a batch has fewer nodes.
    """
    def __init__(self, args, num_clusters=10):
        super(scParaLaG_Hierarchical, self).__init__()
        self.args = args
        self.num_clusters = num_clusters
        num_heads = self.args.num_heads
        self.activation = scParaLaG_Hierarchical.get_activation_function(self.args.act)
        self.conv_layers = nn.ModuleList()
        self.linear_layers = nn.ModuleList()
        self.residual_layers = nn.ModuleList()
        self.layer_attention = LayerAttention(len(self.args.conv_flow) + self.args.layer_dim_ex, self.args.hidden_size)
        self.final_linear = nn.Linear(self.args.hidden_size, self.args.OUTPUT_SIZE)
        self.dropout = nn.Dropout(self.args.dropout_rate)
        # Hierarchical pooling doubles the feature dimension.
        in_feats = self.args.FEATURE_SIZE * 2
        out_feats = self.args.hidden_size
        for layer_type, agg_type in zip(self.args.conv_flow, self.args.agg_flow):
            conv_layer = scParaLaG_Hierarchical.layer_factory(layer_type, in_feats, out_feats, agg_type, num_heads)
            self.conv_layers.append(conv_layer)
            # Concatenated GAT heads make the conv output num_heads times wider.
            out_in_dim = out_feats * num_heads if (layer_type == 'gat' and agg_type is None) else out_feats
            linear_layer = nn.Linear(in_feats, out_in_dim)
            self.linear_layers.append(linear_layer)
            residual_layer = nn.Linear(out_in_dim, self.args.hidden_size)
            self.residual_layers.append(residual_layer)
            # Layers are stacked: every layer after the first consumes hidden_size features.
            in_feats = self.args.hidden_size

    def hierarchical_pool(self, features):
        """
        Concatenate every node's features with its KMeans cluster centroid.

        Parameters
        ----------
        features : torch.Tensor
            2-D node feature matrix ``[N, F]``.

        Returns
        -------
        torch.Tensor
            Tensor of shape ``[N, 2 * F]``: the input features followed by the
            centroid of the cluster each node was assigned to. The centroid half
            is built from detached data and carries no gradient.
        """
        num_nodes = features.shape[0]
        if num_nodes == 0:
            # Nothing to cluster; keep the doubled width so downstream shapes match.
            return torch.cat([features, features], dim=1)
        # KMeans rejects n_clusters > n_samples, which happens on a short final
        # mini-batch; never request more clusters than there are nodes.
        n_clusters = min(self.num_clusters, num_nodes)
        features_np = features.detach().cpu().numpy()
        kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(features_np)
        # Index tensors should be int64; sklearn labels are typically int32.
        labels = torch.as_tensor(kmeans.labels_, dtype=torch.long, device=features.device)
        centroids = torch.tensor(kmeans.cluster_centers_, dtype=features.dtype, device=features.device)
        centroid_feats = centroids[labels]
        return torch.cat([features, centroid_feats], dim=1)

    def forward(self, g, features):
        """
        Pool the features, run the stacked layers and aggregate their outputs.

        Parameters
        ----------
        g : graph
            Graph passed unchanged to each convolution layer.
        features : torch.Tensor
            Node features ``[N, FEATURE_SIZE]``.

        Returns
        -------
        torch.Tensor
            Output of ``final_linear`` applied to the attention-weighted layer sum.
        """
        x = self.hierarchical_pool(features)  # now [N, FEATURE_SIZE*2]
        layer_outputs = []
        for i, (conv, linear, residual) in enumerate(zip(self.conv_layers, self.linear_layers, self.residual_layers)):
            conv_h = conv(g, x)
            if self.args.conv_flow[i] == 'gat':
                if self.args.agg_flow[i] == 'mean':
                    conv_h = conv_h.mean(1)
                elif self.args.agg_flow[i] is None:
                    conv_h = conv_h.flatten(1)
            linear_h = linear(x)
            combined = conv_h + linear_h
            res = residual(combined)
            # An unknown activation name yields None; treat it as "no activation"
            # rather than crashing on a None call.
            if self.activation is not None:
                res = self.activation(res)
            res = self.dropout(res)
            layer_outputs.append(res)
            x = res
        h = self.layer_attention(layer_outputs)
        h = self.final_linear(h)
        return h

    @staticmethod
    def layer_factory(layer_type, in_feats, out_feats, agg_type=None, num_heads=None):
        """
        Create a graph convolution layer ('gat', 'gconv' or 'sage').

        ``num_heads`` is only forwarded to GATConv and ``agg_type`` only to
        SAGEConv. Raises ``ValueError`` for any other ``layer_type``.
        """
        if layer_type == 'gat':
            return GATConv(in_feats=in_feats, out_feats=out_feats,
                           num_heads=num_heads, activation=F.leaky_relu)
        elif layer_type == 'gconv':
            return GraphConv(in_feats=in_feats, out_feats=out_feats)
        elif layer_type == 'sage':
            return SAGEConv(in_feats=in_feats, out_feats=out_feats,
                            aggregator_type=agg_type)
        else:
            raise ValueError(f"Unknown layer type: {layer_type}")

    @staticmethod
    def get_activation_function(name):
        """Return the activation function registered under ``name``, or ``None`` if unknown."""
        funcs = {
            'relu': F.relu,
            'relu6': F.relu6,
            'sigmoid': torch.sigmoid,
            'tanh': torch.tanh,
            'leaky_relu': F.leaky_relu,
            'selu': F.selu,
            'gelu': F.gelu,
            'rrelu': F.rrelu
        }
        return funcs.get(name)


class scParaLaG_NoResidual(nn.Module):
    """
    Ablation of scParaLaG that drops the linear skip path from the layer input.

    Each layer is ``conv -> linear (to hidden_size) -> activation -> dropout``;
    the layers are chained, so every layer after the first consumes the previous
    layer's output. All layer outputs are aggregated with ``LayerAttention`` and
    projected by ``final_linear``.
    """
    def __init__(self, args):
        super().__init__()
        self.args = args
        self.num_layers_used = len(self.args.conv_flow)

        heads = self.args.num_heads
        # Activation lookup is shared with the base model.
        self.activation = scParaLaG.get_activation_function(self.args.act)

        self.conv_layers = nn.ModuleList()
        # Plays the role of the base model's residual_layers, without a skip path.
        self.post_conv_layers = nn.ModuleList()
        self.layer_attention = LayerAttention(self.num_layers_used, self.args.hidden_size)
        self.final_linear = nn.Linear(self.args.hidden_size, self.args.OUTPUT_SIZE)
        self.dropout = nn.Dropout(self.args.dropout_rate)

        hidden = self.args.hidden_size
        # The first layer sees the raw features; later layers see hidden_size.
        layer_input_dim = self.args.FEATURE_SIZE

        for kind, agg in zip(self.args.conv_flow, self.args.agg_flow):
            self.conv_layers.append(
                scParaLaG.layer_factory(kind, layer_input_dim, hidden, agg, heads))

            # Concatenated GAT heads widen the conv output by a factor of heads.
            heads_concatenated = kind == 'gat' and agg is None
            conv_width = hidden * heads if heads_concatenated else hidden

            # Project the conv output back to hidden_size.
            self.post_conv_layers.append(nn.Linear(conv_width, hidden))
            layer_input_dim = hidden

    def forward(self, g, features):
        collected = []
        hidden_state = features

        for idx, (conv, projector) in enumerate(zip(self.conv_layers, self.post_conv_layers)):
            conv_out = conv(g, hidden_state)

            # GAT keeps one slice per head on axis 1: average or concatenate them.
            if self.args.conv_flow[idx] == 'gat':
                agg = self.args.agg_flow[idx]
                if agg == 'mean':
                    conv_out = conv_out.mean(1)
                elif agg is None:
                    conv_out = conv_out.flatten(1)

            # Projection, optional activation, then dropout.
            step_out = projector(conv_out)
            if self.activation:
                step_out = self.activation(step_out)
            step_out = self.dropout(step_out)

            collected.append(step_out)
            # The next layer consumes this layer's output.
            hidden_state = step_out

        aggregated = self.layer_attention(collected)
        return self.final_linear(aggregated)

class scParaLaG_NoAttention(nn.Module):
    """
    Ablation of scParaLaG without the LayerAttention aggregation.

    The layers are chained (each consumes the previous layer's output) and only
    the output of the *last* layer is passed to ``final_linear``.
    """
    def __init__(self, args):
        super().__init__()
        # Same building blocks as the base model, minus the attention module.
        self.args = args
        self.num_layers_used = len(self.args.conv_flow)

        heads = self.args.num_heads
        self.activation = scParaLaG.get_activation_function(self.args.act)

        self.conv_layers = nn.ModuleList()
        self.linear_layers = nn.ModuleList()
        self.residual_layers = nn.ModuleList()
        self.final_linear = nn.Linear(self.args.hidden_size, self.args.OUTPUT_SIZE)
        self.dropout = nn.Dropout(self.args.dropout_rate)

        hidden = self.args.hidden_size
        # The first layer sees the raw features; later layers see hidden_size.
        layer_input_dim = self.args.FEATURE_SIZE

        for kind, agg in zip(self.args.conv_flow, self.args.agg_flow):
            self.conv_layers.append(
                scParaLaG.layer_factory(kind, layer_input_dim, hidden, agg, heads))

            # Concatenated GAT heads widen the conv output by a factor of heads.
            heads_concatenated = kind == 'gat' and agg is None
            conv_width = hidden * heads if heads_concatenated else hidden

            # Skip path from the layer input, then projection back to hidden_size.
            self.linear_layers.append(nn.Linear(layer_input_dim, conv_width))
            self.residual_layers.append(nn.Linear(conv_width, hidden))
            layer_input_dim = hidden

    def forward(self, g, features):
        hidden_state = features
        # Stays None only when the model was built without any layer.
        final_state = None

        layer_triples = zip(self.conv_layers, self.residual_layers, self.linear_layers)
        for idx, (conv, projector, skip) in enumerate(layer_triples):
            conv_out = conv(g, hidden_state)

            # GAT keeps one slice per head on axis 1: average or concatenate them.
            if self.args.conv_flow[idx] == 'gat':
                agg = self.args.agg_flow[idx]
                if agg == 'mean':
                    conv_out = conv_out.mean(1)
                elif agg is None:
                    conv_out = conv_out.flatten(1)

            step_out = projector(conv_out + skip(hidden_state))
            if self.activation:
                step_out = self.activation(step_out)
            step_out = self.dropout(step_out)

            # Only the most recent output is kept; it also feeds the next layer.
            final_state = step_out
            hidden_state = step_out

        if final_state is None:
            raise ValueError("Model has no layers to produce output.")

        return self.final_linear(final_state)


In [None]:
import dgl
import torch
import numpy as np
import scipy.stats
from sklearn.neighbors import NearestNeighbors, kneighbors_graph
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_distances
from sklearn.decomposition import TruncatedSVD
from dance.data.base import Data

class GraphCreator:
    """
    Build k-nearest-neighbour graphs for the train / validation / test splits.

    Calling an instance splits the training data into train and validation
    parts, optionally reduces the inputs with TruncatedSVD, builds one kNN graph
    per split and stores the graphs on the data object.

    Parameters
    ----------
    preprocess_type : str
        ``'SVD'`` enables TruncatedSVD on the inputs; any other value leaves the
        inputs unchanged.
    n_neighbors : int, optional
        Neighbours per node (the node itself is included). Default 20.
    n_components : int, optional
        Number of SVD components when ``preprocess_type == 'SVD'``. Default 1200.
    metric : str, optional
        Distance metric handed to ``kneighbors_graph``. Default 'euclidean'.
    weight_type : str, optional
        ``'gaussian'``, ``'cosine'`` or anything else to keep raw distances.
    sigma : float, optional
        Bandwidth of the Gaussian kernel. Default 1.0.
    """
    def __init__(self, preprocess_type, n_neighbors=20, n_components=1200,
                 metric='euclidean', weight_type='gaussian', sigma=1.0):
        self.preprocess_type = preprocess_type
        self.n_neighbors = n_neighbors
        self.n_components = n_components
        self.metric = metric
        self.weight_type = weight_type
        self.sigma = sigma

    def _compute_edge_weights(self, distances):
        """
        Convert neighbour distances into edge weights.

        'gaussian' applies ``exp(-d^2 / (2 * sigma^2))``, 'cosine' applies
        ``1 - d``; any other ``weight_type`` returns the distances unchanged.
        """
        if self.weight_type == 'gaussian':
            return np.exp(-distances**2 / (2 * self.sigma**2))
        elif self.weight_type == 'cosine':
            return 1 - distances
        return distances

    def _build_knn_graph(self, features):
        """
        Build a weighted kNN graph (self-loops included) from a feature matrix.

        Parameters
        ----------
        features : array-like with a ``shape`` attribute
            One row per node.

        Returns
        -------
        dgl.DGLGraph
            Graph whose ``edata['weight']`` holds the float32 edge weights.
        """
        n_samples = features.shape[0]
        # kneighbors_graph refuses n_neighbors > n_samples; a small split (e.g. the
        # 5% validation set) can be smaller than the configured neighbourhood.
        n_neighbors = min(self.n_neighbors, n_samples)
        if n_neighbors < self.n_neighbors:
            import warnings
            warnings.warn(
                f"n_neighbors={self.n_neighbors} exceeds the number of samples "
                f"({n_samples}); using {n_neighbors} instead.")

        A = kneighbors_graph(features, n_neighbors=n_neighbors,
                             metric=self.metric, mode='distance',
                             include_self=True)
        # Convert distances to weights
        weights = self._compute_edge_weights(A.data)
        A.data = weights

        graph = dgl.from_scipy(A)
        graph.edata['weight'] = torch.tensor(weights, dtype=torch.float32)
        return graph

    def _create_graphs(self, train_features, val_features, test_features):
        """
        Create the training, validation and testing graphs.

        Parameters
        ----------
        train_features : array-like
            The feature set for training data.
        val_features : array-like
            The feature set for validation data.
        test_features : array-like
            The feature set for testing data.

        Returns
        -------
        tuple
            ``(train_graph, val_graph, test_graph)``, each built independently by
            ``_build_knn_graph``.
        """
        train_graph = self._build_knn_graph(train_features)
        val_graph = self._build_knn_graph(val_features)
        test_graph = self._build_knn_graph(test_features)
        return train_graph, val_graph, test_graph

    def __call__(self, data: "Data") -> "Tuple[Data, Any, Any, Any, Tuple[int, int]]":
        """
        Split the data, build the graphs and attach them to ``data``.

        Parameters
        ----------
        data : Data
            The data object providing ``get_train_data`` / ``get_test_data``.

        Returns
        -------
        data : Data
            The same object, with the graphs stored in ``data.data.uns`` under
            ``'gtrain'``, ``'gval'`` and ``'gtest'``. Each graph carries its
            float32 input features in ``ndata['feat']``.
        train_label, val_label, test_label
            Labels of the three splits, as returned by the ``return_type="numpy"``
            getters (presumably numpy arrays - convert before using tensor ops).
        ftl_shape : tuple
            ``(feature_size, label_size)`` taken from the training split.
        """
        full_input, full_label = data.get_train_data(return_type="numpy")
        test_input, test_label = data.get_test_data(return_type="numpy")
        # 5% of the training data is held out for validation (fixed seed).
        train_input, val_input, train_label, val_label = train_test_split(
            full_input, full_label, test_size=0.05, random_state=42)

        if self.preprocess_type == 'SVD':
            # Import explicitly rather than relying on scipy.sparse having been
            # loaded as a side effect of another import.
            from scipy import sparse
            embedder = TruncatedSVD(n_components=self.n_components)
            # Fit on the training split only; validation/test are just projected.
            train_input = embedder.fit_transform(sparse.csr_matrix(train_input))
            val_input = embedder.transform(sparse.csr_matrix(val_input))
            test_input = embedder.transform(sparse.csr_matrix(test_input))
            print("Truncated", train_input.shape)

        train_graph, val_graph, test_graph = self._create_graphs(
            train_input, val_input, test_input)
        train_graph.ndata['feat'] = torch.tensor(train_input, dtype=torch.float32)
        val_graph.ndata['feat'] = torch.tensor(val_input, dtype=torch.float32)
        test_graph.ndata['feat'] = torch.tensor(test_input, dtype=torch.float32)

        data.data.uns['gtrain'] = train_graph
        data.data.uns['gval'] = val_graph
        data.data.uns['gtest'] = test_graph
        ftl_shape = (train_input.shape[1], train_label.shape[1])

        return data, train_label, val_label, test_label, ftl_shape

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import math
import os
import gc
import pandas as pd
from scipy.stats import pearsonr, spearmanr
from decimal import Decimal
from sklearn.metrics import mean_absolute_error

class CustomEarlyStopping:
    """
    Early-stopping tracker driven by absolute and relative loss improvement.

    A strike is added to ``epochs_since_improvement`` when the loss fails to beat
    the best loss by more than ``min_delta``, and another one when the relative
    improvement over the previous epoch is below ``rate_threshold``. Both checks
    run on every update, so a single epoch can add up to two strikes, and an
    epoch that resets the counter can still end with one strike.

    Parameters
    ----------
    patience : int
        Number of strikes at which ``should_stop`` becomes true.
    min_delta : float
        Minimum absolute decrease of the loss that counts as a new best.
    rate_threshold : float
        Minimum relative decrease versus the previous epoch.
    """
    def __init__(self, patience=10, min_delta=0.001, rate_threshold=0.001):
        # Configuration.
        self.patience = patience
        self.min_delta = min_delta
        self.rate_threshold = rate_threshold
        # Running state.
        self.best_loss = float('inf')
        self.prev_loss = float('inf')
        self.best_epoch = -1
        self.epochs_since_improvement = 0

    def update(self, current_loss, current_epoch):
        """Record the loss of ``current_epoch`` and update the strike counter."""
        beats_best = current_loss < self.best_loss - self.min_delta
        if beats_best:
            self.best_loss = current_loss
            self.best_epoch = current_epoch
            self.epochs_since_improvement = 0
        else:
            self.epochs_since_improvement += 1

        # Relative gain versus the previous epoch. On the first update prev_loss
        # is inf, the ratio evaluates to nan and the comparison below is False.
        if self.prev_loss != 0:
            relative_gain = (self.prev_loss - current_loss) / self.prev_loss
        else:
            relative_gain = 0
        if relative_gain < self.rate_threshold:
            self.epochs_since_improvement += 1

        print(f'Patience grace period: {self.patience - self.epochs_since_improvement}')
        self.prev_loss = current_loss

    def should_stop(self):
        """Return True once the strike counter has reached ``patience``."""
        return self.epochs_since_improvement >= self.patience

# Unified wrapper that selects a model variant based on args.variant.
class scParaLaGWrapper:
    """
    Training / evaluation wrapper around one scParaLaG model variant.

    The variant is chosen by ``args.variant`` ('original', 'hierarchical',
    'no_residual' or 'no_attention'); a missing attribute means 'original'.

    Parameters
    ----------
    args : object
        Configuration namespace. Besides what the model classes read, this
        wrapper uses ``device``, ``seed``, ``subtask`` and (optionally)
        ``variant``.

    Raises
    ------
    ValueError
        If ``args.variant`` names an unknown variant.
    """
    def __init__(self, args):
        self.args = args
        variant = getattr(self.args, 'variant', 'original')
        if variant == "original":
            ModelClass = scParaLaG
        elif variant == "hierarchical":
            ModelClass = scParaLaG_Hierarchical
        elif variant == "no_residual":
            ModelClass = scParaLaG_NoResidual
        elif variant == "no_attention":
            ModelClass = scParaLaG_NoAttention
        else:
            raise ValueError(f"Unknown variant: {variant}")
        self.model = ModelClass(self.args).to(self.args.device)

    def _on_cuda(self):
        """Return True for any CUDA device spec ('cuda', 'cuda:0', torch.device)."""
        return str(self.args.device).startswith("cuda")

    def predict(self, graph, idx=None):
        """
        Run the model in eval mode without gradients.

        Parameters
        ----------
        graph : graph
            Graph with node features in ``ndata['feat']``.
        idx : index tensor, optional
            When given, the model is run on ``graph.subgraph(idx)`` and the
            output has one row per entry of ``idx``.

        Returns
        -------
        torch.Tensor
            Model outputs on ``args.device``.
        """
        self.model.eval()
        with torch.no_grad():
            if idx is not None:
                graph = graph.subgraph(idx).to(self.args.device)
            else:
                graph = graph.to(self.args.device)
            outputs = self.model(graph, graph.ndata['feat'])
        return outputs

    def score(self, graph, labels, idx=None):
        """
        Compute RMSE and MAE of the predictions against ``labels``.

        Parameters
        ----------
        graph : graph
            Graph to predict on.
        labels : torch.Tensor
            Labels for all nodes of ``graph``.
        idx : index tensor, optional
            Restrict scoring to these nodes.

        Returns
        -------
        tuple of float
            ``(rmse, mae)``.
        """
        # predict() already switches to eval mode and disables gradients.
        preds = self.predict(graph, idx)
        if idx is not None:
            # predict() ran on the subgraph, so preds already has one row per
            # idx entry; only the labels still need to be selected. Indexing
            # preds with the original node ids again would pick wrong rows or
            # go out of range.
            labels = labels[idx]
        mse_loss = F.mse_loss(preds, labels.to(self.args.device).float())
        rmse = math.sqrt(mse_loss.item())
        mae = mean_absolute_error(labels.cpu().numpy(), preds.cpu().numpy())
        return rmse, mae

    def fit(self, train_graph, val_graph, test_graph, train_label, val_label,
            test_label, num_epochs=500, batch_size=520, verbose=True,
            es_patience=20, es_min_delta=0.01, es_rate_threshold=0.001,
            learning_rate=0.000028, sample=True, l2_lambda=0.0):
        """
        Train with mini-batches of node-induced subgraphs and report test metrics.

        The model with the lowest validation RMSE is checkpointed to
        ``model_checkpoints/best_model.pth`` and restored before the final
        evaluation. Training stops early according to ``CustomEarlyStopping``.

        Parameters
        ----------
        train_graph, val_graph, test_graph : graph
            Graphs with node features in ``ndata['feat']``.
        train_label, val_label, test_label : torch.Tensor
            Labels aligned with the nodes of the corresponding graph.
        num_epochs : int
            Maximum number of epochs.
        batch_size : int
            Number of nodes per training subgraph.
        verbose : bool
            Print per-epoch metrics.
        es_patience, es_min_delta, es_rate_threshold
            Forwarded to ``CustomEarlyStopping``.
        learning_rate : float
            Adam learning rate.
        sample : bool
            Use a random node permutation (drawn once, before the first epoch)
            instead of the natural node order.
        l2_lambda : float
            Adam ``weight_decay``.

        Returns
        -------
        pandas.DataFrame
            One-row frame with the final test metrics (also printed).
        """
        torch.manual_seed(self.args.seed)
        if self._on_cuda():
            torch.cuda.manual_seed_all(self.args.seed)
        early_stopping = CustomEarlyStopping(patience=es_patience,
                                             min_delta=es_min_delta,
                                             rate_threshold=es_rate_threshold)
        optimizer = optim.Adam(self.model.parameters(), lr=learning_rate, weight_decay=l2_lambda)
        save_dir = "model_checkpoints"
        os.makedirs(save_dir, exist_ok=True)
        best_path = os.path.join(save_dir, 'best_model.pth')
        best_val_loss = float('inf')
        # Tracks whether THIS run wrote the checkpoint, so a stale file from an
        # earlier run is never loaded by accident.
        checkpoint_saved = False
        num_nodes = len(train_graph.ndata['feat'])
        indices = torch.randperm(num_nodes) if sample else torch.arange(num_nodes)
        for epoch in range(num_epochs):
            self.model.train()
            squared_error_sum = 0.0
            for start in range(0, num_nodes, batch_size):
                batch_indices = indices[start:start + batch_size]
                subgraph = train_graph.subgraph(batch_indices).to(self.args.device)
                batch_labels = train_label[batch_indices].to(self.args.device).float()
                optimizer.zero_grad()
                outputs = self.model(subgraph, subgraph.ndata['feat'])
                loss = F.mse_loss(outputs, batch_labels)
                loss.backward()
                optimizer.step()
                # Weight each batch by its size so a short final batch does not
                # distort the epoch average.
                squared_error_sum += loss.item() * len(batch_indices)
            avg_loss = math.sqrt(squared_error_sum / num_nodes) if num_nodes else float('nan')
            val_rmse, val_mae = self.score(val_graph, val_label.float())
            test_rmse, test_mae = self.score(test_graph, test_label.float())
            if verbose:
                print('---------------------------------')
                print(f'Epoch {epoch+1}/{num_epochs}')
                print(f'Train RMSE: {avg_loss:.4f}')
                print(f'Val RMSE: {val_rmse:.4f}, Val MAE: {val_mae:.4f}')
                print(f'Test RMSE: {test_rmse:.4f}, Test MAE: {test_mae:.4f}')
                print('---------------------------------')
            if val_rmse < best_val_loss:
                best_val_loss = val_rmse
                torch.save(self.model.state_dict(), best_path)
                checkpoint_saved = True
                print(f'Model saved at epoch {epoch+1} with val RMSE {val_rmse:.4f}')
            early_stopping.update(val_rmse, epoch)
            if early_stopping.should_stop():
                print(f"Early stopping triggered at epoch {epoch+1}")
                break
        if checkpoint_saved:
            self.model.load_state_dict(torch.load(best_path, map_location=self.args.device))
        self.model.eval()
        final_test_rmse, final_test_mae = self.score(test_graph, test_label.float())
        print(f'Final Test RMSE: {final_test_rmse:.4f}, Final Test MAE: {final_test_mae:.4f}')
        # Compute correlation. predict() runs under no_grad, so no autograd graph
        # is built for the full test set.
        outputs = self.predict(test_graph)
        test_label = test_label.to(self.args.device)
        rmse = math.sqrt(F.mse_loss(outputs, test_label.float()).item())
        pred_cpu = outputs.cpu().detach().numpy()
        test_cpu = test_label.cpu().detach().numpy()
        pearson_corr, p_value_pearson = pearsonr(pred_cpu.flatten(), test_cpu.flatten())
        spearman_corr, p_value_spearman = spearmanr(pred_cpu.flatten(), test_cpu.flatten())
        # NOTE(review): quantizing against Decimal('1.0000e+0') rounds to four
        # decimal places, so very small p-values are reported as 0.0000 -
        # confirm that this is the intended presentation.
        p_value_pearson = Decimal(str(p_value_pearson)).quantize(Decimal('1.0000e+0')) if not math.isnan(p_value_pearson) else "NaN"
        p_value_spearman = Decimal(str(p_value_spearman)).quantize(Decimal('1.0000e+0')) if not math.isnan(p_value_spearman) else "NaN"
        print('Pearson Corr:', pearson_corr)
        print('Spearman Corr:', spearman_corr)
        print('p-rmse:', rmse)
        result = pd.DataFrame({
            'rmse': [rmse],
            'mae': [final_test_mae],
            'seed': [self.args.seed],
            'subtask': [self.args.subtask],
            'method': [getattr(self.args, 'variant', 'original')],
            'pearson': [pearson_corr],
            'p_value_pearson': [p_value_pearson],
            'spearman': [spearman_corr],
            'p_value_spearman': [p_value_spearman]
        })
        print(result)
        # Clean up: drop the local references created here, then release cached
        # GPU memory.
        del outputs, pred_cpu, test_cpu
        if self._on_cuda():
            torch.cuda.empty_cache()
        gc.collect()
        return result

In [None]:
import argparse
import os
from argparse import Namespace
import numpy as np
import torch

from dance.datasets.multimodality import ModalityPredictionDataset
from dance.data import Data


def _cache_paths(kwargs):
    """Return ``{artefact_name: path}`` for the cached artefacts of one configuration.

    The file stem encodes the subtask *and* every argument that is handed to
    ``GraphCreator`` (preprocessing type, neighbour count, component count,
    metric). Keying on the subtask alone would let a run with different graph
    settings silently pick up graphs built for an earlier configuration.
    """
    stem = "_".join([
        f"processed_data_{kwargs['subtask']}",
        f"prep-{kwargs.get('preprocessing_type')}",
        f"k-{kwargs.get('n_neigbours')}",
        f"ncomp-{kwargs.get('n_components')}",
        f"metric-{kwargs.get('metric')}",
    ])
    artefacts = ("train_graph", "val_graph", "test_graph",
                 "train_label", "val_label", "test_label",
                 "feature_size", "output_size")
    paths = {name: f"{stem}_{name}.pt" for name in artefacts}
    # Written last, so its presence marks a complete set of artefacts.
    paths["flag"] = f"{stem}_processed.flag"
    return paths


def pipeline(**kwargs):
    """Build (or load from cache) the train/val/test graphs and train five seeded models.

    Parameters
    ----------
    **kwargs
        Run configuration. Keys read here: ``subtask``, ``preprocessing_type``,
        ``n_neigbours``, ``n_components``, ``metric``, ``num_epochs``,
        ``batch_size``, ``verbose``, ``es_patience``, ``es_min_delta``,
        ``es_rate_threshold``, ``learning_rate`` and ``sample``. The complete
        mapping is forwarded to ``scParaLaGWrapper`` as an ``argparse.Namespace``.

    Notes
    -----
    - ``FEATURE_SIZE``, ``OUTPUT_SIZE``, ``processed`` and ``seed`` are written
      into ``kwargs`` before the wrapper is created; any incoming ``seed`` is
      replaced by the loop values 1..5.
    - Processed graphs and labels are cached in the current working directory.
      The raw dataset is only loaded when no complete cache exists.

    Returns
    -------
    None
    """
    print("\nInitializing pipeline...")
    subtask = kwargs["subtask"]

    # If necessary, override preprocessing_type based on subtask. This has to
    # happen before the cache paths are derived because it is part of the key.
    if subtask == "openproblems_bmmc_cite_phase2_mod2":
        kwargs["preprocessing_type"] = "None"

    paths = _cache_paths(kwargs)

    if all(os.path.exists(path) for path in paths.values()):
        print("Processed data found. Loading...")
        # The graphs are arbitrary pickled objects rather than plain tensors, so
        # they need the full unpickler; these files are written by this function.
        train_graph = torch.load(paths["train_graph"], weights_only=False)
        val_graph = torch.load(paths["val_graph"], weights_only=False)
        test_graph = torch.load(paths["test_graph"], weights_only=False)
        # Labels and sizes were saved as plain tensors and can use the safe loader.
        train_label = torch.load(paths["train_label"], weights_only=True).numpy()
        val_label = torch.load(paths["val_label"], weights_only=True).numpy()
        test_label = torch.load(paths["test_label"], weights_only=True).numpy()
        kwargs["FEATURE_SIZE"] = torch.load(paths["feature_size"], weights_only=True).item()
        kwargs["OUTPUT_SIZE"] = torch.load(paths["output_size"], weights_only=True).item()
        print("Processed data loaded.")
    else:
        # Only a cache miss needs the raw dataset.
        dataset = ModalityPredictionDataset(subtask, preprocess='feature_selection')
        data = dataset.load_data()

        print("Performing data processing using GraphCreator...")
        data, train_label, val_label, test_label, ftl_shape = GraphCreator(
            kwargs["preprocessing_type"], kwargs["n_neigbours"],
            kwargs["n_components"], kwargs["metric"])(data)
        if kwargs["preprocessing_type"] == "SVD":
            kwargs["FEATURE_SIZE"] = kwargs["n_components"]
        else:
            kwargs["FEATURE_SIZE"] = ftl_shape[0]
        kwargs["OUTPUT_SIZE"] = ftl_shape[1]
        train_graph = data.data.uns['gtrain']
        val_graph = data.data.uns['gval']
        test_graph = data.data.uns['gtest']

        # Save the processed data
        torch.save(train_graph, paths["train_graph"])
        torch.save(val_graph, paths["val_graph"])
        torch.save(test_graph, paths["test_graph"])
        torch.save(torch.tensor(train_label), paths["train_label"])
        torch.save(torch.tensor(val_label), paths["val_label"])
        torch.save(torch.tensor(test_label), paths["test_label"])
        torch.save(torch.tensor([kwargs["FEATURE_SIZE"]]), paths["feature_size"])
        torch.save(torch.tensor([kwargs["OUTPUT_SIZE"]]), paths["output_size"])

        # Create the flag file last so a partially written cache is never trusted.
        with open(paths["flag"], 'w') as f:
            f.write('')
        print("Data processing using GraphCreator complete and saved.")

    kwargs["processed"] = True

    for rand_seed in range(1, 6):
        kwargs["seed"] = rand_seed
        # Instantiate the unified wrapper.
        model = scParaLaGWrapper(Namespace(**kwargs))
        # Fresh label tensors are created for every seed so each run starts
        # from its own copies.
        model.fit(train_graph, val_graph, test_graph,
                  torch.tensor(train_label),
                  torch.tensor(val_label),
                  torch.tensor(test_label),
                  num_epochs=kwargs["num_epochs"],
                  batch_size=kwargs["batch_size"],
                  verbose=kwargs["verbose"],
                  es_patience=kwargs["es_patience"],
                  es_min_delta=kwargs["es_min_delta"],
                  es_rate_threshold=kwargs["es_rate_threshold"],
                  learning_rate=kwargs["learning_rate"],
                  sample=kwargs["sample"])


def _str2bool(value):
    """Convert a command-line token to ``bool``.

    ``type=bool`` is unsuitable for argparse because ``bool("False")`` is
    ``True``: every non-empty string enables the flag. This parser accepts the
    usual spellings instead. The empty string still maps to ``False`` so that
    invocations which relied on ``bool("")`` keep working.
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in {"true", "t", "yes", "y", "1"}:
        return True
    if token in {"false", "f", "no", "n", "0", ""}:
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


def _none_or_str(value):
    """Map the literal text ``none`` (any case) to ``None``; pass other tokens through.

    Lets the ``None`` entry of a ``choices`` list be selected from the command line.
    """
    return None if str(value).strip().lower() == "none" else value


parser = argparse.ArgumentParser()
parser.add_argument("-st", "--subtask", default="openproblems_bmmc_cite_phase2_mod2")
parser.add_argument("-bs", "--batch_size", default=300, type=int) #400
parser.add_argument("-ac", "--act", default="leaky_relu", choices=["relu", "relu6", "gelu", "leaky_relu"])
parser.add_argument("-convf", "--conv_flow", nargs='*', choices=['gat', 'sage', 'gconv'], default=['gat'])
# 'lstm' is accepted because the notebook's SAGE configuration uses it.
parser.add_argument("-aggf", "--agg_flow", nargs='*', type=_none_or_str,
                    choices=[None, 'mean', 'lstm'], default=[None])
parser.add_argument("-device", "--device", default="cuda")
parser.add_argument("-lr", "--learning_rate", type=float, default=0.000064)
parser.add_argument("-hid", "--hidden_size", type=int, default=520)
parser.add_argument("-nh", "--num_heads", type=int, default=3)
parser.add_argument("-nneig", "--n_neigbours", type=int, default=18) #18
parser.add_argument("-ncomp", "--n_components", type=int, default=1200)
parser.add_argument("-m", "--metric", type=str, default='euclidean')
parser.add_argument("-dr", "--dropout_rate", type=float, default=0.2)
parser.add_argument("-ne", "--num_epochs", type=int, default=500)
parser.add_argument("-sd", "--seed", type=int, default=1)
parser.add_argument("-pat", "--es_patience", type=int, default=40)
parser.add_argument("-esmd", "--es_min_delta", type=float, default=0.01)
parser.add_argument("-esrt", "--es_rate_threshold", type=float, default=0.0008)
parser.add_argument("-lde", "--layer_dim_ex", type=int, default=3)
parser.add_argument("-v", "--verbose", type=_str2bool, default=True)
parser.add_argument("-sam", "--sample", type=_str2bool, default=True)
parser.add_argument("-prep", "--preprocessing_type", default="None", choices=["None", "SVD"])
parser.add_argument("-ispro", "--processed", type=_str2bool, default=False)
# New argument to select the variant.
parser.add_argument("-var", "--variant", type=str, default="original",
                    choices=["original", "no_residual", "no_attention", "hierarchical"])
# If hierarchical variant is used, we may also want to set the number of clusters.
parser.add_argument("-nc", "--num_clusters", type=int, default=10)

# parse_known_args tolerates options that are not declared above instead of
# exiting on them; the leftovers are returned in `unk`.
args, unk = parser.parse_known_args()
torch.set_num_threads(1)
# The CLI-driven run is left disabled; the cells below call pipeline() with
# explicit configuration dictionaries instead.
#pipeline(**vars(args))

## **Ablation Study**

In [None]:
# Run configuration: GAT convolution, variant "original".
# The variable name is reused by every run cell regardless of the variant.
config_hierachical_PROTEIN = dict(
    # data / task
    subtask="openproblems_bmmc_cite_phase2_mod2",
    batch_size=520,
    # architecture
    act="relu",
    conv_flow=["gat"],
    agg_flow=[None],
    device="cuda",
    learning_rate=0.000064,
    hidden_size=280,
    num_heads=6,
    # graph construction
    n_neigbours=3,
    metric="euclidean",
    n_components=1200,
    # training / early stopping
    dropout_rate=0.2,
    num_epochs=500,
    seed=1,  # replaced inside pipeline(), which loops over seeds 1..5
    es_patience=40,
    es_min_delta=0.0001,
    es_rate_threshold=0.00002,
    layer_dim_ex=3,
    verbose=True,
    sample=True,
    preprocessing_type="None",
    # ablation switches
    variant="original",
    num_clusters=10,
)

pipeline(**config_hierachical_PROTEIN)


Initializing pipeline...


[INFO][2025-04-15 18:02:38,733][dance][download_file] Downloading: /content/data/openproblems_bmmc_cite_phase2_mod2.zip Bytes: 625,243,451
100%|██████████| 596M/596M [00:29<00:00, 21.0MB/s]
[INFO][2025-04-15 18:03:08,439][dance][unzip_file] Unzipping /content/data/openproblems_bmmc_cite_phase2_mod2.zip
[INFO][2025-04-15 18:03:11,812][dance][delete_file] Deleting /content/data/openproblems_bmmc_cite_phase2_mod2.zip
[INFO][2025-04-15 18:03:11,914][dance][_load_raw_data] Loading /content/data/openproblems_bmmc_cite_phase2_mod2/openproblems_bmmc_cite_phase2_mod2.censor_dataset.output_train_mod1.h5ad
[INFO][2025-04-15 18:03:12,625][dance][_load_raw_data] Loading /content/data/openproblems_bmmc_cite_phase2_mod2/openproblems_bmmc_cite_phase2_mod2.censor_dataset.output_train_mod2.h5ad
[INFO][2025-04-15 18:03:20,349][dance][_load_raw_data] Loading /content/data/openproblems_bmmc_cite_phase2_mod2/openproblems_bmmc_cite_phase2_mod2.censor_dataset.output_test_mod1.h5ad
[INFO][2025-04-15 18:03:20,3

Performing data processing using GraphCreator...
Data processing using GraphCreator complete and saved.
---------------------------------
Epoch 1/500
Train RMSE: 0.3763
Val RMSE: 0.3256, Val MAE: 0.1336
Test RMSE: 0.4028, Test MAE: 0.1778
---------------------------------
Model saved at epoch 1 with val RMSE 0.3256
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3284
Val RMSE: 0.2950, Val MAE: 0.1266
Test RMSE: 0.3703, Test MAE: 0.1676
---------------------------------
Model saved at epoch 2 with val RMSE 0.2950
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3052
Val RMSE: 0.2793, Val MAE: 0.1203
Test RMSE: 0.3554, Test MAE: 0.1603
---------------------------------
Model saved at epoch 3 with val RMSE 0.2793
Patience grace period: 40
---------------------------------
Epoch 4/500
Train RMSE: 0.2918
Val RMSE: 0.2722, Val MAE: 0.1181
Test RMSE: 0.3488, Test MAE: 0.1577
---------------------------------
Model save

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7486146203084426
Spearman Corr: 0.43041760721203637
p-rmse: 0.32504833898153657
       rmse       mae  seed                             subtask    method  \
0  0.325048  0.150217     1  openproblems_bmmc_cite_phase2_mod2  original   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.748615          0.0000  0.430418           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3746
Val RMSE: 0.3227, Val MAE: 0.1329
Test RMSE: 0.3993, Test MAE: 0.1770
---------------------------------
Model saved at epoch 1 with val RMSE 0.3227
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3272
Val RMSE: 0.2941, Val MAE: 0.1276
Test RMSE: 0.3683, Test MAE: 0.1683
---------------------------------
Model saved at epoch 2 with val RMSE 0.2941
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3051
Val RMSE: 0.2794, Val MAE: 0.1207
Test RMSE: 0.3553, Test MAE: 0.1606
----------------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7491165993766621
Spearman Corr: 0.43076472692409457
p-rmse: 0.3241152885540088
       rmse       mae  seed                             subtask    method  \
0  0.324115  0.149535     2  openproblems_bmmc_cite_phase2_mod2  original   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.749117          0.0000  0.430765           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3755
Val RMSE: 0.3227, Val MAE: 0.1331
Test RMSE: 0.3990, Test MAE: 0.1775
---------------------------------
Model saved at epoch 1 with val RMSE 0.3227
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3274
Val RMSE: 0.2936, Val MAE: 0.1278
Test RMSE: 0.3672, Test MAE: 0.1684
---------------------------------
Model saved at epoch 2 with val RMSE 0.2936
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3050
Val RMSE: 0.2788, Val MAE: 0.1211
Test RMSE: 0.3541, Test MAE: 0.1611
-----------------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7486420162846413
Spearman Corr: 0.4301231839675823
p-rmse: 0.3250172101562541
       rmse       mae  seed                             subtask    method  \
0  0.325017  0.149211     3  openproblems_bmmc_cite_phase2_mod2  original   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.748642          0.0000  0.430123           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3754
Val RMSE: 0.3224, Val MAE: 0.1338
Test RMSE: 0.3990, Test MAE: 0.1779
---------------------------------
Model saved at epoch 1 with val RMSE 0.3224
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3272
Val RMSE: 0.2934, Val MAE: 0.1276
Test RMSE: 0.3675, Test MAE: 0.1684
---------------------------------
Model saved at epoch 2 with val RMSE 0.2934
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3051
Val RMSE: 0.2790, Val MAE: 0.1209
Test RMSE: 0.3546, Test MAE: 0.1608
------------------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7479296130566552
Spearman Corr: 0.42984765662228425
p-rmse: 0.3248608561824182
       rmse       mae  seed                             subtask    method  \
0  0.324861  0.149685     4  openproblems_bmmc_cite_phase2_mod2  original   

   pearson p_value_pearson  spearman p_value_spearman  
0  0.74793          0.0000  0.429848           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3728
Val RMSE: 0.3195, Val MAE: 0.1334
Test RMSE: 0.3945, Test MAE: 0.1774
---------------------------------
Model saved at epoch 1 with val RMSE 0.3195
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3247
Val RMSE: 0.2920, Val MAE: 0.1263
Test RMSE: 0.3665, Test MAE: 0.1672
---------------------------------
Model saved at epoch 2 with val RMSE 0.2920
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3035
Val RMSE: 0.2789, Val MAE: 0.1205
Test RMSE: 0.3548, Test MAE: 0.1606
-------------------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7481616425289367
Spearman Corr: 0.4305377827389911
p-rmse: 0.3252962889282863
       rmse       mae  seed                             subtask    method  \
0  0.325296  0.149724     5  openproblems_bmmc_cite_phase2_mod2  original   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.748162          0.0000  0.430538           0.0000  


In [None]:
# Run configuration: SAGE convolution with the 'lstm' aggregator, variant "original".
# The variable name is reused by every run cell regardless of the variant.
config_hierachical_PROTEIN = dict(
    # data / task
    subtask="openproblems_bmmc_cite_phase2_mod2",
    batch_size=520,
    # architecture
    act="relu",
    conv_flow=["sage"],
    agg_flow=['lstm'],
    device="cuda",
    learning_rate=0.000064,
    hidden_size=280,
    num_heads=6,
    # graph construction
    n_neigbours=3,
    metric="euclidean",
    n_components=1200,
    # training / early stopping
    dropout_rate=0.2,
    num_epochs=500,
    seed=1,  # replaced inside pipeline(), which loops over seeds 1..5
    es_patience=40,
    es_min_delta=0.0001,
    es_rate_threshold=0.00002,
    layer_dim_ex=3,
    verbose=True,
    sample=True,
    preprocessing_type="None",
    # ablation switches
    variant="original",
    num_clusters=10,
)

pipeline(**config_hierachical_PROTEIN)

[INFO][2025-04-15 18:50:14,128][dance][_load_raw_data] Loading /content/data/openproblems_bmmc_cite_phase2_mod2/openproblems_bmmc_cite_phase2_mod2.censor_dataset.output_train_mod1.h5ad



Initializing pipeline...


[INFO][2025-04-15 18:50:14,800][dance][_load_raw_data] Loading /content/data/openproblems_bmmc_cite_phase2_mod2/openproblems_bmmc_cite_phase2_mod2.censor_dataset.output_train_mod2.h5ad
[INFO][2025-04-15 18:50:22,521][dance][_load_raw_data] Loading /content/data/openproblems_bmmc_cite_phase2_mod2/openproblems_bmmc_cite_phase2_mod2.censor_dataset.output_test_mod1.h5ad
[INFO][2025-04-15 18:50:22,545][dance][_load_raw_data] Loading /content/data/openproblems_bmmc_cite_phase2_mod2/openproblems_bmmc_cite_phase2_mod2.censor_dataset.output_test_mod2.h5ad
[INFO][2025-04-15 18:50:22,753][dance][_maybe_preprocess] Preprocessing done.
  self._update_attr("var", axis=0, join_common=join_common)
  self._update_attr("obs", axis=1, join_common=join_common)
[INFO][2025-04-15 18:50:29,757][dance][set_config_from_dict] Setting config 'feature_mod' to 'mod1'
[INFO][2025-04-15 18:50:29,758][dance][set_config_from_dict] Setting config 'label_mod' to 'mod2'
[INFO][2025-04-15 18:50:29,759][dance][load_data] R

Processed data found. Loading...


  val_label = torch.load(val_label_path).numpy()
  test_label = torch.load(test_label_path).numpy()
  kwargs["FEATURE_SIZE"] = torch.load(feature_size_path).item()
  kwargs["OUTPUT_SIZE"] = torch.load(output_size_path).item()


Processed data loaded.
---------------------------------
Epoch 1/500
Train RMSE: 0.3784
Val RMSE: 0.3200, Val MAE: 0.1402
Test RMSE: 0.3927, Test MAE: 0.1848
---------------------------------
Model saved at epoch 1 with val RMSE 0.3200
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3235
Val RMSE: 0.2908, Val MAE: 0.1301
Test RMSE: 0.3600, Test MAE: 0.1716
---------------------------------
Model saved at epoch 2 with val RMSE 0.2908
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3017
Val RMSE: 0.2777, Val MAE: 0.1231
Test RMSE: 0.3514, Test MAE: 0.1634
---------------------------------
Model saved at epoch 3 with val RMSE 0.2777
Patience grace period: 40
---------------------------------
Epoch 4/500
Train RMSE: 0.2895
Val RMSE: 0.2718, Val MAE: 0.1198
Test RMSE: 0.3503, Test MAE: 0.1595
---------------------------------
Model saved at epoch 4 with val RMSE 0.2718
Patience grace period: 40
---------------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7444499037603849
Spearman Corr: 0.4262422558528901
p-rmse: 0.3277958513328729
       rmse       mae  seed                             subtask    method  \
0  0.327796  0.148474     1  openproblems_bmmc_cite_phase2_mod2  original   

   pearson p_value_pearson  spearman p_value_spearman  
0  0.74445          0.0000  0.426242           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3784
Val RMSE: 0.3213, Val MAE: 0.1398
Test RMSE: 0.3952, Test MAE: 0.1839
---------------------------------
Model saved at epoch 1 with val RMSE 0.3213
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3238
Val RMSE: 0.2912, Val MAE: 0.1301
Test RMSE: 0.3605, Test MAE: 0.1717
---------------------------------
Model saved at epoch 2 with val RMSE 0.2912
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3019
Val RMSE: 0.2780, Val MAE: 0.1230
Test RMSE: 0.3511, Test MAE: 0.1633
--------------------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7435577506262313
Spearman Corr: 0.42543057261255757
p-rmse: 0.3288398630372335
      rmse       mae  seed                             subtask    method  \
0  0.32884  0.149709     2  openproblems_bmmc_cite_phase2_mod2  original   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.743558          0.0000  0.425431           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3791
Val RMSE: 0.3212, Val MAE: 0.1401
Test RMSE: 0.3954, Test MAE: 0.1845
---------------------------------
Model saved at epoch 1 with val RMSE 0.3212
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3238
Val RMSE: 0.2910, Val MAE: 0.1297
Test RMSE: 0.3611, Test MAE: 0.1714
---------------------------------
Model saved at epoch 2 with val RMSE 0.2910
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3018
Val RMSE: 0.2786, Val MAE: 0.1226
Test RMSE: 0.3532, Test MAE: 0.1633
-------------------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7406424037405279
Spearman Corr: 0.4261959893645552
p-rmse: 0.32583172130470506
       rmse       mae  seed                             subtask    method  \
0  0.325832  0.152118     3  openproblems_bmmc_cite_phase2_mod2  original   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.740642          0.0000  0.426196           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3795
Val RMSE: 0.3224, Val MAE: 0.1405
Test RMSE: 0.3961, Test MAE: 0.1851
---------------------------------
Model saved at epoch 1 with val RMSE 0.3224
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3244
Val RMSE: 0.2917, Val MAE: 0.1302
Test RMSE: 0.3608, Test MAE: 0.1719
---------------------------------
Model saved at epoch 2 with val RMSE 0.2917
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3023
Val RMSE: 0.2787, Val MAE: 0.1233
Test RMSE: 0.3524, Test MAE: 0.1638
-----------------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7396185735666657
Spearman Corr: 0.4251091182073211
p-rmse: 0.3332243855570525
       rmse       mae  seed                             subtask    method  \
0  0.333224  0.148027     4  openproblems_bmmc_cite_phase2_mod2  original   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.739619          0.0000  0.425109           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3772
Val RMSE: 0.3188, Val MAE: 0.1396
Test RMSE: 0.3911, Test MAE: 0.1843
---------------------------------
Model saved at epoch 1 with val RMSE 0.3188
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3226
Val RMSE: 0.2904, Val MAE: 0.1297
Test RMSE: 0.3592, Test MAE: 0.1714
---------------------------------
Model saved at epoch 2 with val RMSE 0.2904
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3015
Val RMSE: 0.2781, Val MAE: 0.1232
Test RMSE: 0.3509, Test MAE: 0.1637
------------------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7429449991293895
Spearman Corr: 0.4230302606047667
p-rmse: 0.3287553749261527
       rmse       mae  seed                             subtask    method  \
0  0.328755  0.149904     5  openproblems_bmmc_cite_phase2_mod2  original   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.742945          0.0000   0.42303           0.0000  


In [None]:
# Run configuration: 'gconv' convolution, variant "original".
# The variable name is reused by every run cell regardless of the variant.
config_hierachical_PROTEIN = dict(
    # data / task
    subtask="openproblems_bmmc_cite_phase2_mod2",
    batch_size=520,
    # architecture
    act="relu",
    conv_flow=["gconv"],
    agg_flow=[None],
    device="cuda",
    learning_rate=0.000064,
    hidden_size=280,
    num_heads=6,
    # graph construction
    n_neigbours=3,
    metric="euclidean",
    n_components=1200,
    # training / early stopping
    dropout_rate=0.2,
    num_epochs=500,
    seed=1,  # replaced inside pipeline(), which loops over seeds 1..5
    es_patience=40,
    es_min_delta=0.0001,
    es_rate_threshold=0.00002,
    layer_dim_ex=3,
    verbose=True,
    sample=True,
    preprocessing_type="None",
    # ablation switches
    variant="original",
    num_clusters=10,
)

pipeline(**config_hierachical_PROTEIN)

[INFO][2025-04-15 19:17:19,995][dance][_load_raw_data] Loading /content/data/openproblems_bmmc_cite_phase2_mod2/openproblems_bmmc_cite_phase2_mod2.censor_dataset.output_train_mod1.h5ad



Initializing pipeline...


[INFO][2025-04-15 19:17:20,645][dance][_load_raw_data] Loading /content/data/openproblems_bmmc_cite_phase2_mod2/openproblems_bmmc_cite_phase2_mod2.censor_dataset.output_train_mod2.h5ad
[INFO][2025-04-15 19:17:28,323][dance][_load_raw_data] Loading /content/data/openproblems_bmmc_cite_phase2_mod2/openproblems_bmmc_cite_phase2_mod2.censor_dataset.output_test_mod1.h5ad
[INFO][2025-04-15 19:17:28,348][dance][_load_raw_data] Loading /content/data/openproblems_bmmc_cite_phase2_mod2/openproblems_bmmc_cite_phase2_mod2.censor_dataset.output_test_mod2.h5ad
[INFO][2025-04-15 19:17:28,547][dance][_maybe_preprocess] Preprocessing done.
  self._update_attr("var", axis=0, join_common=join_common)
  self._update_attr("obs", axis=1, join_common=join_common)
[INFO][2025-04-15 19:17:35,721][dance][set_config_from_dict] Setting config 'feature_mod' to 'mod1'
[INFO][2025-04-15 19:17:35,722][dance][set_config_from_dict] Setting config 'label_mod' to 'mod2'
[INFO][2025-04-15 19:17:35,723][dance][load_data] R

Processed data found. Loading...


  val_label = torch.load(val_label_path).numpy()
  test_label = torch.load(test_label_path).numpy()
  kwargs["FEATURE_SIZE"] = torch.load(feature_size_path).item()
  kwargs["OUTPUT_SIZE"] = torch.load(output_size_path).item()


Processed data loaded.
---------------------------------
Epoch 1/500
Train RMSE: 0.3759
Val RMSE: 0.3230, Val MAE: 0.1360
Test RMSE: 0.3938, Test MAE: 0.1812
---------------------------------
Model saved at epoch 1 with val RMSE 0.3230
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3220
Val RMSE: 0.2978, Val MAE: 0.1287
Test RMSE: 0.3634, Test MAE: 0.1711
---------------------------------
Model saved at epoch 2 with val RMSE 0.2978
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3017
Val RMSE: 0.2855, Val MAE: 0.1231
Test RMSE: 0.3527, Test MAE: 0.1642
---------------------------------
Model saved at epoch 3 with val RMSE 0.2855
Patience grace period: 40
---------------------------------
Epoch 4/500
Train RMSE: 0.2903
Val RMSE: 0.2792, Val MAE: 0.1205
Test RMSE: 0.3486, Test MAE: 0.1610
---------------------------------
Model saved at epoch 4 with val RMSE 0.2792
Patience grace period: 40
---------------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7455272861485321
Spearman Corr: 0.42720018309068813
p-rmse: 0.3329531940852362
       rmse       mae  seed                             subtask    method  \
0  0.332953  0.149765     1  openproblems_bmmc_cite_phase2_mod2  original   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.745527          0.0000    0.4272           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3769
Val RMSE: 0.3252, Val MAE: 0.1359
Test RMSE: 0.3975, Test MAE: 0.1810
---------------------------------
Model saved at epoch 1 with val RMSE 0.3252
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3234
Val RMSE: 0.2996, Val MAE: 0.1289
Test RMSE: 0.3659, Test MAE: 0.1711
---------------------------------
Model saved at epoch 2 with val RMSE 0.2996
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3029
Val RMSE: 0.2867, Val MAE: 0.1233
Test RMSE: 0.3539, Test MAE: 0.1642
-----------------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7458729093941798
Spearman Corr: 0.42785873954375153
p-rmse: 0.3325597931942241
      rmse       mae  seed                             subtask    method  \
0  0.33256  0.149445     2  openproblems_bmmc_cite_phase2_mod2  original   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.745873          0.0000  0.427859           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3774
Val RMSE: 0.3251, Val MAE: 0.1361
Test RMSE: 0.3984, Test MAE: 0.1808
---------------------------------
Model saved at epoch 1 with val RMSE 0.3251
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3234
Val RMSE: 0.2990, Val MAE: 0.1287
Test RMSE: 0.3664, Test MAE: 0.1707
---------------------------------
Model saved at epoch 2 with val RMSE 0.2990
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3029
Val RMSE: 0.2862, Val MAE: 0.1232
Test RMSE: 0.3545, Test MAE: 0.1639
-------------------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7459289884207092
Spearman Corr: 0.42687954928003013
p-rmse: 0.33160553736466425
       rmse       mae  seed                             subtask    method  \
0  0.331606  0.148991     3  openproblems_bmmc_cite_phase2_mod2  original   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.745929          0.0000   0.42688           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3769
Val RMSE: 0.3254, Val MAE: 0.1358
Test RMSE: 0.3980, Test MAE: 0.1812
---------------------------------
Model saved at epoch 1 with val RMSE 0.3254
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3233
Val RMSE: 0.2987, Val MAE: 0.1284
Test RMSE: 0.3652, Test MAE: 0.1706
---------------------------------
Model saved at epoch 2 with val RMSE 0.2987
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3021
Val RMSE: 0.2859, Val MAE: 0.1230
Test RMSE: 0.3534, Test MAE: 0.1638
----------------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7458832181364393
Spearman Corr: 0.4282220864435657
p-rmse: 0.33286789222703744
       rmse       mae  seed                             subtask    method  \
0  0.332868  0.148961     4  openproblems_bmmc_cite_phase2_mod2  original   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.745883          0.0000  0.428222           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3753
Val RMSE: 0.3223, Val MAE: 0.1359
Test RMSE: 0.3945, Test MAE: 0.1805
---------------------------------
Model saved at epoch 1 with val RMSE 0.3223
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3212
Val RMSE: 0.2971, Val MAE: 0.1286
Test RMSE: 0.3640, Test MAE: 0.1705
---------------------------------
Model saved at epoch 2 with val RMSE 0.2971
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3011
Val RMSE: 0.2850, Val MAE: 0.1233
Test RMSE: 0.3530, Test MAE: 0.1642
-----------------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7456589937020245
Spearman Corr: 0.427913122565162
p-rmse: 0.33167782093026504
       rmse       mae  seed                             subtask    method  \
0  0.331678  0.149194     5  openproblems_bmmc_cite_phase2_mod2  original   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.745659          0.0000  0.427913           0.0000  


In [None]:
# Run configuration: GAT convolution, variant "no_residual".
# The variable name is reused by every run cell regardless of the variant.
config_hierachical_PROTEIN = dict(
    # data / task
    subtask="openproblems_bmmc_cite_phase2_mod2",
    batch_size=520,
    # architecture
    act="relu",
    conv_flow=["gat"],
    agg_flow=[None],
    device="cuda",
    learning_rate=0.000064,
    hidden_size=280,
    num_heads=6,
    # graph construction
    n_neigbours=3,
    metric="euclidean",
    n_components=1200,
    # training / early stopping
    dropout_rate=0.2,
    num_epochs=500,
    seed=1,  # replaced inside pipeline(), which loops over seeds 1..5
    es_patience=40,
    es_min_delta=0.0001,
    es_rate_threshold=0.00002,
    layer_dim_ex=3,
    verbose=True,
    sample=True,
    preprocessing_type="None",
    # ablation switches
    variant="no_residual",
    num_clusters=10,
)

pipeline(**config_hierachical_PROTEIN)

[INFO][2025-04-15 20:22:58,099][dance][_load_raw_data] Loading /content/data/openproblems_bmmc_cite_phase2_mod2/openproblems_bmmc_cite_phase2_mod2.censor_dataset.output_train_mod1.h5ad



Initializing pipeline...


[INFO][2025-04-15 20:22:58,749][dance][_load_raw_data] Loading /content/data/openproblems_bmmc_cite_phase2_mod2/openproblems_bmmc_cite_phase2_mod2.censor_dataset.output_train_mod2.h5ad
[INFO][2025-04-15 20:23:06,437][dance][_load_raw_data] Loading /content/data/openproblems_bmmc_cite_phase2_mod2/openproblems_bmmc_cite_phase2_mod2.censor_dataset.output_test_mod1.h5ad
[INFO][2025-04-15 20:23:06,461][dance][_load_raw_data] Loading /content/data/openproblems_bmmc_cite_phase2_mod2/openproblems_bmmc_cite_phase2_mod2.censor_dataset.output_test_mod2.h5ad
[INFO][2025-04-15 20:23:06,661][dance][_maybe_preprocess] Preprocessing done.
  self._update_attr("var", axis=0, join_common=join_common)
  self._update_attr("obs", axis=1, join_common=join_common)
[INFO][2025-04-15 20:23:14,011][dance][set_config_from_dict] Setting config 'feature_mod' to 'mod1'
[INFO][2025-04-15 20:23:14,011][dance][set_config_from_dict] Setting config 'label_mod' to 'mod2'
[INFO][2025-04-15 20:23:14,012][dance][load_data] R

Processed data found. Loading...


  val_label = torch.load(val_label_path).numpy()
  test_label = torch.load(test_label_path).numpy()
  kwargs["FEATURE_SIZE"] = torch.load(feature_size_path).item()
  kwargs["OUTPUT_SIZE"] = torch.load(output_size_path).item()


Processed data loaded.
---------------------------------
Epoch 1/500
Train RMSE: 0.3804
Val RMSE: 0.3305, Val MAE: 0.1352
Test RMSE: 0.4104, Test MAE: 0.1800
---------------------------------
Model saved at epoch 1 with val RMSE 0.3305
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3321
Val RMSE: 0.3000, Val MAE: 0.1283
Test RMSE: 0.3785, Test MAE: 0.1692
---------------------------------
Model saved at epoch 2 with val RMSE 0.3000
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3083
Val RMSE: 0.2827, Val MAE: 0.1216
Test RMSE: 0.3627, Test MAE: 0.1615
---------------------------------
Model saved at epoch 3 with val RMSE 0.2827
Patience grace period: 40
---------------------------------
Epoch 4/500
Train RMSE: 0.2931
Val RMSE: 0.2735, Val MAE: 0.1179
Test RMSE: 0.3536, Test MAE: 0.1573
---------------------------------
Model saved at epoch 4 with val RMSE 0.2735
Patience grace period: 40
---------------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7467554729639482
Spearman Corr: 0.42986540639516746
p-rmse: 0.32595493617468235
       rmse       mae  seed                             subtask       method  \
0  0.325955  0.149297     1  openproblems_bmmc_cite_phase2_mod2  no_residual   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.746755          0.0000  0.429865           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3798
Val RMSE: 0.3288, Val MAE: 0.1346
Test RMSE: 0.4091, Test MAE: 0.1791
---------------------------------
Model saved at epoch 1 with val RMSE 0.3288
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3311
Val RMSE: 0.2987, Val MAE: 0.1285
Test RMSE: 0.3772, Test MAE: 0.1693
---------------------------------
Model saved at epoch 2 with val RMSE 0.2987
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3080
Val RMSE: 0.2823, Val MAE: 0.1216
Test RMSE: 0.3625, Test MAE: 0.1618
----------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7467365562325299
Spearman Corr: 0.42963654170757515
p-rmse: 0.32605407780596785
       rmse       mae  seed                             subtask       method  \
0  0.326054  0.149034     2  openproblems_bmmc_cite_phase2_mod2  no_residual   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.746737          0.0000  0.429637           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3807
Val RMSE: 0.3299, Val MAE: 0.1354
Test RMSE: 0.4096, Test MAE: 0.1800
---------------------------------
Model saved at epoch 1 with val RMSE 0.3299
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3313
Val RMSE: 0.2980, Val MAE: 0.1291
Test RMSE: 0.3759, Test MAE: 0.1696
---------------------------------
Model saved at epoch 2 with val RMSE 0.2980
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3074
Val RMSE: 0.2811, Val MAE: 0.1215
Test RMSE: 0.3609, Test MAE: 0.1613
----------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7456548062572979
Spearman Corr: 0.42871516677832466
p-rmse: 0.3255929091450292
       rmse       mae  seed                             subtask       method  \
0  0.325593  0.149594     3  openproblems_bmmc_cite_phase2_mod2  no_residual   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.745655          0.0000  0.428715           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3791
Val RMSE: 0.3268, Val MAE: 0.1354
Test RMSE: 0.4070, Test MAE: 0.1796
---------------------------------
Model saved at epoch 1 with val RMSE 0.3268
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3292
Val RMSE: 0.2955, Val MAE: 0.1279
Test RMSE: 0.3742, Test MAE: 0.1684
---------------------------------
Model saved at epoch 2 with val RMSE 0.2955
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3056
Val RMSE: 0.2794, Val MAE: 0.1205
Test RMSE: 0.3595, Test MAE: 0.1604
-----------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7448715186662899
Spearman Corr: 0.4291383463741486
p-rmse: 0.3276026177887723
       rmse       mae  seed                             subtask       method  \
0  0.327603  0.149078     4  openproblems_bmmc_cite_phase2_mod2  no_residual   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.744872          0.0000  0.429138           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3800
Val RMSE: 0.3298, Val MAE: 0.1355
Test RMSE: 0.4095, Test MAE: 0.1799
---------------------------------
Model saved at epoch 1 with val RMSE 0.3298
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3313
Val RMSE: 0.2981, Val MAE: 0.1283
Test RMSE: 0.3767, Test MAE: 0.1690
---------------------------------
Model saved at epoch 2 with val RMSE 0.2981
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3070
Val RMSE: 0.2808, Val MAE: 0.1209
Test RMSE: 0.3607, Test MAE: 0.1608
------------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7457336437337192
Spearman Corr: 0.42843008593247
p-rmse: 0.3273849088153147
       rmse       mae  seed                             subtask       method  \
0  0.327385  0.149426     5  openproblems_bmmc_cite_phase2_mod2  no_residual   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.745734          0.0000   0.42843           0.0000  


In [None]:
# Ablation run: single GAT layer, model variant "no_attention".
# NOTE(review): the recorded output of this cell reports results for seeds 1-5,
# so `seed` does not appear to be the only seed `pipeline` trains with — confirm
# against the `pipeline` definition.
config_hierachical_PROTEIN = dict(
    subtask="openproblems_bmmc_cite_phase2_mod2",
    batch_size=520,
    act="relu",
    conv_flow=["gat"],  # presumably one graph-conv type per layer — TODO confirm
    agg_flow=[None],  # same length as conv_flow; presumably a per-layer aggregator
    device="cuda",
    learning_rate=0.000064,
    hidden_size=280,
    num_heads=6,
    n_neigbours=3,  # key spelling must stay as-is: it is forwarded as a keyword to pipeline
    metric="euclidean",
    n_components=1200,
    dropout_rate=0.2,
    num_epochs=500,
    seed=1,
    # es_* look like early-stopping settings (the log prints "Patience grace period: 40").
    es_patience=40,
    es_min_delta=0.0001,
    es_rate_threshold=0.00002,
    layer_dim_ex=3,
    verbose=True,
    sample=True,
    preprocessing_type="None",  # NOTE(review): the string "None", not the None object
    variant="no_attention",  # shows up as the `method` column of the result table
    num_clusters=10,
)

# Expand the config into keyword arguments for the run.
pipeline(**config_hierachical_PROTEIN)

[INFO][2025-04-15 21:06:24,136][dance][_load_raw_data] Loading /content/data/openproblems_bmmc_cite_phase2_mod2/openproblems_bmmc_cite_phase2_mod2.censor_dataset.output_train_mod1.h5ad



Initializing pipeline...


[INFO][2025-04-15 21:06:24,785][dance][_load_raw_data] Loading /content/data/openproblems_bmmc_cite_phase2_mod2/openproblems_bmmc_cite_phase2_mod2.censor_dataset.output_train_mod2.h5ad
[INFO][2025-04-15 21:06:32,484][dance][_load_raw_data] Loading /content/data/openproblems_bmmc_cite_phase2_mod2/openproblems_bmmc_cite_phase2_mod2.censor_dataset.output_test_mod1.h5ad
[INFO][2025-04-15 21:06:32,508][dance][_load_raw_data] Loading /content/data/openproblems_bmmc_cite_phase2_mod2/openproblems_bmmc_cite_phase2_mod2.censor_dataset.output_test_mod2.h5ad
[INFO][2025-04-15 21:06:32,706][dance][_maybe_preprocess] Preprocessing done.
  self._update_attr("var", axis=0, join_common=join_common)
  self._update_attr("obs", axis=1, join_common=join_common)
[INFO][2025-04-15 21:06:40,002][dance][set_config_from_dict] Setting config 'feature_mod' to 'mod1'
[INFO][2025-04-15 21:06:40,003][dance][set_config_from_dict] Setting config 'label_mod' to 'mod2'
[INFO][2025-04-15 21:06:40,003][dance][load_data] R

Processed data found. Loading...


  val_label = torch.load(val_label_path).numpy()
  test_label = torch.load(test_label_path).numpy()
  kwargs["FEATURE_SIZE"] = torch.load(feature_size_path).item()
  kwargs["OUTPUT_SIZE"] = torch.load(output_size_path).item()


Processed data loaded.
---------------------------------
Epoch 1/500
Train RMSE: 0.3934
Val RMSE: 0.3574, Val MAE: 0.1451
Test RMSE: 0.4435, Test MAE: 0.1935
---------------------------------
Model saved at epoch 1 with val RMSE 0.3574
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3474
Val RMSE: 0.3131, Val MAE: 0.1311
Test RMSE: 0.3903, Test MAE: 0.1733
---------------------------------
Model saved at epoch 2 with val RMSE 0.3131
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3211
Val RMSE: 0.2923, Val MAE: 0.1264
Test RMSE: 0.3679, Test MAE: 0.1664
---------------------------------
Model saved at epoch 3 with val RMSE 0.2923
Patience grace period: 40
---------------------------------
Epoch 4/500
Train RMSE: 0.3028
Val RMSE: 0.2796, Val MAE: 0.1209
Test RMSE: 0.3562, Test MAE: 0.1600
---------------------------------
Model saved at epoch 4 with val RMSE 0.2796
Patience grace period: 40
---------------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7480439682528935
Spearman Corr: 0.4313505610895793
p-rmse: 0.3262923240978126
       rmse       mae  seed                             subtask        method  \
0  0.326292  0.149246     1  openproblems_bmmc_cite_phase2_mod2  no_attention   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.748044          0.0000  0.431351           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3915
Val RMSE: 0.3528, Val MAE: 0.1446
Test RMSE: 0.4377, Test MAE: 0.1923
---------------------------------
Model saved at epoch 1 with val RMSE 0.3528
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3436
Val RMSE: 0.3083, Val MAE: 0.1308
Test RMSE: 0.3852, Test MAE: 0.1722
---------------------------------
Model saved at epoch 2 with val RMSE 0.3083
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3174
Val RMSE: 0.2892, Val MAE: 0.1252
Test RMSE: 0.3658, Test MAE: 0.1650
----------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7476203017362183
Spearman Corr: 0.43051572614109995
p-rmse: 0.3254933065855696
       rmse       mae  seed                             subtask        method  \
0  0.325493  0.148561     2  openproblems_bmmc_cite_phase2_mod2  no_attention   

   pearson p_value_pearson  spearman p_value_spearman  
0  0.74762          0.0000  0.430516           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3919
Val RMSE: 0.3564, Val MAE: 0.1452
Test RMSE: 0.4425, Test MAE: 0.1941
---------------------------------
Model saved at epoch 1 with val RMSE 0.3564
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3472
Val RMSE: 0.3114, Val MAE: 0.1312
Test RMSE: 0.3885, Test MAE: 0.1733
---------------------------------
Model saved at epoch 2 with val RMSE 0.3114
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3213
Val RMSE: 0.2918, Val MAE: 0.1266
Test RMSE: 0.3679, Test MAE: 0.1664
-----------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7473576165702545
Spearman Corr: 0.4290797249171869
p-rmse: 0.32647963526585866
      rmse       mae  seed                             subtask        method  \
0  0.32648  0.149254     3  openproblems_bmmc_cite_phase2_mod2  no_attention   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.747358          0.0000   0.42908           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3902
Val RMSE: 0.3475, Val MAE: 0.1434
Test RMSE: 0.4301, Test MAE: 0.1897
---------------------------------
Model saved at epoch 1 with val RMSE 0.3475
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3421
Val RMSE: 0.3072, Val MAE: 0.1309
Test RMSE: 0.3833, Test MAE: 0.1723
---------------------------------
Model saved at epoch 2 with val RMSE 0.3072
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3175
Val RMSE: 0.2896, Val MAE: 0.1258
Test RMSE: 0.3654, Test MAE: 0.1657
-----------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7488270078302989
Spearman Corr: 0.4307128796825631
p-rmse: 0.3256919665311258
       rmse       mae  seed                             subtask        method  \
0  0.325692  0.148627     4  openproblems_bmmc_cite_phase2_mod2  no_attention   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.748827          0.0000  0.430713           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3916
Val RMSE: 0.3533, Val MAE: 0.1448
Test RMSE: 0.4386, Test MAE: 0.1926
---------------------------------
Model saved at epoch 1 with val RMSE 0.3533
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3449
Val RMSE: 0.3091, Val MAE: 0.1308
Test RMSE: 0.3858, Test MAE: 0.1726
---------------------------------
Model saved at epoch 2 with val RMSE 0.3091
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3187
Val RMSE: 0.2901, Val MAE: 0.1256
Test RMSE: 0.3656, Test MAE: 0.1646
----------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7481356794886787
Spearman Corr: 0.4301712924120494
p-rmse: 0.3251005384020071
       rmse       mae  seed                             subtask        method  \
0  0.325101  0.149299     5  openproblems_bmmc_cite_phase2_mod2  no_attention   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.748136          0.0000  0.430171           0.0000  


In [None]:
# Depth experiment: two-entry conv flow (GAT, then SAGE), model variant "original".
# NOTE(review): the recorded output of this cell reports results for seeds 1-5,
# so `seed` does not appear to be the only seed `pipeline` trains with — confirm
# against the `pipeline` definition.
config_hierachical_PROTEIN = dict(
    subtask="openproblems_bmmc_cite_phase2_mod2",
    batch_size=520,
    act="relu",
    conv_flow=["gat", 'sage'],  # presumably one graph-conv type per layer — TODO confirm
    agg_flow=[None, 'lstm'],  # same length as conv_flow; 'lstm' presumably the SAGE aggregator
    device="cuda",
    learning_rate=0.000064,
    hidden_size=280,
    num_heads=6,
    n_neigbours=3,  # key spelling must stay as-is: it is forwarded as a keyword to pipeline
    metric="euclidean",
    n_components=1200,
    dropout_rate=0.2,
    num_epochs=500,
    seed=1,
    # es_* look like early-stopping settings (the log prints "Patience grace period: 40").
    es_patience=40,
    es_min_delta=0.0001,
    es_rate_threshold=0.00002,
    layer_dim_ex=3,
    verbose=True,
    sample=True,
    preprocessing_type="None",  # NOTE(review): the string "None", not the None object
    variant="original",  # shows up as the `method` column of the result table
    num_clusters=10,
)

# Expand the config into keyword arguments for the run.
pipeline(**config_hierachical_PROTEIN)

[INFO][2025-04-15 22:00:01,181][dance][_load_raw_data] Loading /content/data/openproblems_bmmc_cite_phase2_mod2/openproblems_bmmc_cite_phase2_mod2.censor_dataset.output_train_mod1.h5ad



Initializing pipeline...


[INFO][2025-04-15 22:00:01,832][dance][_load_raw_data] Loading /content/data/openproblems_bmmc_cite_phase2_mod2/openproblems_bmmc_cite_phase2_mod2.censor_dataset.output_train_mod2.h5ad
[INFO][2025-04-15 22:00:09,539][dance][_load_raw_data] Loading /content/data/openproblems_bmmc_cite_phase2_mod2/openproblems_bmmc_cite_phase2_mod2.censor_dataset.output_test_mod1.h5ad
[INFO][2025-04-15 22:00:09,563][dance][_load_raw_data] Loading /content/data/openproblems_bmmc_cite_phase2_mod2/openproblems_bmmc_cite_phase2_mod2.censor_dataset.output_test_mod2.h5ad
[INFO][2025-04-15 22:00:09,764][dance][_maybe_preprocess] Preprocessing done.
  self._update_attr("var", axis=0, join_common=join_common)
  self._update_attr("obs", axis=1, join_common=join_common)
[INFO][2025-04-15 22:00:16,916][dance][set_config_from_dict] Setting config 'feature_mod' to 'mod1'
[INFO][2025-04-15 22:00:16,916][dance][set_config_from_dict] Setting config 'label_mod' to 'mod2'
[INFO][2025-04-15 22:00:16,917][dance][load_data] R

Processed data found. Loading...


  val_label = torch.load(val_label_path).numpy()
  test_label = torch.load(test_label_path).numpy()
  kwargs["FEATURE_SIZE"] = torch.load(feature_size_path).item()
  kwargs["OUTPUT_SIZE"] = torch.load(output_size_path).item()


Processed data loaded.
---------------------------------
Epoch 1/500
Train RMSE: 0.3714
Val RMSE: 0.3103, Val MAE: 0.1357
Test RMSE: 0.3828, Test MAE: 0.1790
---------------------------------
Model saved at epoch 1 with val RMSE 0.3103
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3150
Val RMSE: 0.2857, Val MAE: 0.1273
Test RMSE: 0.3570, Test MAE: 0.1680
---------------------------------
Model saved at epoch 2 with val RMSE 0.2857
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.2947
Val RMSE: 0.2743, Val MAE: 0.1211
Test RMSE: 0.3495, Test MAE: 0.1611
---------------------------------
Model saved at epoch 3 with val RMSE 0.2743
Patience grace period: 40
---------------------------------
Epoch 4/500
Train RMSE: 0.2843
Val RMSE: 0.2694, Val MAE: 0.1187
Test RMSE: 0.3473, Test MAE: 0.1585
---------------------------------
Model saved at epoch 4 with val RMSE 0.2694
Patience grace period: 40
---------------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7478783751881832
Spearman Corr: 0.4291214531792981
p-rmse: 0.32699777350703063
       rmse       mae  seed                             subtask    method  \
0  0.326998  0.150408     1  openproblems_bmmc_cite_phase2_mod2  original   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.747878          0.0000  0.429121           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3728
Val RMSE: 0.3124, Val MAE: 0.1355
Test RMSE: 0.3847, Test MAE: 0.1788
---------------------------------
Model saved at epoch 1 with val RMSE 0.3124
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3159
Val RMSE: 0.2856, Val MAE: 0.1269
Test RMSE: 0.3570, Test MAE: 0.1675
---------------------------------
Model saved at epoch 2 with val RMSE 0.2856
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.2949
Val RMSE: 0.2735, Val MAE: 0.1212
Test RMSE: 0.3479, Test MAE: 0.1608
-----------------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7472506704766168
Spearman Corr: 0.4298517623310616
p-rmse: 0.32435184642353815
       rmse       mae  seed                             subtask    method  \
0  0.324352  0.150472     2  openproblems_bmmc_cite_phase2_mod2  original   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.747251          0.0000  0.429852           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3727
Val RMSE: 0.3118, Val MAE: 0.1356
Test RMSE: 0.3838, Test MAE: 0.1793
---------------------------------
Model saved at epoch 1 with val RMSE 0.3118
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3162
Val RMSE: 0.2867, Val MAE: 0.1281
Test RMSE: 0.3572, Test MAE: 0.1685
---------------------------------
Model saved at epoch 2 with val RMSE 0.2867
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.2956
Val RMSE: 0.2741, Val MAE: 0.1216
Test RMSE: 0.3481, Test MAE: 0.1613
-----------------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7469884063694666
Spearman Corr: 0.4297463979975345
p-rmse: 0.3250802211606799
      rmse       mae  seed                             subtask    method  \
0  0.32508  0.149491     3  openproblems_bmmc_cite_phase2_mod2  original   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.746988          0.0000  0.429746           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3725
Val RMSE: 0.3125, Val MAE: 0.1356
Test RMSE: 0.3852, Test MAE: 0.1789
---------------------------------
Model saved at epoch 1 with val RMSE 0.3125
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3161
Val RMSE: 0.2862, Val MAE: 0.1277
Test RMSE: 0.3565, Test MAE: 0.1683
---------------------------------
Model saved at epoch 2 with val RMSE 0.2862
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.2955
Val RMSE: 0.2740, Val MAE: 0.1217
Test RMSE: 0.3472, Test MAE: 0.1613
--------------------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7457569924235696
Spearman Corr: 0.42814726039603473
p-rmse: 0.32713076415648373
       rmse       mae  seed                             subtask    method  \
0  0.327131  0.150308     4  openproblems_bmmc_cite_phase2_mod2  original   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.745757          0.0000  0.428147           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3712
Val RMSE: 0.3110, Val MAE: 0.1359
Test RMSE: 0.3816, Test MAE: 0.1796
---------------------------------
Model saved at epoch 1 with val RMSE 0.3110
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3158
Val RMSE: 0.2858, Val MAE: 0.1277
Test RMSE: 0.3557, Test MAE: 0.1684
---------------------------------
Model saved at epoch 2 with val RMSE 0.2858
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.2949
Val RMSE: 0.2737, Val MAE: 0.1213
Test RMSE: 0.3475, Test MAE: 0.1611
----------------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.747480466905199
Spearman Corr: 0.4276169986802547
p-rmse: 0.32674794521357
       rmse       mae  seed                             subtask    method  \
0  0.326748  0.150229     5  openproblems_bmmc_cite_phase2_mod2  original   

   pearson p_value_pearson  spearman p_value_spearman  
0  0.74748          0.0000  0.427617           0.0000  


In [None]:
# Depth experiment: three-entry conv flow (GAT, SAGE, GAT), model variant "original".
# NOTE(review): the recorded output of this cell reports results for seeds 1-5,
# so `seed` does not appear to be the only seed `pipeline` trains with — confirm
# against the `pipeline` definition.
config_hierachical_PROTEIN = dict(
    subtask="openproblems_bmmc_cite_phase2_mod2",
    batch_size=520,
    act="relu",
    conv_flow=["gat", 'sage', 'gat'],  # presumably one graph-conv type per layer — TODO confirm
    agg_flow=[None, 'lstm', None],  # same length as conv_flow; 'lstm' presumably the SAGE aggregator
    device="cuda",
    learning_rate=0.000064,
    hidden_size=280,
    num_heads=6,
    n_neigbours=3,  # key spelling must stay as-is: it is forwarded as a keyword to pipeline
    metric="euclidean",
    n_components=1200,
    dropout_rate=0.2,
    num_epochs=500,
    seed=1,
    # es_* look like early-stopping settings (the log prints "Patience grace period: 40").
    es_patience=40,
    es_min_delta=0.0001,
    es_rate_threshold=0.00002,
    layer_dim_ex=3,
    verbose=True,
    sample=True,
    preprocessing_type="None",  # NOTE(review): the string "None", not the None object
    variant="original",  # shows up as the `method` column of the result table
    num_clusters=10,
)

# Expand the config into keyword arguments for the run.
pipeline(**config_hierachical_PROTEIN)

[INFO][2025-04-15 22:43:03,046][dance][_load_raw_data] Loading /content/data/openproblems_bmmc_cite_phase2_mod2/openproblems_bmmc_cite_phase2_mod2.censor_dataset.output_train_mod1.h5ad



Initializing pipeline...


[INFO][2025-04-15 22:43:03,704][dance][_load_raw_data] Loading /content/data/openproblems_bmmc_cite_phase2_mod2/openproblems_bmmc_cite_phase2_mod2.censor_dataset.output_train_mod2.h5ad
[INFO][2025-04-15 22:43:11,436][dance][_load_raw_data] Loading /content/data/openproblems_bmmc_cite_phase2_mod2/openproblems_bmmc_cite_phase2_mod2.censor_dataset.output_test_mod1.h5ad
[INFO][2025-04-15 22:43:11,461][dance][_load_raw_data] Loading /content/data/openproblems_bmmc_cite_phase2_mod2/openproblems_bmmc_cite_phase2_mod2.censor_dataset.output_test_mod2.h5ad
[INFO][2025-04-15 22:43:11,663][dance][_maybe_preprocess] Preprocessing done.
  self._update_attr("var", axis=0, join_common=join_common)
  self._update_attr("obs", axis=1, join_common=join_common)
[INFO][2025-04-15 22:43:19,054][dance][set_config_from_dict] Setting config 'feature_mod' to 'mod1'
[INFO][2025-04-15 22:43:19,055][dance][set_config_from_dict] Setting config 'label_mod' to 'mod2'
[INFO][2025-04-15 22:43:19,055][dance][load_data] R

Processed data found. Loading...


  val_label = torch.load(val_label_path).numpy()
  test_label = torch.load(test_label_path).numpy()
  kwargs["FEATURE_SIZE"] = torch.load(feature_size_path).item()
  kwargs["OUTPUT_SIZE"] = torch.load(output_size_path).item()


Processed data loaded.
---------------------------------
Epoch 1/500
Train RMSE: 0.3662
Val RMSE: 0.3049, Val MAE: 0.1334
Test RMSE: 0.3757, Test MAE: 0.1761
---------------------------------
Model saved at epoch 1 with val RMSE 0.3049
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3102
Val RMSE: 0.2836, Val MAE: 0.1265
Test RMSE: 0.3549, Test MAE: 0.1665
---------------------------------
Model saved at epoch 2 with val RMSE 0.2836
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.2911
Val RMSE: 0.2723, Val MAE: 0.1201
Test RMSE: 0.3473, Test MAE: 0.1598
---------------------------------
Model saved at epoch 3 with val RMSE 0.2723
Patience grace period: 40
---------------------------------
Epoch 4/500
Train RMSE: 0.2814
Val RMSE: 0.2678, Val MAE: 0.1186
Test RMSE: 0.3447, Test MAE: 0.1581
---------------------------------
Model saved at epoch 4 with val RMSE 0.2678
Patience grace period: 40
---------------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7480508999887405
Spearman Corr: 0.42979537549994584
p-rmse: 0.3247229091806345
       rmse       mae  seed                             subtask    method  \
0  0.324723  0.150661     1  openproblems_bmmc_cite_phase2_mod2  original   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.748051          0.0000  0.429795           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3681
Val RMSE: 0.3060, Val MAE: 0.1334
Test RMSE: 0.3766, Test MAE: 0.1765
---------------------------------
Model saved at epoch 1 with val RMSE 0.3060
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3110
Val RMSE: 0.2834, Val MAE: 0.1265
Test RMSE: 0.3544, Test MAE: 0.1668
---------------------------------
Model saved at epoch 2 with val RMSE 0.2834
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.2910
Val RMSE: 0.2718, Val MAE: 0.1204
Test RMSE: 0.3461, Test MAE: 0.1603
-----------------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7477034017062414
Spearman Corr: 0.42916225579984146
p-rmse: 0.32581654913146274
       rmse       mae  seed                             subtask    method  \
0  0.325817  0.150003     2  openproblems_bmmc_cite_phase2_mod2  original   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.747703          0.0000  0.429162           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3661
Val RMSE: 0.3047, Val MAE: 0.1330
Test RMSE: 0.3742, Test MAE: 0.1761
---------------------------------
Model saved at epoch 1 with val RMSE 0.3047
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3095
Val RMSE: 0.2822, Val MAE: 0.1257
Test RMSE: 0.3530, Test MAE: 0.1661
---------------------------------
Model saved at epoch 2 with val RMSE 0.2822
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.2906
Val RMSE: 0.2718, Val MAE: 0.1205
Test RMSE: 0.3457, Test MAE: 0.1604
----------------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7473392675405107
Spearman Corr: 0.4283071780078253
p-rmse: 0.325397440093662
       rmse       mae  seed                             subtask    method  \
0  0.325397  0.151117     3  openproblems_bmmc_cite_phase2_mod2  original   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.747339          0.0000  0.428307           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3677
Val RMSE: 0.3053, Val MAE: 0.1328
Test RMSE: 0.3757, Test MAE: 0.1755
---------------------------------
Model saved at epoch 1 with val RMSE 0.3053
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3107
Val RMSE: 0.2835, Val MAE: 0.1268
Test RMSE: 0.3541, Test MAE: 0.1668
---------------------------------
Model saved at epoch 2 with val RMSE 0.2835
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.2918
Val RMSE: 0.2724, Val MAE: 0.1210
Test RMSE: 0.3463, Test MAE: 0.1606
-------------------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7476946255302537
Spearman Corr: 0.4295208394553063
p-rmse: 0.3259202134768382
      rmse       mae  seed                             subtask    method  \
0  0.32592  0.149541     4  openproblems_bmmc_cite_phase2_mod2  original   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.747695          0.0000  0.429521           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3648
Val RMSE: 0.3032, Val MAE: 0.1333
Test RMSE: 0.3729, Test MAE: 0.1760
---------------------------------
Model saved at epoch 1 with val RMSE 0.3032
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3093
Val RMSE: 0.2827, Val MAE: 0.1267
Test RMSE: 0.3532, Test MAE: 0.1665
---------------------------------
Model saved at epoch 2 with val RMSE 0.2827
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.2903
Val RMSE: 0.2719, Val MAE: 0.1205
Test RMSE: 0.3463, Test MAE: 0.1601
--------------------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7473230658878236
Spearman Corr: 0.42895829723676604
p-rmse: 0.3272310067355786
       rmse       mae  seed                             subtask    method  \
0  0.327231  0.148864     5  openproblems_bmmc_cite_phase2_mod2  original   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.747323          0.0000  0.428958           0.0000  


In [None]:
# Neighbourhood-size experiment: single GAT layer, n_neigbours=10, model variant "original".
# NOTE(review): the recorded output of this cell shows seed 1 finishing and a
# further run starting, so `seed` does not appear to be the only seed `pipeline`
# trains with — confirm against the `pipeline` definition.
config_hierachical_PROTEIN = dict(
    subtask="openproblems_bmmc_cite_phase2_mod2",
    batch_size=520,
    act="relu",
    conv_flow=["gat"],  # presumably one graph-conv type per layer — TODO confirm
    agg_flow=[None],  # same length as conv_flow; presumably a per-layer aggregator
    device="cuda",
    learning_rate=0.000064,
    hidden_size=280,
    num_heads=6,
    n_neigbours=10,  # key spelling must stay as-is: it is forwarded as a keyword to pipeline
    metric="euclidean",
    n_components=1200,
    dropout_rate=0.2,
    num_epochs=500,
    seed=1,
    # es_* look like early-stopping settings (the log prints "Patience grace period: 40").
    es_patience=40,
    es_min_delta=0.0001,
    es_rate_threshold=0.00002,
    layer_dim_ex=3,
    verbose=True,
    sample=True,
    preprocessing_type="None",  # NOTE(review): the string "None", not the None object
    variant="original",  # shows up as the `method` column of the result table
    num_clusters=10,
)

# Expand the config into keyword arguments for the run.
pipeline(**config_hierachical_PROTEIN)


Initializing pipeline...


[INFO][2025-04-16 05:06:03,728][dance][download_file] Downloading: /content/data/openproblems_bmmc_cite_phase2_mod2.zip Bytes: 625,243,451
100%|██████████| 596M/596M [00:29<00:00, 21.0MB/s]
[INFO][2025-04-16 05:06:33,507][dance][unzip_file] Unzipping /content/data/openproblems_bmmc_cite_phase2_mod2.zip
[INFO][2025-04-16 05:06:36,849][dance][delete_file] Deleting /content/data/openproblems_bmmc_cite_phase2_mod2.zip
[INFO][2025-04-16 05:06:36,950][dance][_load_raw_data] Loading /content/data/openproblems_bmmc_cite_phase2_mod2/openproblems_bmmc_cite_phase2_mod2.censor_dataset.output_train_mod1.h5ad
[INFO][2025-04-16 05:06:37,665][dance][_load_raw_data] Loading /content/data/openproblems_bmmc_cite_phase2_mod2/openproblems_bmmc_cite_phase2_mod2.censor_dataset.output_train_mod2.h5ad
[INFO][2025-04-16 05:06:45,366][dance][_load_raw_data] Loading /content/data/openproblems_bmmc_cite_phase2_mod2/openproblems_bmmc_cite_phase2_mod2.censor_dataset.output_test_mod1.h5ad
[INFO][2025-04-16 05:06:45,3

Performing data processing using GraphCreator...
Data processing using GraphCreator complete and saved.
---------------------------------
Epoch 1/500
Train RMSE: 0.3758
Val RMSE: 0.3258, Val MAE: 0.1334
Test RMSE: 0.4033, Test MAE: 0.1778
---------------------------------
Model saved at epoch 1 with val RMSE 0.3258
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3288
Val RMSE: 0.2959, Val MAE: 0.1266
Test RMSE: 0.3712, Test MAE: 0.1674
---------------------------------
Model saved at epoch 2 with val RMSE 0.2959
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3057
Val RMSE: 0.2800, Val MAE: 0.1201
Test RMSE: 0.3562, Test MAE: 0.1601
---------------------------------
Model saved at epoch 3 with val RMSE 0.2800
Patience grace period: 40
---------------------------------
Epoch 4/500
Train RMSE: 0.2923
Val RMSE: 0.2727, Val MAE: 0.1177
Test RMSE: 0.3494, Test MAE: 0.1575
---------------------------------
Model save

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7488482768420702
Spearman Corr: 0.4304667272894109
p-rmse: 0.32500796033207086
       rmse       mae  seed                             subtask    method  \
0  0.325008  0.150027     1  openproblems_bmmc_cite_phase2_mod2  original   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.748848          0.0000  0.430467           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3746
Val RMSE: 0.3228, Val MAE: 0.1328
Test RMSE: 0.3994, Test MAE: 0.1770
---------------------------------
Model saved at epoch 1 with val RMSE 0.3228
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3273
Val RMSE: 0.2947, Val MAE: 0.1275
Test RMSE: 0.3688, Test MAE: 0.1683
---------------------------------
Model saved at epoch 2 with val RMSE 0.2947
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3055
Val RMSE: 0.2802, Val MAE: 0.1206
Test RMSE: 0.3560, Test MAE: 0.1607
-----------------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7485185361005408
Spearman Corr: 0.430058239455327
p-rmse: 0.3249778822562031
       rmse       mae  seed                             subtask    method  \
0  0.324978  0.149264     2  openproblems_bmmc_cite_phase2_mod2  original   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.748519          0.0000  0.430058           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3756
Val RMSE: 0.3229, Val MAE: 0.1329
Test RMSE: 0.3992, Test MAE: 0.1774
---------------------------------
Model saved at epoch 1 with val RMSE 0.3229
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3275
Val RMSE: 0.2937, Val MAE: 0.1276
Test RMSE: 0.3673, Test MAE: 0.1682
---------------------------------
Model saved at epoch 2 with val RMSE 0.2937
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3050
Val RMSE: 0.2790, Val MAE: 0.1207
Test RMSE: 0.3543, Test MAE: 0.1608
-------------------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7481531951767988
Spearman Corr: 0.43002269006441324
p-rmse: 0.3252655274249514
       rmse      mae  seed                             subtask    method  \
0  0.325266  0.14916     3  openproblems_bmmc_cite_phase2_mod2  original   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.748153          0.0000  0.430023           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3754
Val RMSE: 0.3224, Val MAE: 0.1337
Test RMSE: 0.3989, Test MAE: 0.1778
---------------------------------
Model saved at epoch 1 with val RMSE 0.3224
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3270
Val RMSE: 0.2932, Val MAE: 0.1272
Test RMSE: 0.3674, Test MAE: 0.1681
---------------------------------
Model saved at epoch 2 with val RMSE 0.2932
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3047
Val RMSE: 0.2791, Val MAE: 0.1204
Test RMSE: 0.3547, Test MAE: 0.1605
-------------------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7469643870872167
Spearman Corr: 0.42967498439146123
p-rmse: 0.3263443131252487
       rmse      mae  seed                             subtask    method  \
0  0.326344  0.14936     4  openproblems_bmmc_cite_phase2_mod2  original   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.746964          0.0000  0.429675           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3727
Val RMSE: 0.3195, Val MAE: 0.1333
Test RMSE: 0.3945, Test MAE: 0.1774
---------------------------------
Model saved at epoch 1 with val RMSE 0.3195
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3247
Val RMSE: 0.2921, Val MAE: 0.1261
Test RMSE: 0.3666, Test MAE: 0.1671
---------------------------------
Model saved at epoch 2 with val RMSE 0.2921
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3035
Val RMSE: 0.2792, Val MAE: 0.1203
Test RMSE: 0.3551, Test MAE: 0.1605
-------------------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7480124375392466
Spearman Corr: 0.429039042858738
p-rmse: 0.3253197303100398
      rmse       mae  seed                             subtask    method  \
0  0.32532  0.150291     5  openproblems_bmmc_cite_phase2_mod2  original   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.748012          0.0000  0.429039           0.0000  


In [None]:
# Ablation run on subtask "openproblems_bmmc_cite_phase2_mod2":
# GAT convolution flow with n_neigbours=20 and the "euclidean" metric.
#
# Every entry below is forwarded to pipeline() as a keyword argument, so the
# names (including the spelling "n_neigbours") are kept exactly as written.
# NOTE(review): the saved output of this cell reports results for seeds 1-5
# even though seed=1 here -- presumably pipeline() iterates over several
# seeds; confirm against its definition.
config_hierachical_PROTEIN = dict(
    subtask="openproblems_bmmc_cite_phase2_mod2",
    batch_size=520,
    act="relu",
    conv_flow=["gat"],
    agg_flow=[None],
    device="cuda",
    learning_rate=6.4e-5,
    hidden_size=280,
    num_heads=6,
    n_neigbours=20,        # setting under study in this cell
    metric="euclidean",    # setting under study in this cell
    n_components=1200,
    dropout_rate=0.2,
    num_epochs=500,
    seed=1,
    es_patience=40,
    es_min_delta=1e-4,
    es_rate_threshold=2e-5,
    layer_dim_ex=3,
    verbose=True,
    sample=True,
    preprocessing_type="None",  # the string "None", not the None object
    variant="original",
    num_clusters=10,
)

pipeline(**config_hierachical_PROTEIN)

[INFO][2025-04-16 05:51:12,239][dance][_load_raw_data] Loading /content/data/openproblems_bmmc_cite_phase2_mod2/openproblems_bmmc_cite_phase2_mod2.censor_dataset.output_train_mod1.h5ad



Initializing pipeline...


[INFO][2025-04-16 05:51:12,923][dance][_load_raw_data] Loading /content/data/openproblems_bmmc_cite_phase2_mod2/openproblems_bmmc_cite_phase2_mod2.censor_dataset.output_train_mod2.h5ad
[INFO][2025-04-16 05:51:20,647][dance][_load_raw_data] Loading /content/data/openproblems_bmmc_cite_phase2_mod2/openproblems_bmmc_cite_phase2_mod2.censor_dataset.output_test_mod1.h5ad
[INFO][2025-04-16 05:51:20,671][dance][_load_raw_data] Loading /content/data/openproblems_bmmc_cite_phase2_mod2/openproblems_bmmc_cite_phase2_mod2.censor_dataset.output_test_mod2.h5ad
[INFO][2025-04-16 05:51:20,885][dance][_maybe_preprocess] Preprocessing done.
  self._update_attr("var", axis=0, join_common=join_common)
  self._update_attr("obs", axis=1, join_common=join_common)
[INFO][2025-04-16 05:51:27,886][dance][set_config_from_dict] Setting config 'feature_mod' to 'mod1'
[INFO][2025-04-16 05:51:27,886][dance][set_config_from_dict] Setting config 'label_mod' to 'mod2'
[INFO][2025-04-16 05:51:27,887][dance][load_data] R

Performing data processing using GraphCreator...
Data processing using GraphCreator complete and saved.
---------------------------------
Epoch 1/500
Train RMSE: 0.3738
Val RMSE: 0.3213, Val MAE: 0.1326
Test RMSE: 0.3978, Test MAE: 0.1766
---------------------------------
Model saved at epoch 1 with val RMSE 0.3213
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3260
Val RMSE: 0.2942, Val MAE: 0.1259
Test RMSE: 0.3696, Test MAE: 0.1669
---------------------------------
Model saved at epoch 2 with val RMSE 0.2942
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3044
Val RMSE: 0.2795, Val MAE: 0.1201
Test RMSE: 0.3558, Test MAE: 0.1599
---------------------------------
Model saved at epoch 3 with val RMSE 0.2795
Patience grace period: 40
---------------------------------
Epoch 4/500
Train RMSE: 0.2914
Val RMSE: 0.2725, Val MAE: 0.1174
Test RMSE: 0.3495, Test MAE: 0.1571
---------------------------------
Model save

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7476671583637816
Spearman Corr: 0.42951605709159774
p-rmse: 0.32510150094708123
       rmse       mae  seed                             subtask    method  \
0  0.325102  0.149909     1  openproblems_bmmc_cite_phase2_mod2  original   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.747667          0.0000  0.429516           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3746
Val RMSE: 0.3228, Val MAE: 0.1327
Test RMSE: 0.3994, Test MAE: 0.1769
---------------------------------
Model saved at epoch 1 with val RMSE 0.3228
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3274
Val RMSE: 0.2950, Val MAE: 0.1275
Test RMSE: 0.3692, Test MAE: 0.1685
---------------------------------
Model saved at epoch 2 with val RMSE 0.2950
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3057
Val RMSE: 0.2805, Val MAE: 0.1207
Test RMSE: 0.3563, Test MAE: 0.1609
----------------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7484033203839193
Spearman Corr: 0.4296405104364558
p-rmse: 0.3263213334896276
       rmse       mae  seed                             subtask    method  \
0  0.326321  0.149537     2  openproblems_bmmc_cite_phase2_mod2  original   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.748403          0.0000  0.429641           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3755
Val RMSE: 0.3229, Val MAE: 0.1329
Test RMSE: 0.3993, Test MAE: 0.1775
---------------------------------
Model saved at epoch 1 with val RMSE 0.3229
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3276
Val RMSE: 0.2941, Val MAE: 0.1277
Test RMSE: 0.3676, Test MAE: 0.1683
---------------------------------
Model saved at epoch 2 with val RMSE 0.2941
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3052
Val RMSE: 0.2791, Val MAE: 0.1206
Test RMSE: 0.3544, Test MAE: 0.1608
------------------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7486839323876242
Spearman Corr: 0.4294777463902922
p-rmse: 0.32612684932753166
       rmse       mae  seed                             subtask    method  \
0  0.326127  0.149043     3  openproblems_bmmc_cite_phase2_mod2  original   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.748684          0.0000  0.429478           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3754
Val RMSE: 0.3224, Val MAE: 0.1337
Test RMSE: 0.3990, Test MAE: 0.1778
---------------------------------
Model saved at epoch 1 with val RMSE 0.3224
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3269
Val RMSE: 0.2931, Val MAE: 0.1270
Test RMSE: 0.3673, Test MAE: 0.1679
---------------------------------
Model saved at epoch 2 with val RMSE 0.2931
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3046
Val RMSE: 0.2791, Val MAE: 0.1202
Test RMSE: 0.3547, Test MAE: 0.1603
-----------------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7469748562537248
Spearman Corr: 0.4292201237124692
p-rmse: 0.3274961303526004
       rmse       mae  seed                             subtask    method  \
0  0.327496  0.149057     4  openproblems_bmmc_cite_phase2_mod2  original   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.746975          0.0000   0.42922           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3728
Val RMSE: 0.3196, Val MAE: 0.1333
Test RMSE: 0.3947, Test MAE: 0.1774
---------------------------------
Model saved at epoch 1 with val RMSE 0.3196
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3248
Val RMSE: 0.2923, Val MAE: 0.1261
Test RMSE: 0.3668, Test MAE: 0.1671
---------------------------------
Model saved at epoch 2 with val RMSE 0.2923
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3037
Val RMSE: 0.2794, Val MAE: 0.1202
Test RMSE: 0.3554, Test MAE: 0.1605
------------------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7487808325967826
Spearman Corr: 0.4283850591323203
p-rmse: 0.32693103000085283
       rmse       mae  seed                             subtask    method  \
0  0.326931  0.150015     5  openproblems_bmmc_cite_phase2_mod2  original   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.748781          0.0000  0.428385           0.0000  


In [None]:
# Ablation run on subtask "openproblems_bmmc_cite_phase2_mod2":
# GAT convolution flow with n_neigbours=3 and the "cosine" metric.
#
# Every entry below is forwarded to pipeline() as a keyword argument, so the
# names (including the spelling "n_neigbours") are kept exactly as written.
# NOTE(review): the saved output of this cell reports results for seeds 1-5
# even though seed=1 here -- presumably pipeline() iterates over several
# seeds; confirm against its definition.
config_hierachical_PROTEIN = dict(
    subtask="openproblems_bmmc_cite_phase2_mod2",
    batch_size=520,
    act="relu",
    conv_flow=["gat"],
    agg_flow=[None],
    device="cuda",
    learning_rate=6.4e-5,
    hidden_size=280,
    num_heads=6,
    n_neigbours=3,         # setting under study in this cell
    metric="cosine",       # setting under study in this cell
    n_components=1200,
    dropout_rate=0.2,
    num_epochs=500,
    seed=1,
    es_patience=40,
    es_min_delta=1e-4,
    es_rate_threshold=2e-5,
    layer_dim_ex=3,
    verbose=True,
    sample=True,
    preprocessing_type="None",  # the string "None", not the None object
    variant="original",
    num_clusters=10,
)

pipeline(**config_hierachical_PROTEIN)

[INFO][2025-04-16 06:27:21,170][dance][_load_raw_data] Loading /content/data/openproblems_bmmc_cite_phase2_mod2/openproblems_bmmc_cite_phase2_mod2.censor_dataset.output_train_mod1.h5ad



Initializing pipeline...


[INFO][2025-04-16 06:27:21,824][dance][_load_raw_data] Loading /content/data/openproblems_bmmc_cite_phase2_mod2/openproblems_bmmc_cite_phase2_mod2.censor_dataset.output_train_mod2.h5ad
[INFO][2025-04-16 06:27:29,538][dance][_load_raw_data] Loading /content/data/openproblems_bmmc_cite_phase2_mod2/openproblems_bmmc_cite_phase2_mod2.censor_dataset.output_test_mod1.h5ad
[INFO][2025-04-16 06:27:29,562][dance][_load_raw_data] Loading /content/data/openproblems_bmmc_cite_phase2_mod2/openproblems_bmmc_cite_phase2_mod2.censor_dataset.output_test_mod2.h5ad
[INFO][2025-04-16 06:27:29,763][dance][_maybe_preprocess] Preprocessing done.
  self._update_attr("var", axis=0, join_common=join_common)
  self._update_attr("obs", axis=1, join_common=join_common)
[INFO][2025-04-16 06:27:36,864][dance][set_config_from_dict] Setting config 'feature_mod' to 'mod1'
[INFO][2025-04-16 06:27:36,865][dance][set_config_from_dict] Setting config 'label_mod' to 'mod2'
[INFO][2025-04-16 06:27:36,865][dance][load_data] R

Performing data processing using GraphCreator...
Data processing using GraphCreator complete and saved.
---------------------------------
Epoch 1/500
Train RMSE: 0.3737
Val RMSE: 0.3215, Val MAE: 0.1326
Test RMSE: 0.3979, Test MAE: 0.1766
---------------------------------
Model saved at epoch 1 with val RMSE 0.3215
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3260
Val RMSE: 0.2940, Val MAE: 0.1261
Test RMSE: 0.3693, Test MAE: 0.1670
---------------------------------
Model saved at epoch 2 with val RMSE 0.2940
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3044
Val RMSE: 0.2793, Val MAE: 0.1205
Test RMSE: 0.3554, Test MAE: 0.1600
---------------------------------
Model saved at epoch 3 with val RMSE 0.2793
Patience grace period: 40
---------------------------------
Epoch 4/500
Train RMSE: 0.2916
Val RMSE: 0.2723, Val MAE: 0.1181
Test RMSE: 0.3490, Test MAE: 0.1574
---------------------------------
Model save

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7477081530727101
Spearman Corr: 0.43060266492978333
p-rmse: 0.3247108172432595
       rmse       mae  seed                             subtask    method  \
0  0.324711  0.150198     1  openproblems_bmmc_cite_phase2_mod2  original   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.747708          0.0000  0.430603           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3746
Val RMSE: 0.3229, Val MAE: 0.1327
Test RMSE: 0.3994, Test MAE: 0.1770
---------------------------------
Model saved at epoch 1 with val RMSE 0.3229
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3272
Val RMSE: 0.2943, Val MAE: 0.1275
Test RMSE: 0.3684, Test MAE: 0.1682
---------------------------------
Model saved at epoch 2 with val RMSE 0.2943
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3051
Val RMSE: 0.2796, Val MAE: 0.1207
Test RMSE: 0.3554, Test MAE: 0.1606
-----------------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7485879706813745
Spearman Corr: 0.4303570305923131
p-rmse: 0.3250500695467178
      rmse       mae  seed                             subtask    method  \
0  0.32505  0.149513     2  openproblems_bmmc_cite_phase2_mod2  original   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.748588          0.0000  0.430357           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3755
Val RMSE: 0.3229, Val MAE: 0.1329
Test RMSE: 0.3991, Test MAE: 0.1775
---------------------------------
Model saved at epoch 1 with val RMSE 0.3229
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3274
Val RMSE: 0.2936, Val MAE: 0.1276
Test RMSE: 0.3672, Test MAE: 0.1683
---------------------------------
Model saved at epoch 2 with val RMSE 0.2936
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3050
Val RMSE: 0.2790, Val MAE: 0.1210
Test RMSE: 0.3541, Test MAE: 0.1611
--------------------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7481125299774724
Spearman Corr: 0.4301684718717627
p-rmse: 0.32524858788675615
       rmse       mae  seed                             subtask    method  \
0  0.325249  0.149163     3  openproblems_bmmc_cite_phase2_mod2  original   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.748113          0.0000  0.430168           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3754
Val RMSE: 0.3226, Val MAE: 0.1337
Test RMSE: 0.3991, Test MAE: 0.1778
---------------------------------
Model saved at epoch 1 with val RMSE 0.3226
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3273
Val RMSE: 0.2937, Val MAE: 0.1276
Test RMSE: 0.3677, Test MAE: 0.1684
---------------------------------
Model saved at epoch 2 with val RMSE 0.2937
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3052
Val RMSE: 0.2793, Val MAE: 0.1209
Test RMSE: 0.3548, Test MAE: 0.1608
-----------------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7467659602407783
Spearman Corr: 0.42916576961004704
p-rmse: 0.3262555477613358
       rmse       mae  seed                             subtask    method  \
0  0.326256  0.149418     4  openproblems_bmmc_cite_phase2_mod2  original   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.746766          0.0000  0.429166           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3727
Val RMSE: 0.3196, Val MAE: 0.1332
Test RMSE: 0.3945, Test MAE: 0.1774
---------------------------------
Model saved at epoch 1 with val RMSE 0.3196
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3247
Val RMSE: 0.2922, Val MAE: 0.1263
Test RMSE: 0.3666, Test MAE: 0.1672
---------------------------------
Model saved at epoch 2 with val RMSE 0.2922
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3035
Val RMSE: 0.2789, Val MAE: 0.1204
Test RMSE: 0.3549, Test MAE: 0.1605
-----------------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7473741230969665
Spearman Corr: 0.42969693843498014
p-rmse: 0.32514602700055356
       rmse       mae  seed                             subtask    method  \
0  0.325146  0.150167     5  openproblems_bmmc_cite_phase2_mod2  original   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.747374          0.0000  0.429697           0.0000  


In [None]:
# Ablation run on subtask "openproblems_bmmc_cite_phase2_mod2":
# GAT convolution flow with n_neigbours=3 and the "nan_euclidean" metric.
#
# Every entry below is forwarded to pipeline() as a keyword argument, so the
# names (including the spelling "n_neigbours") are kept exactly as written.
# NOTE(review): the saved output of this cell reports results for seeds 1-5
# even though seed=1 here -- presumably pipeline() iterates over several
# seeds; confirm against its definition.
config_hierachical_PROTEIN = dict(
    subtask="openproblems_bmmc_cite_phase2_mod2",
    batch_size=520,
    act="relu",
    conv_flow=["gat"],
    agg_flow=[None],
    device="cuda",
    learning_rate=6.4e-5,
    hidden_size=280,
    num_heads=6,
    n_neigbours=3,              # setting under study in this cell
    metric="nan_euclidean",     # setting under study in this cell
    n_components=1200,
    dropout_rate=0.2,
    num_epochs=500,
    seed=1,
    es_patience=40,
    es_min_delta=1e-4,
    es_rate_threshold=2e-5,
    layer_dim_ex=3,
    verbose=True,
    sample=True,
    preprocessing_type="None",  # the string "None", not the None object
    variant="original",
    num_clusters=10,
)

pipeline(**config_hierachical_PROTEIN)

[INFO][2025-04-16 07:13:42,922][dance][_load_raw_data] Loading /content/data/openproblems_bmmc_cite_phase2_mod2/openproblems_bmmc_cite_phase2_mod2.censor_dataset.output_train_mod1.h5ad



Initializing pipeline...


[INFO][2025-04-16 07:13:43,576][dance][_load_raw_data] Loading /content/data/openproblems_bmmc_cite_phase2_mod2/openproblems_bmmc_cite_phase2_mod2.censor_dataset.output_train_mod2.h5ad
[INFO][2025-04-16 07:13:51,281][dance][_load_raw_data] Loading /content/data/openproblems_bmmc_cite_phase2_mod2/openproblems_bmmc_cite_phase2_mod2.censor_dataset.output_test_mod1.h5ad
[INFO][2025-04-16 07:13:51,307][dance][_load_raw_data] Loading /content/data/openproblems_bmmc_cite_phase2_mod2/openproblems_bmmc_cite_phase2_mod2.censor_dataset.output_test_mod2.h5ad
[INFO][2025-04-16 07:13:51,507][dance][_maybe_preprocess] Preprocessing done.
  self._update_attr("var", axis=0, join_common=join_common)
  self._update_attr("obs", axis=1, join_common=join_common)
[INFO][2025-04-16 07:13:58,496][dance][set_config_from_dict] Setting config 'feature_mod' to 'mod1'
[INFO][2025-04-16 07:13:58,497][dance][set_config_from_dict] Setting config 'label_mod' to 'mod2'
[INFO][2025-04-16 07:13:58,498][dance][load_data] R

Performing data processing using GraphCreator...
Data processing using GraphCreator complete and saved.
---------------------------------
Epoch 1/500
Train RMSE: 0.3737
Val RMSE: 0.3212, Val MAE: 0.1327
Test RMSE: 0.3978, Test MAE: 0.1766
---------------------------------
Model saved at epoch 1 with val RMSE 0.3212
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3260
Val RMSE: 0.2939, Val MAE: 0.1262
Test RMSE: 0.3693, Test MAE: 0.1671
---------------------------------
Model saved at epoch 2 with val RMSE 0.2939
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3044
Val RMSE: 0.2792, Val MAE: 0.1206
Test RMSE: 0.3554, Test MAE: 0.1602
---------------------------------
Model saved at epoch 3 with val RMSE 0.2792
Patience grace period: 40
---------------------------------
Epoch 4/500
Train RMSE: 0.2916
Val RMSE: 0.2720, Val MAE: 0.1181
Test RMSE: 0.3489, Test MAE: 0.1575
---------------------------------
Model save

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7489006865936385
Spearman Corr: 0.431526932105972
p-rmse: 0.3250689790956851
       rmse       mae  seed                             subtask    method  \
0  0.325069  0.149473     1  openproblems_bmmc_cite_phase2_mod2  original   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.748901          0.0000  0.431527           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3746
Val RMSE: 0.3227, Val MAE: 0.1329
Test RMSE: 0.3993, Test MAE: 0.1770
---------------------------------
Model saved at epoch 1 with val RMSE 0.3227
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3272
Val RMSE: 0.2941, Val MAE: 0.1276
Test RMSE: 0.3683, Test MAE: 0.1683
---------------------------------
Model saved at epoch 2 with val RMSE 0.2941
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3051
Val RMSE: 0.2794, Val MAE: 0.1207
Test RMSE: 0.3553, Test MAE: 0.1606
-------------------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7491165993766621
Spearman Corr: 0.43076472692409457
p-rmse: 0.3241152885540088
       rmse       mae  seed                             subtask    method  \
0  0.324115  0.149535     2  openproblems_bmmc_cite_phase2_mod2  original   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.749117          0.0000  0.430765           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3755
Val RMSE: 0.3227, Val MAE: 0.1331
Test RMSE: 0.3990, Test MAE: 0.1775
---------------------------------
Model saved at epoch 1 with val RMSE 0.3227
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3274
Val RMSE: 0.2936, Val MAE: 0.1278
Test RMSE: 0.3672, Test MAE: 0.1684
---------------------------------
Model saved at epoch 2 with val RMSE 0.2936
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3050
Val RMSE: 0.2788, Val MAE: 0.1211
Test RMSE: 0.3541, Test MAE: 0.1611
-----------------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7486420162846413
Spearman Corr: 0.4301231839675823
p-rmse: 0.3250172101562541
       rmse       mae  seed                             subtask    method  \
0  0.325017  0.149211     3  openproblems_bmmc_cite_phase2_mod2  original   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.748642          0.0000  0.430123           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3754
Val RMSE: 0.3224, Val MAE: 0.1338
Test RMSE: 0.3990, Test MAE: 0.1779
---------------------------------
Model saved at epoch 1 with val RMSE 0.3224
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3272
Val RMSE: 0.2934, Val MAE: 0.1276
Test RMSE: 0.3675, Test MAE: 0.1684
---------------------------------
Model saved at epoch 2 with val RMSE 0.2934
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3051
Val RMSE: 0.2790, Val MAE: 0.1209
Test RMSE: 0.3546, Test MAE: 0.1608
------------------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7479296130566552
Spearman Corr: 0.42984765662228425
p-rmse: 0.3248608561824182
       rmse       mae  seed                             subtask    method  \
0  0.324861  0.149685     4  openproblems_bmmc_cite_phase2_mod2  original   

   pearson p_value_pearson  spearman p_value_spearman  
0  0.74793          0.0000  0.429848           0.0000  
---------------------------------
Epoch 1/500
Train RMSE: 0.3728
Val RMSE: 0.3195, Val MAE: 0.1334
Test RMSE: 0.3945, Test MAE: 0.1774
---------------------------------
Model saved at epoch 1 with val RMSE 0.3195
Patience grace period: 40
---------------------------------
Epoch 2/500
Train RMSE: 0.3247
Val RMSE: 0.2920, Val MAE: 0.1263
Test RMSE: 0.3665, Test MAE: 0.1672
---------------------------------
Model saved at epoch 2 with val RMSE 0.2920
Patience grace period: 40
---------------------------------
Epoch 3/500
Train RMSE: 0.3035
Val RMSE: 0.2789, Val MAE: 0.1205
Test RMSE: 0.3548, Test MAE: 0.1606
-------------------------

  self.model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth')))


Pearson Corr: 0.7481616425289367
Spearman Corr: 0.4305377827389911
p-rmse: 0.3252962889282863
       rmse       mae  seed                             subtask    method  \
0  0.325296  0.149724     5  openproblems_bmmc_cite_phase2_mod2  original   

    pearson p_value_pearson  spearman p_value_spearman  
0  0.748162          0.0000  0.430538           0.0000  
