In [20]:
import pandas as pd
import numpy as np

import torch_geometric.data as torch_data
from torch_geometric.transforms import NormalizeFeatures

from torchmetrics import ConfusionMatrix, AUROC, F1Score, Precision, Recall

# PyTorch Lightning
import pytorch_lightning as pl

# PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch import Tensor

# PyTorch geometric
import torch_geometric
import torch_geometric.data as geom_data
import torch_geometric.nn as geom_nn
from torch_geometric.loader import NeighborLoader

#Wandb
import wandb

# PL callbacks
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping

import os
from tqdm import tqdm

# Ensure that all operations are deterministic on GPU (if used) for reproducibility
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
pl.seed_everything(42)
torch.manual_seed(42)

Seed set to 42


<torch._C.Generator at 0x262ce00adf0>

In [None]:
MIN_DISEASE_S_GENE_NUMBER = 0
TEST_TRAIN_SPLIT = 0.5
TEST_VAL_SPLIT = 0.5

EPOCHS = 10
AVAIL_GPUS = min(1, torch.cuda.device_count())

CHECKPOINT_PATH = os.environ.get("PATH_CHECKPOINT", "../data/saved_models/")
gnn_layer_by_name = {"GCN": geom_nn.GCNConv, "GAT": geom_nn.GATConv, "GraphConv": geom_nn.GraphConv}

wandb.login(key="e1f878235d3945d4141f9f8e5af41d712fca6eba")

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: C:\Users\Nemes\_netrc


True

In [22]:
class IdMapper():
    sorted_diseases = []
    sorted_genes = []

    def __init__(self, gene_file, disease_file):
        genes = pd.read_csv(gene_file, sep="\t")
        self.genes = genes["genes"].sort_values().unique()

        disieses = pd.read_csv(disease_file, sep="\t")
        diseases_filtered = disieses.groupby("diseaseId").filter(lambda x: len(x) > MIN_DISEASE_S_GENE_NUMBER)
        self.diseases = diseases_filtered["diseaseId"].sort_values().unique()

    def diseases_idx_to_id_map(self):
        return { idx: item  for idx, item in enumerate(self.diseases)}

    def diseases_id_to_idx_map(self):
        return { item: idx  for idx, item in enumerate(self.diseases)}

    def genes_idx_to_id_map(self):
        return { idx: item  for idx, item in enumerate(self.genes)}

    def genes_id_to_idx_map(self):
        return { item: idx  for idx, item in enumerate(self.genes)}

In [None]:
class GeneDataset(torch_data.Dataset):
    def __init__(self, root, filenames, test_size, val_size, test=False, transform=None, pre_transform=None):
        """
        root = Where the dataset should be stored. This folder is split
        into raw_dir (downloaded dataset) and processed_dir (processed data).
        """
        self.test = test
        self.test_size = test_size
        self.val_size = val_size
        self.filenames = filenames
        self.mapper = IdMapper("../data/raw/"+filenames[0], "../data/raw/"+filenames[2])
        super(GeneDataset, self).__init__(root, transform, pre_transform)

    @property
    def raw_file_names(self):
        """ If this file exists in raw_dir, the download is not triggered.
            (The download func. is not implemented here)
        """
        return self.filenames

    @property
    def processed_file_names(self):
        """ If these files are found in raw_dir, processing is skipped"""
        if self.test:
            return [F'{file_name}_test' for file_name in self.raw_paths]
        else:
            return self.raw_paths

    def download(self):
        pass

    def process(self):
        self.genes_features = pd.read_csv(self.raw_paths[0], sep="\t")
        self.edges_features = pd.read_csv(self.raw_paths[1], sep="\t")
        self.disiese_gene_matrix = pd.read_csv(self.raw_paths[2], sep="\t")

        self.genes = self.genes_features["genes"].sort_values().unique()
        self.diseases = self.disiese_gene_matrix["diseaseId"].sort_values().unique()

        node_feats = self._get_node_features(self.genes_features)
        edge_feats = self._get_edge_features(self.edges_features)
        edge_index = self._get_adjacency_info(self.edges_features)

        y = self._create_mask_matrix(self.disiese_gene_matrix.copy()).to(torch.float32)
        train_mask, validation_mask, test_mask = self._get_train_val_test_mask(self.disiese_gene_matrix.copy())

        data = torch_data.Data(x=node_feats,
                    edge_index=edge_index,
                    edge_attr=edge_feats,
                    test_mask=test_mask, val_mask=validation_mask, train_mask=train_mask, y=y)

        if self.test:
            torch.save(data, os.path.join(self.processed_dir, 'graph_test.pt'))
        else:
            torch.save(data, os.path.join(self.processed_dir, 'graph.pt'))


    def _get_train_val_test_mask(self, disiese_gene_matrix):
        """
        i need too create matrices shape like disgenet
        and in this matrix i pick random points which are gonna be the train mask, validation mask and test mask

        in the train dataset i need to pick 80% from disgenet, equaly 0s and 1s in a column
        in the validation dataset i need to pick 10% from disgenet, equaly 0s and 1s in a column
        """

        train, validation, test = self._split_labels_to_train_val_test(disiese_gene_matrix)
        disgenet_inverse = self._get_disgenet_inverse(disiese_gene_matrix)
        train_n, validation_n, test_n = self._split_labels_to_train_val_test(disgenet_inverse)
        train_r = pd.concat([train, train_n], ignore_index=True)
        validation_r = pd.concat([validation, validation_n], ignore_index=True)
        test_r = pd.concat([test, test_n], ignore_index=True)

        train_mask = self._create_mask_matrix(train_r)
        validation_mask = self._create_mask_matrix(validation_r)
        test_mask = self._create_mask_matrix(test_r)

        return train_mask, validation_mask, test_mask

    def _split_labels_to_train_val_test(self, disgenet: pd.DataFrame):
        #Split the positive targets to equal partitions by disease
        disgenet_grouped = disgenet.groupby(by="diseaseId", group_keys=False)
        test_validation = disgenet_grouped.apply(lambda x: x.sample(frac=TEST_TRAIN_SPLIT, random_state=1))
        train = disgenet.drop(test_validation.index)
        test_validation_grouped = test_validation.groupby(by="diseaseId", group_keys=False)

        #Group by is needed before sample function call!!!
        test = test_validation_grouped.apply(lambda x: x.sample(frac=TEST_VAL_SPLIT, random_state=1))
        drop_indices = pd.concat([train, test]).index
        validation = disgenet.drop(drop_indices)
        return train, validation, test


    def _get_disgenet_inverse(self, disgenet):
        genes_frame = pd.DataFrame(list(self.genes), columns=["geneId"])
        diseases_frame = pd.DataFrame(self.diseases, columns=["diseaseId"])
        gene_disease_descartes_product = genes_frame.merge(diseases_frame, how="cross")
        disgenet_inverse = gene_disease_descartes_product.merge(disgenet, on=['geneId', 'diseaseId'], how='left', indicator=True)
        return disgenet_inverse[disgenet_inverse['_merge'] == 'left_only'].drop(columns='_merge')


    def _create_mask_matrix(self, dataframe):
        dataframe_for_matrix = pd.DataFrame(np.zeros((len(self.genes), len(self.diseases)),))
        gene_id_to_idx = self.mapper.genes_id_to_idx_map()
        disease_id_to_idx = self.mapper.diseases_id_to_idx_map()

        dataframe["geneId"] = dataframe["geneId"].map(gene_id_to_idx)
        dataframe["diseaseId"] = dataframe["diseaseId"].map(disease_id_to_idx)
        tuples_array = [row for row in dataframe.itertuples(index=False, name=None)]
        for row, col in tqdm(tuples_array):
            dataframe_for_matrix.loc[row, col] = 1

        return torch.tensor(dataframe_for_matrix.to_numpy(), dtype=torch.bool)

    def _get_node_features(self, genes):
        gene_id_to_idx = self.mapper.genes_id_to_idx_map()
        genes["genes"] = self.genes_features["genes"].map(gene_id_to_idx)
        all_node_feats = genes.values.tolist()
        all_node_feats = np.asarray(all_node_feats)

        return torch.tensor(all_node_feats, dtype=torch.float32)

    def _get_edge_features(self, edges):
        """
        This will return a matrix / 2d array of the shape
        [Number of edges, Edge Feature size]
        """
        duplicated_edges = edges.loc[edges.index.repeat(2)].reset_index(drop=True)
        all_edge_feats = duplicated_edges["combined_score"].tolist()
        return torch.tensor(all_edge_feats, dtype=torch.float32)


    def _get_adjacency_info(self, edges):
        """
        We want to be sure that the order of the indices
        matches the order of the edge features
        """
        gene_id_to_idx = self.mapper.genes_id_to_idx_map()

        edge_indices = []
        gene_1 = edges["gene1"].map(gene_id_to_idx)
        gene_2 = edges["gene2"].map(gene_id_to_idx)
        edges = pd.concat([gene_1, gene_2], axis=1).values.tolist()

        #iterate over the edges end duplicate it because for one edge we need: n1,n2 and n2,n1
        double_edges = []
        for edge in edges:
            double_edges += [ edge, [edge[1], edge[0]]]

        edge_indices = torch.tensor(double_edges)
        edge_indices = edge_indices.t().to(torch.int32).view(2, -1)
        return edge_indices

    def len(self):
        return self.genes.shape[0]

    def get(self, idx):
        """ - Equivalent to __getitem__ in pytorch
            - Is not needed for PyG's InMemoryDataset
        """
        if self.test:
            graph = torch.load(os.path.join(self.processed_dir, 'graph_test.pt'), weights_only=False)
        else:
            graph = torch.load(os.path.join(self.processed_dir, 'graph.pt'), weights_only=False)

        return graph

    def __getitem__(self, idx):
        return self.get(0)

In [24]:
dataset = GeneDataset(
    root="./data",
    filenames=["gtex_genes_test.csv", "gene_graph_test.csv", "disgenet_test.csv"],
    test_size=0.2,
    val_size=0.5,
    transform=NormalizeFeatures())

Processing...
100%|██████████| 11/11 [00:00<00:00, 7206.71it/s]
  test_validation = disgenet_grouped.apply(lambda x: x.sample(frac=TEST_TRAIN_SPLIT, random_state=1))
  test = test_validation_grouped.apply(lambda x: x.sample(frac=TEST_VAL_SPLIT, random_state=1))
  test_validation = disgenet_grouped.apply(lambda x: x.sample(frac=TEST_TRAIN_SPLIT, random_state=1))
  test = test_validation_grouped.apply(lambda x: x.sample(frac=TEST_VAL_SPLIT, random_state=1))
100%|██████████| 34/34 [00:00<?, ?it/s]
100%|██████████| 11/11 [00:00<00:00, 5066.70it/s]
100%|██████████| 9/9 [00:00<?, ?it/s]
Done!


In [25]:
dataset[0]

Data(x=[6, 10], edge_index=[2, 8], edge_attr=[8], y=[6, 9], test_mask=[6, 9], val_mask=[6, 9], train_mask=[6, 9])

disgenetet úgy tovább szűrni, hogy az egyes betegséghez legalább x gén tartozzon --> végén majd kiprobálni, hogy nem szürök rajtuk

keresztvalidáció

ha kiegyensulyozatlan akkor --> f1 score, avg precision, precision-recall görbe, (olyan metrikákat használjak)

In [26]:
class GCNLayer(torch.nn.Module):
    def __init__(self, c_in: int, c_out: int):
        super().__init__()
        self.projection = torch.nn.Linear(c_in, c_out)

    def forward(self, node_feats, adj_matrix):
        """Forward.

        Args:
            node_feats: Tensor with node features of shape [batch_size, num_nodes, c_in]
            adj_matrix: Batch of adjacency matrices of the graph. If there is an edge from i to j,
                         adj_matrix[b,i,j]=1 else 0. Supports directed edges by non-symmetric matrices.
                         Assumes to already have added the identity connections.
                         Shape: [batch_size, num_nodes, num_nodes]

        """
        # Num neighbours = number of incoming edges
        num_neighbours = adj_matrix.sum(dim=-1, keepdims=True)
        node_feats = self.projection(node_feats)
        node_feats = torch.bmm(adj_matrix, node_feats)
        node_feats = node_feats / num_neighbours
        return node_feats

In [27]:
class GATLayer(nn.Module):
    def __init__(self, c_in, c_out, num_heads=1, concat_heads=True, alpha=0.2):
        """
        Args:
            c_in: Dimensionality of input features
            c_out: Dimensionality of output features
            num_heads: Number of heads, i.e. attention mechanisms to apply in parallel. The
                        output features are equally split up over the heads if concat_heads=True.
            concat_heads: If True, the output of the different heads is concatenated instead of averaged.
            alpha: Negative slope of the LeakyReLU activation.
        """
        super().__init__()
        self.num_heads = num_heads
        self.concat_heads = concat_heads
        if self.concat_heads:
            assert c_out % num_heads == 0, "Number of output features must be a multiple of the count of heads."
            c_out = c_out // num_heads

        # Sub-modules and parameters needed in the layer
        self.projection = nn.Linear(c_in, c_out * num_heads)
        self.a = nn.Parameter(Tensor(num_heads, 2 * c_out))  # One per head
        self.leakyrelu = nn.LeakyReLU(alpha)

        # Initialization from the original implementation
        nn.init.xavier_uniform_(self.projection.weight.data, gain=1.414)
        nn.init.xavier_uniform_(self.a.data, gain=1.414)

    def forward(self, node_feats, adj_matrix, print_attn_probs=False):
        """Forward.

        Args:
            node_feats: Input features of the node. Shape: [batch_size, c_in]
            adj_matrix: Adjacency matrix including self-connections. Shape: [batch_size, num_nodes, num_nodes]
            print_attn_probs: If True, the attention weights are printed during the forward pass
                               (for debugging purposes)

        """
        batch_size, num_nodes = node_feats.size(0), node_feats.size(1)

        # Apply linear layer and sort nodes by head
        node_feats = self.projection(node_feats)
        node_feats = node_feats.view(batch_size, num_nodes, self.num_heads, -1)

        # We need to calculate the attention logits for every edge in the adjacency matrix
        # Doing this on all possible combinations of nodes is very expensive
        # => Create a tensor of [W*h_i||W*h_j] with i and j being the indices of all edges
        # Returns indices where the adjacency matrix is not 0 => edges
        edges = adj_matrix.nonzero(as_tuple=False)
        node_feats_flat = node_feats.view(batch_size * num_nodes, self.num_heads, -1)
        edge_indices_row = edges[:, 0] * num_nodes + edges[:, 1]
        edge_indices_col = edges[:, 0] * num_nodes + edges[:, 2]
        a_input = torch.cat(
            [
                torch.index_select(input=node_feats_flat, index=edge_indices_row, dim=0),
                torch.index_select(input=node_feats_flat, index=edge_indices_col, dim=0),
            ],
            dim=-1,
        )  # Index select returns a tensor with node_feats_flat being indexed at the desired positions

        # Calculate attention MLP output (independent for each head)
        attn_logits = torch.einsum("bhc,hc->bh", a_input, self.a)
        attn_logits = self.leakyrelu(attn_logits)

        # Map list of attention values back into a matrix
        attn_matrix = attn_logits.new_zeros(adj_matrix.shape + (self.num_heads,)).fill_(-9e15)
        attn_matrix[adj_matrix[..., None].repeat(1, 1, 1, self.num_heads) == 1] = attn_logits.reshape(-1)

        # Weighted average of attention
        attn_probs = F.softmax(attn_matrix, dim=2)
        if print_attn_probs:
            print("Attention probs\n", attn_probs.permute(0, 3, 1, 2))
        node_feats = torch.einsum("bijh,bjhc->bihc", attn_probs, node_feats)

        # If heads should be concatenated, we can do this by reshaping. Otherwise, take mean
        if self.concat_heads:
            node_feats = node_feats.reshape(batch_size, num_nodes, -1)
        else:
            node_feats = node_feats.mean(dim=2)

        return node_feats

In [28]:
class GNNModel(nn.Module):
    def __init__(
        self,
        c_in,
        c_hidden,
        c_out,
        num_layers=2,
        layer_name="GCN",
        dropout_rate=0.1,
        **kwargs,
    ):
        """GNNModel.

        Args:
            c_in: Dimension of input features
            c_hidden: Dimension of hidden features
            c_out: Dimension of the output features. Usually number of classes in classification
            num_layers: Number of "hidden" graph layers
            layer_name: String of the graph layer to use
            dp_rate: Dropout rate to apply throughout the network
            kwargs: Additional arguments for the graph layer (e.g. number of heads for GAT)

        """
        super().__init__()
        gnn_layer = gnn_layer_by_name[layer_name]

        layers = []
        in_channels, out_channels = c_in, c_hidden
        for _ in range(num_layers - 1):
            layers += [
                gnn_layer(in_channels=in_channels, out_channels=out_channels, **kwargs),
                nn.ReLU(inplace=True),
                nn.Dropout(dropout_rate),
            ]
            in_channels = c_hidden
        layers += [gnn_layer(in_channels=in_channels, out_channels=c_out, **kwargs)]
        self.layers = nn.ModuleList(layers)

    def forward(self, x, edge_index, edge_weight):
        """Forward.

        Args:
            x: Input features per node
            edge_index: List of vertex index pairs representing the edges in the graph (PyTorch geometric notation)

        """
        for layer in self.layers:
            # For graph layers, we need to add the "edge_index" tensor as additional input
            # All PyTorch Geometric graph layer inherit the class "MessagePassing", hence
            # we can simply check the class type.
            if isinstance(layer, geom_nn.MessagePassing):
                x = layer(x, edge_index, edge_weight)
            else:
                x = layer(x)
        return x

In [38]:
# The simple GCN modell
class TestGCN(pl.LightningModule):
    def __init__(self, c_in, c_out, c_hidden, num_layers, dropout_rate):
        super().__init__()
        self.save_hyperparameters()
        self.loss_module = nn.CrossEntropyLoss()
        self.model = GNNModel(c_hidden=c_hidden, num_layers=num_layers, dropout_rate=dropout_rate, c_in=c_in, c_out=c_out)

        self.learning_rate=0.01
        self.decay=5e-4

        self.cm = ConfusionMatrix(task="binary", num_classes=2)
        self.aucroc = AUROC(task="binary", num_classes=2)
        self.f1 = F1Score(task="binary", num_classes=2)
        self.precision = Precision(task="binary", num_classes=2)
        self.recall = Recall(task="binary", num_classes=2)

    def forward(self, data, mode="train"):
        x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight
        new_x = self.model(x, edge_index, edge_weight)

        # Only calculate the loss on the nodes corresponding to the mask
        if mode == "train":
            mask = data.train_mask
        elif mode == "val":
            mask = data.val_mask
        elif mode == "test":
            mask = data.test_mask
        else:
            assert False, f"Unknown forward mode: {mode}"

        loss = self.loss_module(new_x[mask], data.y[mask])
        acc = (new_x[mask].argmax(dim=-1) == data.y[mask]).sum().float() / mask.sum()

        if mode == "test":
            return loss, acc, new_x
        return loss, acc

    def training_step(self, data):
        loss, acc = self.forward(data, mode="train")
        self.log('train_loss', loss, prog_bar=True, on_epoch=True)
        self.log('train_acc', acc, prog_bar=True, on_epoch=True)
        return loss

    def validation_step(self, data):
        loss, acc = self.forward(data, mode="val")
        self.log("val_acc", acc)
        self.log("val_loss", loss)

    def test_step(self, data):
        loss, acc, x = self.forward(data, mode="test")
        x_masked = x[data.test_mask]
        y_masked = data.y[data.test_mask]

        self.log("test_acc", acc)
        self.log('test_loss', loss, prog_bar=True, on_epoch=True)
        self.cm.update(x_masked, y_masked)
        self.aucroc.update(x_masked, y_masked)
        self.f1.update(x_masked, y_masked)
        self.precision.update(x_masked, y_masked)
        self.recall.update(x_masked, y_masked)
        return loss

    def on_test_epoch_end(self) -> None:
        self.cm.plot()
        self.log('test_auc_roc', self.aucroc.compute(), prog_bar=True, on_epoch=True)
        self.log('test_f1', self.f1.compute(), prog_bar=True, on_epoch=True)
        self.log('test_precision', self.precision.compute(), prog_bar=True, on_epoch=True)
        self.log('test_recall', self.recall.compute(), prog_bar=True, on_epoch=True)
        return super().on_test_epoch_end()

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=self.learning_rate, weight_decay=self.decay)

In [39]:
# How to use NeighborLoader ---> importani is to install librraries by this: https://github.com/pyg-team/pytorch_geometric/discussions/7866
#                                          and use the propriate data : geom_data.Data
#                                          and edge indexes must be long type
# in_channels = 1
# node_num = 8
# features = [[i for j in range(in_channels)] for i in range(node_num)]
# edge_index = torch.tensor([[2, 3, 3, 4, 5, 6, 7], [0, 0, 1, 1, 2, 3, 4]],
#                           dtype=torch.long)
# data = Data(torch.tensor(features), edge_index)
# loader = NeighborLoader(data, [2], batch_size=1)
# batch = next(iter(loader))
# batch.edge_index

In [40]:

sweep_config = {
    'method': 'random'
}

parameters_dict = {
    'optimizer': {
        'values': ['adam', 'sgd', 'adamW']
    },
    "num_layers": {
        "values": [10,5,2]
    },
    'hidden_layer_size': {
        'values': [30, 20, 16]
    },
    "dropout_rate": {
        "values" : [0.5, 0.3, 0.1]
    }
}

optimizer_map = {
    'adam': torch.optim.Adam,
    'sgd': torch.optim.SGD,
    'adamW': torch.optim.AdamW
}

parameters_dict.update({
    'learning_rate': {
        'distribution': 'uniform',
        'min': 0,
        'max': 0.1
      },
    'batch_size': {
        'distribution': 'q_log_uniform_values',
        'q': 8,
        'min': 64,
        'max': 512
      }
   }
)

sweep_config['parameters'] = parameters_dict

sweep_id = wandb.sweep(sweep_config, project="gnn_test_logs")

Create sweep with ID: fhwx9he8
Sweep URL: https://wandb.ai/nemes-attila-budapesti-m-szaki-s-gazdas-gtudom-nyi-egyetem/gnn_test_logs/sweeps/fhwx9he8


In [None]:
model_cpkt = ModelCheckpoint(
    monitor='val_acc',
    mode='min',
    save_top_k=1,
    save_last=True,
    dirpath='../data/saved_models/wandb',
    filename='gnn_model')

early_stopping = EarlyStopping(
    monitor='val_acc',
    mode='min',
    patience=10,
    verbose=True,
)

In [46]:
def train_node_classifier(config, dataset, model_name="GCN"):
    pl.seed_everything(42)
    
    node_data_loader = geom_data.DataLoader(dataset)

    # Create a PyTorch Lightning trainer
    root_dir = os.path.join(CHECKPOINT_PATH, "TestGCN" + model_name)
    os.makedirs(root_dir, exist_ok=True)
    trainer = pl.Trainer(
        default_root_dir=root_dir,
        callbacks=[model_cpkt, early_stopping],
        accelerator="auto",
        devices=1,
        max_epochs=1,
        enable_progress_bar=False,
        logger=pl.loggers.WandbLogger(project="GCN_sweep_test", log_model="all")
    )
    trainer.logger._default_hp_metric = None  # Optional logging argument that we don't need

    # Check whether pretrained model exists. If yes, load it and skip training
    # pretrained_filename = os.path.join(CHECKPOINT_PATH, f"TestGCN{model_name}.ckpt")
    # if os.path.isfile(pretrained_filename):
    #     print("Found pretrained model, loading...")
    #     model = TestGCN.load_from_checkpoint(pretrained_filename)
    # else:

    c_hidden = config.hidden_layer_size
    num_layers = config.num_layers
    dropout_rate = config.dropout_rate

    model = TestGCN(
        c_in=dataset.num_node_features, c_out=dataset[0].y.shape[1], c_hidden=c_hidden, num_layers=num_layers, dropout_rate=dropout_rate
    )

    trainer.fit(model, node_data_loader, node_data_loader)

    model = TestGCN.load_from_checkpoint(trainer.checkpoint_callback.best_model_path)

    # Test best model on the test set
    trainer.test(model, dataloaders=node_data_loader)

In [47]:
def optimalization_train(config=None):
    with wandb.init(config=config):
        config = wandb.config
        train_node_classifier(config=config, model_name="GCN_sweep_test", dataset=dataset)

In [49]:
wandb.agent(sweep_id=sweep_id, function=optimalization_train, count=5)
wandb.teardown()

[34m[1mwandb[0m: Agent Starting Run: k94m6p7f with config:
[34m[1mwandb[0m: 	batch_size: 112
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	hidden_layer_size: 30
[34m[1mwandb[0m: 	learning_rate: 0.022005905746128353
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: sgd


Seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
d:\winfiles\miniconda3\envs\gen-disease-gnn\Lib\site-packages\pytorch_lightning\loggers\wandb.py:396: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
d:\winfiles\miniconda3\envs\gen-disease-gnn\Lib\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:654: Checkpoint directory D:\gitrepos\gene-disease-gnn\data\saved_models\wandb exists and is not empty.

  | Name        | Type                  | Params | Mode 
--------------------------------------------------------------
0 | loss_module | CrossEntropyLoss      | 0      | train
1 | model       | GNNModel              | 3.4 K  | train
2 | cm          | BinaryConfusionMatrix | 0      | train
3 | aucroc      | BinaryAUROC           | 0      | train
4 | f1          | Bi

0,1
epoch,▁▁█
test_acc,▁
test_auc_roc,▁
test_f1,▁
test_loss,▁
test_precision,▁
test_recall,▁
train_acc_epoch,▁
train_loss_epoch,▁
trainer/global_step,▁▁█

0,1
epoch,1.0
test_acc,0.0
test_auc_roc,0.0
test_f1,0.0
test_loss,0.0
test_precision,0.0
test_recall,0.0
train_acc_epoch,0.0
train_loss_epoch,250.06923
trainer/global_step,6.0


[34m[1mwandb[0m: Agent Starting Run: uhiszuws with config:
[34m[1mwandb[0m: 	batch_size: 160
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	hidden_layer_size: 16
[34m[1mwandb[0m: 	learning_rate: 0.05191851021440519
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: sgd


Seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
d:\winfiles\miniconda3\envs\gen-disease-gnn\Lib\site-packages\pytorch_lightning\loggers\wandb.py:396: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
d:\winfiles\miniconda3\envs\gen-disease-gnn\Lib\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:654: Checkpoint directory D:\gitrepos\gene-disease-gnn\data\saved_models\wandb exists and is not empty.

  | Name        | Type                  | Params | Mode 
--------------------------------------------------------------
0 | loss_module | CrossEntropyLoss      | 0      | train
1 | model       | GNNModel              | 1.1 K  | train
2 | cm          | BinaryConfusionMatrix | 0      | train
3 | aucroc      | BinaryAUROC           | 0      | train
4 | f1          | Bi

0,1
epoch,▁▁█
test_acc,▁
test_auc_roc,▁
test_f1,▁
test_loss,▁
test_precision,▁
test_recall,▁
train_acc_epoch,▁
train_loss_epoch,▁
trainer/global_step,▁▁█

0,1
epoch,1.0
test_acc,0.0
test_auc_roc,0.0
test_f1,0.0
test_loss,0.0
test_precision,0.0
test_recall,0.0
train_acc_epoch,0.0
train_loss_epoch,368.5762
trainer/global_step,6.0


[34m[1mwandb[0m: Agent Starting Run: k8uhe2t2 with config:
[34m[1mwandb[0m: 	batch_size: 80
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	hidden_layer_size: 16
[34m[1mwandb[0m: 	learning_rate: 0.0011444641965309478
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adamW


Seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
d:\winfiles\miniconda3\envs\gen-disease-gnn\Lib\site-packages\pytorch_lightning\loggers\wandb.py:396: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
d:\winfiles\miniconda3\envs\gen-disease-gnn\Lib\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:654: Checkpoint directory D:\gitrepos\gene-disease-gnn\data\saved_models\wandb exists and is not empty.

  | Name        | Type                  | Params | Mode 
--------------------------------------------------------------
0 | loss_module | CrossEntropyLoss      | 0      | train
1 | model       | GNNModel              | 1.1 K  | train
2 | cm          | BinaryConfusionMatrix | 0      | train
3 | aucroc      | BinaryAUROC           | 0      | train
4 | f1          | Bi

0,1
epoch,▁▁█
test_acc,▁
test_auc_roc,▁
test_f1,▁
test_loss,▁
test_precision,▁
test_recall,▁
train_acc_epoch,▁
train_loss_epoch,▁
trainer/global_step,▁▁█

0,1
epoch,1.0
test_acc,0.0
test_auc_roc,0.0
test_f1,0.0
test_loss,0.0
test_precision,0.0
test_recall,0.0
train_acc_epoch,0.0
train_loss_epoch,368.5762
trainer/global_step,6.0


[34m[1mwandb[0m: Agent Starting Run: bo2l41fp with config:
[34m[1mwandb[0m: 	batch_size: 88
[34m[1mwandb[0m: 	dropout_rate: 0.1
[34m[1mwandb[0m: 	hidden_layer_size: 20
[34m[1mwandb[0m: 	learning_rate: 0.04616233592574638
[34m[1mwandb[0m: 	num_layers: 10
[34m[1mwandb[0m: 	optimizer: adam


Seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
d:\winfiles\miniconda3\envs\gen-disease-gnn\Lib\site-packages\pytorch_lightning\loggers\wandb.py:396: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
d:\winfiles\miniconda3\envs\gen-disease-gnn\Lib\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:654: Checkpoint directory D:\gitrepos\gene-disease-gnn\data\saved_models\wandb exists and is not empty.

  | Name        | Type                  | Params | Mode 
--------------------------------------------------------------
0 | loss_module | CrossEntropyLoss      | 0      | train
1 | model       | GNNModel              | 3.8 K  | train
2 | cm          | BinaryConfusionMatrix | 0      | train
3 | aucroc      | BinaryAUROC           | 0      | train
4 | f1          | Bi

0,1
epoch,▁▁█
test_acc,▁
test_auc_roc,▁
test_f1,▁
test_loss,▁
test_precision,▁
test_recall,▁
train_acc_epoch,▁
train_loss_epoch,▁
trainer/global_step,▁▁█

0,1
epoch,1.0
test_acc,0.0
test_auc_roc,0.0
test_f1,0.0
test_loss,0.0
test_precision,0.0
test_recall,0.0
train_acc_epoch,0.0
train_loss_epoch,78.86081
trainer/global_step,6.0


[34m[1mwandb[0m: Agent Starting Run: 3djhwu1o with config:
[34m[1mwandb[0m: 	batch_size: 264
[34m[1mwandb[0m: 	dropout_rate: 0.1
[34m[1mwandb[0m: 	hidden_layer_size: 30
[34m[1mwandb[0m: 	learning_rate: 0.005608682935891474
[34m[1mwandb[0m: 	num_layers: 10
[34m[1mwandb[0m: 	optimizer: adamW


Seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
d:\winfiles\miniconda3\envs\gen-disease-gnn\Lib\site-packages\pytorch_lightning\loggers\wandb.py:396: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
d:\winfiles\miniconda3\envs\gen-disease-gnn\Lib\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:654: Checkpoint directory D:\gitrepos\gene-disease-gnn\data\saved_models\wandb exists and is not empty.

  | Name        | Type                  | Params | Mode 
--------------------------------------------------------------
0 | loss_module | CrossEntropyLoss      | 0      | train
1 | model       | GNNModel              | 8.0 K  | train
2 | cm          | BinaryConfusionMatrix | 0      | train
3 | aucroc      | BinaryAUROC           | 0      | train
4 | f1          | Bi

0,1
epoch,▁▁█
test_acc,▁
test_auc_roc,▁
test_f1,▁
test_loss,▁
test_precision,▁
test_recall,▁
train_acc_epoch,▁
train_loss_epoch,▁
trainer/global_step,▁▁█

0,1
epoch,1.0
test_acc,0.0
test_auc_roc,0.0
test_f1,0.0
test_loss,0.0
test_precision,0.0
test_recall,0.0
train_acc_epoch,0.0
train_loss_epoch,35.88692
trainer/global_step,6.0
