In [105]:
# --- Project-local modules -------------------------------------------------
# Each local module is imported and then passed to importlib.reload so that
# edits made on disk are picked up without restarting the kernel. The
# `from ... import ...` lines come after the matching reload so the names
# bind to the reloaded module objects.
import importlib

import networks
importlib.reload(networks)
from networks import VAE

import utils

importlib.reload(utils)
import utils.data
importlib.reload(utils.data)
import utils.snf
importlib.reload(utils.snf)
import dataset
importlib.reload(dataset)

import utils.train_val_test
importlib.reload(utils.train_val_test)

print("libraries reloaded!")

from utils.train_val_test import train_loop, val_loop, SEED, plot_latent_space
from dataset import Omics_Dataset
from utils.data import read_MoGCN_data

# --- Third-party / standard library ----------------------------------------
# NOTE(review): `os`, `DataLoader`, `optim` and `nn` are each imported twice
# in this cell (once here, once in the group below). The repeats are harmless
# but could be consolidated.
import os
from torch.utils.data import DataLoader
from torch import optim, nn
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt

from functools import partial
import os
import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F
from sklearn.model_selection import train_test_split

# --- Hyperparameter search (Ray Tune + Optuna) -----------------------------
from ray import train, tune
from ray.tune.schedulers import ASHAScheduler
from ray.train import Checkpoint
from ray.tune.search.optuna import OptunaSearch

libraries reloaded!


In [17]:
# Location of the raw training files: three omics CSVs plus the sample-class file.
data_path = "data/train"
omics_file_names = ["fpkm_data.csv", "gistic_data.csv", "rppa_data.csv"]
gt_file_name = "sample_classes.csv"

# Prefix every omics file name with the data directory, then load everything
# through the project reader in a single call.
omics_data, gt_data, samples_list, classes_list = read_MoGCN_data(
    omics_paths=list(map(partial(os.path.join, data_path), omics_file_names)),
    gt_data_path=os.path.join(data_path, gt_file_name),
)

In [61]:
# Min-max scale every column except "Sample" to [0, 1], one omics table at a time.
omics_normalised = []
for omics in omics_data:
    scaled = omics.copy()  # work on a copy so the raw table stays untouched
    feature_cols = scaled.columns.difference(["Sample"])
    col_min = scaled[feature_cols].min(axis=0)
    col_range = scaled[feature_cols].max(axis=0) - col_min
    # A column whose max equals its min has a zero range, so it comes out as NaN here.
    scaled[feature_cols] = (scaled[feature_cols] - col_min) / col_range
    omics_normalised.append(scaled)
    print(len(omics_normalised))

1
2
3


In [97]:
omics_file_names = ["fpkm_data", "gistic_data", "rppa_data"]

# Drop columns containing NaN, rename the sample column, store the cleaned
# frame back into the list and write it next to the raw data as "<name>_norm.csv".
for idx, frame in enumerate(omics_normalised):
    cleaned = frame.dropna(axis=1).rename(columns={"Sample": "Sample_ID"})
    omics_normalised[idx] = cleaned
    cleaned.to_csv(f"{data_path}/{omics_file_names[idx]}_norm.csv", index=False)


In [98]:
# Reload the normalised tables written by the previous cell.
data_path = "data/train"
omics_file_names = ["fpkm_data_norm.csv", "gistic_data_norm.csv", "rppa_data_norm.csv"]
gt_file_name = "sample_classes.csv"

omics_data, gt_data, samples_list, classes_list = read_MoGCN_data(
    omics_paths=[os.path.join(data_path, file) for file in omics_file_names],
    gt_data_path=os.path.join(data_path, gt_file_name),
)

# Stratified 80/20 train/validation split of the second omics table
# (index 1, i.e. "gistic_data_norm.csv" in the list above).
# The split is seeded with the project-wide SEED so that every hyperparameter
# trial, and every re-run of the notebook, sees the same partition; without it
# validation losses are not comparable between runs.
# NOTE(review): assumes SEED (imported from utils.train_val_test) is an int
# accepted by scikit-learn's `random_state` — confirm.
x_train, x_val, y_train, y_val = train_test_split(
    omics_data[1],
    gt_data,
    test_size=0.2,
    stratify=classes_list,
    random_state=SEED,
)

MoGCN_train = Omics_Dataset(x_train, y_train)
MoGCN_val = Omics_Dataset(x_val, y_val)

In [115]:
def main(
    config,
    hyper_loop,
    num_samples,
    search_dir=None,
    search_alg=None,
    local_dir="/home/davide/Desktop/Projects/Multi-omics-data-integration-with-DL-approaches/Hyperparameters_tuning/results",
    resources_per_trial=None,
):
    """Run a Ray Tune hyperparameter search and return the best trial.

    Args:
        config: Search space / fixed settings forwarded to ``tune.run``.
        hyper_loop: Trainable called by Ray Tune for each trial; it must
            report a ``"loss"`` metric.
        num_samples: Number of trials to sample.
        search_dir: Optional file path where the search algorithm state is
            saved after the run. Missing parent directories are created.
        search_alg: Optional search algorithm. Defaults to an ``OptunaSearch``
            minimising ``"loss"``.
        local_dir: Directory where Ray Tune stores trial results.
            NOTE(review): the default is an absolute, machine-specific path;
            pass an explicit value when running elsewhere.
        resources_per_trial: Optional resource request per trial. Defaults to
            6 CPUs plus one GPU when CUDA is available (no GPU otherwise), so
            trials can still be scheduled on CPU-only hosts.

    Returns:
        The trial with the lowest last-reported ``"loss"``, or ``None`` when
        no trial reported that metric.
    """
    # ASHA stops under-performing trials early based on the reported "loss".
    scheduler = ASHAScheduler(
        time_attr="training_iteration",
        metric="loss",
        mode="min",
        max_t=100,
        grace_period=10,
        reduction_factor=3,
        brackets=1,
    )

    if not search_alg:
        search_alg = OptunaSearch(
            metric="loss",
            mode="min",
        )

    if resources_per_trial is None:
        # Only ask for a GPU when one exists; a fixed GPU request can never be
        # satisfied on a CPU-only machine.
        resources_per_trial = {
            "cpu": 6,
            "gpu": 1 if torch.cuda.is_available() else 0,
        }

    result = tune.run(
        hyper_loop,
        config=config,
        resources_per_trial=resources_per_trial,
        scheduler=scheduler,
        search_alg=search_alg,
        num_samples=num_samples,
        local_dir=(local_dir),
    )

    # Persist the searcher state first, so it is kept even if picking the best
    # trial fails, and make sure the target directory exists before writing.
    if search_dir:
        parent_dir = os.path.dirname(search_dir)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        search_alg.save(search_dir)

    best_trial = result.get_best_trial("loss", "min", "last")
    if best_trial is None:
        print("No trial reported a 'loss' value; no best trial available.")
        return None

    print(f"Best trial config: {best_trial.config}")
    print(f"Best trial final validation loss: {best_trial.last_result['loss']}")

    return best_trial


def hyper_loop(config):
    """Train a VAE for one Ray Tune trial and report the loss after every epoch.

    The model is trained on the notebook-level ``MoGCN_train`` dataset and
    evaluated on ``MoGCN_val``.

    Args:
        config: Trial configuration. Reads ``batch_size``, ``model_name``,
            ``activation_fn``, ``dropout_p``, ``hidden_dim``, ``latent_dim``,
            ``loss_fn``, ``beta``, ``lr`` and ``weight_decay``. An optional
            ``epochs`` entry sets the number of epochs (default 30).

    Reports (once per epoch, via ``train.report``):
        ``loss``: mean validation loss over the validation batches.
        ``train_loss``: mean training loss over the training batches.
    """
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

    batch_size = int(config["batch_size"])
    num_epochs = int(config.get("epochs", 30))

    trainloader = DataLoader(MoGCN_train, batch_size=batch_size, shuffle=True)
    # Validation order does not need shuffling; a fixed order keeps the
    # per-batch composition (and so the averaged loss) stable across epochs.
    valloader = DataLoader(MoGCN_val, batch_size=batch_size, shuffle=False)

    net = VAE(
        MoGCN_train.input_dims,
        config["model_name"],
        activation_fn=config["activation_fn"],
        dropout_p=config["dropout_p"],
        hidden_dim=config["hidden_dim"],
        latent_dim=config["latent_dim"],
        loss_fn=config["loss_fn"],
        beta=config["beta"],
    )
    net.to(device)
    # NOTE(review): batches are handed to forward_pass exactly as the
    # DataLoader yields them; nothing here moves them to `device`. Confirm
    # that forward_pass (or the dataset) takes care of device placement.

    optimizer = optim.Adam(net.parameters(), lr=config["lr"], weight_decay=config["weight_decay"])

    for _epoch in range(num_epochs):  # loop over the dataset multiple times
        # Training phase: enable train-mode layers such as dropout.
        net.train()
        train_loss = 0.0
        train_steps = 0
        for inputs, _ in trainloader:
            # forward + backward + optimize
            _, loss = net.forward_pass(inputs)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            train_steps += 1

        # Validation phase: switch to eval mode so dropout is disabled and the
        # reported loss is not perturbed by train-only behaviour.
        net.eval()
        val_loss = 0.0
        with torch.no_grad():
            for inputs, _ in valloader:
                _, loss = net.forward_pass(inputs)
                # .item() gives a plain Python float, matching the training loop.
                val_loss += loss.item()

        train.report(
            {
                "loss": val_loss / len(valloader),
                "train_loss": train_loss / max(train_steps, 1),
            }
        )

    print("Finished Training")

In [163]:
# Scratch cell: preview a candidate value grid starting at 5e-6 with step 5e-6, up to about 0.01005.
# NOTE(review): the saved output below this cell is a single number, not an array — it appears stale; re-run to refresh.
np.arange(5e-6, 0.01005, 5e-6)

0.0005

In [167]:
# Scratch cell: count how many candidate values the grid above contains.
# NOTE(review): the saved output (200) looks stale for these bounds — re-run to confirm the size.
len(np.arange(5e-6, 0.01005, 5e-6))
# np.linspace(start = 0.0, stop = 0.8, num= 9)

200

In [170]:
# Discrete grid 0.005, 0.010, ..., 0.200 shared by `lr` and `weight_decay`.
# np.linspace fixes the number of points (np.arange with a float step can
# include or drop the end point depending on rounding), rounding removes
# artefacts such as 0.030000000000000002, and .tolist() hands tune.choice
# plain Python floats instead of a numpy array.
step_grid = np.round(np.linspace(0.005, 0.2, 40), 3).tolist()

# Only `lr` and `weight_decay` are searched; every other entry is fixed.
# The commented-out lines record the wider search spaces tried or planned.
config = {
    "model_name": "VAE_omics_1",
    # "activation_fn": tune.choice([nn.Sigmoid(), nn.ELU(), nn.LeakyReLU()]),
    "activation_fn": nn.Sigmoid(),
    # "dropout_p": tune.quniform(0.0, 0.8, 0.1),
    # "dropout_p": tune.choice(np.arange(start = 0.0, stop = 0.9, step= 0.1)),
    "dropout_p": 0.0,
    # "hidden_dim": tune.choice([2**i for i in range(1, 8)]),
    "hidden_dim": 128,
    # "latent_dim": tune.choice([2**i for i in range(1, 8)]),
    "latent_dim": 32,
    "loss_fn": nn.MSELoss(reduction="mean"),
    # "beta": tune.loguniform(1e-6, 0.1),
    "beta": 1,
    # "lr": tune.loguniform(1e-3, 1e-1),
    "lr": tune.choice(step_grid),
    # "weight_decay": tune.loguniform(1e-3, 1e-1),
    "weight_decay": tune.choice(step_grid),
    # "batch_size": tune.choice([32, 64, 128]),
    "batch_size": 32,
}

search_alg = OptunaSearch(
    metric="loss",
    mode="min",
)

# Pass the searcher explicitly: previously it was built here but never used,
# and `main` silently created its own.
main(
    config,
    hyper_loop,
    50,
    "./checkpoints/omics_1/tune_search.pkl",
    search_alg=search_alg,
)

2023-11-03 15:09:36,407	INFO tune.py:645 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949
[I 2023-11-03 15:09:36,548] A new study created in memory with name: optuna


0,1
Current time:,2023-11-03 15:12:00
Running for:,00:02:23.51
Memory:,14.3/15.4 GiB

Trial name,status,loc,lr,weight_decay,iter,total time (s),loss
hyper_loop_6ec5861a,RUNNING,192.168.0.14:46853,0.05,0.095,27.0,5.43717,0.0320486
hyper_loop_ed3a57ae,PENDING,,0.175,0.195,,,
hyper_loop_bfb0841e,TERMINATED,192.168.0.14:45902,0.08,0.16,30.0,5.91647,0.0404707
hyper_loop_7d133486,TERMINATED,192.168.0.14:45951,0.07,0.03,30.0,6.10721,0.032625
hyper_loop_c70fbe8e,TERMINATED,192.168.0.14:46001,0.155,0.075,30.0,5.75002,1.96057
hyper_loop_67cf12ff,TERMINATED,192.168.0.14:46050,0.13,0.055,10.0,3.20415,0.0329652
hyper_loop_827a36d0,TERMINATED,192.168.0.14:46121,0.125,0.11,30.0,5.73806,0.485066
hyper_loop_317bbd62,TERMINATED,192.168.0.14:46172,0.19,0.19,10.0,3.21297,0.0450201
hyper_loop_1e616913,TERMINATED,192.168.0.14:46215,0.115,0.14,30.0,5.9596,0.232066
hyper_loop_5a68116e,TERMINATED,192.168.0.14:46372,0.195,0.06,10.0,3.40991,1.58724




Trial name,loss
hyper_loop_06f844f5,0.0335681
hyper_loop_1e616913,0.232066
hyper_loop_317bbd62,0.0450201
hyper_loop_33ad3161,0.0324512
hyper_loop_3421395d,0.0358157
hyper_loop_5a68116e,1.58724
hyper_loop_669da411,0.0318113
hyper_loop_67cf12ff,0.0329652
hyper_loop_6c476378,2.41825
hyper_loop_6ec5861a,0.14695


[2m[36m(hyper_loop pid=45902)[0m Finished Training
[2m[36m(hyper_loop pid=45951)[0m Finished Training


[2m[33m(raylet)[0m [2023-11-03 15:10:36,340 E 35172 35172] (raylet) node_manager.cc:3007: 3 Workers (tasks / actors) killed due to memory pressure (OOM), 0 Workers crashed due to other reasons at node (ID: 7100ecaa6ba4f9bbe1dfcd91819869b7c4fdd036e29d983f9c9898cb, IP: 192.168.0.14) over the last time period. To see more information about the Workers killed on this node, use `ray logs raylet.out -ip 192.168.0.14`
[2m[33m(raylet)[0m 
[2m[33m(raylet)[0m Refer to the documentation on how to address the out of memory issue: https://docs.ray.io/en/latest/ray-core/scheduling/ray-oom-prevention.html. Consider provisioning more memory on this node or reducing task parallelism by requesting more CPUs per task. To adjust the kill threshold, set the environment variable `RAY_memory_usage_threshold` when starting Ray. To disable worker killing, set the environment variable `RAY_memory_monitor_refresh_ms` to zero.


[2m[36m(hyper_loop pid=46477)[0m Finished Training
[2m[36m(hyper_loop pid=46520)[0m Finished Training
[2m[36m(hyper_loop pid=46563)[0m Finished Training
[2m[36m(hyper_loop pid=46707)[0m Finished Training


2023-11-03 15:12:00,097	INFO tune.py:1143 -- Total run time: 143.69 seconds (143.50 seconds for the tuning loop).
Resume experiment with: tune.run(..., resume=True)
- hyper_loop_ed3a57ae: FileNotFoundError('Could not fetch metrics for hyper_loop_ed3a57ae: both result.json and progress.csv were not found at /home/davide/Desktop/Projects/Multi-omics-data-integration-with-DL-approaches/Hyperparameters_tuning/results/hyper_loop_2023-11-03_15-09-36/hyper_loop_ed3a57ae_19_activation_fn=ref_ph_ab5d744a,batch_size=32,beta=1,dropout_p=0.0000,hidden_dim=128,latent_dim=32,loss_fn=re_2023-11-03_15-11-54')


Best trial config: {'model_name': 'VAE_omics_1', 'activation_fn': Sigmoid(), 'dropout_p': 0.0, 'hidden_dim': 128, 'latent_dim': 32, 'loss_fn': MSELoss(), 'beta': 1, 'lr': 0.07, 'weight_decay': 0.030000000000000002, 'batch_size': 32}
Best trial final validation loss: 0.03181132425864538


hyper_loop_669da411

[2m[33m(raylet)[0m [2023-11-03 15:15:36,345 E 35172 35172] (raylet) node_manager.cc:3007: 1 Workers (tasks / actors) killed due to memory pressure (OOM), 0 Workers crashed due to other reasons at node (ID: 7100ecaa6ba4f9bbe1dfcd91819869b7c4fdd036e29d983f9c9898cb, IP: 192.168.0.14) over the last time period. To see more information about the Workers killed on this node, use `ray logs raylet.out -ip 192.168.0.14`
[2m[33m(raylet)[0m 
[2m[33m(raylet)[0m Refer to the documentation on how to address the out of memory issue: https://docs.ray.io/en/latest/ray-core/scheduling/ray-oom-prevention.html. Consider provisioning more memory on this node or reducing task parallelism by requesting more CPUs per task. To adjust the kill threshold, set the environment variable `RAY_memory_usage_threshold` when starting Ray. To disable worker killing, set the environment variable `RAY_memory_monitor_refresh_ms` to zero.
