## Imports

In [1]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules before each
# cell execution, so edits to the project sources are picked up without restarting.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys
from pathlib import Path

# Put the parent directory on sys.path (once) so that the project imports below resolve
# — presumably the `owkin` package lives there; TODO confirm.
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

import torch
from torch import nn

# Set before the `owkin` imports; presumably silences Weights & Biases console output.
os.environ["WANDB_SILENT"] = "true"

from owkin.training import train, RocLoss
from owkin.dataset import build_dataset
from owkin.models.mono_models import MLP
from owkin.models.aggregators import SmoothMaxAggregator, MeanAggregator, MaxAggregator

In [3]:
# Use the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

## MonoModel

In [8]:
# --- MonoModel (MLP) run configuration ---

# Architecture, passed to MLP(...)
num_layers = 5
inside_dim = 512

# Optimisation, passed to train(...)
mono_lr = 3e-6
wd = 3e-3  # forwarded as `weight_decay`
mono_batch_size = 64
mono_nb_epochs = 2000
criterion = nn.BCELoss()

# Dataset options, forwarded to both build_dataset(...) and train(...).
# Note these are the *string* "None", not the Python None object.
val_center = "None"
normalizer_type = "None"

In [9]:
# Train the MonoModel (MLP) on `X_train_mean` / `X_val_mean`.
# The constant guard is a manual on/off switch for this run.
if True:
    # build_dataset returns nine objects; this cell only feeds the *_mean inputs
    # and the labels to train().
    (
        X_train, X_train_mean, y_train,
        X_val, X_val_mean, y_val,
        X_test, X_test_mean, df_test,
    ) = build_dataset(normalizer_type=normalizer_type, val_center=val_center)

    mono_model = MLP(num_layers=num_layers, inside_dim=inside_dim)

    # Keyword options for this training run, collected in one place.
    mono_train_options = dict(
        learning_rate=mono_lr,
        weight_decay=wd,
        val_center=val_center,
        normalizer_type=normalizer_type,
        batch_size=mono_batch_size,
        nb_epochs=mono_nb_epochs,
        criterion=criterion,
        use_wandb=True,
        device=device,
    )
    train(X_train_mean, y_train, X_val_mean, y_val, mono_model, **mono_train_options)

## SmoothMaxAggregator

### From MonoModel

In [7]:
# --- SmoothMaxAggregator initialised from a saved MonoModel ---

# MLP architecture; must match the saved checkpoint that gets loaded into it.
num_layers = 5
inside_dim = 512

# Settings of the MonoModel run. `mono_batch_size`, `wd` and `mono_lr` are part of
# the checkpoint directory name built in the training cell of this section.
mono_lr = 3e-6
wd = 3e-3
mono_batch_size = 64
mono_nb_epochs = 2000  # NOTE(review): not read by the visible cells of this section

# Settings of the aggregator training itself.
lr = 3e-6
batch_size = 16
nb_epochs = 500
# NOTE(review): train() in this section is called without `criterion` — confirm its default.
criterion = nn.BCELoss()

# Overwritten by the `for val_center in list_val_center` loop of this section.
val_center = "None"

In [8]:
# Sweep grid: every validation center is combined with every normalizer type.
list_val_center = ["C_1", "C_2", "C_5"]
list_normalizer_type = ["None", "expo_lambda"]

In [9]:
# Fine-tune a SmoothMaxAggregator whose inner MLP starts from the best saved MonoModel
# checkpoint, once per (val_center, normalizer_type) pair.
# The constant guard is a manual on/off switch; this run is currently disabled.
if False:
    for val_center in list_val_center:
        for normalizer_type in list_normalizer_type:
            X_train, X_train_mean, y_train, X_val, X_val_mean, y_val, X_test, X_test_mean, df_test = build_dataset(normalizer_type=normalizer_type, val_center=val_center)
            mono_model = MLP(num_layers=num_layers, inside_dim=inside_dim)

            # Directory expected to contain the MonoModel checkpoints of this configuration.
            run_name = Path(f"./../saved_models/{mono_model.name}/BCELoss/nt_{normalizer_type}/vc_{val_center}/bs_{mono_batch_size}/wd_{'{:.0e}'.format(wd)}/lr_{'{:.0e}'.format(mono_lr)}")

            # Reset per configuration: without this, an empty or missing directory would
            # silently reuse `best_path` from the previous iteration (i.e. load weights
            # trained for another configuration) or raise NameError on the first one.
            best_path = None
            best_val_score = 0
            for model_path in run_name.glob("*"):
                # Characters [-8:-3] of the file name are parsed as the validation score
                # (presumably names end in something like "0.912.pt" — TODO confirm).
                val_score = float(model_path.name[-8:-3])
                if best_path is None or val_score > best_val_score:
                    best_path = model_path
                    best_val_score = val_score
            if best_path is None:
                raise FileNotFoundError(f"No MonoModel checkpoint found in {run_name}")

            # Deserialize on the CPU: if the checkpoint was saved from GPU tensors this
            # avoids creating an extra copy of the weights on the GPU. load_state_dict
            # copies the values into the model's own parameters.
            mono_model.load_state_dict(torch.load(best_path, map_location="cpu"))

            model = SmoothMaxAggregator(mono_model)
            train(X_train,
                y_train,
                X_val,
                y_val,
                model,
                learning_rate=lr,
                weight_decay=wd,
                val_center=val_center,
                normalizer_type=normalizer_type,
                batch_size=batch_size,
                nb_epochs=nb_epochs,
                use_wandb=True,
                device=device,
                exp_name=f"mono_lr_{mono_lr}"
            )

### From scratch

In [10]:
# --- SmoothMaxAggregator trained from scratch ---

# Architecture of the inner MLP
num_layers = 5
inside_dim = 512

# Optimisation settings passed to train(...)
lr = 3e-6
wd = 3e-3  # forwarded as `weight_decay`
batch_size = 16
nb_epochs = 1000
# NOTE(review): train() in this section is called without `criterion` — confirm its default.
criterion = nn.BCELoss()

In [11]:
# Sweep grid for the from-scratch runs: validation centers x normalizer types.
list_val_center = ["C_1", "C_2", "C_5"]
list_normalizer_type = ["None", "expo_lambda"]

In [12]:
# Train a SmoothMaxAggregator around a freshly initialised MLP for every
# (val_center, normalizer_type) pair. The constant guard is a manual on/off switch.
if True:
    # Same visiting order as two nested loops: validation center outermost.
    sweep = [(vc, nt) for vc in list_val_center for nt in list_normalizer_type]
    for val_center, normalizer_type in sweep:
        (
            X_train, X_train_mean, y_train,
            X_val, X_val_mean, y_val,
            X_test, X_test_mean, df_test,
        ) = build_dataset(normalizer_type=normalizer_type, val_center=val_center)

        # No checkpoint is loaded here: the inner MLP keeps its initial weights.
        mono_model = MLP(num_layers=num_layers, inside_dim=inside_dim)
        model = SmoothMaxAggregator(mono_model)

        train_options = dict(
            learning_rate=lr,
            weight_decay=wd,
            val_center=val_center,
            normalizer_type=normalizer_type,
            batch_size=batch_size,
            nb_epochs=nb_epochs,
            use_wandb=True,
            device=device,
        )
        train(X_train, y_train, X_val, y_val, model, **train_options)

## MaxAggregator

### From MonoModel

In [13]:
# --- MaxAggregator initialised from a saved MonoModel ---

# MLP architecture; must match the saved checkpoint that gets loaded into it.
num_layers = 5
inside_dim = 512

# Settings of the MonoModel run. `mono_batch_size`, `wd` and `mono_lr` are part of
# the checkpoint directory name built in the training cell of this section.
mono_lr = 3e-6
wd = 3e-3
mono_batch_size = 64
mono_nb_epochs = 2000  # NOTE(review): not read by the visible cells of this section

# Settings of the aggregator training itself.
lr = 3e-6
batch_size = 16
nb_epochs = 500
# NOTE(review): train() in this section is called without `criterion` — confirm its default.
criterion = nn.BCELoss()

# Overwritten by the `for val_center in list_val_center` loop of this section.
val_center = "None"

In [14]:
# Sweep grid: validation centers (including the string "None") x normalizer types.
list_val_center = ["C_1", "C_2", "C_5", "None"]
list_normalizer_type = ["None", "expo_lambda"]

In [15]:
# Fine-tune a MaxAggregator whose inner MLP starts from the best saved MonoModel
# checkpoint, once per (val_center, normalizer_type) pair.
# The constant guard is a manual on/off switch for this run.
# NOTE(review): the recorded execution of this cell ended with a CUDA out-of-memory
# error; the cause is not visible from this cell — check batch size and data placement
# inside train().
if True:
    for val_center in list_val_center:
        for normalizer_type in list_normalizer_type:
            X_train, X_train_mean, y_train, X_val, X_val_mean, y_val, X_test, X_test_mean, df_test = build_dataset(normalizer_type=normalizer_type, val_center=val_center)
            mono_model = MLP(num_layers=num_layers, inside_dim=inside_dim)

            # Directory expected to contain the MonoModel checkpoints of this configuration.
            run_name = Path(f"./../saved_models/{mono_model.name}/BCELoss/nt_{normalizer_type}/vc_{val_center}/bs_{mono_batch_size}/wd_{'{:.0e}'.format(wd)}/lr_{'{:.0e}'.format(mono_lr)}")

            # Reset per configuration: without this, an empty or missing directory would
            # silently reuse `best_path` from the previous iteration (i.e. load weights
            # trained for another configuration) or raise NameError on the first one.
            best_path = None
            best_val_score = 0
            for model_path in run_name.glob("*"):
                # Characters [-8:-3] of the file name are parsed as the validation score
                # (presumably names end in something like "0.912.pt" — TODO confirm).
                val_score = float(model_path.name[-8:-3])
                if best_path is None or val_score > best_val_score:
                    best_path = model_path
                    best_val_score = val_score
            if best_path is None:
                raise FileNotFoundError(f"No MonoModel checkpoint found in {run_name}")

            # Deserialize on the CPU: if the checkpoint was saved from GPU tensors this
            # avoids creating an extra copy of the weights on the GPU. load_state_dict
            # copies the values into the model's own parameters.
            mono_model.load_state_dict(torch.load(best_path, map_location="cpu"))

            model = MaxAggregator(mono_model)
            train(X_train,
                y_train,
                X_val,
                y_val,
                model,
                learning_rate=lr,
                weight_decay=wd,
                val_center=val_center,
                normalizer_type=normalizer_type,
                batch_size=batch_size,
                nb_epochs=nb_epochs,
                use_wandb=True,
                device=device,
                exp_name=f"mono_lr_{mono_lr}"
            )

OutOfMemoryError: CUDA out of memory. Tried to allocate 540.00 MiB (GPU 0; 6.00 GiB total capacity; 4.37 GiB already allocated; 0 bytes free; 4.90 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

### From scratch

In [None]:
# --- MaxAggregator trained from scratch ---

# Architecture of the inner MLP
num_layers = 5
inside_dim = 512

# Optimisation settings passed to train(...)
lr = 3e-6
wd = 3e-3  # forwarded as `weight_decay`
batch_size = 16
nb_epochs = 1000
# NOTE(review): train() in this section is called without `criterion` — confirm its default.
criterion = nn.BCELoss()

In [None]:
# Sweep grid for the from-scratch runs: validation centers (including the string
# "None") x normalizer types.
list_val_center = ["C_1", "C_2", "C_5", "None"]
list_normalizer_type = ["None", "expo_lambda"]

In [None]:
# Train a MaxAggregator around a freshly initialised MLP for every
# (val_center, normalizer_type) pair. The constant guard is a manual on/off switch.
if True:
    # Same visiting order as two nested loops: validation center outermost.
    sweep = [(vc, nt) for vc in list_val_center for nt in list_normalizer_type]
    for val_center, normalizer_type in sweep:
        (
            X_train, X_train_mean, y_train,
            X_val, X_val_mean, y_val,
            X_test, X_test_mean, df_test,
        ) = build_dataset(normalizer_type=normalizer_type, val_center=val_center)

        # No checkpoint is loaded here: the inner MLP keeps its initial weights.
        mono_model = MLP(num_layers=num_layers, inside_dim=inside_dim)
        model = MaxAggregator(mono_model)

        train_options = dict(
            learning_rate=lr,
            weight_decay=wd,
            val_center=val_center,
            normalizer_type=normalizer_type,
            batch_size=batch_size,
            nb_epochs=nb_epochs,
            use_wandb=True,
            device=device,
        )
        train(X_train, y_train, X_val, y_val, model, **train_options)