In [1]:
# Switch from the notebook's own folder to the project root so that the
# relative paths used later in this notebook ('src', 'models/...') resolve.
# NOTE: `%cd ..` is relative to the *current* directory, so re-running this
# cell without restarting the kernel moves up one more level each time.
%cd ..
# After this cell the working directory should be the project root (.../pdi).

c:\Users\admin\Desktop\research\pdi


In [2]:
import sys
import os

# Absolute path of the local 'src' folder (resolved against the current
# working directory), registered on sys.path so `pdi` can be imported.
module_path = os.path.abspath('src')

# Append only once so re-running the cell does not pile up duplicate entries.
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [3]:
from pdi.constants import (
    PARTICLES_DICT,
    TARGET_CODES,
    NUM_WORKERS,
)

In [4]:
# Hyper-parameters shared by every experiment. They are merged with the
# per-experiment settings before being handed to wandb as the run config.
config_common = dict(
    bs=512,  # passed as the first argument to prepare_dataloaders
    # NOTE(review): 1 looks like a quick smoke-test value; the original
    # trailing remark "40" suggests that is the full-run setting -- confirm.
    max_epochs=1,
    dropout=0.1,
    gamma=0.9,
    patience=5,
    patience_threshold=0.001,
)

In [5]:
import torch
import torch.nn as nn

# Use the first CUDA GPU when one is present. Without this guard,
# torch.cuda.set_device(0) raises on CPU-only machines and the notebook stops
# before any work is done; the fallback is announced so it cannot go unnoticed.
if torch.cuda.is_available():
    torch.cuda.set_device(0)
    device = torch.device("cuda")
else:
    print("CUDA is not available - falling back to CPU.")
    device = torch.device("cpu")

In [6]:
from pdi.data.preparation import FeatureSetPreparation, MeanImputation, DeletePreparation, RegressionImputation, EnsemblePreparation
from pdi.models import AttentionModel, NeuralNetEnsemble, NeuralNet
from pdi.data.constants import N_COLUMNS
from pdi.data.types import Split

# NOTE: the builders below read `wandb.config` only when they are *called*,
# so `wandb` must be imported (and a run initialised) before they are invoked;
# it does not need to be in scope when this cell itself is executed.


def _mlp_hparams():
    """Return a fresh dict of the hyper-parameters used by the MLP-based runs."""
    return {
        "h0": 64,
        "h1": 32,
        "h2": 16,
        "start_lr": 5e-4,
    }


def _mlp_model_args(d_prep):
    """Build the positional constructor arguments for ``NeuralNet``.

    ``d_prep`` is accepted for interface parity with the other builders but is
    not used. The first element is the size list (presumably layer widths from
    input to output -- confirm against ``NeuralNet``).
    """
    sizes = [N_COLUMNS, wandb.config.h0, wandb.config.h1, wandb.config.h2, 1]
    return [sizes, nn.ReLU, wandb.config.dropout]


def _ensemble_model_args(d_prep):
    """Build the positional constructor arguments for ``NeuralNetEnsemble``."""
    group_ids = d_prep.get_group_ids()
    sizes = [wandb.config.h0, wandb.config.h1, wandb.config.h2, 1]
    return [group_ids, sizes, nn.ReLU, wandb.config.dropout]


def _attention_model_args(d_prep):
    """Build the positional constructor arguments for ``AttentionModel``.

    ``d_prep`` is unused; every value comes from the active wandb run config.
    """
    cfg = wandb.config
    return [
        cfg.embed_in,
        cfg.embed_hidden,
        cfg.d_model,
        cfg.ff_hidden,
        cfg.pool_hidden,
        cfg.num_heads,
        cfg.num_blocks,
        nn.ReLU,
        cfg.dropout,
    ]


def _mlp_experiment(data_preparation):
    """Describe a ``NeuralNet`` experiment on top of ``data_preparation``."""
    return {
        "data_preparation": data_preparation,
        "config": _mlp_hparams(),
        "model_class": NeuralNet,
        "model_args": _mlp_model_args,
    }


# Experiment name -> keyword arguments of do_train (everything except the
# experiment name itself). Each "model_args" callable takes the data
# preparation object and returns the model's positional constructor arguments.
EXPERIMENTS = {
    "Delete": _mlp_experiment(DeletePreparation()),
    "Mean": _mlp_experiment(MeanImputation()),
    "Regression": _mlp_experiment(RegressionImputation()),
    "Ensemble": {
        "data_preparation": EnsemblePreparation(),
        "config": _mlp_hparams(),
        "model_class": NeuralNetEnsemble,
        "model_args": _ensemble_model_args,
    },
    "Proposed": {
        "data_preparation": FeatureSetPreparation(),
        "config": {
            "embed_in": N_COLUMNS + 1,
            "embed_hidden": 128,
            "d_model": 32,
            "ff_hidden": 128,
            "pool_hidden": 64,
            "num_heads": 2,
            "num_blocks": 2,
            "start_lr": 2e-4,
        },
        "model_class": AttentionModel,
        "model_args": _attention_model_args,
    },
}


In [7]:
import wandb
import os
from pdi.train import train
from pdi.constants import PARTICLES_DICT

from os.path import isfile

SAMPLES = 3  # number of models trained per target code in each experiment


def do_train(experiment_name, data_preparation, config, model_class,
             model_args):
    """Train and checkpoint ``SAMPLES`` models per target code for one experiment.

    Checkpoints are written to ``models/<experiment_name>/<particle>_<sample>.pt``.
    A (sample, target) pair whose checkpoint file already exists is skipped, so
    the function can simply be called again to resume an interrupted sweep.

    Args:
        experiment_name: Used as the wandb project name and as the checkpoint
            sub-directory under ``models/``.
        data_preparation: Object providing ``prepare_dataloaders``; it is also
            passed to ``model_args``.
        config: Experiment-specific hyper-parameters; merged over
            ``config_common`` (experiment values win on key clashes).
        model_class: Callable invoked as ``model_class(*model_init_args)``; the
            result must support ``.to(device)``, ``.state_dict()`` and expose
            ``.thres`` after training.
        model_args: Callable taking ``data_preparation`` and returning the list
            of positional arguments for ``model_class``. It is called inside
            the wandb run, so it may read ``wandb.config``.
    """
    wandb_config = {**config_common, **config}

    # The output directory does not depend on sample/target, so create it once.
    model_dir = f"models/{experiment_name}"
    os.makedirs(f"{model_dir}/", exist_ok=True)

    for sample in range(SAMPLES):
        # Loaders are rebuilt for every sample and shared by all target codes.
        train_loader, val_loader = data_preparation.prepare_dataloaders(
            wandb_config["bs"], NUM_WORKERS, [Split.TRAIN, Split.VAL])

        for target_code in TARGET_CODES:
            run_name = PARTICLES_DICT[target_code] + f"_{sample}"
            save_path = f"{model_dir}/{run_name}.pt"
            if isfile(save_path):
                # Already trained in a previous invocation.
                continue

            with wandb.init(project=experiment_name,
                            config=wandb_config,
                            name=run_name,
                            anonymous="allow"):
                # Class weighting is currently disabled (fixed at 1.0).
                # Previous variant, kept for reference:
                # pos_weight = torch.tensor(data_preparation.pos_weight(target_code)).float().to(device)
                pos_weight = torch.tensor(1.0).to(device)
                wandb.log({"pos_weight": pos_weight.item()})

                model_init_args = model_args(data_preparation)
                model = model_class(*model_init_args).to(device)

                train(model, target_code, device, train_loader, val_loader,
                      pos_weight)

                # model.thres is read after training (presumably set by
                # `train` -- confirm).
                save_dict = {
                    "state_dict": model.state_dict(),
                    "model_args": model_init_args,
                    "model_thres": model.thres
                }

                # Write to a temporary file and rename it into place: the
                # resume check above only tests for existence, so a save that
                # is interrupted half-way must never leave a truncated file at
                # save_path.
                tmp_path = save_path + ".tmp"
                torch.save(save_dict, tmp_path)
                os.replace(tmp_path, save_path)

In [None]:
# Train the "Delete" experiment using its entry in EXPERIMENTS.
do_train(experiment_name="Delete", **EXPERIMENTS["Delete"])

In [None]:
# Train the "Mean" experiment using its entry in EXPERIMENTS.
do_train(experiment_name="Mean", **EXPERIMENTS["Mean"])

In [None]:
# Train the "Regression" experiment using its entry in EXPERIMENTS.
do_train(experiment_name="Regression", **EXPERIMENTS["Regression"])

In [None]:
# Train the "Ensemble" experiment using its entry in EXPERIMENTS.
do_train(experiment_name="Ensemble", **EXPERIMENTS["Ensemble"])

In [8]:
# Train the "Proposed" experiment using its entry in EXPERIMENTS.
do_train(experiment_name="Proposed", **EXPERIMENTS["Proposed"])