In [1]:
import numpy as np

from torch.utils.data import DataLoader
import albumentations as albu 
import torch

import importlib
import functools
import os
from pathlib import Path

from dataset import *
from train import Learning
from helpers import *

%load_ext autoreload
%autoreload 2

In [2]:
def train_fold(
    train_config, experiment_folder, pipeline_name, log_dir, fold_id,
    train_dataloader, valid_dataloader, binarizer_fn, eval_fn):
    
    fold_logger = init_logger(log_dir, 'train_fold_{}.log'.format(fold_id))

    best_checkpoint_folder = Path(experiment_folder, train_config['CHECKPOINTS']['BEST_FOLDER'])
    best_checkpoint_folder.mkdir(exist_ok=True, parents=True)

    checkpoints_history_folder = Path(
        experiment_folder,
        train_config['CHECKPOINTS']['FULL_FOLDER'],
        'fold{}'.format(fold_id))
    checkpoints_history_folder.mkdir(exist_ok=True, parents=True)
    checkpoints_topk = train_config['CHECKPOINTS']['TOPK']

    calculation_name = '{}_fold{}'.format(pipeline_name, fold_id)
    
    device = train_config['DEVICE']
    
    module = importlib.import_module(train_config['MODEL']['PY'])
    model_class = getattr(module, train_config['MODEL']['CLASS'])
    model = model_class(**train_config['MODEL']['ARGS'])

    pretrained_model_config = train_config['MODEL'].get('PRETRAINED', False)
    if pretrained_model_config: 
        loaded_pipeline_name = pretrained_model_config['PIPELINE_NAME']
        pretrained_model_path = Path(
            pretrained_model_config['PIPELINE_PATH'], 
            pretrained_model_config['CHECKPOINTS_FOLDER'],
            '{}_fold{}.pth'.format(loaded_pipeline_name, fold_id)) 
        if pretrained_model_path.is_file():
            model.load_state_dict(torch.load(pretrained_model_path))
            fold_logger.info('load model from {}'.format(pretrained_model_path)) 

    if len(train_config['DEVICE_LIST']) > 1:
        model = torch.nn.DataParallel(model)
    
    module = importlib.import_module(train_config['CRITERION']['PY'])
    loss_class = getattr(module, train_config['CRITERION']['CLASS'])
    loss_fn = loss_class(**train_config['CRITERION']['ARGS'])
    
    optimizer_class = getattr(torch.optim, train_config['OPTIMIZER']['CLASS'])
    optimizer = optimizer_class(model.parameters(), **train_config['OPTIMIZER']['ARGS'])
    scheduler_class = getattr(torch.optim.lr_scheduler, train_config['SCHEDULER']['CLASS'])
    scheduler = scheduler_class(optimizer, **train_config['SCHEDULER']['ARGS'])
    
    n_epoches = train_config['EPOCHES']
    grad_clip = train_config['GRADIENT_CLIPPING']
    grad_accum = train_config['GRADIENT_ACCUMULATION_STEPS']
    early_stopping = train_config['EARLY_STOPPING']
    validation_frequency = train_config.get('VALIDATION_FREQUENCY', 1)
    
    freeze_model = train_config['MODEL']['FREEZE']
    
    Learning(
        optimizer,
        binarizer_fn,
        loss_fn,
        eval_fn,
        device,
        n_epoches,
        scheduler,
        freeze_model,
        grad_clip,
        grad_accum,
        early_stopping,
        validation_frequency,
        calculation_name,
        best_checkpoint_folder,
        checkpoints_history_folder,
        checkpoints_topk,
        fold_logger
    ).run_train(model,train_dataloader,valid_dataloader)

In [3]:
experiment_folder = Path("experiments")
# Change this line to your own config file path
config_folder = experiment_folder / "configs" / "ResUNet_1024_test.yaml"

In [4]:
train_config = load_yaml(config_folder)

log_dir = Path(experiment_folder, train_config['LOGGER_DIR'])
log_dir.mkdir(exist_ok=True, parents=True)

main_logger = init_logger(log_dir, 'train_main.log')

seed = train_config['SEED']
init_seed(seed)
main_logger.info(train_config)

if "DEVICE_LIST" in train_config:
    os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(map(str, train_config["DEVICE_LIST"]))

pipeline_name = train_config['PIPELINE_NAME']

non_empty_mask_proba = train_config.get('NON_EMPTY_MASK_PROBA', 0)
use_sampler = train_config['USE_SAMPLER']

num_workers = train_config['WORKERS']
batch_size = train_config['BATCH_SIZE']
n_folds = train_config['FOLD']['NUMBER']

usefolds = map(int, train_config['FOLD']['USEFOLDS'])
# local_metric_fn, global_metric_fn = init_eval_fns(train_config)

binarizer_module = importlib.import_module(train_config['MASK_BINARIZER']['PY'])
binarizer_class = getattr(binarizer_module, train_config['MASK_BINARIZER']['CLASS'])
binarizer_fn = binarizer_class(**train_config['MASK_BINARIZER']['ARGS'])

eval_module = importlib.import_module(train_config['EVALUATION_METRIC']['PY'])
eval_fn = getattr(eval_module, train_config['EVALUATION_METRIC']['CLASS'])
eval_fn = functools.partial(eval_fn, **train_config['EVALUATION_METRIC']['ARGS'])


{'PIPELINE_NAME': 'resunet_1024', 'LOGGER_DIR': 'resunet/resunet_1024_test/log', 'SEED': 42, 'DEVICE': 'cuda', 'DEVICE_LIST': [0], 'WORKERS': 8, 'MODEL': {'PRETRAINED': {'PIPELINE_PATH': 'experiments/resunet', 'CHECKPOINTS_FOLDER': 'resunet_1024_3', 'PIPELINE_NAME': 'resunet_1024'}, 'FREEZE': True, 'PY': 'model', 'CLASS': 'ResUNet', 'ARGS': {'pretrained': False}}, 'FOLD': {'NUMBER': 5, 'USEFOLDS': [0, 1, 2, 3, 4], 'FILE': None}, 'USE_SAMPLER': True, 'NON_EMPTY_MASK_PROBA': 0.4, 'IMG_SIZE': 1024, 'CRITERION': {'PY': 'losses', 'CLASS': 'ComboLoss', 'ARGS': {'weights': {'bce': 1, 'dice': 1, 'focal': 1}}}, 'OPTIMIZER': {'CLASS': 'Adam', 'ARGS': {'lr': 1e-05, 'weight_decay': 5e-06}}, 'SCHEDULER': {'CLASS': 'CosineAnnealingLR', 'ARGS': {'T_max': 8, 'eta_min': 1e-07}}, 'BATCH_SIZE': 2, 'GRADIENT_ACCUMULATION_STEPS': 1, 'GRADIENT_CLIPPING': 0.1, 'EPOCHES': 1, 'EARLY_STOPPING': 5, 'CHECKPOINTS': {'FULL_FOLDER': 'resunet/resunet_1024_test', 'BEST_FOLDER': 'resunet/resunet_1024_test', 'TOPK': 3},

In [5]:
img_size = train_config['IMG_SIZE']

train_transform = albu.Compose([
    albu.OneOf([
        albu.RandomGamma(),
        albu.RandomBrightnessContrast(),
        ], p=0.5),
    albu.OneOf([
        albu.ElasticTransform(),
        albu.GridDistortion(),
        albu.OpticalDistortion(),
        ], p=0.3),
    albu.ShiftScaleRotate(scale_limit=(0, 0.1), rotate_limit=0),
    albu.Resize(img_size, img_size, always_apply=True),
    albu.Normalize()
])

valid_transform = albu.Compose([
    albu.Resize(img_size, img_size, always_apply=True),
    albu.Normalize()
])

In [6]:
positive_names_path="./data/2img_mask_npy/positive_imgs_names.npy"
negative_names_path="./data/2img_mask_npy/negative_imgs_names.npy"
positive_names = np.load(positive_names_path)
negative_names = np.load(negative_names_path)

print(f"Amount of positive samples: {len(positive_names)}")
print(f"Amount of negative samples: {len(negative_names)}")

train_names = np.concatenate((positive_names, negative_names))
exist_labels = np.concatenate((np.ones(len(positive_names), dtype=np.uint8), np.zeros(len(negative_names), dtype=np.uint8)))

fold_labels = np.load("./data/2img_mask_npy/fold_labels.npy")

Amount of positive samples: 2669
Amount of negative samples: 9378


In [7]:
for fold_id in usefolds:
    main_logger.info('Start training of {} fold....'.format(fold_id))

    train_dataset = PneumoDataset(
        mode="train",
        fold_index=fold_id,
        train_names=train_names,
        fold_labels=fold_labels,
        transform=valid_transform
        )
    train_sampler = PneumoSampler(
        fold_index=fold_id,
        demand_non_empty_proba=non_empty_mask_proba, 
        fold_labels=fold_labels, 
        exist_labels=exist_labels
        )
    if use_sampler:
        train_dataloader = DataLoader(
            dataset=train_dataset, 
            batch_size=batch_size,   
            num_workers=num_workers, 
            sampler=train_sampler
            )
    else:
        train_dataloader = DataLoader(
            dataset=train_dataset, 
            batch_size=batch_size,   
            num_workers=num_workers, 
            shuffle=True
            )

    valid_dataset = PneumoDataset(
        mode="val",
        fold_index=fold_id,
        train_names=train_names,
        fold_labels=fold_labels,
        transform=valid_transform
        )
    
    valid_dataloader = DataLoader(
        dataset=valid_dataset,
        batch_size=batch_size, 
        num_workers=num_workers, 
        shuffle=False
        )

    print(f"Train dataset size: {len(train_dataset)}")
    print(f"Valid dataset size: {len(valid_dataset)}")

    train_fold(
        train_config, experiment_folder, pipeline_name, log_dir, fold_id,
        train_dataloader, valid_dataloader, binarizer_fn, eval_fn
        )

Start training of 0 fold....
Train dataset size: 9637
Valid dataset size: 2410


  model.load_state_dict(torch.load(pretrained_model_path))


load model from experiments\resunet\resunet_1024_3\resunet_1024_fold0.pth
0 epoch: 	 start validation....


score: 0.84916 on (0.6, 3000, 0.25): 100%|██████████| 1205/1205 [04:30<00:00,  4.46it/s]

0 epoch: 	 Score: 0.84916	 Params: (0.6, 3000, 0.25)
Start training of 1 fold....





Train dataset size: 9637
Valid dataset size: 2410


  model.load_state_dict(torch.load(pretrained_model_path))


load model from experiments\resunet\resunet_1024_3\resunet_1024_fold1.pth
0 epoch: 	 start validation....


score: 0.84517 on (0.6, 3000, 0.25): 100%|██████████| 1205/1205 [03:51<00:00,  5.21it/s]

0 epoch: 	 Score: 0.84517	 Params: (0.6, 3000, 0.25)
Start training of 2 fold....





Train dataset size: 9638
Valid dataset size: 2409
load model from experiments\resunet\resunet_1024_3\resunet_1024_fold2.pth


  model.load_state_dict(torch.load(pretrained_model_path))


0 epoch: 	 start validation....


score: 0.83864 on (0.7, 3000, 0.3): 100%|██████████| 1205/1205 [03:49<00:00,  5.25it/s]

0 epoch: 	 Score: 0.83864	 Params: (0.7, 3000, 0.3)
Start training of 3 fold....





Train dataset size: 9638
Valid dataset size: 2409


  model.load_state_dict(torch.load(pretrained_model_path))


load model from experiments\resunet\resunet_1024_3\resunet_1024_fold3.pth
0 epoch: 	 start validation....


score: 0.84461 on (0.7, 2000, 0.3): 100%|██████████| 1205/1205 [03:51<00:00,  5.20it/s]

0 epoch: 	 Score: 0.84461	 Params: (0.7, 2000, 0.3)
Start training of 4 fold....





Train dataset size: 9638
Valid dataset size: 2409


  model.load_state_dict(torch.load(pretrained_model_path))


load model from experiments\resunet\resunet_1024_3\resunet_1024_fold4.pth
0 epoch: 	 start validation....


score: 0.84964 on (0.6, 3000, 0.25): 100%|██████████| 1205/1205 [04:00<00:00,  5.00it/s]

0 epoch: 	 Score: 0.84964	 Params: (0.6, 3000, 0.25)



