In [1]:
# Enable the autoreload extension in mode 2 so edits to the project modules
# imported below are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Imports

In [2]:
cd ../src

/home/theo/MVA/dlmi/src


In [3]:
from util import *
from metric import *
from params import *
from imports import *
from post_process import *

In [4]:
from data.masks import *
from data.dataset import *
from data.transforms import *
from data.transforms import get_transforms as transfos

In [5]:
from training.train import *
from training.freezing import *
from training.predicting import *

In [6]:
from model_zoo.unet import *

In [7]:
from tqdm import tqdm_notebook as tqdm

# Session-wide setup. `sns`, `time`, `warnings` and `multiprocessing` are not imported
# in this notebook directly; presumably they come from the star imports above — confirm.
sns.set_style('white')
# Wall-clock timestamp taken when this setup cell runs.
KERNEL_START_TIME = time.time()
# UserWarning and FutureWarning are ignored globally from this point on.
warnings.simplefilter(action='ignore', category=UserWarning)
warnings.simplefilter(action='ignore', category=FutureWarning)
# warnings.simplefilter(action='ignore', category=RuntimeWarning)
print('Number of available cores :', multiprocessing.cpu_count())

Number of available cores : 16


In [8]:
# `seed_everything` and `seed` are not defined in this notebook; both presumably come
# from the star imports above (fixing the RNG seeds) — confirm.
seed_everything(seed)

# Data

In [9]:
# Training table; the relative path is resolved against the working directory
# set by the earlier `cd ../src` cell.
df = pd.read_csv('../output/df_train.csv')

In [10]:
# Forwarded as `ratio` to PneumoDataset and model_training below.
# NOTE(review): its exact meaning is defined by PneumoDataset — confirm there.
ratio = 4

## Training functions

In [11]:
def fit_seg(model, model_shadow, train_dataset, val_dataset, epochs=50, batch_size=32, use_aux_clf=False, acc_steps=1,
            warmup_prop=0.1, lr=1e-3, schedule='cosine', min_lr=1e-5, use_ema=False, ema_decay=0.99,
            verbose=1, verbose_eval=10, cp=False, model_name='model'):
    """
    Trains a segmentation model on GPU, optionally with an auxiliary classification
    loss and an exponential-moving-average (EMA) shadow model.

    Arguments:
        model -- Model to train. Called as model(x) and must return (mask logits, class logits).
        model_shadow -- Shadow model updated through update_average. Only used when use_ema is True,
                        any placeholder (e.g. None) is fine otherwise.
        train_dataset -- Training dataset yielding (x, mask, y) triplets.
        val_dataset -- Validation dataset yielding (x, mask, y) triplets.

    Keyword Arguments:
        epochs {int} -- Number of epochs (default: {50})
        batch_size {int} -- Training batch size; batch norm is frozen when it is below 4 (default: {32})
        use_aux_clf {bool} -- Whether to add the classification loss, weighted by 0.1 (default: {False})
        acc_steps {int} -- Number of batches to accumulate gradients over before stepping (default: {1})
        warmup_prop {float} -- Warmup proportion, forwarded to schedule_lr (default: {0.1})
        lr {float} -- Initial learning rate (default: {1e-3})
        schedule {str} -- Scheduler name, 'cosine' or 'reduce_lr' (default: {'cosine'})
        min_lr {float} -- Minimum learning rate (default: {1e-5})
        use_ema {bool} -- Whether to maintain and evaluate model_shadow (default: {False})
        ema_decay {float} -- Decay passed to update_average (default: {0.99})
        verbose {int} -- Print a log line every `verbose` epochs, 0 disables logging (default: {1})
        verbose_eval {int} -- Validate every `verbose_eval` epochs and at the last one,
                              0 disables validation (default: {10})
        cp {bool} -- Whether to checkpoint weights when the validation dice improves (default: {False})
        model_name {str} -- Prefix of the checkpoint file names (default: {'model'})

    Raises:
        ValueError: If `schedule` is neither 'cosine' nor 'reduce_lr'.
    """
    best_dice = 0
    avg_val_loss = 1000
    lr_init = lr
    acc_steps = max(acc_steps, 1)  # guards the modulo below against acc_steps=0

    # Three optimizer groups: light decay for encoder/logit/center weights, stronger decay
    # for decoder weights, and no decay for biases and LayerNorm parameters.
    # NOTE(review): a parameter whose name matches none of 'encoder', 'logit', 'center',
    # 'decoder' nor a no_decay pattern is in no group and is therefore never optimized —
    # confirm this cannot happen with the model's parameter names.
    params = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    encoder_params = [(n, p) for n, p in params if any(nd in n for nd in ['encoder', 'logit', 'center'])]
    opt_params = [
        {'params': [p for n, p in encoder_params if not any(nd in n for nd in no_decay)], 'weight_decay': 1e-4},
        {'params': [p for n, p in params if not any(nd in n for nd in no_decay) and 'decoder' in n],
         'weight_decay': 1e-2},
        {'params': [p for n, p in params if any(nd in n for nd in no_decay)], 'weight_decay': 0.0},
    ]

    optimizer = RAdam(opt_params, lr=lr)

    if schedule == 'cosine':
        scheduler = CosineAnnealingLR(optimizer, T_max=epochs - ceil(epochs * warmup_prop), eta_min=min_lr)
    elif schedule == 'reduce_lr':
        # max(..., 1) keeps the patience computation valid when validation is disabled
        scheduler = ReduceLROnPlateau(optimizer, factor=0.1, patience=ceil(5 / max(verbose_eval, 1)) - 1)
    else:
        # Previously this fell through and failed later with an unbound `scheduler`
        raise ValueError(f"Unknown schedule '{schedule}', expected 'cosine' or 'reduce_lr'")

    loss_seg = BCEWithLogitsLoss(reduction='mean')
    loss_clf = BCEWithLogitsLoss(reduction='mean')
    loss_clf_w = 0.1

    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                                               drop_last=True, num_workers=NUM_WORKERS)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=VAL_BS, shuffle=False, num_workers=NUM_WORKERS)
    n_train_steps = len(train_loader)
    n_val_steps = len(val_loader)

    # Bound up front so the final cleanup is valid even if no forward pass was run
    mask_pred = y_pred = None

    for epoch in range(epochs):
        model.train()
        if use_ema:
            model_shadow.train()

        if batch_size < 4:
            model.apply(freeze_bn)

        avg_loss = 0
        start_time = time.time()

        lr = schedule_lr(optimizer, epoch, scheduler, scheduler_name=schedule, avg_val_loss=avg_val_loss,
                         epochs=epochs, warmup_prop=warmup_prop, lr_init=lr_init, min_lr=min_lr,
                         verbose_eval=verbose_eval)

        optimizer.zero_grad()

        for step, (x, mask, y) in enumerate(train_loader):
            mask_pred, y_pred = model(x.cuda())

            loss = loss_seg(mask_pred, mask.cuda())
            if use_aux_clf:
                loss += loss_clf(y_pred, y.cuda().float()) * loss_clf_w

            loss.backward()
            avg_loss += loss.item() / n_train_steps

            # Step once `acc_steps` batches have been accumulated (the previous
            # `step % acc_steps == 0` stepped after the very first batch), and on the
            # last batch so trailing gradients are not discarded by the next zero_grad.
            if (step + 1) % acc_steps == 0 or (step + 1) == n_train_steps:
                optimizer.step()
                optimizer.zero_grad()

                if use_ema:
                    update_average(model_shadow, model, decay=ema_decay)

        model.eval()
        if use_ema:
            model_shadow.eval()

        avg_val_loss = 0.
        val_dice = 0.
        val_acc = 0.

        # verbose_eval=0 disables validation instead of raising ZeroDivisionError
        do_eval = bool(verbose_eval) and ((epoch + 1) % verbose_eval == 0 or (epoch + 1) == epochs)

        if do_eval:
            eval_model = model_shadow if use_ema else model
            with torch.no_grad():
                for x, mask, y in val_loader:
                    mask_pred, y_pred = eval_model(x.cuda())

                    loss = loss_seg(mask_pred, mask.cuda())
                    if use_aux_clf:
                        loss += loss_clf(y_pred, y.cuda().float()) * loss_clf_w

                    avg_val_loss += loss.item() / n_val_steps
                    mask_pred = torch.sigmoid(mask_pred)
                    y_pred = torch.sigmoid(y_pred)

                    val_acc += accuracy_score(y_pred > 0.5, y) / n_val_steps
                    val_dice += dice_th(mask_pred.contiguous().cpu(), mask) / n_val_steps

            if val_dice > best_dice and cp:
                save_model_weights(model, f"{model_name}_cp.pt", verbose=0)
                if use_ema:
                    save_model_weights(model_shadow, f"{model_name}_shadow_cp.pt", verbose=0)
                best_dice = val_dice

        elapsed_time = time.time() - start_time

        # verbose=0 disables logging instead of raising ZeroDivisionError
        if verbose and (epoch + 1) % verbose == 0:
            # Approximates the time spent since the previous log line
            elapsed_time = elapsed_time * verbose
            print(f'Epoch {epoch + 1}/{epochs}   lr={lr:.1e}   t={elapsed_time:.0f}s   loss={avg_loss:.3f}   ',
                  end='')
            if do_eval:
                print(f'dice={val_dice:.3f}   val_loss={avg_val_loss:.3f}   val_acc={val_acc:.3f}')
            else:
                print(' ', end='\n')

    # Drop references to the last predictions and the loaders before emptying the CUDA cache
    del mask_pred, y_pred, train_loader, val_loader
    torch.cuda.empty_cache()
    gc.collect()

In [12]:
def model_training(backbone, df_train, df_val, ratio=4, use_ema=False, seed=2019, save=True, cp=False, fold=0):
    """
    Trains a SegmentationUnet in two stages: 5 epochs with the encoder frozen,
    then 10 epochs with the full model at a lower learning rate.

    Arguments:
        backbone {str} -- Key looked up in SETTINGS to configure the model.
        df_train -- Training dataframe, passed to PneumoDataset.
        df_val -- Validation dataframe, passed to PneumoDataset.

    Keyword Arguments:
        ratio {int} -- Forwarded as `ratio` to PneumoDataset (default: {4})
        use_ema {bool} -- Whether to maintain a shadow model during the second stage (default: {False})
        seed {int} -- Seed passed to seed_everything (default: {2019})
        save {bool} -- Whether to save the final weights (default: {True})
        cp {bool} -- Whether to checkpoint on validation dice improvement during the second stage (default: {False})
        fold {int} -- 0-based fold index, used (as fold + 1) in the saved file names (default: {0})
    """
    seed_everything(seed)

    model = SegmentationUnet(SETTINGS[backbone], num_classes=1, center_block="aspp", aux_clf=True).cuda()

    # NOTE(review): the visible import cell aliases `get_transforms` as `transfos`;
    # `get_transfos` must be provided by one of the star imports — confirm.
    train_dataset = PneumoDataset(df_train, transforms=get_transfos(), ratio=ratio)
    val_dataset = PneumoDataset(df_val, transforms=None, ratio=ratio)

    freeze_encoder(model)
    n_parameters = count_parameters(model)
    print(f'\n - Training with frozen encoder \n\t -> {n_parameters} trainable parameters\n')

    # No shadow model in the first stage: pass None rather than the IPython-only `_` variable
    fit_seg(model, None, train_dataset, val_dataset,
            epochs=5, batch_size=32, lr=1e-3, min_lr=1e-4, schedule='cosine',
            use_aux_clf=True, use_ema=False, warmup_prop=0, acc_steps=1,
            verbose=1, verbose_eval=1)

    unfreeze_encoder(model)
    n_parameters = count_parameters(model)
    print(f'\n - Training full model \n\t -> {n_parameters} trainable parameters\n')

    if use_ema:
        # The shadow model must have the same architecture as `model`; it was previously built
        # with num_classes=4 and an undefined `use_aux_clf`.
        model_shadow = SegmentationUnet(SETTINGS[backbone], num_classes=1, center_block="aspp", aux_clf=True).cuda()
        # decay=0 presumably copies the current weights of `model` into the shadow — confirm in update_average
        update_average(model_shadow, model, decay=0)
    else:
        model_shadow = None

    # `cp` used to be accepted but never forwarded
    fit_seg(model, model_shadow, train_dataset, val_dataset,
            epochs=10, batch_size=32, lr=1e-4, min_lr=1e-5, schedule='cosine', use_aux_clf=True,
            use_ema=use_ema, ema_decay=0.99, warmup_prop=0, acc_steps=1,
            verbose=1, verbose_eval=1, cp=cp, model_name=f"unet_{backbone}_{fold + 1}")

    if save:
        # `fold` replaces the undefined name `i` previously used in these file names
        save_model_weights(model, f"unet_{backbone}_{fold + 1}_1.pt", verbose=1)
        if use_ema:
            save_model_weights(model_shadow, f"unet_{backbone}_shadow_{fold + 1}_1.pt", verbose=0)

# Training

In [13]:
# Key looked up in SETTINGS by model_training when building SegmentationUnet.
backbone = 'resnet34'

In [14]:
# Per-row `has_pneumothorax` labels, used to stratify the split below.
y = df['has_pneumothorax'].values

In [15]:
# Stratified 2-fold split on the labels; `y` is also passed as the feature argument.
# Only the first fold is used.
splits = list(StratifiedKFold(n_splits=2, shuffle=True, random_state=seed).split(y, y))
train_idx, val_idx = splits[0]

In [16]:
# Positional row selection of the train / validation parts of the first fold.
df_train = df.iloc[train_idx]
df_val = df.iloc[val_idx]

In [17]:
# Transforms are applied to the training set only (validation uses transforms=None).
# NOTE(review): these two datasets are not passed to the model_training call below,
# which builds its own from df_train / df_val — possibly leftover; confirm before removing.
train_dataset = PneumoDataset(df_train, transforms=get_transfos(), ratio=ratio)
val_dataset = PneumoDataset(df_val, transforms=None, ratio=ratio)

In [18]:
# Fail fast without a GPU: the training code moves the model and batches with .cuda().
assert torch.cuda.is_available(), 'Training on GPU is mandatory'

# Single run on the first fold; save=False skips writing the final weights.
model_training(backbone, df_train, df_val, ratio=ratio, seed=seed, save=False, cp=False)


 - Training with frozen encoder 
	 -> 4920450 trainable parameters

Epoch 1/5   lr=9.1e-04   t=88s   loss=1.032   dice=0.007   val_loss=0.200   val_acc=0.782
Epoch 2/5   lr=6.9e-04   t=85s   loss=0.136   dice=0.595   val_loss=0.101   val_acc=0.791
Epoch 3/5   lr=4.1e-04   t=86s   loss=0.087   dice=0.760   val_loss=0.081   val_acc=0.816
Epoch 4/5   lr=1.9e-04   t=86s   loss=0.073   dice=0.770   val_loss=0.074   val_acc=0.810
Epoch 5/5   lr=1.0e-04   t=86s   loss=0.066   dice=0.766   val_loss=0.073   val_acc=0.822

 - Training full model 
	 -> 26188098 trainable parameters

Epoch 1/10   lr=9.8e-05   t=91s   loss=0.062   dice=0.774   val_loss=0.062   val_acc=0.831
Epoch 2/10   lr=9.1e-05   t=91s   loss=0.049   dice=0.779   val_loss=0.056   val_acc=0.838
Epoch 3/10   lr=8.1e-05   t=90s   loss=0.038   dice=0.777   val_loss=0.050   val_acc=0.851
Epoch 4/10   lr=6.9e-05   t=91s   loss=0.029   dice=0.780   val_loss=0.052   val_acc=0.835
Epoch 5/10   lr=5.5e-05   t=90s   loss=0.022   dice=0.78