# Unet++ with EfficientNet-B4

## Import

In [1]:
"""Train
"""
from datetime import datetime
from time import time
import numpy as np
import shutil, random, os, sys, torch
from glob import glob
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split

# Script variant: prj_dir = os.path.dirname(os.path.abspath(__file__))
# Notebook variant: start from the current working directory, take its parent
# as the project directory, and make that directory importable.
notebook_dir = os.path.abspath("")
prj_dir = os.path.dirname(notebook_dir)
sys.path.append(prj_dir)

from modules.utils import load_yaml, get_logger
from modules.metrics import get_metric_function
from modules.earlystoppers import EarlyStopper
from modules.losses import get_loss_function
from modules.optimizers import get_optimizer
from modules.schedulers import get_scheduler
from modules.scalers import get_image_scaler
from modules.datasets import SegDataset
from modules.recorders import Recorder
from modules.trainer import Trainer
from models.utils import get_model

In [2]:
# Re-point prj_dir at the current working directory (normalized, absolute).
# NOTE(review): this presumes the notebook is launched from the project root - confirm.
prj_dir = os.path.abspath(os.curdir)

In [3]:
# Echo the resolved project directory so it can be checked before paths are built from it.
prj_dir

'c:\\Dev\\2022\\maicon\\baseline'

In [4]:
# File name of the training config; it is joined onto <prj_dir>/config when the config is loaded.
# NOTE(review): the name `yaml` would shadow the PyYAML module if it were ever imported in this notebook.
yaml = 'train copy1.yaml'

## Set configs

In [5]:

# Read the training configuration from <prj_dir>/config/<yaml>
config_path = os.path.join(prj_dir, 'config', yaml)
config = load_yaml(config_path)

# Name this run: the fixed tag 'debug' in debug mode, otherwise a timestamp
# such as 20221109_221902
if config['debug']:
    train_serial = 'debug'
else:
    train_serial = datetime.now().strftime("%Y%m%d_%H%M%S")

# Seed every random number generator in use, then request deterministic cuDNN
for seed_fn in (torch.cuda.manual_seed, torch.manual_seed, np.random.seed, random.seed):
    seed_fn(config['seed'])
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Select the device (GPU/CPU)
os.environ['CUDA_VISIBLE_DEVICES'] = str(config['gpu_num'])
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Create the directory that collects this run's outputs
train_result_dir = os.path.join(prj_dir, 'results', 'train', train_serial)
os.makedirs(train_result_dir, exist_ok=True)

# Set logger: it writes to train.log inside the result directory
log_file = os.path.join(train_result_dir, 'train.log')
logging_level = 'debug' if config['verbose'] else 'info'
logger = get_logger(name='train', file_path=log_file, level=logging_level)

# Set data directory
train_dirs = os.path.join(prj_dir, 'data', 'train')

## Dataset

In [6]:
# Load data and create the train / validation datasets.
# glob() returns paths in an arbitrary, file-system-dependent order, so the
# list is sorted first; otherwise the seeded split below would assign
# different images to train/val on different machines.
train_img_paths = sorted(glob(os.path.join(train_dirs, 'x', '*.png')))
train_img_paths, val_img_paths = train_test_split(train_img_paths,
                                                  test_size=config['val_size'],
                                                  random_state=config['seed'],
                                                  shuffle=True)

# Both datasets receive the same input size ([width, height] from the config)
# and an image scaler looked up by the configured name.
train_dataset = SegDataset(paths=train_img_paths,
                        input_size=[config['input_width'], config['input_height']],
                        scaler=get_image_scaler(config['scaler']),
                        logger=logger)
val_dataset = SegDataset(paths=val_img_paths,
                        input_size=[config['input_width'], config['input_height']],
                        scaler=get_image_scaler(config['scaler']),
                        logger=logger)

# Create data loaders; only the training loader follows the configured shuffle flag
train_dataloader = DataLoader(dataset=train_dataset,
                            batch_size=config['batch_size'],
                            num_workers=config['num_workers'],
                            shuffle=config['shuffle'],
                            drop_last=config['drop_last'])

# NOTE(review): drop_last is also applied to validation, which skips the final
# partial batch when it is enabled - confirm that this is intended.
val_dataloader = DataLoader(dataset=val_dataset,
                            batch_size=config['batch_size'],
                            num_workers=config['num_workers'],
                            shuffle=False,
                            drop_last=config['drop_last'])

logger.info(f"Load dataset, train: {len(train_dataset)}, val: {len(val_dataset)}")

## Model, Optimizer, Scheduler, Loss, etc.

In [7]:
# Build the model: fetch the factory for the configured architecture,
# instantiate it with the encoder settings, then move it to the device
model_factory = get_model(model_str=config['architecture'])
model = model_factory(
    classes=config['n_classes'],
    encoder_name=config['encoder'],
    encoder_weights=config['encoder_weight'],
    activation=config['activation'],
)
model = model.to(device)
logger.info(f"Load model architecture: {config['architecture']}")

# Optimizer, scheduler and loss each have a `name` (used for the lookup) and
# `args` (passed as keyword arguments) in the config
optimizer_cfg = config['optimizer']
scheduler_cfg = config['scheduler']
loss_cfg = config['loss']

# Set optimizer
optimizer_factory = get_optimizer(optimizer_str=optimizer_cfg['name'])
optimizer = optimizer_factory(model.parameters(), **optimizer_cfg['args'])

# Set scheduler
scheduler_factory = get_scheduler(scheduler_str=scheduler_cfg['name'])
scheduler = scheduler_factory(optimizer=optimizer, **scheduler_cfg['args'])

# Set loss function
loss_factory = get_loss_function(loss_function_str=loss_cfg['name'])
loss_func = loss_factory(**loss_cfg['args'])

# Set metrics: one function per configured metric name
metric_funcs = {name: get_metric_function(name) for name in config['metrics']}
logger.info(f"Load optimizer:{optimizer_cfg['name']}, scheduler: {scheduler_cfg['name']}, loss: {loss_cfg['name']}, metric: {config['metrics']}")

# Set trainer
trainer = Trainer(
    model=model,
    optimizer=optimizer,
    scheduler=scheduler,
    loss_func=loss_func,
    metric_funcs=metric_funcs,
    device=device,
    logger=logger,
)
logger.info("Load trainer")

# Set early stopper
early_stopper = EarlyStopper(patience=config['earlystopping_patience'], logger=logger)

# Set recorder: it writes into this run's result directory
recorder = Recorder(
    record_dir=train_result_dir,
    model=model,
    optimizer=optimizer,
    scheduler=scheduler,
    logger=logger,
)
logger.info("Load early stopper, recorder")

# Keep a copy of the config file next to the recorded results
shutil.copy(config_path, os.path.join(recorder.record_dir, yaml))


Downloading: "https://github.com/qubvel/segmentation_models.pytorch/releases/download/v0.0.2/mit_b3.pth" to C:\Users\user/.cache\torch\hub\checkpoints\mit_b3.pth


  0%|          | 0.00/170M [00:00<?, ?B/s]

'c:\\Dev\\2022\\maicon\\baseline\\results\\train\\20221109_221902\\train copy1.yaml'

## Train

In [8]:
# Train
print("START TRAINING")
logger.info("START TRAINING")

n_epochs = config['n_epochs']
# (row-key prefix, label shown in messages, trainer method, dataloader), in run order
phases = (
    ('train', 'Train', trainer.train, train_dataloader),
    ('val', 'Validation', trainer.validate, val_dataloader),
)

for epoch_id in range(n_epochs):

    # Start this epoch's result row
    row = {
        'epoch_id': epoch_id,
        'train_serial': train_serial,
        'lr': trainer.scheduler.get_last_lr(),
    }

    for prefix, label, run_phase, dataloader in phases:
        banner = f"Epoch {epoch_id}/{n_epochs} {label}.."
        print(banner)
        logger.info(banner)

        started = time()
        run_phase(dataloader=dataloader, epoch_index=epoch_id)
        elapsed = time() - started

        # Write the phase result to the row, then reset the trainer's history
        row[f'{prefix}_loss'] = trainer.loss
        for metric_name, metric_score in trainer.scores.items():
            row[f'{prefix}_{metric_name}'] = metric_score
        row[f'{prefix}_elapsed_time'] = round(elapsed, 1)
        trainer.clear_history()

    # Performance record - row, then plot
    recorder.add_row(row)
    recorder.save_plot(config['plot'])

    # Check early stopping on the configured target column of this row.
    # Weights are saved whenever the patience counter is zero
    # (presumably it resets when the target improves - confirm in EarlyStopper).
    early_stopper.check_early_stopping(row[config['earlystopping_target']])
    if early_stopper.patience_counter == 0:
        recorder.save_weight(epoch=epoch_id)

    if early_stopper.stop:
        stop_msg = f"Epoch {epoch_id}/{n_epochs}, Stopped counter {early_stopper.patience_counter}/{config['earlystopping_patience']}"
        print(stop_msg)
        logger.info(stop_msg)
        break

print("END TRAINING")
logger.info("END TRAINING")

START TRAINING
Epoch 0/100 Train..


100%|██████████| 600/600 [29:22<00:00,  2.94s/it]


Epoch 0/100 Validation..


100%|██████████| 150/150 [00:32<00:00,  4.68it/s]


Epoch 1/100 Train..


100%|██████████| 600/600 [29:20<00:00,  2.93s/it]


Epoch 1/100 Validation..


100%|██████████| 150/150 [00:39<00:00,  3.82it/s]


Epoch 2/100 Train..


100%|██████████| 600/600 [29:36<00:00,  2.96s/it]


Epoch 2/100 Validation..


100%|██████████| 150/150 [00:32<00:00,  4.61it/s]


Epoch 3/100 Train..


100%|██████████| 600/600 [28:27<00:00,  2.85s/it]


Epoch 3/100 Validation..


100%|██████████| 150/150 [00:28<00:00,  5.22it/s]


Epoch 4/100 Train..


100%|██████████| 600/600 [28:09<00:00,  2.82s/it]


Epoch 4/100 Validation..


100%|██████████| 150/150 [00:29<00:00,  5.13it/s]


Epoch 5/100 Train..


100%|██████████| 600/600 [28:09<00:00,  2.82s/it]


Epoch 5/100 Validation..


100%|██████████| 150/150 [00:29<00:00,  5.13it/s]


Epoch 6/100 Train..


100%|██████████| 600/600 [28:06<00:00,  2.81s/it]


Epoch 6/100 Validation..


100%|██████████| 150/150 [00:28<00:00,  5.20it/s]


Epoch 7/100 Train..


100%|██████████| 600/600 [28:08<00:00,  2.81s/it]


Epoch 7/100 Validation..


100%|██████████| 150/150 [00:29<00:00,  5.10it/s]


Epoch 8/100 Train..


100%|██████████| 600/600 [28:07<00:00,  2.81s/it]


Epoch 8/100 Validation..


100%|██████████| 150/150 [00:29<00:00,  5.14it/s]


Epoch 9/100 Train..


100%|██████████| 600/600 [28:06<00:00,  2.81s/it]


Epoch 9/100 Validation..


100%|██████████| 150/150 [00:28<00:00,  5.20it/s]


Epoch 10/100 Train..


100%|██████████| 600/600 [28:07<00:00,  2.81s/it]


Epoch 10/100 Validation..


100%|██████████| 150/150 [00:28<00:00,  5.20it/s]


Epoch 11/100 Train..


100%|██████████| 600/600 [28:06<00:00,  2.81s/it]


Epoch 11/100 Validation..


100%|██████████| 150/150 [00:29<00:00,  5.05it/s]


Epoch 12/100 Train..


100%|██████████| 600/600 [28:08<00:00,  2.81s/it]


Epoch 12/100 Validation..


100%|██████████| 150/150 [00:29<00:00,  5.08it/s]


Epoch 13/100 Train..


100%|██████████| 600/600 [28:08<00:00,  2.81s/it]


Epoch 13/100 Validation..


100%|██████████| 150/150 [00:28<00:00,  5.19it/s]


Epoch 14/100 Train..


100%|██████████| 600/600 [27:48<00:00,  2.78s/it]


Epoch 14/100 Validation..


100%|██████████| 150/150 [00:29<00:00,  5.04it/s]


Epoch 15/100 Train..


100%|██████████| 600/600 [27:36<00:00,  2.76s/it]


Epoch 15/100 Validation..


100%|██████████| 150/150 [00:29<00:00,  5.04it/s]


Epoch 16/100 Train..


100%|██████████| 600/600 [27:36<00:00,  2.76s/it]


Epoch 16/100 Validation..


100%|██████████| 150/150 [00:28<00:00,  5.21it/s]


Epoch 17/100 Train..


100%|██████████| 600/600 [27:36<00:00,  2.76s/it]


Epoch 17/100 Validation..


100%|██████████| 150/150 [00:28<00:00,  5.22it/s]


Epoch 18/100 Train..


100%|██████████| 600/600 [27:36<00:00,  2.76s/it]


Epoch 18/100 Validation..


100%|██████████| 150/150 [00:28<00:00,  5.23it/s]


Epoch 19/100 Train..


100%|██████████| 600/600 [27:36<00:00,  2.76s/it]


Epoch 19/100 Validation..


100%|██████████| 150/150 [00:31<00:00,  4.71it/s]


Epoch 20/100 Train..


100%|██████████| 600/600 [27:36<00:00,  2.76s/it]


Epoch 20/100 Validation..


100%|██████████| 150/150 [00:31<00:00,  4.69it/s]


Epoch 21/100 Train..


100%|██████████| 600/600 [27:36<00:00,  2.76s/it]


Epoch 21/100 Validation..


100%|██████████| 150/150 [00:28<00:00,  5.19it/s]


Epoch 22/100 Train..


100%|██████████| 600/600 [27:36<00:00,  2.76s/it]


Epoch 22/100 Validation..


100%|██████████| 150/150 [00:28<00:00,  5.19it/s]


Epoch 23/100 Train..


100%|██████████| 600/600 [27:46<00:00,  2.78s/it]


Epoch 23/100 Validation..


100%|██████████| 150/150 [00:29<00:00,  5.08it/s]


Epoch 24/100 Train..


100%|██████████| 600/600 [27:47<00:00,  2.78s/it]


Epoch 24/100 Validation..


100%|██████████| 150/150 [00:29<00:00,  5.16it/s]


Epoch 25/100 Train..


100%|██████████| 600/600 [27:39<00:00,  2.77s/it]


Epoch 25/100 Validation..


100%|██████████| 150/150 [00:29<00:00,  5.08it/s]


Epoch 26/100 Train..


100%|██████████| 600/600 [27:21<00:00,  2.74s/it]


Epoch 26/100 Validation..


100%|██████████| 150/150 [00:29<00:00,  5.12it/s]


Epoch 27/100 Train..


100%|██████████| 600/600 [27:19<00:00,  2.73s/it]


Epoch 27/100 Validation..


100%|██████████| 150/150 [00:29<00:00,  5.11it/s]


Epoch 28/100 Train..


100%|██████████| 600/600 [27:15<00:00,  2.73s/it]


Epoch 28/100 Validation..


100%|██████████| 150/150 [00:29<00:00,  5.11it/s]


Epoch 29/100 Train..


100%|██████████| 600/600 [27:05<00:00,  2.71s/it]


Epoch 29/100 Validation..


100%|██████████| 150/150 [00:30<00:00,  4.97it/s]


Epoch 30/100 Train..


100%|██████████| 600/600 [27:24<00:00,  2.74s/it]


Epoch 30/100 Validation..


100%|██████████| 150/150 [00:29<00:00,  5.07it/s]


Epoch 31/100 Train..


100%|██████████| 600/600 [27:28<00:00,  2.75s/it]


Epoch 31/100 Validation..


100%|██████████| 150/150 [00:29<00:00,  5.01it/s]


Epoch 32/100 Train..


100%|██████████| 600/600 [28:10<00:00,  2.82s/it]


Epoch 32/100 Validation..


100%|██████████| 150/150 [00:30<00:00,  4.85it/s]


Epoch 33/100 Train..


100%|██████████| 600/600 [28:11<00:00,  2.82s/it]


Epoch 33/100 Validation..


100%|██████████| 150/150 [00:32<00:00,  4.64it/s]


Epoch 34/100 Train..


100%|██████████| 600/600 [28:22<00:00,  2.84s/it]


Epoch 34/100 Validation..


100%|██████████| 150/150 [00:32<00:00,  4.60it/s]


Epoch 35/100 Train..


100%|██████████| 600/600 [28:29<00:00,  2.85s/it]


Epoch 35/100 Validation..


100%|██████████| 150/150 [00:32<00:00,  4.68it/s]


Epoch 36/100 Train..


100%|██████████| 600/600 [28:30<00:00,  2.85s/it]


Epoch 36/100 Validation..


100%|██████████| 150/150 [00:31<00:00,  4.71it/s]


Epoch 37/100 Train..


100%|██████████| 600/600 [28:29<00:00,  2.85s/it]


Epoch 37/100 Validation..


100%|██████████| 150/150 [00:31<00:00,  4.75it/s]


Epoch 38/100 Train..


 76%|███████▋  | 459/600 [21:36<06:38,  2.82s/it]


KeyboardInterrupt: 