# Catalyst Segmentation

From: https://github.com/chizhu/kaggle-severstal

Applies 5-class classification (including a background class), followed by 4-class segmentation.

Classification: resnet50, efficientnet-b3 and se-resnext50.

Segmentation: Unet with resnet18, PSPNet with resnet18 and FPN with resnet50.

In [1]:
# mount the google drive
from google.colab import drive
# force_remount=True asks Colab to mount again even if a mount already exists in this session
drive.mount('/content/gdrive', force_remount=True)
# both paths keep their trailing '/' because later cells build paths by plain string concatenation
root_dir = "/content/gdrive/My Drive/"
base_dir = root_dir + 'Steel Segmentation/'

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive


## Copy support files

In [2]:
import os

In [3]:
# copy the specified folder from google drive
def copy_support_directory( name, source_root=None ):
  """Replace the local folder `name` with a fresh copy taken from Google Drive.

  Args:
    name: folder name, used both as the sub-folder of the source root and as
      the destination path relative to the current working directory.
    source_root: directory that contains `name`; defaults to the notebook-level
      `base_dir` when omitted.

  Raises:
    FileNotFoundError: if the source folder does not exist. The check runs
      before anything is deleted, so an existing local copy is left intact.
  """
  import shutil  # local import: the notebook's import cell only brings in `os`

  if source_root is None:
    source_root = base_dir
  gd_dir = os.path.join(source_root, name)

  # fail loudly instead of leaving behind an empty folder that breaks later imports
  if not os.path.isdir(gd_dir):
    raise FileNotFoundError('support directory not found: {}'.format(gd_dir))

  # start from a clean destination so files removed on Drive do not linger locally
  if os.path.isdir(name) and not os.path.islink(name):
    shutil.rmtree(name)
  elif os.path.lexists(name):
    os.remove(name)

  # copytree creates the destination itself; unlike a shell `*` glob it also copies dotfiles
  shutil.copytree(gd_dir, name)

In [4]:
# fetch each helper package that the training cells import from
for support_name in ('utils', 'schedulers', 'transforms', 'losses', 'datasets', 'optimizers'):
  copy_support_directory( support_name )

## Copy python models

In [5]:
# fetch the 'models' folder from base_dir on Drive into the working directory
copy_support_directory( 'models' )

## Copy config files

In [6]:
# fetch the 'config' folder (it holds the config/seg/*.yml files passed to main() later on)
copy_support_directory( 'config' )

## Copy the original image data

In [None]:
# %%time

# # the directory containing the original competition data
# data_dir = 'SteelDefect'

# # !rm -r {data_dir}

# # test if the images are already here
# if not os.path.isdir(data_dir):  
#   print("Downloading and preparing data - takes about 1.5 minutes")
  
#   # get and unzip the competition data
#   !cp '/content/gdrive/My Drive/Steel Segmentation/Input/Images/severstal-steel-defect-detection.zip' .
#   !mkdir -p {data_dir}
#   !unzip -q severstal-steel-defect-detection.zip -d {data_dir}
#   !rm severstal-steel-defect-detection.zip

Downloading and preparing data - takes about 1.5 minutes
CPU times: user 190 ms, sys: 39.1 ms, total: 229 ms
Wall time: 1min 33s


In [7]:
%%time

# get the image zip files using gdown as drive mapping was timing out
import gdown

# the directory containing the original competition data
data_dir = 'SteelDefect'

# test if the images are already here
if os.path.isdir(data_dir):
  !rm -r {data_dir}

!gdown --id 1fE3ITnDMGWdyckgynmLhWEN4DzJKDxtw
!mkdir -p {data_dir}
!unzip -q severstal-steel-defect-detection.zip -d {data_dir}
!rm severstal-steel-defect-detection.zip

Downloading...
From: https://drive.google.com/uc?id=1fE3ITnDMGWdyckgynmLhWEN4DzJKDxtw
To: /content/severstal-steel-defect-detection.zip
1.68GB [00:19, 86.6MB/s]
CPU times: user 168 ms, sys: 56.3 ms, total: 225 ms
Wall time: 49.6 s


## Copy created files

In [8]:
# Drive sub-folder the two CSV files below are read from
input_dir = base_dir + 'Input/'
# orig_train.csv is placed inside the data directory; folds.csv goes in the current working directory
!cp '{input_dir}orig_train.csv' '{data_dir}/.'
!cp '{input_dir}folds.csv' .

In [9]:
# sanity check: list the working directory after the copy steps above
!ls 

config	  folds.csv  losses  optimizers   schedulers   transforms
datasets  gdrive     models  sample_data  SteelDefect  utils


## Install Libraries

In [10]:
# NOTE(review): only ttach is version-pinned here, so a fresh run may install releases
# newer than the ones this notebook was written against — confirm working versions and pin them.

# for augmentations
!pip install albumentations -q

# for pretrained segmentation models for PyTorch
!pip install segmentation-models-pytorch -q

# for TTA
!pip install ttach==0.0.2 -q

# for Catalyst
!pip install -U catalyst -q

[?25l[K     |▌                               | 10kB 31.8MB/s eta 0:00:01[K     |█                               | 20kB 2.1MB/s eta 0:00:01[K     |█▋                              | 30kB 3.0MB/s eta 0:00:01[K     |██                              | 40kB 2.0MB/s eta 0:00:01[K     |██▋                             | 51kB 2.4MB/s eta 0:00:01[K     |███▏                            | 61kB 2.7MB/s eta 0:00:01[K     |███▋                            | 71kB 3.0MB/s eta 0:00:01[K     |████▏                           | 81kB 3.3MB/s eta 0:00:01[K     |████▊                           | 92kB 3.0MB/s eta 0:00:01[K     |█████▏                          | 102kB 3.2MB/s eta 0:00:01[K     |█████▊                          | 112kB 3.2MB/s eta 0:00:01[K     |██████▎                         | 122kB 3.2MB/s eta 0:00:01[K     |██████▊                         | 133kB 3.2MB/s eta 0:00:01[K     |███████▎                        | 143kB 3.2MB/s eta 0:00:01[K     |███████▉                  

In [11]:
# NOTE(review): 'utils' was already copied in an earlier cell; presumably this refreshes it
# after edits on Drive — confirm it is still needed or remove it
copy_support_directory( 'utils' )

In [12]:
# list the segmentation config files available under config/seg
!ls -lt config/seg

total 12
-rw------- 1 root root 1029 Jul 15 08:37 003_resnet50_fpn_fold0.yml
-rw------- 1 root root 1029 Jul 15 08:37 001_resnet18_Unet_fold0.yml
-rw------- 1 root root 1063 Jul 15 08:37 002_resnet18_PSPNet_fold0.yml


In [13]:
import argparse
import warnings
from pathlib import Path

warnings.filterwarnings("ignore")

from catalyst.dl import SupervisedRunner
from catalyst.dl.callbacks import DiceCallback, IouCallback, CheckpointCallback, MixupCallback
import segmentation_models_pytorch as smp

from utils.config import load_config, save_config
from utils.callbacks import CutMixCallback
from datasets import make_loader
from optimizers import get_optimizer
from losses import get_loss
from schedulers import get_scheduler
from transforms import get_transforms


In [14]:
def run(config_file):
    """Train one segmentation model as described by a configuration file.

    The function loads the configuration, saves a copy of it into
    ``config.work_dir``, builds the 'train' and 'valid' data loaders, creates
    the ``segmentation_models_pytorch`` model named by ``config.model.arch``,
    and trains it with a Catalyst ``SupervisedRunner``. Logs and checkpoints
    are written under ``config.work_dir``.

    Args:
        config_file: path of the configuration file handed to ``load_config``.
    """
    config = load_config(config_file)

    # keep a copy of the configuration next to the training output
    os.makedirs(config.work_dir, exist_ok=True)
    save_config(config, os.path.join(config.work_dir, 'config.yml'))

    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    # the validation phase uses the transforms configured under `test`
    all_transforms = {
        'train': get_transforms(config.transforms.train),
        'valid': get_transforms(config.transforms.test),
    }

    dataloaders = {
        phase: make_loader(
            data_folder=config.data.train_dir,
            df_path=config.data.train_df_path,
            phase=phase,
            batch_size=config.train.batch_size,
            num_workers=config.num_workers,
            idx_fold=config.data.params.idx_fold,
            transforms=all_transforms[phase],
            num_classes=config.data.num_classes,
            pseudo_label_path=config.train.pseudo_label_path,
            debug=config.debug
        )
        for phase in ['train', 'valid']
    }

    # create segmentation model with pre trained encoder
    model = getattr(smp, config.model.arch)(
        encoder_name=config.model.encoder,
        encoder_weights=config.model.pretrained,
        classes=config.data.num_classes,
        activation=None,
    )

    # train setting: decoder and encoder get separate learning rates
    criterion = get_loss(config)
    params = [
        {'params': model.decoder.parameters(), 'lr': config.optimizer.params.decoder_lr},
        {'params': model.encoder.parameters(), 'lr': config.optimizer.params.encoder_lr},
    ]
    optimizer = get_optimizer(params, config)
    scheduler = get_scheduler(optimizer, config)

    # model runner
    runner = SupervisedRunner(model=model)

    callbacks = [DiceCallback(), IouCallback()]

    # Resume from a previous run when its checkpoint exists. The existence
    # check looks at the very file that is resumed from, so a work_dir holding
    # only `best.pth` no longer triggers a resume from a missing file.
    resume_path = os.path.join(config.work_dir, 'checkpoints', 'best_full.pth')
    if os.path.exists(resume_path):
        callbacks.append(CheckpointCallback(resume=resume_path))

    if config.train.mixup:
        callbacks.append(MixupCallback())

    if config.train.cutmix:
        callbacks.append(CutMixCallback())

    # model training
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=dataloaders,
        logdir=config.work_dir,
        num_epochs=config.train.num_epochs,
        callbacks=callbacks,
        verbose=True,
        fp16=True,
    )


def parse_args():
    """Read the command line and return the parsed namespace.

    The only option is ``--config``; its value is exposed as ``config_file``
    on the returned namespace and is ``None`` when the option is not given.
    """
    cli = argparse.ArgumentParser(description='Severstal')
    config_option = {
        'dest': 'config_file',
        'type': str,
        'default': None,
        'help': 'configuration file path',
    }
    cli.add_argument('--config', **config_option)
    parsed = cli.parse_args()
    return parsed


def main(config_file):
    """Print a short banner and start training with the given configuration.

    Args:
        config_file: path of the configuration file passed on to ``run``.

    Raises:
        ValueError: if ``config_file`` is None.
    """
    print('train Severstal Steel Defect Detection.')

    if config_file is None:
        # ValueError is still an Exception, so callers catching Exception keep working
        raise ValueError('no configuration file')

    print('load config from {}'.format(config_file))
    run(config_file)

In [15]:
# One configuration is trained per run; change which line is uncommented to pick another one.
# main('config/seg/001_resnet18_Unet_fold0.yml')
# main('config/seg/002_resnet18_PSPNet_fold0.yml')
main('config/seg/003_resnet50_fpn_fold0.yml')


train Severstal Steel Defect Detection.
load config from config/seg/003_resnet50_fpn_fold0.yml


Downloading: "https://download.pytorch.org/models/resnet50-19c8e357.pth" to /root/.cache/torch/checkpoints/resnet50-19c8e357.pth


HBox(children=(FloatProgress(value=0.0, max=102502400.0), HTML(value='')))


1/40 * Epoch (train): 100% 629/629 [09:11<00:00,  1.14it/s, dice=0.397, iou=0.247, loss=0.681]
1/40 * Epoch (valid): 100% 158/158 [00:45<00:00,  3.45it/s, dice=0.368, iou=0.226, loss=0.644]
[2020-07-15 08:49:09,128] 
1/40 * Epoch 1 (_base): lr=0.0010 | momentum=0.9000
1/40 * Epoch 1 (train): dice=0.4953 | iou=0.3477 | loss=0.5467
1/40 * Epoch 1 (valid): dice=0.5939 | iou=0.4331 | loss=0.4434
2/40 * Epoch (train): 100% 629/629 [09:10<00:00,  1.14it/s, dice=2.232e-05, iou=1.116e-05, loss=1.007]
2/40 * Epoch (valid): 100% 158/158 [00:46<00:00,  3.43it/s, dice=0.623, iou=0.453, loss=0.382]
[2020-07-15 08:59:09,798] 
2/40 * Epoch 2 (_base): lr=0.0010 | momentum=0.9000
2/40 * Epoch 2 (train): dice=0.6343 | iou=0.4722 | loss=0.3952
2/40 * Epoch 2 (valid): dice=0.6759 | iou=0.5184 | loss=0.3527
3/40 * Epoch (train): 100% 629/629 [09:10<00:00,  1.14it/s, dice=0.560, iou=0.389, loss=0.481]
3/40 * Epoch (valid): 100% 158/158 [00:45<00:00,  3.44it/s, dice=0.578, iou=0.406, loss=0.431]
[2020-07-15

## Copy the results up to Google Drive

In [16]:
# !cp -r 001_resnet18_Unet_fold0 '/content/gdrive/My Drive/Steel Segmentation/'
# !cp -r 002_resnet18_PSPNet_fold0 '/content/gdrive/My Drive/Steel Segmentation/'
!cp -r 003_resnet50_fpn_fold0 '/content/gdrive/My Drive/Steel Segmentation/'