## Loading data

In [1]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

import numpy as np
import cv2
import matplotlib.pyplot as plt

import os
import cv2
import pdb
import time
import warnings
import random
import numpy as np
import pandas as pd
from tqdm import tqdm as tqdm
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.model_selection import train_test_split, KFold
import torch
import torch.nn as nn
from torch.nn import functional as F
import torch.optim as optim
import torch.backends.cudnn as cudnn
from torch.utils.data import DataLoader, Dataset, sampler
from matplotlib import pyplot as plt
from albumentations import (HorizontalFlip, ShiftScaleRotate, Normalize, Resize, Compose, GaussNoise)
from albumentations.pytorch.transforms import ToTensor
# Silence library warnings to keep cell output readable.
# Note: this also hides pandas SettingWithCopyWarning and deprecation notices.
warnings.filterwarnings("ignore")

# Seed every random number generator the pipeline touches.
seed = 69
random.seed(seed)
os.environ["PYTHONHASHSEED"] = str(seed)
np.random.seed(seed)
# torch.manual_seed seeds the CPU generator (weight init, dropout, samplers);
# seeding only the CUDA generator leaves all of that unseeded.
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True
# Benchmark mode auto-tunes convolution algorithms and may pick different
# kernels from run to run, so keep it off when determinism is requested.
torch.backends.cudnn.benchmark = False

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
# helper function for data visualization
def visualize(**images):
    """Plot the given images side by side in a single row.

    Each keyword argument is one panel: the keyword (underscores turned into
    spaces, title-cased) becomes the panel title and the value is passed to
    `plt.imshow`.
    """
    total = len(images)
    plt.figure(figsize=(16, 5))
    for position, (key, img) in enumerate(images.items(), start=1):
        plt.subplot(1, total, position)
        # hide axis ticks: only the picture matters here
        plt.xticks([])
        plt.yticks([])
        caption = key.replace('_', ' ').title()
        plt.title(caption)
        plt.imshow(img)
    plt.show()

### Dataloader

Writing a helper class for data extraction, transformation and preprocessing  
https://pytorch.org/docs/stable/data

In [4]:
from torch.utils.data import DataLoader
from torch.utils.data import Dataset as BaseDataset

In [5]:
def make_mask2(row_id, df):
    '''Decode the run-length-encoded labels of one dataframe row into a mask.

    Args:
        row_id: positional index of the row in `df`.
        df: dataframe indexed by image file name whose first four columns hold
            the RLE string ("start length start length ...") of defect
            classes 1-4; a missing value means the class is absent.

    Returns:
        (fname, masks): the row's index label and a float32 array of shape
        (256, 1600, 5). Channel 0 is the background (1 where no class is set);
        channels 1-4 are the binary masks of classes 1-4.
    '''
    row = df.iloc[row_id]
    fname = row.name
    labels = row.iloc[:4]
    masks = np.zeros((256, 1600, 5), dtype=np.float32)  # float32 is V.Imp

    for idx, label in enumerate(labels.values):
        # pd.isnull recognises every missing-value flavour (np.nan, None,
        # np.float64 NaN). An identity test against np.nan does not: rows from
        # an all-float frame yield np.float64 NaNs that are not `np.nan` itself.
        if pd.isnull(label):
            continue
        tokens = label.split(" ")
        positions = map(int, tokens[0::2])
        lengths = map(int, tokens[1::2])
        mask = np.zeros(256 * 1600, dtype=np.uint8)
        # NOTE(review): run starts are used as 0-based flat offsets; confirm
        # this matches the dataset's RLE convention.
        for pos, le in zip(positions, lengths):
            mask[pos:(pos + le)] = 1
        # runs are laid out column by column, hence Fortran order
        masks[:, :, idx + 1] = mask.reshape(256, 1600, order='F')
    # background = every pixel not claimed by any defect class
    masks[:, :, 0] = 1 - 1 * (masks.sum(axis=2) > 0)
    return fname, masks



class Dataset(BaseDataset):
    """Image dataset yielding per-class defect flags derived from the masks.

    Each item is a dict with the image under 'features', the 5-channel mask
    under 'masks' and a float tensor of four presence flags (classes 1-4)
    under 'targets'.

    Args:
        df: dataframe indexed by image file name, in the layout `make_mask2` reads.
        data_folder: directory containing the image files.
        classes: accepted for backward compatibility; not used.
        augmentation: optional callable invoked as
            `augmentation(image=..., mask=...)` returning a dict with
            'image' and 'mask'.
        preprocessing: optional callable with the same calling convention,
            applied after `augmentation`.
    """

    CLASSES = ['1', '2', '3', '4']

    def __init__(
            self,
            df, data_folder,
            classes=None,
            augmentation=None,
            preprocessing=None,
    ):
        self.df = df
        self.root = data_folder

        self.fnames = self.df.index.tolist()

        self.augmentation = augmentation
        self.preprocessing = preprocessing

    def __getitem__(self, idx):
        """Load, transform and label the sample at positional index `idx`.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        image_id, mask = make_mask2(idx, self.df)

        image_path = os.path.join(self.root, image_id)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread reports failure by returning None instead of raising;
            # fail here with the path rather than deep inside a transform.
            raise FileNotFoundError("Could not read image: {}".format(image_path))
        # OpenCV decodes to BGR; per-channel normalisation statistics of
        # pretrained encoders are given in RGB order.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # apply augmentations
        if self.augmentation:
            sample = self.augmentation(image=image, mask=mask)
            image, mask = sample['image'], sample['mask']

        # apply preprocessing
        if self.preprocessing:
            sample = self.preprocessing(image=image, mask=mask)
            image, mask = sample['image'], sample['mask']

        # A class is present when its mask channel has any positive pixel.
        # The mask is treated as channel-first once preprocessing has run (as
        # before); without preprocessing it is still (H, W, C) from make_mask2.
        spatial_axes = (1, 2) if self.preprocessing else (0, 1)
        # drop channel 0 (background) so targets line up with classes 1-4
        targets = 1 * (mask.sum(axis=spatial_axes)[1:] > 0)
        targets = torch.tensor(targets).float()

        return {'features': image, 'masks': mask, 'targets': targets}

    def __len__(self):
        """Number of rows (images) in the dataframe."""
        return len(self.fnames)

In [6]:
# Dataset locations, relative to the notebook's working directory.
# Only IMAGES_FULL is referenced by the cells visible in this notebook.
IMAGES_FULL= './input/severstal-steel-defect-detection/train_images/'
IMAGES_OUT= './input/images_crop256x256/'
MASKS_OUT = './input/masks_crop256x256/'
IMAGES_N_OUT = './input/neg_images__crop256x256/'

In [7]:
# Load the training labels: the CSV has an 'ImageId_ClassId' key column and an
# 'EncodedPixels' column.
df = pd.read_csv('./input/severstal-steel-defect-detection/train.csv')

# Split the "<image>_<class>" key into its two parts.
df['ImageId'], df['ClassId'] = zip(*df['ImageId_ClassId'].str.split('_'))
df['ClassId'] = df['ClassId'].astype(int)
# Reshape to one row per image with one column per class id holding EncodedPixels.
df = df.pivot(index='ImageId',columns='ClassId',values='EncodedPixels')
# Number of non-missing class columns per image.
df['defects'] = df.count(axis=1)

In [8]:
# Split the images into 10 shuffled folds (fixed seed, so the split is stable).
total_folds = 10
kfold = KFold(total_folds, shuffle=True,random_state=69)

# Only the first fold is used: it is the validation set, the rest is training.
train_idx, val_idx = next(kfold.split(df))
# Take explicit copies so that columns added later (e.g. 'classes' on
# train_df) are written to independent frames rather than to slices of `df`.
train_df, val_df = df.iloc[train_idx].copy(), df.iloc[val_idx].copy()

In [9]:
# Build a label frame for the test images in the same layout as `df`:
# four "<file>_<class>" keys per image (classes 1-4), all with missing labels.
names = []
for j in os.listdir('./input/severstal-steel-defect-detection/test_images'):
    for i in range(1,5):
        names.append(j+'_{}'.format(i))
test_df = pd.DataFrame(names)
test_df.columns = ['ImageId_ClassId']
test_df['EncodedPixels'] = np.nan

# Same key split and pivot as for the training frame.
test_df['ImageId'], test_df['ClassId'] = zip(*test_df['ImageId_ClassId'].str.split('_'))
test_df['ClassId'] = test_df['ClassId'].astype(int)
test_df = test_df.pivot(index='ImageId',columns='ClassId',values='EncodedPixels')
# Every EncodedPixels value is NaN, so this count is 0 for every row.
test_df['defects'] = test_df.count(axis=1)

In [10]:
import albumentations as albu

In [11]:
def get_training_augmentation():
    """Build the albumentations pipeline applied to training samples.

    Resizes to 256x256, then applies random horizontal (p=0.5) and vertical
    (p=0.3) flips and random brightness / contrast changes (p=0.1 each).
    """
    return albu.Compose([
        albu.Resize(256,256),
        albu.HorizontalFlip(p=0.5),
        albu.VerticalFlip(p=0.3),
        albu.RandomBrightness(p=0.1, limit=1),
        albu.RandomContrast(p=0.1, limit=1),
        # Disabled alternatives, kept for reference:
        #   albu.CropNonEmptyMaskIfExists(256,512,ignore_channels=[0], p=1., always_apply=True),
        #   albu.IAAAdditiveGaussianNoise(p=0.2),
    ])


def get_validation_augmentation():
    """Build the deterministic validation pipeline: resize to 256x256 only."""
    resize_only = albu.Resize(256,256)
    return albu.Compose([resize_only])


def to_tensor(x, **kwargs):
    """Move the last axis of a 3-D array to the front and cast to float32.

    Extra keyword arguments are accepted and ignored.
    """
    channels_first = x.transpose(2, 0, 1)
    return channels_first.astype('float32')


def get_preprocessing(preprocessing_fn):
    """Compose the final preprocessing step applied after augmentation.

    Args:
        preprocessing_fn (callable): data normalization function
            (can be specific for each pretrained neural network)
    Return:
        transform: albumentations.Compose that first normalizes the image and
            then passes both image and mask through `to_tensor`.
    """
    normalize = albu.Lambda(image=preprocessing_fn)
    channels_first = albu.Lambda(image=to_tensor, mask=to_tensor)
    return albu.Compose([normalize, channels_first])

## Create model and train

In [12]:
import pretrainedmodels
import segmentation_models_pytorch as smp



from torch import nn
def get_model(model_name: str, num_classes: int, pretrained: str = "imagenet"):
    """Build a `pretrainedmodels` network with a fresh classification head.

    Args:
        model_name: key of the constructor in `pretrainedmodels.__dict__`.
        num_classes: output size of the replacement `last_linear` layer.
        pretrained: value forwarded to the constructor's `pretrained` argument.

    Returns:
        The model, with `last_linear` replaced by a new `nn.Linear`.
    """
    constructor = pretrainedmodels.__dict__[model_name]
    network = constructor(num_classes=1000, pretrained=pretrained)

    # swap the 1000-way layer for one sized to the task, keeping its input width
    network.last_linear = nn.Linear(network.last_linear.in_features, num_classes)
    return network

In [13]:
# NOTE(review): model_name and DEVICE are not referenced by any later cell
# visible in this notebook; ENCODER_WEIGHTS is re-assigned in the next cell.
model_name = "resnet34"
ENCODER_WEIGHTS = 'imagenet'
DEVICE = 'cuda'

In [14]:
import torch

# The encoder name is used here only to look up its input-normalization
# function from segmentation_models_pytorch.
# NOTE(review): the network trained below is the CBAM ResidualNet, not this
# encoder; confirm that this normalization is the intended one.
ENCODER = 'se_resnext50_32x4d'
ENCODER_WEIGHTS = 'imagenet'
preprocessing_fn = smp.encoders.get_preprocessing_fn(ENCODER, ENCODER_WEIGHTS)

In [15]:
from model_resnet import ResidualNet

# The argument meanings are defined by the project-local model_resnet.ResidualNet.
# NOTE(review): presumably (network type, depth, number of classes, attention type); confirm.
model = ResidualNet( 'ImageNet', 50, 1000, 'CBAM')

In [16]:
# map_location="cpu" makes loading independent of the GPU layout the checkpoint
# was saved on (only device 0 is visible here via CUDA_VISIBLE_DEVICES).
# NOTE(review): absolute, machine-specific path; consider a path relative to the project.
state = torch.load("/home/dex/Downloads/cbam/cbam/RESNET50_CBAM_new_name_wrap.pth", map_location="cpu")


In [17]:
# Re-key the checkpoint weights without their 7-character wrapper prefix
# (presumably "module.", as added by nn.DataParallel; confirm against the file).
# The prefix is stripped only where present; slicing every key blindly would
# corrupt keys that do not carry it.
_prefix = "module."
new_state_dict = {
    (key[len(_prefix):] if key.startswith(_prefix) else key): value
    for key, value in state['state_dict'].items()
}

In [18]:
# 
#model.load_state_dict(new_state_dict)

In [19]:
# Replace the pooling layer with global average pooling and the classifier with
# dropout followed by a 4-output linear layer (one logit per defect class).
# NOTE(review): assumes the backbone feeds 2048 features into `fc`; confirm.
model.avgpool = nn.AdaptiveAvgPool2d(1)

model.fc = nn.Sequential(
#                       nn.BatchNorm1d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
                      nn.Dropout(p=0.5),
#                       nn.Linear(in_features=2048, out_features=512, bias=True),
#                       nn.ELU(True),
#                       nn.BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
#                       nn.Dropout(p=0.4),
                      nn.Linear(in_features=2048, out_features=4, bias=True)
                    )

In [20]:
# First construction of the datasets. Both are rebuilt in a later cell, after
# train_df has been given its 'classes' column and shuffled.
# The `classes` argument is accepted by Dataset but not used by it.
CLASSES = ['0','1','2','3','4']
train_dataset = Dataset(
    train_df, IMAGES_FULL, 
    augmentation=get_training_augmentation(), 
    preprocessing=get_preprocessing(preprocessing_fn),
    classes=CLASSES,
)


valid_dataset = Dataset(
    val_df, IMAGES_FULL, 
    augmentation=get_validation_augmentation(), 
    preprocessing=get_preprocessing(preprocessing_fn),
    classes=CLASSES,
)


In [21]:
# Encode each image's defect combination as one string: the isnull() flags of
# classes 1-4 joined by '&'. 'True' means that class is ABSENT, so
# 'True&True&True&True' is an image with no defects.
train_df['classes'] = train_df[1].isnull().astype(str)+'&'+train_df[2].isnull().astype(str)+'&'+train_df[3].isnull().astype(str)+'&'+train_df[4].isnull().astype(str)

In [22]:
# Reassign every multi-defect combination to one of the single-defect groups,
# so the balanced sampler only sees the "no defect" and single-defect buckets.
# Keys and values are isnull flags for classes 1-4 ('False' = class present).
_rare_to_common = {
    'False&False&True&True': 'True&False&True&True',
    'True&True&False&False': 'True&True&True&False',
    'False&True&False&True': 'False&True&True&True',
    'True&False&False&True': 'True&False&True&True',
    'False&False&False&True': 'True&False&True&True',
    'True&False&True&False': 'True&False&True&True',
}
for _rare, _common in _rare_to_common.items():
    train_df.loc[train_df['classes'] == _rare, 'classes'] = _common


In [23]:
train_df['classes'].value_counts()

True&True&True&True     5330
True&True&False&True    4276
False&True&True&True     761
True&True&True&False     725
True&False&True&True     219
Name: classes, dtype: int64

In [24]:
# Shuffle the training rows. Pinning random_state makes the order reproducible
# on a fresh run, independent of how much global NumPy RNG state earlier cells
# have consumed.
train_df = train_df.sample(frac=1., random_state=seed)

In [25]:
from catalyst.data.sampler import BalanceClassSampler

# Encode each 'classes' string as an integer label for the sampler.
labels, _ = pd.factorize(train_df.classes)
# NOTE(review): this rebinds the name `sampler`, shadowing the
# torch.utils.data.sampler module imported in the first cell.
sampler = BalanceClassSampler(labels, mode="downsampling")

In [26]:
# Rebuild the datasets so train_dataset follows the shuffled train_df that the
# sampler's labels were computed from.
train_dataset = Dataset(
    train_df, IMAGES_FULL, 
    augmentation=get_training_augmentation(), 
    preprocessing=get_preprocessing(preprocessing_fn),
    classes=CLASSES,
)

valid_dataset = Dataset(
    val_df, IMAGES_FULL, 
    augmentation=get_validation_augmentation(), 
    preprocessing=get_preprocessing(preprocessing_fn),
    classes=CLASSES,
)


# Training batches are drawn through the class-balancing sampler; validation
# iterates the fold in order.
train_loader = DataLoader(train_dataset, batch_size=8, num_workers=4, sampler=sampler)
valid_loader = DataLoader(valid_dataset, batch_size=8, shuffle=False, num_workers=4)

In [27]:
# Sanity check: shape of the first preprocessed training image.
train_dataset[0]['features'].shape

(3, 256, 256)

# Catalyst

In [28]:
# state = torch.load("./logs/segmentation_notebook/checkpoints/last.pth")
# model.load_state_dict(state['model_state_dict'])

## Training

In [29]:
import torch
import torch.nn as nn
from catalyst.dl import SupervisedRunner
from catalyst.dl.callbacks import DiceCallback


# folder for all the experiment logs
logdir = "./logs/cbam_v5/"
NUM_EPOCHS = 100

# Loaders keyed by stage name, as passed to runner.train below.
loaders = {
    "train": train_loader,
    "valid": valid_loader
}

# model, criterion, optimizer
# model = # already defined

# Criteria are keyed by name; the "bce" key is what the CriterionCallback in
# the training cell selects via criterion_key. BCEWithLogitsLoss takes the raw
# logits, which matches the 4-output linear head.
criterion = {
    "bce": nn.BCEWithLogitsLoss(),
}


# optimizer = torch.optim.SGD(model.parameters(), 0.1,
#                             momentum=0.9,
#                             weight_decay=1e-4)

# Single parameter group: the whole model is trained at lr=3e-4.
optimizer = torch.optim.Adam([
    {'params': model.parameters(), 'lr': 3e-4},])

# optimizer = torch.optim.Adam([
#     {'params': model.decoder.parameters(), 'lr': 3e-4}, 
    
#     # decrease lr for encoder in order not to permute 
#     # pre-trained weights with large gradients on training start
#     {'params': model.encoder.parameters(), 'lr': 3e-5},  
# ])

# Multiply the learning rate by 0.9 once the monitored metric has stopped
# improving for more than `patience` epochs. Which metric is monitored is
# decided by the runner and is not visible in this notebook.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.9, patience=3)

# model runner
# NOTE(review): input_target_key=None presumably leaves target handling to the
# explicit CriterionCallback; confirm against the Catalyst version in use.
runner = SupervisedRunner(input_target_key=None)

In [30]:
from catalyst.dl.callbacks import InferCallback, CheckpointCallback, CriterionCallback, CriterionAggregatorCallback
from catalyst.dl.callbacks import AccuracyCallback, AUCCallback, F1ScoreCallback
from new_metrics import MacroF1Callback, ConfusionMatrixCallback

In [31]:
# Run training. Checkpoints are ranked by 'macro_f1' (higher is better, since
# minimize_metric=False) and the three best are kept.
runner.train(
    model=model,
    criterion=criterion,
    scheduler=scheduler,
    callbacks=[               
        
               # BCE between the model output and batch['targets'], logged as "loss_bce"
               CriterionCallback(prefix="loss_bce",input_key='targets',
                     criterion_key='bce', multiplier=1.),

               # combines the listed losses into the single "loss" entry
               CriterionAggregatorCallback(prefix="loss",loss_keys=[ 'loss_bce']),
        
        
               CheckpointCallback(save_n_best=3),
        
               # project-local metric callbacks imported from new_metrics
               MacroF1Callback(),
        
               ConfusionMatrixCallback()],
               

    optimizer=optimizer,
    main_metric='macro_f1',
    minimize_metric=False,
    loaders=loaders,
    logdir=logdir,
    num_epochs=NUM_EPOCHS,
#     fp16={"opt_level": "O1"},
    verbose=False
)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

[2019-10-18 19:31:39,322] 
0/100 * Epoch 0 (train): 0_fscore_05=0.1834 | 0_fscore_best=0.4503 | 0_fscore_best_th=0.1500 | 0_precision_05=0.3563 | 0_precision_best=0.3020 | 0_recall_05=0.1235 | 0_recall_best=0.8845 | 1_fscore_05=0.3412 | 1_fscore_best=0.5545 | 1_fscore_best_th=0.2250 | 1_precision_05=0.4793 | 1_precision_best=0.4341 | 1_recall_05=0.2648 | 1_recall_best=0.7671 | 2_fscore_05=0.1480 | 2_fscore_best=0.4785 | 2_fscore_best_th=0.1000 | 2_precision_05=0.3875 | 2_precision_best=0.3176 | 2_recall_05=0.0914 | 2_recall_best=0.9705 | 3_fscore_05=0.2635 | 3_fscore_best=0.4459 | 3_fscore_best_th=0.2250 | 3_precision_05=0.5132 | 3_precision_best=0.3548 | 3_recall_05=0.1773 | 3_recall_best=0.6000 | _base/lr=0.0010 | _base/momentum=0.9000 | _timers/_fps=307.1837 | _timers/batch_time=0.0310 | _timers/data_time=0.0082 | _timers/model_time=0.0228 | fscore_macro_best=0.4823 | loss=0.4990 | loss_bce=0.4990 | macro_f1=0.2340
0/100 * Epoch 0 (valid): 0_fscore_05=0.2604 | 0_fscore_best=0.2604 |

[2019-10-18 19:35:58,591] 
5/100 * Epoch 5 (train): 0_fscore_05=0.3536 | 0_fscore_best=0.4992 | 0_fscore_best_th=0.2750 | 0_precision_05=0.5766 | 0_precision_best=0.4119 | 0_recall_05=0.2550 | 0_recall_best=0.6335 | 1_fscore_05=0.6551 | 1_fscore_best=0.7091 | 1_fscore_best_th=0.3750 | 1_precision_05=0.7174 | 1_precision_best=0.6627 | 1_recall_05=0.6027 | 1_recall_best=0.7626 | 2_fscore_05=0.2282 | 2_fscore_best=0.5137 | 2_fscore_best_th=0.2250 | 2_precision_05=0.4397 | 2_precision_best=0.3677 | 2_recall_05=0.1541 | 2_recall_best=0.8520 | 3_fscore_05=0.3841 | 3_fscore_best=0.5440 | 3_fscore_best_th=0.3000 | 3_precision_05=0.5833 | 3_precision_best=0.4777 | 3_recall_05=0.2864 | 3_recall_best=0.6318 | _base/lr=0.0003 | _base/momentum=0.9000 | _timers/_fps=314.7168 | _timers/batch_time=0.0311 | _timers/data_time=0.0107 | _timers/model_time=0.0203 | fscore_macro_best=0.5665 | loss=0.4340 | loss_bce=0.4340 | macro_f1=0.4053
5/100 * Epoch 5 (valid): 0_fscore_05=0.3883 | 0_fscore_best=0.4467 |

[2019-10-18 19:39:57,848] 
10/100 * Epoch 10 (train): 0_fscore_05=0.4496 | 0_fscore_best=0.5315 | 0_fscore_best_th=0.2500 | 0_precision_05=0.6397 | 0_precision_best=0.4265 | 0_recall_05=0.3466 | 0_recall_best=0.7052 | 1_fscore_05=0.7053 | 1_fscore_best=0.7494 | 1_fscore_best_th=0.4000 | 1_precision_05=0.7487 | 1_precision_best=0.7284 | 1_recall_05=0.6667 | 1_recall_best=0.7717 | 2_fscore_05=0.3064 | 2_fscore_best=0.5565 | 2_fscore_best_th=0.3000 | 2_precision_05=0.5455 | 2_precision_best=0.4450 | 2_recall_05=0.2130 | 2_recall_best=0.7426 | 3_fscore_05=0.6138 | 3_fscore_best=0.6446 | 3_fscore_best_th=0.3000 | 3_precision_05=0.7342 | 3_precision_best=0.5909 | 3_recall_05=0.5273 | 3_recall_best=0.7091 | _base/lr=0.0003 | _base/momentum=0.9000 | _timers/_fps=323.3319 | _timers/batch_time=0.0280 | _timers/data_time=0.0084 | _timers/model_time=0.0195 | fscore_macro_best=0.6205 | loss=0.3977 | loss_bce=0.3977 | macro_f1=0.5188
10/100 * Epoch 10 (valid): 0_fscore_05=0.4237 | 0_fscore_best=0.47

[2019-10-18 19:43:11,443] 
15/100 * Epoch 15 (train): 0_fscore_05=0.4645 | 0_fscore_best=0.5469 | 0_fscore_best_th=0.2000 | 0_precision_05=0.6013 | 0_precision_best=0.4153 | 0_recall_05=0.3785 | 0_recall_best=0.8008 | 1_fscore_05=0.7518 | 1_fscore_best=0.7820 | 1_fscore_best_th=0.4000 | 1_precision_05=0.7959 | 1_precision_best=0.7699 | 1_recall_05=0.7123 | 1_recall_best=0.7945 | 2_fscore_05=0.3713 | 2_fscore_best=0.5604 | 2_fscore_best_th=0.3250 | 2_precision_05=0.5471 | 2_precision_best=0.4767 | 2_recall_05=0.2810 | 2_recall_best=0.6798 | 3_fscore_05=0.6473 | 3_fscore_best=0.6816 | 3_fscore_best_th=0.3250 | 3_precision_05=0.6907 | 3_precision_best=0.6185 | 3_recall_05=0.6091 | 3_recall_best=0.7591 | _base/lr=0.0002 | _base/momentum=0.9000 | _timers/_fps=321.6021 | _timers/batch_time=0.0287 | _timers/data_time=0.0088 | _timers/model_time=0.0198 | fscore_macro_best=0.6427 | loss=0.3755 | loss_bce=0.3755 | macro_f1=0.5587
15/100 * Epoch 15 (valid): 0_fscore_05=0.4807 | 0_fscore_best=0.49

KeyboardInterrupt: 

In [None]:
# A DataLoader is an iterable, not an iterator: calling next() on it directly
# raises TypeError. Wrap it in iter() to fetch one batch for inspection.
next(iter(loaders['train']))