In [1]:
import numpy as np
import tensorflow as tf
import random as rn
from tensorflow.keras import backend as K

# Create a TF1 session on the default graph with both intra-op and inter-op
# thread pools limited to a single thread, and register it as the session
# used by the Keras backend.
# NOTE(review): presumably meant to reduce run-to-run nondeterminism in
# combination with set_seed() — confirm.
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1,
                              inter_op_parallelism_threads=1)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)

import jovian

import os
os.environ["TF_KERAS"] = "1" # for radam env
import gc

import sys
sys.path.append('..')
from harang import vision, utils

from classification_models.tfkeras import Classifiers

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cv2


from keras_radam import RAdam

import efficientnet.tfkeras as eff
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model, load_model

from sklearn.metrics import precision_recall_curve, auc

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


<IPython.core.display.Javascript object>

W1116 22:38:10.615942  9220 deprecation_wrapper.py:119] From ..\harang\vision.py:22: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.

W1116 22:38:10.616941  9220 deprecation_wrapper.py:119] From ..\harang\vision.py:24: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.

W1116 22:38:10.641947  9220 deprecation_wrapper.py:119] From ..\harang\vision.py:25: The name tf.keras.backend.set_session is deprecated. Please use tf.compat.v1.keras.backend.set_session instead.



In [2]:
# ---------------------------------------------------------------------------
# Run configuration
# ---------------------------------------------------------------------------

# Image geometry constants. Neither is read by the cells shown in this
# notebook; img_size equals the (320, 480) spatial dimensions of the loaded
# image arrays.
target_size = (350, 525)
img_size = (320, 480)

# Learning rate handed to both the optimizer and the training callback.
start_lr = 0.0001

# Augmentation settings, unpacked as keyword arguments into the training
# vision.Generator. Each entry carries its own parameters and probability `p`.
# NOTE(review): the short keys look like horizontal flip / vertical flip /
# hue-saturation-value / shift-scale-rotate / brightness-contrast — confirm
# against harang.vision.Generator.
augments = dict(
    hf=dict(p=0.5),
    vf=dict(p=0.5),
    hsv=dict(hue_shift_limit=5, sat_shift_limit=5, val_shift_limit=5, p=0.5),
    ssr=dict(shift_limit=0.1, scale_limit=0, rotate_limit=180, border_mode=0, value=(0, 0, 0), p=0.5),
    bc=dict(brightness_limit=0.1, contrast_limit=0.1, p=0.5),
    rgb_shift=dict(r_shift_limit=5, g_shift_limit=5, b_shift_limit=5, p=0.5),
    gamma=dict(gamma_limit=(70, 130), p=0.5),
)

# Directory prefix from which pseudo-labelled data and initial backbone
# weights are read; None switches pseudo-labelling off in get_data().
pseudo = 'stage1'
# Directory prefix under which this run writes models, weights and predictions.
stage = 'stage2'

In [3]:
def pr_auc(true, pred):
    """Return the mean area under the precision-recall curve across classes.

    Parameters
    ----------
    true : 2-D array, indexed as ``true[:, class_i]``
        Binary ground-truth labels, one column per class.
    pred : 2-D array, indexed as ``pred[:, class_i]``
        Predicted scores with the same column layout as ``true``.

    Returns
    -------
    float
        Unweighted mean of the per-class PR-AUC values, where each value is
        ``auc(recall, precision)`` of that column's precision-recall curve.
    """
    # Take the class count from the data instead of assuming exactly four
    # columns, so extra columns are no longer silently ignored.
    n_classes = true.shape[1]
    mean_score = 0
    for class_i in range(n_classes):
        precision, recall, _ = precision_recall_curve(true[:, class_i], pred[:, class_i])
        # Accumulate score/n per class (same arithmetic as before for four
        # classes); the per-class value is kept anonymous so no local shadows
        # the function's own name.
        mean_score += auc(recall, precision) / n_classes
    return mean_score

def metric_fn(model, x_val, y_val, preprocess_input):
    """Score ``model`` on a validation split with ``pr_auc``.

    A copy of ``x_val`` is run through ``preprocess_input`` and predicted in
    batches of ``batch_size*4``. ``batch_size`` is not a parameter: it is read
    from the notebook's global namespace when the function is called.

    Both labels and predictions are flipped (``1 - value``) before scoring, so
    the returned value is the mean PR-AUC of the inverted problem.
    """
    model_input = preprocess_input(x_val.copy())
    y_pred = model.predict(model_input, batch_size=batch_size*4)
    flipped_true = 1 - y_val
    flipped_pred = 1 - y_pred
    return pr_auc(flipped_true, flipped_pred)

In [4]:
def get_data():
    """Load training data, test data, folds and the sample submission.

    Reads the pickles under ``data/`` through ``utils.from_pickle`` and the
    sample submission CSV through pandas. The pickled ``y_data`` is summed over
    axes 1 and 2 and turned into a ``uint8`` flag that is 1 where the sum is
    non-zero.

    Returns
    -------
    tuple
        ``(x_data, y_data, folds, test, sub)`` when the global ``pseudo`` is
        None; otherwise
        ``(x_data, y_data, x_pseudo, y_pseudo, folds, test, sub)`` with the
        pseudo-labelled arrays read from ``{pseudo}/pseudo/cls/``.
    """
    images = utils.from_pickle('data/x_data.pkl')
    # Reduce first, then threshold, so the full-size pickle is released early.
    labels = utils.from_pickle('data/y_data.pkl').sum(axis=(1, 2))
    labels = (labels != 0).astype(np.uint8)
    test_images = utils.from_pickle('data/test.pkl')
    submission = pd.read_csv('data/sample_submission.csv')
    fold_indices = utils.from_pickle('data/folds.pkl')

    if pseudo is not None:
        pseudo_images = utils.from_pickle(f'{pseudo}/pseudo/cls/x_pseudo.pkl')
        pseudo_labels = utils.from_pickle(f'{pseudo}/pseudo/cls/y_pseudo.pkl')
        return images, labels, pseudo_images, pseudo_labels, fold_indices, test_images, submission

    return images, labels, fold_indices, test_images, submission

In [5]:
def set_seed(seed):
    """Seed every random number source this notebook uses with ``seed``.

    Covers Python's ``random`` module (imported as ``rn``), NumPy's global
    generator and TensorFlow's graph-level seed (``tf.set_random_seed``).
    """
    rn.seed(seed)
    np.random.seed(seed)
    tf.set_random_seed(seed)

In [6]:
# Load everything once. get_data() returns a 5-tuple without pseudo-labels and
# a 7-tuple with them, so both the unpacking and the shape report are done per
# branch; printing x_pseudo/y_pseudo unconditionally would raise NameError (or
# show stale kernel values) when pseudo is None.
if pseudo is None:
    x_data, y_data, folds, test, sub = get_data()
    # Bind the pseudo names explicitly so a value left over from an earlier
    # run of this cell cannot leak into later cells.
    x_pseudo = y_pseudo = None
    print(x_data.shape, y_data.shape, test.shape)
else:
    x_data, y_data, x_pseudo, y_pseudo, folds, test, sub = get_data()
    print(x_data.shape, y_data.shape, x_pseudo.shape, y_pseudo.shape, test.shape)

(5546, 320, 480, 3) (5546, 4) (13471, 320, 480, 3) (13471, 4) (3698, 320, 480, 3)


In [8]:
# (architecture name, training batch size) pairs, unpacked in this order by the
# training loop. Validation and TTA prediction use 4x the listed batch size.
refs = [
    ('efficientnetb2', 12),
    ('efficientnetb3', 9),
#     ('efficientnetb4', 6),  # disabled for this run
]

In [10]:
# Resume state: start from empty accumulators and load the predictions that
# were already saved for EfficientNetB3 folds 0 and 1, so the training loop
# only has to add the remaining folds.
oof_pred = np.zeros(y_data.shape, dtype=np.float32)
test_pred = np.zeros((len(test), 4), dtype=np.float32)

# (fold index, out-of-fold prediction file, test prediction file)
finished_folds = [
    (0,
     'stage2/oof_preds/cls/efficientnetb3_0_0.8152.pkl',
     'stage2/test_preds/cls/efficientnetb3_0_0.8152.pkl'),
    (1,
     'stage2/oof_preds/cls/efficientnetb3_1_0.8168.pkl',
     'stage2/test_preds/cls/efficientnetb3_1_0.8168.pkl'),
]
for fold_idx, oof_path, test_path in finished_folds:
    # folds[k][1] holds the validation indices of fold k.
    oof_pred[folds[fold_idx][1]] = utils.from_pickle(oof_path)
    # Each fold contributes one fifth of the averaged test prediction.
    test_pred += utils.from_pickle(test_path)/5

In [16]:
# Train (or resume training of) every architecture in `refs` over 5 folds.
# For each trained fold this saves the full model, the backbone weights and
# the TTA-averaged validation/test predictions; for each architecture with at
# least one trained fold it saves the combined out-of-fold and test predictions.
gc.collect();

# Resume bookkeeping: `resume_count` is incremented once per (architecture,
# fold) pair in iteration order, and the first `resume_from` pairs are skipped.
resume_from = 7
resume_count = 0

for arch_name, batch_size in refs:

    print(f'ARCHITECTURE: {arch_name.upper()}')

    if arch_name.startswith('efficientnet'):
        preprocess_input = eff.preprocess_input
        if arch_name == 'efficientnetb4':
            arch = eff.EfficientNetB4
        elif arch_name == 'efficientnetb3':
            arch = eff.EfficientNetB3
        elif arch_name == 'efficientnetb2':
            arch = eff.EfficientNetB2
        else:
            # Without this branch an unlisted variant would silently reuse the
            # `arch` left over from the previous iteration (or hit a NameError
            # on the first one) and save its results under the wrong name.
            raise ValueError(f'Unsupported EfficientNet variant: {arch_name!r}')
    else:
        arch, preprocess_input = Classifiers.get(arch_name)

    # Start from empty accumulators only when every skipped pair belongs to an
    # earlier architecture. Otherwise keep the existing oof_pred/test_pred,
    # which are expected to already hold the skipped folds' predictions.
    if resume_count >= resume_from:
        oof_pred = np.zeros(y_data.shape, dtype=np.float32)
        test_pred = np.zeros((len(test),4), dtype=np.float32)

    for i in range(5):

        resume_count += 1
        if resume_count <= resume_from:
            continue

        print(f'FOLD: {i}')

        # fold[0] indexes the training rows, fold[1] the validation rows.
        fold = folds[i]
        x_train, x_val, y_train, y_val = x_data[fold[0]], x_data[fold[1]], y_data[fold[0]], y_data[fold[1]]

        if pseudo:
            # Append the pseudo-labelled samples to the training split and use
            # the stronger regularisation settings.
            x_train = np.concatenate([x_train, x_pseudo])
            y_train = np.concatenate([y_train, y_pseudo])

            stochastic_depth = (0.8, 'linear_decay')
            drop_connect_rate = 0.3

        else:
            stochastic_depth = None
            drop_connect_rate = 0.2

        set_seed(i)

        # Backbone without the classification head, initialised from the
        # per-fold weights file under `{pseudo}/cls_weights/`.
        # NOTE(review): when `pseudo` is None this path becomes
        # 'None/cls_weights/...'; the no-pseudo branch above therefore looks
        # unusable as written — confirm which weights stage 1 should load.
        pretrained = arch(input_shape=(None, None, 3), include_top=False, weights=f'{pseudo}/cls_weights/{arch_name}_{i}.h5',
                          gn=False, stochastic_depth=stochastic_depth, drop_connect_rate=drop_connect_rate)
        # Classification head: global average pooling followed by four
        # independent sigmoid outputs, trained with binary cross-entropy.
        x = pretrained.output
        x = GlobalAveragePooling2D()(x)
        output = Dense(4, activation='sigmoid')(x)
        model = Model(inputs=pretrained.input, outputs=output)
        model.compile(optimizer=RAdam(lr=start_lr), loss='binary_crossentropy')

        train_generator = vision.Generator(
            x_train,
            y_train,
            batch_size=batch_size,
            augment='image',
            preprocess_input=preprocess_input,
            **augments
        )

        # The callback receives a one-argument scorer bound to this fold's
        # validation split.
        # NOTE(review): judging by the training log, the callback scores each
        # epoch, restores best weights, reduces the LR on plateau and ends
        # training, so epochs=1000 is presumably only an upper bound — confirm
        # in harang.vision.KerasCallback.
        cb = vision.KerasCallback(
            metric_fn=lambda x: metric_fn(x, x_val, y_val, preprocess_input),
            rp=True,
            decay_factor=1,
            lr=start_lr,
            patience=3,
            rp_patience=1
        )

        history = model.fit_generator(
            train_generator,
            epochs=1000,
            verbose=1,
            callbacks=[cb],
        )

        print(cb.best_score)
        print(cb.lr_schedule)

        # File names embed the first six characters of the best score.
        save_name = f'{arch_name}_{i}_{str(cb.best_score)[:6]}'
        model.save(f'{stage}/models/cls/{save_name}.h5')
        # Also save only the sub-model up to the 'top_activation' layer (i.e.
        # without the pooling/Dense head) under `{stage}/cls_weights/`.
        Model(inputs=model.input, outputs=model.get_layer('top_activation').output).save_weights(f'{stage}/cls_weights/{arch_name}_{i}.h5')

        # Test-time augmentation: average predictions over the four on/off
        # combinations of `hf` and `vf`, for both validation and test data.
        val_pred = np.zeros(y_val.shape, dtype=np.float32)
        tmp_test_pred = np.zeros((len(test),4), dtype=np.float32)
        for hf in [{'p': 1.0}, False]:
            for vf in [{'p': 1.0}, False]:
                tta_generator = vision.Generator(
                    x_val,
                    batch_size=batch_size*4,
                    augment='image',
                    hf=hf,
                    vf=vf,
                    preprocess_input=preprocess_input
                )
                val_pred += model.predict_generator(tta_generator, verbose=1)/4
                tta_generator = vision.Generator(
                    test,
                    batch_size=batch_size*4,
                    augment='image',
                    hf=hf,
                    vf=vf,
                    preprocess_input=preprocess_input
                )
                tmp_test_pred += model.predict_generator(tta_generator, verbose=1)/4


        utils.to_pickle(f'{stage}/oof_preds/cls/{save_name}.pkl', val_pred)
        utils.to_pickle(f'{stage}/test_preds/cls/{save_name}.pkl', tmp_test_pred)

        # Fill this fold's rows of the out-of-fold matrix and add one fifth of
        # this fold's test prediction to the running average.
        oof_pred[fold[1]] = val_pred
        test_pred += tmp_test_pred/5

        # Drop the Keras/TF graph state and the model before the next fold.
        K.clear_session()
        gc.collect();
        del model
        gc.collect();

        jovian.commit(nb_filename='classification.ipynb', secret=True, env_type='pip')

    # Only architectures for which at least one fold was actually processed
    # reach this point with resume_count > resume_from.
    if resume_count > resume_from:
        # NOTE(review): this scores the raw labels/predictions, whereas the
        # per-fold metric_fn scores the inverted ones (1 - y), so the OOF
        # score and the per-fold scores are not directly comparable — confirm
        # this is intended.
        score_str = str(pr_auc(y_data, oof_pred))[:6]
        print(f'OOF SCORE: {score_str}')
        utils.to_pickle(f'{stage}/oof_preds/cls/{arch_name}_{score_str}.pkl', oof_pred)
        utils.to_pickle(f'{stage}/test_preds/cls/{arch_name}_{score_str}.pkl', test_pred)

        jovian.commit(nb_filename='classification.ipynb', secret=True, env_type='pip')

ARCHITECTURE: EFFICIENTNETB2
ARCHITECTURE: EFFICIENTNETB3
FOLD: 2


W1116 22:42:47.695070  9220 deprecation.py:573] From c:\users\vnfma\gpu\lib\site-packages\tensorflow\python\util\deprecation.py:507: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with distribution=normal is deprecated and will be removed in a future version.
Instructions for updating:
`normal` is a deprecated alias for `truncated_normal`
W1116 22:42:47.745081  9220 deprecation.py:506] From c:\users\vnfma\gpu\lib\site-packages\tensorflow\python\ops\init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W1116 22:42:48.141170  9220 deprecation.py:323] From c:\users\vnfma\gpu\lib\site-packages\tensorflow\python\keras\backend.py:5279: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a fut

Epoch 1/1000
Score: 0.8104 | LR to: 9.999999747378752e-05                                                                                                     
TIME SPENT: 935
Epoch 2/1000
Score: 0.8140 | LR to: 9.999999747378752e-05                                                                                                     
TIME SPENT: 873
Epoch 3/1000
Score: 0.8135 | LR to: 9.999999747378752e-05                                                                                                     
Restoring Best Weights
Reducing LR on Plateau, Setting LR to 2.9999999242136255e-05
TIME SPENT: 884
Epoch 4/1000
Score: 0.8170 | LR to: 2.9999999242136255e-05                                                                                                     
TIME SPENT: 875
Epoch 5/1000
Score: 0.8191 | LR to: 2.9999999242136255e-05                                                                                                     
TIME SPENT: 872
Epoch 6/1000
Score: 0.8181 | LR to: 2.9

<IPython.core.display.Javascript object>

[jovian] Updating notebook "a23d9c77b5984ba7b3a6471c818d8fe3" on https://jovian.ml/
[jovian] Uploading notebook..
[jovian] Capturing environment..
[jovian] Committed successfully! https://jovian.ml/harangdev/a23d9c77b5984ba7b3a6471c818d8fe3
FOLD: 3
Epoch 1/1000
Score: 0.8058 | LR to: 9.999999747378752e-05                                                                                                     
TIME SPENT: 935
Epoch 2/1000
Score: 0.8062 | LR to: 9.999999747378752e-05                                                                                                     
TIME SPENT: 882
Epoch 3/1000
Score: 0.8068 | LR to: 9.999999747378752e-05                                                                                                     
TIME SPENT: 893
Epoch 4/1000
Score: 0.8083 | LR to: 9.999999747378752e-05                                                                                                     
TIME SPENT: 895
Epoch 5/1000
Score: 0.8075 | LR to: 9.9999997473787

<IPython.core.display.Javascript object>

[jovian] Updating notebook "a23d9c77b5984ba7b3a6471c818d8fe3" on https://jovian.ml/
[jovian] Uploading notebook..
[jovian] Capturing environment..
[jovian] Committed successfully! https://jovian.ml/harangdev/a23d9c77b5984ba7b3a6471c818d8fe3
FOLD: 4
Epoch 1/1000
Score: 0.7913 | LR to: 9.999999747378752e-05                                                                                                     
TIME SPENT: 940
Epoch 2/1000
Score: 0.7885 | LR to: 9.999999747378752e-05                                                                                                     
Restoring Best Weights
Reducing LR on Plateau, Setting LR to 2.9999999242136255e-05
TIME SPENT: 882
Epoch 3/1000
Score: 0.7915 | LR to: 2.9999999242136255e-05                                                                                                     
TIME SPENT: 893
Epoch 4/1000
Score: 0.7917 | LR to: 2.9999999242136255e-05                                                                                   

<IPython.core.display.Javascript object>

[jovian] Updating notebook "a23d9c77b5984ba7b3a6471c818d8fe3" on https://jovian.ml/
[jovian] Uploading notebook..
[jovian] Capturing environment..
[jovian] Committed successfully! https://jovian.ml/harangdev/a23d9c77b5984ba7b3a6471c818d8fe3
OOF SCORE: 0.8689
[jovian] Saving notebook..


<IPython.core.display.Javascript object>

[jovian] Updating notebook "a23d9c77b5984ba7b3a6471c818d8fe3" on https://jovian.ml/
[jovian] Uploading notebook..
[jovian] Capturing environment..
[jovian] Committed successfully! https://jovian.ml/harangdev/a23d9c77b5984ba7b3a6471c818d8fe3


---

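Reading the results: the number in each model heading is the out-of-fold PR-AUC over all folds (the `OOF SCORE` line in the training log), and the bullets are the best validation scores of folds 0–4, in order. The two are computed differently (the per-fold score is measured on inverted labels and predictions), so they are not directly comparable.

## STAGE 1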

### EfficientNetB2: 0.8609

* 0.8070
* 0.7907
* 0.8055
* 0.7933
* 0.7841

### EfficientNetB3: 0.8619

* 0.8076
* 0.8058
* 0.8036
* 0.7971
* 0.7843

### EfficientNetB4: 0.8614

* 0.8050
* 0.7991
* 0.8004
* 0.8036
* 0.7943

### Ensemble: 0.8663

## STAGE 2

### EfficientNetB2: 0.8704

* 0.8160
* 0.8136
* 0.8218
* 0.8137
* 0.8024

### EfficientNetB3: 0.8689

* 0.8152
* 0.8168
* 0.8211
* 0.8101
* 0.7918

### Ensemble: 0.8714