In [1]:
%matplotlib inline  
from fastai.vision import *
from fastai.data_block import _maybe_squeeze
from fastai.callbacks import *
from sklearn.model_selection import StratifiedKFold
from joblib import load, dump
from efficientnet_pytorch import EfficientNet
from ranger import *
from mxresnet import *

Mish activation loaded...


In [2]:
def strt_split(x, y, n_folds=5, random_seed = 42, path=Path('')):
    """Return stratified K-fold index pairs, cached on disk as `val_idx.joblib`.

    On the first call the folds are computed with `StratifiedKFold` and dumped
    to `path/'val_idx.joblib'`. Later calls load that file instead of
    re-splitting, so every run that shares the cache sees the same folds.

    Args:
        x: samples, passed to `StratifiedKFold.split`.
        y: labels used for stratification.
        n_folds: number of folds; only used when no cache file exists yet.
        random_seed: shuffle seed; only used when no cache file exists yet.
        path: directory holding the cache file (defaults to the current
            working directory).

    Returns:
        A list with one `(val_idx, trn_idx)` tuple per fold. Note that the
        validation indices come first.

    An existing cache is returned as-is: delete the file to re-split with a
    different `n_folds`/`random_seed`. Errors raised while reading an existing
    cache propagate to the caller instead of being swallowed.
    """
    cache_file = Path(path)/'val_idx.joblib'
    if cache_file.exists():
        return load(str(cache_file))
    skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=random_seed)
    # Swap the order sklearn yields (train, valid) so callers index [0] for validation.
    val_name = [(val_idx, trn_idx) for trn_idx, val_idx in skf.split(x, y)]
    dump(val_name, str(cache_file))
    return val_name

def modified_label_from_df(self, cols:IntsOrStrs=1, label_cls:Callable=None, **kwargs):
    """Label `self.items` from the values in `cols` in `self.inner_df`, treating missing `labels` as empty.

    Before the labels are read, NaN entries in the `labels` column of
    `self.inner_df` are replaced by the empty string, so rows without any
    label pass the NaN assertion below.

    Args:
        cols: column index/name (or list of them) holding the labels.
        label_cls: label list class forwarded to `_label_from_list`; when
            several columns are given and this is `None` or
            `MultiCategoryList`, one-hot `MultiCategoryList` labelling with
            `classes=cols` is used.
        **kwargs: forwarded to `_label_from_list` (caller values override the
            one-hot defaults).

    Returns:
        Whatever `self._label_from_list` returns.

    Raises:
        AssertionError: if the selected label columns still contain NaN.
    """
    # Assign the filled column back rather than calling
    # `inner_df.labels.fillna(..., inplace=True)`: the in-place call on an
    # attribute-accessed column is chained assignment, which recent pandas warns
    # about and which does not update the frame under Copy-on-Write.
    self.inner_df['labels'] = self.inner_df['labels'].fillna('')
    labels = self.inner_df.iloc[:,df_names_to_idx(cols, self.inner_df)]
    assert labels.isna().sum().sum() == 0, f"You have NaN values in column(s) {cols} of your dataframe, please fix it."
    if is_listy(cols) and len(cols) > 1 and (label_cls is None or label_cls == MultiCategoryList):
        new_kwargs,label_cls = dict(one_hot=True, classes= cols),MultiCategoryList
        kwargs = {**new_kwargs, **kwargs}
    return self._label_from_list(_maybe_squeeze(labels), label_cls=label_cls, **kwargs)


def flattenAnneal(learn:Learner, lr:float, n_epochs:int, start_pct:float, SUFFIX = 'PHASE_1_COS'):
    """Train `learn` for `n_epochs`: flat `lr` first, then an `annealing_cos` schedule.

    The first `start_pct` of all training iterations run at constant `lr`;
    the remaining iterations use a phase scheduled with `annealing_cos`.
    The model is checkpointed on every `valid_loss` improvement under the name
    `f'{EXP_NAME}_{SUFFIX}'` (`EXP_NAME` is read from module globals at call
    time).

    Args:
        learn: the learner to train.
        lr: learning rate for the flat phase and start of the annealed phase.
        n_epochs: number of epochs to train.
        start_pct: fraction of total iterations spent in the flat phase.
        SUFFIX: suffix of the checkpoint name.
    """
    n = len(learn.data.train_dl)
    total_iters = int(n*n_epochs)
    anneal_start = int(n*n_epochs*start_pct)
    anneal_end = total_iters - anneal_start
    phases = [TrainingPhase(anneal_start).schedule_hp('lr', lr),
              TrainingPhase(anneal_end).schedule_hp('lr', lr, anneal=annealing_cos)]
    # Hand the callbacks to this fit only. Appending them to `learn.callbacks`
    # left them attached forever, so a later call (e.g. with another SUFFIX)
    # stacked a second scheduler and let the stale SaveModelCallback overwrite
    # the earlier phase's checkpoint.
    run_callbacks = [GeneralScheduler(learn, phases),
                     SaveModelCallback(learn, every='improvement', monitor='valid_loss', name = f'{EXP_NAME}_{SUFFIX}')]
    learn.fit(n_epochs, callbacks=run_callbacks)

In [4]:
# Experiment configuration: everything a reader might tune lives in this cell.
PATH = Path('..')   # root directory holding the CSVs and the image folders
FOLD = 0            # index of the cross-validation fold used for validation
BS = 64             # batch size
SZ = 512            # image size handed to the transforms
# NOTE(review): the `_224` suffix does not match SZ (512); the names are kept
# because later cells reference them.
IMG_TRAIN_224 = PATH/'stage_1_train_images_png'
IMG_TEST_224 = PATH/'stage_1_test_images_png'
DF_TRAIN = pd.read_csv(PATH/'train_labels_as_strings.csv')
DF_SUBMI = pd.read_csv(PATH/'stage_1_sample_submission.csv')
EXP_NAME = f'NB_EXP_10_CV_{FOLD}_TFL_{SZ}'

# Image file name for each submission row: the first two '_'-separated tokens
# of `ID`, plus '.png'.
DF_SUBMI['fn'] = DF_SUBMI.ID.apply(lambda x: '_'.join(x.split('_')[:2]) + '.png')
# Rows without labels become the empty string. Assign the result back instead
# of `fillna(..., inplace=True)` on a column selection, which is chained
# assignment and does not update the frame under pandas Copy-on-Write.
DF_TRAIN['labels'] = DF_TRAIN['labels'].fillna('')
# strt_split returns one (val_idx, trn_idx) pair per fold; keep the pair for FOLD.
VAL_IDX = strt_split(DF_TRAIN['fn'], DF_TRAIN['labels'])[FOLD]

In [5]:
# Route every `label_from_df` call through the variant that fills missing
# `labels` with '' before labelling.
ItemList.label_from_df = modified_label_from_df

# Distinct test image file names, in order of first appearance in the submission file.
test_fns = DF_SUBMI['fn'].unique()

# Data pipeline: read the training CSV, split by the cached fold indices
# (VAL_IDX is a (val_idx, trn_idx) pair), label, transform, attach the test
# images and batch.
data = (
    ImageList.from_csv('..', 'train_labels_as_strings.csv', folder=IMG_TRAIN_224.name)
    .split_by_idxs(train_idx=VAL_IDX[1], valid_idx=VAL_IDX[0])
    .label_from_df(label_delim=' ')
    .transform(get_transforms(), size=SZ)
    .add_test('../' + IMG_TEST_224.name + '/' + test_fns)
    .databunch(bs=BS)
)

        

In [6]:
# MXResNet-50 with one output per class in `data`.
# NOTE(review): `sa=True` presumably enables the self-attention layer; confirm in mxresnet.
md_xrsa = mxresnet50(sa=True, c_out=data.c)

# Optimizer factory handed to the Learner: Ranger with custom betas/eps.
opt_func = partial(Ranger, eps=1e-6, betas=(0.95, 0.99))

In [7]:
learn = Learner(data,
                md_xrsa,
                wd=1e-2,
                bn_wd=False,
                true_wd=True,
                opt_func=opt_func,
                metrics=[accuracy_thresh])

# Wrap before loading.
# NOTE(review): presumably the checkpoint was saved from a DataParallel-wrapped
# model, which is why the wrap precedes `load`; confirm.
learn.model = nn.DataParallel(learn.model)
# Resume from the phase-1 checkpoint of the *current* fold. The name used to
# hard-code fold 0, so changing FOLD would silently load weights trained on a
# different split. With FOLD = 0 the name is unchanged.
# NOTE(review): assumes phase-1 checkpoints are named per fold in this pattern.
learn.load(f'NB_EXP_10_CV_{FOLD}_PHASE_1_COS')
learn.to_fp16()
learn.unfreeze()

set state called


In [8]:
# Fine-tune for 5 epochs: 70% of the iterations at a flat learning rate, the
# rest on the `annealing_cos` schedule.
lr = 1e-3
flattenAnneal(learn, lr=lr, n_epochs=5, start_pct=0.7)

epoch,train_loss,valid_loss,accuracy_thresh,time
0,0.062339,0.064564,0.976867,1:50:04
1,0.066059,0.062597,0.977453,1:54:54
2,0.05942,0.063111,0.977134,1:51:56
3,0.062666,0.059079,0.97863,1:58:19
4,0.050205,0.05505,0.979957,1:49:11


Better model found at epoch 0 with valid_loss value: 0.06456393003463745.
Better model found at epoch 1 with valid_loss value: 0.06259684264659882.
Better model found at epoch 3 with valid_loss value: 0.059079453349113464.
Better model found at epoch 4 with valid_loss value: 0.0550495982170105.
set state called


In [9]:
def get_preds(learn:Learner, sub_fn: str=f'{EXP_NAME}_COS', TTA: bool = False, dt_type = DatasetType.Test):
    """Predict on one of `learn`'s datasets and write a submission CSV.

    The CSV has one row per (image, class) pair: `ID` is
    `<file name up to the first '.'>_<class>` and `Label` is the prediction
    formatted with 10 decimals. It is written to `f'{sub_fn}.csv'`, or
    `f'{sub_fn}_TTA.csv'` when `TTA` is set.

    Args:
        learn: learner providing both the model and the data.
        sub_fn: output file name without extension. The default is computed
            once, when this function is defined, from the value `EXP_NAME`
            has at that moment.
        TTA: use `learn.TTA` instead of `learn.get_preds`. The learner is
            switched with `to_fp32()` first and stays that way afterwards.
        dt_type: which dataset to predict on.

    Raises:
        ValueError: if the number of predictions differs from the number of
            items in the chosen dataset.
    """
    if TTA:
        learn.to_fp32()
        preds, _ = learn.TTA(ds_type=dt_type)
        sub_fn = f'{sub_fn}_TTA'
    else:
        preds, _ = learn.get_preds(dt_type)

    # Take file names from the dataset that was actually predicted on. The
    # previous version always zipped against the global `test_fns`, so any
    # `dt_type` other than Test paired predictions with the wrong images and
    # silently truncated to the shorter sequence.
    items = learn.data.dl(dt_type).dataset.x.items
    if len(items) != len(preds):
        raise ValueError(f'{len(preds)} predictions for {len(items)} items in {dt_type}')
    classes = learn.data.train_ds.classes

    ids = []
    labels = []
    # One bulk conversion to Python floats instead of `.item()` per element.
    for item, row in zip(items, preds.tolist()):
        stem = Path(str(item)).name.split('.')[0]
        for label, prob in zip(classes, row):
            ids.append(f"{stem}_{label}")
            labels.append('{0:1.10f}'.format(prob))
    pd.DataFrame({'ID': ids, 'Label': labels}).to_csv(f'{sub_fn}.csv', index=False)

In [10]:
# Plain (non-TTA) predictions, written under the default submission name.
get_preds(learn, TTA=False)

In [11]:
# Test-time-augmented predictions; the file name gets a `_TTA` suffix.
get_preds(learn=learn, TTA=True)

In [12]:
# Schedule a shutdown of the machine after the prediction cells above;
# per the command's own output it can be cancelled with `shutdown -c`.
!sudo shutdown

Shutdown scheduled for Mon 2019-09-30 03:41:52 UTC, use 'shutdown -c' to cancel.
