In [1]:
import tensorflow as tf
from tensorflow import keras
device = tf.config.list_physical_devices('GPU')[0]
tf.config.experimental.set_memory_growth(device, True)
print(f'TF Version:{tf.__version__}  |  GPU:{device}')

TF Version:2.2.0  |  GPU:PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')


In [2]:
#Paths
from pathlib import Path
base_folder = Path('.')
data_folder = base_folder/'til2020'
train_imgs_folder = data_folder/'train'/'train'
train_annotations = data_folder/'train.json'
val_imgs_folder = data_folder/'val'/'val'
val_annotations = data_folder/'val.json'

train_pickle = data_folder/'train.p'/'train.p'
val_pickle = data_folder/'val.p'/'val.p'

save_model_folder = base_folder/'ckpts'
load_model_folder = base_folder/'ckpts'

In [3]:
class Config():
    cat_list = ['tops', 'trousers', 'outerwear', 'dresses', 'skirts']

    input_shape = (224,224,3)
    wt_decay = 5e-4

    dims_list = [(7,7),(14,14)]
    aspect_ratios = [(1,1), (1,2), (2,1)]

    batch_size = 16
    epoch_warmup = 300
    epoch_finetune = 300
conf = Config()

In [4]:
from tensorflow.keras import losses
# Shape of ypred: ( batch, i, j, aspect_ratios, 1+4+numclasses ). For a batch,i,j, we get #aspect_ratios vectors of length 7.
# Shape of ytrue: ( batch, i, j, aspect_ratios, 1+4+numclasses+2 ). For a batch,i,j, we get #aspect_ratios vectors of length 9 (two more for objectness and cat/loc indicators)
#TODO Play with weights?
def custom_loss(ytrue, ypred):
    obj_loss_weight = 1.0
    cat_loss_weight = 1.0
    loc_loss_weight = 1.0

    end_cat = len(conf.cat_list) + 1

    objloss_indicators = ytrue[:,:,:,:,-2:-1]
    catlocloss_indicators = ytrue[:,:,:,:,-1:]

    ytrue_obj, ypred_obj = ytrue[:,:,:,:,:1], ypred[:,:,:,:,:1]
    ytrue_obj = tf.where( objloss_indicators != 0, ytrue_obj, 0 )
    ypred_obj = tf.where( objloss_indicators != 0, ypred_obj, 0 )
    objectness_loss = losses.BinaryCrossentropy(from_logits=True)( ytrue_obj, ypred_obj )

    ytrue_cat, ypred_cat = ytrue[:,:,:,:,1:end_cat], ypred[:,:,:,:,1:end_cat]
    ytrue_cat = tf.where( catlocloss_indicators != 0, ytrue_cat, 0 )
    ypred_cat = tf.where( catlocloss_indicators != 0, ypred_cat, 0 )
    categorical_loss = losses.CategoricalCrossentropy(from_logits=True) ( ytrue_cat, ypred_cat )

    # Remember that ytrue is longer than ypred, so we will need to stop at index -2, which is where the indicators are stored
    ytrue_loc, ypred_loc = ytrue[:,:,:,:,end_cat:-2], ypred[:,:,:,:,end_cat:]
    ytrue_loc = tf.where( catlocloss_indicators != 0, ytrue_loc, 0 )
    ypred_loc = tf.where( catlocloss_indicators != 0, ypred_loc, 0 )
    localisation_loss = losses.Huber() ( ytrue_loc, ypred_loc )

    return obj_loss_weight*objectness_loss + cat_loss_weight*categorical_loss + loc_loss_weight*localisation_loss

In [5]:
#ah functional paradigm
from tensorflow.keras import layers
from tensorflow.keras.regularizers import l2
#I wrote this to reduce code size (sequential layers with activation of ReLU)
def seq_with_activation(lst):
    def wrapper(x):
        nonlocal lst
        try: iter(lst)
        except TypeError: lst = [lst]
        for l in lst:
            x = l(x)
            x = layers.BatchNormalization()(x)
            x = layers.LeakyReLU(0.01)(x)
        return x
    return wrapper


def transfer_model_7x7_14x14(backbone_model, input_shape, dims_list, num_aspect_ratios, num_classes, wt_decay, model_name='transfer-objdet-model-7x7-14x14'):
    inputs = keras.Input(shape=input_shape)
    intermediate_layer_model = keras.Model(inputs=backbone_model.input,
        #outputs=backbone_model.get_layer('conv4_block6_out').output #Resnet50
        outputs=backbone_model.get_layer('block13_sepconv2_bn').output #Xceptionnet, copied example in picking last layer of res 14
        #TODO: PUT MORE THOUGHT INTO WHICH LAYER TO PICK BY INVESTIGATING ACTIVATIONS
    )

    intermediate_output = intermediate_layer_model(inputs) #14
    backbone_output = backbone_model(inputs) #7

    #TODO: not copy example, and strategize our own stuff
    upsample = seq_with_activation([
        layers.Conv2D(512, 1, padding='same', kernel_regularizer=l2(wt_decay)), #7
        layers.Conv2D(1024, 3, padding='same', kernel_regularizer=l2(wt_decay)), #7
        layers.Conv2D(512, 1, padding='same', kernel_regularizer=l2(wt_decay)), #7
        layers.Conv2D(1024, 3, padding='same', kernel_regularizer=l2(wt_decay)), #7
        layers.Conv2D(512, 1, padding='same', kernel_regularizer=l2(wt_decay)), #7
    ])(backbone_output)

    x = seq_with_activation([
        layers.Conv2D(256, 1, padding='same', kernel_regularizer=l2(wt_decay)), #7
        layers.Conv2DTranspose(512, 5, strides=(2, 2), padding='same'), #14
    ])(upsample)
    x = layers.Concatenate()([x,intermediate_output])

    tens_14x14 = seq_with_activation([
        layers.Conv2D(256, 1, padding='same', kernel_regularizer=l2(wt_decay)), #14
        layers.Conv2D(512, 3, padding='same', kernel_regularizer=l2(wt_decay)), #14
        layers.Conv2D(256, 1, padding='same', kernel_regularizer=l2(wt_decay)), #14
        layers.Conv2D(512, 3, padding='same', kernel_regularizer=l2(wt_decay)), #14
        layers.Conv2D(256, 1, padding='same', kernel_regularizer=l2(wt_decay)), #14
        layers.Conv2D(512, 3, padding='same', kernel_regularizer=l2(wt_decay)), #14
    ])(x)

    tens_7x7 = layers.Add()([
        seq_with_activation(layers.Conv2D(2048, 3, padding='same', kernel_regularizer=l2(wt_decay)))(upsample),
        backbone_output
    ])

    dim_tensor_map = {'7x7':tens_7x7,'14x14':tens_14x14}

    #Accumulate predictions for 7x7,14x14 into a dictionary for keras multi labels.
    preds_dict = {}
    for dims in dims_list:
        dimkey = '{}x{}'.format(*dims)
        tens = dim_tensor_map[dimkey]
        ar_preds = []
        for _ in range(num_aspect_ratios):
            objectness_preds = layers.Conv2D(1, 1, kernel_regularizer=l2(wt_decay))( tens )
            class_preds = layers.Conv2D(num_classes, 1, kernel_regularizer=l2(wt_decay))( tens )
            bbox_preds = layers.Conv2D(4, 1, kernel_regularizer=l2(wt_decay))( tens )
            ar_preds.append( layers.Concatenate()([objectness_preds, class_preds, bbox_preds]) )

        if num_aspect_ratios > 1: predictions = layers.Concatenate()(ar_preds)
        elif num_aspect_ratios == 1: predictions = ar_preds[0]

        predictions = layers.Reshape( (*dims, num_aspect_ratios, 5+num_classes), name=dimkey )(predictions)
        preds_dict[dimkey] = predictions

    model = keras.Model(inputs, preds_dict, name=model_name)

    model.compile( optimizer=keras.optimizers.Adam(1e-5),
                    loss=custom_loss )
    return model

In [6]:
def model_monitors(save_path):
    model_checkpoint_callback = keras.callbacks.ModelCheckpoint(
        filepath=save_path,
        save_weights_only=False,
        monitor='val_loss',
        mode='auto',
        save_best_only=True
    )
    earlystopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=30)
    reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=1e-8)
    return [model_checkpoint_callback,earlystopping,reduce_lr]

def train(model,backbone_name,warmup=True):
    if warmup: 
        save_model_path = str(save_model_folder/f'pt-{model_context}-best_val_loss.ckpt')
        for layer in model.get_layer(backbone_name).layers: layer.trainable = False #dont train pretrained during warm up
    else:
        save_model_path = str(save_model_folder/f'ft-{model_context}-best_val_loss.ckpt')
        for layer in model.get_layer(backbone_name).layers: layer.trainable = True

    model.fit(
        x=train_sequence, 
        epochs=(conf.epoch_warmup if warmup else conf.epoch_finetune), 
        validation_data=val_sequence, 
        callbacks=model_monitors(save_model_path),
        verbose=1
    )

In [7]:
model_context = f'model-7x7-14x14-3aspect-modyoloposneg-wd{conf.wt_decay}'
#load_model_path = load_model_folder/f'ft-{model_context}-best_val_loss.ckpt'
#load_model_path = load_model_folder/f'pt-{model_context}-best_val_loss.ckpt'
#load_model_path = None

if load_model_path is None:
    #backbone_model = keras.applications.ResNet50(input_shape=conf.input_shape,include_top=False)
    backbone_model = keras.applications.Xception(input_shape=conf.input_shape,include_top=False)
    model = transfer_model_7x7_14x14(backbone_model,
        input_shape=conf.input_shape,
        dims_list=conf.dims_list,
        num_aspect_ratios=len(conf.aspect_ratios),
        num_classes=len(conf.cat_list),
        wt_decay=conf.wt_decay,
        #model_name=model_context+'-res50'
        model_name=model_context+'-xception'
    )
else:
    model = keras.models.load_model(load_model_path, custom_objects={'custom_loss':custom_loss})

model.summary()

Model: "model-7x7-14x14-3aspect-modyoloposneg-wd0.0005-xception"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
xception (Model)                (None, 7, 7, 2048)   20861480    input_2[0][0]                    
__________________________________________________________________________________________________
conv2d_4 (Conv2D)               (None, 7, 7, 512)    1049088     xception[1][0]                   
__________________________________________________________________________________________________
batch_normalization_4 (BatchNor (None, 7, 7, 512)    2048        conv2d_4[0][0]                   
____________________________________________

In [8]:
from scripts.loader import TILSequence,TILPickle
from scripts.sampling import iou,modified_yolo_posneg_sampling
from scripts.augment import aug_default,aug_identity
from scripts.encoder import encode_label

label_encoder = lambda y: encode_label(y, conf.dims_list, conf.aspect_ratios, iou, modified_yolo_posneg_sampling, conf.cat_list)
preproc_fn = lambda x: x / 255.

if True:
    train_sequence = TILPickle(train_pickle, conf.batch_size, aug_default, conf.input_shape[:-1], label_encoder, preproc_fn)
    val_sequence = TILPickle(val_pickle, conf.batch_size, aug_identity, conf.input_shape[:-1], label_encoder, preproc_fn)
else:
    train_sequence = TILSequence(train_imgs_folder,train_annotations,conf.batch_size,aug_default,conf.input_shape[:-1],label_encoder,preproc_fn)
    val_sequence = TILSequence(val_imgs_folder,val_annotations,conf.batch_size,aug_identity,conf.input_shape[:-1],label_encoder,preproc_fn)

In [10]:
tf.get_logger().setLevel(40)
print('Warming up the model...')
train(model,'xception',warmup=True)

# Fine tuning
print('Model warmed. Loading best val version of model...')
del model
load_model_path = load_model_folder/f'pt-{model_context}-best_val_loss.ckpt'
model = keras.models.load_model(load_model_path, custom_objects={'custom_loss':custom_loss})
model.compile(optimizer=keras.optimizers.Adam(1e-5),loss=custom_loss)
train(model,'xception',warmup=False)

# Final save
model.save(os.path.join(save_model_folder, 'ft-{}-final.ckpt'.format(model_context)))

Warming up the model...
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/3

In [10]:
#TODO: import hyperopts. play with hyperopts. I picked Xception cause of the high top-5 score despite small parameter size

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/xception/xception_weights_tf_dim_ordering_tf_kernels_notop.h5


In [14]:
import numpy as np
test_sequence_pickle = TILPickle(val_pickle, 1, aug_identity, conf.input_shape[:-1], label_encoder, preproc_fn, testmode=True)
test_sequence = TILSequence(val_imgs_folder, val_annotations, 1, aug_identity, conf.input_shape[:-1], label_encoder, preproc_fn, testmode=True)

# Test to make sure that both dispensers dispense the same data
img_idx = 42
ids_pickle, x_pickle, y_pickle = test_sequence_pickle[img_idx]
ids_seq, dims_seq, x_seq, y_seq = test_sequence[img_idx]

print('Image ids of pickle and seq:', ids_pickle[0], ',', ids_seq[0])
print('Are input arrays same?:', np.allclose( x_pickle, x_seq )) #of course not, one returns the original_image_dim
for dimkey, ylabel_pickle in y_pickle.items():
    ylabel_seq = y_seq[dimkey]
    print('Are labels same for key=(', dimkey, ')?:', np.allclose( ylabel_pickle, ylabel_seq ))

Image ids of pickle and seq: 10153 , 10153
Are input arrays same?: False
Are labels same for key=( 7x7 )?: True
Are labels same for key=( 14x14 )?: True


In [12]:
from scripts.encoder import decode_tensor
def nms(detections, iou_thresh=0.):
    dets_by_class = {}
    final_result = []
    for det in detections:
        cls = det[1]
        if cls not in dets_by_class:
            dets_by_class[cls] = []
        dets_by_class[cls].append( det )
    for _, dets in dets_by_class.items():
        candidates = list(dets)
        candidates.sort( key=lambda x:x[0], reverse=True )
        while len(candidates) > 0:
            candidate = candidates.pop(0)
            _,_,cx,cy,cw,ch = candidate
            copy = list(candidates)
            for other in candidates:
                # Compute the IoU. If it exceeds thresh, we remove it
                _,_,ox,oy,ow,oh = other
                if iou( (cx,cy,cw,ch), (ox,oy,ow,oh) ) > iou_thresh:
                    copy.remove(other)
            candidates = list(copy)
            final_result.append(candidate)
    return final_result

# Run this to visualize
from IPython.display import Image, display
import PIL
from PIL import ImageDraw
rank_colors = ['cyan', 'magenta', 'pink']
det_threshold=0.
top_dets=3

start=0
end=20
for k in range(start,end):
    _, img_arr, label_cxywh = test_sequence_pickle[k]
    img_arr = img_arr[0]
    pil_img = PIL.Image.fromarray( (img_arr * 255.).astype(np.uint8) )
    W,H = pil_img.size
    pred_dict = model(np.array([img_arr]))
    preds = decode_tensor( pred_dict, conf.aspect_ratios )

    # Post-processing
    preds.sort( key=lambda x:x[0], reverse=True )
    preds = [pred for pred in preds if pred[0] >= det_threshold]
    preds = preds[:top_dets]
    preds = nms(preds, iou_thresh=0.5)

    draw_img = pil_img.copy()
    draw = ImageDraw.Draw(draw_img)
    for i, pred in enumerate(preds):
        c,cls,x,y,w,h = pred
        bb_x = int(x * W)
        bb_y = int(y * H)
        bb_w = int(w * W)
        bb_h = int(h * H)
        left = int(bb_x - bb_w / 2)
        top = int(bb_y - bb_h / 2)
        right = int(bb_x + bb_w / 2)
        bot = int(bb_y + bb_h / 2)
        cls_str = conf.cat_list[cls-1]

        draw.rectangle(((left, top), (right, bot)), outline=rank_colors[i])
        draw.text((bb_x, bb_y), cls_str, fill=rank_colors[i])
        draw.text( ( int(left + bb_w*.1), int(top + bb_h*.1) ), '{:.2f}'.format(c), fill=rank_colors[i] )

    display(draw_img)

NameError: name 'decode_tensor' is not defined

In [15]:
# Generating detections on the folder of validation images
from scripts.encoder import decode_tensor
from tqdm import tqdm
import json
detections = []
det_threshold=0.
for i in tqdm(range(len(test_sequence))):
    ids_seq, dims, input_arr, _ = test_sequence[i]
    img_id = int(ids_seq[0])
    W,H = dims[0]

    # Here, I'm inferencing one-by-one, but you can batch it if you want it faster
    pred_dict = model.predict(input_arr)
    preds = decode_tensor( pred_dict, conf.aspect_ratios )

    # Post-processing
    preds = [pred for pred in preds if pred[0] >= det_threshold]
    preds.sort( key=lambda x:x[0], reverse=True )
    preds = preds[:100] # we only evaluate you on 100 detections per image

    for i, pred in enumerate(preds):
        c,cat_id,x,y,w,h = pred
        left = W * (x - w/2.)
        left = round(left,1)
        top = H * (y - h/2.)
        top = round(top,1)
        width = W*w
        width = round(width,1)
        height = H*h
        height = round(height,1)
        c = float(c)
        cat_id = int(cat_id)
        detections.append( {'image_id':img_id, 'category_id':cat_id, 'bbox':[left, top, width, height], 'score':c} )

with open('detections-7x7-14x14-top100.json', 'w') as f: json.dump(detections, f)

100%|██████████| 1474/1474 [07:23<00:00,  3.33it/s]


In [16]:
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
coco_gt = COCO(val_annotations)
coco_dt = coco_gt.loadRes('detections-7x7-14x14-top100.json')
cocoEval = COCOeval(cocoGt=coco_gt, cocoDt=coco_dt, iouType='bbox')
cocoEval.evaluate()
cocoEval.accumulate()
cocoEval.summarize()

loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
Loading and preparing results...
DONE (t=4.96s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=3.22s).
Accumulating evaluation results...
DONE (t=1.36s).
 Average Precision  (AP) @[ IoU=0.20:0.50 | area=   all | maxDets=100 ] = 0.247
 Average Precision  (AP) @[ IoU=0.20      | area=   all | maxDets=100 ] = 0.284
 Average Precision  (AP) @[ IoU=0.30      | area=   all | maxDets=100 ] = 0.267
 Average Precision  (AP) @[ IoU=0.40      | area=   all | maxDets=100 ] = 0.237
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.196
