In [1]:
import math
import os
import random

import matplotlib.pyplot as plt
import numpy as np
from pycocotools.coco import COCO
from sklearn.metrics import precision_score
import tensorflow as tf
from tqdm import tqdm

# Switch for the training cell: the training loop only runs inside `if TRANSFER:`.
# With False the loop is skipped and the notebook just loads the weights
# previously saved at `model_location`.
TRANSFER = False

In [2]:
# https://cs230.stanford.edu/blog/datapipeline/

# Create a list of filenames and labels for the dataset
def create_pairs(location = '/home/gregory/Datasets/COCO/2017', mode = 'val'):
    """Build parallel arrays of image paths and multi-hot label vectors for a COCO split.

    Args:
        location: Root folder of the dataset. The annotation file is read from
            ``<location>/annotations/instances_<mode>2017.json`` and image paths
            are built as ``<location>/<mode>2017/<file_name>``.
        mode: Split name spliced into those paths (e.g. ``'train'`` or ``'val'``).

    Returns:
        A ``(filenames, labels)`` tuple of NumPy arrays. ``filenames[k]`` is the
        path of the k-th image and ``labels[k]`` is a float32 vector of length 91
        holding 1.0 at every ``category_id`` annotated on that image.
    """
    # Label vectors are indexed directly by category_id, so they are sized to
    # 91 slots for easy indexing even though some slots are never used.
    n_slots = 91

    annotation_path = '{}/annotations/instances_{}2017.json'.format(location, mode)
    coco = COCO(annotation_path)

    paths = []
    targets = []
    for record in coco.loadImgs(coco.getImgIds()):
        paths.append('{}/{}2017/{}'.format(location, mode, record['file_name']))

        # Mark every category that appears in this image's annotations.
        target = np.zeros((n_slots), dtype = np.float32)
        ann_ids = coco.getAnnIds(record['id'], iscrowd=None)
        for ann in coco.loadAnns(ann_ids):
            target[ann['category_id']] = 1.0
        targets.append(target)

    return np.array(paths), np.array(targets)
    
def parse_function(filename, label):
    """Read one JPEG file and convert it into a padded 224x224 float image.

    Args:
        filename: Path of the JPEG file to read.
        label: Returned unchanged alongside the image.

    Returns:
        ``(image, label)`` where ``image`` has been decoded with 3 channels,
        converted to float32 and resized with padding to 224x224.
    """
    raw_bytes = tf.io.read_file(filename)

    # decode_jpeg is used instead of tf.image.decode_image, which would leave
    # the output shape undefined.
    pixels = tf.image.decode_jpeg(raw_bytes, channels=3)

    # Converts the pixel values to floats in [0, 1].
    pixels = tf.image.convert_image_dtype(pixels, tf.float32)

    return tf.image.resize_with_pad(pixels, 224, 224), label

def create_dataset(filenames, labels, batch_size = 32, num_parallel_calls = 4):
    """Assemble the tf.data input pipeline for (filename, label) pairs.

    Args:
        filenames: Sequence of image paths.
        labels: Sequence of labels aligned with ``filenames``.
        batch_size: Number of examples per batch.
        num_parallel_calls: Parallelism passed to ``Dataset.map``.

    Returns:
        A dataset that shuffles the pairs with a buffer as large as
        ``len(filenames)``, maps each pair through ``parse_function``, batches
        the results and prefetches one batch.
    """
    # Shuffling happens on the (filename, label) pairs, before any image is read.
    return (
        tf.data.Dataset.from_tensor_slices((filenames, labels))
        .shuffle(len(filenames))
        .map(parse_function, num_parallel_calls=num_parallel_calls)
        .batch(batch_size)
        .prefetch(1)
    )

In [3]:
# Build the classifier: a frozen, ImageNet-pretrained MobileNetV2 feature
# extractor followed by a single trainable Dense layer.
# (The original inline note read "ResNet50" -- presumably an alternative backbone.)
model_base = tf.keras.applications.MobileNetV2(
    weights='imagenet',
    include_top=False,
    pooling = 'avg',
    input_shape = (224, 224, 3),
)

# Freeze the backbone so only the Dense head below has trainable parameters.
model_base.trainable = False

# The head emits 91 values with no activation; the sigmoid is applied later,
# in the loss and at prediction time.
model = tf.keras.Sequential([model_base, tf.keras.layers.Dense(91)])

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
mobilenetv2_1.00_224 (Model) (None, 1280)              2257984   
_________________________________________________________________
dense (Dense)                (None, 91)                116571    
Total params: 2,374,555
Trainable params: 116,571
Non-trainable params: 2,257,984
_________________________________________________________________


In [4]:
# Where the best weights (lowest validation loss) are written during training
# and read back from afterwards.
model_location = './model_transfer/weights.h5'

if TRANSFER: 
    # Hyper-parameters of the training schedule.
    learning_rate = 0.001
    learning_rate_decay = 0.3   # multiplicative factor applied at every drop
    learning_rate_drops = 3     # number of drops that must happen before stopping
    min_epochs = 2              # never drop / stop before this many epochs
    stopping_epochs = 2         # patience: epochs without progress before acting
    stopping_tol = 0.001        # minimum validation-loss decrease counted as progress
    batch_size = 32

    folder = '/home/gregory/Datasets/COCO/2017'

    # Make sure the checkpoint directory exists before any training time is
    # spent, so a missing folder cannot make the first checkpoint write fail.
    weights_dir = os.path.dirname(model_location)
    if weights_dir:
        os.makedirs(weights_dir, exist_ok = True)

    f, l = create_pairs(location = folder, mode = 'train')
    n_train = len(f)

    data_train = create_dataset(f, l, batch_size = batch_size)

    f, l = create_pairs(location = folder, mode = 'val')
    n_val = len(f)

    data_val = create_dataset(f, l, batch_size = batch_size)

    # The datasets keep their final, partial batch, so the number of batches
    # per epoch is the ceiling of n / batch_size.
    n_batches_train = math.ceil(n_train / batch_size)
    n_batches_val = math.ceil(n_val / batch_size)

    def loss(model, inputs, targets):
        """Mean sigmoid cross-entropy between the model's logits and the targets."""
        preds = model(inputs)
        return tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels = targets, logits = preds))

    def grad(model, inputs, targets):
        """Return the loss and its gradients w.r.t. the model's trainable variables."""
        with tf.GradientTape() as tape:
            loss_value = loss(model, inputs, targets)
        return loss_value, tape.gradient(loss_value, model.trainable_variables)

    # Setup the initial optimizer (it will be re-initialized when the learning rate gets dropped)
    optimizer = tf.keras.optimizers.Adam(learning_rate = learning_rate)

    # Basic counters for the training process
    epoch = 0
    best_epoch = 0
    best_loss = np.inf
    drops = 0
    # Epoch from which the patience window is measured. It moves forward on
    # every improvement AND on every learning-rate drop; measuring from
    # best_epoch alone would keep the stopping condition true after the first
    # drop and decay the rate again on each following epoch.
    patience_start = 0

    # Run the training loop
    while True:

        # Check the stopping condition
        if epoch - patience_start > stopping_epochs and epoch > min_epochs:
            # We have finished only if we have fully decayed the learning_rate
            if drops == learning_rate_drops:
                break
            else:
                print("Dropping learning_rate")
                learning_rate *= learning_rate_decay
                optimizer = tf.keras.optimizers.Adam(learning_rate = learning_rate)
                drops += 1
                # Restart the patience window at the last finished epoch, so
                # the new rate gets `stopping_epochs` epochs to make progress.
                patience_start = epoch - 1

        # Train for an epoch
        epoch_loss_avg = tf.keras.metrics.Mean()
        for (x_batch, y_batch) in tqdm(data_train, total = n_batches_train):
            loss_value, grads = grad(model, x_batch, y_batch)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
            epoch_loss_avg.update_state(loss_value)
        epoch_loss = epoch_loss_avg.result().numpy()

        # Calculate the validation loss
        epoch_loss_avg_val = tf.keras.metrics.Mean()
        for (x_batch, y_batch) in data_val:
            loss_value = loss(model, x_batch, y_batch)
            epoch_loss_avg_val.update_state(loss_value)
        value = epoch_loss_avg_val.result().numpy()

        # Check if we have made progress
        if value < best_loss - stopping_tol:
            print("Epoch / Epoch Train Loss / Val Loss: " + str(epoch) + " " + str(epoch_loss) + " " + str(value) + " -> saving")
            best_loss = value
            best_epoch = epoch
            patience_start = epoch
            model.save_weights(model_location)
        else:
            print("Epoch / Epoch Train Loss / Val Loss: " + str(epoch) + " " + str(epoch_loss) + " " + str(value))

        # Update counters
        epoch += 1

# Runs whether or not training happened: restore the best saved weights.
model.load_weights(model_location)


In [5]:
# Evaluate the model on the validation split: threshold the sigmoid outputs at
# 0.5 and report the precision of every label index.
batch_size = 32

f, l = create_pairs(mode = 'val')
n_val = len(f)

data_val = create_dataset(f, l, batch_size = batch_size)

# Ceiling, because the final partial batch is still yielded by the dataset.
n_batches_val = math.ceil(n_val / batch_size)

y_hat = []
y_true = []

for (x_batch, y_batch) in tqdm(data_val, total = n_batches_val):

    probabilities = tf.math.sigmoid(model(x_batch)).numpy()
    y_hat.append((probabilities >= 0.5).astype(np.float32))

    y_true.append(y_batch.numpy())

# Concatenate the per-batch arrays directly. The last batch is usually smaller
# than the others, so wrapping the list in np.array() first would build a
# ragged array, which recent NumPy versions reject.
y_hat = np.concatenate(y_hat, axis = 0)
y_true = np.concatenate(y_true, axis = 0)

dim = y_hat.shape[1]

precision = np.zeros((dim))

for i in range(dim):
    # zero_division = 0: a label index that is never predicted scores 0.
    precision[i] = precision_score(y_true[:, i], y_hat[:, i], zero_division = 0)

print("Precision:", precision)

# The value printed as "MAP" is the mean of the per-class precisions at the 0.5
# threshold, taken only over classes whose precision is non-zero. That filter
# removes unused label indices, but also any class with no correct positive
# prediction.
nonzero_precisions = [p for p in precision if p != 0.0]
MAP = sum(nonzero_precisions)
MAP_count = len(nonzero_precisions)

if MAP_count > 0:
    print("MAP:", MAP/ MAP_count)
else:
    # Avoid a ZeroDivisionError when no class was ever predicted correctly.
    print("MAP: undefined (no class has non-zero precision)")

loading annotations into memory...


0it [00:00, ?it/s]

Done (t=0.38s)
creating index...
index created!


157it [00:08, 18.57it/s]


Precision: [0.         0.9        0.73913043 0.63685637 0.81308411 0.85365854
 0.83333333 0.90082645 0.62244898 0.81707317 0.75229358 0.84210526
 0.         0.76595745 0.81818182 0.63953488 0.85964912 0.91666667
 0.85542169 0.77108434 0.63768116 0.75510204 0.87654321 0.85714286
 0.90588235 0.91397849 0.         0.54545455 0.78021978 0.
 0.         0.57746479 0.76923077 0.70833333 0.69090909 0.80733945
 0.43589744 0.66964286 0.86885246 0.76470588 0.84615385 0.79569892
 0.84172662 0.90277778 0.60294118 0.         0.56666667 0.52573529
 0.61842105 0.45977011 0.47058824 0.68452381 0.65079365 0.52777778
 0.5483871  0.78571429 0.86666667 0.73333333 0.7826087  0.87755102
 0.78947368 0.81395349 0.63271605 0.69767442 0.59090909 0.71559633
 0.         0.65641026 0.         0.         0.80487805 0.
 0.70212766 0.76190476 0.75714286 0.68571429 0.77142857 0.78787879
 0.5        0.69736842 0.         0.77358491 0.60526316 0.
 0.66666667 0.84210526 0.6984127  0.69230769 0.92982456 0.
 0.5       ]
MAP