In [1]:
import tensorflow as tf
# print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

In [2]:
# Avoid hogging up gpu memory 
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)


1 Physical GPUs, 1 Logical GPUs


In [3]:
# For convenience
%load_ext autoreload
%autoreload 2

In [4]:
from tensorflow import keras
import json
import os
from tqdm import tqdm
import pandas as pd
import numpy as np
import dataset
from util import (
    get_place_to_index_mapping,
    get_incident_to_index_mapping,
    get_index_to_incident_mapping,
    get_index_to_place_mapping
)
from keras.applications.resnet50 import ResNet50
from keras.layers import Dense, Flatten, Permute
from keras import Sequential
import keras.backend as kb
from keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf
import PIL
from collections import defaultdict
from scipy.special import softmax
from scipy.special import expit as sigmoid

In [5]:
abs_path = "/kuacc/users/asafaya19/cv-project"
train_json = os.path.join(abs_path ,"eccv_train.json")
val_json = os.path.join(abs_path ,"eccv_val.json")
data_dir = os.path.join(abs_path, "data")
train_dir = os.path.join(data_dir, "train")
val_dir = os.path.join(data_dir, "val")

train_paths = json.loads(open(train_json).readline())
val_paths = json.loads(open(val_json).readline())

place_to_idx = get_place_to_index_mapping()
incident_to_idx = get_incident_to_index_mapping()

index_to_incident_mapping = get_index_to_incident_mapping()
index_to_place_mapping = get_index_to_place_mapping()

In [6]:
def get_dataset(paths,file_dir, threshold=1000):
    train_set = []
    for path in tqdm(paths, leave=False):
        if not os.path.exists(os.path.join(file_dir, path)):
            continue
        # Make sure image is not corrupt, try importing it
        try:
            img = PIL.Image.open(os.path.join(file_dir, path))
            img.resize((224, 224))
        except:
            continue
        nump = len(place_to_idx) + 1
        numi = len(incident_to_idx) + 1
        place_labels = np.zeros(nump, np.float32)
        place_weights = np.zeros(nump, np.float32)
        incident_labels = np.zeros(numi, np.float32)
        incident_weights = np.zeros(numi, np.float32)

        incidents = paths[path]["incidents"]
        for k in incidents:
            lbl = incidents[k]
            if lbl==1:
                # We are sure this instance is only this incident
                incident_labels[incident_to_idx[k]]=1
                incident_weights = np.ones(numi, np.float32)
            else:
                # We are only sure that this image is not that incident
                incident_weights[incident_to_idx[k]]=1
        if len(incidents)==0:
            # No incident
            incident_labels[-1]=1
            incident_weights = np.ones(numi, np.float32)

        places = paths[path]["places"]
        for k in places:
            lbl = places[k]
            if lbl==1:
                # We are sure this instance is only this incident
                place_labels[place_to_idx[k]]=1
                place_weights = np.ones(nump, np.float32)
            else:
                # We are only sure that this image is not that incident
                place_weights[place_to_idx[k]]=1
        if len(places)==0:
            # No place
            place_labels[-1]=1
            place_weights = np.ones(nump, np.float32)


        train_set.append({
            "path":path,
            "incident_labels":incident_labels,
            "incident_weights":incident_weights,
            "incidents":np.vstack((incident_labels, incident_weights)),
            "place_labels":place_labels,
            "place_weights":place_weights,
            "place":np.vstack((place_labels, place_weights))
        })
        if len(train_set)>=threshold:
            break
    return train_set

In [7]:
def getpreprocessfunc():
    mean = np.asarray([0.485, 0.456, 0.406]).reshape(3, 1, 1).astype(np.float32)
    std = np.asarray([0.229, 0.224, 0.225]).reshape(3, 1, 1).astype(np.float32)
    def preprocessfunc(img):
        img /= 255
        img -= mean
        img /= std
        return img
    return preprocessfunc

In [8]:
# Enclosure to retain state
def get_weighted_accuracy():
    m = keras.metrics.CategoricalAccuracy()
    def weighted_accuracy(y_true, y_preds):
        y_true = tf.reshape(y_true, (bs, 2, -1))
        y_true_lbls = y_true[:,0,:]
        return m(y_true_lbls, y_preds)
    return weighted_accuracy

In [9]:
def weighted_loss(y_true, y_preds):
    bce = keras.losses.BinaryCrossentropy(keras.losses.Reduction.NONE)
    bs = y_true.shape[0]
    y_true = tf.reshape(y_true, (bs, 2, -1))
    y_true_lbls = y_true[:,0,:]
    y_true_weights = y_true[:,1,:]
    bce_loss = bce(y_true_lbls, y_preds)
    return tf.reduce_sum(tf.multiply(bce_loss, y_true_weights))

In [10]:
from keras.models import model_from_json


class FinalModel(keras.Model):
    def __init__(self, trunk_model, incident_weights, place_weights):
        super(FinalModel, self).__init__()
        self.permute = Permute((2, 3, 1))
        self.cropped = keras.layers.experimental.preprocessing.RandomCrop(224, 224)
        self.permuteback = Permute((3, 1, 2))
        self.trunk_model = trunk_model
        self.incident_proj = Dense(len(incident_to_idx), input_shape=(1024,), name="incidents_projection", weights=incident_weights)
        self.places_proj = Dense(len(place_to_idx), input_shape=(1024,), name="places_projection", weights=place_weights)
        
    def call(self, inputs):
        x = self.permute(inputs)
        x = self.cropped(x)
        x = self.permuteback(x)
        x = self.trunk_model(x)
        
        return self.incident_proj(x), self.places_proj(x)

In [11]:
"""Metrics"""

class AverageMeter(object):
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

In [12]:
def get_place_name_from_mapping(idx):
    name = None
    if idx in index_to_place_mapping:
        name = index_to_place_mapping[idx]
    else:
        name = "no place"
    return name


def get_incident_name_from_mapping(idx):
    name = None
    if idx in index_to_incident_mapping:
        name = index_to_incident_mapping[idx]
    else:
        name = "no incident"
    return name

In [13]:
def accuracy(y_true, pred_scores, topk=1):
    if y_true.sum() == 0:
        return 100.0
    
    idx = np.argpartition(pred_scores, kth=-topk, axis=1)[:, -topk:]
    correct_topk = y_true[np.arange(idx.shape[0])[:, np.newaxis], idx].sum()
    num_pos_in_batch = y_true.sum()

    return correct_topk * ( 100.0 / num_pos_in_batch ) 

def get_acc_num_correct_out_of_total(y_true, pred_scores, topk=1):
    
    idx = np.argpartition(pred_scores, kth=-topk, axis=1)[:, -topk:]
    correct_topk = y_true[np.arange(idx.shape[0])[:, np.newaxis], idx].sum()
    num_pos_in_batch = y_true.sum()

    return correct_topk, num_pos_in_batch

In [14]:
def validate(val_loader, model, datasize, epoch=1, writer=None, activation="sigmoid"):
    """Run validation of the model with metrics.

    Args:
        args:
        val_loader:
        all_models:

    Returns:
        float: incident mAP + place mAP
    """
    # holds the metrics
    a_v_incident_top1 = AverageMeter()
    a_v_place_top1 = AverageMeter()
    a_v_incident_top5 = AverageMeter()
    a_v_place_top5 = AverageMeter()

    top1_num_correct_all, top1_num_total_all = 0, 0
    top5_num_correct_all, top5_num_total_all = 0, 0

    if activation == "softmax":
        # in this case, include "no incident" and "no place"
        ap_incidents = [[] for i in range(len(index_to_incident_mapping) + 1)]
        ap_places = [[] for i in range(len(index_to_place_mapping) + 1)]
    elif activation == "sigmoid":
        ap_incidents = [[] for i in range(len(index_to_incident_mapping))]
        ap_places = [[] for i in range(len(index_to_place_mapping))]

    # set end time as current time before training on a batch
    for batch_iteration, val_data_input in enumerate(val_loader):

        image_v = val_data_input[0]
        
        target_i_v = val_data_input[1][0][:,0,:-1] # we add :-1 for sigmoid
        weight_i_v = val_data_input[1][0][:,1,:-1]

        target_p_v = val_data_input[1][1][:,0,:-1]
        weight_p_v = val_data_input[1][1][:,1,:-1]

        # compute output 
        output = model.predict(image_v)
        incident_output = sigmoid(output[0]) if activation == "sigmoid" else softmax(output[0], axis=1)
        place_output = sigmoid(output[1]) if activation == "sigmoid" else softmax(output[1], axis=1)

        # prepare for average precison calculations
        # make sure this is batch size
        assert incident_output.shape[0] == place_output.shape[0]

        for batch_idx in range(incident_output.shape[0]):
            
            np_incident_output = incident_output[batch_idx]
            np_target_i_v = target_i_v[batch_idx]
            np_weight_i_v = weight_i_v[batch_idx]

            np_incident_output_shape = np_incident_output.shape[0]
            if activation == "softmax":
                np_incident_output_shape -= 1

            for class_idx in range(np_incident_output_shape):
                confidence = np_incident_output[class_idx]
                label = np_target_i_v[class_idx]
                weight = np_weight_i_v[class_idx]

                pos = (label == 1 and weight > 0)
                neg = (label == 0 and weight > 0)
                if pos:
                    ap_incidents[class_idx].append((confidence, 1))
                elif neg:
                    ap_incidents[class_idx].append((confidence, 0))

            np_place_output = place_output[batch_idx]
            np_target_p_v = target_p_v[batch_idx]
            np_weight_p_v = weight_p_v[batch_idx]

            np_place_output_shape = np_place_output.shape[0]
            if activation == "softmax":
                np_place_output_shape -= 1

            for class_idx in range(np_place_output_shape):
                confidence = np_place_output[class_idx]
                label = np_target_p_v[class_idx]
                weight = np_weight_p_v[class_idx]

                pos = (label == 1 and weight > 0)
                neg = (label == 0 and weight > 0)
                if pos:
                    ap_places[class_idx].append((confidence, 1))
                elif neg:
                    ap_places[class_idx].append((confidence, 0))

        # incident accuracy
        incident_prec1 = accuracy(incident_output, target_i_v, topk=1)
        incident_prec5 = accuracy(incident_output, target_i_v, topk=5)

        top1_num_correct, top1_num_total = get_acc_num_correct_out_of_total(incident_output, target_i_v, topk=1)
        top1_num_correct_all += top1_num_correct
        top1_num_total_all += top1_num_total
        top5_num_correct, top5_num_total = get_acc_num_correct_out_of_total(incident_output, target_i_v, topk=5)
        top5_num_correct_all += top5_num_correct
        top5_num_total_all += top5_num_total

        # place accuracy
        place_prec1 = accuracy(place_output, target_p_v, topk=1)
        place_prec5 = accuracy(place_output, target_p_v, topk=5)

        a_v_place_top1.update(place_prec1, image_v.shape[0])
        a_v_incident_top1.update(incident_prec1, image_v.shape[0])
        a_v_place_top5.update(place_prec5, image_v.shape[0])
        a_v_incident_top5.update(incident_prec5, image_v.shape[0])

        # measure elapsed time
        if batch_iteration % 100 == 0:
            print('Evaluating: [{0}/{1}]\t'
                  'Incident Prec@1 {a_v_incident_top1.val:.3f} ({a_v_incident_top1.avg:.3f})\t'
                  'Place Prec@1 {a_v_place_top1.val:.3f} ({a_v_place_top1.avg:.3f})\t'
                  'Place Prec@5 {a_v_place_top5.val:.3f} ({a_v_place_top5.avg:.3f})\t'
                  'Incident Prec@5 {a_v_incident_top5.val:.3f} ({a_v_incident_top5.avg:.3f})\t'.format(
                batch_iteration,
                len(val_loader),
                a_v_incident_top1=a_v_incident_top1,
                a_v_place_top1=a_v_place_top1,
                a_v_incident_top5=a_v_incident_top5,
                a_v_place_top5=a_v_place_top5))
            
        if batch_iteration == np.ceil(datasize / image_v.shape[0]):
            break

    print("\nCalculating APs\n")
    # threshold are [0.0, 0.1, ..., 1.0] (11 values)
    thresholds = [round(i, 2) for i in list(np.linspace(0.0, 1.0, num=11))]

    # holds average precision for each class
    ap_incident_dict = {}
    ap_place_dict = {}

    # ap for incidents
    for i in range(len(ap_incidents)):
        class_points = ap_incidents[i]
        name = get_incident_name_from_mapping(i)
        if len(class_points) == 0:
            print("{} has no relevant labels".format(name))
            ap_incident_dict[name] = 1
            continue

        sorted_by_confidence = sorted(
            class_points, key=lambda x: x[0], reverse=True)

        count = 0
        pos_targets = 0
        max_prec = defaultdict(int)
        total_positives = int(np.sum(np.array(class_points)[:, 1]))
        if total_positives == 0:
            print("{} has no pos labels".format(name))
            continue  # alert in this case maybe

        # go in order
        for confidence, label in sorted_by_confidence:
            count += 1
            if label == 1:
                pos_targets += 1
            precision = pos_targets / count
            recall = pos_targets / total_positives

            for thresh in thresholds:
                if recall >= thresh:
                    max_prec[thresh] = max(max_prec[thresh], precision)
            if pos_targets == total_positives:
                break
        l = list(max_prec.values())
        average_precision = sum(l) / len(l)
        ap_incident_dict[get_incident_name_from_mapping(i)] = average_precision

    # repeat for places
    for i in range(len(ap_places)):
        class_points = ap_places[i]
        name = get_place_name_from_mapping(i)
        if len(class_points) == 0:
            print("{} has no relevant labels".format(name))
            ap_place_dict[name] = 1
            continue

        sorted_by_confidence = sorted(
            class_points, key=lambda x: x[0], reverse=True)

        count = 0
        pos_targets = 0
        max_prec = defaultdict(int)
        total_positives = int(np.sum(np.array(class_points)[:, 1]))
        if total_positives == 0:
            print("{} has no pos labels".format(name))
            continue  # alert in this case maybe

        # go in order
        for confidence, label in sorted_by_confidence:
            count += 1
            if label == 1:
                pos_targets += 1
            precision = pos_targets / count
            recall = pos_targets / total_positives
            for thresh in thresholds:
                if recall >= thresh:
                    max_prec[thresh] = max(max_prec[thresh], precision)

            if pos_targets == total_positives:
                break
        l = list(max_prec.values())
        average_precision = sum(l) / len(l)
        ap_place_dict[get_place_name_from_mapping(i)] = average_precision

    # ap metrics
    incident_map = 0
    for incident, ap in ap_incident_dict.items():
        incident_map += ap
    incident_map /= len(ap_incident_dict)

    place_map = 0
    for place, ap in ap_place_dict.items():
        place_map += ap
    place_map /= len(ap_place_dict)

    print("incident map", incident_map)
    print("place map", place_map)
    print("incident top1", top1_num_correct_all / top1_num_total_all)
    print("incident top5", top5_num_correct_all / top5_num_total_all)
    return incident_map + place_map

In [15]:
train_set = get_dataset(train_paths, train_dir, 200)
val_set = get_dataset(val_paths, val_dir, 200)

train_df = pd.DataFrame(train_set)
val_df = pd.DataFrame(val_set)

                                                        

In [16]:
"""Data loader"""

imgen = ImageDataGenerator(
    preprocessing_function=getpreprocessfunc(),
)

imgen = imgen.flow_from_dataframe(
    val_df,
    directory=val_dir,
    x_col="path",
    y_col=["incidents", "place"],
    weight_col=None,
    target_size=(256, 256),
    color_mode="rgb",
    classes=None,
    class_mode="multi_output",
    batch_size=64,
    shuffle=False,
    seed=True,
    save_to_dir=None,
    save_prefix="",
    save_format="png",
    subset=None,
    interpolation="nearest",
    validate_filenames=True,
)

Found 200 validated image filenames.


In [17]:
"""Model Initialization"""

import resnet

trunk = resnet.trunk()
resnet.init_weights_from_torch(trunk, "/kuacc/users/asafaya19/IncidentsDataset/pretrained_weights/eccv_final_model_trunk.pth.tar")

place_w = np.load(os.path.join(abs_path,'models/place_w.npy')).T
place_b = np.load(os.path.join(abs_path,'models/place_b.npy')).T
incident_w = np.load(os.path.join(abs_path,'models/incident_w.npy')).T
incident_b = np.load(os.path.join(abs_path,'models/incident_b.npy')).T

mdl = FinalModel(trunk, [incident_w, incident_b], [place_w, place_b])

opt = keras.optimizers.Adam(lr=1e-5)

losses = {
    "output_1": weighted_loss,
    "output_2": weighted_loss,
}

mdl.compile(optimizer=opt, loss=losses, metrics=[get_weighted_accuracy()])

x, y = next(imgen)
o = mdl(x, training=False)

In [25]:
imgen.reset()
validate(imgen, mdl, val_df.shape[0])

Evaluating: [0/745]	Incident Prec@1 27.079 (27.079)	Place Prec@1 7.306 (7.306)	Place Prec@5 12.010 (12.010)	Incident Prec@5 30.752 (30.752)	


  "Palette images with Transparency expressed in bytes should be "


Evaluating: [100/745]	Incident Prec@1 37.152 (33.064)	Place Prec@1 9.594 (9.523)	Place Prec@5 19.372 (18.848)	Incident Prec@5 40.633 (37.305)	
Evaluating: [200/745]	Incident Prec@1 23.315 (32.534)	Place Prec@1 7.640 (9.401)	Place Prec@5 19.589 (18.993)	Incident Prec@5 26.339 (36.870)	
Evaluating: [300/745]	Incident Prec@1 35.895 (32.309)	Place Prec@1 15.575 (9.290)	Place Prec@5 27.026 (18.874)	Incident Prec@5 47.329 (36.713)	


  "Possibly corrupt EXIF data.  "


Evaluating: [400/745]	Incident Prec@1 46.454 (32.406)	Place Prec@1 6.446 (9.182)	Place Prec@5 16.314 (18.830)	Incident Prec@5 48.503 (36.848)	


  "Possibly corrupt EXIF data.  "


Evaluating: [500/745]	Incident Prec@1 42.440 (32.690)	Place Prec@1 11.044 (9.325)	Place Prec@5 18.325 (18.964)	Incident Prec@5 45.501 (37.127)	
Evaluating: [600/745]	Incident Prec@1 41.597 (32.851)	Place Prec@1 11.932 (9.291)	Place Prec@5 19.311 (18.982)	Incident Prec@5 45.614 (37.287)	
Evaluating: [700/745]	Incident Prec@1 32.385 (32.875)	Place Prec@1 13.051 (9.292)	Place Prec@5 21.859 (18.975)	Incident Prec@5 40.435 (37.356)	

Calculating APs

incident map 0.6363688474912388
place map 0.1550042201700387
incident top1 0.32969336966341245
incident top5 0.37456993258006455


0.7913730676612775

In [None]:
train_set = get_dataset(train_paths, train_dir, 20000000)
train_df = pd.DataFrame(train_set)

imgen = ImageDataGenerator(
    preprocessing_function=getpreprocessfunc(),
)

imgen = imgen.flow_from_dataframe(
    train_df,
    directory=train_dir,
    x_col="path",
    y_col=["incidents", "place"],
    weight_col=None,
    target_size=(256, 256),
    color_mode="rgb",
    classes=None,
    class_mode="multi_output",
    batch_size=64,
    shuffle=False,
    seed=True,
    save_to_dir=None,
    save_prefix="",
    save_format="png",
    subset=None,
    interpolation="nearest",
    validate_filenames=False,
)

imgen.reset()
validate(imgen, mdl, train_df.shape[0])

  "Possibly corrupt EXIF data.  "
  "Image appears to be a malformed MPO file, it will be "
 15%|█▌        | 155208/1029726 [1:52:50<8:22:20, 29.01it/s] 