In [1]:
import pandas as pd
import numpy as np
from typing import *

In [2]:
# Two CSVs of R-CNN detections. Rows from the first file are labelled 1 and
# rows from the second are labelled 0 (presumably correct vs. incorrect boxes,
# judging by the file names -- confirm against the dataset).
true_df = pd.read_csv("./Joseph_Dataset/cropped_rcnn_annotations.csv")
false_df = pd.read_csv("./Joseph_Dataset/incorrect_rcnn_preds.csv")

# Stack the frames row-wise (columns are not re-sorted) and build the label
# vector in the same order: all ones first, then all zeros.
frames = [true_df, false_df]
df = pd.concat(frames, sort=False)
labels = np.repeat([1.0, 0.0], [len(frame) for frame in frames])

In [3]:
from keras.layers import Dense, BatchNormalization
from keras.layers.recurrent import GRU
from keras.models import Sequential

Using TensorFlow backend.


In [4]:
# Per-detection feature matrix, one row per box:
# [xmin, xmax, ymin, ymax, width, height, confidence].
input_data = np.stack((
    df['xmin'], df['xmax'], df['ymin'], df['ymax'],
    df['xmax'] - df['xmin'],
    df['ymax'] - df['ymin'],
    df['confidence']
), axis=-1)

# Rebuild the labels in the same row order as `df` instead of re-indexing the
# global `labels` in place. Otherwise re-running this cell would pair freshly
# built (unshuffled) features with labels that were already shuffled once.
ordered_labels = np.concatenate([np.ones([len(true_df)]), np.zeros([len(false_df)])])

# Shuffle with a true permutation. Drawing indices with randint() samples
# WITH replacement, which duplicates some rows, drops others, and lets copies
# of the same row end up on both sides of the later train/validation split.
np.random.seed(1234)
indices = np.random.permutation(len(df))
input_data = input_data[indices]
labels = ordered_labels[indices]

In [67]:
# Weight applied to the negative-class term of weighted_log_loss; the
# positive-class term receives (1 - LOSS_BIAS).
LOSS_BIAS = 0.9
from keras import backend as K

def weighted_log_loss(yt, yp) -> Any:
    """
    Class-weighted binary cross-entropy.

    The mean log loss of the negative-class term is scaled by LOSS_BIAS and
    the mean log loss of the positive-class term by (1 - LOSS_BIAS).

    yt: Target
    yp: Prediction
    """
    from keras import backend as K

    eps = K.epsilon()  # keeps log() away from zero
    loss_on_positives = K.mean(-yt * K.log(yp + eps))
    loss_on_negatives = K.mean(-(1 - yt) * K.log(1 - yp + eps))

    return LOSS_BIAS * loss_on_negatives + (1. - LOSS_BIAS) * loss_on_positives


def false_pos(yt, yp) -> Any:
    """
    False-positive rate metric.

    Counts entries where yp * (1 - yt) exceeds 0.5 (for 0/1 targets: a
    negative target with a prediction above 0.5) and divides by the number
    of negative targets, floored at 1 so an all-positive batch yields 0.

    yt: Target
    yp: Prediction
    """
    from keras import backend as K

    negatives = 1 - yt
    wrongly_flagged = K.cast(yp * negatives > 0.5, 'float')
    n_negative = K.maximum(1.0, K.sum(negatives))
    return K.sum(wrongly_flagged) / n_negative


def false_neg(yt, yp) -> Any:
    """
    False-negative rate metric.

    Counts entries where (1 - yp) * yt exceeds 0.5 (for 0/1 targets: a
    positive target with a prediction below 0.5) and divides by the number
    of positive targets, floored at 1 so an all-negative batch yields 0.

    yt: Target
    yp: Prediction
    """
    from keras import backend as K

    missed = K.cast((1 - yp) * yt > 0.5, 'float')
    n_positive = K.maximum(1.0, K.sum(yt))
    return K.sum(missed) / n_positive

def threshold_binary_accuracy(y_true, y_pred):
    """
    Accuracy after hard-thresholding the predictions.

    Predictions strictly greater than 0.5 are mapped to 1 and everything else
    to 0; the result is the mean agreement with y_true.
    Uses the module-level Keras backend `K`.
    """
    cutoff = 0.5
    hard_pred = K.cast(y_pred > cutoff, 'float')
    return K.mean(K.equal(y_true, hard_pred))

In [48]:
# Baseline classifier: two 40-unit sigmoid hidden layers feeding a single
# sigmoid output unit, trained with RMSprop on mean squared error.
n_features = input_data.shape[1]
model = Sequential([
    Dense(40, input_shape=(n_features,), activation='sigmoid'),
    Dense(40, activation='sigmoid'),
    Dense(1, activation='sigmoid'),
])
model.compile(
    optimizer='rmsprop',
    loss='mse',
    metrics=['acc', threshold_binary_accuracy, false_pos, false_neg],
)

In [49]:
# Hold out the trailing 20% of the rows for validation; the leading 80% is
# used for training. Features and labels are cut at the same index.
train_ratio = 0.8
train_split = int(len(input_data) * train_ratio)
train_in, test_in = input_data[:train_split], input_data[train_split:]
train_out, test_out = labels[:train_split], labels[train_split:]

In [50]:
# Train the baseline model: mini-batches of 64 for 100 epochs, evaluating on
# the held-out split after every epoch.
model.fit(
    train_in,
    train_out,
    batch_size=64,
    epochs=100,
    validation_data=(test_in, test_out),
)

Train on 50073 samples, validate on 12519 samples
Epoch 1/100

KeyboardInterrupt: 

In [68]:
# Per-class weights handed to model2.fit: class 1 counts 0.7, class 0 counts 0.3.
class_weight = {0: 0.3, 1: 0.7}

# Second classifier: batch-normalised inputs, two 40-unit ReLU hidden layers,
# and a single sigmoid output unit, trained with RMSprop on mean squared error.
model2 = Sequential([
    BatchNormalization(input_shape=(input_data.shape[1],)),
    Dense(40, activation='relu'),
    Dense(40, activation='relu'),
    Dense(1, activation='sigmoid'),
])
model2.compile(
    optimizer='rmsprop',
    loss='mse',
    metrics=['acc', threshold_binary_accuracy, false_pos, false_neg],
)

In [None]:
# Train model2 with per-class loss weighting: mini-batches of 64 for 100
# epochs, evaluating on the held-out split after every epoch.
model2.fit(
    train_in,
    train_out,
    batch_size=64,
    epochs=100,
    class_weight=class_weight,
    validation_data=(test_in, test_out),
)

Train on 50073 samples, validate on 12519 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100


Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
 1792/50073 [>.............................] - ETA: 2s - loss: 0.0558 - acc: 0.7623 - threshold_binary_accuracy: 0.7623 - false_pos: 0.6186 - false_neg: 0.0446

In [15]:
# Persist model2's weights to "model.h5" in the working directory. Only the
# weights are written, so the same architecture must be rebuilt before they
# can be loaded back.
model2.save_weights("model.h5")