In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf

Link to the Google Sheets reporting document:
https://docs.google.com/spreadsheets/d/1o0O9HGCUABQWF1C6uHw65veY9Axd3pATWwdG2WSpdf8/edit?usp=sharing

In [2]:
# Parameters corresponding to gsheet file
# (the experiment settings that are recorded in the reporting spreadsheet)
CLASS_NBR = 6  # passed as first argument to every *_input_fn; exact meaning is defined in data_utils
MODE = "cached" # "direct" or "cached" -- selects which input pipeline the training cell builds
KERNEL = "linear" # "linear" or "rbf" -- forwarded to the classifier as `kernel`
LR = 0.4  # forwarded to the classifier as `learning_rate`
C = 3  # forwarded to the classifier as `c`

In [3]:
# Local parameters
# (settings specific to this machine / run, not tracked in the reporting spreadsheet)
BATCH_SIZE = 64  # batch size applied to every dataset fed to the estimator
CNN_OUTPUT_DIR = os.path.join("..", "tmp", "cnn_output", "VGG16")  # directory handed to the *_cnn_input_fn loaders
TARGET_W = 224  # passed to train_input_fn / test_input_fn; the sanity check shows (224, 224, 3) images
EPOCHS = 500  # used only to derive the number of training steps
# Checkpoint directories, one per MODE. With None the estimator falls back to a
# temporary directory (see `_model_dir` in the config log of the training cell).
MODEL_DIR_DIRECT = None  # e.g. "../tmp/estimator_svdd_naive_direct"
MODEL_DIR_CACHED = None  # e.g. "../tmp/estimator_svdd_naive_cached"

# Input data

In [4]:
from data_utils import train_cnn_input_fn, test_cnn_input_fn, train_input_fn, test_input_fn

# Sanity check: build every dataset once and display them as a tuple so that
# their element shapes and dtypes can be inspected in the cell output.
(
    train_cnn_input_fn(CLASS_NBR, CNN_OUTPUT_DIR),
    test_cnn_input_fn(CLASS_NBR, CNN_OUTPUT_DIR),
    train_input_fn(CLASS_NBR, TARGET_W),
    test_input_fn(CLASS_NBR, TARGET_W),
)

(<MapDataset shapes: (25088,), types: tf.float32>,
 <MapDataset shapes: ((25088,), <unknown>), types: (tf.float32, tf.int32)>,
 <MapDataset shapes: (224, 224, 3), types: tf.float32>,
 <MapDataset shapes: ((224, 224, 3), <unknown>), types: (tf.float32, tf.int32)>)

# Training

In [5]:
from estimator_svdd_naive import OCClassifier as SVDDClassifier

# Build the input functions, session hooks and checkpoint directory that match
# the selected MODE, then instantiate the classifier.
if MODE == "cached":
    # Cached mode: features are read from CNN_OUTPUT_DIR
    # (presumably precomputed VGG16 outputs -- confirm in data_utils).
    input_fn_train = lambda: train_cnn_input_fn(CLASS_NBR, CNN_OUTPUT_DIR).shuffle(1000).repeat().batch(BATCH_SIZE)
    input_fn_test = lambda: test_cnn_input_fn(CLASS_NBR, CNN_OUTPUT_DIR).batch(BATCH_SIZE)
    # No extra hooks are needed; train and test share the same (empty) list.
    train_hooks = []
    test_hooks = train_hooks
    MODEL_DIR = MODEL_DIR_CACHED
elif MODE == "direct":
    # Direct mode: raw images are pushed through the VGG network inside the
    # input pipeline. These imports are only needed on this branch.
    from vgg_network import VGG_Network
    from estimator_svdd_naive import _LoadPreTrainedWeightsVGG
    from data_utils import run_dataset_through_network

    net = VGG_Network(include_FC_head=False)

    def get_train_dataset(net, reuse=False):
        """Return the batched training images run through `net`, repeated indefinitely.

        `reuse` is forwarded unchanged to `run_dataset_through_network`.
        """
        dataset = train_input_fn(CLASS_NBR, TARGET_W).batch(BATCH_SIZE)
        dataset = run_dataset_through_network(dataset, net, reuse=reuse)
        return dataset.repeat()

    def get_test_dataset(net, reuse=False):
        """Return the batched test images run through `net`, with labels dropped.

        `reuse` is forwarded unchanged to `run_dataset_through_network`.
        """
        dataset = test_input_fn(CLASS_NBR, TARGET_W).batch(BATCH_SIZE)
        # Keep only the image of each (image, label) pair before the network pass.
        dataset = dataset.map(lambda img, label: img)
        dataset = run_dataset_through_network(dataset, net, reuse=reuse)
        return dataset

    input_fn_train = lambda: get_train_dataset(net, reuse=False)
    input_fn_test = lambda: get_test_dataset(net, reuse=False)
    # The same hook list is used for training and prediction.
    train_hooks = [_LoadPreTrainedWeightsVGG(net)]
    test_hooks = train_hooks
    MODEL_DIR = MODEL_DIR_DIRECT
else:
    # ValueError is still an Exception, but it names the actual problem and
    # reports the offending value.
    raise ValueError('MODE unknown: {!r} (expected "cached" or "direct")'.format(MODE))

classifier = SVDDClassifier(
    c=C,
    kernel=KERNEL,
    learning_rate=LR,
    model_dir=MODEL_DIR,
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_keep_checkpoint_max': 5, '_num_ps_replicas': 0, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fa719b2ee80>, '_tf_random_seed': None, '_keep_checkpoint_every_n_hours': 10000, '_model_dir': '/tmp/tmpreq2co9u', '_is_chief': True, '_log_step_count_steps': 100, '_save_summary_steps': 100, '_task_id': 0, '_session_config': None, '_train_distribute': None, '_master': '', '_evaluation_master': '', '_save_checkpoints_steps': None, '_task_type': 'worker', '_num_worker_replicas': 1, '_service': None, '_save_checkpoints_secs': 600, '_global_id_in_cluster': 0}


In [None]:
# Size of the training set used to convert epochs into steps
# (matches the 850 labels built for the train set further down -- confirm against the data).
TRAIN_SET_SIZE = 850

# `steps` counts batches, so it must be an integer. Plain `/` yields a float in
# Python 3 (6640.625 here); take the integer ceiling so the last partial epoch
# is still covered.
train_steps = -(-(EPOCHS * TRAIN_SET_SIZE) // BATCH_SIZE)

classifier.train(
    input_fn=input_fn_train,
    steps=train_steps,
    hooks=train_hooks
)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/tmpreq2co9u/model.ckpt.
INFO:tensorflow:step = 0, loss = 2590574.5
INFO:tensorflow:global_step/sec: 15.2312
INFO:tensorflow:step = 100, loss = 341997.94 (6.568 sec)
INFO:tensorflow:global_step/sec: 14.1815
INFO:tensorflow:step = 200, loss = 287906.47 (7.050 sec)
INFO:tensorflow:global_step/sec: 13.7536
INFO:tensorflow:step = 300, loss = 248730.72 (7.271 sec)
INFO:tensorflow:global_step/sec: 12.8518
INFO:tensorflow:step = 400, loss = 193951.81 (7.781 sec)
INFO:tensorflow:global_step/sec: 14.5807
INFO:tensorflow:step = 500, loss = 138991.0 (6.860 sec)
INFO:tensorflow:global_step/sec: 12.3819
INFO:tensorflow:step = 600, loss = 127662.164 (8.075 sec)
INFO:tensorflow:global_step/sec: 14.8387
INFO:tensorflow:

# Predicting

In [None]:
# Run the classifier on the test input pipeline.
predictions = classifier.predict(
    input_fn=input_fn_test,
    hooks=test_hooks
)

# Materialise the prediction iterable once so that both fields can be extracted from it.
predictions_list = list(predictions)
# Scores keep the dtype produced by the estimator: casting them to int32 would
# truncate any fractional part (assumes scores are real-valued -- confirm in estimator_svdd_naive).
predicted_scores = np.asarray([p["predicted_scores"] for p in predictions_list])
# Class labels are discrete, so an integer dtype is appropriate.
predicted_classes = np.asarray([p["predicted_classes"] for p in predictions_list]).astype(np.int32)

In [None]:
# Histogram of the predicted class labels.
df = pd.DataFrame(data=predicted_classes)
df.plot(kind="hist")

# Validation and metrics

In [None]:
# Test set: collect the ground-truth labels by iterating once over the cached
# test dataset, one element at a time.
y_test = []
input_fn = test_cnn_input_fn(CLASS_NBR, CNN_OUTPUT_DIR).batch(1)
input_fn = input_fn.make_one_shot_iterator().get_next()
# The context manager closes the session (and frees its resources) even if
# iteration fails part-way through.
with tf.Session() as sess:
    while True:
        try:
            data = sess.run(input_fn)
        except tf.errors.OutOfRangeError:
            # The one-shot iterator is exhausted: every element has been read.
            break
        # Each element is a (features, label) pair with a batch dimension of 1.
        y_test.append(data[1][0])
y_test = np.asarray(y_test)
y_test.shape

In [None]:
from validation import evaluation_summary
# Compare the ground-truth test labels with the predicted classes.
# plot_cm=True presumably also draws a confusion matrix -- confirm in validation.py.
evaluation_summary(y_test, predicted_classes, plot_cm=True)

In [None]:
from data_utils import _cnn_input_fn
# Train set: every one of the 850 training samples is labelled 1.
y_train = np.ones(850)
y_train.shape

# Predict on the *training* features. The original cell queried
# test_cnn_input_fn here, which only repeated the test-set prediction.
# No shuffle/repeat: each training sample must be scored exactly once.
predictions = classifier.predict(
    input_fn=lambda: train_cnn_input_fn(CLASS_NBR, CNN_OUTPUT_DIR).batch(BATCH_SIZE)
)

In [None]:
# Materialise the prediction iterable created in the previous cell.
predictions_list = list(predictions)
# Scores keep the dtype produced by the estimator: casting them to int32 would
# truncate any fractional part (assumes scores are real-valued -- confirm in estimator_svdd_naive).
predicted_scores = np.asarray([p["predicted_scores"] for p in predictions_list])
# Class labels are discrete, so an integer dtype is appropriate.
predicted_classes = np.asarray([p["predicted_classes"] for p in predictions_list]).astype(np.int32)

# Histogram of the predicted class labels.
df = pd.DataFrame(predicted_classes)
df.plot.hist()