In [1]:
# Enable IPython's autoreload extension in mode 2: all imported modules are
# reloaded before each cell executes, so edits to the imported modules are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

from config.rnn import default
from models import RNNV2, RNN
import numpy as np
from functional import seq
import tensorflow as tf
from tensorpack import (TrainConfig, SyncMultiGPUTrainerParameterServer as Trainer, 
                        PredictConfig, MultiProcessDatasetPredictor as Predictor,
                        SaverRestore, logger)
from tensorpack.callbacks import (ScheduledHyperParamSetter, MaxSaver, ModelSaver,
                                  DataParallelInferenceRunner as InfRunner)
from tensorpack.predict import SimpleDatasetPredictor
from tensorpack.tfutils.common import get_default_sess_config
from utils import DataManager
from utils.validation import (Accumulator, AggregateMetric, calcu_metrics)

# Directory handed to tensorpack's logger for this run.
log_dir = './train_log/rnn_v2/1conv-0aconv/'

# Checkpoint path; passed further down as `model_path` to SaverRestore.
resnet_loc = "./data/resnet_v2_101/resnet_v2_101.ckpt"

# NOTE: when `log_dir` already exists this call stops and asks on stdin what
# to do (keep / backup / delete / new / quit), which blocks an unattended
# "Run All". NOTE(review): tensorpack's set_logger_dir is believed to accept an
# `action` argument that answers the prompt up front -- confirm for the
# installed version before relying on it.
logger.set_logger_dir(log_dir)

[32m[1101 08:52:53 @logger.py:94][0m [5m[31mWRN[0m Log directory ./train_log/rnn_v2/1conv-0aconv/ exists! Please either backup/delete it, or use a new directory.
[32m[1101 08:52:53 @logger.py:96][0m [5m[31mWRN[0m If you're resuming from a previous run you can choose to keep it.
[32m[1101 08:52:53 @logger.py:97][0m Select Action: k (keep) / b (backup) / d (delete) / n (new) / q (quit):
d
[32m[1101 08:53:00 @logger.py:74][0m Argv: /home/fuxiaofeng/Applications/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py -f /run/user/1012/jupyter/kernel-c287103e-999a-4bf3-b64f-83820da98e7f.json


In [2]:
# `config` is bound to the very same object as the imported `default` (no copy
# is made), so every attribute assigned on `config` in later cells is also
# visible through `default`.
config = default

# File name given to MaxSaver for the best-'micro_auc' checkpoint.
save_name = "all-stages-max-micro-auc.tfmodel"

# Names passed as `ignore` to SaverRestore when loading the checkpoint.
ignore_restore = ['learning_rate', 'global_step']

In [9]:
# First pass: build a DataManager from the config with a train/test-only
# proportion (no validation share), then keep both resulting sets so the next
# cell can re-split them.
# NOTE(review): the meaning of `stages = [6]` and `annotation_number = None`
# is defined by DataManager / the config module, not visible here.
config.stages = [6]
config.proportion = {
    'train': 0.55,
    'val': 0.0,
    'test': 0.45,
}
config.annotation_number = None

dm = DataManager.from_config(config)

train_set = dm.get_train_set()
test_set = dm.get_test_set()

In [10]:
# Second pass: rebuild the DataManager from the sets produced by the previous
# cell, now with a train/val proportion. `dm` is rebound here, replacing the
# manager created from the config.
# NOTE(review): judging by the displayed counts, the earlier train set appears
# to be divided into train/val while the test set is carried over -- confirm
# against DataManager.from_dataset.
config.proportion = {
    'train': 0.1,
    'val': 0.9,
    'test': 0.0,
}
config.annotation_number = 10

dm = DataManager.from_dataset(train_set, test_set, config)

# Last expression of the cell: shows the size summary of each split.
dm.get_num_info()

{'test': (1154, 6391), 'train': (157, 1045), 'val': (1141, 6734)}

In [11]:
# --- Regularisation: both effectively switched off for this run. ---
config.weight_decay = 0.0
config.dropout_keep_prob = 1.0

# --- Model-specific settings (semantics defined by the model code). ---
config.gamma = 2
config.use_glimpse = True
config.doubly_stochastic_lambda = 0
config.read_time = 5

# --- Input pipeline. ---
config.batch_size = 64

train_data = dm.get_train_stream()
# NOTE(review): `val_data` is created from the *train* stream, so the
# inference callback in the training cell (and therefore MaxSaver's
# 'micro_auc' model selection) is evaluated on training data, even though a
# 'val' split exists. If DataManager offers a validation-stream accessor it
# was probably intended here -- confirm before trusting reported metrics.
val_data = dm.get_train_stream()

# Threshold handed to AggregateMetric in the training cell.
threshold = 0.4

In [None]:
def _two_decimals(value):
    """Format a float with two digits after the decimal point."""
    return '%.2f' % value


# Print numpy floats compactly.
np.set_printoptions(formatter={'float_kind': _two_decimals})

# Model description; label scaling comes from the train-split imbalance ratio.
model = RNNV2(
    config,
    is_finetuning=False,
    label_scale=dm.get_imbalance_ratio().train.values,
)

# The default graph is cleared after the model object exists and before the
# training configuration is assembled; this ordering is kept as-is.
tf.reset_default_graph()

train_callbacks = [
    # (epoch, value) schedule for the 'learning_rate' hyper-parameter.
    ScheduledHyperParamSetter('learning_rate', [(0, 1e-3), (15, 1e-5)]),
    # Periodic evaluation on `val_data`; the trailing list matches `tower`
    # below (presumably the GPU ids used for inference -- confirm).
    InfRunner(
        val_data,
        [AggregateMetric(config.validation_metrics, threshold)],
        [0, 1],
    ),
    ModelSaver(var_collections='model_variables'),
    # Keeps the checkpoint with the highest 'micro_auc' under `save_name`.
    MaxSaver('micro_auc', save_name),
]

# Initialise from the ResNet checkpoint, skipping the names in `ignore_restore`.
checkpoint_init = SaverRestore(model_path=resnet_loc, ignore=ignore_restore)

train_config = TrainConfig(
    model=model,
    dataflow=train_data,
    callbacks=train_callbacks,
    session_init=checkpoint_init,
    max_epoch=20,
    tower=[0, 1],
)

Trainer(train_config).train()

[32m[1101 08:54:12 @inference_runner.py:83][0m InferenceRunner will eval on an InputSource of size 2
[32m[1101 08:54:13 @input_source.py:179][0m Setting up the queue 'QueueInput/input_queue' for CPU prefetching ...
[32m[1101 08:54:13 @input_source.py:460][0m Setting up StagingArea for GPU prefetching ...
[32m[1101 08:54:13 @training.py:41][0m Training a model of 2 towers
[32m[1101 08:54:13 @training.py:92][0m Building graph for training tower 0 on device LeastLoadedDeviceSetter-/gpu:0...
INFO:tensorflow:logits.dtype=<dtype: 'float32'>.
INFO:tensorflow:multi_class_labels.dtype=<dtype: 'float32'>.
INFO:tensorflow:losses.dtype=<dtype: 'float32'>.
[32m[1101 08:54:17 @regularize.py:109][0m Add REGULARIZATION_LOSSES of 6 tensors on the total cost.
[32m[1101 08:54:18 @training.py:92][0m Building graph for training tower 1 on device LeastLoadedDeviceSetter-/gpu:1...
INFO:tensorflow:logits.dtype=<dtype: 'float32'>.
INFO:tensorflow:multi_class_labels.dtype=<dtype: 'float32'>.
INFO:

[32m[1101 08:54:29 @base.py:212][0m Creating the session ...
[32m[1101 08:54:33 @base.py:216][0m Initializing the session ...
[32m[1101 08:54:33 @sessinit.py:116][0m Restoring checkpoint from ./data/resnet_v2_101/resnet_v2_101.ckpt ...
INFO:tensorflow:Restoring parameters from ./data/resnet_v2_101/resnet_v2_101.ckpt
[32m[1101 08:54:34 @base.py:223][0m Graph Finalized.
[32m[1101 08:54:34 @param.py:144][0m After epoch 0, learning_rate will change to 0.00100000
[32m[1101 08:54:34 @concurrency.py:36][0m Starting EnqueueThread DataParallelInferenceRunner/QueueInput/input_queue ...
[32m[1101 08:54:36 @concurrency.py:36][0m Starting EnqueueThread QueueInput/input_queue ...
[32m[1101 08:54:36 @input_source.py:419][0m Pre-filling staging area ...


In [14]:
# Inspect every variable registered in the default graph's GLOBAL_VARIABLES
# collection (the cell's last expression, so the full list is displayed).
tf.global_variables()

[<tf.Variable 'EMA/QueueInput/queue_size:0' shape=() dtype=float32_ref>,
 <tf.Variable 'EMA/QueueInput/queue_size/biased:0' shape=() dtype=float32_ref>,
 <tf.Variable 'EMA/QueueInput/queue_size/local_step:0' shape=() dtype=float32_ref>,
 <tf.Variable 'resnet_v2_101/conv1/weights:0' shape=(7, 7, 3, 64) dtype=float32_ref>,
 <tf.Variable 'resnet_v2_101/conv1/biases:0' shape=(64,) dtype=float32_ref>,
 <tf.Variable 'resnet_v2_101/block1/unit_1/bottleneck_v2/preact/beta:0' shape=(64,) dtype=float32_ref>,
 <tf.Variable 'resnet_v2_101/block1/unit_1/bottleneck_v2/preact/gamma:0' shape=(64,) dtype=float32_ref>,
 <tf.Variable 'resnet_v2_101/block1/unit_1/bottleneck_v2/preact/moving_mean:0' shape=(64,) dtype=float32_ref>,
 <tf.Variable 'resnet_v2_101/block1/unit_1/bottleneck_v2/preact/moving_variance:0' shape=(64,) dtype=float32_ref>,
 <tf.Variable 'resnet_v2_101/block1/unit_1/bottleneck_v2/shortcut/weights:0' shape=(1, 1, 64, 256) dtype=float32_ref>,
 <tf.Variable 'resnet_v2_101/block1/unit_1/bot