In [2]:
%load_ext autoreload
%autoreload 2

from config.max_model import ModelConfig
from functional import seq
from models import MaxModel
import numpy as np
from pathlib import Path
import tensorflow as tf
from tensorpack import (TrainConfig, SyncMultiGPUTrainerParameterServer as Trainer, 
                        PredictConfig, MultiProcessDatasetPredictor as Predictor,
                        SaverRestore, logger)
from tensorpack.callbacks import (ScheduledHyperParamSetter, MaxSaver, ModelSaver,
                                  InferenceRunner as InfRunner)
from tensorpack.predict import SimpleDatasetPredictor
from tensorpack.tfutils.common import get_default_sess_config
from utils import DataManager
from utils.validation import (Accumulator, AggregateMetric, calcu_metrics)

vgg_loc = "./data/vgg_16.ckpt"
# log 存放位置
#     log 中包含 Inferencer 给出的 metrics， 可用tensorboard查看
#     命令 tensorboard --logdir XXXX --port XXXX
#     类似 jupyter notebook，tensorboard 需要通过浏览器使用， 请建立 SSH tunnel
#     log_dir 下也会存放 ModelSaver 生成的 checkpoint文件，这些文件占用空间很大，
#     建议将 log_dir 放在 /data 分区下，或建立软链接
log_dir = 'train_log/vgg_max_model/'
logger.set_logger_dir(log_dir, action='d')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
[32m[1029 10:54:56 @logger.py:74][0m Argv: /home/fuxiaofeng/Applications/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py -f /run/user/1012/jupyter/kernel-34fcf0de-44ab-480a-ae3c-57bbfccdb2d9.json


In [3]:
# 可以在这里修改配置
config = ModelConfig()
# 新增：（width, height) 图片大小。DataManager需要知道图片的大小。这里也可以进行resize操作
config.image_size = (320, 128)
# 新增：划分比例
config.proportion = {'train': 0.6, 'val': 0.2, 'test': 0.2}
# 新增: 在划分数据集时，允许比例的误差
config.tolerance_margin = 0.02
# 新增：在划分数据集时，是否打乱顺序
#    设置为 True，每次划分的结果会不同
#    不过现在我把随机数种子固定了，无论true 或 false每次划分的结果都一样
#    之后可能会取消固定随机数种子
config.shuffle_separation = True
# 新增：要使用哪些方向
config.directions = ['ventral', 'dorsal', 'lateral']
# 新增：DataManager 需要知道 batch_size
config.batch_size = 20
# 为了兼容 DataManager， 我把 stage_allowed 改为 stages 
config.stages = [6]
# 同样的， top_k_labels 改为 annotation_number
config.annotation_number = 10
# max_img 改为 max_sequence_length
config.max_sequence_length = 10
# 请修改为图片所在位置
config.image_directory = str(Path.home()) + \
    "/Documents/flyexpress/DL_biomedicine_image/data/pic_data/"

# standard_images.csv 与 standard_annotations.csv 附带在 repo 的 data 目录下
config.image_table_location = str(Path.home()) + \
    "/Documents/flyexpress/DL_biomedicine_image/data/standard_images.csv"
config.annotation_table_location = str(Path.home()) + \
    "/Documents/flyexpress/DL_biomedicine_image/data/standard_annotations.csv"

In [4]:
# 从vgg checkpoint 回复权值时，要忽略的tensor名称
ignore_restore = ['learning_rate', 'global_step']
# 在训练中，会保存一个某个metric最大（或最小）的模型，用于之后测试
save_name = "max-micro_auc.ckpt"
# 将 probability 转化为 0， 1 的阈值
threshold = 0.5
# 在验证集与测试集上要计算哪些 metrics
validation_metrics = ['mean_average_precision', 'macro_auc', 'micro_auc',
                      'macro_f1', 'micro_f1', 'ranking_mean_average_precision',
                      'coverage', 'ranking_loss', 'one_error']


In [5]:
data_manager = DataManager.from_config(config)
print(data_manager.get_imbalance_ratio())
train_data = data_manager.get_validation_stream()
val_data = data_manager.get_test_stream()
test_data = data_manager.get_test_stream()

Group numbers:
train: 1476, validation: 449, test: 527
Image numbers:
train: 8489, validation: 2594, test: 3087

                                         train       val      test
embryonic midgut                      1.044321  0.995556  1.125000
ventral nerve cord                    1.748603  1.823899  1.977401
embryonic brain                       1.860465  1.841772  2.028736
embryonic hindgut                     2.400922  2.277372  2.513333
embryonic dorsal epidermis            2.904762  2.741667  2.875000
embryonic/larval muscle system        3.146067  2.870690  2.932836
embryonic central nervous system      3.193182  3.235849  3.543103
embryonic ventral epidermis           3.405970  3.235849  3.319672
embryonic head epidermis              4.698842  4.907895  4.547368
dorsal prothoracic pharyngeal muscle  5.201681  4.831169  4.988636


In [None]:
model = MaxModel(config)
tf.reset_default_graph()
train_config = TrainConfig(model=model, dataflow=train_data,
                           callbacks=[
                               # 在这里设置 learning rate
                               ScheduledHyperParamSetter('learning_rate', [(0, 1e-4), (20, 1e-4)]),
                               InfRunner(val_data, [AggregateMetric(validation_metrics, threshold)],
                                         ),
                               ModelSaver(max_to_keep=10),
                               # 可以将micro_auc 改为其他的
                               MaxSaver('micro_auc', save_name),
                           ],
                           session_init=SaverRestore(
                               model_path=vgg_loc, ignore=ignore_restore),
                           # 跑多少个 epoch
                           max_epoch=2, nr_tower=2)
Trainer(train_config).train()

[32m[1029 10:55:23 @inference_runner.py:83][0m InferenceRunner will eval on an InputSource of size 26
[32m[1029 10:55:23 @input_source.py:179][0m Setting up the queue 'QueueInput/input_queue' for CPU prefetching ...
[32m[1029 10:55:23 @input_source.py:460][0m Setting up StagingArea for GPU prefetching ...
[32m[1029 10:55:23 @training.py:41][0m Training a model of 2 towers
[32m[1029 10:55:23 @training.py:92][0m Building graph for training tower 0 on device LeastLoadedDeviceSetter-/gpu:0...
[32m[1029 10:55:24 @regularize.py:109][0m Add REGULARIZATION_LOSSES of 7 tensors on the total cost.
[32m[1029 10:55:24 @training.py:92][0m Building graph for training tower 1 on device LeastLoadedDeviceSetter-/gpu:1...
[32m[1029 10:55:24 @regularize.py:109][0m Add REGULARIZATION_LOSSES of 7 tensors on the total cost.
[32m[1029 10:55:25 @model_utils.py:47][0m [36mModel Parameters: 
[0mname                               shape                 dim  device
------------------------------

100%|##########|22/22[00:17<00:00, 1.16it/s]

[32m[1029 10:55:46 @base.py:267][0m Epoch 1 (global_step 22) finished, time:17.73 sec.





_before_inference finished


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  4%|3         |1/26[00:00<00:18, 1.36it/s]

_on_fetches finished


  8%|7         |2/26[00:01<00:16, 1.47it/s]

_on_fetches finished


 12%|#1        |3/26[00:02<00:15, 1.48it/s]

_on_fetches finished


 15%|#5        |4/26[00:02<00:14, 1.52it/s]

_on_fetches finished


 19%|#9        |5/26[00:03<00:13, 1.54it/s]

_on_fetches finished


In [5]:
from tensorpack import TestDataSpeed

In [7]:
TestDataSpeed(val_data).start()

  1%|          |26/5000[00:00<02:29,33.33it/s]


In [None]:
#np.set_printoptions(formatter={'float_kind': lambda x: '%.3f' % x})
model = MaxModel(config)
tf.reset_default_graph()
pred_config = PredictConfig(model=model,
                            session_init=SaverRestore(
                                model_path=log_dir + save_name),
                            output_names=['logits_export', 'label'],
                            )
pred = SimpleDatasetPredictor(pred_config, test_data)

accumulator = seq(pred.get_result()) \
    .smap(lambda a, b: (a.shape[0], calcu_metrics(a, b, config.validation_metrics, threshold))) \
    .aggregate(Accumulator(*config.validation_metrics), lambda accu, args: accu.feed(args[0], *args[1]))
metrics = accumulator.retrive()
print(metrics)