目标：引入 TensorBoard 可视化，在训练过程中自动保存模型（checkpoint），并自动记录每个 epoch 的统计指标（CSV 日志）。

## 检查GPU

In [2]:
# Shell out to the nvidia-smi CLI to print the driver/GPU status table for this machine.
!nvidia-smi

Thu Dec 21 12:01:42 2017       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 378.13                 Driver Version: 378.13                    |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Graphics Device     Off  | 0000:02:00.0     Off |                  N/A |
|  0%   29C    P0    56W / 250W |      0MiB / 11172MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   1  Graphics Device     Off  | 0000:03:00.0     Off |                  N/A |
| 23%   21C    P8     9W / 250W |  10663MiB / 11172MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-------

## 导入常用库

In [3]:
import matplotlib.pyplot as plt
import openslide
from openslide import OpenSlide
import cv2
import numpy as np
import tensorflow as tf
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

%load_ext autoreload
%autoreload 2

## 导入fashionmnist并划分数据

In [4]:
from keras.utils import to_categorical
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Read the Fashion-MNIST CSV dumps. Column 0 is used as the label below and the
# remaining columns as the flattened pixel values.
data_train = pd.read_csv('/home/hjr/data/fashionmnist/fashion-mnist_train.csv')
data_test = pd.read_csv('/home/hjr/data/fashionmnist/fashion-mnist_test.csv')

img_rows, img_cols = 28, 28
input_shape = (img_rows, img_cols, 1)


def _as_image_batch(flat_pixels):
    """Reshape flat pixel rows to (n, img_rows, img_cols, 1) float32 and divide by 255."""
    batch = flat_pixels.reshape(flat_pixels.shape[0], img_rows, img_cols, 1)
    batch = batch.astype('float32')
    batch /= 255
    return batch


X = np.array(data_train.iloc[:, 1:])
y = to_categorical(np.array(data_train.iloc[:, 0]))

# Hold out 20% of the training rows as a validation set used to monitor the
# classifier during training (fixed random_state keeps the split repeatable).
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=13)

# Test data
X_test = np.array(data_test.iloc[:, 1:])
y_test = to_categorical(np.array(data_test.iloc[:, 0]))

# Same reshape + scaling for all three splits.
X_train, X_val, X_test = (_as_image_batch(part) for part in (X_train, X_val, X_test))

# The bare expressions below are echoed because the notebook displays every
# top-level expression value, not just the last one.
print('train-set')
X_train.shape
y_train.shape
print('validation-set')
X_val.shape
y_val.shape
print('test-set')
X_test.shape
y_test.shape

train-set


(48000, 28, 28, 1)

(48000, 10)

validation-set


(12000, 28, 28, 1)

(12000, 10)

test-set


(10000, 28, 28, 1)

(10000, 10)

## 构建cnn模型

In [5]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.layers.normalization import BatchNormalization

# Hyper-parameters declared next to the model.
# NOTE(review): the training cell of this notebook passes BATCH_SIZE (250), not
# this lowercase batch_size -- confirm which value is intended.
batch_size = 256
num_classes = 10
epochs = 50

# input image dimensions
img_rows, img_cols = 28, 28

# Three convolution stages with dropout after each, followed by a dense
# classifier head ending in a softmax over num_classes outputs.
model = Sequential([
    Conv2D(32,
           kernel_size=(3, 3),
           activation='relu',
           kernel_initializer='he_normal',
           input_shape=input_shape),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    Conv2D(128, kernel_size=(3, 3), activation='relu'),
    Dropout(0.4),

    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(num_classes, activation='softmax'),
])

# Labels are one-hot encoded, hence categorical cross-entropy.
model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.categorical_crossentropy,
    metrics=['accuracy'],
)

## 定义batch_size和epochs

In [17]:
# Mini-batch size and epoch count consumed by the callback and training cells.
BATCH_SIZE, epochs = 250, 50

## 创建子文件夹用于记录

In [10]:
# Create the output folders for this run. ModelCheckpoint and CSVLogger write
# plain files, so their parent folders have to exist before training starts.
# Done in Python (not `!mkdir`) so it is portable and raises immediately on
# failure instead of only printing a shell error.
for _out_dir in (
    "fashionmnist-cnn-12-21/tensorboard_log",
    "fashionmnist-cnn-12-21/model-checkpoint",
    "fashionmnist-cnn-12-21/csv-log",
):
    # Explicit existence check keeps the cell re-runnable (like `mkdir -p`)
    # without relying on the Python-3-only `exist_ok` argument.
    if not os.path.isdir(_out_dir):
        os.makedirs(_out_dir)

## 定义callback

In [19]:
from keras.callbacks import TensorBoard, ModelCheckpoint, CSVLogger, EarlyStopping


# TensorBoard takes a directory and can create it itself, whereas the CSV log
# and the model checkpoints are files: if they should live in sub-directories,
# those sub-directories have to be created beforehand.

# --- TensorBoard -----------------------------------------------------------
# Saves loss/accuracy data for visualisation with TensorBoard.
# Use a different log directory (or clear this one) for every new training run.
log_dir_path = "fashionmnist-cnn-12-21/tensorboard_log/first"
tf_board = TensorBoard(
    log_dir=log_dir_path,
    batch_size=BATCH_SIZE,
    write_graph=True,
)

# --- ModelCheckpoint -------------------------------------------------------
# Saves the whole model, but only when val_acc improves on the best value so far.
# NOTE(review): the original note described this as overwriting a single file,
# yet the name template embeds epoch and val_acc -- confirm which is intended.
model_save_path = "fashionmnist-cnn-12-21/model-checkpoint/{epoch:02d}-{val_acc:.3f}.hdf5"
model_checkpt = ModelCheckpoint(
    filepath=model_save_path,
    monitor='val_acc',
    mode='max',
    save_best_only=True,
    verbose=0,
)

# --- CSVLogger -------------------------------------------------------------
# Writes per-epoch loss/accuracy to a file for later post-processing and
# visualisation; the author notes the file is overwritten on each run.
csv_logger_path = 'fashionmnist-cnn-12-21/csv-log/train.log'
csv_logger = CSVLogger(filename=csv_logger_path)

callbacks_list = [tf_board, model_checkpt, csv_logger]


## 训练

In [20]:
# Training keeps running even if the browser tab is closed while fit() is
# executing; progress can still be followed through TensorBoard, the model
# checkpoints or the CSV log.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    batch_size=BATCH_SIZE,
    epochs=epochs,
    callbacks=callbacks_list,
    verbose=0,  # no console output from fit
)