# TensorFlow 那些事儿之DL中的 HELLO WORLD


- 基于MNIST数据集，运用TensorFlow中 **tf.estimator** 预制的 **tf.estimator.DNNClassifier** 搭建一个简单的多层神经网络，实现模型的训练，验证和测试

- TensorBoard的简单使用


## 看看MNIST数据集的样子


### 导入各个库

In [1]:
%matplotlib inline
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import multiprocessing


from tensorflow import data
from tensorflow.python.feature_column import feature_column

tf.__version__

'1.13.1'

### MNIST数据集载入

In [2]:
TRAIN_DATA_FILES_PATTERN = 'data_csv/mnist_train.csv'
VAL_DATA_FILES_PATTERN = 'data_csv/mnist_val.csv'
TEST_DATA_FILES_PATTERN = 'data_csv/mnist_test.csv'

MULTI_THREADING = True
RESUME_TRAINING = False

NUM_CLASS = 10
IMG_SHAPE = [28,28]

IMG_WIDTH = 28
IMG_HEIGHT = 28
BATCH_SIZE = 128

In [3]:
# train_data = pd.read_csv(TRAIN_DATA_FILES_PATTERN)
# train_data = pd.read_csv(TRAIN_DATA_FILES_PATTERN, header=None, names=HEADER )
train_data = pd.read_csv(TRAIN_DATA_FILES_PATTERN, header=None)
test_data = pd.read_csv(TEST_DATA_FILES_PATTERN, header=None)
val_data = pd.read_csv(VAL_DATA_FILES_PATTERN, header=None)

train_values = train_data.values
train_data = train_values[:,1:]/255.0
train_label = train_values[:,0:1].squeeze()

val_values = val_data.values
val_data = val_values[:,1:]/255.0
val_label = val_values[:,0:1].squeeze()

test_values = test_data.values
test_data = test_values[:,1:]/255.0
test_label = test_values[:,0:1].squeeze()

print('test_data',np.shape(test_data))
print('test_label',np.shape(test_label))

print('val_data',np.shape(val_data))
print('val_label',np.shape(val_label))

print('train_data',np.shape(train_data))
print('train_label',np.shape(train_label))

# train_data.head(10)
# test_data.head(10)

test_data (10000, 784)
test_label (10000,)
val_data (5000, 784)
val_label (5000,)
train_data (55000, 784)
train_label (55000,)


In [4]:
img_shape = IMG_SHAPE

def plot_images(images, cls_true, cls_pred=None):
    assert len(images) == len(cls_true) == 9
    
    # Create figure with 3x3 sub-plots.
    fig, axes = plt.subplots(3, 3)
    fig.subplots_adjust(hspace=0.3, wspace=0.3)

    for i, ax in enumerate(axes.flat):
        # Plot image.
        ax.imshow(images[i].reshape(img_shape), cmap='binary')

        # Show true and predicted classes.
        if cls_pred is None:
            xlabel = "True: {0}".format(cls_true[i])
        else:
            xlabel = "True: {0}, Pred: {1}".format(cls_true[i], cls_pred[i])

        # Show the classes as the label on the x-axis.
        ax.set_xlabel(xlabel)
        
        # Remove ticks from the plot.
        ax.set_xticks([])
        ax.set_yticks([])
    
    # Ensure the plot is shown correctly with multiple plots
    # in a single Notebook cell.
    plt.show()

##  重头戏之怎么用 tf.estimator.DNNClassifier 

### 先看看input_fn之创建输入函数

- 采用 **datasetAPI** 构造输入函数

In [5]:
# validate tf.data.TextLineDataset() using make_one_shot_iterator()

def decode_line(line):
    # Decode the csv_line to tensor.
    record_defaults = [[1.0] for col in range(785)]
    items = tf.decode_csv(line, record_defaults)
    features = items[1:785]
    label = items[0]

    features = tf.cast(features, tf.float32)
    features = tf.reshape(features,[28,28,1])
    label = tf.cast(label, tf.int64)
#     label = tf.one_hot(label,num_class)
    return features,label

In [6]:
def csv_input_fn(files_name_pattern, mode=tf.estimator.ModeKeys.EVAL, 
                 skip_header_lines=1, 
                 num_epochs=None, 
                 batch_size=128):
    shuffle = True if mode == tf.estimator.ModeKeys.TRAIN else False
        
    num_threads = multiprocessing.cpu_count() if MULTI_THREADING else 1
     
    print("")
    print("* data input_fn:")
    print("================")
    print("Input file(s): {}".format(files_name_pattern))
    print("Batch size: {}".format(batch_size))
    print("Epoch Count: {}".format(num_epochs))
    print("Mode: {}".format(mode))
    print("Thread Count: {}".format(num_threads))
    print("Shuffle: {}".format(shuffle))
    print("================")
    print("")

    file_names = tf.matching_files(files_name_pattern)
    dataset = data.TextLineDataset(filenames=file_names).skip(1)
#     dataset = tf.data.TextLineDataset(filenames).skip(1)
    print("DATASET",dataset)

    # Use `Dataset.map()` to build a pair of a feature dictionary and a label
    # tensor for each example.
    dataset = dataset.map(decode_line)
    print("DATASET_1",dataset)
    dataset = dataset.shuffle(buffer_size=10000)
    print("DATASET_2",dataset)
    dataset = dataset.batch(32)
    print("DATASET_3",dataset)
    dataset = dataset.repeat(num_epochs)
    print("DATASET_4",dataset)
    iterator = dataset.make_one_shot_iterator()
    
    # `features` is a dictionary in which each value is a batch of values for
    # that feature; `labels` is a batch of labels.
    features, labels = iterator.get_next()
    
    features = {'images':features}
    
    return features,labels


In [7]:
features, target = csv_input_fn(files_name_pattern=TRAIN_DATA_FILES_PATTERN)
print("Features in CSV: {}".format(list(features.keys())))
print("Target in CSV: {}".format(target))


* data input_fn:
Input file(s): data_csv/mnist_train.csv
Batch size: 128
Epoch Count: None
Mode: eval
Thread Count: 6
Shuffle: False

DATASET <DatasetV1Adapter shapes: (), types: tf.string>
DATASET_1 <DatasetV1Adapter shapes: ((28, 28, 1), ()), types: (tf.float32, tf.int64)>
DATASET_2 <DatasetV1Adapter shapes: ((28, 28, 1), ()), types: (tf.float32, tf.int64)>
DATASET_3 <DatasetV1Adapter shapes: ((?, 28, 28, 1), (?,)), types: (tf.float32, tf.int64)>
DATASET_4 <DatasetV1Adapter shapes: ((?, 28, 28, 1), (?,)), types: (tf.float32, tf.int64)>
Features in CSV: ['images']
Target in CSV: Tensor("IteratorGetNext:1", shape=(?,), dtype=int64)


### 定义feature_columns

In [8]:
feature_x = tf.feature_column.numeric_column('images', shape=[28,28])
# print((feature_x))

feature_columns = [feature_x]
# print((feature_columns))

In [9]:
num_hidden_units = [512, 256, 128]

### DNNClassifier来啦

In [10]:
num_class = NUM_CLASS

model = tf.estimator.DNNClassifier(feature_columns = feature_columns,
                                   hidden_units = num_hidden_units,
                                   activation_fn = tf.nn.relu,
                                   n_classes = num_class,
                                   model_dir = '/home/moDisk/Models/mnist')

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_task_type': 'worker', '_is_chief': True, '_protocol': None, '_log_step_count_steps': 100, '_device_fn': None, '_service': None, '_save_summary_steps': 100, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_eval_distribute': None, '_tf_random_seed': None, '_num_worker_replicas': 1, '_master': '', '_evaluation_master': '', '_train_distribute': None, '_model_dir': '/home/moDisk/Models/mnist', '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f2d94055fd0>, '_num_ps_replicas': 0, '_global_id_in_cluster': 0, '_save_checkpoints_steps': None, '_task_id': 0, '_save_checkpoints_secs': 600, '_experimental_distribute': None}


### 愉快滴训练吧

In [None]:
input_fn = lambda: csv_input_fn(\
                                files_name_pattern= TRAIN_DATA_FILES_PATTERN,mode=tf.estimator.ModeKeys.TRAIN)

model.train(input_fn, steps = 2000)

Instructions for updating:
Colocations handled automatically by placer.

* data input_fn:
Input file(s): data_csv/mnist_train.csv
Batch size: 128
Epoch Count: None
Mode: train
Thread Count: 6
Shuffle: True

DATASET <DatasetV1Adapter shapes: (), types: tf.string>
DATASET_1 <DatasetV1Adapter shapes: ((28, 28, 1), ()), types: (tf.float32, tf.int64)>
DATASET_2 <DatasetV1Adapter shapes: ((28, 28, 1), ()), types: (tf.float32, tf.int64)>
DATASET_3 <DatasetV1Adapter shapes: ((?, 28, 28, 1), (?,)), types: (tf.float32, tf.int64)>
DATASET_4 <DatasetV1Adapter shapes: ((?, 28, 28, 1), (?,)), types: (tf.float32, tf.int64)>
INFO:tensorflow:Calling model_fn.
Instructions for updating:
Use tf.cast instead.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.


### 验证一下呗

In [15]:
input_fn = lambda: csv_input_fn(files_name_pattern= VAL_DATA_FILES_PATTERN,mode=tf.estimator.ModeKeys.EVAL)

model.evaluate(input_fn,steps=1)


* data input_fn:
Input file(s): data_csv/mnist_val.csv
Batch size: 128
Epoch Count: None
Mode: eval
Thread Count: 4
Shuffle: False

DATASET <SkipDataset shapes: (), types: tf.string>
DATASET_1 <MapDataset shapes: ((28, 28, 1), ()), types: (tf.float32, tf.int64)>
DATASET_2 <ShuffleDataset shapes: ((28, 28, 1), ()), types: (tf.float32, tf.int64)>
DATASET_3 <BatchDataset shapes: ((?, 28, 28, 1), (?,)), types: (tf.float32, tf.int64)>
DATASET_4 <RepeatDataset shapes: ((?, 28, 28, 1), (?,)), types: (tf.float32, tf.int64)>
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-10-25-03:38:01
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./simple_dnn_dataset/model.ckpt-2200
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Evaluation [1/1]
INFO:tensorflow:Finished evaluation at 2018-10-25-03:38:10
INFO:tensorflow:Saving dict for global step 2200: accu

{'accuracy': 0.9375,
 'average_loss': 0.14245859,
 'global_step': 2200,
 'loss': 4.558675}

### 测试测试吧

In [19]:
import itertools

input_fn = lambda: csv_input_fn(\
                                files_name_pattern= TEST_DATA_FILES_PATTERN,mode=tf.estimator.ModeKeys.PREDICT,batch_size=10)

predictions = list(itertools.islice(model.predict(input_fn=input_fn),10))
# print('PREDICTIONS',predictions)
print("")
print("* Predicted Classes: {}".format(list(map(lambda item: item["classes"][0]
    ,predictions))))




* data input_fn:
Input file(s): data_csv/mnist_test.csv
Batch size: 10
Epoch Count: None
Mode: infer
Thread Count: 4
Shuffle: False

DATASET <SkipDataset shapes: (), types: tf.string>
DATASET_1 <MapDataset shapes: ((28, 28, 1), ()), types: (tf.float32, tf.int64)>
DATASET_2 <ShuffleDataset shapes: ((28, 28, 1), ()), types: (tf.float32, tf.int64)>
DATASET_3 <BatchDataset shapes: ((?, 28, 28, 1), (?,)), types: (tf.float32, tf.int64)>
DATASET_4 <RepeatDataset shapes: ((?, 28, 28, 1), (?,)), types: (tf.float32, tf.int64)>
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./simple_dnn_dataset/model.ckpt-2200
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.

* Predicted Classes: [b'0', b'7', b'6', b'3', b'4', b'6', b'5', b'8', b'5', b'6']
