In [1]:
import tensorflow as tf
import os
import numpy as np

In [2]:
import tensorflow_datasets as tfds
AUTOTUNE = tf.data.experimental.AUTOTUNE

In [3]:
ds = tfds.load('mnist', shuffle_files=True, as_supervised=True)

In [4]:
ds['test'].cardinality()

<tf.Tensor: shape=(), dtype=int64, numpy=10000>

In [5]:
ds_splits = ["train", "test"]

## tf.train.Feature list 구성 및 직렬화

In [6]:
def _bytes_feature(value):
    """Returns a bytes_list from a string / byte."""
    if isinstance(value, type(tf.constant(0))):
        value = value.numpy()
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def _float_feature(value):
  """Returns a floast_list from a float / double."""
  return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))

def _int64_feature(value):
  """Returns an int64_list from a bool / enum / int / uint."""
  return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

def serialize_array(array):
  array = tf.io.serialize_tensor(array)
  return array

In [7]:
for d in ds_splits:
  print("saving {}".format(d))
  subset = ds[d]

  filename = d+".tfrecords"
  writer = tf.io.TFRecordWriter(filename)
  count = 0
  for image, label in subset:
    data={
        'height': _int64_feature(28),
        'width': _int64_feature(28),
        'depth': _int64_feature(1),
        'label': _int64_feature(label),
        'image_raw':_bytes_feature(serialize_array(image))
        }

    out = tf.train.Example(features=tf.train.Features(feature=data))
    writer.write(out.SerializeToString())
    count +=1
  writer.close()
  print(count)

saving train
60000
saving test
10000


## TFRecord feature 구성

In [8]:
def parse_tfr_elem(element):
  parse_dict = {
      'height': tf.io.FixedLenFeature([], tf.int64),
      'width':tf.io.FixedLenFeature([], tf.int64),
      'label':tf.io.FixedLenFeature([], tf.int64),
      'depth':tf.io.FixedLenFeature([], tf.int64),
      'image_raw' : tf.io.FixedLenFeature([], tf.string)
  }
  example_message = tf.io.parse_single_example(element, parse_dict)

  img_raw = example_message['image_raw']
  height = example_message['height']
  width = example_message['width']
  depth = example_message['depth']
  label = example_message['label']
  
  feature = tf.io.parse_tensor(img_raw, out_type=tf.uint8)
  feature = tf.reshape(feature, shape=[height,width,depth])
  return (feature, label)

In [9]:
def get_dataset(filename, set_type):
    ignore_order = tf.data.Options()
    ignore_order.experimental_deterministic = False
    dataset = tf.data.TFRecordDataset(filename)
    
    dataset = dataset.with_options(
        ignore_order
    )  
    
    dataset = dataset.map(
        parse_tfr_elem, num_parallel_calls=AUTOTUNE
    )

    dataset = dataset.shuffle(2048, reshuffle_each_iteration=True)
    dataset = dataset.batch(BATCH_SIZE)
    dataset = dataset.prefetch(buffer_size=AUTOTUNE)
    dataset = dataset.repeat() if set_type =='train' else dataset 
    return dataset

In [10]:
BATCH_SIZE = 32

In [11]:
tfr_dataset = get_dataset('train.tfrecords', "train")

In [12]:
tfr_dataset

<RepeatDataset shapes: ((None, None, None, None), (None,)), types: (tf.uint8, tf.int64)>

In [13]:
for sample in tfr_dataset.take(1):
  print(sample)

(<tf.Tensor: shape=(32, 28, 28, 1), dtype=uint8, numpy=
array([[[[0],
         [0],
         [0],
         ...,
         [0],
         [0],
         [0]],

        [[0],
         [0],
         [0],
         ...,
         [0],
         [0],
         [0]],

        [[0],
         [0],
         [0],
         ...,
         [0],
         [0],
         [0]],

        ...,

        [[0],
         [0],
         [0],
         ...,
         [0],
         [0],
         [0]],

        [[0],
         [0],
         [0],
         ...,
         [0],
         [0],
         [0]],

        [[0],
         [0],
         [0],
         ...,
         [0],
         [0],
         [0]]],


       [[[0],
         [0],
         [0],
         ...,
         [0],
         [0],
         [0]],

        [[0],
         [0],
         [0],
         ...,
         [0],
         [0],
         [0]],

        [[0],
         [0],
         [0],
         ...,
         [0],
         [0],
         [0]],

        ...,

        [[0],


## 학습 후 결과도출


In [14]:
from keras.callbacks import EarlyStopping
from keras.callbacks import ReduceLROnPlateau

reduceLR = ReduceLROnPlateau(monitor='loss', patience=3, verbose=1, factor=0.1)

earlystopping = EarlyStopping(monitor='loss', verbose=1, patience=15)

In [15]:
def Mnist():
  model = tf.keras.Sequential([
      
    tf.keras.layers.Conv2D(kernel_size=3, filters=16, padding='same', activation='relu', input_shape=[28,28, 1]),
    tf.keras.layers.Conv2D(kernel_size=3, filters=32, padding='same', activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=2),
    
    tf.keras.layers.Conv2D(kernel_size=3, filters=64, padding='same', activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=2),
    
    tf.keras.layers.Conv2D(kernel_size=3, filters=128, padding='same', activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=2),
    
    tf.keras.layers.Conv2D(kernel_size=3, filters=256, padding='same', activation='relu'),
    
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(10,'softmax')
  ])

  optimizer = tf.keras.optimizers.RMSprop(lr=0.01)
  model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])
  return model

In [16]:
model = Mnist()

In [17]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 28, 28, 16)        160       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 28, 28, 32)        4640      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 14, 14, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 7, 7, 64)          0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 7, 7, 128)         73856     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 3, 3, 128)         0

In [18]:
model.fit(tfr_dataset, steps_per_epoch=60000//BATCH_SIZE, epochs=5,callbacks=[earlystopping, reduceLR])

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7fb935e4b510>

In [19]:
tfr_testdata = get_dataset('test.tfrecords', "test")

In [20]:
model.evaluate(tfr_testdata, )



[0.2383846938610077, 0.9340000152587891]