# Input Data Preparation

GoogLeNet 이후부터는 227 x 227을 쓰지 않고 원래대로 224 x 224를 사용해도 됨

In [1]:
import tensorflow as tf
from sklearn.utils import shuffle
import random
import matplotlib.pyplot as plt
import os
import numpy as np
from skimage.io import imread
from skimage.transform import resize
from tensorflow.contrib.learn.python.learn.datasets import base

In [2]:
import os
import numpy as np
from skimage.io import imread
from skimage.transform import resize
from tensorflow.contrib.learn.python.learn.datasets import base


class DataSet(object):
    """
    In-memory image dataset that serves mini-batches, cropping every batch
    to a square patch of side `image_reshape_size`.

    The kind of cropping depends on the augment flag (on by default, see
    `set_augment`) and on `data_type`:
      * augment on,  data_type == 'train': random crop + random horizontal flip
      * augment on,  any other data_type : 4 corners + center, each also flipped (10 patches)
      * augment off                      : single center crop
    """

    def __init__(self, data_type, images, image_reshape_size=224, labels=None):
        """
        Construct a new DataSet object.
        :param data_type: str, 'train' selects the training augmentation; any other
                          value selects the 10-patch evaluation augmentation.
        :param images: np.ndarray, shape: (N, H, W, C).
        :param image_reshape_size: int, side length of the patches returned by next_batch.
        :param labels: np.ndarray, shape: (N, num_classes) or (N,). May be None.
        """
        if labels is not None:
            assert images.shape[0] == labels.shape[0], (
                'Number of examples mismatch, between images and labels.'
            )
        self._num_examples = images.shape[0]
        self._data_type = data_type
        self._images = images
        self._labels = labels    # NOTE: this can be None, if not given.
        self._indices = np.arange(self._num_examples, dtype=np.uint)    # image/label indices(can be permuted)
        self._augment = True
        self._image_reshape_size = image_reshape_size
        self._reset()

    def _reset(self):
        """Reset the epoch counter and the read position inside the epoch."""
        self._completed_epoch_count = 0
        self._idx_in_current_epoch = 0

    def set_augment(self, augment):
        """Enable or disable augmentation for subsequent next_batch calls."""
        self._augment = augment

    @property
    def images(self):
        return self._images

    @property
    def labels(self):
        return self._labels

    @property
    def num_examples(self):
        return self._num_examples

    def next_batch(self, batch_size, shuffle=True, fake_data=False):
        """
        Return the next `batch_size` examples from this dataset.
        :param batch_size: int, size of a single batch.
        :param shuffle: bool, whether to shuffle the whole set at the start of each epoch.
        :param fake_data: bool, whether to generate fake data (for debugging).
        :return: batch_images: np.ndarray, shape: (N, h, w, C) or (N, 10, h, w, C).
                 batch_labels: np.ndarray, shape: (N, num_classes) or (N,),
                               or None when the dataset has no labels.
        """
        if fake_data:
            # Fake one-hot labels follow the width of the real labels when those
            # are one-hot encoded; otherwise fall back to two classes.
            num_classes = 2
            if self._labels is not None and self._labels.ndim == 2:
                num_classes = self._labels.shape[1]
            fake_batch_images = np.random.random(size=(batch_size, self._image_reshape_size, self._image_reshape_size, 3))
            fake_batch_labels = np.zeros((batch_size, num_classes), dtype=np.uint8)
            fake_batch_labels[np.arange(batch_size), np.random.randint(num_classes, size=batch_size)] = 1
            return fake_batch_images, fake_batch_labels

        idx_begin = self._idx_in_current_epoch

        # Shuffle the dataset, for the first epoch
        if self._completed_epoch_count == 0 and idx_begin == 0 and shuffle:
            np.random.shuffle(self._indices)

        # Go to the next epoch, if current index goes beyond the total number of examples
        if idx_begin + batch_size > self._num_examples:
            # Increment the number of epochs completed
            self._completed_epoch_count += 1

            # Get the rest examples in this epoch
            remaining_num_examples = self._num_examples - idx_begin
            remaining_indices = self._indices[idx_begin:self._num_examples]

            # Shuffle the dataset, after finishing a single epoch
            if shuffle:
                np.random.shuffle(self._indices)

            # Start the next epoch; the batch is completed with its first examples
            idx_begin = 0
            self._idx_in_current_epoch = batch_size - remaining_num_examples
            idx_end = self._idx_in_current_epoch
            new_indices = self._indices[idx_begin:idx_end]

            remaining_images = self.images[remaining_indices]
            new_images = self.images[new_indices]
            batch_images = np.concatenate((remaining_images, new_images), axis=0)
            if self.labels is not None:
                remaining_labels = self.labels[remaining_indices]
                new_labels = self.labels[new_indices]
                batch_labels = np.concatenate((remaining_labels, new_labels), axis=0)
            else:
                batch_labels = None
        else:
            self._idx_in_current_epoch += batch_size
            idx_end = self._idx_in_current_epoch
            indices = self._indices[idx_begin:idx_end]
            batch_images = self.images[indices]
            if self.labels is not None:
                batch_labels = self.labels[indices]
            else:
                batch_labels = None

        if self._augment and self._data_type == 'train':
            # Perform data augmentation, for training phase
            batch_images = random_crop_reflect(batch_images, self._image_reshape_size)
        elif self._augment and self._data_type != 'train':
            # Perform data augmentation, for evaluation phase(10x)
            batch_images = corner_center_crop_reflect(batch_images, self._image_reshape_size)
        else:
            # Don't perform data augmentation, generating center-cropped patches
            batch_images = center_crop(batch_images, self._image_reshape_size)

        return batch_images, batch_labels


def random_crop_reflect(images, crop_l):
    """
    Perform random cropping and reflection from images.
    :param images: np.ndarray, shape: (N, H, W, C).
    :param crop_l: int, a side length of crop region (must not exceed H or W).
    :return: np.ndarray, shape: (N, h, w, C).
    """
    H, W = images.shape[1:3]
    augmented_images = []
    for image in images:    # image.shape: (H, W, C)

        # Randomly crop patch. Valid top-left offsets are 0..H-crop_l inclusive;
        # randint's upper bound is exclusive, hence the +1 (this also makes
        # H == crop_l work instead of raising).
        y = np.random.randint(H - crop_l + 1)
        x = np.random.randint(W - crop_l + 1)
        image = image[y:y+crop_l, x:x+crop_l]    # (h, w, C)

        # Randomly reflect patch horizontally
        reflect = bool(np.random.randint(2))
        if reflect:
            image = image[:, ::-1]

        augmented_images.append(image)
    return np.stack(augmented_images)    # shape: (N, h, w, C)


def corner_center_crop_reflect(images, crop_l):
    """
    Cut five patches (four corners and the center) out of every image and
    append the horizontally mirrored copy of each, giving 10 patches per image.
    :param images: np.ndarray, shape: (N, H, W, C).
    :param crop_l: int, a side length of crop region.
    :return: np.ndarray, shape: (N, 10, h, w, C).
    """
    H, W = images.shape[1:3]
    half = crop_l // 2
    rest = crop_l - half
    head = slice(None, crop_l)     # first crop_l rows/columns
    tail = slice(-crop_l, None)    # last crop_l rows/columns
    mid_rows = slice(H // 2 - half, H // 2 + rest)
    mid_cols = slice(W // 2 - half, W // 2 + rest)

    # (row window, column window) for each of the five un-mirrored patches
    windows = (
        (head, head),
        (head, tail),
        (tail, head),
        (tail, tail),
        (mid_rows, mid_cols),
    )

    per_image = []
    for picture in images:    # picture.shape: (H, W, C)
        patches = np.stack([picture[rows, cols] for rows, cols in windows])    # (5, h, w, C)
        mirrored = patches[:, :, ::-1]                                         # flip along width
        per_image.append(np.concatenate((patches, mirrored), axis=0))          # (10, h, w, C)
    return np.stack(per_image)    # shape: (N, 10, h, w, C)


def center_crop(images, crop_l):
    """
    Cut the central crop_l x crop_l patch out of every image.
    :param images: np.ndarray, shape: (N, H, W, C).
    :param crop_l: int, a side length of crop region.
    :return: np.ndarray, shape: (N, h, w, C).
    """
    H, W = images.shape[1:3]
    half = crop_l // 2
    top, bottom = H // 2 - half, H // 2 + (crop_l - half)
    left, right = W // 2 - half, W // 2 + (crop_l - half)
    # image.shape: (H, W, C) -> (h, w, C)
    return np.stack([image[top:bottom, left:right] for image in images])


def read_asirra_subset(subset_dir, one_hot=True, sample_size=None):
    """
    Load the Asirra Dogs vs. Cats data subset from disk, resizing every
    image to 256 x 256.
    :param subset_dir: str, path to the directory to read.
    :param one_hot: bool, whether to return one-hot encoded labels.
    :param sample_size: int, sample size specified when we are not using the entire set.
    :return: X_set: np.ndarray, shape: (N, 256, 256, 3).
             y_set: np.ndarray, shape: (N, num_classes) or (N,).
             set_size: int, number of examples actually loaded (N).
    """
    names = os.listdir(subset_dir)
    total = len(names)

    take_sample = sample_size is not None and sample_size < total
    if take_sample:
        # Draw a random subset without replacement
        names = np.random.choice(names, size=sample_size, replace=False)
        total = sample_size
    else:
        # Use every file, in random order
        np.random.shuffle(names)

    report_every = 10 if total < 1000 else 1000
    progress = '     progress: {}/{}...'

    # Pre-allocate data arrays
    pixels = np.empty((total, 256, 256, 3), dtype=np.float32)    # (N, H, W, 3)
    classes = np.empty(total, dtype=np.uint8)                    # (N,)
    for pos, name in enumerate(names):
        if pos % report_every == 0:
            print(progress.format(pos, total), end='\r')
        # The class is the file-name prefix: 'cat' -> 0, anything else -> 1 (dog)
        prefix = name.split('.')[0]
        class_id = 0 if prefix == 'cat' else 1
        raw = imread(os.path.join(subset_dir, name))    # shape: (H, W, 3)
        pixels[pos] = resize(raw, (256, 256), mode='constant').astype(np.float32)
        classes[pos] = class_id

    if one_hot:
        # Convert labels to one-hot vectors, shape: (N, num_classes)
        encoded = np.zeros((total, 2), dtype=np.uint8)
        encoded[np.arange(total), classes] = 1
        classes = encoded

    print(progress.format(total, total), end='\r')
    print('\n Done\n')

    return pixels, classes, total


def read_train_data_sets(subset_dir, image_reshape_size=227, one_hot=True, sample_train_size=None):
    """
    Read `<subset_dir>/train` and split it into train and validation sets.
    :param subset_dir: str, directory that contains the 'train' sub-directory
                       (a trailing path separator is optional).
    :param image_reshape_size: int, patch side length handed to each DataSet.
    :param one_hot: bool, whether labels are one-hot encoded.
    :param sample_train_size: int, number of files to sample; None reads all of them.
    :return: base.Datasets with `train` and `validation` filled and `test` set to None.
    """
    print(" Reading train/validation data")
    # os.path.join works with or without a trailing separator on subset_dir
    train_dir = os.path.join(subset_dir, 'train')
    train_images, train_labels, set_size = read_asirra_subset(train_dir, one_hot, sample_train_size)

    # The first 20% of the (already shuffled) examples become the validation set
    validation_size = int(set_size * 0.2)

    validation_images = train_images[:validation_size]
    validation_labels = train_labels[:validation_size]
    train_images = train_images[validation_size:]
    train_labels = train_labels[validation_size:]

    train = DataSet('train', train_images, image_reshape_size, train_labels)
    validation = DataSet('validation', validation_images, image_reshape_size, validation_labels)

    return base.Datasets(train=train, validation=validation, test=None)


def read_test_data_sets(subset_dir, image_reshape_size=227, one_hot=True, sample_train_size=None):
    """
    Read `<subset_dir>/test` into a test DataSet.
    :param subset_dir: str, directory that contains the 'test' sub-directory
                       (a trailing path separator is optional).
    :param image_reshape_size: int, patch side length handed to the DataSet.
    :param one_hot: bool, whether labels are one-hot encoded.
    :param sample_train_size: unused here; kept so the signature matches the
                              other readers.
    :return: base.Datasets with only `test` filled.
    """
    print("\n Reading test data..")
    # os.path.join works with or without a trailing separator on subset_dir
    test_dir = os.path.join(subset_dir, 'test')
    test_images, test_labels, _ = read_asirra_subset(test_dir, one_hot)

    test = DataSet('test', test_images, image_reshape_size, test_labels)

    return base.Datasets(train=None, validation=None, test=test)
    
        
def read_data_sets(subset_dir, image_reshape_size=227, one_hot=True, sample_train_size=None):
    """
    Read `<subset_dir>/train` (split into train/validation) and `<subset_dir>/test`.
    :param subset_dir: str, directory that contains the 'train' and 'test'
                       sub-directories (a trailing path separator is optional).
    :param image_reshape_size: int, patch side length handed to each DataSet.
    :param one_hot: bool, whether labels are one-hot encoded.
    :param sample_train_size: int, number of training files to sample; None reads
                              all of them. The test set is always read in full.
    :return: base.Datasets with `train`, `validation` and `test` filled.
    """
    print(" Reading train/validation data")
    # os.path.join works with or without a trailing separator on subset_dir
    train_dir = os.path.join(subset_dir, 'train')
    train_images, train_labels, set_size = read_asirra_subset(train_dir, one_hot, sample_train_size)

    # The first 20% of the (already shuffled) examples become the validation set
    validation_size = int(set_size * 0.2)

    validation_images = train_images[:validation_size]
    validation_labels = train_labels[:validation_size]
    train_images = train_images[validation_size:]
    train_labels = train_labels[validation_size:]

    print("\n Reading test data..")
    test_dir = os.path.join(subset_dir, 'test')
    test_images, test_labels, _ = read_asirra_subset(test_dir, one_hot)

    train = DataSet('train', train_images, image_reshape_size, train_labels)
    validation = DataSet('validation', validation_images, image_reshape_size, validation_labels)
    test = DataSet('test', test_images, image_reshape_size, test_labels)

    return base.Datasets(train=train, validation=validation, test=test)


# 1. Train

<br />
# Global Variables

In [3]:
# Phase switch read while the graph is built: it gates the auxiliary
# classifiers and selects the dropout keep probability in fc()/dropout().
is_train = True

<br />
# Load Data

In [4]:
# Load the train/validation split; batches are cropped to 224 x 224 patches
data = read_train_data_sets('./asirra_smaller/', image_reshape_size=224)
data_train, data_validation = data.train, data.validation

# Width of one one-hot label row, i.e. the number of classes (cat, dog)
num_classes = len(data_train.labels[0])

 Reading train/validation data
     progress: 16/16...
 Done



In [5]:
# Sanity check: shape of the stored images versus the shape of a cropped
# batch as it will be fed to the network.
print("Image Shape   : {0}".format(data_train.images.shape))
sample_input_x, sample_input_y = data_train.next_batch(2)
print("ConvNet Input Image Shape   : {0}".format(sample_input_x.shape))

Image Shape   : (13, 256, 256, 3)
ConvNet Input Image Shape   : (2, 224, 224, 3)


<br />
# Hyperparameters

In [6]:
# Mini-batch size for training, capped at the number of training examples
batch_size = min(256, data_train.num_examples)

num_epochs = 300  # deep network, so it needs many passes over the data
num_batches_per_epoch = data_train.num_examples // batch_size   # e.g. 10 // 3 = 3


# L2 regularization (weight decay)
# Used at: loss computation
weight_decay = 0.0005

# Dropout regularization
# Used at: fc layers
# NOTE(review): fc()/dropout() pass this value as `keep_prob`, i.e. as the
# probability of KEEPING a unit - confirm that is the intended meaning.
train_dropout_rate = 0.4


# Momentum term of the optimizer
# Used at: tf.train.MomentumOptimizer
momentum = 0.9

# Learning-rate schedule
# Used at: update_learning_rate
init_learning_rate = 0.01
patience_of_no_improvement_epochs = 30
learning_rate_decay = 0.1
lower_bound_learning_rate = 1e-8

<br />
# Build GoogLeNet Model

In [7]:
# Text log of the network structure; every layer-building helper appends a
# line to it and it is printed once the graph has been built.
info = ''  # log printing variable

#### layer building functions

In [8]:
def input_images(height, width, in_channel):
    """
    Create the float32 image placeholder of shape (None, height, width, in_channel)
    and restart the global `info` log with the input section.
    """
    global info
    x = tf.placeholder(tf.float32, [None, height, width, in_channel])

    header = ' Inputs / Labels'
    row = '\n   {:12s}: {:17s}  {}'.format('x', str(x.shape), 'input images')
    info = header + row    # assignment, not +=: this starts a fresh log
    return x

def output_labels(out_channel):
    """
    Create the float32 label placeholder of shape (None, out_channel) and
    append it to the global `info` log, followed by the feature-extraction header.
    """
    global info
    y = tf.placeholder(tf.float32, [None, out_channel])
    row_template = '\n   {:12s}: {:17s}  {}\n\n Feature Extraction'
    info += row_template.format('y', str(y.shape), 'target value (answer label)')
    return y

In [9]:
def conv(name, inputs, filter_size, stride, num_filters, padding='VALID', is_print=True):
    """
    Build a 2-D convolution layer followed by ReLU.
    :param name: str, prefix for the variable names ('<name>_weights', '<name>_biases')
                 and label used in the `info` log.
    :param inputs: tensor whose last dimension is the input channel count.
    :param filter_size: int, side length of the square kernel.
    :param stride: int, stride applied along height and width.
    :param num_filters: int, number of output channels.
    :param padding: str, 'VALID' or 'SAME'.
    :param is_print: bool, whether to append this layer to the global `info` log
                     (False for the sub-layers of an inception module).
    :return: tensor, the activated feature map.
    """
    bias = 0.1    # initial value of every bias element

    in_channel = int(inputs.get_shape()[-1])
    out_channel = num_filters

    weights = tf.get_variable(name=name + '_weights',
                shape=[filter_size, filter_size, in_channel, out_channel],
                initializer=tf.contrib.layers.xavier_initializer())

    biases = tf.get_variable(name + '_biases',
                [out_channel], tf.float32,
                tf.constant_initializer(value=bias))

    # Add the trainable `biases` variable. The previous code added the Python
    # constant `bias`, so the variable was created but never used in the
    # forward pass.
    feature_map = tf.nn.conv2d(inputs, weights,
                strides=[1, stride, stride, 1],
                padding=padding) + biases

    feature_map = tf.nn.relu(feature_map)  # activation (non-linearizing)

    if is_print:
        global info
        # skip if this is an inception layer's sub layer
        info += '\n   {:12s}: {:17s} -> {:17s}'.format(
                        name,  str(inputs.shape), str(feature_map.shape) )
    return feature_map

In [10]:
def pool(name, inputs, filter_size, stride, padding='VALID', is_print=True):
    """
    Build a max-pooling layer with a square window of side `filter_size`.
    When `is_print` is true, the layer is appended to the global `info` log
    (inception sub-layers pass False).
    """
    global info
    window = [1, filter_size, filter_size, 1]
    step = [1, stride, stride, 1]
    pooled = tf.nn.max_pool(inputs, ksize=window, strides=step, padding=padding)

    if not is_print:
        return pooled

    row = '\n   {:12s}: {:17s} -> {:17s}'
    info += row.format(name, str(inputs.shape), str(pooled.shape))
    return pooled

In [11]:
def avg_pool(name, inputs, filter_size, stride, padding='VALID', is_print=True):
    """
    Build an average-pooling layer with a square window of side `filter_size`.
    When `is_print` is true, the layer is appended to the global `info` log
    (sub-layers of larger modules pass False).
    """
    global info
    window = [1, filter_size, filter_size, 1]
    step = [1, stride, stride, 1]
    averaged = tf.nn.avg_pool(inputs, ksize=window, strides=step, padding=padding)

    if not is_print:
        return averaged

    row = '\n   {:12s}: {:17s} -> {:17s}'
    info += row.format(name, str(inputs.shape), str(averaged.shape))
    return averaged

In [12]:
def lrn(name, inputs, depth_radius=5, alpha=0.0001, beta=0.75):
    """
    Build a local response normalization (LRN) layer and note it in the
    global `info` log.
    """
    global info
    info += '\n   local response normalization'
    settings = dict(depth_radius=depth_radius, alpha=alpha, beta=beta)
    return tf.nn.local_response_normalization(name=name, input=inputs, **settings)

In [13]:
def inception(name, inputs, conv_1x1_out, conv_3x3_reduce_out, conv_3x3_out, conv_5x5_reduce_out, conv_5x5_out, pool_proj_out):
    """
    Build one inception module: four parallel branches (1x1 conv, 1x1 -> 3x3 conv,
    1x1 -> 5x5 conv, 3x3 max-pool -> 1x1 conv) concatenated along the channel axis.
    Each `*_out` argument is the number of filters of the matching convolution.
    Only the module as a whole is appended to the global `info` log.
    """
    global info

    def branch_conv(suffix, source, size, filters):
        # Stride-1, SAME-padded convolution that is not logged on its own
        return conv(name + suffix, source, size, 1, filters, 'SAME', False)

    branch_1x1 = branch_conv('_conv_1x1', inputs, 1, conv_1x1_out)

    reduce_3x3 = branch_conv('_conv_3x3_reduce', inputs, 1, conv_3x3_reduce_out)
    branch_3x3 = branch_conv('_conv_3x3', reduce_3x3, 3, conv_3x3_out)

    reduce_5x5 = branch_conv('_conv_5x5_reduce', inputs, 1, conv_5x5_reduce_out)
    branch_5x5 = branch_conv('_conv_5x5', reduce_5x5, 5, conv_5x5_out)

    pooled = pool(name + 'max_pool_3x3', inputs, 3, 1, 'SAME', False)
    branch_pool = branch_conv('_pool_proj', pooled, 1, pool_proj_out)

    merged = tf.concat([branch_1x1, branch_3x3, branch_5x5, branch_pool], axis=3)

    row = '\n   {:12s}: {:17s} -> {:17s}'
    info += row.format(name, str(inputs.shape), str(merged.shape))
    return merged

In [14]:
def auxilliary_classifier(name, inputs, aux_fc1_out, aux_fc2_out):
    """
    Build an auxiliary classifier head: 5x5/3 average pooling, a 1x1 convolution
    with 128 filters, two fully connected layers and a final logits layer with
    `num_classes` outputs.

    While the head is being built, the global `train_dropout_rate` is set to 0.7
    so that fc() picks that value up; the previous value is restored afterwards,
    even if building fails.
    NOTE(review): fc() passes `train_dropout_rate` as `keep_prob`; confirm 0.7 is
    meant as a keep probability rather than a drop ratio.

    :param name: str, prefix for the variable names of the head.
    :param inputs: tensor, feature map the head branches off from.
    :param aux_fc1_out: int, width of the first fully connected layer.
    :param aux_fc2_out: int, width of the second fully connected layer.
    :return: tensor, logits of the auxiliary head.
    """
    # `global` is required: a plain assignment would only bind a local name and
    # fc(), which reads the module-level value, would never see the override.
    global info, train_dropout_rate

    previous_rate = train_dropout_rate
    train_dropout_rate = 0.7  # temporary override, only for the auxiliary classifier
    try:
        #                       name                   input         filter_size  stride  out_channel  padding  print
        aux_avg_pool = avg_pool(name + '_avg_pool'   , inputs       , 5     , 3   ,            'VALID', False)
        aux_conv     = conv(name + '_conv'           , aux_avg_pool , 1     , 1   ,   128  ,   'SAME' , False)
        aux_flat     = tf.contrib.layers.flatten(aux_conv)
        aux_fc1      = fc(name + '_fc1'   , aux_flat,      aux_fc1_out     , False)
        aux_fc2      = fc(name + '_fc2'   , aux_fc1 ,      aux_fc2_out     , False)
        aux_logits   = fc_last(name + '_logits', aux_fc2 , num_classes, False)
    finally:
        train_dropout_rate = previous_rate  # restore the original value

    info += '\n   {:12s}: {:8s} auxilliary classifer, only performed at the train stage'.format(name + '_y_pred', str(aux_logits.shape))

    return aux_logits

In [15]:
def flatten(name, inputs):
    """
    Flatten `inputs` with tf.contrib.layers.flatten and append the step to the
    global `info` log, followed by the classification header.
    """
    global info
    flat = tf.contrib.layers.flatten(inputs)
    row = '\n   {:12s}: {:17s} -> {:17s}\n\n Classification'
    info += row.format(name, str(inputs.shape), str(flat.shape))
    return flat

In [16]:
def dropout(name, inputs):
    """
    Build a dropout layer and append it to the global `info` log.
    The keep probability is the global `train_dropout_rate` when the global
    `is_train` is true, and 1.0 otherwise.
    NOTE(review): the "rate" is used as a KEEP probability - confirm intent.
    """
    global info
    training_flag = tf.cast(is_train, tf.bool)
    keep_prob_value = tf.cond(training_flag,
                              lambda: train_dropout_rate,
                              lambda: 1.0)

    dropped = tf.nn.dropout(inputs, keep_prob=keep_prob_value)

    row = '\n   {:12s}: {:17s} -> {:17s}'
    info += row.format(name, str(inputs.shape), str(dropped.shape))
    return dropped

In [17]:
def fc(name, inputs, output_size, is_print=True):
    """
    Build a fully connected layer followed by ReLU and dropout.
    Weights use Xavier initialization; biases start at 0.1. The dropout keep
    probability is the global `train_dropout_rate` when the global `is_train`
    is true, and 1.0 otherwise.
    NOTE(review): the "rate" is used as a KEEP probability - confirm intent.
    :param name: str, prefix for the variable names and label in the `info` log.
    :param inputs: tensor whose last dimension is the input width.
    :param output_size: int, width of the layer.
    :param is_print: bool, whether to append this layer to the global `info` log.
    :return: tensor, the layer output after dropout.
    """
    global info
    bias_init = 0.1
    fan_in = int(inputs.get_shape()[-1])

    weights = tf.get_variable(name=name + '_weights',
                shape=[fan_in, output_size],
                initializer=tf.contrib.layers.xavier_initializer())

    biases = tf.get_variable(name + '_biases',
                [output_size], tf.float32,
                tf.constant_initializer(value=bias_init))

    activated = tf.nn.relu(tf.matmul(inputs, weights) + biases)

    # The probability of keeping each unit for dropout layers
    training_flag = tf.cast(is_train, tf.bool)
    keep_prob_value = tf.cond(training_flag,
                              lambda: train_dropout_rate,
                              lambda: 1.0)
    dropped = tf.nn.dropout(activated, keep_prob=keep_prob_value)

    if is_print:
        row = '\n   {:12s}: {:17s} -> {:17s}'
        info += row.format(name, str(inputs.shape), str(dropped.shape))
    return dropped

In [103]:
def fc_last(name, inputs, output_size, is_print=True):
    """
    Build the final fully connected layer: a plain affine map with no
    activation and no dropout, returning raw logits.
    Weights use Xavier initialization; biases start at 0.1.
    :param name: str, prefix for the variable names and label in the `info` log.
    :param inputs: tensor whose last dimension is the input width.
    :param output_size: int, number of logits.
    :param is_print: bool, whether to append this layer to the global `info` log.
    :return: tensor, the logits.
    """
    global info
    bias_init = 0.1
    fan_in = int(inputs.get_shape()[-1])

    weights = tf.get_variable(name=name + '_weights',
                shape=[fan_in, output_size],
                initializer=tf.contrib.layers.xavier_initializer())

    biases = tf.get_variable(name + '_biases',
                [output_size], tf.float32,
                tf.constant_initializer(value=bias_init))

    scores = tf.matmul(inputs, weights) + biases

    if is_print:
        row = '\n   {:12s}: {:17s} -> {:17s}'
        info += row.format(name, str(inputs.shape), str(scores.shape))
    return scores

In [19]:
def logits_to_softmax(name, inputs):
    """
    Turn logits into class probabilities with softmax (the prediction of the
    target value y) and append the output section to the global `info` log.
    """
    global info
    probabilities = tf.nn.softmax(inputs)

    row = '\n\n Output\n   {:12s}: {:8s}hypothesis (prediction) of target value y'
    info += row.format(name, str(probabilities.shape))

    return probabilities

#### network layers

In [20]:
class ConvModel:
    """
    Plain attribute holder for the tensors of the network.
    Every attribute starts as None and is assigned while the graph is built.
    """
    # input
    x = None
    y = None

    # feature extraction
    conv1 = None
    pool1 = None
    lrn1 = None  #  LRN (local response normalization) layer

    conv2_1 = None
    conv2_2 = None
    lrn2 = None  # assigned during graph construction, previously undeclared
    pool2 = None

    inception3a = None
    inception3b = None
    pool3 = None

    inception4a = None
    aux_logits1 = None
    inception4b = None
    inception4c = None
    inception4d = None
    aux_logits2 = None
    inception4e = None
    pool4 = None

    inception5a = None
    inception5b = None
    avg_pool5 = None
    dropout5 = None

    flat = None

    # classification
    fc6 = None
    _logits = None  # kept for backward compatibility; the notebook uses `logits`
    logits = None   # assigned during graph construction, previously undeclared

    # hypothesis (prediction) of target value y
    y_prediction = None

    # learning-rate placeholder, assigned when the optimizer is created
    current_learning_rate = None

m = ConvModel()

In [21]:
# Build the network into a fresh default graph and keep every tensor on `m`.
tf.reset_default_graph() # reset tensor graph

m.x = input_images(height=224, width=224, in_channel=3)
m.y = output_labels(out_channel=num_classes)

#                 name     input   filtersize  stride  |filters|  padding
m.conv1   = conv('conv1'  , m.x      , 7       , 2     ,   64     , 'SAME')
m.pool1   = pool('pool1'  , m.conv1  , 3       , 2     ,            'SAME' )
m.lrn1    = lrn('lrn1'    , m.pool1)

m.conv2_1 = conv('conv2-1', m.lrn1   , 1       , 1     ,   64     , 'VALID')
m.conv2_2 = conv('conv2-2', m.conv2_1, 3       , 1     ,   192    , 'SAME' )
m.lrn2    = lrn('lrn2'    , m.conv2_2)
m.pool2   = pool('pool2'  , m.lrn2   , 3       , 2     ,            'SAME' )

m.inception3a = inception('inception3a', m.pool2      , 64 , 96 , 128, 16 , 32 , 32 )
m.inception3b = inception('inception3b', m.inception3a, 128, 128, 192, 32 , 96 , 64 )
m.pool3       = pool('pool3', m.inception3b, 3  , 2    ,            'SAME' )

m.inception4a = inception('inception4a', m.pool3      , 192, 96 , 208, 16 , 48 , 64 )

# The auxiliary classifiers are only built for the training graph
if is_train:
    m.aux_logits1 = auxilliary_classifier("aux1", m.inception4a, 1024, 1000)

m.inception4b = inception('inception4b', m.inception4a, 160, 112, 224, 24 , 64 , 64 )
m.inception4c = inception('inception4c', m.inception4b, 128, 128, 256, 24 , 64 , 64 )
m.inception4d = inception('inception4d', m.inception4c, 112, 144, 288, 32 , 64 , 64 )

if is_train:
    m.aux_logits2 = auxilliary_classifier("aux2", m.inception4d, 1024, 1000)

m.inception4e = inception('inception4e', m.inception4d, 256, 160, 320, 32 , 128, 128)
# Logged as 'pool4' (was mislabelled 'pool3', duplicating the earlier layer's label)
m.pool4       = pool('pool4'  , m.inception4e, 3, 2    ,           'SAME' )

m.inception5a = inception('inception5a', m.pool4      , 256, 160, 320, 32 , 128, 128)
m.inception5b = inception('inception5b', m.inception5a, 384, 192, 384, 48 , 128, 128)
m.avg_pool5   = avg_pool('avg_pool5'   , m.inception5b, 7   , 1  , 'VALID')

m.dropout5    = dropout('dropout5', m.avg_pool5)

m.flat = flatten('flat', m.dropout5)

#                   name     input   output_size
m.fc6    = fc(     'fc6'   , m.flat,      1000  )
m.logits = fc_last('logits', m.fc6 , num_classes)

m.y_prediction = logits_to_softmax('y_prediction', m.logits)

In [22]:
# Show the layer-by-layer shape log collected while the graph was built
print(info)

 Inputs / Labels
   x           : (?, 224, 224, 3)   input images
   y           : (?, 2)             target value (answer label)

 Feature Extraction
   conv1       : (?, 224, 224, 3)  -> (?, 112, 112, 64)
   pool1       : (?, 112, 112, 64) -> (?, 56, 56, 64)  
   local response normalization
   conv2-1     : (?, 56, 56, 64)   -> (?, 56, 56, 64)  
   conv2-2     : (?, 56, 56, 64)   -> (?, 56, 56, 192) 
   local response normalization
   pool2       : (?, 56, 56, 192)  -> (?, 28, 28, 192) 
   inception3a : (?, 28, 28, 192)  -> (?, 28, 28, 256) 
   inception3b : (?, 28, 28, 256)  -> (?, 28, 28, 480) 
   pool3       : (?, 28, 28, 480)  -> (?, 14, 14, 480) 
   inception4a : (?, 14, 14, 480)  -> (?, 14, 14, 512) 
   aux1_y_pred : (?, 2)   auxilliary classifer, only performed at the train stage
   inception4b : (?, 14, 14, 512)  -> (?, 14, 14, 512) 
   inception4c : (?, 14, 14, 512)  -> (?, 14, 14, 512) 
   inception4d : (?, 14, 14, 512)  -> (?, 14, 14, 528) 
   aux2_y_pred : (?, 2)   auxil

<br />
# Set Model Propagation

#### 1. Forward propagation
model에 input data 넣어서 model로 구한 y_prediction 값을 구하는 과정

#### 2. Loss computation
y_prediction 값과 y_true 값을 비교해서 loss (error)를 구하는 과정

#### 3. Backpropagation
loss를 가지고 model의 train weight를 update, optimize 시킴


1 > 2 > 3 과정을 반복하며 y_true값과 유사한 결과를 내도록 모델을 학습시킨다.

In [24]:
# Module-level handles for the three training-step pieces; they are assigned
# further down once the corresponding graph ops have been created.
forward_propagation = None
compute_loss = None
back_propagation = None

In [25]:
def set_forward_propagation():
    """Return the model's softmax prediction tensor (the forward pass output)."""
    prediction = m.y_prediction
    return prediction

In [30]:
def get_loss_from_logits(labels, logits, weight_decay):
    """
    Mean softmax cross-entropy of `logits` against `labels`, plus an L2 penalty
    of (weight_decay / 2) times the summed tf.nn.l2_loss of every trainable
    variable in the graph.
    """
    per_example = tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=logits)
    data_term = tf.reduce_mean(per_example)

    # L2 regularization over all trainable variables
    half_decay = weight_decay / 2.0
    variable_norms = [tf.nn.l2_loss(variable) for variable in tf.trainable_variables()]
    penalty = tf.add_n(variable_norms) * half_decay

    return data_term + penalty

def define_loss_function():
    """
    Combine the main loss with the two auxiliary-classifier losses:
    (main + 0.3 * aux1 + 0.3 * aux2) / 1.6.
    The auxiliary losses use a weight decay of 0.0002, the main loss uses the
    global `weight_decay`.
    """
    aux_weight = 0.3
    aux_terms = [get_loss_from_logits(m.y, head, 0.0002)
                 for head in (m.aux_logits1, m.aux_logits2)]
    main_term = get_loss_from_logits(m.y, m.logits, weight_decay)

    weighted_sum = main_term + aux_terms[0] * aux_weight + aux_terms[1] * aux_weight
    return weighted_sum / 1.6

In [31]:
def set_backward_propagation():
    """
    Create the training op: a momentum optimizer (no Nesterov) minimizing the
    global `compute_loss` over all trainable variables.
    The learning rate is fed at run time through the placeholder stored in
    `m.current_learning_rate`.
    """
    trainable = tf.trainable_variables()
    m.current_learning_rate = tf.placeholder(tf.float32)

    momentum_optimizer = tf.train.MomentumOptimizer(
        m.current_learning_rate,
        momentum,
        use_nesterov=False)
    train_op = momentum_optimizer.minimize(compute_loss, var_list=trainable)

    return train_op

In [35]:
# Bind the prediction tensor used for both training and evaluation
forward_propagation = set_forward_propagation()
print(forward_propagation)

Tensor("Softmax:0", shape=(?, 2), dtype=float32)


In [36]:
# Bind the combined (main + auxiliary) loss tensor
compute_loss = define_loss_function()
print(compute_loss)

Tensor("truediv_1:0", shape=(), dtype=float32)


In [37]:
# Bind the optimizer's training op; must run after compute_loss is defined,
# because set_backward_propagation() reads that global.
back_propagation = set_backward_propagation()
print(back_propagation)

name: "Momentum_1"
op: "NoOp"
input: "^Momentum_1/update_conv1_weights/ApplyMomentum"
input: "^Momentum_1/update_conv1_biases/ApplyMomentum"
input: "^Momentum_1/update_conv2-1_weights/ApplyMomentum"
input: "^Momentum_1/update_conv2-1_biases/ApplyMomentum"
input: "^Momentum_1/update_conv2-2_weights/ApplyMomentum"
input: "^Momentum_1/update_conv2-2_biases/ApplyMomentum"
input: "^Momentum_1/update_inception3a_conv_1x1_weights/ApplyMomentum"
input: "^Momentum_1/update_inception3a_conv_1x1_biases/ApplyMomentum"
input: "^Momentum_1/update_inception3a_conv_3x3_reduce_weights/ApplyMomentum"
input: "^Momentum_1/update_inception3a_conv_3x3_reduce_biases/ApplyMomentum"
input: "^Momentum_1/update_inception3a_conv_3x3_weights/ApplyMomentum"
input: "^Momentum_1/update_inception3a_conv_3x3_biases/ApplyMomentum"
input: "^Momentum_1/update_inception3a_conv_5x5_reduce_weights/ApplyMomentum"
input: "^Momentum_1/update_inception3a_conv_5x5_reduce_biases/ApplyMomentum"
input: "^Momentum_1/update_inception3

<br />
# Run Train

#### functions to help batch training/testing

In [50]:
def execute_train(sess):
    """
    Run one epoch of training: `num_batches_per_epoch` optimizer steps on
    batches drawn from `data_train`.
    :param sess: tf.Session holding the initialized graph.
    :return: loss value of the last batch of the epoch, or None when the
             epoch contains no batches.
    """
    _loss = None

    for _ in range(num_batches_per_epoch):

        batch_x, batch_y_true = data_train.next_batch(batch_size)

        # Only the loss is read back; the predictions were fetched before but
        # never used.
        _loss, _ = sess.run([
                        compute_loss,
                        back_propagation],
                                feed_dict={
                                m.x: batch_x,
                                m.y: batch_y_true,
                                m.current_learning_rate: current_learning_rate_value})

    return _loss

In [51]:
from sklearn.metrics import accuracy_score

In [60]:
def execute_validation(sess):
    """
    Predict the whole validation set and return its accuracy
    (sklearn accuracy_score on the arg-max of labels and predictions).
    """
    predicted_probs = run_forward_propagation_for_evaluation(sess, data_validation)

    truth = data_validation.labels.argmax(axis=1)
    guess = predicted_probs.argmax(axis=1)

    return accuracy_score(truth, guess)

In [62]:
def run_forward_propagation_for_evaluation(sess, dataset, augment_type=True):
    """
    Predict class probabilities for every example of `dataset`, in order.
    :param sess: tf.Session holding the initialized graph.
    :param dataset: DataSet to predict; batches are read with shuffle=False.
    :param augment_type: bool. True expects 10-patch batches of shape
                         (N, 10, h, w, C) and averages the 10 predictions per
                         example; False expects plain (N, h, w, C) batches.
                         NOTE(review): the batch shape is decided by the dataset's
                         own augment flag, which this function does not change -
                         the caller has to keep the two in sync.
    :return: np.ndarray, shape: (N, num_classes).
    """
    if dataset.labels is not None:
        assert len(dataset.labels.shape) > 1, 'Labels must be one-hot encoded.'

    prediction_size = dataset.num_examples

    # Full batches followed by one smaller batch for the remainder, if any
    num_steps, last_batch_size = divmod(prediction_size, batch_size)
    step_sizes = [batch_size] * num_steps
    if last_batch_size:
        step_sizes.append(last_batch_size)

    # Start prediction loop
    y_prediction = []

    for _batch_size in step_sizes:
        x, _ = dataset.next_batch(_batch_size, shuffle=False)

        if augment_type is True:
            y_prediction_patches = np.empty((_batch_size, 10, num_classes),
                                      dtype=np.float32)    # (N, 10, num_classes)
            # compute predictions for each of 10 patch modes,
            for idx in range(10):
                y_prediction_patch = sess.run(forward_propagation,
                                        feed_dict={m.x: x[:, idx]}) # (N, h, w, C)
                y_prediction_patches[:, idx] = y_prediction_patch

            _y_prediction = y_prediction_patches.mean(axis=1)    # (N, num_classes)

        else:
            # Compute predictions (previously referenced the undefined name
            # `tr1_forward_propagation`)
            _y_prediction = sess.run(forward_propagation,
                              feed_dict={m.x: x})    # (N, num_classes)

        y_prediction.append(_y_prediction)

    y_prediction = np.concatenate(y_prediction, axis=0)    # (N, num_classes)

    return y_prediction


In [48]:
import os
import time

# Root directory under which every training run stores its results.
base_path = 'trained_model_result/'

# Create the root directory on first use and assign it to uid/gid 1000.
if not os.path.exists(base_path):
    os.makedirs(base_path)
    os.chown(base_path, uid=1000, gid=1000)

# Each run writes into its own sub-directory named after the current time.
timestamp = time.strftime("%Y%m%d_%H%M%S")
output_path = os.path.join(base_path, '{}/'.format(timestamp))

os.makedirs(output_path)
os.chown(output_path, uid=1000, gid=1000)

In [83]:
# Mutable training state, read and updated by the epoch loop and by
# update_learning_rate().
bad_epochs = 0      # epochs since the last validation improvement
best_score = 0.0    # best validation score seen so far
current_learning_rate_value = init_learning_rate


# Session on the default graph; allow_growth is enabled on the GPU options.
graph = tf.get_default_graph()
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(graph=graph, config=config)
sess.run(tf.global_variables_initializer())    # initialize all weights

saver = tf.train.Saver()  # to save trained model
output_model_path = os.path.join(output_path, 'model.ckpt')

train_results = dict()    # dictionary to contain training(, evaluation) results and details
total_steps = num_epochs * num_batches_per_epoch

# Summary of the run configuration. Kept in its own variable instead of
# rebinding the builtin `str`, which would break later str(...) calls.
train_banner = ''.join([
    '\n------------------------------------------------------------------------',
    '\n execute train',
    '\n------------------------------------------------------------------------',
    '\n  train data size       : {:10}'.format(data_train.num_examples),
    '\n  batch size            : {:10}'.format(batch_size),
    '\n  batche loop per epoch : {:10} = |train data| {} / |batch| {}'.format(
        num_batches_per_epoch, data_train.num_examples, batch_size),
    '\n  epoches               : {:10}'.format(num_epochs),
    '\n  total iterations      : {:10} = |batch loop| {} * |epoch| {}\n\n'.format(
        total_steps, num_batches_per_epoch, num_epochs),
])

print(train_banner)

start_time = time.time()


------------------------------------------------------------------------
 execute train
------------------------------------------------------------------------
  train data size       :         13
  batch size            :         13
  batche loop per epoch :          1 = |train data| 13 / |batch| 13
  epoches               :        300
  total iterations      :        300 = |batch loop| 1 * |epoch| 300




In [84]:
def is_better(current_score, best_score, score_threshold=1e-4):
    """
    Tell whether `current_score` beats `best_score` by a relative margin.

    Higher scores are treated as better. Because the margin is relative,
    a `best_score` of 0 is beaten by any positive score.
    :param current_score: float, score to evaluate.
    :param best_score: float, best score observed so far.
    :param score_threshold: float, required relative improvement; the
        current score must exceed best_score * (1 + score_threshold).
    :return: bool, True only for a strict improvement beyond the margin.
    """
    relative_eps = 1.0 + score_threshold
    return current_score > best_score * relative_eps

In [85]:
def update_learning_rate():
    """
    Decay the global learning rate after too many epochs without improvement.

    Does nothing while `bad_epochs` has not exceeded
    `patience_of_no_improvement_epochs`. Once it has, the learning rate is
    multiplied by `learning_rate_decay`, but only if that lowers it by more
    than `lower_bound_learning_rate`; the bad-epoch counter is reset either way.
    """
    global bad_epochs
    global current_learning_rate_value

    # Still within the patience window: leave everything untouched.
    if bad_epochs <= patience_of_no_improvement_epochs:
        return

    decayed = current_learning_rate_value * learning_rate_decay
    step_down = current_learning_rate_value - decayed

    # Skip decays whose effect would not exceed the lower-bound epsilon.
    if step_down > lower_bound_learning_rate:
        current_learning_rate_value = decayed

    bad_epochs = 0

In [88]:
# Training loop: one pass per epoch, with checkpointing of the best model,
# learning-rate decay and an early exit once the learning rate is negligible.
for epoch_step in range(num_epochs):
    # run the training step for this epoch, then score on the validation set
    current_loss = execute_train(sess)
    current_score = execute_validation(sess)

    # Use a dedicated name for the log line; rebinding the builtin `str`
    # would break any later str(...) call in the session.
    epoch_log = '[epoch{:4}] loss: {:.6f} | validation score: {:.6f} | learning rate: {:.6f}'\
        .format(epoch_step, current_loss, current_score, current_learning_rate_value)
    print(epoch_log)

    # Keep track of the current best model,
    if is_better(current_score, best_score):
        best_score = current_score
        bad_epochs = 0

        saver.save(sess, output_model_path)  # save current weights
    else:
        bad_epochs += 1

    # May lower current_learning_rate_value and reset bad_epochs.
    update_learning_rate()
    if current_learning_rate_value < 0.000001:
        print('   exit train: learning rate is too small (< 0.000001)')
        break

[epoch   0] loss: 2.184828 | validation score: 0.333333 | learning rate: 0.000000
   exit train: learning rate is too small (< 0.000001)


# 2. Test

In [1]:
# Flag set at the start of the test section.
# NOTE(review): no code visible in this section reads `is_train` - confirm where it is consumed.
is_train = False

In [100]:
def execute_test(sess):
    """
    Score the model on the module-level `data_test` set.

    Predictions come from run_forward_propagation_for_evaluation(); both the
    labels and the predictions are reduced to class indices with argmax
    before being compared by accuracy_score().
    :param sess: session passed through to the forward-propagation helper.
    :return: value returned by accuracy_score(true classes, predicted classes).
    """
    predicted_scores = run_forward_propagation_for_evaluation(sess, data_test)

    true_classes = data_test.labels.argmax(axis=1)
    predicted_classes = predicted_scores.argmax(axis=1)

    return accuracy_score(true_classes, predicted_classes)

In [93]:
# Load the test images (requested reshape size: 224) and keep the test split.
data = read_test_data_sets(
    '../test_data/asirra_smaller/',
    image_reshape_size=224,
)
data_test = data.test

# The class count is the length of one label row.
num_classes = len(data_test.labels[0])


 Reading test data..
     progress: 0/10...     progress: 10/10...
 Done



In [96]:
# Fresh session on the default graph for evaluation; allow_growth is enabled
# on the GPU options.
graph = tf.get_default_graph()
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(graph=graph, config=config)

# Load the weights stored at output_model_path into this session.
saver = tf.train.Saver()
saver.restore(sess, output_model_path)

# Summary of the test configuration. Kept in its own variable instead of
# rebinding the builtin `str`, which would break later str(...) calls.
test_banner = ''.join([
    '\n------------------------------------------------------------------------',
    '\nexecute test',
    '\n------------------------------------------------------------------------',
    '\n  test data size       : {:10}'.format(data_test.num_examples),
    '\n  batch size           : {:10}'.format(batch_size),
])

print(test_banner)

INFO:tensorflow:Restoring parameters from trained_model_result/20190712_064712/model.ckpt

------------------------------------------------------------------------
execute test
------------------------------------------------------------------------
  test data size       :         10
  batch size           :         13


In [101]:
# Score the restored model on the test set; execute_test() returns the accuracy_score() result.
test_score = execute_test(sess)

In [102]:
 str = '\n\n\n------------------------------------------------------------------------' + \
    '\nTest finished.' + \
    '\n------------------------------------------------------------------------' + \
    '\n\n Test score (accuracy) : {}'.format(test_score) + \
    '\n------------------------------------------------------------------------\n\n'

print(str)

sess.close()




------------------------------------------------------------------------
Test finished.
------------------------------------------------------------------------

 Test score (accuracy) : 0.3
------------------------------------------------------------------------


