## Block structures

In [42]:
def basic(x, name, in_channel, out_channel):
    """Chain two `layer.bn_relu_conv` units and return the last output.

    The first unit maps `in_channel` to `out_channel`; the second keeps
    `out_channel`. The units are named `<name>_basic0` and `<name>_basic1`.
    The trailing `3` is presumably the kernel size -- confirm against
    `layer.bn_relu_conv`.
    """
    out = x
    # Input width of each unit, in order; the output width is always out_channel.
    for idx, unit_in in enumerate((in_channel, out_channel)):
        out = layer.bn_relu_conv(out, name + "_basic%d" % idx, unit_in, out_channel, 3)
    return out

In [36]:
def bottle_neck(x, name, in_channel, out_channel):
    """Chain three `layer.bn_relu_conv` units with a narrower middle unit.

    Channel flow: in_channel -> out_channel -> out_channel // 2 -> out_channel.
    The units are named `<name>_bottleneck0` .. `<name>_bottleneck2`.
    The trailing `3` is presumably the kernel size -- confirm against
    `layer.bn_relu_conv`.

    Returns the output of the last unit.
    """
    # Floor division keeps the channel count an integer even when true
    # division is in effect (Python 3 or `from __future__ import division`);
    # for ints on Python 2 it is identical to the previous `/`.
    half_channel = out_channel // 2
    conv0 = layer.bn_relu_conv(x, name + "_bottleneck0", in_channel, out_channel, 3)
    conv1 = layer.bn_relu_conv(conv0, name + "_bottleneck1", out_channel, half_channel, 3)
    conv2 = layer.bn_relu_conv(conv1, name + "_bottleneck2", half_channel, out_channel, 3)
    return conv2

In [37]:
def basic_wide(x, name, in_channel, out_channel):
    """Apply `<name>_basicwide0` then `<name>_basicwide1` and return the result.

    Both are `layer.bn_relu_conv` units: the first maps `in_channel` to
    `out_channel`, the second keeps `out_channel`. As written, this performs
    the same calls as `basic`, differing only in the layer-name suffix.
    """
    widened = layer.bn_relu_conv(x, name + "_basicwide0", in_channel, out_channel, 3)
    return layer.bn_relu_conv(widened, name + "_basicwide1", out_channel, out_channel, 3)

In [38]:
def dropout(x, name, prob, in_channel, out_channel):
    """Two `layer.bn_relu_conv` units with `tf.nn.dropout` in between.

    Layer names are `<name>_dropout_conv0`, `<name>_dropout` and
    `<name>_dropout_conv1`. Unlike the other block builders, this one takes
    an extra `prob` argument (third position).

    NOTE(review): `prob` is passed as the second positional argument of
    `tf.nn.dropout`, which in TF 1.x is the *keep* probability -- confirm
    that callers feed a keep probability rather than a drop rate.
    """
    first = layer.bn_relu_conv(x, name + "_dropout_conv0", in_channel, out_channel, 3)
    dropped = tf.nn.dropout(first, prob, name = name + "_dropout")
    return layer.bn_relu_conv(dropped, name + "_dropout_conv1", out_channel, out_channel, 3)

## Build Wide ResNet

In [39]:
def _apply_block(block, x, name, prob, in_channel, out_channel):
    """Call `block`, passing `prob` only to `dropout`, whose signature takes it."""
    if block is dropout:
        return block(x, name, prob, in_channel, out_channel)
    return block(x, name, in_channel, out_channel)


def _wide_stage(x, stage, N, block, prob, in_channel, out_channel):
    """Build residual stage number `stage` (layer names `conv<stage>...`).

    The first unit changes the channel count, so its shortcut is a 1x1
    `layer.conv_layer` and its residual branch is a fixed
    conv_bn_relu + conv pair (it does not use `block`). The remaining
    N - 1 units keep `out_channel` and add `block`'s output to an identity
    shortcut.
    """
    prefix = "conv%d" % stage

    # First unit: projection shortcut + hard-coded residual branch.
    shortcut = layer.conv_layer(x, prefix + "0", [1, 1, in_channel, out_channel])
    branch = layer.conv_bn_relu(x, prefix + "0b_", in_channel, out_channel, 3)
    branch = layer.conv_layer(branch, prefix + "0b", [3, 3, out_channel, out_channel])
    out = layer.bn_relu(shortcut + branch, "output%d0" % stage)

    # Remaining units: identity shortcut + user-selected block.
    for n in range(1, N):
        shortcut = tf.identity(out)
        branch = _apply_block(block, out, "%s%db" % (prefix, n), prob, out_channel, out_channel)
        out = layer.bn_relu(shortcut + branch, "output%d%d" % (stage, n))
    return out


def build_wide_resnet(x, num_classes, N, k, block, prob = None):
    """Assemble the Wide ResNet graph and return its softmax output.

    Args:
        x: input tensor; the stem convolution is built for 3 input channels.
        num_classes: width of the final fully connected layer.
        N: number of residual units per stage (the first unit of each stage
            is fixed; `block` is used for the other N - 1).
        k: widening factor; the stage widths are 16*k, 32*k and 64*k.
        block: one of the block builders (`basic`, `bottle_neck`,
            `basic_wide`, `dropout`).
        prob: forwarded to `block` only when `block is dropout`.

    Returns:
        The tensor produced by `tf.nn.softmax(..., name = "prediction")`.

    NOTE(review): the returned tensor is already softmax-normalised. The
    notebook's `loss_func` passes it as `logits` to
    `tf.nn.softmax_cross_entropy_with_logits_v2`, which would apply softmax
    a second time -- confirm whether the loss should use the "fc" output.
    """
    channels = [3, 16, 16 * k, 32 * k, 64 * k]

    # conv1 (stem)
    net = layer.conv_bn_relu(x, "conv1", channels[0], channels[1], 3)

    # conv2 stage, then spatial downsampling
    net = _wide_stage(net, 2, N, block, prob, channels[1], channels[2])
    net = layer.max_pool_layer(net, "downsampling0", [1, 2, 2, 1])

    # conv3 stage, then spatial downsampling
    net = _wide_stage(net, 3, N, block, prob, channels[2], channels[3])
    net = layer.max_pool_layer(net, "downsampling1", [1, 2, 2, 1])

    # conv4 stage
    net = _wide_stage(net, 4, N, block, prob, channels[3], channels[4])

    # avg pooling
    net = layer.avg_pool_layer(net, name = "avg_pool", pooling_size = [1, 8, 8, 1])

    # flatten and fully connected
    flatten = layer.flatten_layer(net)
    fc = layer.fc_layer(flatten, num_classes, "fc")

    return tf.nn.softmax(fc, name = "prediction")

## Training configuration

### Learning rate

In [40]:
# Keep the schedule hyper-parameters on the instance.
self.init_lr = init_lr
self.lr_decay_step = lr_decay_step
self.lr_decay_rate = lr_decay_rate

# Non-trainable step counter; it is handed to the decay schedule below.
self.global_step = tf.Variable(0, trainable = False, name = "global_step")

# Exponential decay of init_lr driven by global_step. staircase = True makes
# the rate change in discrete steps rather than continuously.
self.learning_rate = tf.train.exponential_decay(
    learning_rate = self.init_lr,
    global_step = self.global_step,
    decay_steps = self.lr_decay_step,
    decay_rate = self.lr_decay_rate,
    staircase = True,
    name = "learning_rate")

NameError: name 'init_lr' is not defined

### Loss function

In [None]:
def loss_func(self, pred, gt):
    """Return the mean softmax cross-entropy between `pred` and `gt`.

    `pred` is passed as `logits` and `gt` as `labels` to
    `tf.nn.softmax_cross_entropy_with_logits_v2`. The mean is named "loss"
    and is also recorded as a scalar summary under the same tag.
    """
    per_example = tf.nn.softmax_cross_entropy_with_logits_v2(labels = gt, logits = pred)
    loss = tf.reduce_mean(per_example, name = "loss")
    tf.summary.scalar("loss", loss)
    return loss

# Build the network with the dropout block, then attach loss and accuracy.
# NOTE(review): the build_wide_resnet shown in this notebook returns a softmax
# output, while loss_func feeds `pred` in as logits -- confirm softmax is not
# applied twice.
pred = wrn.build_wide_resnet(
    self.input,
    self.num_classes,
    self.N,
    self.k,
    wrn.dropout,
    prob = self.dropout)
loss = self.loss_func(pred = pred, gt = self.gt)
evaluation = self.evaluate(pred, self.gt, 1)

### Evaluation

In [None]:
def evaluate(self, logits, labels, rank):
    """Return the fraction of rows whose arg-max in `logits` matches `labels`.

    Both inputs are reduced with `tf.argmax` along axis 1. The resulting
    mean is named "accuracy" and recorded as a scalar summary.
    `rank` is accepted but not used by this implementation.
    """
    predicted = tf.argmax(logits, 1)
    expected = tf.argmax(labels, 1)
    hits = tf.cast(tf.equal(predicted, expected), tf.float32)
    accuracy = tf.reduce_mean(hits, name = "accuracy")
    tf.summary.scalar("accuracy", accuracy)
    return accuracy

# Build the network with the dropout block and attach the accuracy op.
pred = wrn.build_wide_resnet(
    self.input,
    self.num_classes,
    self.N,
    self.k,
    wrn.dropout,
    prob = self.dropout)
evaluation = self.evaluate(pred, self.gt, 1)

### Optimizer

In [None]:
# Wrap the momentum optimizer so that weight decay is applied decoupled from
# the gradient update.
optimizer_with_weight_decay = tf.contrib.opt.extend_with_decoupled_weight_decay(
    tf.train.MomentumOptimizer)

# The train op minimises `loss`; global_step is handed to minimize() so it is
# advanced on each update, which in turn drives the learning-rate schedule.
optimizer = optimizer_with_weight_decay(
    weight_decay = self.weight_decay,
    learning_rate = self.learning_rate,
    momentum = 0.9,
).minimize(loss, global_step = self.global_step, name = "momentum_minimizer")

### Dataset

In [None]:
def unpickle_cifar10(path):
    """Load one pickled batch file and return the unpickled object.

    Works on both Python 2 (via `cPickle`) and Python 3 (via `pickle`). On
    Python 3 the file is decoded with `encoding='latin1'`, so a batch that
    was pickled by Python 2 still comes back with `str` keys such as
    'data' and 'labels'.

    Args:
        path: path of the pickle file to read.

    Returns:
        Whatever object the file contains (a dict for CIFAR-10 batches).

    NOTE(security): unpickling executes arbitrary code embedded in the file;
    only load batch files from a trusted source.
    """
    import sys
    try:
        import cPickle as pickle_module  # Python 2
    except ImportError:
        import pickle as pickle_module  # Python 3

    with open(path, 'rb') as f:
        if sys.version_info[0] >= 3:
            # latin1 maps Python 2 byte strings 1:1 onto str, which is also
            # what NumPy arrays pickled under Python 2 require.
            return pickle_module.load(f, encoding = 'latin1')
        return pickle_module.load(f)

def create_dataset(data, label, bs, repeat_size = None):
    """Build a shuffled, mapped, batched and repeated `tf.data.Dataset`.

    Args:
        data, label: parallel arrays sliced element-wise into the dataset.
        bs: batch size.
        repeat_size: number of repetitions; `None` repeats indefinitely.

    The shuffle buffer spans the whole dataset (`len(label)`), and
    `map_batch` runs per element because it is applied before batching.
    Repeating after batching means a short final batch can appear at the
    end of every pass over the data.
    """
    pipeline = tf.data.Dataset.from_tensor_slices((data, label))
    pipeline = pipeline.shuffle(len(label)).map(map_batch).batch(bs)
    if repeat_size is None:
        return pipeline.repeat()
    return pipeline.repeat(repeat_size)

def map_batch(data_batch, label_batch):
    """Reshape one sample and one-hot encode its label.

    The data is reshaped to `config.ori_size` and its axes are permuted with
    [1, 2, 0] (first axis moved last). This presumably converts a flat
    channel-first image to height x width x channel -- confirm against
    `config.ori_size`. The label is expanded to a one-hot vector of length
    `config.num_classes`.
    """
    image = tf.reshape(data_batch, config.ori_size)
    image = tf.transpose(image, [1, 2, 0])
    one_hot = tf.one_hot(label_batch, config.num_classes)
    return image, one_hot

In [None]:
# dataset
# Read every batch file and stack them into one data array and one label array.
data = []
label = []
for cdir in self.dataset_dirs:
    dic = cifar.unpickle_cifar10(cdir)
    data.append(dic['data'])
    label.append(dic['labels'])
data = np.concatenate(data, axis = 0)
label = np.concatenate(label, axis = 0)
# Parenthesised single-argument print: same output on Python 2, valid on Python 3.
print(data.shape)
print(label.shape)

# Input pipeline and the tensor that yields the next batch.
dataset = cifar.create_dataset(data, label, self.bs)
batch_tensor = cifar.get_next(dataset)

### Print the number of trainable parameters

In [None]:
# Total element count over all trainable variables in the default graph.
# Parenthesised single-argument print: same output on Python 2, valid on Python 3.
print(np.sum([np.prod(v.get_shape().as_list()) for v in tf.trainable_variables()]))

## Start training

In [None]:
# Run `self.epoch` training steps (one full-size batch per step).
# NOTE(review): the value fed for self.dropout reaches tf.nn.dropout's second
# positional argument in the `dropout` block (keep probability in TF 1.x);
# confirm 0.3 is meant as a keep probability rather than a drop rate.
i = 0
try:
    while i < self.epoch:
        batch = sess.run(batch_tensor)
        # Skip short batches; they do not count as a step.
        if len(batch[1]) != self.bs:
            continue

        summary, gs, l, eva, lr, _ = sess.run([merged, self.global_step, loss, evaluation, self.learning_rate, optimizer],
                                              feed_dict = {self.input : batch[0],
                                                           self.gt : batch[1],
                                                           self.dropout : 0.3,
                                                           self.weight_decay : 0.0005})

        train_writer.add_summary(summary, gs)
        print("Global steps %d -- loss = %.6f, lr = %.9f, acc = %.6f" % (gs, l, lr, eva))

        if gs % self.save_model_step == 0:
            # Tag the checkpoint with the current step. Passing the constant
            # save_model_step gave every checkpoint the same suffix, so each
            # save overwrote the previous one.
            saver.save(sess, self.model_dir, global_step = int(gs))
            print("Save the model successfully!")

        i += 1
finally:
    # Close the event file even if a step raises.
    train_writer.close()

## Testing

In [None]:
# Restore the saved graph and weights, then measure accuracy batch by batch.
# NOTE(review): the name `dropout` bound below shadows the `dropout` block
# function defined earlier in this notebook; re-running build_wide_resnet in
# the same kernel afterwards would break its `block is dropout` check.
with tf.Session() as sess:
    saver = tf.train.import_meta_graph(graph_path)
    saver.restore(sess, tf.train.latest_checkpoint(ckpt_path))

    # Look up the tensors needed for evaluation by name.
    graph = tf.get_default_graph()
    x = graph.get_tensor_by_name(input_name)
    gt = graph.get_tensor_by_name(gt_name)
    pred = graph.get_tensor_by_name(pred_name)
    accuracy = graph.get_tensor_by_name("accuracy:0")
    dropout = graph.get_tensor_by_name("dropout:0")
    weight_decay = graph.get_tensor_by_name("weight_decay:0")
    a = []  # per-batch accuracies

    b = 1  # 1-based batch counter, used only for the progress message
    while True:
        try:
            batch = sess.run(batch_tensor)
            # Stop at the first short batch.
            if len(batch[1]) != bs:
                break

            # The dropout tensor is fed 1.0 for evaluation.
            acc = sess.run([accuracy], feed_dict = {x : batch[0], gt : batch[1], dropout : 1.0, weight_decay : 1.0})
            a += acc
            print("The average accuracy of the %d batch is %.6f." % (b, np.mean(acc)))
            b += 1
        except tf.errors.OutOfRangeError:
            break

    # With no evaluated batch, report nan directly instead of calling np.mean
    # on an empty list (same printed value, no RuntimeWarning).
    print("The average accuracy of all batches is %.6f." % (np.mean(a) if a else float("nan")))
