<a href="https://colab.research.google.com/github/LenaVolzhina/playing-with-neural-networks/blob/master/DL_chapter_4_minibatch_normalization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
from datetime import datetime

In [0]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Read the MNIST data sets from "MNIST_data/" with one-hot encoded labels.
mnist = input_data.read_data_sets(
    "MNIST_data/",
    one_hot=True,
)

In [0]:
def fullyconnected_layer(tensor, input_size, out_size, activation=None):
  """Build a dense layer computing ``activation(tensor @ W + b)``.

  Every call creates two new variables: a weight matrix of shape
  ``[input_size, out_size]`` and a bias vector of shape ``[out_size]``,
  both initialised from a truncated normal with ``stddev=0.1``.

  Args:
    tensor: input tensor that is matrix-multiplied by the weights.
    input_size: number of input features (rows of the weight matrix).
    out_size: number of output units (columns of the weight matrix).
    activation: callable applied to the affine output. ``None`` (the
      default) keeps the original behaviour and applies ``tf.nn.tanh``.
      Pass ``tf.identity`` to get the raw pre-activation values, e.g. for
      an output layer whose result is fed to a ``*_with_logits`` loss.

  Returns:
    The result of applying the activation to ``tf.matmul(tensor, W) + b``.
  """
  W = tf.Variable(tf.truncated_normal([input_size, out_size], stddev=0.1))
  b = tf.Variable(tf.truncated_normal([out_size], stddev=0.1))
  if activation is None:
    # Resolve the default at call time rather than in the signature, so the
    # function definition itself does not need `tf` to be bound yet.
    activation = tf.nn.tanh
  return activation(tf.matmul(tensor, W) + b)

In [0]:
def batchnorm_layer(tensor, size):
  """Normalize `tensor` with its own moments over axis 0, then scale and shift.

  The mean and variance are always computed from the tensor that is passed
  in; no running statistics are kept. Each call creates two new variables
  of shape ``[size]``: an offset initialised to zeros and a scale
  initialised to ones.

  Args:
    tensor: tensor to normalize; moments are taken over axis 0.
    size: length of the offset and scale vectors.

  Returns:
    The output of ``tf.nn.batch_normalization`` with epsilon ``0.001``.
  """
  mean, variance = tf.nn.moments(tensor, [0])
  # Keep the creation order (offset first, then scale) of the variables.
  offset = tf.Variable(tf.zeros([size]))
  gain = tf.Variable(tf.ones([size]))
  normalized = tf.nn.batch_normalization(
    tensor,
    mean=mean,
    variance=variance,
    offset=offset,
    scale=gain,
    variance_epsilon=0.001,
  )
  return normalized

Заглушки для данных

In [0]:
# Placeholder for a batch of float32 input vectors with 784 features each.
x = tf.placeholder(dtype=tf.float32, shape=[None, 784])

# Placeholder for the ground-truth labels (10 float32 values per example).
y = tf.placeholder(dtype=tf.float32, shape=[None, 10])

Описываю модели

In [0]:
# First hidden layer; its output (and therefore its weights) is shared by
# both models below.
h1 = fullyconnected_layer(tensor=x, input_size=784, out_size=100)

# Model with normalization: batch norm sits between the two hidden layers.
# NOTE(review): fullyconnected_layer ends with tanh, so the `y_logit*`
# tensors are tanh outputs rather than unbounded logits.
h1_bn = batchnorm_layer(tensor=h1, size=100)
h2_bn = fullyconnected_layer(tensor=h1_bn, input_size=100, out_size=100)
y_logit_bn = fullyconnected_layer(tensor=h2_bn, input_size=100, out_size=10)

# Model without normalization: same layer sizes, fed directly from h1.
h2 = fullyconnected_layer(tensor=h1, input_size=100, out_size=100)
y_logit = fullyconnected_layer(tensor=h2, input_size=100, out_size=10)

Описываю, как замерять промежуточное качество

In [0]:
# Accuracy values appended during training, keyed by model and data split.
accuracy_history = {
    model_name: {'train': [], 'test': []}
    for model_name in ('bn', 'no_bn')
}

# Model without normalization: share of rows where the arg-max of the
# output equals the arg-max of the label.
correct_prediction = tf.equal(
    tf.argmax(y_logit, axis=1),
    tf.argmax(y, axis=1),
)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, dtype=tf.float32))

# Same metric for the model with batch normalization.
correct_prediction_bn = tf.equal(
    tf.argmax(y_logit_bn, axis=1),
    tf.argmax(y, axis=1),
)
accuracy_bn = tf.reduce_mean(tf.cast(correct_prediction_bn, dtype=tf.float32))

def calc_accuracy(session, ds, accuracy_fun):
  """Evaluate `accuracy_fun` on all of `ds` in a single `session.run` call.

  Args:
    session: object whose `run(fetches, feed_dict=...)` performs the
      evaluation.
    ds: data set exposing `images` and `labels`; they are fed to the
      module-level placeholders `x` and `y` respectively.
    accuracy_fun: the tensor (fetch) to evaluate.

  Returns:
    Whatever `session.run` returns for `accuracy_fun`.
  """
  feed = {x: ds.images, y: ds.labels}
  return session.run(accuracy_fun, feed_dict=feed)

Обучаю без нормализации

In [15]:
# Element-wise sigmoid cross-entropy for the model without normalization.
# The tensor is handed to minimize() unreduced (no mean/sum is applied here).
loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=y_logit, labels=y)
train_op = tf.train.GradientDescentOptimizer(0.00001).minimize(loss)    # was 0.01

# training
init = tf.global_variables_initializer()

# Run inside a context manager so the session is closed when training ends
# (or raises) instead of staying open. Nothing after this cell reuses `sess`:
# the cell that trains the normalized model opens its own session.
with tf.Session() as sess:
  sess.run(init)

  for i in range(1000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_op, feed_dict={x: batch_xs, y: batch_ys})

    # Every 20 steps, record accuracy on the full train and test sets.
    if (i + 1) % 20 == 0:
      acc_train = calc_accuracy(sess, mnist.train, accuracy)
      acc_test = calc_accuracy(sess, mnist.test, accuracy)
      accuracy_history['no_bn']['train'].append(acc_train)
      accuracy_history['no_bn']['test'].append(acc_test)

      print(f"[{datetime.now()}] Step {i + 1}, train accuracy {acc_train:.4f}, test accuracy {acc_test:.4f}")

[2019-11-27 12:56:27.717031] Step 20, train accuracy 0.0831, test accuracy 0.0845
[2019-11-27 12:56:28.064679] Step 40, train accuracy 0.0970, test accuracy 0.0981
[2019-11-27 12:56:28.404204] Step 60, train accuracy 0.1077, test accuracy 0.1096
[2019-11-27 12:56:28.751914] Step 80, train accuracy 0.1159, test accuracy 0.1161
[2019-11-27 12:56:29.095892] Step 100, train accuracy 0.1213, test accuracy 0.1233
[2019-11-27 12:56:29.441695] Step 120, train accuracy 0.1265, test accuracy 0.1288
[2019-11-27 12:56:29.783856] Step 140, train accuracy 0.1314, test accuracy 0.1324
[2019-11-27 12:56:30.194865] Step 160, train accuracy 0.1350, test accuracy 0.1354
[2019-11-27 12:56:30.535902] Step 180, train accuracy 0.1403, test accuracy 0.1403
[2019-11-27 12:56:30.878216] Step 200, train accuracy 0.1449, test accuracy 0.1451
[2019-11-27 12:56:31.222822] Step 220, train accuracy 0.1495, test accuracy 0.1501
[2019-11-27 12:56:31.570305] Step 240, train accuracy 0.1543, test accuracy 0.1542
[2019-11

Обучаю с нормализацией

In [9]:
# Element-wise sigmoid cross-entropy for the model with batch normalization.
# The tensor is handed to minimize() unreduced (no mean/sum is applied here).
loss_bn = tf.nn.sigmoid_cross_entropy_with_logits(logits=y_logit_bn, labels=y)
train_op_bn = tf.train.GradientDescentOptimizer(0.01).minimize(loss_bn)

# training
init = tf.global_variables_initializer()

# Run inside a context manager so the session is closed when training ends
# (or raises) instead of staying open.
# NOTE(review): `sess` is closed once this cell finishes; no code visible
# after this cell uses it, but confirm if the notebook continues.
with tf.Session() as sess:
  sess.run(init)

  for i in range(1000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_op_bn, feed_dict={x: batch_xs, y: batch_ys})

    # Every 20 steps, record accuracy on the full train and test sets.
    if (i + 1) % 20 == 0:
      acc_train = calc_accuracy(sess, mnist.train, accuracy_bn)
      acc_test = calc_accuracy(sess, mnist.test, accuracy_bn)
      accuracy_history['bn']['train'].append(acc_train)
      accuracy_history['bn']['test'].append(acc_test)

      print(f"[{datetime.now()}] Step {i + 1}, train accuracy {acc_train:.4f}, test accuracy {acc_test:.4f}")

[2019-11-27 12:31:20.374065] Step 20, train accuracy 0.8430, test accuracy 0.8531
[2019-11-27 12:31:20.723469] Step 40, train accuracy 0.8715, test accuracy 0.8827
[2019-11-27 12:31:21.104601] Step 60, train accuracy 0.8878, test accuracy 0.8963
[2019-11-27 12:31:21.490234] Step 80, train accuracy 0.8944, test accuracy 0.8996
[2019-11-27 12:31:21.878171] Step 100, train accuracy 0.9061, test accuracy 0.9103
[2019-11-27 12:31:22.360951] Step 120, train accuracy 0.9043, test accuracy 0.9102
[2019-11-27 12:31:22.749724] Step 140, train accuracy 0.9170, test accuracy 0.9197
[2019-11-27 12:31:23.127503] Step 160, train accuracy 0.9225, test accuracy 0.9249
[2019-11-27 12:31:23.508479] Step 180, train accuracy 0.9245, test accuracy 0.9285
[2019-11-27 12:31:23.903821] Step 200, train accuracy 0.9316, test accuracy 0.9343
[2019-11-27 12:31:24.295109] Step 220, train accuracy 0.9329, test accuracy 0.9326
[2019-11-27 12:31:24.689619] Step 240, train accuracy 0.9358, test accuracy 0.9367
[2019-11

![](https://i.imgur.com/j1a8yFu.jpg)