## **Трехслойная сеть для классификации набора данных MNIST**

In [2]:
import sys, numpy as np
from keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

images, labels = (x_train[0:1000].reshape(1000, 28*28) / 255, y_train[0:1000])
one_hot_labels = np.zeros((len(labels), 10))

for i, l in enumerate(labels):
  one_hot_labels[i][l] = 1
labels = one_hot_labels

test_images = x_test.reshape(len(x_test), 28*28) / 255
test_labels = np.zeros((len(y_test), 10))

for i, l in enumerate(y_test):
  test_labels[i][l] = 1

np.random.seed(1)
relu = lambda x: (x > 0) * x
relu2deriv = lambda x: x > 0
alpha, iter, hidden_size, pixels_per_image, num_labels = (0.005, 350, 40, 784, 10)

weights_0_1 = 0.2 * np.random.random((pixels_per_image, hidden_size)) - 0.1  
weights_1_2 = 0.2 * np.random.random((hidden_size, num_labels)) - 0.1

for j in range(iter):
  error, correct_cnt = (0.0, 0)

  for i in range(len(images)):
    layer_0 = images[i:i+1]
    layer_1 = relu(np.dot(layer_0, weights_0_1))
    layer_2 = np.dot(layer_1, weights_1_2)
    error += np.sum((layer_2 - labels[i:i+1]) ** 2)
    correct_cnt += int(np.argmax(layer_2) == np.argmax(labels[i:i+1]))

    layer_2_delta = (layer_2 - labels[i:i+1])
    layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)

    weights_1_2 -= alpha * layer_1.T.dot(layer_2_delta)
    weights_0_1 -= alpha * layer_0.T.dot(layer_1_delta)
  
  sys.stdout.write("\r" + 
                   " I:" + str(j) + 
                   " Error: " + str(error / float(len(images)))[0:5] + 
                   " Correct: " + str(correct_cnt / float(len(images))))

Using TensorFlow backend.


Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz
 I:349 Error: 0.003 Correct: 0.999

## **Проверка на точность работы трехслойной сети**

In [9]:
if (j % 10 == 0 or j == iter-1):
  error, correct_cnt = (0.0, 0)
  for i in range(len(test_images)):
    layer_0 = test_images[i:i+1]
    layer_1 = relu(np.dot(layer_0, weights_0_1))
    layer_2 = np.dot(layer_1, weights_1_2)

    error += np.sum((layer_2 - test_labels[i:i+1]) ** 2)
    correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))
    
  sys.stdout.write(" Test-Err: " + str(error / float(len(test_images)))[0:5] + 
                   " Test-Acc: " + str(correct_cnt / float(len(test_images))))
  print()

 Test-Err: 0.355 Test-Acc: 0.829


## **Стандартный способ регуляризации: прореживание (дропаут)**

In [14]:
import sys, numpy as np
from keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

images, labels = (x_train[0:1000].reshape(1000, 28*28) / 255, y_train[0:1000])
one_hot_labels = np.zeros((len(labels), 10))

for i, l in enumerate(labels):
  one_hot_labels[i][l] = 1
labels = one_hot_labels

test_images = x_test.reshape(len(x_test), 28*28) / 255
test_labels = np.zeros((len(y_test), 10))

for i, l in enumerate(y_test):
  test_labels[i][l] = 1

np.random.seed(1)
relu = lambda x: (x > 0) * x
relu2deriv = lambda x: x > 0
alpha, iter, hidden_size, pixels_per_image, num_labels = (0.005, 350, 40, 784, 10)

weights_0_1 = 0.2 * np.random.random((pixels_per_image, hidden_size)) - 0.1  
weights_1_2 = 0.2 * np.random.random((hidden_size, num_labels)) - 0.1

for j in range(iter):
  error, correct_cnt = (0.0, 0)

  for i in range(len(images)):
    layer_0 = images[i:i+1]
    layer_1 = relu(np.dot(layer_0, weights_0_1))
    dropout_mask = np.random.randint(2, size=layer_1.shape) #dropout "прореживание" отключение 50% случайно выбранных узлов
    
    layer_1 *= dropout_mask * 2 #регуляризация "прореживание" первого слоя

    layer_2 = np.dot(layer_1, weights_1_2)
    error += np.sum((layer_2 - labels[i:i+1]) ** 2)
    correct_cnt += int(np.argmax(layer_2) == np.argmax(labels[i:i+1]))

    layer_2_delta = (layer_2 - labels[i:i+1])
    layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)

    layer_1_delta *= dropout_mask #регуляризация "прореживание" дельты первого слоя

    weights_1_2 -= alpha * layer_1.T.dot(layer_2_delta)
    weights_0_1 -= alpha * layer_0.T.dot(layer_1_delta)

##############################################################
# Проверка на точность работы трехслойной сети после DropOut #
##############################################################

  if (j % 10 == 0):
    test_error = 0.0
    test_correct_cnt = 0

    for i in range(len(test_images)):
      layer_0 = test_images[i:i+1]
      layer_1 = relu(np.dot(layer_0, weights_0_1))
      layer_2 = np.dot(layer_1, weights_1_2)

      test_error += np.sum((layer_2 - test_labels[i:i+1]) ** 2)
      test_correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))

    sys.stdout.write("\r" + 
                    " I:" + str(j) +
                    " Test-Err:" + str(test_error / float(len(test_images)))[0:5] + 
                    " Test-Acc:" + str(test_correct_cnt / float(len(test_images))) +  
                    " Train-Err:" + str(error / float(len(images)))[0:5] + 
                    " Train-Acc:" + str(correct_cnt / float(len(images))))

 I:340 Test-Err:0.346 Test-Acc:0.8438 Train-Err:0.271 Train-Acc:0.882

## **Пакетный градиентный спуск**

In [20]:
#Этот метод увеличивает скорость обучения и улучшает сходимость

import numpy as np
np.random.seed(1)

relu = lambda x: (x > 0) * x
relu2deriv = lambda x: x > 0 

batch_size = 100
alpha, iter = (0.001, 300)
pixels_per_image, num_labels, hidden_size = (784, 10, 100)

weights_0_1 = 0.2 * np.random.random((pixels_per_image, hidden_size)) - 0.1
weights_1_2 = 0.2 * np.random.random((hidden_size, num_labels)) - 0.1

for j in range(iter):
  error, correct_cnt = (0.0, 0)
  for i in range(int(len(images) / batch_size)):
    batch_start, batch_end = ((i * batch_size), ((i+1) * batch_size))

    layer_0 = images[batch_start:batch_end]
    layer_1 = relu(np.dot(layer_0, weights_0_1))
    dropout_mask = np.random.randint(2, size=layer_1.shape)
    layer_1 *= dropout_mask * 2
    layer_2 = np.dot(layer_1, weights_1_2)

    error += np.sum((layer_2 - labels[batch_start:batch_end]) ** 2)

    for k in range(batch_size):
      correct_cnt += int(np.argmax(layer_2[k:k+1] == np.argmax(labels[batch_start+k:batch_start+k+1])))
      layer_2_delta = (layer_2 - labels[batch_start:batch_end]) / batch_size
      layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)
      layer_1_delta *= dropout_mask

      weights_1_2 -= alpha * layer_1.T.dot(layer_2_delta)
      weights_0_1 -= alpha * layer_0.T.dot(layer_1_delta)

  if (j % 10 == 0):
    test_error = 0.0
    test_correct_cnt = 0

    for i in range(len(test_images)):
      layer_0 = test_images[i:i+1]
      layer_1 = relu(np.dot(layer_0, weights_0_1))
      layer_2 = np.dot(layer_1, weights_1_2)

    sys.stdout.write("\r" + 
                    " I:" + str(j) +
                    " Test-Err:" + str(test_error / float(len(test_images)))[0:5] + 
                    " Test-Acc:" + str(test_correct_cnt / float(len(test_images))) +  
                    " Train-Err:" + str(error / float(len(images)))[0:5] + 
                    " Train-Acc:" + str(correct_cnt / float(len(images))))

 I:290 Test-Err:0.0 Test-Acc:0.0 Train-Err:0.224 Train-Acc:0.0