In [79]:
#  Седьмая (и последняя) лаба будет посвящена нейронным сетям. 
#     Для этого нужно взять датасет MNIST и сделать однослойный\многослойный перцептрон для распознавания цифр 0-9. 
#     При этом обязательно, чтобы можно было посмотреть на каких цифрах ваш классификатор ошибается, 
#     а так же должна быть возможность ручного ввода изображений для распознавания и дообучения на них. 
#     Нужно грамотно уметь объяснять все про backpropagation и функции активации. Датасет и инструкция, 
#     как из него извлекать данные лежат здесь: http://yann.lecun.com/exdb/mnist/index.html 
# метод обратного распространения ошибки
# Описание алгоритма: https://habrahabr.ru/post/198268/, 
# http://cyberleninka.ru/article/n/primenenie-svertochnoy-neyronnoy-seti-dlya-raspoznavaniya-rukopisnyh-tsifr
# http://mechanoid.kiev.ua/ml-lenet.html

In [110]:
%matplotlib inline
%load_ext autoreload
%autoreload 5
%autosave 15    

import pandas as pd
import random as rd
import pylab as pl
import numpy as np
from sklearn.decomposition import PCA
from sklearn import preprocessing
import matplotlib.pyplot as plt
import os, struct
from math import exp
from array import array as pyarray
from sklearn.cross_validation import KFold
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.metrics import accuracy_score, f1_score
from scipy.spatial.distance import euclidean, cityblock, cosine, correlation
from IPython.display import display, HTML
from sklearn.metrics import mean_squared_error
plt.rcParams["figure.figsize"]=[15,10]

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Autosaving every 15 seconds


In [118]:
from mnist import MNIST

mndata = MNIST('./samples')

# size 28 * 28, train count = 60000, test count = 10000
image_size = 28 # n = 28 ** 2

train_images, train_labels = mndata.load_training()
train_size = len(train_images)

test_images, test_labels = mndata.load_testing()
test_size = len(test_images)

n = 25

train_pca = np.array(train_images + test_images)
pca = PCA(n_components=n).fit(train_pca)


In [123]:
# init 


def sumprod(a, b):
    return sum(map(lambda x: x[0] * x[1], zip(a, b)))

m = 10 # количество выходных y нейронов
p = 50 # TODO количество скрытых z нейронов


v = [[rd.random() - 0.5 for j in range(p)] for i in range(n)] # Смещение скрытого нейрона j
v0 = [rd.random() - 0.5 for j in range(p)]
w = [[rd.random() - 0.5 for k in range(m)] for j in range(p)] # Смещение нейрона на выходе
w0 = [rd.random() - 0.5 for k in range(m)]

alpha = 0.5 # Скорость обучения

In [124]:
# функция активации (competitive function)

f = lambda x: 1 / (1 + exp(-x)) # [0..1]
f1 = lambda x: f(x) * (1 - f(x)) # z1'(x)

In [136]:
# neural network

def nn_go(x, ans):
    global v, w, v0, w0
    t = [0] * m
    t[ans] = 1
    xx = pca.transform([x])
    xx = preprocessing.normalize(xx)
    xx = xx[0]    
    # step 4. evaluate z
    z_in = [0] * p # Суммарное значение подаваемое на вход скрытого нейрона
    for j in range(p):
        z_in[j] = sumprod([v[i][j] for i in range(n)], xx) + v0[j]
    zz = list(map(f, z_in)) # Скрытый нейрон
    
    # step 5. evaluate y
    y_in = [0] * m # Суммарное значение подаваемое на вход скрытого нейрона
    for k in range(m):
        y_in[k] = sumprod([w[j][k] for j in range(p)], zz) + w0[k]
    yy = list(map(f, y_in)) # Нейрон на выходе    
    
    if (ans == -1):
        res = max(yy)
        for k in range(m):
            if yy[k] == res:
                return k
    
    # step 6. evaluate y error
    sigmak = [(t[k] - yy[k]) * (f1(y_in[k])) for k in range(m)]
    delta_w = [[alpha * sigmak[k] * zz[j] for k in range(m)] for j in range(p)]
    delta_w0 = [alpha * sigmak[k] for k in range(m)]
    
    # step 7. evaluate z error
    sigma_in = [sumprod(sigmak, w[j]) for j in range(p)]
    sigmaj = [sigma_in[j] * f1(z_in[j]) for j in range(p)]
    delta_v = [([alpha * sigmaj[j] * xx[i] for j in range(p)]) for i in range(n)] # TODO
    delta_v0 = [alpha * sigmaj[j] for j in range(p)]
    
    # change weights
    v = [[v[i][j] + delta_v[i][j] for j in range(p)] for i in range(n)] # Смещение скрытого нейрона j
    v0 = [v0[j] + delta_v0[j] for j in range(p)]
    w = [[w[j][k] + delta_w[j][k] for k in range(m)] for j in range(p)] # Смещение нейрона на выходе
    w0 = [w0[k] + delta_w0[k] for k in range(m)]
    
    return mean_squared_error(yy, t)

In [152]:
# neural learning
for steps in range(1):
    mse = 0.0
    for num in range(train_size): #train_size        
        mse += nn_go(train_images[num], train_labels[num])    
        if (num % 1000 == 0):
            print(num, end=' ')
    print(mse / train_size)

# 1. mse = 0.0112276998947
# 2. mse = 0.00955731831638

0 

KeyboardInterrupt: 

In [151]:
def getNumber(x):
    res = nn_go(x, -1)    
    return res

# neural test
ok = [0] * 10
failed = [0] * 10
for i in range(test_size): # test_size
    ans = test_labels[i]
    pred = getNumber(test_images[i])
#     print("answer is", ans, "; prediction is", pred)
    if (ans == pred):
        ok[ans] += 1
    else:
        failed[ans] += 1    
print("RESULT")
for i in range(10):
    print("NUMBER =", i, "; OK =", ok[i], "; FAILED =", failed[i])
print("SUM OK =", sum(ok), "; FAILED =", sum(failed))

# 2. SUM OK = 9421 ; FAILED = 579, the worst digits: 7 (112 failed), 8 (106 failed)

0 1000 2000 3000 4000 5000 6000 7000 8000 9000 RESULT
NUMBER = 0 ; OK = 967 ; FAILED = 13
NUMBER = 1 ; OK = 1123 ; FAILED = 12
NUMBER = 2 ; OK = 947 ; FAILED = 85
NUMBER = 3 ; OK = 976 ; FAILED = 34
NUMBER = 4 ; OK = 935 ; FAILED = 47
NUMBER = 5 ; OK = 802 ; FAILED = 90
NUMBER = 6 ; OK = 928 ; FAILED = 30
NUMBER = 7 ; OK = 916 ; FAILED = 112
NUMBER = 8 ; OK = 868 ; FAILED = 106
NUMBER = 9 ; OK = 959 ; FAILED = 50
SUM OK = 9421 ; FAILED = 579
