In [1]:
import numpy  as np
import pandas as pd

In [2]:
import torch
import torch.nn as nn
import keras
import tensorflow as tf
from keras.datasets import mnist

In [16]:
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = tf.keras.utils.normalize(X_train, axis=1)
X_test = tf.keras.utils.normalize(X_test, axis=1)
y_train = y_train[:, np.newaxis].T
y_test = y_test[:, np.newaxis].T

x = X_train
xo = np.ndarray((len(x),28**2))
for i in range(len(x)):
  xo[i] = x[i].flatten()
X_train = xo.T

x = X_test
xo = np.ndarray((len(x),28**2))
for i in range(len(x)):
  xo[i] = x[i].flatten()
X_test = xo.T

In [17]:
def onehot(Y):
  y_new = []
  for y in Y[0]:
    t = [0,0,0,0,0,0,0,0,0,0]
    t[y] = 1
    y_new.append(t)
  return np.array(y_new).T

In [18]:
def reverse_onehot(Y):
  y = Y.T
  out = []
  for yi in y:
    out.append(np.argmax(yi))
  return np.array(out).T

In [19]:
y_train = onehot(y_train)
y_test = onehot(y_test)

In [20]:
class dlnet:
    def __init__(self, x, y):
        self.X=x  # входные данные
        self.Y=y  # размеченные данные, target
        self.Yh=np.zeros((1,self.Y.shape[1]))  # реальный выход перцептрона
        self.L=2  # количество слоев
        self.dims = [28*28, 20, 10]  # 28**2 фичей на вход, 20 нейронов в скрытом слое,
                                # 10 нейронов на выходе
        self.param = {}  # параметры и базисы для каждого слоя
        self.ch = {}  # кэш для вякого
        self.grad = {}  #
        self.loss = []  # для хранения значений лоссов
        self.lr=0.003
        self.sam = self.Y.shape[1]  # количество тренировачных образцов

    def nInit(self):
        """
        Инициализирует начальные параметры для сети
        """
        np.random.seed(1)

        self.param['W1'] = np.random.randn(self.dims[1], self.dims[0]) / np.sqrt(self.dims[0])
        self.param['b1'] = np.zeros((self.dims[1], 1))
        self.param['W2'] = np.random.randn(self.dims[2], self.dims[1]) / np.sqrt(self.dims[1])
        self.param['b2'] = np.zeros((self.dims[2], 1))
        return

    def Sigmoid(self, Z):
        return 1/(1+np.exp(-Z))

    def Relu(self, Z):
        return np.maximum(0,Z)

    def forward(self):
        # первый слой
        Z1 = self.param['W1'].dot(self.X) + self.param['b1']
        # функция активации
        A1 = self.Relu(Z1)
        # сохраняем в кэш результаты
        self.ch['Z1'],self.ch['A1']=Z1,A1
        # второй слой
        Z2 = self.param['W2'].dot(A1) + self.param['b2']
        # функция активации
        A2 = self.Sigmoid(Z2)
        # сохраняем в кэш результаты
        self.ch['Z2'],self.ch['A2']=Z2,A2
        # выход сети
        self.Yh=A2
        # считаем лоссы
        loss=self.nloss(A2)
        #print(self.param)
        #print(self.Yh)
        return self.Yh, loss

    def nloss(self,Yh):
        # кросс-энтропия
        loss = (1./self.sam) * (-np.dot(self.Y,np.log(Yh).T) - np.dot(1-self.Y, np.log(1-Yh).T))
        return loss

    def dRelu(self, x):
        x[x<=0] = 0
        x[x>0] = 1
        return x

    def dSigmoid(self, Z):
        s = 1/(1+np.exp(-Z))
        dZ = s * (1-s)
        return dZ

    def backward(self):
        dLoss_Yh = - (np.divide(self.Y, self.Yh ) - np.divide(1 - self.Y, 1 - self.Yh))

        dLoss_Z2 = dLoss_Yh * self.dSigmoid(self.ch['Z2'])
        dLoss_W2 = 1./self.ch['A1'].shape[1] * np.dot(dLoss_Z2,self.ch['A1'].T)
        dLoss_b2 = 1./self.ch['A1'].shape[1] * np.dot(dLoss_Z2, np.ones([dLoss_Z2.shape[1],1]))
        #dLoss_W2 = np.dot(dLoss_Z2,self.ch['A1'].T)
        #dLoss_b2 = np.dot(dLoss_Z2, np.ones([dLoss_Z2.shape[1],1]))

        dLoss_A1 = np.dot(self.param["W2"].T,dLoss_Z2)
        dLoss_Z1 = dLoss_A1 * self.dRelu(self.ch['Z1'])
        dLoss_A0 = np.dot(self.param["W1"].T,dLoss_Z1)
        dLoss_W1 = 1./self.X.shape[1] * np.dot(dLoss_Z1,self.X.T)
        dLoss_b1 = 1./self.X.shape[1] * np.dot(dLoss_Z1, np.ones([dLoss_Z1.shape[1],1]))
        #dLoss_W1 = np.dot(dLoss_Z1,self.X.T)
        #dLoss_b1 = np.dot(dLoss_Z1, np.ones([dLoss_Z1.shape[1],1]))

        self.param["W1"] = self.param["W1"] - self.lr * dLoss_W1
        self.param["b1"] = self.param["b1"] - self.lr * dLoss_b1
        self.param["W2"] = self.param["W2"] - self.lr * dLoss_W2
        self.param["b2"] = self.param["b2"] - self.lr * dLoss_b2

    def gd(self,X, Y, iter = 3000):
        np.random.seed(10)

        self.nInit()

        for i in range(0, iter):
            Yh, loss=self.forward()
            self.backward()

            if i % 1 == 0:
                print ("Cost after iteration %i: %f" %(i, loss.sum()))
                self.loss.append(loss)

        return

    def pred(self,x, y):
        self.X=x
        self.Y=y
        comp = np.zeros((10,x.shape[1]))
        pred, loss= self.forward()
        acc = 0

        for i in range(0, pred.shape[1]):
          #print(pred[:,i])
          comp[np.argmax(pred[:,i]),i] = 1
          acc = acc + np.dot(comp[:,i], y[:,i])

        print(acc/x.shape[1])
        return comp

In [21]:
am = 10000
a = X_train[:,0:am]
b = y_train[:,0:am]

In [26]:
nn = dlnet(a, b)
nn.lr=0.2
nn.dims = [28**2, 20, 10]
nn.gd(X_train, y_train, iter = 200)

Cost after iteration 0: 68.440585
Cost after iteration 1: 58.274883
Cost after iteration 2: 44.529184
Cost after iteration 3: 34.987634
Cost after iteration 4: 33.194717
Cost after iteration 5: 33.042171
Cost after iteration 6: 33.046202
Cost after iteration 7: 33.095489
Cost after iteration 8: 33.167841
Cost after iteration 9: 33.253009
Cost after iteration 10: 33.347900
Cost after iteration 11: 33.450292
Cost after iteration 12: 33.558432
Cost after iteration 13: 33.671695
Cost after iteration 14: 33.789146
Cost after iteration 15: 33.911227
Cost after iteration 16: 34.037467
Cost after iteration 17: 34.168095
Cost after iteration 18: 34.302855
Cost after iteration 19: 34.441670
Cost after iteration 20: 34.584982
Cost after iteration 21: 34.733147
Cost after iteration 22: 34.886033
Cost after iteration 23: 35.043617
Cost after iteration 24: 35.206149
Cost after iteration 25: 35.373644
Cost after iteration 26: 35.546277
Cost after iteration 27: 35.724432
Cost after iteration 28: 35.90

In [27]:
c = nn.pred(X_test, y_test)

0.8817


In [28]:
cn = reverse_onehot(c)
yn = reverse_onehot(y_test)
(cn==yn).sum()

8817

In [25]:
nn.param

{'W1': array([[-0.00048843, -0.08006767, -0.07387169, ..., -0.09314973,
         -0.06869802, -0.04037303],
        [ 0.04976308, -0.06305008, -0.02254226, ...,  0.0136139 ,
         -0.044953  ,  0.02865558],
        [-0.0071055 , -0.08399323, -0.0955485 , ..., -0.08248019,
         -0.01026263, -0.0177748 ],
        ...,
        [-0.06192667, -0.03115272, -0.01610096, ..., -0.01465761,
          0.02152715, -0.03931117],
        [ 0.08412721,  0.0580769 ,  0.01999432, ..., -0.02181765,
          0.0573722 ,  0.0167679 ],
        [ 0.03637285, -0.02879049,  0.01252853, ...,  0.03474712,
         -0.02558065, -0.03943756]]),
 'b1': array([[ 0.30964715],
        [-0.03315856],
        [ 0.09431504],
        [ 0.08110452],
        [-0.01121579],
        [ 0.15386934],
        [-0.01437769],
        [ 0.06722785],
        [ 0.1960012 ],
        [ 0.23346187],
        [-0.03908138],
        [ 0.09218334],
        [ 0.22362162],
        [ 0.20658485],
        [-0.05602078],
        [ 0.0084