In [4]:
import struct
import os
import h5py
import hashlib
import numpy as np
import matplotlib.pyplot as plt
import requests, gzip, os
from scipy.special import expit 
%pylab inline

Populating the interactive namespace from numpy and matplotlib


# Код загрузки данных

In [5]:
def get_data(path='MNISTdata.hdf5'):
    """Load the MNIST arrays stored in an HDF5 file.

    Parameters
    ----------
    path : str, optional
        Location of the HDF5 file. Defaults to ``'MNISTdata.hdf5'`` in the
        current working directory (the original hard-coded location).

    Returns
    -------
    x_train, y_train, x_test, y_test : np.ndarray
        Images as float32 and labels as int32. Labels are taken from
        column 0 of the stored ``y_train`` / ``y_test`` datasets.
    n_classes : int
        Number of distinct label values found in ``y_train``.
    n_input : int
        Number of training samples (``len(x_train)``).
    """
    # The context manager guarantees the file handle is released even when
    # one of the dataset reads raises (missing key, truncated file, ...).
    with h5py.File(path, 'r') as MNIST_data:
        x_train = np.float32(MNIST_data['x_train'][:])
        y_train = np.int32(np.array(MNIST_data['y_train'][:, 0]))
        x_test = np.float32(MNIST_data['x_test'][:])
        y_test = np.int32(np.array(MNIST_data['y_test'][:, 0]))

    n_classes = len(np.unique(y_train))
    n_input = len(x_train)
    return x_train, y_train, x_test, y_test, n_classes, n_input

# Подготовка данных

In [6]:
# Unpack the six values returned by get_data() into notebook-level names;
# the fifth value (number of classes) is stored as `output_size`.
(x_train, y_train,
 x_test, y_test,
 output_size, n_input) = get_data()

In [7]:
# Network hyper-parameters.
num_filters = 5                     # number of convolution kernels
filter_size = 3                     # side length of each square kernel
input_dim = 28                      # side length of a square input image
input_size = input_dim * input_dim  # flattened image length (784)

In [8]:
# Notebook-level parameter tensors.
#   K    : kernels,      shape (filter_size, filter_size, num_filters)
#   W    : dense weights, shape (output_size, D, D, num_filters),
#          where D = input_dim - filter_size + 1
#   bias : dense bias,    shape (output_size, 1), all zeros
# The draw order (K first, then W) is kept so a seeded run is unchanged.
K = (
    np.random.randn(filter_size, filter_size, num_filters)
    / np.sqrt(filter_size)
)
W = (
    np.random.randn(
        output_size,
        input_dim - filter_size + 1,
        input_dim - filter_size + 1,
        num_filters,
    )
    / np.sqrt(input_dim - filter_size + 1)
)
bias = np.zeros((output_size, 1)) / np.sqrt(output_size)

# Класс CNN

In [10]:
class CNN:
    def __init__(self, n_filters: int, filter_size: int, input_dim: int, input_size: int, output_size: int):
        #
        self.num_filters = n_filters
        self.filter_size =filter_size
        self.input_dim = input_dim
        self.input_size = input_size
        self.output_size = output_size
        #
        self.K = np.random.randn(filter_size,filter_size, num_filters)/ np.sqrt(filter_size)
        self.W = np.random.randn(output_size,input_dim-filter_size+1,input_dim-filter_size+1,num_filters) / np.sqrt(input_dim-filter_size+1)
        self.bias = np.zeros((output_size, 1))/np.sqrt(output_size)
    
    def epoch(self, epoch_n, x_data, y_data, LR):
        n_random = randint(0,len(x_data)-1)
        y = y_data[n_random]
        x = x_data[n_random][:]
        x = np.reshape(x, (self.input_dim, self.input_dim))
        # Предсказание
        rho, y, H, Z, total_correct = self.forward(x, y)
        # Бэкпроп
        diff_bias, diff_K, diff_W  = self.backward(rho, x, y, H, Z)
        # Обновление параметров
       	self.bias = self.bias - LR * diff_bias
       	self.W = self.W - LR * diff_W
       	self.K = self.K - LR * diff_K 
        return total_correct
        
        
    def forward(self, x, y):
        total_correct=0
        Z = self.conv(x, self.K)
        H = self.relu(Z)
        U = np.zeros((self.output_size,1))
        for i in range(self.output_size):
        	temp1 = self.W[i,:,:,:]
        	temp2 = np.multiply(temp1,H)
        	U[i] = np.sum(temp2) + bias[i]
        
        rho = self.softmax_function(U)
        predicted_value = np.argmax(rho)
        if predicted_value == y:
            total_correct += 1
        return rho, y, H, Z, total_correct
        
    def backward(self, rho, x, y, H, Z):
        diff_U = rho - self.convert_y(y)
        diff_bias = diff_U
        diff_W = np.zeros((self.output_size,self.input_dim-self.filter_size+1,self.input_dim-self.filter_size+1,self.num_filters))
        for i in range(self.output_size):
            diff_W[i,:,:,:] = diff_U[i]*H
        delta = np.zeros(H.shape)
        for i in range(self.input_dim-self.filter_size+1):
            for j in range(self.input_dim-self.filter_size+1):
                for p in range(self.num_filters):
                    delta[i,j,p] = np.sum(np.multiply(diff_U,self.W[:,i,j,p]))

        grad_Zdel = np.multiply(self.relu_prime(Z),delta)
        diff_K = self.conv(x, grad_Zdel)
        return diff_bias, diff_K, diff_W
    
    def test(self, x_test, y_test):
        correct=0
        y = y_test[n]
        x = x_test[n][:]
        x = np.reshape(x, (self.input_dim, self.input_dim))

        Z = self.conv(x, self.K)
        H = self.relu(Z)

        for i in range(self.output_size):
            temp1 = self.W[i,:,:,:]
            temp2 = np.multiply(temp1,H)
            U[i] = np.sum(temp2) + bias[i]

        rho = self.softmax_function(U)
        predicted_value = np.argmax(rho)

        if (predicted_value == y):
            correct = 1
        return correct

    @staticmethod
    def relu(x):
        return x*(x>0)
    
    @staticmethod
    # Производня релу
    def relu_prime(x):
        return (x>0)
    
    @staticmethod
    def conv(x,K):
        Z = np.zeros(((x.shape[0]-K.shape[0]+1),(x.shape[0]-K.shape[0]+1),K.shape[2]))
        for p in range(K.shape[2]):
            for i in range(Z.shape[0]):
                for j in range(Z.shape[1]):
                    if (i+3<Z.shape[0] and j+3<Z.shape[1]):
                        x_temp = x[i:i+3,j:j+3]
                        temp = np.multiply(x_temp,K[:,:,p])
                        Z[i,j,p] = np.sum(temp)
        return Z
    
    @staticmethod
    def softmax_function(z):
        ZZ = np.exp(z - max(z))/np.sum(np.exp(z - max(z)))
        return ZZ
    
    @staticmethod
    def convert_y(y):
        arr = np.zeros((output_size,1))
        arr[y] = 1
        return arr


# Обучение

In [13]:
# Train the network. Each call to cnn.epoch() performs one SGD step on a
# randomly drawn sample; len(x_train) such steps make up one epoch here.
LR = .01
num_epochs = 7

cnn = CNN(num_filters, filter_size, input_dim, input_size, output_size)
for epochs in range(num_epochs):

    # Step-wise learning-rate decay (checked from the latest stage down).
    if epochs > 15:
        LR = 0.00001
    elif epochs > 10:
        LR = 0.0001
    elif epochs > 5:
        LR = 0.001

    total_correct = 0

    for _ in range(len(x_train)):
        n_correct = cnn.epoch(epoch_n=epochs, x_data=x_train, y_data=y_train, LR=LR)
        total_correct += n_correct

    # `np.float` was removed from NumPy; true division already yields a float.
    print("Training accuracy for epoch {} : {}".format(epochs+1, total_correct / len(x_train)))

KeyboardInterrupt: 

# Тестирование

In [12]:
# Evaluate on the test set one sample at a time; cnn.test() returns 1 for a
# correct prediction and 0 otherwise.
total_correct = 0
for n in range(len(x_test)):
    correct = cnn.test(x_test, y_test)
    total_correct += correct

# `np.float` was removed from NumPy; true division already yields a float.
print("Test accuracy : {}".format(total_correct / len(x_test)))

NameError: name 'U' is not defined