In [1]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
from numba import njit, prange
from skimage.measure import block_reduce
import matplotlib.pyplot as plt
import math
import deep_utils as du
%matplotlib inline

In [2]:
mnist = input_data.read_data_sets('MNIST_data', one_hot=True, reshape=False)

train_dataset = mnist.train.images
train_labels = mnist.train.labels
validation_dataset = mnist.validation.images
validation_labels = mnist.validation.labels
test_dataset = mnist.test.images
test_labels = mnist.test.labels

print('train_labels', train_labels.shape)
print('train_dataset', train_dataset.shape)
print('validation_labels', validation_labels.shape)
print('validation_dataset', validation_dataset.shape)
print('test_labels', test_labels.shape)
print('test_dataset', test_dataset.shape, '\n')

train_dataset = np.pad(train_dataset, ((0, 0), (2, 2), (2, 2), (0, 0)), 'constant')
validation_dataset = np.pad(validation_dataset, ((0, 0), (2, 2), (2, 2), (0, 0)), 'constant')
test_dataset = np.pad(test_dataset, ((0, 0), (2, 2), (2, 2), (0, 0)), 'constant')

print('train_dataset', train_dataset.shape)
print('validation_dataset', validation_dataset.shape)
print('test_dataset', test_dataset.shape)

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
train_labels (55000, 10)
train_dataset (55000, 28, 28, 1)
validation_labels (5000, 10)
validation_dataset (5000, 28, 28, 1)
test_labels (10000, 10)
test_dataset (10000, 28, 28, 1) 

train_dataset (55000, 32, 32, 1)
validation_dataset (5000, 32, 32, 1)
test_dataset (10000, 32, 32, 1)


In [3]:
def weight_init_he(np_ar, previous_neurons_num):
    return np.sqrt(2 / previous_neurons_num) * np.random.randn(*np_ar.shape)


layers = []

conv1_w = weight_init_he(np.zeros((5, 5, 1, 6)), previous_neurons_num=32*32)
conv1_b = 0.01
layers.append(conv1_w)

conv2_w = weight_init_he(np.zeros((5, 5, layers[-1].shape[-1], 16)), previous_neurons_num=6*14*14)
conv2_b = 0.01
layers.append(conv2_w)

conv3_w = weight_init_he(np.zeros((5, 5, layers[-1].shape[-1], 10)), previous_neurons_num=16*5*5)
conv3_b = 0.01
layers.append(conv3_w)

dens1_w = weight_init_he(np.zeros((layers[-1].shape[-1], 10)), previous_neurons_num=120)
dens1_b = 0.01
layers.append(dens1_w)

# dens2_w = weight_init_he(np.zeros((layers[-1].shape[-1], 10)), previous_neurons_num=84)
# dens2_b = 0.01
# layers.append(dens2_w)

In [4]:
def weight_init_he(np_ar, previous_neurons_num):
    return np.sqrt(2 / previous_neurons_num) * np.random.randn(*np_ar.shape)

def pooling(np_ar, pooling_func, stride=2):
    # block size for both single image or images array
    block_size = *((len(np_ar.shape)-3)*[1]), stride, stride, 1
    return block_reduce(np_ar, block_size=(block_size), func=pooling_func)

def pipe_operation_conv(np_ar, conv_w, conv_b, activ_func=du.relu):
    
    hks = int((conv_w.shape[1] - 1) / 2)
    conv = np.zeros((np_ar.shape[0] - 2*hks, np_ar.shape[1] - 2*hks, conv_w.shape[-1]))
    
    for i in range(conv_w.shape[-1]):
        conv[:, :, i] = activ_func(du.convolution(np_ar, conv_w[:, :, :, i], stride=1, bias=conv_b))
    
    return conv

def pipe_operation_dense(np_ar, den_w, den_b):
    return np.dot(np_ar, den_w) + den_b

def forward(img):
    c1 = pipe_operation_conv(img, conv1_w, conv1_b)
    p1 = pooling(c1, np.mean, stride=2)

    c2 = pipe_operation_conv(p1, conv2_w, conv2_b)
    p2 = pooling(c2, np.mean, stride=2)

    c3 = pipe_operation_conv(p2, conv3_w, conv3_b)
    c3 = c3.squeeze()

    d1 = pipe_operation_dense(c3, dens1_w, dens1_b)
#     d2 = pipe_operation_dense(d1, dens2_w, dens2_b)
    
    return d1

def softmax(x):
    return np.exp(x) / np.sum(np.exp(x), axis=0)

def batch_gen(dataset, dataset_labels, batch_size, steps, normalization=True):
    for batch in range(steps):
#         print('batch', batch)
        batch_data    = np.take(dataset       , range(32*batch, 32*(batch+1)), axis=0, mode='wrap')
        batch_classes = np.take(dataset_labels, range(32*batch, 32*(batch+1)), axis=0, mode='wrap')
        if normalization:
            batch_mean = batch_data.sum(axis=0) / len(batch_data)
            delta = batch_data - batch_mean
            batch_var = ((delta) ** 2).sum(axis=0) / len(batch_data)
            resulting_batch = (delta)/ ( (batch_var + 1e-8) ** 0.5 )
        else:
            resulting_batch = batch_data

#         plt.imshow(np.squeeze(resulting_batch[0,:,:,:]),cmap='gray')
#         plt.show()

#         plt.imshow(np.squeeze(resulting_batch[4,:,:,:]),cmap='gray')
#         plt.show()

#         print('\n=================================')
#         print('============== Case 3 Implementation ===================')
#         print("Data Shape: ",resulting_batch.shape)
#         print("Data Max: ",resulting_batch.max())
#         print("Data Min: ",resulting_batch.min())
#         print("Data Mean: ",resulting_batch.mean())
#         print("Data Variance: ",resulting_batch.var())
#         plt.hist(resulting_batch.flatten() ,bins='auto')
#         plt.show()
#         print('=================================')            
        
        yield resulting_batch, batch_classes

In [5]:
total = 1
for layer in layers:
    print(layer.shape)
    for dim in layer.shape:
        total *= dim
print(total)
print(total/146313216000000000)

(5, 5, 1, 6)
(5, 5, 6, 16)
(5, 5, 16, 10)
(10, 10)
144000000000
9.841899722852105e-07


In [6]:
print('train_dataset[0].shape', train_dataset[0].shape)

print('CONV')
c1 = pipe_operation_conv(train_dataset[0], conv1_w, conv1_b)
print('c1.shape', c1.shape)
p1 = pooling(c1, np.mean, stride=2)
print('p1.shape', p1.shape)

c2 = pipe_operation_conv(p1, conv2_w, conv2_b)
print('c2.shape', c2.shape)
p2 = pooling(c2, np.mean, stride=2)
print('p2.shape', p2.shape)

c3 = pipe_operation_conv(p2, conv3_w, conv3_b)
print('c3.shape', c3.shape)
c3 = c3.squeeze()
print('c3.shape', c3.shape)

print('DENSE')
d1 = np.dot(c3, dens1_w) + dens1_b
print('d1.shape', d1.shape)

# d2 = np.dot(d1, dens2_w) + dens2_b
# print('d2.shape', d2.shape)

train_dataset[0].shape (32, 32, 1)
CONV
c1.shape (28, 28, 6)
p1.shape (14, 14, 6)
c2.shape (10, 10, 16)
p2.shape (5, 5, 16)
c3.shape (1, 1, 10)
c3.shape (10,)
DENSE
d1.shape (10,)


In [7]:
%%time

def train():
    batch_size = 32
    h = 0.00000001
    learning_rate = 1
    batches = 3
    # prev_dw = np.inf
    b_idx = 1
    loss_threshold = 0.01
    end_training = False

    for batch, classes in batch_gen(train_dataset, train_labels, batch_size=batch_size, steps=batches, normalization=True):

        print('BATCH: ', b_idx)
        b_idx += 1
        classes = np.argmax(classes, axis=1)

        for layer_idx, layer in enumerate(layers):
            print('layer:', layer_idx)

            for idx, val in np.ndenumerate(layer):

                # calculando L(w)
                Lw_results = np.zeros((batch_size,))
                for i, img, cl in zip(range(batch_size), batch, classes):
                    softscores = softmax(forward(img))
                    Lw_results[i] = -np.log(softscores[cl])

                # calculando L(w+h)
                layer[idx] += h
                Lwh_results = np.zeros((batch_size,))
                for i, img, cl in zip(range(batch_size), batch, classes):
                    softscores = softmax(forward(img))
                    Lwh_results[i] = -np.log(softscores[cl])
                layer[idx] -= h

                Lw =  Lw_results.mean()
                Lwh = Lwh_results.mean()

                # dw
                dw = (Lwh - Lw) / h

                #========================================#
                # SGD
                print(idx, end='\t')
                print('Loss:\t', Lw)
                layer[idx] -= dw * learning_rate

                if Lwh < loss_threshold:
                    return

train()

BATCH:  1
layer: 0
(0, 0, 0, 0)	Loss:	 2.3002267500592133
(0, 0, 0, 1)	Loss:	 2.30020275965089
(0, 0, 0, 2)	Loss:	 2.300200130039215
(0, 0, 0, 3)	Loss:	 2.3001656424895742
(0, 0, 0, 4)	Loss:	 2.3001289280464783
(0, 0, 0, 5)	Loss:	 2.300123907413152
(0, 1, 0, 0)	Loss:	 2.3000781191681785
(0, 1, 0, 1)	Loss:	 2.300021665002039
(0, 1, 0, 2)	Loss:	 2.3000179213155088
(0, 1, 0, 3)	Loss:	 2.299902500244708
(0, 1, 0, 4)	Loss:	 2.2999025140593057
(0, 1, 0, 5)	Loss:	 2.2998975177469765
(0, 2, 0, 0)	Loss:	 2.2998561887218383
(0, 2, 0, 1)	Loss:	 2.299700439463413
(0, 2, 0, 2)	Loss:	 2.2996915269255696
(0, 2, 0, 3)	Loss:	 2.2995367914540754
(0, 2, 0, 4)	Loss:	 2.299515982818355
(0, 2, 0, 5)	Loss:	 2.2995177050166484
(0, 3, 0, 0)	Loss:	 2.2994997167421545
(0, 3, 0, 1)	Loss:	 2.2993355923705665
(0, 3, 0, 2)	Loss:	 2.299334782933667
(0, 3, 0, 3)	Loss:	 2.2991409527098083
(0, 3, 0, 4)	Loss:	 2.299116135518292
(0, 3, 0, 5)	Loss:	 2.299114590612799
(0, 4, 0, 0)	Loss:	 2.299102908225802
(0, 4, 0, 1)	Loss:

(0, 0, 3, 14)	Loss:	 2.296035236840458
(0, 0, 3, 15)	Loss:	 2.2960349900668553
(0, 0, 4, 0)	Loss:	 2.296029152660278
(0, 0, 4, 1)	Loss:	 2.2960285526540485
(0, 0, 4, 2)	Loss:	 2.296019982200151
(0, 0, 4, 3)	Loss:	 2.296015568122077
(0, 0, 4, 4)	Loss:	 2.2960117211665523
(0, 0, 4, 5)	Loss:	 2.296011711674212
(0, 0, 4, 6)	Loss:	 2.2959996261050915
(0, 0, 4, 7)	Loss:	 2.2959962638670515
(0, 0, 4, 8)	Loss:	 2.2959941946337112
(0, 0, 4, 9)	Loss:	 2.2959939206110276
(0, 0, 4, 10)	Loss:	 2.2959938584406245
(0, 0, 4, 11)	Loss:	 2.2959905427730964
(0, 0, 4, 12)	Loss:	 2.295989113905251
(0, 0, 4, 13)	Loss:	 2.295988940170379
(0, 0, 4, 14)	Loss:	 2.295988877123598
(0, 0, 4, 15)	Loss:	 2.2959887230235867
(0, 0, 5, 0)	Loss:	 2.2959849091793334
(0, 0, 5, 1)	Loss:	 2.295983591367998
(0, 0, 5, 2)	Loss:	 2.2959734755454333
(0, 0, 5, 3)	Loss:	 2.2959723446170752
(0, 0, 5, 4)	Loss:	 2.2959483439304282
(0, 0, 5, 5)	Loss:	 2.295948320538458
(0, 0, 5, 6)	Loss:	 2.295943391252521
(0, 0, 5, 7)	Loss:	 2.295942

KeyboardInterrupt: 