In [1]:
#Convolutional Network
import numpy as np
from PIL import Image

In [2]:
# show picture
import pickle

def getDate(file):
    with open(file, 'rb') as fileOpen:
        dict = pickle.load(fileOpen, encoding="bytes")
    return dict



In [3]:
dict1 = getDate("CS231n/cifar-10-batches-py/data_batch_1")
data_1 = dict1[b'data']
label_1 = dict1[b'labels']

In [40]:
def showPicture(imgX, imgY, input_dim):
    imgX = imgX.reshape(input_dim)
    imgs = imgX
    img0 = imgs[0]
    img1 = imgs[1]
    img2 = imgs[2]
    i0 = Image.fromarray(img0)
    i1 = Image.fromarray(img1)
    i2 = Image.fromarray(img2)
    img = Image.merge("RGB", (i0, i1, i2))
    img.save("CS231n/cifar-10-batches-py/pictures/" +
             str(imgY) +
             ".png", "png")

    return img


In [41]:
img = showPicture(data_1[0], label_1[0], (3, 32, 32))

In [140]:
class ThreeLayersNetwork:
    def __init__(self, input_dim=(3, 32, 32), num_filters=32, filter_size=7,
                 hidden_dim=128, num_classes=10, weight_scale=1e-3, reg=1e-6,
                 learning_rate=1e-5, dtype=np.float32):
        self.params = {}
        self.input_dim = input_dim
        self.reg = reg
        self.dtype = dtype
        self.learning_rate = learning_rate
        C, H, W = input_dim
        self.params['W1'] = weight_scale * np.random.randn(num_filters,
                                                           C,
                                                           filter_size,
                                                           filter_size)
        self.params['b1'] = np.zeros(num_filters)
        self.params['W2'] = weight_scale * np.random.randn(int(num_filters * H * W / 4),
                                                           hidden_dim)
        self.params['b2'] = np.zeros(hidden_dim)
        self.params['W3'] = weight_scale * np.random.randn(hidden_dim, num_classes)
        self.params['b3'] = np.zeros(num_classes)

        pass

    def conv_front(self, x, weight, b, conv_param):
        stride, pad = conv_param['stride'], conv_param['pad']
        N, C, H, W = x.shape
        F, C, HH, WW = weight.shape
        x_padded = np.pad(x,
                          ((0, 0), (0, 0), (pad, pad), (pad, pad)),
                          mode='constant')
        H_new = int((H + 2 * pad - HH) / stride + 1)
        W_new = int((W + 2 * pad - WW) / stride + 1)

        output = np.zeros((N, F, H_new, W_new))

        for i in range(N):
            for f in range(F):
                for h in range(H_new):
                    for w in range(W_new):
                        window = x_padded[i, :,
                                 h * stride:h * stride + HH,
                                 w * stride:w * stride + WW]
                        output[i, f, h, w] = np.sum(window * weight[f]) + b[f]
                        pass
        cache = (x, weight, b, conv_param)
        return output, cache

    def relu(self, x_new):
        output = np.maximum(0, x_new)
        cache = x_new
        return output, cache
        pass

    def pooling(self, x, pool_param):
        pool_height, pool_weight, stride = pool_param["pool_height"], \
                                           pool_param["pool_weight"], \
                                           pool_param["stride"]
        N, C, H, W = x.shape
        H_new = 1 + int((H - pool_height) / stride)
        W_new = 1 + int((W - pool_weight) / stride)
        output = np.zeros((N, C, H_new, W_new))
        for i in range(N):
            for c in range(C):
                for h in range(H_new):
                    for w in range(W_new):
                        window = x[i,
                                 c,
                                 h * stride:h * stride + pool_height,
                                 w * stride:w * stride + pool_weight]
                        output[i, c, h, w] = np.max(window)
        cache = (x, pool_param)
        return output, cache

    def full_connection(self, x, w, b):
        score = x.dot(w) + b
        cache = (x, w, b)
        return score, cache

    def softmax(self, x, y, w0, w1):
        n = x.shape[0]
        correct_classes = x[range(x.shape[0]), list(y)].reshape(-1, 1)
        exp_sum = np.sum(np.exp(x), axis=1).reshape(-1, 1)
        loss = np.sum(np.log(exp_sum) - correct_classes)
        loss = loss / n + \
               0.5 * self.reg * np.sum(w0 ** 2) + \
               0.5 * self.reg * np.sum(w1 ** 2)

        dw = np.exp(x) / exp_sum
        dw[range(n), list(y)] -= 1
        dw /= n
        return loss, dw

    def back_propagation(self, dout, cache):
        x, w, b = cache
        n = dout.shape[0]
        dx = dout.dot(w.T)
        dw = x.T.dot(dout)
        db = np.sum(dout, axis=0) / n
        return dx, dw, db
        pass

    def relu_back(self, dx, cache):
        x = cache
        dout = dx
        dout[x <= 0] = 0
        return dout
        pass

    def back_pool(self, dout, cache):
        x, pool_param = cache
        pool_height, pool_weight, stride = pool_param["pool_height"], \
                                           pool_param["pool_weight"], \
                                           pool_param["stride"]
        N, C, H, W = x.shape
        H_new = int((H - pool_height) / stride) + 1
        W_new = int((W - pool_weight) / stride) + 1
        dx = np.zeros_like(x)
        for i in range(N):
            for c in range(C):
                for h in range(H_new):
                    for w in range(W_new):
                        window = x[i,
                                 c,
                                 h * stride:h * stride + pool_height,
                                 w * stride:w * stride + pool_weight]
                        max_number = np.max(window)
                        dx[i,
                        c,
                        h * stride:h * stride + pool_height,
                        w * stride:w * stride + pool_weight] = \
                            (window == max_number) * dout[i, c, h, w]

        return dx
        pass

    def back_conv(self, dout, cache):
        x, weight, b, conv_param = cache
        stride, pad = conv_param['stride'], conv_param['pad']
        F, C, HH, WW = weight.shape
        N, C, H, W = x.shape
        dx = np.zeros_like(x)
        dw = np.zeros_like(weight)
        db = np.zeros_like(b)
        x = np.pad(x, ((0, 0), (0, 0), (pad, pad), (pad, pad)), mode='constant')
        dx = np.pad(dx, ((0, 0), (0, 0), (pad, pad), (pad, pad)), mode='constant')
        H_new = int((H + 2 * pad - HH) / stride + 1)
        W_new = int((W + 2 * pad - WW) / stride + 1)
        
        for i in range(N):
            for f in range(F):
                for h in range(H_new):
                    for w in range(W_new):
                        window = x[i,
                                 :,
                                 h * stride:h * stride + HH,
                                 w * stride:w * stride + WW]
                        db[f] += dout[i, f, h, w]
                        dw[f] += window * dout[i, f, h, w]
                        dx = np.float32(dx)
                        dx[i,
                        :,
                        h * stride:h * stride + HH,
                        w * stride:w * stride + WW] += weight[f] * dout[i, f, h, w]
        return dx, dw, db
        pass

    def loss(self, x, y=None):
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']
        W3, b3 = self.params['W3'], self.params['b3']

        filter_size = W1.shape[2]
        conv_param = {'stride': int(1), 'pad': int((filter_size - 1) / 2)}
        pool_param = {'pool_height': 2, 'pool_weight': 2, 'stride': 2}

        output, cache_conv = self.conv_front(x, W1, b1, conv_param)

        output, cache_relu = self.relu(output)

        output, cache_pool = self.pooling(output, pool_param)

        pool_shape = output.shape

        n = output.shape[0]

        output = output.reshape(n, -1)

        output, cache_full = self.full_connection(output, W2, b2)

        output, cache_full_relu = self.relu(output)

        output, cache_full_2 = self.full_connection(output, W3, b3)

        loss, dx = self.softmax(output, y, W2, W3)

        print("loss : ", loss)

        dout, dw, db = self.back_propagation(dx, cache_full_2)

        W3 -= self.learning_rate * dw

        b3 -= self.learning_rate * db

        dout = self.relu_back(dout, cache_full_relu)

        dout, dw, db = self.back_propagation(dout, cache_full)

        W2 -= self.learning_rate * dw

        b2 -= self.learning_rate * db

        dout = dout.reshape(pool_shape)

        dout = self.back_pool(dout, cache_pool)

        dout = self.relu_back(dout, cache_relu)

        dx, dw, db = self.back_conv(dout, cache_conv)

        W1 -= self.learning_rate * dw

        b1 -= self.learning_rate * db

        self.params['W3'] = W3
        self.params['b3'] = b3
        self.params['W2'] = W2
        self.params['b2'] = b2
        self.params['W1'] = W1
        self.params['b1'] = b1

        return loss
    pass


In [141]:
network = ThreeLayersNetwork()
start = 0
batch = 100
x = data_1[start: batch + start].reshape(batch-start, 3, 32, 32)
y = label_1[start: batch + start]
for i in range(10):
    network.loss(x, y)


loss :  2.3025855947961538


loss :  2.3025854228188067


loss :  2.30258524916966


loss :  2.302585075466517


loss :  2.302584901800097


loss :  2.302584728476044


loss :  2.3025845549452786


loss :  2.3025843812341953


loss :  2.30258420758394


loss :  2.302584033949131
