In [1]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from scipy.misc import imread
import tensorflow as tf
import numpy as np
import os
import sys
import math
import gzip
import glob
import random
import shutil
import pickle

In [2]:
# functions.py
def identity_function(x):
    return x


def step_function(x):
    return np.array(x > 0, dtype=np.int)


def sigmoid(x):
    return 1 / (1 + np.exp(-x))    


def sigmoid_grad(x):
    return (1.0 - sigmoid(x)) * sigmoid(x)
    

def relu(x):
    return np.maximum(0, x)


def relu_grad(x):
    grad = np.zeros(x)
    grad[x>=0] = 1
    return grad
    

def softmax(x):
    if x.ndim == 2:
        x = x.T
        x = x - np.max(x, axis=0)
        y = np.exp(x) / np.sum(np.exp(x), axis=0)
        return y.T 

    x = x - np.max(x)
    return np.exp(x) / np.sum(np.exp(x))


def mean_squared_error(y, t):
    return 0.5 * np.sum((y-t)**2)


def cross_entropy_error(y, t):
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    if t.size == y.size:
        t = t.argmax(axis=1)
             
    batch_size = y.shape[0]
    #print("y: " + str(y[np.arange(batch_size), t]))
    return -np.sum(np.log(y[np.arange(batch_size), t])) / batch_size


def softmax_loss(X, t):
    y = softmax(X)
    return cross_entropy_error(y, t)


In [3]:
# util.py
def smooth_curve(x):
    """
    http://glowingpython.blogspot.jp/2012/02/convolution-with-numpy.html
    """
    window_len = 11
    s = np.r_[x[window_len-1:0:-1], x, x[-1:-window_len:-1]]
    w = np.kaiser(window_len, 2)
    y = np.convolve(w/w.sum(), s, mode='valid')
    return y[5:len(y)-5]


def shuffle_dataset(x, t):
    """
    """
    permutation = np.random.permutation(x.shape[0])
    x = x[permutation,:] if x.ndim == 2 else x[permutation,:,:,:]
    t = t[permutation]

    return x, t

def conv_output_size(input_size, filter_size, stride=1, pad=0):
    return (input_size + 2*pad - filter_size) / stride + 1


def im2col(input_data, filter_h, filter_w, stride=1, pad=0):
    """
    Parameters
    ----------
    input_data : 이미지 데이터
    filter_h : 필터 높이
    filter_w : 필터 폭
    stride : 스트라이드
    pad : 패드

    Returns
    -------
    col : 2차원행렬
    """
    N, C, H, W = input_data.shape
    out_h = (H + 2 * pad - filter_h) // stride + 1
    out_w = (W + 2 * pad - filter_w) // stride + 1

    img = np.pad(input_data, [(0,0), (0,0), (pad, pad), (pad, pad)], 'constant')
    col = np.zeros((N, C, filter_h, filter_w, out_h, out_w))

    for y in range(filter_h):
        y_max = y + stride * out_h
        for x in range(filter_w):
            x_max = x + stride * out_w
            col[:, :, y, x, :, :] = img[:, :, y:y_max:stride, x:x_max:stride]

    col = col.transpose(0, 4, 5, 1, 2, 3).reshape(N * out_h * out_w, -1)
    return col


def col2im(col, input_shape, filter_h, filter_w, stride=1, pad=0):
    """

    Parameters
    ----------
    col :
    input_shape : 이미지 데이터 Shape（例：(10, 1, 28, 28)）
    filter_h
    filter_w
    stride
    pad

    Returns
    -------

    """
    N, C, H, W = input_shape
    out_h = (H + 2*pad - filter_h)//stride + 1
    out_w = (W + 2*pad - filter_w)//stride + 1
    col = col.reshape(N, out_h, out_w, C, filter_h, filter_w).transpose(0, 3, 4, 5, 1, 2)

    img = np.zeros((N, C, H + 2*pad + stride - 1, W + 2*pad + stride - 1))
    for y in range(filter_h):
        y_max = y + stride*out_h
        for x in range(filter_w):
            x_max = x + stride*out_w
            img[:, :, y:y_max:stride, x:x_max:stride] += col[:, :, y, x, :, :]

    return img[:, :, pad:H + pad, pad:W + pad]

In [4]:
# layers.py
class ReLU:
    def __init__(self):
        self.mask = None

    def forward(self, x):
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0
        return out

    def backward(self, din):
        din[self.mask] = 0
        dx = din
        return dx


class Sigmoid:
    def __init__(self):
        self.out = None

    def forward(self, x):
        out = sigmoid(x)
        self.out = out
        return out

    def backward(self, din):
        dx = din * self.out * (1.0 - self.out)
        return dx


class Affine:
    def __init__(self, W, b):
        self.W = W
        self.b = b
        self.x = None
        self.dW = None
        self.db = None

    def forward(self, x):
        self.x = x
        out = np.dot(self.x, self.W) + self.b
        return out

    def backward(self, din):
        dx = np.dot(din, self.W.T)
        self.dW = np.dot(self.x.T, din)
        self.db = np.sum(din, axis=0)
        return dx


class SoftmaxWithCrossEntropyLoss:
    def __init__(self):
        self.loss = None
        self.y = None
        self.t = None

    def forward(self, x, t):
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)
        return self.loss

    def backward(self, din=1):
        batch_size = self.t.shape[0]
        dx = (self.y - self.t) / float(batch_size)
        return dx


class Dropout:
    """
    http://arxiv.org/abs/1207.0580
    """
    def __init__(self, dropout_ratio=0.5):
        self.dropout_ratio = dropout_ratio
        self.mask = None

    def forward(self, x, is_train=True):
        if is_train:
            self.mask = np.random.rand(*x.shape) >= self.dropout_ratio
            return x * self.mask
        else:
            return x

    def backward(self, dout):
        return dout * self.mask


class BatchNormalization:
    """
    http://arxiv.org/abs/1502.03167
    """
    def __init__(self, gamma, beta, momentum=0.9, running_mean=None, running_var=None):
        self.gamma = gamma
        self.beta = beta
        self.momentum = momentum
        self.input_shape = None

        self.running_mean = running_mean
        self.running_var = running_var  
        
        self.batch_size = None
        self.xc = None
        self.std = None
        self.dgamma = None
        self.dbeta = None

    def forward(self, x, is_train=True):
        self.input_shape = x.shape
        if x.ndim != 2:
            N, C, H, W = x.shape
            x = x.reshape(N, -1)

        out = self.__forward(x, is_train)
        
        return out.reshape(*self.input_shape)
            
    def __forward(self, x, is_train):
        if self.running_mean is None:
            N, D = x.shape
            self.running_mean = np.zeros(D)
            self.running_var = np.zeros(D)
                        
        if is_train:
            mu = x.mean(axis=0)
            xc = x - mu
            var = np.mean(xc**2, axis=0)
            std = np.sqrt(var + 10e-7)
            xn = xc / std
            
            self.batch_size = x.shape[0]
            self.xc = xc
            self.xn = xn
            self.std = std
            self.running_mean = self.momentum * self.running_mean + (1-self.momentum) * mu
            self.running_var = self.momentum * self.running_var + (1-self.momentum) * var            
        else:
            xc = x - self.running_mean
            xn = xc / ((np.sqrt(self.running_var + 10e-7)))
            
        out = self.gamma * xn + self.beta 
        return out

    def backward(self, dout):
        if dout.ndim != 2:
            N, C, H, W = dout.shape
            dout = dout.reshape(N, -1)

        dx = self.__backward(dout)

        dx = dx.reshape(*self.input_shape)
        return dx

    def __backward(self, dout):
        dbeta = dout.sum(axis=0)
        dgamma = np.sum(self.xn * dout, axis=0)
        dxn = self.gamma * dout
        dxc = dxn / self.std
        dstd = -np.sum((dxn * self.xc) / (self.std * self.std), axis=0)
        dvar = 0.5 * dstd / self.std
        dxc += (2.0 / self.batch_size) * self.xc * dvar
        dmu = np.sum(dxc, axis=0)
        dx = dxc - dmu / self.batch_size
        
        self.dgamma = dgamma
        self.dbeta = dbeta
        
        return dx


class Convolution:
    def __init__(self, W, b, stride=1, pad=0):
        self.W = W
        self.b = b
        self.stride = stride
        self.pad = pad

        self.x = None   
        self.col = None
        self.col_W = None
        
        self.dW = None
        self.db = None

    def forward(self, x):
        FN, C, FH, FW = self.W.shape
        N, C, H, W = x.shape
        out_h = 1 + int((H + 2*self.pad - FH) / self.stride)
        out_w = 1 + int((W + 2*self.pad - FW) / self.stride)

        col = im2col(x, FH, FW, self.stride, self.pad)
        col_W = self.W.reshape(FN, -1).T

        out = np.dot(col, col_W) + self.b
        out = out.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2)

        self.x = x
        self.col = col
        self.col_W = col_W

        return out

    def backward(self, dout):
        FN, C, FH, FW = self.W.shape
        dout = dout.transpose(0,2,3,1).reshape(-1, FN)

        self.db = np.sum(dout, axis=0)
        self.dW = np.dot(self.col.T, dout)
        self.dW = self.dW.transpose(1, 0).reshape(FN, C, FH, FW)

        dcol = np.dot(dout, self.col_W.T)
        dx = col2im(dcol, self.x.shape, FH, FW, self.stride, self.pad)

        return dx


class Pooling:
    def __init__(self, pool_h, pool_w, stride=1, pad=0):
        self.pool_h = pool_h
        self.pool_w = pool_w
        self.stride = stride
        self.pad = pad
        
        self.x = None
        self.arg_max = None

    def forward(self, x):
        N, C, H, W = x.shape
        out_h = int(1 + (H - self.pool_h) / self.stride)
        out_w = int(1 + (W - self.pool_w) / self.stride)

        col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)
        col = col.reshape(-1, self.pool_h*self.pool_w)

        arg_max = np.argmax(col, axis=1)
        out = np.max(col, axis=1)
        out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)

        self.x = x
        self.arg_max = arg_max

        return out

    def backward(self, dout):
        dout = dout.transpose(0, 2, 3, 1)
        
        pool_size = self.pool_h * self.pool_w
        dmax = np.zeros((dout.size, pool_size))
        dmax[np.arange(self.arg_max.size), self.arg_max.flatten()] = dout.flatten()
        dmax = dmax.reshape(dout.shape + (pool_size,)) 
        
        dcol = dmax.reshape(dmax.shape[0] * dmax.shape[1] * dmax.shape[2], -1)
        dx = col2im(dcol, self.x.shape, self.pool_h, self.pool_w, self.stride, self.pad)
        
        return dx

In [5]:
# optimizers.py
class SGD:
    def __init__(self, lr=0.01):
        self.lr = lr
        
    def update(self, params, grads):
        for key in params.keys():
            params[key] -= self.lr * grads[key] 


class Momentum:
    def __init__(self, lr=0.01, momentum=0.9):
        self.lr = lr
        self.momentum = momentum
        self.v = None
        
    def update(self, params, grads):
        if self.v is None:
            self.v = {}
            for key, val in params.items():                                
                self.v[key] = np.zeros_like(val)
                
        for key in params.keys():
            self.v[key] = self.momentum*self.v[key] - self.lr*grads[key] 
            params[key] += self.v[key]


class Nesterov:
    def __init__(self, lr=0.01, momentum=0.9):
        self.lr = lr
        self.momentum = momentum
        self.v = None
        
    def update(self, params, grads):
        if self.v is None:
            self.v = {}
            for key, val in params.items():
                self.v[key] = np.zeros_like(val)
            
        for key in params.keys():
            self.v[key] *= self.momentum
            self.v[key] -= self.lr * grads[key]
            params[key] += self.momentum * self.momentum * self.v[key]
            params[key] -= (1 + self.momentum) * self.lr * grads[key]


class AdaGrad:
    def __init__(self, lr=0.01):
        self.lr = lr
        self.h = None
        
    def update(self, params, grads):
        if self.h is None:
            self.h = {}
            for key, val in params.items():
                self.h[key] = np.zeros_like(val)
            
        for key in params.keys():
            self.h[key] += grads[key] * grads[key]
            params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + 1e-7)


class RMSprop:
    def __init__(self, lr=0.01, decay_rate = 0.99):
        self.lr = lr
        self.decay_rate = decay_rate
        self.h = None
        
    def update(self, params, grads):
        if self.h is None:
            self.h = {}
            for key, val in params.items():
                self.h[key] = np.zeros_like(val)
            
        for key in params.keys():
            self.h[key] *= self.decay_rate
            self.h[key] += (1 - self.decay_rate) * grads[key] * grads[key]
            params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + 1e-7)


class Adam:
    def __init__(self, lr=0.001, beta1=0.9, beta2=0.999):
        self.lr = lr
        self.beta1 = beta1
        self.beta2 = beta2
        self.iter = 0
        self.m = None
        self.v = None
        
    def update(self, params, grads):
        if self.m is None:
            self.m, self.v = {}, {}
            for key, val in params.items():
                self.m[key] = np.zeros_like(val)
                self.v[key] = np.zeros_like(val)
        
        self.iter += 1
        lr_t  = self.lr * np.sqrt(1.0 - self.beta2**self.iter) / (1.0 - self.beta1**self.iter)         
        
        for key in params.keys():
            self.m[key] += (1 - self.beta1) * (grads[key] - self.m[key])
            self.v[key] += (1 - self.beta2) * (grads[key]**2 - self.v[key])
            params[key] -= lr_t * self.m[key] / (np.sqrt(self.v[key]) + 1e-7)


In [6]:
# initializers.py


class Initializer:
    def __init__(self, params, params_size_list, use_batch_normalization=False):
        self.params = params
        self.params_size_list = params_size_list
        self.use_batch_normalization = use_batch_normalization

    def initialize_params(self):
        pass

    def get_params(self):
        return self.params


class Zero_Initializer(Initializer):
    def initialize_params(self, use_batch_normalization):
        for idx in range(1, len(self.params_size_list)):
            self.params['W' + str(idx)] = np.zeros(self.params_size_list[idx - 1], self.params_size_list[idx])
            self.params['b' + str(idx)] = np.zeros(self.params_size_list[idx])
            if self.use_batch_normalization and idx < len(self.params_size_list) - 1:
                self.params['gamma' + str(idx)] = np.zeros(self.params_size_list[idx])
                self.params['beta' + str(idx)] = np.zeros(self.params_size_list[idx])

class N1_Initializer(Initializer):
    def initialize_params(self):
        for idx in range(1, len(self.params_size_list)):
            self.params['W' + str(idx)] = np.random.randn(self.params_size_list[idx - 1], self.params_size_list[idx])
            self.params['b' + str(idx)] = np.random.randn(self.params_size_list[idx])
            if self.use_batch_normalization and idx < len(self.params_size_list) - 1:
                self.params['gamma' + str(idx)] = np.random.randn(self.params_size_list[idx])
                self.params['beta' + str(idx)] = np.random.randn(self.params_size_list[idx])

class N2_Initializer(Initializer):
    def initialize_params(self):
        for idx in range(1, len(self.params_size_list)):
            self.params['W' + str(idx)] = np.random.randn(self.params_size_list[idx - 1], self.params_size_list[idx]) * 0.01
            self.params['b' + str(idx)] = np.random.randn(self.params_size_list[idx]) * 0.01
            if self.use_batch_normalization and idx < len(self.params_size_list) - 1:
                self.params['gamma' + str(idx)] = np.random.randn(self.params_size_list[idx]) * 0.01
                self.params['beta' + str(idx)] = np.random.randn(self.params_size_list[idx]) * 0.01

class Xavier_Initializer(Initializer):
    def initialize_params(self):
        for idx in range(1, len(self.params_size_list)):
            self.params['W' + str(idx)] = np.random.randn(self.params_size_list[idx - 1], self.params_size_list[idx]) / np.sqrt(self.params_size_list[idx - 1])
            self.params['b' + str(idx)] = np.random.randn(self.params_size_list[idx]) / np.sqrt(self.params_size_list[idx - 1])
            if self.use_batch_normalization and idx < len(self.params_size_list) - 1:
                self.params['gamma' + str(idx)] = np.random.randn(self.params_size_list[idx]) / np.sqrt(self.params_size_list[idx - 1])
                self.params['beta' + str(idx)] = np.random.randn(self.params_size_list[idx]) / np.sqrt(self.params_size_list[idx - 1])


class He_Initializer(Initializer):
    def initialize_params(self):
        for idx in range(1, len(self.params_size_list)):
            self.params['W' + str(idx)] = np.random.randn(self.params_size_list[idx - 1], self.params_size_list[idx]) * np.sqrt(2) / np.sqrt(self.params_size_list[idx - 1])
            self.params['b' + str(idx)] = np.random.randn(self.params_size_list[idx]) * np.sqrt(2) / np.sqrt(self.params_size_list[idx - 1])
            if self.use_batch_normalization and idx < len(self.params_size_list) - 1:
                self.params['gamma' + str(idx)] = np.random.randn(self.params_size_list[idx]) * np.sqrt(2) / np.sqrt(self.params_size_list[idx - 1])
                self.params['beta' + str(idx)] = np.random.randn(self.params_size_list[idx]) * np.sqrt(2) / np.sqrt(self.params_size_list[idx - 1])

In [7]:
# networks.py

activation_layers = {
    'Sigmoid': Sigmoid,
    'ReLU': ReLU
}

optimizers = {
    "SGD": SGD,
    "Momentum": Momentum,
    "Nesterov": Nesterov,
    "AdaGrad": AdaGrad,
    "RMSprop": RMSprop,
    "Adam": Adam
}

initializers = {
    'Zero': Zero_Initializer,
    'N1': N1_Initializer,
    'N2': N2_Initializer,
    'Xavier': Xavier_Initializer,
    'He': He_Initializer
}


class MultiLayerNet:
    def __init__(self, input_size, hidden_size_list, output_size, activation='ReLU', initializer='He',
                 optimizer='AdaGrad', learning_rate=0.01):
        self.input_size = input_size
        self.output_size = output_size
        self.hidden_size_list = hidden_size_list
        self.hidden_layer_num = len(hidden_size_list)

        # Weight Initialization
        self.params = {}
        self.weight_initialization(initializer)

        # Layering
        self.layers = OrderedDict()
        self.last_layer = None
        self.layering(activation)

        # Optimizer Initialization
        self.optimizer = optimizers[optimizer](lr=learning_rate)

    def weight_initialization(self, initializer):
        params_size_list = [self.input_size] + self.hidden_size_list + [self.output_size]
        initializer_obj = initializers[initializer](self.params, params_size_list)
        initializer_obj.initialize_params();

    def layering(self, activation):
        for idx in range(1, self.hidden_layer_num + 1):
            self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)], self.params['b' + str(idx)])
            self.layers['Activation' + str(idx)] = activation_layers[activation]()

        idx = self.hidden_layer_num + 1
        self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)], self.params['b' + str(idx)])

        self.last_layer = SoftmaxWithCrossEntropyLoss()

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)

        return x

    def loss(self, x, t):
        y = self.predict(x)
        return self.last_layer.forward(y, t)

    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        if t.ndim != 1 : t = np.argmax(t, axis=1)

        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    def backpropagation_gradient(self, x, t):
        # forward
        self.loss(x, t)

        # backward
        din = 1
        din = self.last_layer.backward(din)

        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            din = layer.backward(din)

        grads = {}
        for idx in range(1, self.hidden_layer_num+2):
            grads['W' + str(idx)] = self.layers['Affine' + str(idx)].dW
            grads['b' + str(idx)] = self.layers['Affine' + str(idx)].db

        return grads

    def learning(self, x_batch, t_batch):
        grads = self.backpropagation_gradient(x_batch, t_batch)
        self.optimizer.update(self.params, grads)


class MultiLayerNetExtended:
    def __init__(self, input_size, hidden_size_list, output_size, activation='ReLU', initializer='N2',
                 optimizer='AdaGrad', learning_rate=0.01,
                 use_batch_normalization=False,
                 use_weight_decay=False, weight_decay_lambda=0.0,
                 use_dropout=False, dropout_ratio_list=None):
        self.input_size = input_size
        self.output_size = output_size
        self.hidden_size_list = hidden_size_list
        self.hidden_layer_num = len(hidden_size_list)

        self.use_batch_normalization = use_batch_normalization

        self.use_weight_decay = use_weight_decay
        self.weight_decay_lambda = weight_decay_lambda

        self.use_dropout = use_dropout
        self.dropout_ratio_list = dropout_ratio_list

        # Weight Initialization
        self.params = {}
        self.weight_initialization(initializer)

        # Layering
        self.layers = OrderedDict()
        self.last_layer = None
        self.layering(activation)

        # Optimization Method
        self.optimizer = optimizers[optimizer](lr=learning_rate)

    def weight_initialization(self, initializer):
        params_size_list = [self.input_size] + self.hidden_size_list + [self.output_size]
        initializer_obj = initializers[initializer](self.params,
                                                    params_size_list,
                                                    self.use_batch_normalization)
        initializer_obj.initialize_params();

    def layering(self, activation):
        for idx in range(1, self.hidden_layer_num + 1):
            self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)], self.params['b' + str(idx)])
            if self.use_batch_normalization:
                self.layers['Batch_Normalization' + str(idx)] = BatchNormalization(self.params['gamma' + str(idx)],
                                                                                   self.params['beta' + str(idx)])
            self.layers['Activation' + str(idx)] = activation_layers[activation]()

            if self.use_dropout:
                self.layers['Dropout' + str(idx)] = Dropout(self.dropout_ratio_list[idx - 1])

        idx = self.hidden_layer_num + 1
        self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)], self.params['b' + str(idx)])

        self.last_layer = SoftmaxWithCrossEntropyLoss()

    def predict(self, x, is_train=False):
        for key, layer in self.layers.items():
            if "BatchNorm" in key or "Dropout" in key:
                x = layer.forward(x, is_train)
            else:
                x = layer.forward(x)
        return x

    def loss(self, x, t, is_train=False):
        y = self.predict(x, is_train)

        if self.use_weight_decay:
            weight_decay = 0.0
            for idx in range(1, self.hidden_layer_num + 2):
                W = self.params['W' + str(idx)]
                weight_decay += 0.5 * self.weight_decay_lambda * np.sum(W**2)
            return self.last_layer.forward(y, t) + weight_decay
        else:
            return self.last_layer.forward(y, t)

    def accuracy(self, x, t):
        y = self.predict(x, is_train=False)
        y = np.argmax(y, axis=1)
        if t.ndim != 1 : t = np.argmax(t, axis=1)

        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    def backpropagation_gradient(self, x, t):
        # forward
        self.loss(x, t, is_train=True)

        # backward
        din = 1
        din = self.last_layer.backward(din)

        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            din = layer.backward(din)

        grads = {}
        for idx in range(1, self.hidden_layer_num + 2):
            if self.use_weight_decay:
                grads['W' + str(idx)] = self.layers['Affine' + str(idx)].dW + self.weight_decay_lambda * self.params['W' + str(idx)]
            else:
                grads['W' + str(idx)] = self.layers['Affine' + str(idx)].dW
            grads['b' + str(idx)] = self.layers['Affine' + str(idx)].db

            if self.use_batch_normalization and idx <= self.hidden_layer_num:
                grads['gamma' + str(idx)] = self.layers['Batch_Normalization' + str(idx)].dgamma
                grads['beta' + str(idx)] = self.layers['Batch_Normalization' + str(idx)].dbeta

        return grads

    def learning(self, x_batch, t_batch):
        grads = self.backpropagation_gradient(x_batch, t_batch)
        self.optimizer.update(self.params, grads)

In [8]:
# deep_convnet.py


optimizers = {
    "SGD": SGD,
    "Momentum": Momentum,
    "Nesterov": Nesterov,
    "AdaGrad": AdaGrad,
    "RMSprop": RMSprop,
    "Adam": Adam
}

class DeepConvNet:
    """
        conv - relu - conv- relu - pool -
        conv - relu - conv- relu - pool -
        conv - relu - conv- relu - pool -
        affine - relu - dropout - affine - dropout - softmax
    """
    def __init__(self, input_dim=(1, 320, 240),
                 conv_param_1 = {'filter_num':16, 'filter_size':3, 'pad':1, 'stride':1},
                 conv_param_2 = {'filter_num':16, 'filter_size':3, 'pad':1, 'stride':1},
                 conv_param_3 = {'filter_num':32, 'filter_size':3, 'pad':1, 'stride':1},
                 conv_param_4 = {'filter_num':32, 'filter_size':3, 'pad':2, 'stride':1},
                 conv_param_5 = {'filter_num':64, 'filter_size':3, 'pad':1, 'stride':1},
                 conv_param_6 = {'filter_num':64, 'filter_size':4, 'pad':2, 'stride':1},
                 hidden_size=50, output_size=3, optimizer = 'Adam', learning_rate=0.01):

        pre_node_nums = np.array([1*3*3, 16*3*3, 16*3*3, 32*3*3, 32*3*3, 64*3*3, 64*21*16, hidden_size])

        weight_init_scales = np.sqrt(2.0 / pre_node_nums)
        
        self.params = {}

        pre_channel_num = input_dim[0]

        for idx, conv_param in enumerate([conv_param_1, conv_param_2, conv_param_3, conv_param_4, conv_param_5, conv_param_6]):
            self.params['W' + str(idx+1)] = weight_init_scales[idx] * np.random.randn(conv_param['filter_num'], pre_channel_num, conv_param['filter_size'], conv_param['filter_size'])
            self.params['b' + str(idx+1)] = np.zeros(conv_param['filter_num'])
            pre_channel_num = conv_param['filter_num']

        self.params['W7'] = weight_init_scales[6] * np.random.randn(64*16*21, hidden_size)
        self.params['b7'] = np.zeros(hidden_size)
        self.params['W8'] = weight_init_scales[7] * np.random.randn(hidden_size, output_size)
        self.params['b8'] = np.zeros(output_size)

        self.layers = []
        self.layers.append(Convolution(self.params['W1'], self.params['b1'], 
                           conv_param_1['stride'], conv_param_1['pad']))
        self.layers.append(ReLU())
        self.layers.append(Convolution(self.params['W2'], self.params['b2'], 
                           conv_param_2['stride'], conv_param_2['pad']))
        self.layers.append(ReLU())
        self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2))
        self.layers.append(Convolution(self.params['W3'], self.params['b3'], 
                           conv_param_3['stride'], conv_param_3['pad']))
        self.layers.append(ReLU())
        self.layers.append(Convolution(self.params['W4'], self.params['b4'],
                           conv_param_4['stride'], conv_param_4['pad']))
        self.layers.append(ReLU())
        self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2))
        self.layers.append(Convolution(self.params['W5'], self.params['b5'],
                           conv_param_5['stride'], conv_param_5['pad']))
        self.layers.append(ReLU())
        self.layers.append(Convolution(self.params['W6'], self.params['b6'],
                           conv_param_6['stride'], conv_param_6['pad']))
        self.layers.append(ReLU())
        self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2))
        self.layers.append(Affine(self.params['W7'], self.params['b7']))
        self.layers.append(ReLU())
        self.layers.append(Dropout(0.5))
        self.layers.append(Affine(self.params['W8'], self.params['b8']))
        self.layers.append(Dropout(0.5))
        
        self.last_layer = SoftmaxWithCrossEntropyLoss()

        # Optimizer Initialization
        self.optimizer = optimizers[optimizer](lr=learning_rate)

    def predict(self, x, train_flg=False):
        isFirstAffine = False
        for layer in self.layers:
            if isinstance(layer, Affine) and not isFirstAffine:
                isFirstAffine = True
                x = x.reshape(-1, 64*21*16)

            if isinstance(layer, Dropout):
                x = layer.forward(x, train_flg)
            else:
                x = layer.forward(x)
        return x

    def loss(self, x, t):
        y = self.predict(x, train_flg=True)
        return self.last_layer.forward(y, t)

    def accuracy(self, x, t, batch_size=10):
        if t.ndim != 1 : t = np.argmax(t, axis=1)

        acc = 0.0

        for i in range(int(x.shape[0] / batch_size)):
            tx = x[i*batch_size:(i+1)*batch_size]
            tt = t[i*batch_size:(i+1)*batch_size]
            y = self.predict(tx, train_flg=False)
            y = np.argmax(y, axis=1)
            acc += np.sum(y == tt)

        return acc / x.shape[0]

    def backpropagation_gradient(self, x, t):
        # forward
        self.loss(x, t)

        # backward
        dout = 1
        dout = self.last_layer.backward(dout)

        tmp_layers = self.layers.copy()
        tmp_layers.reverse()

        isFirstPooling = False
        for layer in tmp_layers:
            if isinstance(layer, Pooling) and not isFirstPooling:
                isFirstPooling = True
                dout = dout.reshape(-1, 64, 21, 16)
            dout = layer.backward(dout)

        # 設定
        grads = {}
        for i, layer_idx in enumerate((0, 2, 5, 7, 10, 12, 15, 18)):
            grads['W' + str(i+1)] = self.layers[layer_idx].dW
            grads['b' + str(i+1)] = self.layers[layer_idx].db

        return grads

    def learning(self, x_batch, t_batch):
        grads = self.backpropagation_gradient(x_batch, t_batch)
        self.optimizer.update(self.params, grads)

    def save_params(self, file_name="params.pkl"):
        with open(file_name, 'wb') as f:
            pickle.dump(self.params, f)

    def load_params(self, file_name="params.pkl"):
        with open(file_name, 'rb') as f:
            params = pickle.load(f)

        for key, val in params.items():
            self.params[key] = val

        for i, layer_idx in enumerate((0, 2, 5, 7, 10, 12, 15, 18)):
            self.layers[layer_idx].W = self.params['W' + str(i+1)]
            self.layers[layer_idx].b = self.params['b' + str(i+1)]


In [15]:
IMG_width = 320
IMG_height = 240
IMG_channel = 1
DATASET_PATH = "/Users/yhhan/Downloads/trainimage/dataset_jpg_gray"
kinds = ["train", "validation", "test"]
train_img = np.array([], dtype="float32")
train_label = np.array([], dtype="int8")
validation_img = np.array([], dtype="float32")
validation_label = np.array([], dtype="int8")
test_img = np.array([], dtype="float32")
test_label = np.array([], dtype="int8")
(label_train, filename_train) = (0, 0)
(label_validation, filename_validation) = (0, 0)
(label_test, filename_test) = (0, 0)


def file_info(category_name, dataset_path=DATASET_PATH):
    # 디렉토리 상의 파일경로와 파일의 제일 앞에 매겨진 숫자정보(라벨 정보)를 긁어서 반환
    full_path =  dataset_path + '/' + category_name + '/' + '*.jpg'
    image_filenames = glob.glob(full_path)
    filename = []
    label = []
    for image_filename in image_filenames:
        filename.append(image_filename.split("/")[3])
        label.append(image_filename.split("/")[3].split("-")[0])
    return (label, filename)


def display_image(image, label):
    %matplotlib
    inline

    fig = plt.figure(figsize=(20, 5))
    for i in range(5):
        print(label[i])
        img = image[i]
        img = img.reshape(240, 320)
        img.shape = (240, 320)
        plt.subplot(150 + (i + 1))
        plt.imshow(img)


def data_processing_about_train(idx, kind):
    for i in range(idx):
        global train_img
        global train_label

        (labels, filenames) = file_info(kind)
        random_value = random.randrange(0, len(filenames))
        filename = filenames[random_value]
        label = labels[random_value]
        #         label = _change_one_hot_label(train_label, label)
        #         label = labels[random_value]
        #         image_display(kinds, filename)
        full_path = ( DATASET_PATH + '/' + kind + '/' + filename)
        image = imread(full_path)
        image = image.reshape(IMG_width * IMG_height * IMG_channel) / 255.0
        train_img = np.append(train_img, image)
        label = _change_one_hot_label(label)
        train_label = np.append(train_label, label)
        # file move
        # 학습한 데이터는 이동시킴
        print("---------")
        print(len(filenames))
        print(filename)
        src =  DATASET_PATH + '/' + kind + '/'
        dir =  DATASET_PATH + '/' + 'tmp' + '/' + kind + '/'
        shutil.move(src + filename, dir + filename)


def data_processing_about_validation(idx, kind):
    for i in range(idx):
        global validation_img
        global validation_label

        (labels, filenames) = file_info(kind)
        random_value = random.randrange(0, len(filenames))
        filename = filenames[random_value]
        label = labels[random_value]
        #         label = _change_one_hot_label(train_label, label)
        #         label = labels[random_value]
        #         image_display(kinds, filename)
        full_path = ( DATASET_PATH + '/' + kind + '/' + filename)
        image = imread(full_path)
        image = image.reshape(IMG_width * IMG_height * IMG_channel) / 255.0
        validation_img = np.append(validation_img, image)
        label = _change_one_hot_label(label)
        validation_label = np.append(validation_label, label)
        # file move
        print("---------")
        print(len(filenames))
        print(filename)
        src =  DATASET_PATH + '/' + kind + '/'
        dir =  DATASET_PATH + '/' + 'tmp' + '/' + kind + '/'
        shutil.move(src + filename, dir + filename)


def data_processing_about_test(idx, kind):
    for i in range(idx):
        global test_img
        global test_label

        (labels, filenames) = file_info(kind)
        random_value = random.randrange(0, len(filenames))
        filename = filenames[random_value]
        label = labels[random_value]
        #         label = _change_one_hot_label(train_label, label)
        #         label = labels[random_value]
        #         image_display(kinds, filename)
        full_path = ( DATASET_PATH + '/' + kind + '/' + filename)
        image = imread(full_path)
        image = image.reshape(IMG_width * IMG_height * IMG_channel) / 255.0
        test_img = np.append(test_img, image)
        label = _change_one_hot_label(label)
        test_label = np.append(test_label, label)
        # file move
        print("---------")
        print(len(filenames))
        print(filename)
        src =  DATASET_PATH + '/' + kind + '/'
        dir =  DATASET_PATH + '/' + 'tmp' + '/' + kind + '/'
        shutil.move(src + filename, dir + filename)


def _change_one_hot_label(target_label):
    target_label = int(target_label)
    T = np.zeros((1, 3))
    T[0][target_label] = 1

    return T


def store_dataset(dataset_path=DATASET_PATH):
    global train_img
    global train_label
    global validation_img
    global validation_label
    global test_img
    global test_label

    (_, idx_train) = file_info("train")
    (_, idx_validation) = file_info("validation")
    (_, idx_test) = file_info("test")
    if len(idx_train) != 0 or len(idx_validation) != 0 or len(idx_test) != 0:
        data_processing_about_train(len(idx_train), "train")
        data_processing_about_validation(len(idx_validation), "validation")
        data_processing_about_test(len(idx_test), "test")

        # file로 쓰기
        train_img.tofile(dataset_path + '/' + 'train_img_dataset')
        train_label.tofile(dataset_path + '/' + 'train_label')
        validation_img.tofile(dataset_path + '/' + 'validation_img_dataset')
        validation_label.tofile(dataset_path + '/' + 'validation_label')
        test_img.tofile(dataset_path + '/' + 'test_img_dataset')
        test_label.tofile(dataset_path + '/' + 'test_label')
    else:
        print("[!] Already Finished Generateing Dataset. Please check directory.")

def load_dataset(dataset_path, flatten=True):
    (img_train, label_train) = (0,0)
    (img_validation, label_validation) = (0,0)
    (img_test, label_test) = (0,0)
    filenames = glob.glob(dataset_path + "/*")
    filename = []
    for filename in filenames:
        print(filename, )
        img_train = np.fromfile(open(dataset_path + "/" + "train_img_dataset", 'rb')).reshape(451,IMG_width * IMG_height * IMG_channel)
        label_train = np.fromfile(open(dataset_path + "/" + "train_label", 'rb')).reshape(451, 3)
        img_validation = np.fromfile(open(dataset_path + "/" + "validation_img_dataset", 'rb')).reshape(65, IMG_width * IMG_height * IMG_channel)
        label_validation = np.fromfile(open(dataset_path + "/" + "validation_label", 'rb')).reshape(65,3)
        img_test = np.fromfile(open(dataset_path + "/" + "test_img_dataset", 'rb')).reshape(130,IMG_width * IMG_height * IMG_channel)
        label_test = np.fromfile(open(dataset_path + "/" + "test_label", 'rb')).reshape(130, 3)
        
    if not flatten:
        img_train      = img_train.reshape(-1, 1, 240, 320)
        img_validation = img_validation.reshape(-1, 1, 240, 320)
        img_test       = img_test.reshape(-1, 1, 240, 320)
            
    return ((img_train, label_train), (img_validation, label_validation), (img_test, label_test))

In [16]:
# train_deepnet.py
(img_train, label_train), (img_validation, label_validation), (img_test, label_test) = load_dataset(dataset_path="/Users/yhhan/Downloads/trainimage/imdata", flatten=False)

print(img_train.shape)
print(label_train.shape)
print(img_validation.shape)
print(label_validation.shape)
print(img_test.shape)
print(label_test.shape)

num_epochs = 50
train_size = img_train.shape[0]
batch_size = 1
learning_rate = 0.01


network = DeepConvNet(learning_rate = learning_rate)


train_error_list = []
validation_error_list = []

test_accuracy_list = []
epoch_list = []

num_batch = math.ceil(train_size / batch_size)

for i in range(num_epochs):
    for j in range(num_batch):
        print(j, end=", ")
        sys.stdout.flush()
        x_batch = img_train[j * batch_size: j * batch_size + batch_size]
        t_batch = label_train[j * batch_size: j * batch_size + batch_size]
        network.learning(x_batch, t_batch)

    print()

    epoch_list.append(i)

    train_loss = network.loss(x_batch, t_batch)
    train_error_list.append(train_loss)

    validation_loss = network.loss(img_validation, label_validation)
    validation_error_list.append(validation_loss)

    test_accuracy = network.accuracy(img_test, label_test)
    test_accuracy_list.append(test_accuracy)

    print("Epoch: {0:5d}, Train Error: {1:7.5f}, Validation Error: {2:7.5f} - Test Accuracy: {3:7.5f}".format(
        i,
        train_loss,
        validation_loss,
        test_accuracy
    ))

# Draw Graph about Error Values & Accuracy Values
def draw_error_values_and_accuracy(epoch_list, train_error_list, validation_error_list, test_accuracy_list):
    # Draw Error Values and Accuracy
    fig = plt.figure(figsize=(20, 5))
    plt.subplot(121)
    plt.plot(epoch_list[1:], train_error_list[1:], 'r', label='Train')
    plt.plot(epoch_list[1:], validation_error_list[1:], 'g', label='Validation')
    plt.ylabel('Total Error')
    plt.xlabel('Epochs')
    plt.grid(True)
    plt.legend(loc='upper right')

    plt.subplot(122)
    plt.plot(epoch_list[1:], test_accuracy_list[1:], 'b', label='Test')
    plt.ylabel('Accuracy')
    plt.xlabel('Epochs')
    plt.yticks(np.arange(0.0, 1.0, 0.05))
    plt.grid(True)
    plt.legend(loc='lower right')
    plt.show()

def draw_false_prediction(diff_index_list):
    fig = plt.figure(figsize=(20, 5))
    for i in range(5):
        j = diff_index_list[i]
        print("False Prediction Index: %s, Prediction: %s, Ground Truth: %s" % (j, prediction[j], ground_truth[j]))
        img = np.array(img_test[j])
        img.shape = (240, 320)
        plt.subplot(150 + (i + 1))
        plt.imshow(img, cmap='gray')

draw_error_values_and_accuracy(epoch_list, train_error_list, validation_error_list, test_accuracy_list)

prediction = np.argmax(network.predict(img_test), axis=1)
ground_truth = np.argmax(label_test, axis=1)

print(prediction)
print(ground_truth)

diff_index_list = []
for i in range(len(img_test)):
    if (prediction[i] != ground_truth[i]):
        diff_index_list.append(i)

print("Total Test Image: {0}, Number of False Prediction: {1}".format(len(img_test), len(diff_index_list)))
print("Test Accuracy:", float(len(img_test) - len(diff_index_list)) / float(len(img_test)))
draw_false_prediction(diff_index_list)

network.save_params("deep_convnet_params.pkl")
print("Saved Network Parameters!")



/Users/yhhan/Downloads/trainimage/imdata/test_img_dataset
/Users/yhhan/Downloads/trainimage/imdata/test_label
/Users/yhhan/Downloads/trainimage/imdata/train_img_dataset
/Users/yhhan/Downloads/trainimage/imdata/train_label
/Users/yhhan/Downloads/trainimage/imdata/validation_img_dataset
/Users/yhhan/Downloads/trainimage/imdata/validation_label
(451, 1, 240, 320)
(451, 3)
(65, 1, 240, 320)
(65, 3)
(130, 1, 240, 320)
(130, 3)
0, 

ValueError: cannot reshape array of size 81344 into shape (21504)