## Convolutional Neural Network

### index

- filter
- padding
- stride
- activation
- FC layer

### motivation

CNN의 내부동작원리의 이해를 돕기 위한 자료

### data

MNIST dataset

### reference 
- CS231n: Convolutional Neural Networks (http://cs231n.github.io/convolutional-networks/)
- https://github.com/raphey/numpy-cnn/blob/master/nn_util.py




In [1]:
# import module
import numpy as np
import warnings
from sklearn.datasets import fetch_mldata
from sklearn.utils import shuffle


### mnist data 불러오기

In [None]:
# dataset download and prepare datasetting
def import_and_prepare_mnist_data(valid_portion=0.1, test_portion=0.1, flat=True):
    """
    Load MNIST, shuffle it, and cut it into training, validation and testing sets.

    valid_portion : fraction of the samples placed in the validation set
    test_portion  : fraction of the samples placed in the testing set
    flat          : when False, the image array is reshaped to
                    (-1, 1, 28, 28) so it can be fed to a convolution layer;
                    when True the array is left as returned by shuffle_data.

    Returns (train, valid, test). Each one is a dict with three keys:
      'x': the image data, divided by 255.0
      'y_': the one-hot encoded labels
      'y_as_int': the labels as integers, for quick accuracy checking
    """
    # NOTE(review): fetch_mldata was removed from scikit-learn in 0.22; newer
    # environments need fetch_openml instead - confirm the installed version.
    mnist = fetch_mldata('MNIST original')
    data_size = len(mnist['data'])

    img_data, int_targets = shuffle_data(mnist)
    if not flat:
        img_data = img_data.reshape(-1, 1, 28, 28)

    scaled_data = img_data / 255.0
    int_targets = int_targets.astype(int)
    one_hots = one_hot_encode(int_targets)

    # Boundaries between training/validation and validation/testing.
    validation_start = int((1.0 - valid_portion - test_portion) * data_size)
    testing_start = int((1.0 - test_portion) * data_size)

    def take(span):
        # Apply the same slice to images, one-hot labels and integer labels.
        return {'x': scaled_data[span],
                'y_': one_hots[span],
                'y_as_int': int_targets[span]}

    train = take(slice(None, validation_start))
    valid = take(slice(validation_start, testing_start))
    test = take(slice(testing_start, None))

    return train, valid, test

### padding

- 이미지 데이터의 rank를 잘 파악해야합니다.
- 이미지의 rank가 상황마다 어떤식으로 표현되는지 파악해야합니다.

In [2]:
# padding function definition
def pad_image(img_array, top_pad, bottom_pad, left_pad, right_pad):
    """
    Zero-pad the last two axes (height, width) of an image array.

    img_array may be a single flat image with dimensions (height, width),
    an image with depth (channels) with dimensions (depth, height, width),
    a batch with dimensions (batch size, depth, height, width), or any
    higher-rank array whose last two axes are height and width. Leading
    axes are never padded.

    ---------------------------------------------------------------------------------------
    img_array  : image data (NumPy array with at least 2 dimensions)
    top_pad    : number of zero rows added above the image
    bottom_pad : number of zero rows added below the image
    left_pad   : number of zero columns added to the left of the image
    right_pad  : number of zero columns added to the right of the image

    Returns a new array created with np.zeros (so NumPy's default float
    dtype) in which the original values sit at row offset top_pad and
    column offset left_pad; img_array itself is not modified.
    """
    img_height = img_array.shape[-2]
    img_width = img_array.shape[-1]

    # Only the trailing two axes grow; every leading axis keeps its size.
    padded_shape = list(img_array.shape)
    padded_shape[-2] += top_pad + bottom_pad  # height
    padded_shape[-1] += left_pad + right_pad  # width

    padded_img = np.zeros(padded_shape)

    # The Ellipsis stands for "all leading axes", so one assignment covers
    # 2-D, 3-D, 4-D and higher-rank inputs alike.
    padded_img[..., top_pad: top_pad + img_height, left_pad: left_pad + img_width] = img_array

    return padded_img

In [4]:
def initialize_weight_array(l, w, stddev=None, relu=False, sigma_cutoff=2.0):
    """
    Build an (l, w) array of zero-mean normal samples with truncated tails.

    l            : number of rows; also used as N_in for the default stddev
    w            : number of columns
    stddev       : standard deviation of the samples. When None it defaults
                   to sqrt(1 / l), or to sqrt(2 / l) when relu is True.
    relu         : selects the larger default stddev intended for layers
                   followed by a relu activation
    sigma_cutoff : samples whose magnitude is not strictly below
                   sigma_cutoff * stddev are discarded and redrawn

    Samples are drawn one at a time from np.random.randn, so the result is
    reproducible under np.random.seed.
    """
    if stddev is None:
        numerator = 2.0 if relu else 1.0
        stddev = (numerator / l) ** 0.5

    wanted = l * w
    bound = sigma_cutoff * stddev
    accepted = []
    while len(accepted) < wanted:
        candidate = np.random.randn() * stddev
        if abs(candidate) >= bound:
            # Outside the allowed band: throw the draw away and try again.
            continue
        accepted.append(candidate)

    return np.array(accepted).reshape(l, w)

### convolution layer class

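- Given an input volume of size W1×H1×D1, K filters of spatial size F, stride S and zero padding P, the layer produces a volume of size W2×H2×D2 where:

- W2=(W1−F+2P)/S+1

- H2=(H1−F+2P)/S+1 (i.e. width and height are computed equally by symmetry)
- D2=K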

In [3]:
class Layer(object):
    """
    Abstract parent of every layer kind (matrices, activation functions and
    convolution layers).

    A layer carries four slots, all None until something fills them:
      input              : what forward_pass consumes
      output             : what forward_pass produced
      output_side_deltas : deltas received at the output side during backprop
      input_side_deltas  : deltas computed for the input side by backward_pass

    Subclasses must override forward_pass and backward_pass.
    """
    def __init__(self):
        # Forward-direction slots.
        self.input = self.output = None
        # Backward-direction slots.
        self.output_side_deltas = self.input_side_deltas = None

    def forward_pass(self):
        # Abstract: concrete layers provide the computation.
        raise NotImplementedError

    def backward_pass(self, backprop_params):
        # Abstract: concrete layers provide the computation.
        raise NotImplementedError


In [None]:
class SigmoidLayer(Layer):
    """
    Element-wise logistic (sigmoid) activation.

    The output has the same shape as the input, and the input-side deltas
    have the same shape as the output-side deltas.
    """
    def forward_pass(self):
        # sigmoid(x) = 1 / (1 + e^-x), applied to self.input.
        denominator = 1.0 + np.exp(-self.input)
        self.output = 1.0 / denominator
        return self.output

    def backward_pass(self, backprop_params):
        # backprop_params is accepted for interface compatibility but ignored.
        # The sigmoid derivative is output * (1 - output), taken from the
        # output stored by the last forward_pass.
        scaled = self.output_side_deltas * self.output
        self.input_side_deltas = scaled * (1.0 - self.output)
        return self.input_side_deltas


class SoftmaxLayer(Layer):
    """
    Softmax activation layer, to be used right before output. Backprop is skipped entirely,
    under the assumption that this will be used with cross-entropy loss.

    The softmax is taken along axis 1 of self.input, so the input is
    expected to be 2-D with one row of scores per sample.
    """
    def forward_pass(self):
        # Subtract each row's maximum before exponentiating. Softmax is
        # invariant to a per-row shift, and this keeps np.exp from
        # overflowing to inf (which would turn the row into NaNs) when the
        # scores are large.
        shifted = self.input - np.max(self.input, axis=1, keepdims=True)
        exp_z = np.exp(shifted)
        sums = np.sum(exp_z, axis=1, keepdims=True)
        self.output = exp_z / sums
        return self.output

    def backward_pass(self, backprop_params):
        # Backprop parameters are not used. The output-side deltas are
        # passed through unchanged.
        self.input_side_deltas = self.output_side_deltas
        return self.input_side_deltas

In [None]:
class ConvolutionLayer(Layer):
    """
    Convolution layer implemented as one matrix product.

    Every window of the (optionally padded) input is unrolled into a row, the
    rows are stacked, and the stack is multiplied by a 2-D filter matrix.
    Inputs and outputs are 4-D arrays laid out as
    (batch size, channels, height, width).

    channels_out : number of filters, i.e. depth of the output
    channels_in  : depth of the input
    window_size  : side length of the square filter window
    stride       : step between neighbouring windows
    pad          : when True the input is zero-padded in forward_pass so the
                   output height/width is ceil(input size / stride);
                   when False the input is used as given
    relu         : forwarded to initialize_weight_array to pick the default
                   standard deviation of the initial weights
    """

    def __init__(self, channels_out, channels_in, window_size, stride, pad=False, relu=True):
        super().__init__()
        self.channels_out = channels_out  # depth of the output
        self.channels_in = channels_in  # depth of the input
        self.window_size = window_size
        self.stride = stride
        self.pad = pad

        self.shape_4d = (channels_out, channels_in, window_size, window_size)
        # filter_2d has one row per element of an unrolled window
        # (channels_in * window_size**2) and one column per filter.
        self.filter_2d = initialize_weight_array(channels_in * window_size ** 2, channels_out, relu=relu)
        # 4-D copy of the same weights. The transpose puts the filter axis
        # first so the reshape matches shape_4d, exactly as backward_pass
        # does when it rebuilds this array.
        self.filter_4d = self.filter_2d.T.reshape(self.shape_4d)
        self.b = np.zeros(shape=(1, channels_out))

        # Values cached by forward_pass for use in backward_pass.
        self.batch_size = None
        self.padded_input = None
        self.top_pad = None
        self.bottom_pad = None
        self.left_pad = None
        self.right_pad = None
        self.reshaped_input = None
        self.output_height = None
        self.output_width = None

    def forward_pass(self):
        """
        Convolve self.input with the filters, store the result in self.output
        and return it with shape (batch size, channels_out, height, width).
        """
        if self.pad:
            # Padding amounts depend on the window size and the stride; see
            # the CS231n notes: http://cs231n.github.io/convolutional-networks/
            _, _, input_h, input_w = self.input.shape
            self.top_pad = (self.window_size - 1) // 2
            self.left_pad = (self.window_size - 1) // 2
            # With a large stride the formula can go negative, which
            # pad_image cannot represent. Clamping to zero leaves at most
            # stride - 1 trailing rows/columns that no window reaches, so the
            # output size is the same.
            self.bottom_pad = max(0, self.window_size // 2 - (input_h - 1) % self.stride)
            self.right_pad = max(0, self.window_size // 2 - (input_w - 1) % self.stride)
            self.padded_input = pad_image(self.input, self.top_pad, self.bottom_pad,
                                          self.left_pad, self.right_pad)
        else:
            # Without padding the input is used exactly as it was given.
            self.padded_input = self.input

        self.batch_size = self.input.shape[0]
        self.reshaped_input = self.img_batch_to_conv_stacks()

        # (batch * windows, unrolled window) x (unrolled window, channels_out)
        reshaped_output = np.dot(self.reshaped_input, self.filter_2d) + self.b
        # Fold the rows back into (batch, channels_out, height, width).
        self.output = reshaped_output.T.reshape(
            self.channels_out, self.batch_size, self.output_height, self.output_width
        ).transpose(1, 0, 2, 3)

        return self.output

    def backward_pass(self, backprop_params):
        """
        Compute the input-side deltas from self.output_side_deltas and update
        the filters and bias.

        backprop_params : (alpha_adj, lam) where alpha_adj scales the update
                          that is added to the weights and lam, when truthy,
                          applies a weight decay of (1 - lam * alpha_adj).

        Returns self.input_side_deltas, cropped back to the shape of the
        unpadded input when padding was applied.
        """
        alpha_adj, lam = backprop_params

        # Rearrange (batch, channels_out, h, w) into rows that line up with
        # the rows of self.reshaped_input: (batch * h * w, channels_out).
        reshaped_output_side_deltas = (
            self.output_side_deltas.transpose(1, 0, 2, 3).reshape(self.channels_out, -1).T
        )

        # Deltas are propagated with the filters as they were in the forward
        # pass, so this happens before the weight update below.
        reshaped_input_side_deltas = np.dot(reshaped_output_side_deltas, self.filter_2d.T)
        self.input_side_deltas = self.conv_stack_deltas_to_input_deltas(reshaped_input_side_deltas)

        if self.pad:
            # Drop the deltas that fall on the zero padding.
            new_bottom_index = self.input_side_deltas.shape[2] - self.bottom_pad
            new_right_index = self.input_side_deltas.shape[3] - self.right_pad
            self.input_side_deltas = self.input_side_deltas[:, :, self.top_pad:new_bottom_index,
                                                            self.left_pad:new_right_index]

        self.filter_2d += alpha_adj * np.dot(self.reshaped_input.T, reshaped_output_side_deltas)
        self.b += alpha_adj * self.output_side_deltas.sum(axis=(0, 2, 3))

        if lam:
            self.filter_2d *= (1.0 - lam * alpha_adj)

        # Rebuild the 4-D copy once, after every change to filter_2d.
        self.filter_4d = self.filter_2d.T.reshape(self.shape_4d)

        return self.input_side_deltas

    def img_batch_to_conv_stacks(self):
        """
        Unroll self.padded_input into rows that can be multiplied by filter_2d.

        A square window spanning the full depth is moved across each image
        (left to right along the top, then the next row down, then the next
        image). Each window is flattened into one row, so the returned array
        has dimensions (batch size * number_of_windows) by
        (window_size^2 * depth). Also sets self.output_height and
        self.output_width.
        """
        batch_size, img_depth, img_height, img_width = self.padded_input.shape
        unrolled_window_size = self.window_size ** 2 * img_depth

        # Spatial size of the output; its depth is the number of filters.
        self.output_height = (img_height - self.window_size) // self.stride + 1
        self.output_width = (img_width - self.window_size) // self.stride + 1

        conv_stack = []
        for k in range(batch_size):
            # Top-left corners of the windows, one stride apart.
            for i in range(0, img_height - self.window_size + 1, self.stride):
                for j in range(0, img_width - self.window_size + 1, self.stride):
                    window = self.padded_input[k, :, i:i + self.window_size, j:j + self.window_size]
                    conv_stack.append(window.reshape(unrolled_window_size))

        return np.array(conv_stack)

    def conv_stack_deltas_to_input_deltas(self, reshaped_input_side_deltas):
        """
        Reverse of img_batch_to_conv_stacks for deltas.

        Each row of reshaped_input_side_deltas holds the deltas of one
        unrolled window. Every row is folded back into a
        (channels_in, window_size, window_size) patch and added at the
        position its window came from; where windows overlap the
        contributions accumulate. Returns an array shaped like
        self.padded_input.
        """
        reshaped_input_side_deltas = reshaped_input_side_deltas.reshape(
            self.batch_size, self.output_height, self.output_width, -1)
        deconvolved_input_side_deltas = np.zeros(self.padded_input.shape)

        for k in range(self.batch_size):
            for i in range(self.output_height):
                for j in range(self.output_width):
                    patch_to_add = reshaped_input_side_deltas[k][i][j].reshape(
                        self.channels_in, self.window_size, self.window_size)

                    in_side_i = self.stride * i
                    in_side_j = self.stride * j
                    # Index by k so every sample receives its own deltas.
                    deconvolved_input_side_deltas[k, 0:self.channels_in,
                                                  in_side_i:in_side_i + self.window_size,
                                                  in_side_j:in_side_j + self.window_size] += patch_to_add

        return deconvolved_input_side_deltas
    