In [None]:
import numpy as np
from skimage.util.shape import view_as_windows

##########
#   convolutional layer
#   you can re-use your previous implementation, or modify it if necessary
##########

class nn_convolutional_layer:
    """2-D convolution layer (cross-correlation, stride 1, no padding).

    Activations are laid out as (batch, channels, height, width).
    The weight tensor has shape (out_ch_size, in_ch_size, Wx_size, Wy_size)
    and the bias has shape (1, out_ch_size, 1, 1).
    """

    def __init__(self, Wx_size, Wy_size, input_size, in_ch_size, out_ch_size, std=1e0):
        """Create the layer with randomly initialised weights.

        Args:
            Wx_size: filter extent along weight axis 2 (matched against the
                input height axis in ``forward``).
            Wy_size: filter extent along weight axis 3 (matched against the
                input width axis in ``forward``).
            input_size: stored on the instance; not used by the computations
                in this class.
            in_ch_size: number of input channels.
            out_ch_size: number of filters (output channels).
            std: multiplier applied to the weight standard deviation.
        """
        # Standard deviation is std / sqrt(fan_in / 2).
        fan_in = in_ch_size * Wx_size * Wy_size
        self.W = np.random.normal(0, std / np.sqrt(fan_in / 2),
                                  (out_ch_size, in_ch_size, Wx_size, Wy_size))
        self.b = 0.01 + np.zeros((1, out_ch_size, 1, 1))
        self.input_size = input_size

    def update_weights(self, dW, db):
        """Add ``dW`` and ``db`` to the weights and bias in place."""
        self.W += dW
        self.b += db

    def get_weights(self):
        """Return the ``(W, b)`` pair currently held by the layer."""
        return self.W, self.b

    def set_weights(self, W, b):
        """Replace the layer's weights and bias with ``W`` and ``b``."""
        self.W = W
        self.b = b

    def forward(self, x):
        """Compute the layer output for a batch.

        Args:
            x: array of shape (N, C, H, W).

        Returns:
            Array of shape (N, FN, H - FH + 1, W - FW + 1) where
            ``out[n, f, i, j] = sum(x[n, :, i:i+FH, j:j+FW] * W[f]) + b[f]``.
        """
        N, C, H, W = x.shape
        FN, _, FH, FW = self.W.shape
        out_h = H - FH + 1
        out_w = W - FW + 1

        # im2col: col[n, c, i, j, p, q] holds the input value that filter tap
        # (i, j) sees when the filter is placed at output position (p, q).
        col = np.zeros((N, C, FH, FW, out_h, out_w))
        for i in range(FH):
            for j in range(FW):
                col[:, :, i, j, :, :] = x[:, :, i:i + out_h, j:j + out_w]

        # One row per output position, one column per (channel, tap) pair.
        self.col = col.transpose(0, 4, 5, 1, 2, 3).reshape(N * out_h * out_w, -1)
        self.col_W = self.W.reshape(FN, -1).T
        out = np.dot(self.col, self.col_W) + self.b.reshape((1, FN))

        # Rows are ordered (n, out_h, out_w); move the filter axis to position
        # 1 while keeping height before width.
        return out.reshape(N, out_h, out_w, FN).transpose(0, 3, 1, 2)

    def backprop(self, x, dLdy):
        """Back-propagate an upstream gradient through the layer.

        Args:
            x: the input that was given to ``forward``, shape (N, C, H, W).
            dLdy: gradient w.r.t. the layer output, shape
                (N, FN, H - FH + 1, W - FW + 1).

        Returns:
            Tuple ``(dLdx, dLdW, dLdb)`` with the shapes of ``x``,
            ``self.W`` and (1, FN, 1, 1) respectively.
        """
        FN, _, FH, FW = self.W.shape
        out_h, out_w = dLdy.shape[2], dLdy.shape[3]

        # The bias is added to every output position of its filter.
        dLdb = np.sum(dLdy, axis=(0, 2, 3)).reshape(1, FN, 1, 1)

        dLdx = np.zeros(x.shape)
        dLdW = np.zeros(self.W.shape)

        # Filter tap (p, q) multiplies the input patch shifted by (p, q), so
        # each tap contributes one correlation term to dLdW and scatters
        # dLdy * W[:, :, p, q] back onto that same shifted patch of dLdx.
        for p in range(FH):
            for q in range(FW):
                patch = x[:, :, p:p + out_h, q:q + out_w]
                dLdW[:, :, p, q] = np.einsum('nfij,ncij->fc', dLdy, patch)
                dLdx[:, :, p:p + out_h, q:q + out_w] += np.einsum(
                    'nfij,fc->ncij', dLdy, self.W[:, :, p, q])

        return dLdx, dLdW, dLdb

##########
#   max pooling layer
#   you can re-use your previous implementation, or modify it if necessary
##########

class nn_max_pooling_layer:
    """Max pooling over square windows of (batch, channels, height, width) data."""

    def __init__(self, stride, pool_size):
        """Store the pooling geometry.

        Args:
            stride: step between consecutive windows along height and width.
            pool_size: side length of the square pooling window.
        """
        self.stride = stride
        self.pool_size = pool_size

    def forward(self, x):
        """Return the maximum of every pooling window.

        Args:
            x: array of shape (N, C, H, W).

        Returns:
            Float array of shape (N, C, out_h, out_w) with
            ``out_h = (H - pool_size) // stride + 1`` (same for width), where
            ``out[n, c, i, j]`` is the maximum of the window whose top-left
            corner is at ``(i * stride, j * stride)``.
        """
        N, C, H, W = x.shape
        size, stride = self.pool_size, self.stride
        out_h = (H - size) // stride + 1
        out_w = (W - size) // stride + 1

        # col[n, c, i, j, p, q] is element (i, j) of the window at output
        # position (p, q); the strided slice visits one element per window.
        col = np.zeros((N, C, size, size, out_h, out_w))
        for i in range(size):
            i_end = i + stride * out_h
            for j in range(size):
                j_end = j + stride * out_w
                col[:, :, i, j, :, :] = x[:, :, i:i_end:stride, j:j_end:stride]

        # Reduce over the two within-window axes; the remaining axes are
        # already in (N, C, out_h, out_w) order.
        return col.max(axis=(2, 3))

    def backprop(self, x, dLdy):
        """Route the upstream gradient to the arg-max of every window.

        Args:
            x: the input that was given to ``forward``, shape (N, C, H, W).
            dLdy: gradient w.r.t. the pooled output, shape (N, C, out_h, out_w).

        Returns:
            Array with the shape of ``x``. Each window passes its gradient to
            the first maximal element (``argmax`` tie-breaking); contributions
            from overlapping windows are accumulated.
        """
        N, C, out_h, out_w = dLdy.shape
        size, stride = self.pool_size, self.stride

        dLdx = np.zeros(x.shape)
        batch_idx = np.arange(N)[:, np.newaxis]
        chan_idx = np.arange(C)[np.newaxis, :]

        for oi in range(out_h):
            top = oi * stride
            for oj in range(out_w):
                left = oj * stride
                # Flatten each window so argmax yields one index per (n, c).
                window = x[:, :, top:top + size, left:left + size].reshape(N, C, size * size)
                winners = window.argmax(axis=2)

                grad = np.zeros(window.shape)
                grad[batch_idx, chan_idx, winners] = dLdy[:, :, oi, oj]
                dLdx[:, :, top:top + size, left:left + size] += grad.reshape(N, C, size, size)

        return dLdx



##########
#   fully connected layer
##########
# fully connected linear layer.
# parameters: weight matrix W and bias b
# forward computation of y=Wx+b
# for (input_size)-dimensional input vector, outputs (output_size)-dimensional vector
# x can come in batches, so the shape of y is (batch_size, output_size)
# W has shape (output_size, input_size), and b has shape (output_size,)

class nn_fc_layer:
    """Fully connected layer computing ``y = x W^T + b`` on a batch.

    ``W`` has shape (output_size, input_size) and ``b`` has shape
    (output_size,).
    """

    def __init__(self, input_size, output_size, std=1):
        """Draw random weights and set every bias entry to 0.01.

        Args:
            input_size: number of input features per sample.
            output_size: number of output features per sample.
            std: multiplier applied to the weight standard deviation.
        """
        # Weight standard deviation is std / sqrt(input_size / 2).
        sigma = std / np.sqrt(input_size / 2)
        self.W = np.random.normal(0, sigma, (output_size, input_size))
        self.b = np.full(output_size, 0.01)

    def forward(self, x):
        """Flatten each sample to ``input_size`` features and apply the layer.

        Args:
            x: array whose first axis is the batch and whose remaining axes
                hold ``input_size`` elements in total.

        Returns:
            Array of shape (batch, output_size).
        """
        batch = x.shape[0]
        in_features = self.W.shape[1]
        flat = x.reshape((batch, in_features))
        return np.matmul(flat, self.W.T) + self.b

    def backprop(self, x, dLdy):
        """Back-propagate an upstream gradient through the layer.

        Args:
            x: the input that was given to ``forward``.
            dLdy: gradient w.r.t. the layer output, shape (batch, output_size).

        Returns:
            Tuple ``(dLdx, dLdW, dLdb)``; ``dLdx`` has the shape of ``x``,
            ``dLdW`` the shape of ``W`` and ``dLdb`` the shape of ``b``.
        """
        batch = x.shape[0]
        in_features = self.W.shape[1]
        flat = x.reshape(batch, in_features)

        # The bias is shared by every sample, so its gradient sums over the batch.
        grad_b = np.sum(dLdy, axis=0)
        grad_W = dLdy.T.dot(flat)
        grad_x = dLdy.dot(self.W).reshape(x.shape)
        return grad_x, grad_W, grad_b

    def update_weights(self, dLdW, dLdb):
        """Rebind ``W`` and ``b`` to new arrays equal to the old value plus the step."""
        self.W = self.W + dLdW
        self.b = self.b + dLdb

    def get_weights(self):
        """Return the ``(W, b)`` pair currently held by the layer."""
        return self.W, self.b

    def set_weights(self, W, b):
        """Replace the layer's weights and bias with ``W`` and ``b``."""
        self.W = W
        self.b = b

##########
#   activation layer
##########
#   This is ReLU activation layer.
##########

class nn_activation_layer:
    """Element-wise ReLU activation."""

    def __init__(self):
        """The layer holds no parameters or state."""
        pass

    def forward(self, x):
        """Return ``x`` where it is strictly positive and 0 elsewhere."""
        is_positive = x > 0
        return np.where(is_positive, x, 0)

    def backprop(self, x, dLdy):
        """Pass ``dLdy`` through where ``x`` is strictly positive, 0 elsewhere.

        Args:
            x: the input that was given to ``forward``.
            dLdy: gradient w.r.t. the layer output.
        """
        is_positive = x > 0
        return np.where(is_positive, dLdy, 0)


##########
#   softmax layer
#   you can re-use your previous implementation, or modify it if necessary
##########

class nn_softmax_layer:
    """Softmax over axis 1 of a (batch, classes) score array."""

    def __init__(self):
        """The layer holds no parameters or state."""
        pass

    @staticmethod
    def _softmax(x):
        """Return the softmax of ``x`` along axis 1."""
        # Subtracting the per-row maximum leaves the result unchanged but
        # keeps np.exp from overflowing on large scores.
        shifted = x - np.max(x, axis=1, keepdims=True)
        exp_x = np.exp(shifted)
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)

    def forward(self, x):
        """Convert scores to probabilities.

        Args:
            x: array of shape (N, num_classes).

        Returns:
            Array of the same shape whose rows sum to 1.
        """
        return self._softmax(x)

    def backprop(self, x, dLdy):
        """Back-propagate an upstream gradient through the softmax.

        Args:
            x: the scores that were given to ``forward``.
            dLdy: gradient w.r.t. the softmax output, same shape as ``x``.

        Returns:
            Gradient w.r.t. ``x``:
            ``soft * (dLdy - sum(dLdy * soft, axis=1))``.
        """
        soft = self._softmax(x)
        # Vector-Jacobian product of softmax. When dLdy is -1/soft at the
        # label and 0 elsewhere, the row sum is -1 and this reduces to
        # dLdy * soft + soft, i.e. soft - onehot.
        row_dot = np.sum(dLdy * soft, axis=1, keepdims=True)
        return soft * (dLdy - row_dot)

##########
#   cross entropy layer
#   you can re-use your previous implementation, or modify it if necessary
##########

class nn_cross_entropy_layer:
    """Cross-entropy loss on class probabilities with integer labels."""

    def __init__(self):
        """The layer holds no parameters or state."""
        pass

    def forward(self, x, y):
        """Return the mean negative log-probability of the labelled classes.

        Args:
            x: array of shape (N, num_classes) holding class probabilities.
            y: N integer class labels; ``y[i]`` indexes into ``x[i]``.

        Returns:
            ``-sum_i log(x[i, y[i]]) / N``.
        """
        N = x.shape[0]
        labels = np.asarray(y).reshape(-1)
        picked = x[np.arange(N), labels]
        return -np.sum(np.log(picked)) / N

    def backprop(self, x, y):
        """Return the gradient of the per-sample losses w.r.t. ``x``.

        Args:
            x: array of shape (N, num_classes) holding class probabilities.
            y: N integer class labels; ``y[i]`` indexes into ``x[i]``.

        Returns:
            Array with the shape of ``x`` that is ``-1 / x[i, y[i]]`` at each
            labelled entry and 0 elsewhere.
        """
        N = x.shape[0]
        labels = np.asarray(y).reshape(-1)
        rows = np.arange(N)

        # NOTE: forward() divides the loss by N but this gradient is not
        # scaled by 1/N; the scale is kept as-is so downstream layers and
        # update rules receive the same magnitudes as before.
        dLdx = np.zeros(x.shape)
        dLdx[rows, labels] = -1 / x[rows, labels]
        return dLdx