# Import

# In [31]:
import numpy as np
import os
from scipy.special import expit

# Define

# In [10]:
def IsNone(target):
    """Return True when ``target`` is the ``None`` singleton, False otherwise."""
    # Identity is the canonical None test; unlike ``==`` it can never be
    # turned into an element-wise comparison by array-like arguments.
    return target is None

def ArraySign(input_array):
    """Return the element-wise sign as floats.

    Positive entries map to +1., negative entries to -1. and everything
    else (zero, or values that compare neither way) to 0.
    """
    is_positive = input_array > 0
    is_negative = input_array < 0
    return 1.*is_positive - 1.*is_negative

def CutValue(input_array, cut_value):
    """Clip the magnitude of ``input_array`` to ``cut_value``, keeping each sign.

    Entries whose absolute value is below ``cut_value`` are returned
    unchanged; the others become ``+cut_value`` or ``-cut_value``.

    The clipping uses ``np.minimum`` instead of multiplying by boolean
    masks: with masks an infinite entry produced ``inf * 0 = nan``, so
    infinite inputs were not clipped at all.
    """
    # Same +1 / 0 / -1 float convention as ArraySign.
    sign = 1.*(input_array > 0) - 1.*(input_array < 0)
    magnitude = np.minimum(np.abs(input_array), cut_value)
    return sign * magnitude

def WeightedSum(input_array, weight):
    """Return the sum of ``input_array`` (flattened) times ``weight``.

    Raises:
        ValueError: if the flattened array cannot be multiplied by
            ``weight`` (for example a size mismatch or ``weight=None``).
    """
    try:
        return (input_array.reshape(-1)*weight).sum()
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not turned into ValueError.
        raise ValueError("weight should be an 1-d array with the same size with input_array")

def WeightedRow(input_array, weight):
    """Scale every row of ``input_array`` by the matching entry of ``weight``.

    ``weight`` is reshaped to a column so that row ``i`` is multiplied by
    ``weight[i]``.

    Raises:
        ValueError: if ``weight`` cannot be reshaped and broadcast against
            ``input_array`` (for example a length mismatch or ``None``).
    """
    try:
        return input_array*weight.reshape(-1,1)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not turned into ValueError.
        raise ValueError("weight should be an 1-d array with the same length with first shape of input_array")

def OverPenalty(input_value, rate = 0.1, threshold=0.):
    """Penalty on the part of ``input_value`` whose magnitude exceeds ``threshold``.

    The result is ``rate * sign(x) * max(|x| - threshold, 0)`` element-wise,
    with the sign taken from ArraySign.
    """
    excess = np.abs(input_value) - threshold
    # keep only the part above the threshold
    excess *= excess > 0
    direction = ArraySign(input_value)
    excess *= rate * direction
    return excess

def RowOperate(matrix, threshold = 0.1**15):
    """Reduce ``matrix`` by Gauss-Jordan elimination with full pivoting.

    Each round picks the largest-magnitude entry among the rows and columns
    that hold no pivot yet, scales its row so the pivot becomes 1 and
    subtracts multiples of that row to clear the pivot column everywhere
    else. It stops after ``min(rows, columns)`` rounds, or earlier when the
    largest remaining magnitude is below ``threshold``.

    Args:
        matrix: 2-d array; it is copied and never modified.
        threshold: magnitudes below this value are treated as zero.

    Returns:
        (reduced_matrix, pivots): the rows that received a pivot, kept in
        their original order, and for each such row the column index of
        its pivot.
    """
    reduced_matrix = np.array(matrix)
    if reduced_matrix.dtype.kind in "biu":
        # the in-place division below cannot be stored in an integer array
        reduced_matrix = reduced_matrix.astype(float)

    shape = reduced_matrix.shape # matrix size
    mask = np.ones(shape) # 1 where a pivot may still be chosen
    # One slot per ROW, because the array is indexed by pivot_row below;
    # sizing it with min(shape) broke matrices with more rows than columns.
    # ``int`` replaces the ``np.int`` alias that was removed from NumPy.
    pivots = -1*np.ones(shape[0], dtype = int)
    for t in range(min(shape)): # number of pivots <= min(rows, columns)
        magnitudes = np.abs(reduced_matrix * mask)
        if magnitudes.max() < threshold:
            break

        pivot_row, pivot_col = np.unravel_index(magnitudes.argmax(), shape)
        reduced_matrix[pivot_row] /= reduced_matrix[pivot_row][pivot_col]
        multi = np.array(reduced_matrix[:, pivot_col])
        multi[pivot_row] = 0. # the pivot row itself stays untouched
        reduced_matrix -= np.dot(multi.reshape((shape[0], 1)), reduced_matrix[pivot_row].reshape((1, shape[1])))
        mask[pivot_row] = 0.
        mask[:, pivot_col] = 0.
        pivots[pivot_row] = pivot_col # the column-index of pivot_row-th row is pivot_col

    has_pivot = pivots != -1
    return reduced_matrix[has_pivot,:], pivots[has_pivot]

def McmcNormal(points, drop_times = 10, mean=0., std=1.):
    """Draw one normal sample shaped like a single entry of ``points``.

    A first sample is drawn from N(mean, std). Then, ``drop_times - 1``
    times, a fresh candidate is drawn and replaces the current sample when
    ``rand() * d(current) < d(candidate)``, where ``d`` is the Euclidean
    distance to the nearest entry of ``points``. Candidates that lie
    farther from the given points are therefore preferred.
    """
    sample_shape = points.shape[1:]
    entry_axes = tuple(range(1, len(points.shape)))

    def nearest_distance(sample):
        # distance from ``sample`` to the closest entry of ``points``
        return np.sqrt(np.square(np.subtract(points, sample)).sum(axis=entry_axes)).min()

    output = np.random.normal(mean, std, sample_shape)
    if not drop_times > 1:
        return output

    for _ in range(1, drop_times):
        candidate = np.random.normal(mean, std, sample_shape)
        candidate_distance = nearest_distance(candidate)
        output_distance = nearest_distance(output)
        if np.random.rand()*output_distance < candidate_distance:
            output = np.array(candidate)

    return output

def McmcColExtend(input_matrix, drop_times):
    """Return ``input_matrix`` with one extra column appended.

    The new column is drawn by McmcNormal, using the mean and standard
    deviation of the whole matrix and treating the existing columns as
    the reference points.
    """
    columns_as_rows = input_matrix.T # work on rows so a column can be appended as a row
    new_row = McmcNormal(columns_as_rows,
                         drop_times,
                         columns_as_rows.mean(),
                         columns_as_rows.std())
    extended = np.insert(columns_as_rows, len(columns_as_rows), new_row, axis=0)
    return extended.T # transpose rows back to columns

# Data

class Data():
    """Inputs, labels and a per-datum weight that is normalised to sum to 1."""

    def __init__(self, inputs=np.zeros((0,0)), labels=np.zeros((0,0)), weight = None):
        self.SetData(inputs, labels, weight)

    def SetData(self, inputs=np.zeros((0,0)), labels=np.zeros((0,0)), weight = None):
        """Store a data set.

        Args:
            inputs: array with one datum per row.
            labels: array with one label per row, same length as ``inputs``.
            weight: optional 1-d weight per datum. ``None``, a wrong shape
                or a non-positive sum all fall back to uniform weights
                (the last two with a printed warning).

        Raises:
            ValueError: if ``inputs`` and ``labels`` differ in length.
        """
        if len(inputs) != len(labels):
            raise ValueError("num_datums error, #inputs != #labels.")

        self.inputs = inputs
        self.labels = labels
        num_datums = len(inputs)
        if weight is None:
            self.weight = np.ones(num_datums)
        else:
            # Float copy: the normalisation below must neither modify the
            # caller's array nor fail on integer weights.
            weight = np.array(weight, dtype=float)
            # The shape is compared with a real 1-tuple; comparing with the
            # bare int ``(len(inputs))`` was always unequal, so every user
            # supplied weight used to be rejected.
            if weight.shape != (num_datums,):
                self.weight = np.ones(num_datums)
                print("WARNING : weight shape error, set uniform weight.")
            elif weight.sum() <= 0:
                self.weight = np.ones(num_datums)
                print("WARNING : get non-positive weight sum, set uniform weight.")
            else:
                self.weight = weight

        self.weight /= self.weight.sum()

    def GetNumDatums(self):
        """Return the number of datums (rows of ``inputs``)."""
        return len(self.inputs)

    def GetDatumSize(self):
        """Return ``(input size, label size)``, the second shape entry of each array."""
        return (self.inputs.shape[1], self.labels.shape[1])

    def IsClassification(self):
        """Return True when the labels are one-hot rows.

        That is: every entry is 0 or 1, there is more than one label
        column and every row sums to exactly 1.
        """
        only_zero_one = ((self.labels == 0) | (self.labels == 1)).all()
        return bool(only_zero_one
                    and self.labels.shape[1] > 1
                    and (self.labels.sum(axis=1) == 1).all())

# Data end

# VariableArray

class VariableArray():
    """Trainable array together with its derivative and step-size state."""

    def __init__(self, size, cwise_step_initial=0.1):
        # start from standard-normal values of the requested size
        self._ResetState(np.random.normal(0., 1., size), cwise_step_initial)

        self.regulariz_rate = 0.
        self.regulariz_margin = 0.

    def _ResetState(self, value, cwise_step_initial):
        # Install ``value`` and reset all derivative / step bookkeeping.
        shape = value.shape
        self.value = value # array value
        self.total_deri = np.zeros(shape) # total derivative, used to descent
        self.last_total_deri = np.zeros(shape) # last total derivative
        self.moving = np.zeros(shape) # moving array
        self.cwise_step = cwise_step_initial*np.ones(shape) # component-wise step

    def SetValue(self, input_value, cwise_step_initial=0.1):
        """Replace the value by a copy of ``input_value`` and reset the bookkeeping."""
        self._ResetState(np.array(input_value), cwise_step_initial)

    def SetDeri(self, input_value):
        """Overwrite the total derivative with a copy of ``input_value`` (same shape required)."""
        if input_value.shape == self.total_deri.shape:
            self.total_deri = np.array(input_value)
            return

        raise ValueError("input_value shape error")

    def DeriModify(self, input_value):
        """Add ``input_value`` to the total derivative (same shape required)."""
        if input_value.shape == self.total_deri.shape:
            self.total_deri += input_value
            return

        raise ValueError("input_value shape error")

    def ZeroDeri(self):
        """Reset the total derivative in place by multiplying it with 0."""
        self.total_deri *= 0

    def SetRegularizer(self, rate, margin):
        """Set regularizer rate and margin; negative arguments are replaced by 0."""
        self.regulariz_margin = max(margin, 0.)
        self.regulariz_rate = max(rate, 0.)

    def ResetCwiseStep(self, input_cwise_step):
        """Set every component-wise step to ``input_cwise_step``."""
        step_shape = self.cwise_step.shape
        self.cwise_step = input_cwise_step * np.ones(step_shape)

    def Regularize(self):
        """Add the OverPenalty of the current value to the derivative when a rate is set."""
        if self.regulariz_rate == 0:
            return

        self.total_deri += OverPenalty(self.value, self.regulariz_rate, self.regulariz_margin)

    def Descent(self, step=1., method="normal", move_max=1.):
        """Move the value against the accumulated derivative, then clear it.

        "normal": move by ``-total_deri * step``, each component clipped
        to ``move_max`` in magnitude.
        "Rprop": move by the sign of the derivative times a component-wise
        step; that step grows by 1.2 where the derivative kept its sign,
        shrinks by 0.5 where it flipped, and is clipped to ``move_max``.

        Raises:
            ValueError: for any other ``method``.
        """
        self.Regularize()
        if method == "normal":
            self.moving = self.total_deri * step
            self.moving = -1*CutValue(self.moving, move_max)
        elif method == "Rprop":
            self.moving = ArraySign(self.total_deri)
            agreement = ArraySign(self.total_deri*self.last_total_deri)
            self.movint_return = agreement
            step_factor = 1.2*(agreement>0) + 1.*(agreement==0) + 0.5*(agreement<0)
            self.cwise_step *= step_factor
            self.cwise_step = CutValue(self.cwise_step, move_max)
            self.moving *= -1*self.cwise_step
        else:
            raise ValueError("descent method error")

        self.value += self.moving

        # remember this derivative for the next Rprop sign comparison
        self.last_total_deri = np.array(self.total_deri)
        self.ZeroDeri()

# VariableArray end

# Activation functions defined start

class Identity():
    """Identity activation: the output equals the input."""

    def Forward(self, flow_in):
        """Return ``flow_in`` itself."""
        return flow_in

    def Diff(self, flow_in):
        """Return the derivative: float64 ones shaped like ``flow_in``."""
        input_shape = flow_in.shape
        return np.ones(input_shape, dtype = np.float64)

    def Backward(self, flow_in, back_flow):
        """Return ``back_flow`` multiplied element-wise by the derivative."""
        local_slope = self.Diff(flow_in)
        return local_slope * back_flow

class Sigmoid():
    """Logistic sigmoid activation, computed with ``scipy.special.expit``."""

    def Forward(self, flow_in):
        """Return ``expit(flow_in)``."""
        return expit(flow_in)

    def Diff(self, flow_in):
        """Return the derivative, written as ``expit(x) * expit(-x)``."""
        activated = expit(flow_in)
        mirrored = expit(-flow_in)
        return activated*mirrored

    def Backward(self, flow_in, back_flow):
        """Return ``back_flow`` multiplied element-wise by the derivative."""
        local_slope = self.Diff(flow_in)
        return local_slope * back_flow

class Hypertan():
    """Hyperbolic-tangent activation; inputs are clipped to [-100, 100] first."""

    def Forward(self, flow_in):
        """Return ``tanh`` of the clipped input."""
        clipped = CutValue(flow_in, 100)
        return np.tanh(clipped)

    def Diff(self, flow_in):
        """Return the derivative ``1 / cosh(x)**2`` of the clipped input."""
        # clipping is symmetric, and cosh(-100) = cosh(100)
        clipped = CutValue(flow_in, 100)
        cosh_value = np.cosh(clipped)
        return 1. / np.square(cosh_value)

    def Backward(self, flow_in, back_flow):
        """Return ``back_flow`` multiplied element-wise by the derivative."""
        local_slope = self.Diff(flow_in)
        return local_slope * back_flow

class SoftSign():
    """Soft-sign activation: ``sign(x) * (1 - 1 / (|x| + 1))``."""

    def Forward(self, flow_in):
        """Return the soft-sign of ``flow_in``; outputs lie in (-1, 1)."""
        shrink = 1./(np.abs(flow_in) + 1.)
        return ArraySign(flow_in)*(1. - shrink)

    def Diff(self, flow_in):
        """Return the derivative ``1 / (|x| + 1)**2``."""
        denominator = np.square(np.abs(flow_in) + 1.)
        return 1. / denominator

    def Backward(self, flow_in, back_flow):
        """Return ``back_flow`` multiplied element-wise by the derivative."""
        local_slope = self.Diff(flow_in)
        return local_slope * back_flow

class Relu():
    """Rectified linear activation."""

    def Forward(self, flow_in):
        """Return ``flow_in`` where it is positive and 0 elsewhere."""
        is_active = flow_in>0
        return flow_in*is_active

    def Diff(self, flow_in):
        """Return the derivative: 1. where ``flow_in`` is positive, 0. elsewhere."""
        is_active = flow_in>0
        return 1.*is_active

    def Backward(self, flow_in, back_flow):
        """Return ``back_flow`` multiplied element-wise by the derivative."""
        local_slope = self.Diff(flow_in)
        return local_slope * back_flow

class LeakyRelu():
    """Leaky rectified linear activation with slope ``alpha`` on the negative side."""

    def __init__(self, alpha = 0.1):
        self.alpha = alpha

    def Forward(self, flow_in):
        """Return ``x`` for positive entries and ``alpha * x`` for negative ones."""
        positive_part = flow_in*(flow_in>0)
        negative_part = self.alpha*flow_in*(flow_in<0)
        return positive_part + negative_part

    def Diff(self, flow_in):
        """Return the derivative: 1 for positive entries, ``alpha`` for negative, 0 at 0."""
        positive_slope = 1.*(flow_in>0)
        negative_slope = self.alpha*(flow_in<0)
        return positive_slope + negative_slope

    def Backward(self, flow_in, back_flow):
        """Return ``back_flow`` multiplied element-wise by the derivative."""
        local_slope = self.Diff(flow_in)
        return local_slope * back_flow

class SoftPlus():
    """Soft-plus activation ``log(1 + exp(x))``."""

    def Forward(self, flow_in):
        """Return ``log(1 + exp(flow_in))`` element-wise.

        Evaluated as ``logaddexp(0, x) = log(exp(0) + exp(x))``: forming
        ``exp(x)`` directly overflows for large ``x`` and made the result
        ``inf`` instead of approximately ``x``.
        """
        return np.logaddexp(0., flow_in)

    def Diff(self, flow_in):
        """Return the derivative, which is the logistic sigmoid ``expit(flow_in)``."""
        return expit(flow_in)

    def Backward(self, flow_in, back_flow):
        """Return ``back_flow`` multiplied element-wise by the derivative."""
        return self.Diff(flow_in) * back_flow

class Selu():
    """Scaled exponential linear activation.

    ``ahpha`` is the overall output scale and ``beta`` scales the
    exponential branch used for negative inputs. The attribute names are
    kept as they are because they are publicly readable.
    """

    def __init__(self):
        self.ahpha = 1.05071
        self.beta = 1.67326

    def Forward(self, flow_in):
        """Return ``ahpha * x`` for ``x >= 0`` and ``ahpha * beta * (exp(x) - 1)`` otherwise.

        The two branches are built with ``maximum`` / ``minimum`` rather
        than boolean masks: ``exp(x)`` overflows to ``inf`` for large
        positive ``x``, and ``inf * False`` turned the result into NaN.
        """
        linear_part = np.maximum(flow_in, 0.)
        # exp only ever sees non-positive values, so it cannot overflow
        exponential_part = self.beta*(np.exp(np.minimum(flow_in, 0.)) - 1)
        return self.ahpha*(linear_part + exponential_part)

    def Diff(self, flow_in):
        """Return the derivative: ``ahpha`` for ``x >= 0``, ``ahpha * beta * exp(x)`` otherwise."""
        # exp is evaluated on min(x, 0) for the same overflow reason as in Forward
        negative_slope = self.beta*np.exp(np.minimum(flow_in, 0.))*(flow_in<0)
        return self.ahpha*(1.*(flow_in>=0) + negative_slope)

    def Backward(self, flow_in, back_flow):
        """Return ``back_flow`` multiplied element-wise by the derivative."""
        return self.Diff(flow_in) * back_flow

class Softmax():
    """Row-wise softmax activation."""

    def Forward(self, flow_in):
        """Return the softmax of every row of ``flow_in``."""
        # subtracting the row maximum keeps the exponentials from overflowing
        row_max = flow_in.max(axis=1).reshape(-1,1)
        exponentials = np.exp(flow_in - row_max)
        exponentials /= exponentials.sum(axis=1).reshape(-1,1)
        return exponentials

    def Backward(self, flow_in, back_flow):
        """Propagate ``back_flow`` through the softmax Jacobian of every row."""
        flow_out = self.Forward(flow_in)
        weighted = flow_out*back_flow
        row_total = (flow_out*back_flow).sum(axis=1).reshape(-1,1)
        return weighted - flow_out*row_total

# Activation functions defined end

# Layer

class Layer():
    """Fully connected layer: ``activation(flow_in . linear + bias)``."""

    def __init__(self, num_unit, activationFunction):
        """Create a layer with ``num_unit`` units.

        ``activationFunction`` must be an activation instance (for example
        ``Sigmoid()``); passing the class itself raises TypeError. The
        linear weights start with zero rows and have to be given their
        real shape with ``linear.SetValue`` before the layer is used.
        """
        if isinstance(activationFunction, type):
            raise TypeError("activationFunction should be a class. eg: Use 'Sigmoid()', not 'Sigmoid'")

        self.num_unit = num_unit
        self.activationFunction = activationFunction
        self.linear = VariableArray((0, self.num_unit)) # linear weights working before active function
        self.bias = VariableArray((1, self.num_unit)) # bias working before active function
        self.flow_in = np.zeros((0, self.num_unit)) # pre-activation of the last Forward
        self.flow_out = np.zeros((0, self.num_unit)) # activation of the last Forward

    def Forward(self, flow_in):
        """Compute and store the pre-activation and the activation for ``flow_in``."""
        self.flow_in = np.dot(flow_in, self.linear.value) + self.bias.value
        self.flow_out = self.activationFunction.Forward(self.flow_in)

    def Backward(self, back_flow, layer_source):
        """Accumulate the weight and bias derivatives and return the input derivative.

        Args:
            back_flow: derivative with respect to this layer's output.
            layer_source: the input that was fed to the last Forward.
        """
        deri = self.activationFunction.Backward(self.flow_in, back_flow)
        self.linear.DeriModify(np.dot(layer_source.T, deri))
        self.bias.DeriModify(np.sum(deri, axis=0).reshape(1, -1))
        deri = np.dot(deri, self.linear.value.T)
        return deri

    def ZeroDeri(self):
        """Clear the accumulated derivatives of weights and bias."""
        self.linear.ZeroDeri()
        self.bias.ZeroDeri()

    def SetRegularizer(self, rate, margin):
        """Set the same regularizer on weights and bias."""
        self.linear.SetRegularizer(rate, margin)
        self.bias.SetRegularizer(rate, margin)

    def Descent(self, step, method):
        """Apply one descent update to weights and bias."""
        self.linear.Descent(step, method)
        self.bias.Descent(step, method)

    def ResetCwiseStep(self, new_cwise_step):
        """Reset the component-wise step of weights and bias."""
        self.linear.ResetCwiseStep(new_cwise_step)
        self.bias.ResetCwiseStep(new_cwise_step)

    def GetPCA(self, weight = None):
        """Eigen-decompose the weighted covariance of the stored ``flow_out``.

        Args:
            weight: optional 1-d weight per datum; ``None`` means uniform
                weights that sum to 1.

        Returns:
            ``[eigenvalues, eigenvectors]`` as given by ``np.linalg.eigh``,
            with the eigenvectors transposed so that each ROW is one
            eigenvector.
        """
        if weight is None:
            weight = np.ones((self.flow_out.shape[0])) / self.flow_out.shape[0]

        centered_flow_out = self.flow_out - self.flow_out.mean(axis=0)
        cov = np.dot(centered_flow_out.T * weight, centered_flow_out) # covariance matrix
        # eigh returns an immutable tuple-like result, so it is unpacked
        # here; assigning to ``output[1]`` raised TypeError.
        eigenvalues, eigenvectors = np.linalg.eigh(cov)
        return [eigenvalues, eigenvectors.T] # transpose eigen vector from column to row

    def GetDimension(self):
        """Return the number of trainable scalars (weights plus bias)."""
        return self.linear.value.size + self.bias.value.size

# Layer end

# Loss function

class LossFunction():
    """Weighted loss selected by name: "r2" or "cross entropy"."""

    def __init__(self, method):
        self.method = method

    def SetMethod(self, method):
        """Switch the loss method; the name is only checked when the loss is evaluated."""
        self.method = method

    def GetLoss(self, inference, target, weight):
        """Return the weighted loss of ``inference`` against ``target``.

        "r2": weighted squared error divided by the weighted squared
        deviation of ``target`` from its column means.
        "cross entropy": weighted sum over datums of ``-target * log(inference)``.

        Raises:
            ValueError: if the method is neither of the two.
        """
        if self.method == "r2":
            squared_error = WeightedSum(np.square(inference - target).sum(axis=1), weight)
            target_spread = WeightedSum(np.square(target - target.mean(axis=0)).sum(axis=1), weight)
            return squared_error / target_spread

        if self.method == "cross entropy":
            per_datum = (-target*np.log(inference)).sum(axis=1)
            return WeightedSum(per_datum, weight)

        raise ValueError("loss function method should be 'r2', 'cross entropy'")

    def Backward(self, inference, target, weight):
        """Return the derivative of the loss with respect to ``inference``.

        Raises:
            ValueError: if the method is neither "r2" nor "cross entropy".
        """
        if self.method == "r2":
            gradient = WeightedRow(2.*(inference - target), weight)
            target_spread = WeightedSum(np.square(target - target.mean(axis=0)).sum(axis=1), weight)
            gradient /= target_spread
            return gradient

        if self.method == "cross entropy":
            return WeightedRow(-(target/inference), weight)

        raise ValueError("loss function method should be 'r2', 'cross entropy'")

class Terminator():
    """Early-stopping helper that compares a short and a long moving average.

    ``Hit`` keeps the most recent ``long_period`` values. Once that many
    values are recorded, it reports True when the mean of the newest
    ``short_period`` values exceeds the mean of all recorded values by
    more than ``threshold``.
    """

    def __init__(self, short_period = 5, long_period = 10, threshold = 0.):
        try:
            short_period = int(short_period)
            long_period = int(long_period)
        except (TypeError, ValueError, OverflowError):
            # narrowed from a bare ``except`` so interrupts are not swallowed
            raise ValueError("period should be a value, and will be transfer to int")

        if short_period <= 0:
            short_period = 1
            print("WARNING : short_period <= 0, set 1")

        if long_period <= short_period:
            long_period = short_period + 1
            print("WARNING : long_period <= short_period, set short_period + 1")

        self.short_period = short_period
        self.long_period = long_period
        self.threshold = threshold
        self.record = [] # newest value first

    def Reset(self, short_period, long_period, threshold = 0.):
        """Re-configure the periods and threshold and forget all recorded values.

        Raises:
            ValueError: if a period cannot be converted to int.
        """
        try:
            short_period = int(short_period)
            long_period = int(long_period)
        except (TypeError, ValueError, OverflowError):
            raise ValueError("period should be a value, and will be transfer to int")

        if short_period <= 0:
            short_period = 1
            print("WARNING : short_period <= 0, set 1")

        if long_period <= short_period:
            long_period = short_period + 1
            print("WARNING : long_period <= short_period, set %d" %(short_period + 1))

        self.short_period = short_period
        self.long_period = long_period
        self.threshold = threshold
        self.record = []

    def Hit(self, input_value):
        """Record ``input_value`` and report whether the stop condition holds.

        Returns False until ``long_period`` values have been recorded.

        Raises:
            ValueError: if ``input_value`` cannot be converted to float.
        """
        try:
            input_value = float(input_value)
        except (TypeError, ValueError):
            raise ValueError("input_value should be a real value")

        # newest value goes first; the oldest one falls off the end
        self.record = [input_value] + self.record[:self.long_period-1]
        if len(self.record) == self.long_period:
            return (np.mean(self.record[:self.short_period]) - self.threshold > np.mean(self.record))
        else:
            return False

    def Clear(self):
        """Forget all recorded values."""
        self.record = []

class DogikoNeuralNetwork():
    def __init__(self):
        """Create an empty, not yet built network with default settings."""
        # structure: filled by AddHiddenLayer / SetOutputFunction / Build
        self.hiddenLayerList = []
        self.outputFunction = None
        self.outputLayer = None
        self.has_build = False

        # training configuration
        self.lossFunction = None
        self.regulariz_rate = 0.
        self.regulariz_margin = 0.
        self.hit_tolerance = 0.1

        # data sets start empty
        self.trainData = Data()
        self.validData = Data()
        self.testData = Data()

        self.terminator = Terminator()
    
    def SetLossFunction(self, method):
        if method not in ["r2", "cross entropy"]:
            raise ValueError("loss function method should be 'r2', 'cross entropy'")
        
        self.lossFunction = LossFunction(method)
    
    def SetRegularizer(self, rate, margin):
        if rate < 0.:
            print("WARNING : regulariz_rate error, get negative value, setting to 0.")
        
        if margin < 0.:
            print("WARNING : regulariz_margin error, get negative value, setting to 0.")
        
        self.regulariz_rate = max(rate, 0.)
        self.regulariz_margin = max(margin, 0.)
        
        if self.has_build:
            for l in range(self.GetNumHiddenLayers()):
                self.hiddenLayerList[l].SetRegularizer(self.regulariz_rate, self.regulariz_margin)
            
            self.outputLayer.SetRegularizer(self.regulariz_rate, self.regulariz_margin)
    
    def SetTrainData(self, inputs, labels, weight = None):
        if self.has_build:
            if inputs.shape[1] != self.inputs_size:
                raise ValueError("inputs size doesn't match with this model")
            
            if labels.shape[1] != self.labels_size:
                raise ValueError("labels size doesn't match with this model")
        
        self.trainData.SetData(inputs, labels, weight)
    
    def SetValidData(self, inputs, labels, weight = None):
        if self.has_build:
            if inputs.shape[1] != self.inputs_size:
                raise ValueError("inputs size doesn't match with this model")
            
            if labels.shape[1] != self.labels_size:
                raise ValueError("labels size doesn't match with this model")
        
        self.validData.SetData(inputs, labels, weight)
    
    def SetTestData(self, inputs, labels, weight = None):
        if self.has_build:
            if inputs.shape[1] != self.inputs_size:
                raise ValueError("inputs size doesn't match with this model")
            
            if labels.shape[1] != self.labels_size:
                raise ValueError("labels size doesn't match with this model")
        
        self.testData.SetData(inputs, labels, weight)
    
    def AddHiddenLayer(self, num_unit, activationFunction):
        """Append a hidden layer; warns when the model has already been built."""
        new_layer = Layer(num_unit, activationFunction)
        self.hiddenLayerList.append(new_layer)
        if not self.has_build:
            return

        print("WARNING : a hidden layer added after build, please re-build model or set related value manually.")
    
    def SetOutputFunction(self, activationFunction):
        # units of output layer is fixed as same as label size
        self.outputFunction = activationFunction
        if self.has_build:
            self.outputLayer.activationFunction = self.outputFunction
    
    def SetHitTolerance(self, tolerance):
        try:
            if float(tolerance) <= 0.:
                print("setting tolerance failed, tolerance should be positive real value")
            
            self.hit_tolerance = float(tolerance)
        except:
            print("setting tolerance failed, tolerance should be positive real value")
    
    def SetTerminator(self, short_period, long_period, threshold = 0.):
        self.terminator.Reset(short_period, long_period, threshold)
        
    def ClearTerminator(self):
        self.terminator.Clear()
    
    def GetNumHiddenLayers(self):
        return len(self.hiddenLayerList)
    
    def Build(self):
        """Size and initialise every layer from the stored data, then mark the model built.

        Steps:
          1. check that a loss function is set and default the output
             function to Identity() when none was given;
          2. take the input / label sizes from the train, valid and test
             sets, which must agree;
          3. give every hidden layer fresh random weights and bias, then
             rescale them so that its pre-activation on the training
             inputs has mean 0 and std 1 per unit;
          4. create the output layer and initialise it the same way;
          5. apply the stored regularizer to all layers.

        The output layer is now created for models without hidden layers
        as well; that path used to dereference ``self.outputLayer`` while
        it was still None.

        Raises:
            ValueError: if no loss function is set or the data sets
                disagree on their sizes.
        """
        if self.lossFunction is None:
            raise ValueError("Set loss function before build.")

        if self.lossFunction.method == "cross entropy":
            # SoftSign is not listed: its output lies in (-1, 1), so it
            # can leave (0, 1) and deserves the warning as well.
            if type(self.outputFunction) not in [Sigmoid, Softmax]:
                print ("WARNING : chosen loss function is cross entropy but the output of output layer function may out of (0, 1)")

        if self.outputFunction is None:
            self.outputFunction = Identity()
            print ("WARNING : doesn't set outputFunction before build, set Identity().")

        data_sets = [self.trainData, self.validData, self.testData]
        if len(set([data.GetDatumSize()[0] for data in data_sets])) == 1:
            self.inputs_size = self.trainData.GetDatumSize()[0]
        else:
            raise ValueError("train, valid, test data must have the same inputs size")

        if len(set([data.GetDatumSize()[1] for data in data_sets])) == 1:
            self.labels_size = self.trainData.GetDatumSize()[1]
        else:
            raise ValueError("train, valid, test data must have the same labels size")

        # Walk through the hidden layers; each one is fed by the output of
        # the previous one, the first one by the training inputs.
        source = self.trainData.inputs
        source_size = self.inputs_size
        for hidden_layer in self.hiddenLayerList:
            self._InitializeLayer(hidden_layer, source_size, source)
            source = hidden_layer.flow_out
            source_size = hidden_layer.num_unit

        # The output layer is freshly constructed, so its bias is already
        # a new random draw and is not drawn a second time.
        self.outputLayer = Layer(self.labels_size, self.outputFunction)
        self._InitializeLayer(self.outputLayer, source_size, source, redraw_bias = False)

        # Set regularizer
        for hidden_layer in self.hiddenLayerList:
            hidden_layer.SetRegularizer(self.regulariz_rate, self.regulariz_margin)

        self.outputLayer.SetRegularizer(self.regulariz_rate, self.regulariz_margin)

        self.has_build = True

    def _InitializeLayer(self, layer, source_size, source, redraw_bias = True):
        # Give ``layer`` random weights for ``source_size`` inputs, then
        # rescale weights and bias so that its pre-activation on ``source``
        # has mean 0 and std 1 for every unit.
        layer.linear.SetValue(np.random.normal(0., 1., (source_size, layer.num_unit)))
        if redraw_bias:
            layer.bias.SetValue(np.random.normal(0., 1., (1, layer.num_unit)))

        layer.Forward(source)
        flow_in_mean = layer.flow_in.mean(axis=0)
        flow_in_std = layer.flow_in.std(axis=0) + 0.000000001 # bias for prevent zero std
        layer.linear.value /= flow_in_std
        layer.bias.value -= flow_in_mean
        layer.bias.value /= flow_in_std
        # refresh flow_in / flow_out with the normalised parameters
        layer.Forward(source)
    
    def save_model(self, target_folder):
        if type(target_folder) != str:
            raise TypeError("target_folder should be a str")
        
        while target_folder[-1] =="/":
            target_folder = target_folder[:-1]
        
        if not os.path.exists(target_folder):
            os.makedirs(target_folder)
        
        target_folder += "/"
        
        info_dict = {}
        info_dict["inputs size"] = self.inputs_size
        info_dict["labels size"] = self.labels_size
        info_dict["regularizer"] = {"rate" : self.regulariz_rate,
                                    "margin" : self.regulariz_margin
                                   }
        info_dict["tolerance"] = self.tolerance
        info_dict["num units"] = []
        info_dict["activation function"] = []
        for l in range(self.GetNumHiddenLayers()):
            info_dict["num units"].append(self.hiddenLayerList[l].num_unit)
            info_dict["activation function"].append(self.hiddenLayerList[l].activationFunction)
        
        info_dict["output function"] = self.outputFunction
        info_dict["terminator"] = self.terminator
        np.save(target_folder + "/model_info.npy", info_dict)
        for l in range(self.GetNumHiddenLayers()):
            np.save(target_folder + "/L%d.npy" %(l), self.hiddenLayerList[l].linear.value)
            np.save(target_folder + "/B%d.npy" %(l), self.hiddenLayerList[l].bias.value)
        
        np.save(target_folder + "/Lo.npy" %(l), self.outputLayer.linear.value)
        np.save(target_folder + "/Bo.npy" %(l), self.outputLayer.bias.value)
    
    def GetDimension(self):
        output = self.outputLayer.GetDimension()
        for l in range(self.GetNumHiddenLayers()):
            output += self.hiddenLayerList[l].GetDimension()
        
        return output
    
    def GetInference(self, inputs):
        if inputs.shape[1] != self.inputs_size:
            raise ValueError("inputs size shoud be %d, get %d" %(self.inputs_size, inputs.shape[1]))
        
        if self.GetNumHiddenLayers() > 0:
            self.hiddenLayerList[0].Forward(inputs)
            for l in range(1, self.GetNumHiddenLayers()):
                self.hiddenLayerList[l].Forward(self.hiddenLayerList[l-1].flow_out)
            
            self.outputLayer.Forward(self.hiddenLayerList[-1].flow_out)
        else:
            self.outputLayer.Forward(inputs)
        
        return self.outputLayer.flow_out
    
    def GetLoss(self, inference, target, weight = None):
        if inference.shape != target.shape:
            raise ValueError("inference and target non-equal")
        
        return self.lossFunction.GetLoss(inference, target, weight)
    
    def GetTrainLoss(self):
        return self.GetLoss(self.GetInference(self.trainData.inputs), self.trainData.labels, self.trainData.weight)
    
    def GetValidLoss(self):
        return self.GetLoss(self.GetInference(self.validData.inputs), self.validData.labels, self.validData.weight)
    
    def GetTestLoss(self):
        return self.GetLoss(self.GetInference(self.testData.inputs), self.testData.labels, self.testData.weight)
    
    def GetAccuracy(self, inference, target, tolerance = 0.1):
        # tolerance : for regression(r2) model, given a tolerance to verify hit or miss,
        #             this variable is meaningless for classifycation
        if self.lossFunction.method == "cross entropy":
            output = (inference.argmax(axis=1) == target.argmax(axis=1)).mean()
        elif self.lossFunction.method == "r2":
            output = np.square(inference - target).sum(axis=1)
            output = (output < tolerance).mean()
        
        return output
    
    def GetTrainAccuracy(self, tolerance = None):
        """Return the accuracy on the training set; ``None`` means the stored hit tolerance."""
        used_tolerance = self.hit_tolerance if IsNone(tolerance) else tolerance
        data = self.trainData
        inference = self.GetInference(data.inputs)
        return self.GetAccuracy(inference, data.labels, used_tolerance)
    
    def GetValidAccuracy(self, tolerance = None):
        """Return the accuracy on the validation set; ``None`` means the stored hit tolerance."""
        used_tolerance = self.hit_tolerance if IsNone(tolerance) else tolerance
        data = self.validData
        inference = self.GetInference(data.inputs)
        return self.GetAccuracy(inference, data.labels, used_tolerance)
    
    def GetTestAccuracy(self, tolerance = None):
        """Return the accuracy on the test set; ``None`` means the stored hit tolerance."""
        used_tolerance = self.hit_tolerance if IsNone(tolerance) else tolerance
        data = self.testData
        inference = self.GetInference(data.inputs)
        return self.GetAccuracy(inference, data.labels, used_tolerance)
    
    def Backward(self, inputs, labels, weight):
        """Propagate the loss derivative from the output layer back to the first layer.

        The derivative returned by the loss function is handed to the output
        layer, then to each hidden layer from last to first. Every layer's
        Backward receives the incoming derivative together with that layer's
        input: the previous hidden layer's flow_out, or ``inputs`` for the
        first layer. Nothing is returned.
        """
        grad = self.lossFunction.Backward(self.GetInference(inputs), labels, weight)
        hidden_count = self.GetNumHiddenLayers()

        if hidden_count <= 0:
            # no hidden layer: the output layer reads the raw inputs directly
            self.outputLayer.Backward(grad, inputs)
            return

        grad = self.outputLayer.Backward(grad, self.hiddenLayerList[-1].flow_out)
        # hidden layers hidden_count-1 .. 1 take the previous layer's output as input
        for index in reversed(range(1, hidden_count)):
            upstream = self.hiddenLayerList[index - 1].flow_out
            grad = self.hiddenLayerList[index].Backward(grad, upstream)

        # the first hidden layer is fed by the network inputs
        self.hiddenLayerList[0].Backward(grad, inputs)
        
    def ZeroDeri(self):
        """Call ZeroDeri() on every hidden layer, in order, then on the output layer."""
        hidden_layers = (self.hiddenLayerList[index] for index in range(self.GetNumHiddenLayers()))
        for layer in hidden_layers:
            layer.ZeroDeri()

        self.outputLayer.ZeroDeri()
    
    def ResetCwiseStep(self):
        """Call ResetCwiseStep() on every hidden layer, in order, then on the output layer."""
        hidden_layers = (self.hiddenLayerList[index] for index in range(self.GetNumHiddenLayers()))
        for layer in hidden_layers:
            layer.ResetCwiseStep()

        self.outputLayer.ResetCwiseStep()
    
    def Descent(self, step = 1., method = "normal"):
        """Forward (step, method) to Descent of every hidden layer, in order, then the output layer."""
        hidden_count = self.GetNumHiddenLayers()
        for layer in (self.hiddenLayerList[index] for index in range(hidden_count)):
            layer.Descent(step, method)

        self.outputLayer.Descent(step, method)
    
    def BatchFit(self, batch_inputs, batch_labels, batch_weight, step = 1., method = "normal"):
        """Run one Backward pass and one Descent update on a single batch.

        batch_inputs, batch_labels : data of the batch, passed to Backward unchanged.
        batch_weight : per-datum weights; rescaled to sum to 1 before being
                       passed to Backward.
        step, method : forwarded to Descent.

        The rescaling is done on a fresh array. The former in-place ``/=``
        overwrote the caller's array (the stored data-set weights, or a slice
        view of them) and raised for integer-typed weights.
        """
        normalized_weight = np.asarray(batch_weight, dtype=float)
        normalized_weight = normalized_weight / normalized_weight.sum()
        self.Backward(batch_inputs, batch_labels, normalized_weight)
        self.Descent(step, method)
    
    def EpochFit(self, batch_size = None, step = 1., method = "normal"):
        """Fit the model once over the whole training set.

        batch_size : None fits on all training datums in a single BatchFit
                     call; a positive int splits the data, in stored order,
                     into consecutive batches of that size (the last batch may
                     be smaller) and calls BatchFit once per batch.
        step, method : forwarded to BatchFit.

        Raises ValueError when batch_size is neither None nor a positive int.

        Fixes relative to the previous version:
        * the data is read from self.trainData (it was self.testData, so the
          model was being fitted on the test set);
        * the mini-batch call now passes the batch weights (they were
          omitted, shifting ``step`` into the weight slot and crashing);
        * no use of ``np.int``, which recent NumPy releases no longer provide;
        * NumPy integer batch sizes are accepted.
        """
        data = self.trainData
        if batch_size is None:
            self.BatchFit(data.inputs, data.labels, data.weight, step, method)
            return

        is_integer = isinstance(batch_size, (int, np.integer)) and not isinstance(batch_size, bool)
        if not is_integer or batch_size <= 0:
            raise ValueError("batch_size should be positive int")

        # range() with a stride yields ceil(num_datums / batch_size) start offsets
        for start in range(0, data.GetNumDatums(), batch_size):
            stop = start + batch_size
            self.BatchFit(data.inputs[start:stop],
                          data.labels[start:stop],
                          data.weight[start:stop],
                          step,
                          method
                         )
    
    def Train(self, times, batch_size = None, step = 1., method = "normal", is_termination = False):
        """Run up to ``times`` epochs and return the number of epochs performed.

        Before the first epoch the component-wise steps are reset and the
        terminator is cleared. Each epoch calls EpochFit(batch_size, step,
        method). When is_termination is true, the validation loss expressed in
        decibels (10*log10) is passed to self.terminator.Hit after every
        epoch, and training stops as soon as Hit returns a true value.
        """
        self.ResetCwiseStep()
        self.terminator.Clear()

        for finished in range(times):
            self.EpochFit(batch_size, step, method)
            if not is_termination:
                continue

            # 0.000000001 is a small bias that keeps log10 away from log(0)
            valid_loss_db = 10*np.log10(self.GetValidLoss() + 0.000000001)
            if self.terminator.Hit(valid_loss_db):
                return finished + 1

        return times
    
    def AddUnit(self, layer_index, num_added = 1, output_linear_bound = 1., cwise_step_initial = 0.1, drop_times=5):
        """Add ``num_added`` units to the hidden layer at ``layer_index``.

        layer_index         : index into self.hiddenLayerList.
        num_added           : positive int, number of units to add.
        output_linear_bound : non-negative scale of the random rows appended
                              to the following layer's linear matrix.
        cwise_step_initial  : forwarded to every SetValue call.
        drop_times          : forwarded to McmcColExtend.

        The layer's linear and bias values gain one column per new unit via
        McmcColExtend. The layer that consumes this layer's output (the next
        hidden layer, or the output layer when this is the last hidden layer)
        gains ``num_added`` random rows in its linear matrix. Finally the
        layer's num_unit is increased.

        Raises ValueError for an invalid layer_index, num_added or
        output_linear_bound; all checks run before anything is modified.
        """
        num_hidden = self.GetNumHiddenLayers()
        if layer_index not in range(num_hidden):
            raise ValueError("layer_index should be an int from 0 to (#layer-1) for hiddenlayer")

        is_integer = isinstance(num_added, (int, np.integer)) and not isinstance(num_added, bool)
        if not is_integer or num_added <= 0:
            raise ValueError("num_added should be positive int")

        # Only the comparison itself is guarded, so unrelated errors are no
        # longer swallowed by a bare except.
        try:
            is_negative = bool(output_linear_bound < 0.)
        except (TypeError, ValueError):
            raise ValueError("output_linear_bound should be a non-negative real value")

        if is_negative:
            raise ValueError("output_linear_bound should be a non-negative real value")

        layer = self.hiddenLayerList[layer_index]
        new_linear = layer.linear.value
        new_bias = layer.bias.value
        for _ in range(num_added):
            new_linear = McmcColExtend(new_linear, drop_times)
            new_bias = McmcColExtend(new_bias, drop_times)

        layer.linear.SetValue(new_linear, cwise_step_initial)
        layer.bias.SetValue(new_bias, cwise_step_initial)

        # The consumer of this layer's output needs one extra input row per new unit.
        if layer_index < num_hidden - 1: # if not final hiddenlayer
            consumer = self.hiddenLayerList[layer_index+1]
        else: # if final hiddenlayer
            consumer = self.outputLayer

        # NOTE(review): np.random.rand is uniform on [0, 1), so these rows lie in
        # [0, 2*output_linear_bound), not in a symmetric [-bound, bound) range.
        # Left unchanged; confirm whether a symmetric range was intended.
        added_rows = output_linear_bound*2*np.random.rand(num_added, consumer.num_unit)
        consumer.linear.SetValue(np.append(consumer.linear.value, added_rows, axis=0),
                                 cwise_step_initial)

        layer.num_unit += num_added
# --------------------------------------------





# --------------------------------------------


"""
    def neuron_refined(self, l, reference_data = None, threshold = 0.01):
        # l : the # of layer
        # threshold : threshold for information contained of dimension be remaind
        if type(l) != int:
            raise TypeError("l should be the layer no. of hidden layer, an int between 0 to (neural_number - 2)")
        elif (l >= self.ln - 1) or (l < 0):
            raise ValueError("l should be the layer no. of hidden layer, an int between 0 to (neural_number - 2)")
        
        try:
            if ((threshold< 1) and (threshold>0)) or (type(threshold) == int):
                if (threshold > self.ly[l].nn-1):
                    raise ValueError("int threshold error : removed #neuron should less than currently #neuron")
                elif (threshold < -self.ly[l].nn) or (threshold==0):
                    return None
                    # do nothing if remove no #neuron (threshold=0) or want to remain #neuron more than currently
            else:
                raise ValueError("threshold : a value in (0, 1), or an nonzero int")
        except:
            raise ValueError("threshold : a value in (0, 1), or an nonzero int")
        
        if type(reference_data) == type(None):
            self.prediction(self.tx)
        else:
            self.prediction(reference_data)
        
        ym = self.ly[l].y.mean(axis=1).reshape((self.ly[l].nn,1)) # y (output of Layer) mean of each neurons
        yn = self.ly[l].y - ym # centralized y
        ab = np.dot(self.ly[l+1].w.v, ym) # Adjusted bias
        ev, em = np.linalg.eigh(np.dot(yn, yn.T)) # eigenvalues and eigenmatrix(with eigenvectors as columns)
        ir = ev/ev.sum() # info ratio for each eigenvector
        # op, pv :column operator result and pivots
        if (threshold< 1) and (threshold>0):
            op, pv = column_operate(em[:,ir > threshold])
        else:
            op, pv = column_operate(em[:,ir >= ir[ir.argsort()[threshold]]])
            
        nw = np.dot(self.ly[l+1].w.v, op) # new weight
        self.ly[l+1].b.assign_values(self.ly[l+1].b.v + (np.dot(self.ly[l+1].w.v, ym) -np.dot(nw, ym[pv])))
        self.ly[l+1].w.assign_values(nw) # l+1 weight should be rewrite after l+1 bias have been rewrite
        self.ly[l].w.assign_values(self.ly[l].w.v[pv])
        self.ly[l].b.assign_values(self.ly[l].b.v[pv])
        self.ly[l].nn = len(pv)
    
    def inter_layer_linear_regression(self, layer_interval):
        try:
            ls = layer_interval[0] # layer start
            le = layer_interval[1] # layer end
            if (ls < le) and (ls >= 0) and (le < self.ln):
                if ls == 0:
                    ri = np.array(self.px.T) # regression input
                else:
                    ri = np.array(self.ly[ls-1].y)
                
                ri = np.append(ri, np.ones((1, ri.shape[1])), axis=0) # append 1. for each datum as bias
                ro = np.array(self.ly[le].x)
            else:
                raise ValueError("layer_interval should be list-like, two int (a, b), with 0 <= a < b < total layer")
        
        except:
            raise ValueError("layer_interval should be list-like, two int (a, b), with 0 <= a < b < total layer")
        
        rr = np.linalg.lstsq(ri.T, ro.T) # regression result (matrix, residuals, rank of ri, singuler values of ri)
        if len(rr[1]) == 0:
            raise ValueError("output data of layer" + str(ls-1) + "(= -1, for input data) should be full rank, try self.nruron_refine first")
        
        return rr[0], rr[1]/ri.shape[1]
    
    def find_linearist_layers(self, reference_data = None):
        output = (0, 0, np.inf, np.array([[]]), np.zeros((0,0)))
        if type(reference_data) == type(None):
            self.prediction(self.tx)
        else:
            self.prediction(reference_data)
        
        for l1 in range(self.ln-1):
            for l2 in range(i+1, self.ln):
                rr = self.inter_layer_linear_regression((l1,l2))
                if np.sqrt(rr[1].sum()) < output[2]:
                    output = (l1, l2, np.sqrt(rr[1].sum()), rr[0])
        
        return output
    
    def layer_filled(self, layer_interval, weights, bias):
        try:
            ls = layer_interval[0] # layer start
            le = layer_interval[1] # layer end
            if (ls < le) and (ls >= 0) and (le < self.ln):
                pass
            else:
                raise ValueError("layer_interval should be list-like, two int (a, b), with 0 <= a < b < total layer")
        except:
            raise ValueError("layer_interval should be list-like, two int (a, b), with 0 <= a < b < total layer")
        
        if weights.shape[0] != bias.shape[0]:
            raise ValueError("weights.shape[0] doesn't match bias.shape[0]")
        
        if weights.shape[0] != self.ly[le].nn:
            raise ValueError("weights.shape[0] doesn't match #neuron of layer at end of layer_interval")
        
        self.ly[le].w.assign_values(weights)
        self.ly[le].b.assign_values(bias)
        self.ly = self.ly[:ls] + self.ly[le:]
        self.ln = len(self.ly)
    
    def linear_filled(self, layer_interval):
        try:
            ls = layer_interval[0] # layer start
            le = layer_interval[1] # layer end
            if (ls < le) and (ls >= 0) and (le < self.ln):
                pass
            else:
                raise ValueError("layer_interval should be list-like, two int (a, b), with 0 <= a < b < total layer")
        except:
            raise ValueError("layer_interval should be list-like, two int (a, b), with 0 <= a < b < total layer")
            
        rr = self.inter_layer_linear_regression(layer_interval)
        self.layer_filled(layer_interval, rr[0].T[:,:-1], rr[0].T[:,-1:])
    
    def insert_layer(self, position, weights, bias, activation_function, next_layer_weights, next_layer_bias):
        if type(position) == int:
            if position in range(self.ln):
                pass
        else:
            raise ValueError("position should be int between 0 to self.ln")
        
        if type(activation_function) == type:
            raise TypeError("activation_function should be a class. eg: Use 'Sigmoid()', not 'Sigmoid'")
        
        ilo, ili = weights.shape # input and output size of inserted layer
        nlo, nli = next_layer_weights.shape # input and output size of next layer
        
        if position == 0:
            if ili != self.xs:
                raise ValueError("weights.shape error, cheak input and output size for this new layer")
        else:
            if ili != self.ly[position-1].nn:
                raise ValueError("weights.shape error, cheak input and output size for this new layer")
        
        if (ilo != bias.shape[0]) or (ilo != nli):
            raise ValueError("to define #neuron of new layer, all related weighs and bias size should be consistent")
        
        if nlo != self.ly[position].nn:
            raise ValueError("next_layer_weights.shape error, cheak #neuron of next layer")
        
        if next_layer_bias.shape[0] != self.ly[position].nn:
            raise ValueError("next_layer_bias.shape error, cheak #neuron of next layer")
        
        if (bias.shape[1] != 1) or (next_layer_bias.shape[1] != 1):
            raise ValueError("bias shape should be (#neuron, 1)")
        
        l = position
        
        self.ly.insert(l, Layer(ilo, activation_function))
        self.ly[l].w.assign_values(weights)
        self.ly[l].b.assign_values(bias)
        self.ly[l+1].w.assign_values(next_layer_weights)
        self.ly[l+1].b.assign_values(next_layer_bias)
        
        self.ln = len(self.ly)
    
    def identity_dig(self, position, activation_function):
        if type(position) == int:
            if position in range(self.ln):
                pass
            else:
                raise ValueError("position should be int between 0 to self.ln")
        else:
            raise ValueError("position should be int between 0 to self.ln")
        
        if type(activation_function) == type:
            raise TypeError("activation_function should be a class. eg: Use 'Sigmoid()', not 'Sigmoid'")
        
        l = position
        # ids : size of identity transform, input size of new layer
        if l == 0:
            ids = self.xs
        else:
            ids = self.ly[l-1].nn
        
        if type(activation_function) in [Relu, SoftPlus]:
            liw = np.concatenate((np.identity(ids), -np.identity(ids)), axis = 0)
            lib = np.zeros((2*ids, 1))
            low = np.concatenate((np.identity(ids), -np.identity(ids)), axis = 1)
            lob = np.zeros((ids, 1))
        elif type(activation_function) == LeakyRelu:
            liw = np.concatenate((np.identity(ids), -np.identity(ids)), axis = 0)
            lib = np.zeros((2*ids, 1))
            low = np.concatenate((np.identity(ids), -np.identity(ids)), axis = 1) / (1.+activation_function.alpha)
            lob = np.zeros((ids, 1))
        elif type(activation_function) == Identity:
            liw = np.identity(ids)
            lib = np.zeros((2*ids, 1))
            low = np.identity(ids)
            lob = np.zeros((ids, 1))
        elif type(activation_function) in [Sigmoid, Hypertan, Selu]:
            # li : input of new layer
            if l == 0:
                li = np.array(self.tx.T)
            else:
                li = np.array(self.ly[l-1].y)
            
            lim = li.mean(axis=1)
            lis = li.std(axis=1) + 1.
            
            liw = np.diag(1./lis)
            if type(activation_function) == Selu:
                lib = 1.-(lim/lis).reshape(-1,1) # let mean become one before transform by activation function
            else:
                lib = -(lim/lis).reshape(-1,1) # let mean become zero before transform by activation function
            
            lo = activation_function.trans(np.dot(liw, li)+lib)
            lo = np.append(lo, np.ones((1, lo.shape[1])), axis=0) # append 1. for each datum as bias
            rr = np.linalg.lstsq(lo.T, li.T) # regression result (matrix, residuals, rank of ri, singuler values of ri)
            # since the goal is construct identity, try to find linear transform form layer output to layer input
            low = rr[0].T[:,:-1]
            lob = rr[0].T[:,-1:]
        else:
            raise TypeError("activation_function type error")
        
        nlw = np.dot(self.ly[l].w.v, low)
        nlb = np.dot(self.ly[l].w.v, lob) + self.ly[l].b.v
        
        self.insert_layer(l,
                          liw,
                          lib,
                          activation_function,
                          nlw,
                          nlb
                         )
    
    def save_weight(self, dir_name):
        for l in range(self.ln):
            np.save(dir_name + "/w%i.npy" % l, self.ly[l].w.v)
            np.save(dir_name + "/b%i.npy" % l, self.ly[l].b.v)
    
    def load_weight(self, dir_name):
        for l in range(self.ln):
            try:
                if l == 0:
                    if np.load(dir_name + "/w%i.npy" % l).shape[1] != self.xs:
                        raise ValueError("layer %i input size error, cheak weight size." % l)
                else:
                    if np.load(dir_name + "/w%i.npy" % l).shape[1] != self.ly[l-1].nn:
                        raise ValueError("layer %i input size error, cheak weight size." % l)

                if np.load(dir_name + "/w%i.npy" % l).shape[0] != self.ly[l].nn:
                    raise ValueError("layer %i neuron size error, cheak weight size." % l)

                if np.load(dir_name + "/b%i.npy" % l).shape[0] != self.ly[l].nn:
                    raise ValueError("layer %i neuron size error, cheak bias size." % l)

                if np.load(dir_name + "/b%i.npy" % l).shape[1] != 1:
                    raise ValueError("layer %i bias size error, should be 1." % l)
            
            except:
                raise ValueError("load .npy error, cheak dir.")
            
            self.ly[l].w.assign_values(np.load(dir_name + "/w%i.npy" % l))
            self.ly[l].b.assign_values(np.load(dir_name + "/b%i.npy" % l))

"""

"done"

'done'

In [482]:
# Toy two-class problem: 300 scalar inputs from -1.5 to 1.49 in steps of 0.01,
# with one-hot labels (column 0 is 1 where x < 0, column 1 is its complement).
X = ((np.arange(300)-150)/100).reshape(-1,1)
Y = np.zeros((X.shape[0], 2))
Y[:, :1] = 1*(X<0.)
Y[:, 1:] = 1 - Y[:, :1]

# The same (X, Y) is used as train, validation and test data in this demo.
# NOTE(review): the meaning of the SetRegularizer / SetTerminator arguments is
# defined by the class, not visible in this cell -- check there before tuning.
NN = DogikoNeuralNetwork()
NN.SetTrainData(X, Y)
NN.SetValidData(X, Y)
NN.SetTestData(X, Y)
NN.SetLossFunction("cross entropy")
NN.SetRegularizer(0.0001, 3.)
NN.AddHiddenLayer(4, Hypertan())
NN.SetOutputFunction(Softmax())
NN.SetTerminator(10,20,-0.1)
NN.Build()

In [487]:
# Train for at most 500 epochs with early termination enabled; Train returns
# the number of epochs actually run, which is what gets printed.
print(NN.Train(times=500, method="Rprop", is_termination=True))
# Last expression of the cell: displays the loss on the training data.
NN.GetTrainLoss()

20


0.0032287947607516788

[1, 2, 3]


In [24]:
# Scratch data: a (100, 5) array of normal samples with mean 2 and std 2.
foo = np.random.normal(2,2, (100, 5))

In [25]:
# Random (5, 5) linear map, entries drawn from a normal with mean 1 and std 3.
W = np.random.normal(1,3, (5, 5))

In [26]:
# Random (1, 5) bias row, entries drawn from a normal with mean 0 and std 2.
b = np.random.normal(0,2, (1, 5))

In [28]:
# Column-wise statistics of the affine output foo.W + b:
# m is the mean of each column, v is the reciprocal of each column's std.
m = (np.dot(foo, W) + b).mean(axis=0)
v = 1/(np.dot(foo, W) + b).std(axis=0)

In [29]:
# Fold the standardisation into the parameters themselves:
# (foo.W + b - m) * v  ==  foo.(W * v) + (b - m) * v
W *= v
b = (b-m)*v

In [30]:
# Sanity check: with the rescaled W and b the column means should be ~0
# (the recorded output below is of order 1e-16).
(np.dot(foo, W) + b).mean(axis=0)

array([ 5.95079541e-16, -4.21884749e-17, -2.44249065e-16,  1.88737914e-16,
       -9.88098492e-17])

'awgrethbt'