In [1]:
import numpy as np
np.random.seed(1)

In [2]:
### Sigmoid
def sigmoid(Z):
    
    Z[Z > 709] =  709 #prevent np.exp overflow
    Z[Z <-709] =  0   #prevent np.exp overflow
    
    A = np.where(Z >= 0, 
                    1. / (1. + np.exp(-Z)), 
                    np.exp(Z) / (1. + np.exp(Z))) # Activation
    C = Z                       # Cache
    
    return A , C 


def sigmoid_prime(dA, C): ## Derivative and Cache
    
    Z = C                 ## Cache
    
    S = 1./(1.+np.exp(-Z))
    ds = S * (1-S)
    
    dZ = dA * S
    
    return dZ


### RELU
def relu(Z, alpha=0.00001):
    C = Z                       # Cache
    A = np.maximum(alpha*Z,Z)     # Activation
    
    return A , C 


def relu_prime(dA, C):
    
    Z = C                      ## Cache
    
    dZ = np.array(dA, copy=True)
    
    dZ[Z <= 0] = 0
    dZ[Z >  0] = 1
    
    return dZ


### tanH
def tanh(Z):
    
    C = Z                       # Cache
    A = np.tanh(Z)              # Activation
    
    return A , C


def tanh_prime(dA, C):
    
    Z = C                      ## Cache
    
    dZ = 1-np.tanh(dA)**2
    
    return dZ

### Identity
def identity(Z):
    
    C = Z                       # Cache
    A = Z                       # Activation
    
    return A , C


def identity_prime(dA, C):
    
    Z = C                      ## Cache
    
    dZ = dA
    
    return dZ


### SoftMax
def softmax(Z, axis=1):
    
    C = Z
    A = np.exp(x) / np.sum(np.exp(x), axis = axis, keepdims = True)
    
    return A , C

def softmax_prime(dA, C):
    
    Z = C                      ## Cache
    
    s = dA.reshape(-1,1)
    dZ = p.diagflat(s) - np.dot(s, s.T)
    
    return dZ

In [3]:
### Loss L1
def L1(yhat, y):
    
    loss = np.sum(abs(y-yhat))
    
    return loss


### Loss L2
def L2(yhat, y):
    
    loss = np.dot( (y-yhat),(y-yhat) )
    
    return loss


### Loss Mean Square Error
def mse(yhat, y):
    
    loss = np.mean(np.power(y-yhat, 2))
    
    return loss


def mse_prime(yhat, y):
    
    loss_prime = (2 * (y-yhat)) /y.size
    
    return loss_prime


### Loss cross entopy cost
def cross_entropy_cost(yhat, y):
    
    m = y.shape[1]
    
    cost = np.squeeze( (1./m) * (-np.dot(y,np.log(yhat).T) - np.dot(1-y, np.log(1-yhat).T)) )
    
    return cost

### Loss cross entopy cost
def cross_entropy_cost_prime(yhat, y):
    
    dA = - (np.divide(y, yhat) - np.divide(1 - y, 1 - yhat))
    
    return dA


In [4]:
activation_map = {
        'Identity':(identity,identity_prime),
        'Sigmoid' :(sigmoid ,sigmoid_prime ),
        'Relu'    :(relu    ,relu_prime    ),
        'Tanh'    :(tanh    ,tanh_prime    ),
        'Softmax' :(softmax ,softmax_prime )
    }

In [104]:
class NN_Unit:
    def __init__(self, activation_fn='Identity'):
        
        self.n_X = None  # Input_size
        
        self.activation_f = activation_map[activation_fn][0]
        self.activation_b = activation_map[activation_fn][1]
        
    def forward(self, X):
        raise NotImplementedError
        
    def backward(self, dA, cache):
        raise NotImplementedError
        
    def init(self, n_X, n_A, n_L):
        raise NotImplementedError
        

In [6]:
# from nn_unit import NN_Unit

In [105]:
class FullyConnectedUnit(NN_Unit):
    
    def __init__(self, n_X, n_A, activation_fn): # num Input , num_output
        super(FullyConnectedUnit, self).__init__(activation_fn)
        
        np.random.seed(1)
        self.n_X = n_X
        
        self.W = np.random.randn(n_A, n_X) * 0.01
        self.B = np.zeros((n_A, 1))
        
        print(f"n_A {n_A}")
        print(f"n_X {n_X}")
        print(self.W)
        print(self.B)
        
                
    def forward(self, X):
        
        print(self.W)
        print(self.B)
        
        self.X = X
        
        self.Z = self.W.dot(self.X) + self.B
        
        Z_cache = (self.X, self.W, self.B)
        
        self.A, A_cache = self.activation_f(self.Z)
        
        return self.A, (Z_cache, A_cache)

    def backward(self, dA, cache):
        
        Z_cache, A_cache = cache
        
        dZ = self.activation_b(dA, A_cache)
        
        A , W , B = Z_cache        
        
        m = A.shape[1]
        
        dW = 1./m * np.dot(dZ,A.T)
        dB = 1./m * np.sum(dZ, axis = 1, keepdims = True)
        dA = np.dot(W.T,dZ)
        
        return dA, dW, dB
    
    def init(self, n_X, n_A, n_L):
        self.W = np.random.randn(n_A, n_X) / np.sqrt(n_L)
        self.B = np.zeros((n_A, 1))
    
        

In [107]:
class NN_Layer:
    def __init__(self):
        self.nn_unit = None
        
    def forward(self, X):
        raise NotImplementedError
        
    def backward(self, dA, cache):
        raise NotImplementedError
        
    def update(self, grads, lr=0.001):
        raise NotImplementedError
        
    def init(self, n_L):
        raise NotImplementedError

In [95]:
# from layer import NN_Layer

In [96]:
class FullyConnectedLayer(NN_Layer):
    
    def __init__(self, num_in, num_out, activation_fn): # num Input , num_output
        
        self.ins  = num_in
        self.outs = num_out
        self.nn_unit = FullyConnectedUnit(num_in,num_out,activation_fn)
                
    def forward(self, X):
        
        self.A, (Z_cache, A_cache) = self.nn_unit.forward(X)
        
        return self.A, (Z_cache, A_cache)

    def backward(self, dA, cache):
        
        dA, dW, dB = self.nn_unit.backward(dA, cache)
        
        return dA, dW, dB
    
    def update(self, dW, dB, lr=0.01):
        self.nn_unit.W = self.nn_unit.W - (lr * dW)
        self.nn_unit.B = self.nn_unit.B - (lr * dB)
        
    def init(self, n_L):
        self.nn_unit.init(self.ins, self.outs, n_L)

In [97]:
class NN_Network:

    def __init__(self):
        self.layers = []
        
    def add_layer(self, layer):
        num_out, num_in = layer.outs, layer.ins
        self.layers.append(layer)
        
    def train(self, X, Y, epochs= 100):
        for e in range(0,epochs):
            A = X
            caches = []
            for idx, l in enumerate(self.layers):
                A, cache = l.forward(A)
                caches.append(cache)
            
            cost = cross_entropy_cost(A, Y)
            dA = cross_entropy_cost_prime(A, Y)
            
            cnt = len(self.layers)
            Y = Y.reshape(A.shape)
            
            for i in reversed(range(0, cnt)):
                dA, dW, dB = self.layers[i].backward(dA, caches[i])
                self.layers[i].update(dW, dB)

            print(f"-------------- {e}")
            print(f"-------------- ")
            if e%100 == 0 and e != 0:
                print(f"Cost at epoch {e} is {cost}")
        
    def predict(self, X):
        A = X
        for idx, l in enumerate(self.layers):
            A, cache = l.forward(A)
        print(A)

In [98]:
def load_data():
    train_dataset = h5py.File('datasets/train_catvnoncat.h5', "r")
    train_set_x_orig = np.array(train_dataset["train_set_x"][:]) # your train set features
    train_set_y_orig = np.array(train_dataset["train_set_y"][:]) # your train set labels

    test_dataset = h5py.File('datasets/test_catvnoncat.h5', "r")
    test_set_x_orig = np.array(test_dataset["test_set_x"][:]) # your test set features
    test_set_y_orig = np.array(test_dataset["test_set_y"][:]) # your test set labels

    classes = np.array(test_dataset["list_classes"][:]) # the list of classes
    
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))
    
    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes

In [99]:
import h5py
train_x_orig, train_y, test_x_orig, test_y, classes = load_data()

In [100]:
train_x_flatten = train_x_orig.reshape(train_x_orig.shape[0], -1).T   # The "-1" makes reshape flatten the remaining dimensions
test_x_flatten = test_x_orig.reshape(test_x_orig.shape[0], -1).T

# Standardize data to have feature values between 0 and 1.
train_x = train_x_flatten/255.
test_x = test_x_flatten/255.

print ("train_x's shape: " + str(train_x.shape))
print ("test_x's shape: " + str(test_x.shape))

train_x's shape: (12288, 209)
test_x's shape: (12288, 50)


In [103]:
fc0 = FullyConnectedLayer(12288,7,'Relu')
#fc1 = FullyConnectedLayer(512,16,'Relu')
#fc2 = FullyConnectedLayer(256,128,'Relu')
#fc3 = FullyConnectedLayer(128,64,'Relu')
#fc4 = FullyConnectedLayer(64,32,'Relu')
#fc5 = FullyConnectedLayer(32,16,'Relu')
fcf = FullyConnectedLayer(7,1,'Sigmoid')

n_A 7
n_X 12288
[[ 0.01624345 -0.00611756 -0.00528172 ... -0.00527214 -0.0038034
   0.00949412]
 [ 0.01009231  0.00229889 -0.00664099 ...  0.00689859 -0.00488322
   0.0020761 ]
 [-0.0035634  -0.00195481  0.00636803 ...  0.00822751 -0.00104425
  -0.00657957]
 ...
 [-0.00315398  0.0124543  -0.01304592 ...  0.00370126  0.01033177
  -0.00896044]
 [ 0.00487913  0.00350058 -0.00084749 ... -0.00559281  0.00325022
   0.00231429]
 [ 0.00534074  0.02512388 -0.01044755 ...  0.0216518  -0.00842579
   0.00359795]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]]
n_A 1
n_X 7
[[ 0.01624345 -0.00611756 -0.00528172 -0.01072969  0.00865408 -0.02301539
   0.01744812]]
[[0.]]


In [67]:
model = NN_Network()

In [46]:
model.add_layer(fc0)
#model.add_layer(fc1)
#model.add_layer(fc2)
#model.add_layer(fc3)
#model.add_layer(fc4)
#model.add_layer(fc5)
model.add_layer(fcf)

In [48]:
model.train(train_x, train_y, 1)

[[-0.00379768 -0.00385948 -0.00345322 ... -0.00330145 -0.00343037
  -0.0026822 ]
 [-0.00376859 -0.00389774 -0.00338244 ... -0.00329301 -0.00336374
  -0.00270092]
 [-0.00374925 -0.00393072 -0.00344285 ... -0.00330735 -0.00343654
  -0.00265322]
 ...
 [-0.00383882 -0.00386569 -0.00341439 ... -0.00327958 -0.00335463
  -0.00272589]
 [-0.00379776 -0.00388591 -0.00335989 ... -0.00323993 -0.00334741
  -0.00269147]
 [-0.00375046 -0.00386726 -0.00343029 ... -0.00325462 -0.00342353
  -0.00267463]]
[[-0.01]
 [-0.01]
 [-0.01]
 [-0.01]
 [-0.01]
 [-0.01]
 [-0.01]]
[[-0.00068194 -0.00065841 -0.00072186 -0.00069462 -0.00070966 -0.00071642
  -0.00069991]]
[[-0.00311035]]
-------------- 0
-------------- 


In [None]:
model.predict(test_x)

In [None]:
fc0.nn_unit.W

In [None]:
Y=X
Y.shape

In [None]:
unit = FullyConnectedUnit(2,2,'Sigmoid')

In [None]:
np.random.seed(1)
X = np.random.randn(1, 30)
X

In [None]:
Y=(X>0).astype(int)
Y

In [None]:
b1 = np.zeros((4,1))
b1

In [None]:
b2 = np.array([[ 0]])
b2

In [None]:
W1 = np.array([[-0.00416758, -0.00056267],
        [-0.02136196,  0.01640271],
        [-0.01793436, -0.00841747],
        [ 0.00502881, -0.01245288]])
W1

In [None]:
W2 = np.array([[-0.01057952, -0.00909008,  0.00551454,  0.02292208]])
W2

In [None]:
X

In [None]:
unit2 = FullyConnectedUnit(2,1,'Sigmoid')

In [None]:
A1, cache1 = unit.forward(X)
A1

In [None]:

A2, cache2 = unit2.forward(A1)
A2

In [None]:
cache[0] ## X,W,B

In [None]:
cache[1] ## Z

In [None]:
y=np.array([[0],[1]])
y

In [None]:
cross_entropy_cost()

In [None]:

Z = cache[0][1].dot(cache[0][0])
Z

In [None]:
X.shape

In [None]:
x(np.random.rand(2,2))