In [1]:
import numpy as np

In [360]:
### Sigmoid
def sigmoid(Z):
    
    A = 1.0/(1.0 + np.exp(-Z))  # Activation
    C = Z                       # Cache
    
    return A , C 


def sigmoid_prime(dA, C): ## Derivative and Cache
    
    Z = C                 ## Cache
    
    S, _  = sigmoid(dA)
    ds = S * (1-S)
    
    dZ = dA * S
    
    return dZ


### RELU
def relu(Z, alpha=0.00001):
    C = Z                       # Cache
    A = np.maximum(alpha,Z)     # Activation
    
    return A , C 


def relu_prime(dA, C):
    
    Z = C                      ## Cache
    
    dZ = np.array(dA, copy=True)
    
    dZ[Z <= 0] = 0
    dZ[Z >  0] = 1
    
    return dZ


### tanH
def tanh(Z):
    
    C = Z                       # Cache
    A = np.tanh(Z)              # Activation
    
    return A , C


def tanh_prime(dA, C):
    
    Z = C                      ## Cache
    
    dZ = 1-np.tanh(dA)**2
    
    return dZ

### Identity
def identity(Z):
    
    C = Z                       # Cache
    A = Z                       # Activation
    
    return A , C


def identity_prime(dA, C):
    
    Z = C                      ## Cache
    
    dZ = dA
    
    return dZ


### SoftMax
def softmax(Z, axis=1):
    
    C = Z
    A = np.exp(x) / np.sum(np.exp(x), axis = axis, keepdims = True)
    
    return A , C

def softmax_prime(dA, C):
    
    Z = C                      ## Cache
    
    s = dA.reshape(-1,1)
    dZ = p.diagflat(s) - np.dot(s, s.T)
    
    return dZ

In [739]:
### Loss L1
def L1(yhat, y):
    
    loss = np.sum(abs(y-yhat))
    
    return loss


### Loss L2
def L2(yhat, y):
    
    loss = np.dot( (y-yhat),(y-yhat) )
    
    return loss


### Loss Mean Square Error
def mse(yhat, y):
    
    loss = np.mean(np.power(y-yhat, 2))
    
    return loss


def mse_prime(yhat, y):
    
    loss_prime = (2 * (y-yhat)) /y.size
    
    return loss_prime


### Loss cross entopy cost
def cross_entropy_cost(yhat, y):
    
    m = y.shape[1]
    
    cost = np.squeeze( (1./m) * (-np.dot(y,np.log(yhat).T) - np.dot(1-y, np.log(1-yhat).T)) )
    
    return cost

### Loss cross entopy cost
def cross_entropy_cost_prime(yhat, y):
    
    dA = - (np.divide(y, yhat) - np.divide(1 - y, 1 - yhat))
    
    return dA


In [740]:
activation_map = {
        'Identity':(identity,identity_prime),
        'Sigmoid' :(sigmoid ,sigmoid_prime ),
        'Relu'    :(relu    ,relu_prime    ),
        'Tanh'    :(tanh    ,tanh_prime    ),
        'Softmax' :(softmax ,softmax_prime )
    }

In [741]:
class NN_Unit:
    def __init__(self, activation_fn='Identity'):
        
        self.n_X = None  # Input_size
        
        self.activation_f = activation_map[activation_fn][0]
        self.activation_b = activation_map[activation_fn][1]
        
    def forward(self, X):
        raise NotImplementedError
        
    def backward(self, dA, cache):
        raise NotImplementedError
    

In [742]:
# from nn_unit import NN_Unit

In [743]:
class FullyConnectedUnit(NN_Unit):
    
    def __init__(self, n_X, n_A, activation_fn): # num Input , num_output
        super(FullyConnectedUnit, self).__init__(activation_fn)
        
        self.n_X = n_X
        
        self.W = np.random.rand(n_A, n_X) * 0.001
        self.B = np.zeros((n_A, 1))
                
    def forward(self, X):
        self.X = X
        
        self.Z = self.W.dot(self.X) + self.B
        
        Z_cache = (self.X, self.W, self.B)
        
        self.A, A_cache = self.activation_f(self.Z)
        
        return self.A, (Z_cache, A_cache)

    def backward(self, dA, cache):
        
        Z_cache, A_cache = cache
        A , W , B = Z_cache        
        
        dZ = self.activation_b(dA, A_cache)
        
        m = A.shape[1]
        
        dW = 1./m * np.dot(dZ,A.T)
        dB = 1./m * np.sum(dZ, axis = 1, keepdims = True)
        dA = np.dot(W.T,dZ)
        
        return dA, dW, dB
        

In [744]:
class NN_Layer:
    def __init__(self):
        self.nn_unit = None
        
    def forward(self, X):
        raise NotImplementedError
        
    def backward(self, dA, cache):
        raise NotImplementedError
        
    def update(self, grads, lr=0.001):
        raise NotImplementedError

In [745]:
# from layer import NN_Layer

In [746]:
class FullyConnectedLayer(NN_Layer):
    
    def __init__(self, num_in, num_out, activation_fn): # num Input , num_output
        
        self.ins = num_in
        self.outs = num_out
        self.nn_unit = FullyConnectedUnit(num_in,num_out,activation_fn)
                
    def forward(self, X):
        
        self.A, (Z_cache, A_cache) = self.nn_unit.forward(X)
        
        return self.A, (Z_cache, A_cache)

    def backward(self, dA, cache):
        
        dA, dW, dB = self.nn_unit.backward(dA, cache)
        
        return dA, dW, dB
    
    def update(self, grads, lr=0.0075):
        self.nn_unit.W = self.nn_unit.W - (lr * grads[1])
        self.nn_unit.B = self.nn_unit.B - (lr * grads[2])

In [747]:
class NN_Network:

    def __init__(self):
        self.layers = []
        self.outs   = []
        self.caches = []
        self.grads  = []
    
    def add_layer(self, layer):
        num_out, num_in = layer.outs, layer.ins
        self.layers.append(layer)
        self.outs.append(np.zeros((num_out, 1)))
        self.caches.append( (0,0) )
        self.grads.append( (0,0,0) )
        
    def train(self, X, Y, epochs= 100):
        for e in range(0,epochs):
            A = X
            for idx, l in enumerate(self.layers):
                A, cache = l.forward(A)
                self.outs[idx] = A
                self.caches[idx] = cache            

            cost = cross_entropy_cost(A, Y)
            dA = cross_entropy_cost_prime(A, Y)
            
            cnt = len(self.layers)
            for i in reversed(range(0, cnt)):
                dA, dW, dB = self.layers[i].backward(dA, self.caches[i])
                self.grads[i] = (dA,dW,dB)

            for i in range(0, cnt):
                self.layers[i].update(self.grads[i])            
            
            if e%100 == 0 and e != 0:
                print(f"Cost at epoch {e} is {cost}")
        
    def predict(self, X, Y):
        A = X
        for idx, l in enumerate(self.layers):
            A, cache = l.forward(A)
        print(A)

In [748]:
def load_data():
    train_dataset = h5py.File('datasets/train_catvnoncat.h5', "r")
    train_set_x_orig = np.array(train_dataset["train_set_x"][:]) # your train set features
    train_set_y_orig = np.array(train_dataset["train_set_y"][:]) # your train set labels

    test_dataset = h5py.File('datasets/test_catvnoncat.h5', "r")
    test_set_x_orig = np.array(test_dataset["test_set_x"][:]) # your test set features
    test_set_y_orig = np.array(test_dataset["test_set_y"][:]) # your test set labels

    classes = np.array(test_dataset["list_classes"][:]) # the list of classes
    
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))
    
    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes

In [749]:
import h5py
train_x_orig, train_y, test_x_orig, test_y, classes = load_data()

In [750]:
train_x_flatten = train_x_orig.reshape(train_x_orig.shape[0], -1).T   # The "-1" makes reshape flatten the remaining dimensions
test_x_flatten = test_x_orig.reshape(test_x_orig.shape[0], -1).T

# Standardize data to have feature values between 0 and 1.
train_x = train_x_flatten/255.
test_x = test_x_flatten/255.

print ("train_x's shape: " + str(train_x.shape))
print ("test_x's shape: " + str(test_x.shape))

train_x's shape: (12288, 209)
test_x's shape: (12288, 50)


In [751]:
fc1 = FullyConnectedLayer(12288,100,'Relu')
fc2 = FullyConnectedLayer(100,1,'Sigmoid')

In [752]:
model = NN_Network()

In [753]:
model.add_layer(fc1)
model.add_layer(fc2)

In [754]:
model.train(train_x, train_y, 2500)

Cost at epoch 100 is 0.6440368060776518
Cost at epoch 200 is 0.675799682649059
Cost at epoch 300 is 0.7455867313390562
Cost at epoch 400 is 0.8362724763418984
Cost at epoch 500 is 0.9388070133124284
Cost at epoch 600 is 1.0485109483248025
Cost at epoch 700 is 1.1628038737899724
Cost at epoch 800 is 1.2801230532737273
Cost at epoch 900 is 1.3994744751049797
Cost at epoch 1000 is 1.5202072799615736
Cost at epoch 1100 is 1.641886831562591
Cost at epoch 1200 is 1.7642189243658075
Cost at epoch 1300 is 1.887002533854245
Cost at epoch 1400 is 2.0100994071920097
Cost at epoch 1500 is 2.1334140309228915
Cost at epoch 1600 is 2.256880210320694


  A = 1.0/(1.0 + np.exp(-Z))  # Activation


Cost at epoch 1700 is 2.3804519689161854
Cost at epoch 1800 is 2.50409732420074
Cost at epoch 1900 is 2.6277940044922112
Cost at epoch 2000 is 2.751526488889438
Cost at epoch 2100 is 2.8752839554175127
Cost at epoch 2200 is 2.9990588556990474
Cost at epoch 2300 is 3.122845923373247
Cost at epoch 2400 is 3.2466414835501776


In [562]:
model.predict(X, Y)

[[0.32501195 0.32501195 0.32501195 0.32501195 0.32501195 0.32501195
  0.32501195 0.32501195 0.32501195 0.32501195 0.32501195 0.32501195
  0.32501195 0.32501195 0.32501195 0.32501195 0.32501195 0.32501195
  0.32501195 0.32501195 0.32501195 0.32501195 0.32501195 0.32501195
  0.32501195 0.32501195 0.32501195 0.32501195 0.32501195 0.32501195]]


In [499]:
Y=X
Y.shape

(1, 3)

In [466]:
unit = FullyConnectedUnit(2,2,'Sigmoid')

In [551]:
np.random.seed(1)
X = np.random.randn(1, 30)
X

array([[ 1.62434536, -0.61175641, -0.52817175, -1.07296862,  0.86540763,
        -2.3015387 ,  1.74481176, -0.7612069 ,  0.3190391 , -0.24937038,
         1.46210794, -2.06014071, -0.3224172 , -0.38405435,  1.13376944,
        -1.09989127, -0.17242821, -0.87785842,  0.04221375,  0.58281521,
        -1.10061918,  1.14472371,  0.90159072,  0.50249434,  0.90085595,
        -0.68372786, -0.12289023, -0.93576943, -0.26788808,  0.53035547]])

In [557]:
Y=(X>0).astype(int)
Y

array([[1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1,
        1, 1, 1, 0, 0, 0, 0, 1]])

In [113]:
b1 = np.zeros((4,1))
b1

array([[0.],
       [0.],
       [0.],
       [0.]])

In [114]:
b2 = np.array([[ 0]])
b2

array([[0]])

In [115]:
W1 = np.array([[-0.00416758, -0.00056267],
        [-0.02136196,  0.01640271],
        [-0.01793436, -0.00841747],
        [ 0.00502881, -0.01245288]])
W1

array([[-0.00416758, -0.00056267],
       [-0.02136196,  0.01640271],
       [-0.01793436, -0.00841747],
       [ 0.00502881, -0.01245288]])

In [116]:
W2 = np.array([[-0.01057952, -0.00909008,  0.00551454,  0.02292208]])
W2

array([[-0.01057952, -0.00909008,  0.00551454,  0.02292208]])

In [117]:
X

array([[ 1.62434536, -0.61175641, -0.52817175],
       [-1.07296862,  0.86540763, -2.3015387 ]])

In [118]:
unit2 = FullyConnectedUnit(2,1,'Sigmoid')

In [119]:
A1, cache1 = unit.forward(X)
A1

(2, 3)
(2, 2)


array([[0.50115918, 0.50059516, 0.49369569],
       [0.50024059, 0.49995443, 0.49966299]])

In [None]:

A2, cache2 = unit2.forward(A1)
A2

In [None]:
cache[0] ## X,W,B

In [None]:
cache[1] ## Z

In [None]:
y=np.array([[0],[1]])
y

In [None]:
cross_entropy_cost()

In [None]:

Z = cache[0][1].dot(cache[0][0])
Z

In [None]:
X.shape

In [None]:
x(np.random.rand(2,2))