In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [6]:
s=pd.DataFrame({'s' : [5],
               't' :[7] })

In [7]:
type(np.array([5]))

numpy.ndarray

In [8]:
class DenseNeuralNetwork():
    def __init__(self,layer_dims=(None),):
        if not isinstance(layer_dims, tuple):
            raise TypeError("Input 'layer_dims' must be of type tuple")
        self.layer_dims=layer_dims

    def activation_functions (self,activations):
        """
        activations : activation functions used for every layer; options include:
            - linear
            - relu
            - sigmoid
            - tanh
            - softmax
        """
        if not isinstance(activations, list):
            raise TypeError("Input must be of type list")
        allowed_activ = set(['linear','relu', 'sigmoid', 'tanh', 'softmax'])
        activ_set = set(activations)
        diff = activ_set.difference(allowed_activ)
        if(diff) :
            raise InterruptedError(f'Invalid activation functions, allowed activations are {allowed_activ}')
        L = len(self.layer_dims)
        assert(len(activations)== L-1),"Number of activations must equall number of layers"
        self.__activations = activations
    def fit(self,X,Y,cost_function,print_cost=False,learning_rate=None,num_iterations=None,seed=None):
        """
    Fits the model using the given data and parameters.
    Parameters:
    - X: Input features
    - Y: Target values
    - print_cost: Whether to print the cost during optimization
    - learning_rate: The learning rate for optimization
    - num_iterations: Number of iterations for optimization
    - cost_function: The cost function to use; options include:
        - "MSE" for Mean Squared Error (Regression)
        - "BinaryCrossEntropy" for Binary Cross-Entropy (Binary Classification)
        - "CategoricalCrossEntropy" for Categorical Cross-Entropy (Multi-Class Classification)
        """
        X= np.array(X)
        Y=np.array(Y)
       # if (not isinstance(X, numpy.ndarray)) or (not isinstance(X, pandas.core.series.Series)) :
         #   raise TypeError("Training data must be a pandas.series or numpy.ndarray")
       # if (not isinstance(Y, numpy.ndarray)) or (not isinstance(Y, pandas.core.series.Series)) :
        #    raise TypeError("Training data must be a pandas.series or numpy.ndarray")
        if (X.shape[0] != self.layer_dims[0]):
            raise IndentationError(f"Expected an input of dimension ({self.layer_dims[0]},-), got ({X.shape[0]},-)")
        if (Y.shape[0] != self.layer_dims[-1]):
            raise IndentationError(f"Expected an input of dimension ({self.layer_dims[-1]},-), got ({Y.shape[0]},-)")
        self.X= X
        self.Y= Y
        self.__learning_rate = learning_rate
        self.__num_iterations = num_iterations
        self.__cost_function=cost_function
        costs = []                         # keep track of cost
        parameters  = intialise_parameters(layers_dims,seed)
        print(parameters.keys())
        for i in range(0, num_iterations):
            AL,caches = forward_propagation(X, parameters,self.__activations)
            cost = compute_cost(cost_function,AL,Y)
            grads = back_propagation(AL, Y, caches,cost_function,self.__activations)
            parameters = update_parameters(parameters,grads,learning_rate)
            if print_cost and i % 100 == 0:
                print ("Cost after iteration %i: %f" %(i, cost))
            if print_cost and i % 100 == 0:
                costs.append(cost)      
        plt.plot(np.squeeze(costs))
        plt.ylabel('cost')
        plt.xlabel('iterations (per hundreds)')
        plt.title("Learning rate =" + str(learning_rate))
        plt.show()
        self.__parameters = parameters       
    def predict(self,X, y):
        m = X.shape[1]
        n = len(self.__parameters) // 2 
        p = np.zeros((1,m))
        probas, caches = forward_propagation(X, self.__parameters,self.__activations)
        for i in range(0, probas.shape[1]):
            if probas[0,i] > 0.5:
                p[0,i] = 1
            else:
                p[0,i] = 0
        print("Accuracy: "  + str(np.sum((p == y)/m)))
        self.predictions = p

In [9]:
def linear_backward_(dA,cache):
    Z = cache 
    dZ= dA * 1
    return dZ
def sigmoid_backward(dA,cache):
    Z = cache
    s = 1/(1+np.exp(-Z))
    dZ = dA * s * (1-s)
    return dZ
def relu_backward(dA,cache):
    Z=cache
    dZ = dA
    print('relu back :','Z' , Z.shape ,'dZ' ,  dZ.shape)
    dZ[dA<=0]=0
    return dZ
def tanh_backward(dA,cache):
    Z=cache
    tanh_Z= (np.exp(Z) - np.exp(-1*Z))/(np.exp(Z) + np.exp(-1*Z))
    dZ = dA * 1-tanh_Z**2
    return dZ
def softmax_backward(dA,cache,Y):
    Z=cache
    e_x = np.exp(Z- np.max(Z))  # Subtract max(Z) for numerical stability
    props= e_x / e_x.sum(axis=0) 
    dZ= ( props - Y )

In [10]:
def intialise_parameters(layers_dim,seed):
    if(seed):
        np.random.seed(seed)
    L=len(layers_dim)
    parameters={}
    for l in range(1,L):
        parameters['W' + str(l)] = np.random.randn(layers_dim[l],layers_dim[l-1]) * np.sqrt(1/layers_dim[l-1])
        parameters['b'+ str(l)] = np.zeros((layers_dim[l],1))
        print('W' + str(l) , f'  {(layers_dim[l],layers_dim[l-1])}')
        # Check the dimensions
        assert(parameters['W' + str(l)].shape == (layers_dim[l], layers_dim[l-1]))
        assert(parameters['b' + str(l)].shape == (layers_dim[l], 1))        
    return parameters

In [11]:
def linear(Z):
    return Z
def sigmoid(Z):
    return 1/(1+np.exp(-1*Z))
def relu(Z):
    return np.maximum(0, Z)
def tanh(Z):
    return (np.exp(Z) - np.exp(-1*Z))/(np.exp(Z) + np.exp(-1*Z))
def softmax(Z):
    e_x = np.exp(Z- np.max(Z))  # Subtract max(Z) for numerical stability
    return e_x / e_x.sum(axis=0)    

In [12]:
def MSE_Backward(A,Y):
    return 2*(A-Y)
def BCE_Backward(A,Y):
    epsilon = 1e-10  # small value to avoid division by zero
    A = np.clip(A, epsilon, 1 - epsilon)
    return  - (np.divide(Y, A) - np.divide(1 - Y, 1 - A)) 
def CCE_Backward(A,Y):
    epsilon = 1e-10  # small value to avoid division by zero
    A = np.clip(A, epsilon, 1 - epsilon)
    return  - np.divide(Y, A) 

In [13]:
def linear_forward_activation(A_prev,W,b,activation):
    Z=np.dot(W,A_prev) + b
    if(activation == 'relu'):
        A=relu(Z)
    elif(activation=='sigmoid'):
        A= sigmoid(Z)
    elif(activation=='tanh'):
        A=tanh(Z)
    elif(activation=='softmax'):
        A=softmax(Z)
    else:
        A=lineare(Z)
    linear_cache=(A,W,b)
    activation_cache=Z
    cache=(linear_cache,activation_cache)
    return A, cache

In [14]:
def forward_propagation(X,parameters,activations):
    L=len(parameters) // 2 #number of layers
    A=X
    caches=[]
    for l in range(1,L):
        A_prev=A
        print('forward : ','A_prev ' ,A_prev.shape)
        A,cache = linear_forward_activation(A_prev,parameters['W'+str(l)],parameters['b'+str(l)],activations[l-1])
        caches.append(cache)
    A , cache = linear_forward_activation(A,parameters['W'+str(L)], parameters['b'+str(L)], activations[L-1])
    caches.append(cache)
    return A,caches

In [15]:
def MSE(A,Y):
    return  np.sum((A-Y)**2)
def BCE (A,Y):
    epsilon=1e-15
    A = np.clip(A, epsilon, 1 - epsilon)  # Clip values to avoid log(0)
    return -1*(Y * np.log(A) + (1-Y) * np.log(1-A))
def CCE (A,Y):
    epsilon=1e-15
    A = np.clip(A, epsilon, 1 - epsilon)  # Clip values to avoid log(0)
    return -1 * np.sum(Y *log(A) ,axis=1)

In [16]:
def compute_cost(function,A,Y):
    m=Y.shape[1]
    if(function.lower() =='mse'):
       return (1/(2*m)) * MSE(A,Y)
    elif(function.lower() == 'binarycrossentropy'):
       return (1/(2*m)) * BCE(A,Y)   
    elif(function.lower() == 'categoricalcrossentropy'):
       return (1/(2*m)) * CCE(A,Y)
    else:
        raise ImportError("Invalid Cost function")

In [17]:
def linear_backward(dZ, cache): 
    A_prev, W, b = cache
  #  A_prev (1, 209) W (1, 5) B (1, 1)
    print('A_prev' , A_prev.shape,'W', W.shape,'B', b.shape,'dZ',dZ.shape)
    m = A_prev.shape[1]
    dA_prev = (W.T @ dZ)
    dW = ( dZ @ A_prev.T) /m
    db = np.sum(dZ , axis=1 ,keepdims=True)/m
    return dA_prev, dW, db

In [18]:
def linear_activation_backward(dA, cache, activation):
    linear_cache, activation_cache = cache
    if(activation == 'relu'):
        dZ=relu_backward(dA,activation_cache)
    elif(activation=='sigmoid'):
        dZ= sigmoid_backward(dA,activation_cache)
    elif(activation=='tanh'):
        dZ=tanh_backward(dA,activation_cache)
    elif(activation=='softmax'):
        dZ=softmax_backward(dA,activation_cache)
    else:
        dZ=linear_backward_(dA)
    dA_prev, dW, db = linear_backward(dZ, linear_cache)
    print('dW' , dW.shape,'dA_prev' , dA_prev.shape)
    return dA_prev, dW, db

In [19]:
def back_propagation(A,Y,caches,cost_func,activiations):
    Y = Y.reshape(A.shape) # after this line, Y is the same shape as A
    if(cost_func.lower() =='mse'):
       dA = MSE_Backward(A,Y)
    elif(cost_func.lower() == 'binarycrossentropy'):
       dA = BCE_Backward(A,Y)
    elif(cost_func.lower() == 'categoricalcrossentropy'):
       dA = CCE_Backward(A,Y)
    grads={}
    L=len(caches)
    m = A.shape[1]
    print('Layers ' , L)
    grads["dA"+str(L-1)], grads["dW" + str(L)], grads["db"+str(L)] = linear_activation_backward(dA , caches[L-1] , activiations[-1])
    for l in reversed(range(1,L-1)):
        grads["dA" + str(l)] , grads["dW" + str(l + 1)] , grads["db" + str(l + 1)] = linear_activation_backward(grads["dA" +str(l+1)] , caches[l+1] , activiations[l])
    return grads

In [20]:
def update_parameters(parameters, grads, learning_rate):
    L = len(parameters) // 2 # number of layers in the neural network
    for l in range(1,L+1):
        parameters['W' + str(l)].T = parameters['W' + str(l)].T - learning_rate * grads['dW' + str(l) ]
        parameters['b' + str(l)] = parameters['b' + str(l)] - learning_rate * grads['db' + str(l) ]
    return parameters

In [21]:
import h5py
import matplotlib.pyplot as plt
def load_data():
    train_dataset = h5py.File('datasets/train_catvnoncat.h5', "r")
    train_set_x_orig = np.array(train_dataset["train_set_x"][:]) # your train set features
    train_set_y_orig = np.array(train_dataset["train_set_y"][:]) # your train set labels

    test_dataset = h5py.File('datasets/test_catvnoncat.h5', "r")
    test_set_x_orig = np.array(test_dataset["test_set_x"][:]) # your test set features
    test_set_y_orig = np.array(test_dataset["test_set_y"][:]) # your test set labels

    classes = np.array(test_dataset["list_classes"][:]) # the list of classes
    
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))
    
    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes

In [22]:
train_x_orig, train_y, test_x_orig, test_y, classes = load_data()
# Reshape the training and test examples 
train_x_flatten = train_x_orig.reshape(train_x_orig.shape[0], -1).T   # The "-1" makes reshape flatten the remaining dimensions
test_x_flatten = test_x_orig.reshape(test_x_orig.shape[0], -1).T

# Standardize data to have feature values between 0 and 1.
train_x = train_x_flatten/255
test_x = test_x_flatten/255

In [23]:
layers_dims = (12288, 20, 7, 5, 1) #  4-layer model

In [24]:
dnn = DenseNeuralNetwork(layer_dims=layers_dims)

In [25]:
dnn.activation_functions(['relu','relu','relu','sigmoid'])

In [26]:
dnn.fit(train_x,train_y,cost_function='BinaryCrossEntropy',print_cost=True,learning_rate= 0.0075,num_iterations = 3000,seed=1)

W1   (20, 12288)
W2   (7, 20)
W3   (5, 7)
W4   (1, 5)
dict_keys(['W1', 'b1', 'W2', 'b2', 'W3', 'b3', 'W4', 'b4'])
forward :  A_prev  (12288, 209)
forward :  A_prev  (20, 209)
forward :  A_prev  (7, 209)
Layers  4
A_prev (1, 209) W (1, 5) B (1, 1) dZ (1, 209)
dW (1, 1) dA_prev (5, 209)
relu back : Z (1, 209) dZ (5, 209)
A_prev (1, 209) W (1, 5) B (1, 1) dZ (5, 209)


ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 5 is different from 1)