In [2]:

from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


### Activation Functions

In [3]:
def sigmoid(Z):
    """Element-wise logistic sigmoid, 1 / (1 + exp(-Z))."""
    denominator = 1 + np.exp(-Z)
    return 1 / denominator


def softmax(x):
    """Numerically stable softmax taken along axis 0.

    The rest of the notebook stores one sample per column (the cost divides
    by ``Y.shape[1]``), so each column is normalised on its own and sums
    to 1. For a single column or a 1-D vector this matches normalising over
    the whole array.

    Parameters
    ----------
    x : array_like
        Logits; classes along axis 0.

    Returns
    -------
    numpy.ndarray
        Probabilities with the same shape as ``x``.
    """
    x = np.asarray(x, dtype=float)
    # A 0-d input has no axis 0; fall back to a reduction over everything.
    axis = 0 if x.ndim else None
    # Subtracting the per-column maximum leaves the result unchanged but keeps
    # exp() from overflowing to inf (which would produce inf/inf = NaN).
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exponents = np.exp(shifted)
    return exponents / np.sum(exponents, axis=axis, keepdims=True)

def relu(Z):
    """Element-wise rectified linear unit: max(0, Z)."""
    return np.maximum(0, Z)

def tanh(x):
    """Element-wise hyperbolic tangent (thin wrapper around ``np.tanh``)."""
    activated = np.tanh(x)
    return activated

def derivative_relu(Z):
    """ReLU derivative: 1.0 where ``Z > 0`` and 0.0 elsewhere (float array)."""
    return np.where(Z > 0, 1.0, 0.0)

def derivative_tanh(x):
    """Return ``1 - x**2`` element-wise.

    This is the tanh derivative only when ``x`` is already the tanh *output*;
    backward_propagation calls it with the cached activation ``A``.
    """
    return 1 - np.square(x)

### Load initial Parameters

In [4]:
# Headerless CSV holding the weights of all layers stacked in one table.
# Alternative input for task a: 'Task_1/a/w.csv'
WEIGHTS_CSV = 'Task_1/b/w-100-40-4.csv'
initial_weights = pd.read_csv(WEIGHTS_CSV, header=None)

In [5]:
# Headerless CSV with one row of biases per layer.
# Alternative input for task a: 'Task_1/a/b.csv'
BIASES_CSV = 'Task_1/b/b-100-40-4.csv'
initial_biases = pd.read_csv(BIASES_CSV, header=None)

In [6]:
def _weight_block(rows, cols):
    """Slice `initial_weights`, cast to float32 and transpose."""
    return initial_weights.iloc[rows, cols].to_numpy().astype(np.float32).T


# Column 0 is skipped in every slice. Each block is transposed after slicing,
# giving the (100, 14), (40, 100), (4, 40) shapes shown in the next cell.
w1 = _weight_block(slice(0, 14), slice(1, None))
w2 = _weight_block(slice(14, 114), slice(1, 41))
w3 = _weight_block(slice(114, None), slice(1, 5))

In [7]:
# Sanity check: shapes of the three weight matrices.
tuple(weights.shape for weights in (w1, w2, w3))

((100, 14), (40, 100), (4, 40))

### Extract initial biases

In [8]:
def _bias_row(row, stop=None):
    """Take columns 1..stop of one row of `initial_biases` as float32."""
    return initial_biases.iloc[row, 1:stop].to_numpy().astype(np.float32).T


# One row per layer; column 0 is skipped and trailing columns beyond the
# layer width are cut off for layers 2 and 3.
b1 = _bias_row(0)
b2 = _bias_row(1, 41)
b3 = _bias_row(2, 5)

In [9]:
# Turn every bias vector into a column of shape (n, 1) so it broadcasts
# across the sample axis when added to W.dot(A).
b1, b2, b3 = (bias.reshape(-1, 1) for bias in (b1, b2, b3))

### Initialize parameters

In [10]:
def initialize_parameters(layer_dims):
    """Build the parameter dictionary from the pre-loaded weights and biases.

    Nothing is initialised randomly: the notebook-level arrays ``w1..w3`` and
    ``b1..b3`` are placed under the keys ``'W1'..'W3'`` / ``'b1'..'b3'``.
    ``layer_dims`` is used only to verify that those arrays have the expected
    shapes, so a mismatch fails here instead of deep inside a matrix product.

    Parameters
    ----------
    layer_dims : sequence of int
        Layer sizes from input to output, e.g. ``[14, 100, 40, 4]``.

    Returns
    -------
    dict
        ``{'W1': w1, 'b1': b1, ..., 'W3': w3, 'b3': b3}`` (the arrays are
        referenced, not copied).

    Raises
    ------
    ValueError
        If ``layer_dims`` does not describe the loaded arrays.
    """
    weights = (w1, w2, w3)
    biases = (b1, b2, b3)

    if len(layer_dims) != len(weights) + 1:
        raise ValueError(
            "layer_dims must have %d entries, got %d"
            % (len(weights) + 1, len(layer_dims))
        )

    parameters = {}
    for idx, (W, b) in enumerate(zip(weights, biases), start=1):
        expected_W = (layer_dims[idx], layer_dims[idx - 1])
        expected_b = (layer_dims[idx], 1)
        if W.shape != expected_W or b.shape != expected_b:
            raise ValueError(
                "layer %d: expected W%s / b%s, loaded W%s / b%s"
                % (idx, expected_W, expected_b, W.shape, b.shape)
            )
        parameters['W' + str(idx)] = W
        parameters['b' + str(idx)] = b

    return parameters

In [11]:

# Layer sizes from input to output.
layer_dims = [14, 100, 40, 4]
params = initialize_parameters(layer_dims)

# Print the shape of every weight matrix and bias vector as a quick check.
for layer in range(1, len(layer_dims)):
    weight_key, bias_key = 'W' + str(layer), 'b' + str(layer)
    print("Shape of W" + str(layer) + ":", params[weight_key].shape)
    print("Shape of B" + str(layer) + ":", params[bias_key].shape, "\n")

Shape of W1: (100, 14)
Shape of B1: (100, 1) 

Shape of W2: (40, 100)
Shape of B2: (40, 1) 

Shape of W3: (4, 40)
Shape of B3: (4, 1) 



### Forward Propagation

In [12]:
def forward_propagation(X, parameters, activation):
    """Run one forward pass and cache every intermediate value.

    Hidden layers use tanh when ``activation == 'tanh'`` and ReLU for any
    other value. The output layer uses sigmoid when it has a single row and
    softmax otherwise.

    Returns
    -------
    tuple
        ``(AL, forward_cache)`` where ``forward_cache`` maps ``'A0'`` to ``X``
        and ``'Z<l>'`` / ``'A<l>'`` to the pre-activation / activation of
        layer ``l``.
    """
    num_layers = len(parameters) // 2   # two entries (W, b) per layer
    hidden_activation = tanh if activation == 'tanh' else relu

    cache = {'A0': X}

    # Hidden layers: affine transform followed by the chosen non-linearity.
    for idx in range(1, num_layers):
        previous = cache['A' + str(idx - 1)]
        pre_activation = parameters['W' + str(idx)].dot(previous) + parameters['b' + str(idx)]
        cache['Z' + str(idx)] = pre_activation
        cache['A' + str(idx)] = hidden_activation(pre_activation)

    # Output layer.
    last = str(num_layers)
    logits = parameters['W' + last].dot(cache['A' + str(num_layers - 1)]) + parameters['b' + last]
    cache['Z' + last] = logits

    output_activation = sigmoid if logits.shape[0] == 1 else softmax
    cache['A' + last] = output_activation(logits)

    return cache['A' + last], cache

### Cost function

In [13]:
def compute_cost(AL, Y, eps=1e-15):
    """Cross-entropy cost averaged over the samples (columns) of ``Y``.

    Uses binary cross-entropy when ``Y`` has a single row and categorical
    cross-entropy otherwise.

    Parameters
    ----------
    AL : array_like
        Predicted probabilities, same shape as ``Y``.
    Y : numpy.ndarray
        Targets, one sample per column.
    eps : float, optional
        Probabilities are clipped to ``[eps, 1 - eps]`` before the logarithm.

    Returns
    -------
    numpy.ndarray
        0-d array holding the cost.
    """
    m = Y.shape[1]

    # A saturated output (exactly 0 or 1) would give log(0) = -inf, and the
    # product 0 * -inf is NaN; clipping keeps the cost finite.
    AL = np.clip(np.asarray(AL, dtype=float), eps, 1 - eps)

    if Y.shape[0] == 1:
        cost = (1. / m) * (-np.dot(Y, np.log(AL).T) - np.dot(1 - Y, np.log(1 - AL).T))
    else:
        cost = -(1. / m) * np.sum(Y * np.log(AL))

    # Collapse shapes such as (1, 1) down to a 0-d value.
    return np.squeeze(cost)

### Backward propagation

In [14]:
def backward_propagation(AL, Y, parameters, forward_cache, activation):
    """Compute gradients of the cost for every layer.

    The output-layer error is ``AL - Y``. Hidden layers use the tanh
    derivative when ``activation == 'tanh'`` and the ReLU derivative for any
    other value; both derivative helpers are evaluated on the cached
    activation ``A<l>``.

    Returns
    -------
    dict
        ``'dZ<l>'``, ``'dW<l>'`` and ``'db<l>'`` for each layer, inserted
        from the last layer down to the first.
    """
    num_layers = len(parameters) // 2
    m = AL.shape[1]
    scale = 1. / m
    activation_grad = derivative_tanh if activation == 'tanh' else derivative_relu

    grads = {}

    # Output layer.
    last = str(num_layers)
    delta = AL - Y
    grads["dZ" + last] = delta
    grads["dW" + last] = scale * np.dot(delta, forward_cache['A' + str(num_layers - 1)].T)
    grads["db" + last] = scale * np.sum(delta, axis=1, keepdims=True)

    # Hidden layers, walking backwards from layer L-1 to layer 1.
    for idx in range(num_layers - 1, 0, -1):
        upstream = np.dot(parameters['W' + str(idx + 1)].T, grads["dZ" + str(idx + 1)])
        delta = upstream * activation_grad(forward_cache['A' + str(idx)])

        grads["dZ" + str(idx)] = delta
        grads["dW" + str(idx)] = scale * np.dot(delta, forward_cache['A' + str(idx - 1)].T)
        grads["db" + str(idx)] = scale * np.sum(delta, axis=1, keepdims=True)

    return grads

### Update parameters (Gradient Descent)

In [15]:
def update_parameters(parameters, grads, learning_rate):
    """Apply one gradient-descent step to every ``W<l>`` and ``b<l>``.

    Entries of ``parameters`` are replaced with ``value - learning_rate *
    gradient``; the same dictionary object is returned.
    """
    num_layers = len(parameters) // 2

    for idx in range(1, num_layers + 1):
        for prefix in ("W", "b"):
            key = prefix + str(idx)
            parameters[key] = parameters[key] - learning_rate * grads["d" + key]

    return parameters

### Get predictions and calculate accuracy

In [16]:
def predict(X, y, parameters, activation):
    """Return the accuracy of the network on ``(X, y)``, rounded to 2 decimals.

    With a single-row ``y`` the output is thresholded at 0.5; otherwise the
    arg-max over axis 0 of both the prediction and ``y`` is compared.
    """
    m = X.shape[1]
    probabilities, _ = forward_propagation(X, parameters, activation)

    if y.shape[0] == 1:
        expected = y
        predicted = np.array(probabilities > 0.5, dtype='float')
    else:
        expected = np.argmax(y, 0)
        predicted = np.argmax(probabilities, 0)

    accuracy = np.sum((predicted == expected) / m)
    return np.round(accuracy, 2)

### Create Custom Model for Our Requirements

In [17]:
def model(X, Y, layers_dims, learning_rate = 0.03, activation = 'relu', num_iterations = 3000):
    """Train the network with plain gradient descent.

    Parameters
    ----------
    X, Y : numpy.ndarray
        Inputs and targets, one sample per column.
    layers_dims : sequence of int
        Layer sizes, forwarded to ``initialize_parameters``.
    learning_rate : float
        Step size of each update.
    activation : str
        Hidden-layer activation, forwarded to the forward/backward passes.
    num_iterations : int
        Number of gradient-descent steps.

    Returns
    -------
    tuple
        ``(parameters, grads, costs)``. ``grads`` holds the gradients of the
        last iteration, computed before that iteration's update; it is an
        empty dict when ``num_iterations`` is 0. ``costs`` has one entry per
        iteration.
    """
    # Resets NumPy's global RNG; kept so the call has the same side effect
    # as before.
    np.random.seed(1)
    costs = []
    # Defined up front so the return below also works with zero iterations.
    grads = {}

    parameters = initialize_parameters(layers_dims)

    for _ in range(num_iterations):
        AL, forward_cache = forward_propagation(X, parameters, activation)

        costs.append(compute_cost(AL, Y))

        grads = backward_propagation(AL, Y, parameters, forward_cache, activation)
        parameters = update_parameters(parameters, grads, learning_rate)

    return parameters, grads, costs

In [18]:
# A single training example: 14 input features and a 4-way one-hot target.
X = [-1, 1, 1, 1, -1, -1, 1, -1, 1, 1, -1, -1, 1, 1]
y = [0, 0, 0, 1]

# The network expects one sample per column, so turn both lists into
# column vectors.
X_train = np.reshape(X, (-1, 1))
Y_train = np.reshape(y, (-1, 1))

print(X_train.shape, Y_train.shape)

# Hyper-parameters for the run below.
layers_dims = [14, 100, 40, 4]
lr = 0.01
iters = 1

(14, 1) (4, 1)


### Get trained parameters, gradients and costs, then save the gradients to CSVs

In [19]:
# Run `iters` gradient-descent steps on the single training example.
parameters, grads, costs = model(
    X_train,
    Y_train,
    layers_dims,
    learning_rate=lr,
    activation='relu',
    num_iterations=iters,
)

In [20]:
# Show the parameter dictionary returned by model() (keys 'W1'..'W3', 'b1'..'b3').
# NOTE(review): this prints every array in full; consider showing shapes only.
parameters

{'W1': array([[ 1.08655214,  1.80982471,  1.01029217, ..., -0.71842277,
         -0.59960604,  1.51287401],
        [-1.98744071,  0.12322588,  0.30731344, ...,  0.11292759,
         -0.08467472, -0.73554295],
        [ 1.9036324 ,  1.99985731,  0.87112796, ...,  0.52140337,
          1.50393093,  0.15683161],
        ...,
        [-0.30217358, -1.27342188,  0.11899082, ..., -0.47128811,
          0.04086657, -1.30403125],
        [ 0.74348986, -0.32424814, -0.90852946, ...,  0.13183239,
         -0.84833121, -0.8020395 ],
        [-1.28044534, -0.00368237,  0.46613842, ...,  0.64362311,
          1.24858057, -0.20315255]]),
 'b1': array([[-0.85362327],
        [-0.72440469],
        [ 0.26183882],
        [ 0.49014536],
        [-0.56905711],
        [-0.84271878],
        [-0.47026345],
        [-0.41469008],
        [-1.49534976],
        [ 0.18982403],
        [-1.68693888],
        [-0.06435248],
        [-0.77225488],
        [ 0.20731127],
        [-1.24754643],
        [ 1.1235

In [21]:
# Work on a shallow copy: a later cell replaces the 'dW*' entries of
# grads_dict with their transposes, and with a plain alias that would
# overwrite the entries of `grads` as well.
# NOTE: no dtype conversion is performed here; the arrays keep whatever dtype
# backward_propagation produced.
grads_dict = dict(grads)

In [22]:
# Flip every weight gradient to the transposed orientation used for export.
# Caution: running this cell a second time transposes them back.
for key in ('dW1', 'dW2', 'dW3'):
    grads_dict[key] = grads_dict[key].T

In [23]:
# Shapes of the weight gradients after the transpose above.
tuple(grads_dict[key].shape for key in ('dW1', 'dW2', 'dW3'))

((14, 100), (100, 40), (40, 4))

In [24]:
# Stack the weight gradients stored in grads_dict into one table, layer 1
# first, then layer 2, then layer 3, keeping the row order inside each matrix.
#
# Reversing the flat list of rows (to undo the last-layer-first order in which
# backward_propagation fills the dict) would also reverse the rows *within*
# every layer, so the keys are ordered explicitly by layer number instead.
dw_keys = sorted(
    (key for key in grads_dict if key.startswith('dW')),
    key=lambda key: int(key[2:]),
)

rows = []
for key in dw_keys:
    rows.extend(grads_dict[key])  # appends one 1-D array per matrix row

# Rows of different lengths are aligned on the left by pandas; shorter rows
# are expected to be padded with NaN on the right.
df_dws = pd.DataFrame(rows)



In [25]:
# Row count is the total number of rows across the stacked dW matrices;
# column count is the width of the widest one.
df_dws.shape

(154, 100)

In [26]:
# Flatten each (n, 1) bias gradient to a 1-D vector, keyed by its name.
dbs = {name: grads_dict[name].flatten() for name in ('db1', 'db2', 'db3')}



In [27]:
# Make sure the output directory exists so a fresh checkout can run this
# cell without a manual mkdir (to_csv does not create missing directories).
Path('./answer').mkdir(parents=True, exist_ok=True)

# One row per bias-gradient vector (db1, db2, db3), written without header
# or index column.
df_dbs = pd.DataFrame.from_dict(dbs, orient='index')
df_dbs.to_csv('./answer/db.csv', header=False, index=False)

# Stacked weight gradients, same CSV conventions.
df_dws.to_csv('./answer/dw.csv', header=False, index=False)