In [None]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision 
import torchvision.transforms as transforms
import torchvision.models as models

# Load the pretrained VGG-16 (batch-normalisation variant); it is used here
# purely as a fixed feature extractor.
net = models.vgg16_bn(pretrained=True)

# Put the network in inference mode. Without this the BatchNorm layers
# normalise with the statistics of the current (single-image) batch and keep
# updating their running averages, so the extracted features would depend on
# the order in which images are processed.
net.eval()

# Directory holding one numpy file of images per class.
load_path = './images/'

# Directory that receives one numpy file of extracted features per class.
save_path = './Feature_extraction_2D/'
os.makedirs(save_path, exist_ok=True)

# Names of the entries found in the load path (the training data).
get_class_names = os.listdir(load_path)

for i in get_class_names:
    # The features are saved under the same file name as the source file.
    new_save_path = save_path + i

    # Path of the class file to load.
    class_path = load_path + i

    # Skip sub-directories (e.g. notebook checkpoint folders): np.load can
    # only read regular files.
    if not os.path.isfile(class_path):
        continue

    # Load the numpy file for this class.
    img = np.load(class_path)

    # Collects one feature vector per image.
    arr = []

    # No gradients are needed for feature extraction; disabling autograd
    # avoids building a graph for every forward pass.
    with torch.no_grad():
        # for each image in the class file
        for j in img:

            # Convert the numpy array to a tensor.
            # NOTE(review): assumes the stored pixels are already floating
            # point in a range the network can consume -- TODO confirm.
            j = torch.tensor(j)

            # Reshape to [batch_size, channels, height, width].
            # NOTE(review): assumes each entry holds 3x32x32 values in
            # channel-first order -- TODO confirm against the data writer.
            j = j.view([-1,3,32,32])

            # Upscale to the 224x224 spatial size used for the VGG input.
            j = F.interpolate(j,(224,224))

            # Run only the convolutional part of the network.
            z = net.features(j)

            # Average-pool each channel over a 7x7 window (the spatial size
            # of the feature map according to the original notes).
            m = F.avg_pool2d(z,(7,7),1,0)

            # Flatten to a 1-D feature vector.
            m = m.view([-1]).detach()

            # Convert back to a numpy array and collect it.
            m = np.asarray(m)
            arr.append(m)

    arr = np.asarray(arr)
    print(arr.shape)

    # Save the feature matrix for this class.
    np.save(new_save_path,arr)

In [33]:
def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)) element-wise.

    The result is computed from exp(-|x|), which never overflows, so large
    negative inputs give 0.0 without a floating-point overflow warning.

    Parameters
    ----------
    x : scalar or array-like
        Input value(s).

    Returns
    -------
    numpy scalar or ndarray
        Same shape as ``x``; scalar input yields a numpy scalar.
    """
    x = np.asarray(x)
    # exp(-|x|) is always in (0, 1], so neither branch below can overflow.
    decay = np.exp(-np.abs(x))
    out = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d array back into a scalar and leaves
    # higher-dimensional arrays unchanged.
    return out[()]

import os
import numpy as np
import matplotlib.pyplot as plt
from random import shuffle

# Directory containing one numpy feature file per class.
path = './Feature_extraction_2D/'

# os.listdir() returns entries in arbitrary order; sorting keeps the mapping
# from file name to integer class label the same on every run and machine.
class_names = sorted(os.listdir(path))

print(class_names)

# Build one flat list of feature vectors and a parallel list of class labels.
val = 0
data_points = []
data_points_class = []
for i in class_names:
    load_name = os.path.join(path,i)
    extracted_features = np.load(load_name)
    data_points.extend(extracted_features)
    data_points_class.extend([val] * len(extracted_features))
    print(val,i)
    val += 1

if not data_points:
    # Fail early with a clear message instead of an obscure error further down.
    raise ValueError("no feature vectors found in " + path)

data_points = np.asanyarray(data_points)
data_points_class = np.asanyarray(data_points_class)

# Shuffle features and labels together with a fixed seed so the ordering is
# reproducible. (The module-level `shuffle` import is no longer needed by
# this block; it is left in place in case other cells use it.)
shuffle_order = np.random.RandomState(0).permutation(len(data_points_class))
data_points = data_points[shuffle_order]
data_points_class = data_points_class[shuffle_order]
print(data_points_class)

# A bare expression in the middle of a cell displays nothing, so print the
# shape explicitly.
print(np.shape(data_points))

# setting nodes

dim_hidden_1 = 40          # units in the first hidden layer
dim_hidden_2 = 28          # units in the second hidden layer
pca_components = 512       # input dimensionality expected by the first layer
C = 0.01                   # not used in the visible code

# Fixed seeds so the train/test split and the initial weights are the same
# on every run (needed for a fair comparison of the optimisers).
SPLIT_SEED = 0
INIT_SEED = 0

from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(
    data_points, data_points_class, test_size = 0.2, random_state = SPLIT_SEED)

# PCA is currently disabled, so the raw feature vectors feed the network.
# If it is re-enabled, fit it on X_train only and apply that same fitted
# transform to X_test; fitting a second PCA on the test set would project
# the two sets into different spaces.
assert X_train.shape[1] == pca_components, (
    "feature dimension %d does not match pca_components=%d"
    % (X_train.shape[1], pca_components))

# Number of classes taken from ALL labels, so the one-hot width and the
# output layer agree even if the highest class is absent from the training
# split.
n_classes = int(data_points_class.max()) + 1

# Keep the integer training labels for the accuracy computation and replace
# Y_train with its one-hot encoding (one row per sample).
ytrain = Y_train
b = np.zeros((Y_train.size, n_classes))
b[np.arange(Y_train.size),Y_train] = 1
Y_train=b
print(Y_train)

# Random Initialization and Parameters


learning_rate=0.0005
learning_Rate=0.0005       # alternate spelling, unused in the visible code; kept in case other cells reference it
num_iterations=60000       # upper bound on training iterations
delta_cost = 1e-8          # stop when the cost changes by less than this
beta = 0.9                 # decay of the running average of gradients
gamma = 0.99               # decay of the running average of squared gradients
m=X_train.shape[0]         # number of training samples (derived instead of hard-coded)
lam=0.4                    # L2 regularisation strength

# Initial weights scaled by sqrt(2 / fan_in); biases start at zero.
_init_rng = np.random.RandomState(INIT_SEED)
A_W1 = _init_rng.randn(dim_hidden_1,pca_components) * np.sqrt(2/pca_components)
A_b1 = np.zeros(shape=(dim_hidden_1, 1))
A_W2 = _init_rng.randn(dim_hidden_2, dim_hidden_1) * np.sqrt(2/dim_hidden_1)
A_b2 = np.zeros(shape=(dim_hidden_2, 1))
A_W3 = _init_rng.randn(n_classes, dim_hidden_2) * np.sqrt(2/dim_hidden_2)
A_b3 = np.zeros(shape=(n_classes, 1))


# Optimiser comparison.
#
# The same three-layer network is trained three times from the same initial
# weights (A_W1 ... A_b3), once per update rule, and evaluated on the training
# and development sets. The code that used to be repeated for each run lives
# in the helper functions below.


def forward_pass(X, params):
    """Run the network (tanh -> tanh -> sigmoid) on ``X``.

    Parameters
    ----------
    X : ndarray
        One sample per row; it is transposed internally.
    params : sequence
        ``(W1, b1, W2, b2, W3, b3)``.

    Returns
    -------
    tuple
        ``(A1, A2, A3)`` layer activations, one column per sample.
    """
    W1, b1, W2, b2, W3, b3 = params
    A1 = np.tanh(np.dot(W1, X.T) + b1)
    A2 = np.tanh(np.dot(W2, A1) + b2)
    A3 = sigmoid(np.dot(W3, A2) + b3)
    return A1, A2, A3


def train_network(X, Y, init_params, opcode, learning_rate, num_iterations,
                  delta_cost, beta, gamma, lam, eps=1e-08):
    """Train the network with full-batch updates and return the result.

    Parameters
    ----------
    X : ndarray
        Training samples, one per row.
    Y : ndarray
        One-hot targets, one row per sample.
    init_params : sequence
        Initial ``(W1, b1, W2, b2, W3, b3)``; the arrays are not modified.
    opcode : int
        Update rule: 0 = plain gradient step ("Delta" in the plots),
        1 = step along the running average of gradients ("Gen Delta"),
        anything else = bias-corrected first/second moment update ("Adam").
    learning_rate, num_iterations, delta_cost, beta, gamma, lam : numbers
        Step size, iteration cap, early-stop threshold on the cost change,
        first/second moment decay rates and L2 regularisation strength.
    eps : float
        Small constant added inside the square root of the Adam-style update.

    Returns
    -------
    tuple
        ``(params, costs)`` where ``params`` is the trained parameter tuple
        and ``costs`` holds the cost recorded every 100 iterations.
    """
    n_samples = X.shape[0]     # derived from the data instead of a hard-coded count
    targets = Y.T
    params = list(init_params)
    moment1 = [np.zeros_like(p) for p in params]   # running average of gradients
    moment2 = [np.zeros_like(p) for p in params]   # running average of squared gradients

    # Bounds that only change probabilities that are exactly 0 or 1, so the
    # logarithms in the cost stay finite when the sigmoid saturates.
    prob_lo = np.finfo(float).tiny
    prob_hi = np.nextafter(1.0, 0.0)

    costs = []
    cost = 0
    for i in range(0, num_iterations):
        W1, b1, W2, b2, W3, b3 = params

        # forward prop
        A1, A2, A3 = forward_pass(X, params)

        # cost: cross-entropy plus L2 penalty on the weights
        prev_cost = cost
        probs = np.clip(A3, prob_lo, prob_hi)
        logprobs = np.multiply(np.log(probs), targets) + np.multiply((1 - targets), np.log(1 - probs))
        cost = ( - np.sum(logprobs) / n_samples)+(lam * (np.sum(np.square(W1)) + np.sum(np.square(W2)) + np.sum(np.square(W3))) / (2 * n_samples))
        cost = float(np.squeeze(cost))
        if i % 100 == 0:
            costs.append(cost)

        # back prop (the gradients use the unclipped activations)
        dZ3 = A3 - targets
        dW3 = (1 / n_samples) * np.dot(dZ3, A2.T)+((lam * W3) / n_samples)
        db3 = (1 / n_samples) * np.sum(dZ3, axis=1, keepdims=True)
        dZ2 = np.multiply(np.dot(W3.T, dZ3), 1 - np.power(A2, 2))
        dW2 = (1 / n_samples) * np.dot(dZ2, A1.T)+((lam * W2) / n_samples)
        db2 = (1 / n_samples) * np.sum(dZ2, axis=1, keepdims=True)
        dZ1 = np.multiply(np.dot(W2.T, dZ2), 1 - np.power(A1, 2))
        dW1 = (1 / n_samples) * np.dot(dZ1, X)+(lam * W1) / n_samples
        db1 = (1 / n_samples) * np.sum(dZ1, axis=1, keepdims=True)
        grads = [dW1, db1, dW2, db2, dW3, db3]

        # choose the step direction according to the update rule
        if opcode == 0:
            steps = grads
        else:
            moment1 = [beta * v + (1 - beta) * g for v, g in zip(moment1, grads)]
            if opcode == 1:
                steps = moment1
            else:
                moment2 = [gamma * s + (1 - gamma) * np.power(g, 2)
                           for s, g in zip(moment2, grads)]
                # bias correction for the zero-initialised running averages
                corr1 = 1 - np.power(beta, (i + 1))
                corr2 = 1 - np.power(gamma, (i + 1))
                steps = [(v / corr1) / np.sqrt((s / corr2) + eps)
                         for v, s in zip(moment1, moment2)]

        # new arrays are created here, so init_params is left untouched
        params = [p - learning_rate * step for p, step in zip(params, steps)]

        if i % 1000 == 0:
            print("cost "+str(i/1000),cost)

        # stop once the cost has stopped changing
        if abs(prev_cost - cost) < delta_cost:
            break

    return tuple(params), costs


def predict_classes(X, params):
    """Return the index of the largest output unit for every row of ``X``."""
    return np.argmax(forward_pass(X, params)[2], axis=0)


def accuracy(predicted, actual):
    """Return the fraction of positions where ``predicted`` equals ``actual``."""
    return float(np.mean(np.asarray(predicted) == np.asarray(actual)))


# (opcode, cost-curve title, training confusion title, development confusion title)
EXPERIMENTS = [
    (2, "Avg. error vs Epoch plot - Adam - Image data",
     'Training - Adam - Image Data', 'Development - Adam - Image Data'),
    (1, "Avg. error vs Epoch plot - Gen Delta - Image data",
     'Training - gen-Delta - Image Data', 'Development - gen-Delta - Image Data'),
    (0, "Avg. error vs Epoch plot -Delta - Image data",
     'Training - Delta - Image Data', 'Development - Delta - Image Data'),
]

for opcode, curve_title, train_title, dev_title in EXPERIMENTS:
    # Training: every run starts from the same initial weights.
    (W1, b1, W2, b2, W3, b3), costs = train_network(
        X_train, Y_train, (A_W1, A_b1, A_W2, A_b2, A_W3, A_b3), opcode,
        learning_rate, num_iterations, delta_cost, beta, gamma, lam)
    trained_params = (W1, b1, W2, b2, W3, b3)

    # Avg. error vs Epoch plot
    plt.plot(costs)
    plt.ylabel('cost')
    plt.xlabel('iterations (per hundreds)')
    plt.title(curve_title)
    plt.show()

    # Predictions on the training and development data
    pred = predict_classes(X_train, trained_params)
    pred1 = predict_classes(X_test, trained_params)

    # accuracy scores (computed over however many samples each set has)
    print("train accuracy =",accuracy(pred, ytrain))
    print("test accuracy =",accuracy(pred1, Y_test))

    # the_plot_conf is not defined in this part of the notebook; it is
    # expected to have been defined by an earlier cell.
    the_plot_conf(pred,ytrain,train_title)
    the_plot_conf(pred1,Y_test,dev_title)

