In [None]:
# -*- coding: utf-8 -*-
"""
Author:-aam35
Procedure to train a 4-layer MLP using LRA-E on Fashion-MNIST
"""

import numpy as np
import os
import sys
import tensorflow as tf
#import tensorflow_datasets
#data = tensorflow_datasets.load('mnist')
#from tensorflow.examples.tutorials.mnist import input_data
import time
#tf.enable_eager_execution()
#tf.executing_eagerly() -- Follow these 2 steps if working in TF2.0_beta

# Seed TensorFlow's global RNG so repeated runs give a consistent result
tf.random.set_seed(1234)

# Fashion-MNIST as raw arrays: 60000 training and 10000 test images of 28x28 pixels
(image_train, label_train), (image_test, label_test) = tf.keras.datasets.fashion_mnist.load_data()

# Positional hold-out: the first 50000 training samples stay for training,
# the last 10000 become the validation split
image_train, image_val = image_train[0:50000], image_train[50000:60000]
label_train, label_val = label_train[0:50000], label_train[50000:60000]

# Images: flatten to 784-vectors, divide pixel values by 255, store as float32 tensors
image_train = tf.cast(image_train.reshape(50000, 28*28) / 255.0, tf.float32)
image_val = tf.cast(image_val.reshape(10000, 28*28) / 255.0, tf.float32)
image_test = tf.cast(image_test.reshape(10000, 28*28) / 255.0, tf.float32)

# Labels: integer class ids -> one-hot rows of width 10, stored as float32 tensors
label_train = tf.cast(tf.one_hot(label_train, depth=10), tf.float32)
label_val = tf.cast(tf.one_hot(label_val, depth=10), tf.float32)
label_test = tf.cast(tf.one_hot(label_test, depth=10), tf.float32)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


In [None]:
from keras import backend as K
def squash(inputs):
    """Shrink every vector along the last axis with the squashing non-linearity.

    Each vector v is multiplied by (|v|^2 / (1 + |v|^2)) / sqrt(|v|^2 + eps),
    where |v|^2 is the sum of squares over the last axis and eps is K.epsilon().
    The result has the same shape as `inputs`.
    """
    # Squared length per vector; keepdims so it broadcasts against `inputs`
    sq_len = K.sum(K.square(inputs), axis=-1, keepdims=True)

    # Length-dependent shrink factor, then divide by the (epsilon-guarded) length
    shrink = sq_len / (1 + sq_len)
    return (shrink / K.sqrt(sq_len + K.epsilon())) * inputs

In [None]:
# Hyper-parameters read as module-level globals by the MLP class and the TensorFlow training cells
minibatch_size = 50 # samples per mini-batch passed to Dataset.batch
learning_rate = 0.001 # step size of the optimizers created inside MLP

## model 1
size_input = 784 # flattened image input (img shape: 28*28)
size_hidden = 256 # width of every hidden layer
size_output = 10 # number of classes (depth of the one-hot labels)
beta = 0.1 # scales the error-feedback displacement in MLP.compute_lra_updates
gamma = 1.0 # scales the updates applied to E2..E4 in MLP.compute_lra_updates


# Define class to build mlp model
class MLP(object):
    """Four-layer perceptron with sine hidden activations, trainable by BP or LRA-E.

    Layout: size_input -> size_hidden -> size_hidden -> size_hidden -> size_output.
    Besides the forward weights W1..W4 and biases b1..b4 the model owns three
    error-feedback matrices E2..E4, which compute_lra_updates uses to turn the
    error of one layer into a target displacement for the layer below.

    Every forward pass caches its pre-activations (what, what1, what2, logits)
    and post-activations (hhat, hhat1, hhat2, z4) on the instance.

    The class reads the module-level hyper-parameters learning_rate, beta and gamma.

    NOTE(review): variables_w does not contain b1..b4, so neither backward nor
    compute_lra_updates ever changes the biases -- confirm this is intended.
    """

    def __init__(self, size_input, size_hidden, size_output, device=None):
        """
        size_input: int, size of input layer
        size_hidden: int, size of each of the three hidden layers
        size_output: int, size of output layer
        device: str or None, either 'cpu' or 'gpu' or None. If None, the device to be used will be decided automatically during Eager Execution
        """
        self.size_input, self.size_hidden, self.size_output, self.device =\
        size_input, size_hidden, size_output, device

        # The random draws below are kept in a fixed order (W1, W2, W3, W4, E2, E3, E4)
        # so that a given tf.random.set_seed value keeps producing the same initial model.

        # Input -> hidden layer 1
        self.W1 = tf.Variable(tf.random.normal([self.size_input, self.size_hidden],stddev=0.1),name="W1")
        self.b1 = tf.Variable(tf.zeros([1, self.size_hidden]), name = "b1")
        # Hidden layer 1 -> hidden layer 2
        self.W2 = tf.Variable(tf.random.normal([self.size_hidden, self.size_hidden],stddev=0.1),name="W2")
        self.b2 = tf.Variable(tf.zeros([1, self.size_hidden]),name="b2")
        # Hidden layer 2 -> hidden layer 3
        self.W3 = tf.Variable(tf.random.normal([self.size_hidden, self.size_hidden],stddev=0.1),name="W3")
        self.b3 = tf.Variable(tf.zeros([1, self.size_hidden]),name="b3")
        # Hidden layer 3 -> output layer
        self.W4 = tf.Variable(tf.random.normal([self.size_hidden, self.size_output],stddev=0.1),name="W4")
        self.b4 = tf.Variable(tf.zeros([1, self.size_output]),name="b4")

        # Error-feedback matrices used only by compute_lra_updates
        self.E2 = tf.Variable(tf.random.normal([self.size_hidden, self.size_hidden],stddev=1.0),name="E2")
        self.E3 = tf.Variable(tf.random.normal([self.size_hidden, self.size_hidden],stddev=1.0),name="E3")
        self.E4 = tf.Variable(tf.random.normal([self.size_output, self.size_hidden],stddev=1.0),name="E4")

        # Variables handed to the optimizer by both training rules. The order must
        # match the gradient list assembled in compute_lra_updates.
        self.variables_w = [self.W1, self.W2,self.W3,self.W4,self.E2,self.E3,self.E4]

    # prediction
    def forward(self, X):
        """
        forward pass
        X: Tensor, inputs
        Returns the logits (also stored in self.y).
        """
        if self.device is None:
            self.y = self.compute_output(X)
            return self.y

        try:
            with tf.device('gpu:0' if self.device=='gpu' else 'cpu'):
                self.y = self.compute_output(X)
        except Exception:
            # Best-effort fallback: retry without explicit placement. Only Exception
            # is caught so that KeyboardInterrupt / SystemExit are not swallowed;
            # a genuine error inside compute_output is raised again by the retry.
            self.y = self.compute_output(X)
        return self.y

    ## loss function
    def loss(self, y_pred, y_true):
        '''
        Mean softmax cross-entropy between logits and one-hot targets.
        y_pred - Tensor of shape (batch_size, size_output), logits
        y_true - Tensor of shape (batch_size, size_output)
        '''
        y_true_tf = tf.cast(tf.reshape(y_true, (-1, self.size_output)), dtype=tf.float32)
        y_pred_tf = tf.cast(y_pred, dtype=tf.float32)
        return tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y_pred_tf, labels=y_true_tf))

    ##BP backward pass
    def backward(self, X_train, y_train):
        """
        backward pass: one plain gradient-descent step on the cross-entropy loss.
        Runs its own forward pass under a GradientTape. E2..E4 do not take part in
        the forward computation, so only W1..W4 are actually updated.
        """
        # A fresh optimizer per call is fine for plain SGD, which keeps no state
        # between steps; it also picks up the current module-level learning_rate.
        optimizer =  tf.compat.v1.train.GradientDescentOptimizer(learning_rate=learning_rate)
        with tf.GradientTape() as tape:
            predicted = self.forward(X_train)
            current_loss = self.loss(predicted, y_train)
        grads = tape.gradient(current_loss, self.variables_w)
        optimizer.apply_gradients(zip(grads, self.variables_w),
                              global_step=tf.compat.v1.train.get_or_create_global_step())

    ## forward pass to get pre and post activations
    def compute_output(self, X):
        """
        Custom method to obtain output tensor during forward pass.
        Caches every pre-/post-activation on the instance and returns the logits
        (the softmax of the logits is stored in self.z4).
        """
        # Cast X to float32
        X_tf = tf.cast(X, dtype=tf.float32)
        #Remember to normalize your dataset before moving forward
        # Compute values in the three hidden layers
        self.what = tf.matmul(X_tf, self.W1) + self.b1
        self.hhat = tf.math.sin(self.what)
        self.what1 = tf.matmul(self.hhat,self.W2)+ self.b2
        self.hhat1 = tf.math.sin(self.what1)
        self.what2 = tf.matmul(self.hhat1,self.W3) + self.b3
        self.hhat2 = tf.math.sin(self.what2)
        # Compute output
        self.logits = tf.matmul(self.hhat2, self.W4) + self.b4
        self.z4 = tf.nn.softmax(self.logits)
        # Logits are returned because loss() applies the softmax itself
        return (self.logits)

    def compute_lra_updates(self, X_train, Y_train):
        """
        LRA_update: one LRA-E step on the batch (X_train, Y_train).

        Starting from the output error z4 - Y, each layer's error is sent through
        the matching feedback matrix (E4, E3, E2), scaled by beta and subtracted
        from the pre-activation of the layer below to form that layer's target;
        the local error is the layer's activation minus this target. The weight
        updates are the products of local errors and layer inputs, the E updates
        are the same products scaled by gamma, and all of them are applied with
        plain gradient descent.
        """
        X_tf = tf.cast(X_train, dtype=tf.float32)
        Y_tf = tf.cast(Y_train, dtype=tf.float32)

        # Refresh the cached activations for exactly this batch and the current
        # weights, instead of trusting whatever an earlier call left on the instance.
        self.forward(X_tf)

        optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate=learning_rate)

        # Output layer error and target for hidden layer 3
        e4 = tf.subtract(self.z4, Y_tf)
        d3 = tf.matmul(tf.transpose(self.E4),tf.transpose(e4))
        d3_b = tf.multiply(d3,beta)
        y3_z = tf.math.sin(tf.subtract(tf.transpose(self.what2),(d3_b)))

        # Hidden layer 3 error and target for hidden layer 2
        e3 = tf.subtract(self.hhat2,tf.transpose(y3_z))
        d2 = tf.matmul(self.E3,tf.transpose(e3))
        d2_b = tf.multiply(d2,beta)
        y2_z = tf.math.sin(tf.subtract(self.what1,tf.transpose(d2_b)))

        # Hidden layer 2 error and target for hidden layer 1
        e2 = tf.subtract(self.hhat1,y2_z)
        d1 = tf.matmul(self.E2,tf.transpose(e2))
        d1_b = tf.multiply(d1,beta)
        y1_z = tf.math.sin(tf.subtract(self.what,tf.transpose(d1_b)))

        e1 = tf.subtract(self.hhat,y1_z)

        # Local updates: (error)^T @ (layer input), i.e. shaped [units_out, units_in]
        dW4 = tf.matmul(e4,self.hhat2,transpose_a=True)
        dW3 = tf.matmul(e3,self.hhat1,transpose_a = True)
        dW2 = tf.matmul(e2,self.hhat,transpose_a = True)
        dW1 = tf.matmul(e1,X_tf,transpose_a = True)

        # Feedback-matrix updates reuse the untransposed products, scaled by gamma
        dW4_e = tf.multiply(dW4,gamma)
        dW3_e = tf.multiply(dW3,gamma)
        dW2_e = tf.multiply(dW2,gamma)

        # Same order as self.variables_w: W1, W2, W3, W4, E2, E3, E4
        grads_w = [tf.transpose(dW1),tf.transpose(dW2),tf.transpose(dW3),tf.transpose(dW4),dW2_e, dW3_e, dW4_e]

        optimizer.apply_gradients(zip(grads_w, self.variables_w),global_step=tf.compat.v1.train.get_or_create_global_step())
        

def accuracy_function(yhat,true_y):
  """Fraction of rows whose arg-max class in `yhat` matches the arg-max of `true_y`.

  `yhat` is passed through a softmax first; both arg-maxes are taken along axis 1.
  Returns a scalar float32 tensor.
  """
  class_probs = tf.nn.softmax(yhat)
  predicted_class = tf.argmax(class_probs, 1)
  target_class = tf.argmax(true_y, 1)
  hits = tf.cast(tf.equal(predicted_class, target_class), tf.float32)
  return tf.reduce_mean(hits)

# Backprop baseline. The model asks for GPU placement (device='gpu'); the variable
# keeps its historical name `mlp_on_cpu` because later cells refer to it.
mlp_on_cpu = MLP(size_input, size_hidden, size_output, device='gpu')

num_epochs = 20

time_start = time.time()
# Number of training samples actually in use after the validation hold-out
num_train = int(image_train.shape[0])


for epoch in range(num_epochs):
    # Shuffled mini-batches over the training split, rebuilt at the start of every epoch
    train_ds = tf.data.Dataset.from_tensor_slices((image_train, label_train)).map(lambda x, y: (x, tf.cast(y, tf.float32))).shuffle(buffer_size=1000)\
       .batch(batch_size=minibatch_size)
    # Running sum of the per-batch losses; a plain float32 tensor is enough
    # because the name is rebound on every addition
    loss_total = tf.constant(0.0, dtype=tf.float32)
    epoch_k = 0.0
    for inputs, outputs in train_ds:
        # Loss is measured before the weights are updated on this batch
        preds = mlp_on_cpu.forward(inputs)
        loss_total = loss_total + mlp_on_cpu.loss(preds, outputs)
        epoch_k+=1
        mlp_on_cpu.backward(inputs,outputs)
    print('Number of Epoch = {} - loss:= {:.4f}'.format(epoch + 1, loss_total.numpy() / epoch_k))

    # Accuracy on the full training split, in percent
    preds = mlp_on_cpu.compute_output(image_train)
    accuracy_train = accuracy_function(preds,label_train)
    accuracy_train = accuracy_train * 100
    print ("Training Accuracy = {}".format(accuracy_train.numpy()))

    # Accuracy on the validation split, in percent
    preds_val = mlp_on_cpu.compute_output(image_val)
    accuracy_val = accuracy_function(preds_val,label_val)
    accuracy_val = accuracy_val * 100
    print ("Validation Accuracy = {}".format(accuracy_val.numpy()))


# test accuracy, in percent
preds_test = mlp_on_cpu.compute_output(image_test)
accuracy_test = accuracy_function(preds_test,label_test)
accuracy_test = accuracy_test * 100
print ("Test Accuracy = {}".format(accuracy_test.numpy()))


# Wall-clock time of the whole cell from time_start on (training plus all evaluations)
time_taken = time.time() - time_start
print('\nTotal time taken (in seconds): {:.2f}'.format(time_taken))
#For per epoch_time = Total_Time / Number_of_epochs

Number of Epoch = 1 - loss:= 1.1333
Training Accuracy = 73.9780044555664
Validation Accuracy = 73.62999725341797
Number of Epoch = 2 - loss:= 0.6980
Training Accuracy = 77.8800048828125
Validation Accuracy = 77.42000579833984
Number of Epoch = 3 - loss:= 0.6122
Training Accuracy = 79.80400085449219
Validation Accuracy = 78.93000030517578
Number of Epoch = 4 - loss:= 0.5660
Training Accuracy = 81.00399780273438
Validation Accuracy = 79.97999572753906
Number of Epoch = 5 - loss:= 0.5352
Training Accuracy = 81.85399627685547
Validation Accuracy = 80.65999603271484
Number of Epoch = 6 - loss:= 0.5124
Training Accuracy = 82.51399993896484
Validation Accuracy = 81.1500015258789
Number of Epoch = 7 - loss:= 0.4943
Training Accuracy = 83.05799865722656
Validation Accuracy = 81.72000122070312
Number of Epoch = 8 - loss:= 0.4794
Training Accuracy = 83.55000305175781
Validation Accuracy = 82.06999969482422
Number of Epoch = 9 - loss:= 0.4669
Training Accuracy = 83.93599700927734
Validation Accura

## First points
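* Replace 10% or 15% of the test samples with adversarial examples.
* Create the test set using torchattacks (`atk`), then test BP, DFA and LRA-E on it (original + attacked samples together make up 100% of the set).
* Create the validation set using torchattacks (`atk`) as well, and regularize the model while training.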

## Pending Work:
* Run several trials (5-10) for stability and report the average value
* DFA code analysis and debugging
* Comparison of BP vs LRA-E with respect to activation functions
* Hyperparameter tuning of LRA-E - beta and gamma
* Confusion matrix for the top/worst models (0% and 100%)

In [None]:
!pip install torchattacks

Collecting torchattacks
  Downloading torchattacks-3.2.6-py3-none-any.whl (105 kB)
[?25l[K     |███                             | 10 kB 22.4 MB/s eta 0:00:01[K     |██████▏                         | 20 kB 27.9 MB/s eta 0:00:01[K     |█████████▎                      | 30 kB 22.1 MB/s eta 0:00:01[K     |████████████▍                   | 40 kB 14.1 MB/s eta 0:00:01[K     |███████████████▌                | 51 kB 6.5 MB/s eta 0:00:01[K     |██████████████████▋             | 61 kB 7.5 MB/s eta 0:00:01[K     |█████████████████████▊          | 71 kB 8.2 MB/s eta 0:00:01[K     |████████████████████████▉       | 81 kB 9.1 MB/s eta 0:00:01[K     |███████████████████████████▉    | 92 kB 10.1 MB/s eta 0:00:01[K     |███████████████████████████████ | 102 kB 8.9 MB/s eta 0:00:01[K     |████████████████████████████████| 105 kB 8.9 MB/s 
[?25hInstalling collected packages: torchattacks
Successfully installed torchattacks-3.2.6


In [None]:
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import torch
import torch.nn as nn
import torch.optim as optim

import torchvision.utils
from torchvision import models
import torchvision.datasets as dsets
import torchvision.transforms as transforms

import torchattacks
from torchattacks import PGD, FGSM


In [None]:
class CNN(nn.Module):
    """Convolutional classifier: three 5x5 convolutions, two 2x2 max-pools and a two-layer head.

    The shape comments below hold for single-channel 28x28 inputs, which is the
    only input size the fully connected head (64*3*3 features) is built for.
    forward returns one row of 10 logits per input image.
    """
    def __init__(self):
        super().__init__()
        self.layer = nn.Sequential(
            nn.Conv2d(1,16,5), # 16*24*24
            nn.ReLU(),
            nn.Conv2d(16,32,5), # 32*20*20
            nn.ReLU(),
            nn.MaxPool2d(2,2), # 32*10*10
            nn.Conv2d(32,64,5), # 64*6*6
            nn.ReLU(),
            nn.MaxPool2d(2,2) #64*3*3
        )

        self.fc_layer = nn.Sequential(
            nn.Linear(64*3*3,100),
            nn.ReLU(),
            nn.Linear(100,10)
        )

    def forward(self,x):
        """Return the logits for `x`.

        Raises ValueError when the convolutional stack does not end in a 64x3x3
        feature map, i.e. when the input is not 28x28.
        """
        out = self.layer(x)
        # Without this check, view(-1, 576) would silently fold surplus spatial
        # positions into the batch dimension and return more rows than inputs.
        if tuple(out.shape[-3:]) != (64, 3, 3):
            raise ValueError(
                "CNN expects 1x28x28 inputs; got feature map of shape %s"
                % (tuple(out.shape[-3:]),)
            )
        out = out.view(-1,64*3*3)
        out = self.fc_layer(out)

        return out

In [None]:
class MLP2(nn.Module):
  '''
    Fully connected classifier: the input is flattened, then passed through
    784 -> 256 -> 256 -> 256 -> 10 linear layers with ReLU after each hidden layer.
  '''
  def __init__(self):
    super().__init__()
    # Hidden stack: (Linear, ReLU) pairs, built in layer order
    hidden = []
    for fan_in, fan_out in ((784, 256), (256, 256), (256, 256)):
      hidden += [nn.Linear(fan_in, fan_out), nn.ReLU()]
    # The last layer emits raw logits, so no activation follows it
    self.layers = nn.Sequential(nn.Flatten(), *hidden, nn.Linear(256, 10))


  def forward(self, x):
    '''Run `x` through the layer stack and return the 10 logits per sample'''
    logits = self.layers(x)
    return logits

In [None]:
# Fashion-MNIST through torchvision (the `mnist_*` names are historical).
# Training split first, then test split; both use the ToTensor transform and
# download=True with ./data/ as root.
mnist_train, mnist_test = (
    dsets.FashionMNIST(root='./data/',
                       train=is_train,
                       transform=transforms.ToTensor(),
                       download=True)
    for is_train in (True, False)
)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/26421880 [00:00<?, ?it/s]

Extracting ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/29515 [00:00<?, ?it/s]

Extracting ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/4422102 [00:00<?, ?it/s]

Extracting ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/5148 [00:00<?, ?it/s]

Extracting ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw



In [None]:
from sklearn.utils import shuffle
from torch.utils.data import SubsetRandomSampler
from sklearn.model_selection import train_test_split
batch_size = 128

# Hold out one sixth of the torchvision training set for validation.
# shuffle=False makes the split positional (leading indices -> train, trailing
# indices -> validation), so it is identical on every run and mirrors the
# positional 50000/10000 split applied to the TensorFlow arrays.
# NOTE(review): this assumes torchvision and Keras deliver the samples in the
# same order -- confirm before comparing the two pipelines sample by sample.
test_size = 1.0/6.0
train_indices, test_indices, _, _ = train_test_split(
    range(len(mnist_train)),
    mnist_train.targets,
    test_size=test_size,
    shuffle= False
)

# Build the subsets from the indices computed above instead of an unseeded
# random_split, which drew a different validation set on every kernel restart.
train_subset = torch.utils.data.Subset(mnist_train, train_indices)
val_subset = torch.utils.data.Subset(mnist_train, test_indices)
assert len(train_subset) + len(val_subset) == len(mnist_train)

In [None]:
batch_size = 128

# Loader used to fit the surrogate model; iterates the full torchvision training set in order
train_loader  = torch.utils.data.DataLoader(dataset=mnist_train,
                                           batch_size=batch_size,
                                           shuffle=False)

# Loader over the held-out validation subset. Later cells iterate it and read
# `valid_loader.dataset.indices`, so it has to be defined here.
valid_loader = torch.utils.data.DataLoader(dataset=val_subset,
                                           batch_size=2000,
                                           shuffle=False)

test_loader = torch.utils.data.DataLoader(dataset=mnist_test,
                                         batch_size=500,
                                         shuffle=False)

In [None]:
# PyTorch surrogate classifier that the PGD / FGSM attacks are built against; kept on the CPU
model = MLP2().cpu()

In [None]:
# Cross-entropy objective and Adam optimizer (lr=0.001) for fitting `model`
loss = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Adversarial training of the surrogate: every batch is replaced by its PGD
# counterpart (eps=0.3, step size alpha=0.1, 7 steps) before the optimizer step.
atk = PGD(model, eps=0.3, alpha=0.1, steps=7)
num_epochs = 5

# Number of batches the loader really yields per epoch. Taken from the loader
# itself so the progress line also counts the final, partially filled batch.
total_batch = len(train_loader)

for epoch in range(num_epochs):

    for i, (batch_images, batch_labels) in enumerate(train_loader):
        X = atk(batch_images, batch_labels).cpu()
        Y = batch_labels.cpu()

        pre = model(X)
        cost = loss(pre, Y)

        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        # Progress line every 100 batches
        if (i+1) % 100 == 0:
            print('Epoch [%d/%d], lter [%d/%d], Loss: %.4f'
                 %(epoch+1, num_epochs, i+1, total_batch, cost.item()))

Epoch [1/5], lter [100/468], Loss: 2.2823
Epoch [1/5], lter [200/468], Loss: 2.2186
Epoch [1/5], lter [300/468], Loss: 2.1194
Epoch [1/5], lter [400/468], Loss: 2.1762
Epoch [2/5], lter [100/468], Loss: 1.9646
Epoch [2/5], lter [200/468], Loss: 2.0425
Epoch [2/5], lter [300/468], Loss: 1.9524
Epoch [2/5], lter [400/468], Loss: 2.1418
Epoch [3/5], lter [100/468], Loss: 1.8550
Epoch [3/5], lter [200/468], Loss: 1.9606
Epoch [3/5], lter [300/468], Loss: 1.9068
Epoch [3/5], lter [400/468], Loss: 2.0681
Epoch [4/5], lter [100/468], Loss: 1.8216
Epoch [4/5], lter [200/468], Loss: 1.8657
Epoch [4/5], lter [300/468], Loss: 1.8348
Epoch [4/5], lter [400/468], Loss: 2.0214
Epoch [5/5], lter [100/468], Loss: 1.7388
Epoch [5/5], lter [200/468], Loss: 1.7698
Epoch [5/5], lter [300/468], Loss: 1.7958
Epoch [5/5], lter [400/468], Loss: 1.9583


In [None]:
model.eval()

# One-step FGSM adversary (eps=0.3) built against the surrogate `model`
atk = FGSM(model, eps=0.3)
accuracy_test = 0.0

img = []
lbl = []
# enumerate() supplies the batch counter, so the builtin `iter` is no longer
# overwritten in the notebook namespace.
for batch_idx, (images, labels) in enumerate(test_loader):
    # Only the first batch is perturbed; every later batch is appended unchanged.
    # The attacked share of the set therefore equals one loader batch.
    if batch_idx == 0:
      images = atk(images, labels).cpu()
    # Flatten to 784-vectors and convert to the float32 tensors the TF model expects
    images = images.reshape(images.shape[0], 28*28)
    images = tf.cast(images, tf.float32)
    img.append(images)
    label1 = tf.one_hot(labels,depth=10)
    label1 = tf.cast(label1, tf.float32)
    lbl.append(label1)


# Mixed (partly adversarial) test set, evaluated with the TensorFlow model
img_test = tf.concat(img,0)
print(img_test.shape)
lbl_test = tf.concat(lbl,0)
predicted = mlp_on_cpu.compute_output(img_test)
accuracy_test = accuracy_function(predicted,lbl_test)

accuracy_test = accuracy_test * 100
print ("Test Accuracy = {}".format(accuracy_test.numpy()))

(10000, 784)
Test Accuracy = 41.68000030517578


In [None]:
# Peek at the first few dataset indices behind the validation loader instead of
# printing the whole index list
valid_loader.dataset.indices[:10]

In [None]:
model.eval()

# One-step FGSM adversary (eps=0.3) built against the surrogate `model`
atk = FGSM(model, eps=0.3)
accuracy_test = 0.0

img = []
lbl = []
# enumerate() supplies the batch counter, so the builtin `iter` is no longer
# overwritten in the notebook namespace.
for batch_idx, (images, labels) in enumerate(valid_loader):
    # Only the first batch is perturbed; every later batch is appended unchanged.
    # The attacked share of the set therefore equals one loader batch.
    if batch_idx == 0:
      images = atk(images, labels).cpu()
    # Flatten to 784-vectors and convert to the float32 tensors the TF model expects
    images = images.reshape(images.shape[0], 28*28)
    images = tf.cast(images, tf.float32)
    img.append(images)
    label1 = tf.one_hot(labels,depth=10)
    label1 = tf.cast(label1, tf.float32)
    lbl.append(label1)


# NOTE: this rebinds image_val / label_val; every later cell validates on this
# mixed (partly adversarial) set rather than on the clean validation split.
image_val = tf.concat(img,0)
print(image_val.shape)
label_val = tf.concat(lbl,0)


(10000, 784)


In [None]:
# LRA-E run. The model asks for GPU placement (device='gpu'); the variable keeps
# its historical name `mlp_on_cpu2`.
mlp_on_cpu2 = MLP(size_input, size_hidden, size_output, device='gpu')

num_epochs = 20

time_start = time.time()
# Number of training samples actually in use after the validation hold-out
num_train = int(image_train.shape[0])

for epoch in range(num_epochs):
    # Shuffled mini-batches over the training split, rebuilt at the start of every epoch
    train_ds = tf.data.Dataset.from_tensor_slices((image_train, label_train)).map(lambda x, y: (x, tf.cast(y, tf.float32))).shuffle(buffer_size=1000)\
       .batch(batch_size=minibatch_size)
    # Running sum of the per-batch losses; a plain float32 tensor is enough
    # because the name is rebound on every addition
    loss_total = tf.constant(0.0, dtype=tf.float32)
    epoch_k = 0.0
    for inputs, outputs in train_ds:
        # The forward pass comes first: it yields the loss for logging and leaves
        # the activations of this batch cached on the model for the LRA-E update
        preds = mlp_on_cpu2.forward(inputs)
        loss_total = loss_total + mlp_on_cpu2.loss(preds, outputs)
        epoch_k+=1
        mlp_on_cpu2.compute_lra_updates(inputs, outputs)
    print('Number of Epoch = {} - loss:= {:.4f}'.format(epoch + 1, loss_total.numpy() / epoch_k))

    # Accuracy on the full training split, in percent
    preds = mlp_on_cpu2.compute_output(image_train)
    accuracy_train = accuracy_function(preds,label_train)
    accuracy_train = accuracy_train * 100
    print ("Training Accuracy = {}".format(accuracy_train.numpy()))

    # Accuracy on whatever image_val / label_val currently hold, in percent
    preds_val = mlp_on_cpu2.compute_output(image_val)
    accuracy_val = accuracy_function(preds_val,label_val)
    accuracy_val = accuracy_val * 100
    print ("Validation Accuracy = {}".format(accuracy_val.numpy()))


# test accuracy, in percent
preds_test = mlp_on_cpu2.compute_output(image_test)
accuracy_test = accuracy_function(preds_test,label_test)
accuracy_test = accuracy_test * 100
print ("Test Accuracy = {}".format(accuracy_test.numpy()))


# Wall-clock time of the whole cell from time_start on (training plus all evaluations)
time_taken = time.time() - time_start
print('\nTotal time taken (in seconds): {:.2f}'.format(time_taken))
#For per epoch_time = Total_Time / Number_of_epochs

Number of Epoch = 1 - loss:= 0.4955
Training Accuracy = 91.11000061035156
Validation Accuracy = 85.15999603271484
Number of Epoch = 2 - loss:= 0.2607
Training Accuracy = 93.34200286865234
Validation Accuracy = 87.0999984741211
Number of Epoch = 3 - loss:= 0.2047
Training Accuracy = 94.66799926757812
Validation Accuracy = 87.9800033569336
Number of Epoch = 4 - loss:= 0.1691
Training Accuracy = 95.59400177001953
Validation Accuracy = 88.45999908447266
Number of Epoch = 5 - loss:= 0.1446
Training Accuracy = 96.25199890136719
Validation Accuracy = 88.52000427246094
Number of Epoch = 6 - loss:= 0.1263
Training Accuracy = 96.60199737548828
Validation Accuracy = 88.94000244140625
Number of Epoch = 7 - loss:= 0.1134
Training Accuracy = 97.1259994506836
Validation Accuracy = 89.47000122070312
Number of Epoch = 8 - loss:= 0.1021
Training Accuracy = 97.43599700927734
Validation Accuracy = 89.59000396728516
Number of Epoch = 9 - loss:= 0.0923
Training Accuracy = 97.72200012207031
Validation Accura

In [None]:
#import torch

#image_test1 = image_test + torch.randn((image_test.shape[0],image_test.shape[1])) * .1 + 0.0
#image_test1 = tf.clip_by_value(image_test1, clip_value_min=0, clip_value_max=1)
#preds_test = mlp_on_cpu.compute_output(image_test1)
#accuracy_test = accuracy_function(preds_test,label_test)
#accuracy_test = accuracy_test * 100
#print ("Test Accuracy = {}".format(accuracy_test.numpy()))