# Training Script 
Direction: Kana to Alpha

Encoder: GRU

Decoder: GRU

Hyper Parameter: *NUM_UNITS*

In [1]:
import tensorflow as tf

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from sklearn.model_selection import train_test_split

import unicodedata
import re
import numpy as np
import os
import io
import time
import datetime

import json

from functools import total_ordering

from RedBlackTree.rbtree import RedBlackNode
from RedBlackTree.rbtree import RedBlackTree
from StackDecoder.stack_decoder import StackDecoderPath
from StackDecoder.stack_decoder import StackDecoder


## Parameter Definitions

In [2]:
TOKENIZER_ALPHAS                 = 'training_data/alphas_tokenizer.json'
TOKENIZER_KANAS                  = 'training_data/kanas_tokenizer.json'

TRANING_DATA_FILE_90_10_10       = "training_data/alpha_to_kana_train.txt"
VALIDATION_DATA_FILE_90_10_10    = "training_data/alpha_to_kana_validation.txt"

EPOCHS                           = 1000
BATCH_SIZE                       =   64
NUM_UNITS                        =  512 # <= Hyper Parameter

VALIDATION_BEAM_WIDTH            =    5
VALIDATION_NBEST                 =    5
VALIDATION_MAX_LEN_KANAS_CUTOFF  =   12
VALIDATION_MAX_LEN_ALPHAS_CUTOFF =   16

CHECKPOINT_DIR                   = f'training_output/kana_to_alpha_{str(NUM_UNITS)}_wo_attn'

## Arrange Tokeniers and Training & Validation Sets

In [3]:
# Load tokenizers
with open(TOKENIZER_ALPHAS) as f:
    data = json.load(f)
    alphas_tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(data)

with open(TOKENIZER_KANAS) as f:
    data = json.load(f)
    kanas_tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(data)
    
EMB_DIM_ALPHAS = len( alphas_tokenizer.word_index ) + 1
EMB_DIM_KANAS  = len( kanas_tokenizer.word_index  ) + 1

In [4]:
# Load training data and validation data

train_kanas = []
train_alphas = []
with open( TRANING_DATA_FILE_90_10_10, "r", encoding="utf-8" ) as fp_train:
    for line in fp_train:
        alpha, kana = line.strip().split('\t')
        train_kanas.append(kana)
        train_alphas.append(alpha)

valid_kanas = []
valid_alphas = []
with open( VALIDATION_DATA_FILE_90_10_10, "r", encoding="utf-8" ) as fp_valid:
    for line in fp_valid:
        alpha, kana = line.strip().split( '\t' )
        valid_alphas.append( '<' + alpha + '>' )
        valid_kanas.append( '<' + kana + '>' )

validation_pairs = list(zip(valid_alphas, valid_kanas))


# Interleave with spaces so that we can utilize Kera's tokenizer.

train_kanas_spaced = []
for kana_str in train_kanas:
    kana_list = []
    kana_list[:0] = kana_str
    train_kanas_spaced.append( "< " + ' '.join(kana_list) + " >" ) 

train_alphas_spaced = []
for alpha_str in train_alphas:
    alpha_list = []
    alpha_list[:0] = alpha_str
    train_alphas_spaced.append( "< " + ' '.join(alpha_list) + " >" ) 

train_alphas_tensor = alphas_tokenizer.texts_to_sequences(train_alphas_spaced)
train_alphas_tensor = tf.keras.preprocessing.sequence.pad_sequences(train_alphas_tensor, padding='post')

train_kanas_tensor  = kanas_tokenizer.texts_to_sequences(train_kanas_spaced)
train_kanas_tensor  = tf.keras.preprocessing.sequence.pad_sequences(train_kanas_tensor, padding='post')

max_length_alphas, max_length_kanas = train_alphas_tensor.shape[1], train_kanas_tensor.shape[1]

In [5]:
BUFFER_SIZE = len(train_alphas_tensor)
steps_per_epoch = len(train_alphas_tensor)//BATCH_SIZE

dataset = tf.data.Dataset.from_tensor_slices((train_alphas_tensor, train_kanas_tensor)).shuffle(BUFFER_SIZE)
dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)

## Create Encoder and Decoder

In [6]:
class Encoder(tf.keras.Model):
  def __init__(self, vocab_size, embedding_dim, enc_units, batch_sz):
    super(Encoder, self).__init__()
    self.batch_sz = batch_sz
    self.enc_units = enc_units
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    self.gru = tf.keras.layers.GRU(self.enc_units,
                                   return_sequences=True,
                                   return_state=True,
                                   recurrent_initializer='glorot_uniform')

  def call(self, x, state):
    x = self.embedding(x)
    x, state = self.gru(x, initial_state = state)
    return x, state

  def initialize_hidden_state(self):
    return tf.zeros((self.batch_sz, self.enc_units))

In [7]:
encoder = Encoder(EMB_DIM_KANAS, EMB_DIM_KANAS, NUM_UNITS, BATCH_SIZE)

In [8]:
class Decoder(tf.keras.Model):
  def __init__(self, vocab_size, embedding_dim, dec_units, batch_sz):
    super(Decoder, self).__init__()
    self.batch_sz = batch_sz
    self.dec_units = dec_units
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    self.gru = tf.keras.layers.GRU(self.dec_units,
                                   return_sequences=False,
                                   return_state=True,
                                   recurrent_initializer='glorot_uniform')
    self.fc = tf.keras.layers.Dense(vocab_size)   

  def call(self, x, state):
    x = self.embedding(x)
    x, state = self.gru(x, state)
    x = self.fc(x)
    return x, state

In [9]:
decoder = Decoder(EMB_DIM_ALPHAS, EMB_DIM_ALPHAS, NUM_UNITS, BATCH_SIZE)

## Check the Shapes of the Encoder and the Decoder

In [10]:
for alpha, kana in dataset.take(steps_per_epoch):
    encoder_state = encoder.initialize_hidden_state()
    encoder_out, encoder_state2 = encoder(kana, encoder_state)
    decoder_state = encoder_state
    decoder_pred, decoder_state2 = decoder(alpha, decoder_state)
    break
kana.shape, alpha.shape, encoder_state.shape, encoder_out.shape, encoder_state2.shape, decoder_pred.shape, decoder_state2.shape

(TensorShape([64, 14]),
 TensorShape([64, 18]),
 TensorShape([64, 512]),
 TensorShape([64, 14, 512]),
 TensorShape([64, 512]),
 TensorShape([64, 30]),
 TensorShape([64, 512]))

In [11]:
optimizer = tf.keras.optimizers.Adam()
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction='none')

def loss_function(real, pred):
  mask = tf.math.logical_not(tf.math.equal(real, 0))
  loss_ = loss_object(real, pred)

  mask = tf.cast(mask, dtype=loss_.dtype)
  loss_ *= mask

  return tf.reduce_mean(loss_)

## One Training Step with Forward and Backprop with Incremental Teacher Forcing

In [12]:
@tf.function
def train_step(inp, targ, enc_hidden):
  loss = 0

  with tf.GradientTape() as tape:
    enc_output, enc_hidden = encoder(inp, enc_hidden)

    dec_hidden = enc_hidden

    dec_input = tf.expand_dims([alphas_tokenizer.word_index['<']] * BATCH_SIZE, 1)
    
    # Teacher forcing - feeding the target as the next input
    for t in range(1, targ.shape[1]):
       # passing enc_output to the decoder
      predictions, dec_hidden = decoder(dec_input, dec_hidden)
    
      loss += loss_function(targ[:, t], predictions)
    
      # using teacher forcing
      dec_input = tf.expand_dims(targ[:, t], 1)

    # dec_input = tf.expand_dims(targ, 1)
    # dec_output, dec_hidden = decoder(dec_input, dec_hidden)
    
    
  batch_loss = (loss / int(targ.shape[1]))

  variables = encoder.trainable_variables + decoder.trainable_variables

  gradients = tape.gradient(loss, variables)

  optimizer.apply_gradients(zip(gradients, variables))

  return batch_loss

## Checkpoint to Save the Models

In [13]:
# File outputs (checkpoints and metrics for tensorboard)

checkpoint_prefix = os.path.join(CHECKPOINT_DIR, "ckpt")
checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                 encoder=encoder,
                                 decoder=decoder)

current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
train_log_dir = os.path.join( CHECKPOINT_DIR, current_time, 'train' )
validation_log_dir = os.path.join( CHECKPOINT_DIR, current_time, 'validation' )
train_summary_writer = tf.summary.create_file_writer( train_log_dir )
validation_summary_writer = tf.summary.create_file_writer( validation_log_dir )


## Validation using Validation Set by Nbest Stack Decoder
The score is calculated by edit distance in Nbest against target.

In [14]:
# Validation

# Following levenshtein() is taken from 
# https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance#Python
# under  Creative Commons Attribution-ShareAlike License.
def levenshtein(s1, s2):
    if len(s1) < len(s2):
        return levenshtein(s2, s1)

    # len(s1) >= len(s2)
    if len(s2) == 0:
        return len(s1)

    previous_row = range(len(s2) + 1)
    for i, c1 in enumerate(s1):
        current_row = [i + 1]
        for j, c2 in enumerate(s2):
            insertions = previous_row[j + 1] + 1 # j+1 instead of j since previous_row and current_row are one character longer
            deletions = current_row[j] + 1       # than s2
            substitutions = previous_row[j] + (c1 != c2)
            current_row.append(min(insertions, deletions, substitutions))
        previous_row = current_row
    
    return previous_row[-1]



def validate(sentence_pairs):

    BOS = alphas_tokenizer.word_index['<']
    EOS = alphas_tokenizer.word_index['>']

    stack_decoder = StackDecoder(decoder, BOS, EOS, use_attn = False)

    avg_edit_dist = 0.0
    index = 0
    for alphas, kanas in sentence_pairs:
        if (index % 10 == 0):
            print('validating {}/{}'.format(index, len(sentence_pairs)))
        index += 1
        inputs = [kanas_tokenizer.word_index[i] for i in kanas]
        inputs = tf.keras.preprocessing.sequence.pad_sequences([inputs],
                                                         maxlen=max_length_kanas,
                                                         padding='post')
        inputs = tf.convert_to_tensor(inputs)

        hidden = [tf.zeros((1, NUM_UNITS))]
        enc_out, enc_hidden = encoder(inputs, hidden)

        dec_hidden = enc_hidden
        dec_input = tf.expand_dims([alphas_tokenizer.word_index['<']], 0)

        nbest_raw = stack_decoder.NBest( enc_out, enc_hidden, VALIDATION_BEAM_WIDTH, VALIDATION_NBEST, VALIDATION_MAX_LEN_KANAS_CUTOFF + 2 ) 
    
        min_edit_dist = -1
        for r in nbest_raw:
            candidate = ""
            for i in r.sentence:
                candidate += alphas_tokenizer.index_word[i] 
            edit_dist = levenshtein(alphas, candidate)
            if min_edit_dist == -1 or edit_dist < min_edit_dist:
                min_edit_dist = edit_dist
        avg_edit_dist += min_edit_dist
    return avg_edit_dist / len(sentence_pairs)


## Training Execution

In [15]:
#checkpoint.restore('./CHECKPOINT_DIR/ckpt-1')

for epoch in range(EPOCHS):
  start = time.time()

  enc_hidden = encoder.initialize_hidden_state()
  total_loss = 0

  for (batch, (alpha, kana)) in enumerate(dataset.take(steps_per_epoch)):

    batch_loss = train_step(kana, alpha, enc_hidden)
    total_loss += batch_loss

    if batch % 100 == 0:
      print('Epoch {} Batch {} Loss {:.4f}'.format(epoch + 1,
                                                   batch,
                                                   batch_loss.numpy()))
  # saving (checkpoint) the model every epoch
  checkpoint.save(file_prefix = checkpoint_prefix)

  accuracy = validate(list(validation_pairs)[0:100])
    
  print('Epoch {} Loss {:.4f}'.format(epoch + 1,
                                      total_loss / steps_per_epoch))
  print('Validation Accuracy {:0.4f}'.format(accuracy))
  print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))

  with train_summary_writer.as_default():
    tf.summary.scalar('loss', total_loss / steps_per_epoch, step=epoch)
    tf.summary.scalar('accuracy', accuracy, step=epoch)

Epoch 1 Batch 0 Loss 1.6001
Epoch 1 Batch 100 Loss 1.2233
Epoch 1 Batch 200 Loss 1.1262
Epoch 1 Batch 300 Loss 1.1279
Epoch 1 Batch 400 Loss 1.0528
Epoch 1 Batch 500 Loss 1.0156
Epoch 1 Batch 600 Loss 0.9018
Epoch 1 Batch 700 Loss 0.8792
Epoch 1 Batch 800 Loss 0.9761
Epoch 1 Batch 900 Loss 0.9210
Epoch 1 Batch 1000 Loss 0.8522
Epoch 1 Batch 1100 Loss 0.8102
Epoch 1 Batch 1200 Loss 0.8005
Epoch 1 Batch 1300 Loss 0.7405
Epoch 1 Batch 1400 Loss 0.7707
Epoch 1 Batch 1500 Loss 0.6882
Epoch 1 Batch 1600 Loss 0.6948
Epoch 1 Batch 1700 Loss 0.6951
Epoch 1 Batch 1800 Loss 0.6463
validating 0/100
validating 10/100
validating 20/100
validating 30/100
validating 40/100
validating 50/100
validating 60/100
validating 70/100
validating 80/100
validating 90/100
Epoch 1 Loss 0.9104
Validation Accuracy 3.4600
Time taken for 1 epoch 444.9984362125397 sec

Epoch 2 Batch 0 Loss 0.6072
Epoch 2 Batch 100 Loss 0.6019
Epoch 2 Batch 200 Loss 0.5967
Epoch 2 Batch 300 Loss 0.5779
Epoch 2 Batch 400 Loss 0.5398
Epo

Epoch 10 Batch 1800 Loss 0.1293
validating 0/100
validating 10/100
validating 20/100
validating 30/100
validating 40/100
validating 50/100
validating 60/100
validating 70/100
validating 80/100
validating 90/100
Epoch 10 Loss 0.1401
Validation Accuracy 1.0100
Time taken for 1 epoch 427.99034905433655 sec

Epoch 11 Batch 0 Loss 0.1034
Epoch 11 Batch 100 Loss 0.1219
Epoch 11 Batch 200 Loss 0.1009
Epoch 11 Batch 300 Loss 0.1040
Epoch 11 Batch 400 Loss 0.1214
Epoch 11 Batch 500 Loss 0.1017
Epoch 11 Batch 600 Loss 0.1725
Epoch 11 Batch 700 Loss 0.1308
Epoch 11 Batch 800 Loss 0.1130
Epoch 11 Batch 900 Loss 0.1108
Epoch 11 Batch 1000 Loss 0.1324
Epoch 11 Batch 1100 Loss 0.1347
Epoch 11 Batch 1200 Loss 0.1453
Epoch 11 Batch 1300 Loss 0.1715
Epoch 11 Batch 1400 Loss 0.1304
Epoch 11 Batch 1500 Loss 0.1057
Epoch 11 Batch 1600 Loss 0.1584
Epoch 11 Batch 1700 Loss 0.1262
Epoch 11 Batch 1800 Loss 0.1411
validating 0/100
validating 10/100
validating 20/100
validating 30/100
validating 40/100
validatin

Epoch 20 Batch 200 Loss 0.0553
Epoch 20 Batch 300 Loss 0.0793
Epoch 20 Batch 400 Loss 0.0693
Epoch 20 Batch 500 Loss 0.0594
Epoch 20 Batch 600 Loss 0.0612
Epoch 20 Batch 700 Loss 0.0824
Epoch 20 Batch 800 Loss 0.0899
Epoch 20 Batch 900 Loss 0.0775
Epoch 20 Batch 1000 Loss 0.0535
Epoch 20 Batch 1100 Loss 0.0838
Epoch 20 Batch 1200 Loss 0.0781
Epoch 20 Batch 1300 Loss 0.0646
Epoch 20 Batch 1400 Loss 0.1061
Epoch 20 Batch 1500 Loss 0.0566
Epoch 20 Batch 1600 Loss 0.0860
Epoch 20 Batch 1700 Loss 0.0944
Epoch 20 Batch 1800 Loss 0.1129
validating 0/100
validating 10/100
validating 20/100
validating 30/100
validating 40/100
validating 50/100
validating 60/100
validating 70/100
validating 80/100
validating 90/100
Epoch 20 Loss 0.0762
Validation Accuracy 1.0400
Time taken for 1 epoch 430.89204621315 sec

Epoch 21 Batch 0 Loss 0.0540
Epoch 21 Batch 100 Loss 0.0453
Epoch 21 Batch 200 Loss 0.0624
Epoch 21 Batch 300 Loss 0.0495
Epoch 21 Batch 400 Loss 0.0513
Epoch 21 Batch 500 Loss 0.0682
Epoch 21 

Epoch 29 Batch 1400 Loss 0.0612
Epoch 29 Batch 1500 Loss 0.1049
Epoch 29 Batch 1600 Loss 0.0751
Epoch 29 Batch 1700 Loss 0.0690
Epoch 29 Batch 1800 Loss 0.0874
validating 0/100
validating 10/100
validating 20/100
validating 30/100
validating 40/100
validating 50/100
validating 60/100
validating 70/100
validating 80/100
validating 90/100
Epoch 29 Loss 0.0643
Validation Accuracy 1.2200
Time taken for 1 epoch 438.7625346183777 sec

Epoch 30 Batch 0 Loss 0.0653
Epoch 30 Batch 100 Loss 0.0450
Epoch 30 Batch 200 Loss 0.0576
Epoch 30 Batch 300 Loss 0.0480
Epoch 30 Batch 400 Loss 0.0666
Epoch 30 Batch 500 Loss 0.0634
Epoch 30 Batch 600 Loss 0.0833
Epoch 30 Batch 700 Loss 0.0482
Epoch 30 Batch 800 Loss 0.0603
Epoch 30 Batch 900 Loss 0.0604
Epoch 30 Batch 1000 Loss 0.0818
Epoch 30 Batch 1100 Loss 0.0744
Epoch 30 Batch 1200 Loss 0.0554
Epoch 30 Batch 1300 Loss 0.0629
Epoch 30 Batch 1400 Loss 0.0773
Epoch 30 Batch 1500 Loss 0.0897
Epoch 30 Batch 1600 Loss 0.0678
Epoch 30 Batch 1700 Loss 0.0777
Epo

Epoch 39 Batch 0 Loss 0.0552
Epoch 39 Batch 100 Loss 0.0432
Epoch 39 Batch 200 Loss 0.0318
Epoch 39 Batch 300 Loss 0.0641
Epoch 39 Batch 400 Loss 0.0526
Epoch 39 Batch 500 Loss 0.0619
Epoch 39 Batch 600 Loss 0.0556
Epoch 39 Batch 700 Loss 0.0669
Epoch 39 Batch 800 Loss 0.0608
Epoch 39 Batch 900 Loss 0.0594
Epoch 39 Batch 1000 Loss 0.0603
Epoch 39 Batch 1100 Loss 0.0810
Epoch 39 Batch 1200 Loss 0.0761
Epoch 39 Batch 1300 Loss 0.0874
Epoch 39 Batch 1400 Loss 0.0670
Epoch 39 Batch 1500 Loss 0.0593
Epoch 39 Batch 1600 Loss 0.0935
Epoch 39 Batch 1700 Loss 0.0581
Epoch 39 Batch 1800 Loss 0.0616
validating 0/100
validating 10/100
validating 20/100
validating 30/100
validating 40/100
validating 50/100
validating 60/100
validating 70/100
validating 80/100
validating 90/100
Epoch 39 Loss 0.0597
Validation Accuracy 1.2200
Time taken for 1 epoch 422.05377650260925 sec

Epoch 40 Batch 0 Loss 0.0393
Epoch 40 Batch 100 Loss 0.0560
Epoch 40 Batch 200 Loss 0.0520
Epoch 40 Batch 300 Loss 0.0640
Epoch 40

Epoch 48 Batch 1200 Loss 0.0505
Epoch 48 Batch 1300 Loss 0.0701
Epoch 48 Batch 1400 Loss 0.0650
Epoch 48 Batch 1500 Loss 0.0807
Epoch 48 Batch 1600 Loss 0.0698
Epoch 48 Batch 1700 Loss 0.0719
Epoch 48 Batch 1800 Loss 0.0556
validating 0/100
validating 10/100
validating 20/100
validating 30/100
validating 40/100
validating 50/100
validating 60/100
validating 70/100
validating 80/100
validating 90/100
Epoch 48 Loss 0.0571
Validation Accuracy 1.2000
Time taken for 1 epoch 421.4986927509308 sec

Epoch 49 Batch 0 Loss 0.0575
Epoch 49 Batch 100 Loss 0.0691
Epoch 49 Batch 200 Loss 0.0521
Epoch 49 Batch 300 Loss 0.0698
Epoch 49 Batch 400 Loss 0.0629
Epoch 49 Batch 500 Loss 0.0410
Epoch 49 Batch 600 Loss 0.0525
Epoch 49 Batch 700 Loss 0.0765
Epoch 49 Batch 800 Loss 0.0486
Epoch 49 Batch 900 Loss 0.0478
Epoch 49 Batch 1000 Loss 0.0408
Epoch 49 Batch 1100 Loss 0.0614
Epoch 49 Batch 1200 Loss 0.0608
Epoch 49 Batch 1300 Loss 0.0716
Epoch 49 Batch 1400 Loss 0.0752
Epoch 49 Batch 1500 Loss 0.0638
Epo

validating 90/100
Epoch 57 Loss 0.0548
Validation Accuracy 1.1100
Time taken for 1 epoch 428.3659625053406 sec

Epoch 58 Batch 0 Loss 0.0394
Epoch 58 Batch 100 Loss 0.0504
Epoch 58 Batch 200 Loss 0.0622
Epoch 58 Batch 300 Loss 0.0365
Epoch 58 Batch 400 Loss 0.0472
Epoch 58 Batch 500 Loss 0.0470
Epoch 58 Batch 600 Loss 0.0489
Epoch 58 Batch 700 Loss 0.0472
Epoch 58 Batch 800 Loss 0.0625
Epoch 58 Batch 900 Loss 0.0432
Epoch 58 Batch 1000 Loss 0.0686
Epoch 58 Batch 1100 Loss 0.0406
Epoch 58 Batch 1200 Loss 0.0705
Epoch 58 Batch 1300 Loss 0.0511
Epoch 58 Batch 1400 Loss 0.0573
Epoch 58 Batch 1500 Loss 0.0685
Epoch 58 Batch 1600 Loss 0.0740
Epoch 58 Batch 1700 Loss 0.0546
Epoch 58 Batch 1800 Loss 0.0600
validating 0/100
validating 10/100
validating 20/100
validating 30/100
validating 40/100
validating 50/100
validating 60/100
validating 70/100
validating 80/100
validating 90/100
Epoch 58 Loss 0.0561
Validation Accuracy 1.2000
Time taken for 1 epoch 429.19161343574524 sec

Epoch 59 Batch 0 L

Epoch 67 Batch 900 Loss 0.0679
Epoch 67 Batch 1000 Loss 0.0630
Epoch 67 Batch 1100 Loss 0.0466
Epoch 67 Batch 1200 Loss 0.0763
Epoch 67 Batch 1300 Loss 0.0539
Epoch 67 Batch 1400 Loss 0.0594
Epoch 67 Batch 1500 Loss 0.0580
Epoch 67 Batch 1600 Loss 0.0710
Epoch 67 Batch 1700 Loss 0.0756
Epoch 67 Batch 1800 Loss 0.0616
validating 0/100
validating 10/100
validating 20/100
validating 30/100
validating 40/100
validating 50/100
validating 60/100
validating 70/100
validating 80/100
validating 90/100
Epoch 67 Loss 0.0538
Validation Accuracy 1.2700
Time taken for 1 epoch 420.63246417045593 sec

Epoch 68 Batch 0 Loss 0.0420
Epoch 68 Batch 100 Loss 0.0766
Epoch 68 Batch 200 Loss 0.0355
Epoch 68 Batch 300 Loss 0.0577
Epoch 68 Batch 400 Loss 0.0454
Epoch 68 Batch 500 Loss 0.0378
Epoch 68 Batch 600 Loss 0.0608
Epoch 68 Batch 700 Loss 0.0389
Epoch 68 Batch 800 Loss 0.0610
Epoch 68 Batch 900 Loss 0.0563
Epoch 68 Batch 1000 Loss 0.0427
Epoch 68 Batch 1100 Loss 0.0638
Epoch 68 Batch 1200 Loss 0.0690
Epo

validating 30/100
validating 40/100
validating 50/100
validating 60/100
validating 70/100
validating 80/100
validating 90/100
Epoch 76 Loss 0.0534
Validation Accuracy 1.0700
Time taken for 1 epoch 418.15266585350037 sec

Epoch 77 Batch 0 Loss 0.0378
Epoch 77 Batch 100 Loss 0.0485
Epoch 77 Batch 200 Loss 0.0549
Epoch 77 Batch 300 Loss 0.0572
Epoch 77 Batch 400 Loss 0.0426
Epoch 77 Batch 500 Loss 0.0464
Epoch 77 Batch 600 Loss 0.0440
Epoch 77 Batch 700 Loss 0.0486
Epoch 77 Batch 800 Loss 0.0402
Epoch 77 Batch 900 Loss 0.0703
Epoch 77 Batch 1000 Loss 0.0459
Epoch 77 Batch 1100 Loss 0.0570
Epoch 77 Batch 1200 Loss 0.0582
Epoch 77 Batch 1300 Loss 0.0471
Epoch 77 Batch 1400 Loss 0.0793
Epoch 77 Batch 1500 Loss 0.0674
Epoch 77 Batch 1600 Loss 0.0373
Epoch 77 Batch 1700 Loss 0.0523
Epoch 77 Batch 1800 Loss 0.0652
validating 0/100
validating 10/100
validating 20/100
validating 30/100
validating 40/100
validating 50/100
validating 60/100
validating 70/100
validating 80/100
validating 90/100
Epoc

Epoch 86 Batch 500 Loss 0.0342
Epoch 86 Batch 600 Loss 0.0623
Epoch 86 Batch 700 Loss 0.0347
Epoch 86 Batch 800 Loss 0.0519
Epoch 86 Batch 900 Loss 0.0444
Epoch 86 Batch 1000 Loss 0.0562
Epoch 86 Batch 1100 Loss 0.0488
Epoch 86 Batch 1200 Loss 0.0700
Epoch 86 Batch 1300 Loss 0.0504
Epoch 86 Batch 1400 Loss 0.0536
Epoch 86 Batch 1500 Loss 0.0638
Epoch 86 Batch 1600 Loss 0.0682
Epoch 86 Batch 1700 Loss 0.0359
Epoch 86 Batch 1800 Loss 0.0423
validating 0/100
validating 10/100
validating 20/100
validating 30/100
validating 40/100
validating 50/100
validating 60/100
validating 70/100
validating 80/100
validating 90/100
Epoch 86 Loss 0.0513
Validation Accuracy 1.0700
Time taken for 1 epoch 423.33358550071716 sec

Epoch 87 Batch 0 Loss 0.0438
Epoch 87 Batch 100 Loss 0.0375
Epoch 87 Batch 200 Loss 0.0433
Epoch 87 Batch 300 Loss 0.0347
Epoch 87 Batch 400 Loss 0.0570
Epoch 87 Batch 500 Loss 0.0523
Epoch 87 Batch 600 Loss 0.0573
Epoch 87 Batch 700 Loss 0.0688
Epoch 87 Batch 800 Loss 0.0358
Epoch 

KeyboardInterrupt: 