[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/sascha-senko/TensorflowCourse/blob/main/ANNwTFHW9.ipynb)

## Global TODO: Only for Hermann

* Add the 2 digits as inputs to each datum
* Add labels to each datum

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import sys
import random
%load_ext tensorboard
%tensorflow_version 2.x
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, Flatten, Dense, Conv2DTranspose, \
 Reshape, MaxPooling2D, Dropout, BatchNormalization, UpSampling2D, ReLU, \
 ELU, Layer
from tensorflow import debugging as debug
import tensorflow_probability as tfp
from functools import partial

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


## Define some constants for dataset

In [None]:
# arbitrarily set. Feel free to change these
DATA_SIZE = 10000
SEQ_SIZE = 25
SHUFFLE_SIZE = DATA_SIZE
PREFETCH_SIZE = tf.data.experimental.AUTOTUNE
BATCH_SIZE = 32

## Define Dataset

In [None]:
# helper function
my_one_hot = lambda x: tf.one_hot(tf.cast(x, tf.int32), depth=10, axis=-1)

def digit_sequence(data_size, size=SEQ_SIZE):
    num = 0
    label = None

    while num < data_size:
        # get sequence
        seq = np.random.randint(10, size=size)   

        # get context

        # get all unique digits of sequence
        digits = np.unique(seq)
        # context digits
        context = np.random.choice(digits, size=2, replace=False)

        # get label

        # counts how much more often the first context digit was observed over the second
        first_vs_second_occurance = 0

        for i in range(size):
            digit = seq[i]
            if digit == context[0]:
                first_vs_second_occurance += 1
            elif digit == context[1]:
                first_vs_second_occurance -= 1

        if first_vs_second_occurance >= 0:        
            label = 0
        else:
            label = 1    

        # one-hotify seq and context
        seq = my_one_hot(seq)
        context = my_one_hot(context)    

        # yield the two context digits, the sequence and the label
        yield seq, context, label 
        num += 1

x_train = tf.data.Dataset.from_generator(digit_sequence, args=[DATA_SIZE], output_signature=(
    tf.TensorSpec((25, 10)), 
    tf.TensorSpec((2, 10)),
    tf.TensorSpec(())
)).batch(BATCH_SIZE)

## LSTM Cell

In [None]:
class LSTM_Cell(tf.keras.layers.Layer):
    def __init__(self, hidden_size):
        super(LSTM_Cell, self).__init__()

        self.hidden_size = hidden_size
        #gates
        self.input_gate = Dense(hidden_size, activation="sigmoid")
        # setting forget bias to one initially is important, 
        # probably because the very first hidden and cell state that gets fed in 
        # call is just a dummy zero vector and doesn't provide any information  
        self.forget_gate = Dense(hidden_size, bias_initializer='ones', activation="sigmoid")
        self.output_gate = Dense(hidden_size, activation="sigmoid")
        self.cell_state_candidates = Dense(hidden_size, activation="tanh")

    @tf.function
    def call(self, input, hidden_state, cell_state):
        # x is 1-D
        concat_input = tf.concat([hidden_state, input], axis=-1) 
        new_cell_state = cell_state * self.forget_gate(concat_input) 
        new_cell_state += self.input_gate(concat_input) * self.cell_state_candidates(concat_input)
        new_hidden_state = tf.keras.activations.tanh(cell_state) * self.output_gate(concat_input) # new hidden state is also output
        return new_cell_state, new_hidden_state      

## LSTM

In [None]:
class LSTM(Model):
    def __init__(self, hidden_size):
        super(LSTM, self).__init__()
        # TODO: read in layer
        self.hidden_size = hidden_size
        self.LSTM_Cell = LSTM_Cell(hidden_size)
        # < 0.5 for first context digit occuring more often, >= 0.5 for second
        self.read_out = Dense(1, activation='sigmoid')
        
    # TODO: Let's see if call works, as I am initializing a state  
    # if I end up using tf.Variable, make sure it's untrainable
    #@tf.function
    def call(self, x):
        results = tf.TensorArray(tf.float32, size=SEQ_SIZE)

        hidden_state = tf.zeros((BATCH_SIZE, self.hidden_size))
        cell_state = tf.zeros((BATCH_SIZE, self.hidden_size))
        
        # TODO: check if it's zero everytime right here with debug
        for index in range(SEQ_SIZE):
            digit = x[:,index,:]
            # TODO: check if compatible with tf.function
            cell_state, hidden_state = self.LSTM_Cell(digit, hidden_state, cell_state)
            results.write(index, hidden_state)

        # TODO
        results = tf.transpose(results.stack(), perm=[1,0,2])

        output = self.read_out(results)

        return output

## Define some constants

In [None]:
NUM_EPOCHS = 10
LEARNING_RATE = 0.0001   
OPTIMIZER = tf.keras.optimizers.Adam(LEARNING_RATE)
BCE = tf.keras.losses.BinaryCrossentropy() 
NUM_BATCHES = (int(x_train.cardinality()))
HIDDEN_SIZE = 10

import datetime
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
lstm_log_dir = 'logs/gradient_tape/' + current_time + '/lstm'
train_writer = tf.summary.create_file_writer(lstm_log_dir)

In [None]:
@tf.function
def train_step(model, inputs, label, optimizer):
    # loss_object and optimizer_object are instances of respective tensorflow classes
    with tf.GradientTape() as tape:
        prediction = model(inputs)[:,-1,0]
        loss = BCE(label, prediction)
        gradients = tape.gradient(loss, model.trainable_variables)

    # update weights  
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    prediction = tf.cast(tf.math.round(prediction), label.dtype)
    correct = tf.math.equal(prediction, correct)

    return loss, correct

## Helper functions

In [None]:
# TODO: Comment about BPTT
# TODO: Regression or classification problem?
# TODO: args
# TODO: weight update still incorrect. Remember that we have an output for each part
@tf.function
def train_step(model, inputs, label, optimizer):
    # loss_object and optimizer_object are instances of respective tensorflow classes
    with tf.GradientTape() as tape:
        # we are only interested in the overall prediction
        prediction = model(inputs)[:,-1,0]
        loss = BCE(label, prediction)
        gradients = tape.gradient(loss, model.trainable_variables)

    # update weights  
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    prediction = tf.cast(tf.math.round(prediction), label.dtype)
    correct = tf.math.equal(prediction, correct)

    return loss, correct

# TODO: epoch needs to be tf.function compatible
#@tf.function
def one_epoch(model, optimizer, loss_tracker, accuracy_tracker, train_data, epoch):
    # reset statistics
    loss_tracker.reset_states()
    accuracy_tracker.reset_states()

    # TODO: not just input, but also other things
    for inputs, contexts, labels in train_data:
        
        # TODO: dunno if recursive tf.function worked so well
        loss, accuracy = train_step(model, inputs, labels, optimizer)

        loss_tracker.update_state(loss)
        accuracy_tracker.update_state(accuracy)

    # Write statistics into summary
    with train_writer.as_default():
        tf.summary.scalar('loss', loss_tracker.result(), step=epoch)
        tf.summary.scalar('accuracy', accuracy_tracker.result(), step=epoch)

## Train

In [179]:
# Clear any logs from previous runs
%rm -rf ./logs/

# remove all active models for memory purposes
tf.keras.backend.clear_session()

model = LSTM(HIDDEN_SIZE)

loss_tracker = tf.keras.metrics.Mean()
accuracy_tracker = tf.keras.metrics.Mean()

for epoch in range(NUM_EPOCHS):
    print('Epoch: ' + str(epoch+1))
    one_epoch(model, OPTIMIZER, loss_tracker, accuracy_tracker, x_train, epoch)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
    prediction = model(inputs, contexts)[:,-1,0]  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py", line 1012, in __call__
    outputs = call_fn(inputs, *args, **kwargs)  File "<ipython-input-176-0147b6841a8b>", line 36, in call
    results.write(index, hidden_state)  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/tf_should_use.py", line 249, in wrapped
    error_in_function=error_in_function)
Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x7f11e581bb70>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "<ipython-input-178-dfe75ad0c03f>", line 33, in one_epoch
    loss, accuracy = train_step(model, inputs, contexts, labels, optimizer)  File "<ipython-input-178-dfe75ad0c03f>", line 10, in train_step
    pr

InvalidArgumentError: ignored

In [None]:
# Open tensorboard
%tensorboard --logdir logs/gradient_tape