In [195]:
import tensorflow as tf
import numpy as np

# Simple one-cell Model

In [196]:
class LSTM_Cell(tf.keras.layers.Layer):
  def __init__(self):
    super(LSTM_Cell,self).__init__()
    self.fgate = tf.keras.layers.Dense(H_size,activation='sigmoid',bias_initializer=tf.keras.initializers.Ones())
    self.ingate = tf.keras.layers.Dense(H_size,activation='sigmoid')
    self.cand = tf.keras.layers.Dense(H_size,activation='tanh')
    self.outgate = tf.keras.layers.Dense(H_size,activation="sigmoid")
    
  def call(self,input,hidden_state,cell_state):
    concat_input = tf.concat((hidden_state,input),axis=1)
    cell_state = cell_state * self.fgate(concat_input)
    update = self.ingate(concat_input) * self.cand(concat_input)
    cell_state = cell_state + update
    output_hidden_state = tf.keras.activations.tanh(cell_state)*self.outgate(concat_input)
    return output_hidden_state,cell_state


In [197]:
class Model2(tf.keras.Model):
  
  def __init__(self):
    super(Model2,self).__init__()
    #self.in_layer = tf.keras.Dense(30,activation='relu') # is this necessary?
    self.cell = LSTM_Cell()
    # nicht im Model sondern viell nur von außerhalb in die call Funktion geben
    self.h_prev = np.zeros((batch_size,H_size))
    self.C_prev = np.zeros((batch_size,H_size))
    
    self.out_layer = tf.keras.layers.Dense(units=3,activation='sigmoid')
    
  def reset_states(self):
    self.h_prev = np.zeros((batch_size,H_size))
    self.C_prev = np.zeros((batch_size,H_size))

  # The call function takes the input over multipletimesteps and creates (and returns!)  
  # the outputs over multiple timesteps.
  def call(self,inputs):
    #inputs = self.in_layer(inputs)
    self.reset_states()
    h_states = np.asarray([])#tf.TensorArray(tf.float32, size=0, dynamic_size=True, clear_after_read=False)
    c_states = np.asarray([])#tf.TensorArray(tf.float32, size=0, dynamic_size=True, clear_after_read=False)
    # feed sequential input into the cell (a batch of two digits and one sequence element at a time)
    for t in tf.range(seq_length):
      input = inputs[:,t,:]
      # print(input.shape)
      # compute and save hidden and cell states
      self.h_prev,self.C_prev = self.cell(input,self.h_prev,self.C_prev)
      # arrays to unroll the LSTM (outputs)
      h_states = np.append(h_states,self.h_prev)#write(t,self.h_prev)
      c_states = np.append(c_states,self.C_prev)#write(t,self.C_prev)
    outputs = self.out_layer(self.h_prev)
    # print(outputs)
    return outputs

# Data Generator

In [198]:
def train_seq_generator():
  while(True):
    train_data = []
    targets = []
    for i in range(batch_size):
      seq = []
      d1 = np.random.randint(0,10)
      d2 = np.random.randint(0,10)
      counts = {x:0 for x in range(10)}
      for x in range(seq_length):
        ext = np.random.randint(0,10)
        counts[ext]=counts[ext]+1
        seq.append(ext)
      label = [2] if counts[d1]<counts[d2] else [1] if counts[d1]>counts[d2] else [0] # d1,d2 equally often 
      seq = tf.constant(seq)
      d1 = tf.one_hot(d1,depth=10)
      d2 = tf.one_hot(d2,depth=10)
      x_t_total = [[*d1,*d2,*tf.one_hot(elem,depth=10)] for elem in seq]
      label = tf.squeeze([tf.reshape(tf.one_hot(label,depth=3,),(-1,))])
      train_data.append(tf.constant(np.asarray(x_t_total)))
      targets.append(label)
    yield tf.constant(np.asarray(train_data)), tf.constant(np.asarray(targets))

# Training

In [199]:
def train_step(model, input, target, loss_function, optimizer):
    
    # loss_object and optimizer_object are instances of respective tensorflow classes
    with tf.GradientTape() as tape:
        prediction = model(input)
        loss = loss_function(target, prediction)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    
    # accuracy using a 0.5 threshold
    acc = np.sum(np.abs((target-prediction))<0.5)/(target.shape[0]*target.shape[1])

    return np.mean(loss), acc


In [200]:
# set initial parameters
seq_length = 25 
batch_size = 32
num_batches = 20
H_size = 2 # Size of the hidden state

In [201]:
def train(model, steps=5, running_average_factor = 0.95):
    
    dataset = tf.data.Dataset.from_generator(
      train_seq_generator,
      output_signature=(
          tf.TensorSpec(shape=(32,25,30), dtype=tf.float32, name='seq_data'),
          tf.TensorSpec(shape=(32, 3), dtype=tf.float32, name='targets')))
    optimizer = tf.keras.optimizers.Adam()
    cross_entropy_loss = tf.keras.losses.CategoricalCrossentropy()

    losses = []
    acces = []

    running_average_loss = 0
    running_average_acc = 0
    i=0
    print_every=1

    for i in range(steps):
        # generating a new sample in each training step
        [(seq,lbl)] = list(dataset.take(1))
        loss, acc = train_step(model,seq, lbl, cross_entropy_loss, optimizer)
        
        # compute the running averages of training loss and accuracy
        running_average_loss = running_average_factor * running_average_loss  + (1 - running_average_factor) * loss
        running_average_acc = running_average_factor * running_average_acc  + (1 - running_average_factor) * acc
        losses.append(running_average_loss)
        acces.append(running_average_acc)
        
        if i%print_every==0:
            print(f"Training step {i}: average loss is {np.round(losses[-1],2)}, accuracy of {np.round(acces[-1], 2)} %")
        
        if i == steps:
            break
        i+=1
    return losses, acces

In [202]:
# train the simple model
model = Model2()
losses, acces = train(model,steps=50)

Training step 0: average loss is 0.06, accuracy of 0.02 %
Training step 1: average loss is 0.11, accuracy of 0.05 %
Training step 2: average loss is 0.17, accuracy of 0.07 %
Training step 3: average loss is 0.21, accuracy of 0.09 %
Training step 4: average loss is 0.26, accuracy of 0.11 %
Training step 5: average loss is 0.31, accuracy of 0.13 %
Training step 6: average loss is 0.34, accuracy of 0.15 %
Training step 7: average loss is 0.39, accuracy of 0.17 %
Training step 8: average loss is 0.42, accuracy of 0.19 %
Training step 9: average loss is 0.46, accuracy of 0.2 %
Training step 10: average loss is 0.49, accuracy of 0.22 %
Training step 11: average loss is 0.52, accuracy of 0.23 %
Training step 12: average loss is 0.55, accuracy of 0.24 %
Training step 13: average loss is 0.58, accuracy of 0.26 %
Training step 14: average loss is 0.6, accuracy of 0.27 %
Training step 15: average loss is 0.63, accuracy of 0.29 %
Training step 16: average loss is 0.65, accuracy of 0.3 %
Training s