# Pre-Processing

In [48]:
import pandas as pd

In [49]:
df = pd.DataFrame(columns=[
    "day", "month", "year", "hour",
    "client", 
    "solar_power_cat", "solar_power_num", 
    # columns that will contain either (feat) TemperatureC,DewpointC,PressurehPa,WindDirectionDegrees,WindSpeedKMH,WindSpeedGustKMH,Humidity,HourlyPrecipMM,dailyrainMM,SolarRadiationWatts_m2
    # and significant negative contribution, negative contribution, positivie contribution and significant positive contribution
    "feat1", "feat2", "contri1", "contri2",
    # columns that will contain either none, no use, uncertain or use
    "air conditioner", "washing machine", "dishwasher", "water heater", "heater",
    "report"
])

df_meta = pd.read_csv("../GenerationCorpus/metadata.csv", index_col=0)

In [50]:
df_meta.head()

Unnamed: 0,date,client,solar_power_cat,solar_power_num,explanation,use_devices,uncertain_devices,nouse_devices,filename
0,"October 7th 2023, 15o'clock",Ema,high,10.143,"#('dewpoint', 'positive contribution')",#dishwasher&air conditioner,#,#water heater,report_1_1.txt
1,"June 8th 2023, 16o'clock",Ana,medium,2.8,"#('temperature', 'negative impact')",#heater,#washing machine,#air conditioner,report_1_2.txt
2,"February 27th 2023, 14o'clock",Ema,high,9.429,"#('dewpoint', 'positive contribution')&('tempe...",#heater&dishwasher&washing machine,#,#,report_1_3.txt
3,"November 28th 2023, 15o'clock",Ana,very high,13.857,"#('solar radiation', 'significant positive con...",#water heater&washing machine&dishwasher,#air conditioner,#,report_1_4.txt
4,"April 6th 2024, 6o'clock",Diana,low,3.429,"#('solar radiation', 'negative impact')&('hour...",#dishwasher,#,#air conditioner,report_1_5.txt


In [51]:
df.head()

Unnamed: 0,day,month,year,hour,client,solar_power_cat,solar_power_num,feat1,feat2,contri1,contri2,air conditioner,washing machine,dishwasher,water heater,heater,report


In [52]:
import string

def seperate_date(line):
    components = line["date"].split()
    month = components[0]
    raw_day = components[1]
    if raw_day[1] in string.ascii_lowercase:
        day = raw_day[0]
    else:
        day = raw_day[0:2]

    year = components[2][0:4]

    raw_hour = components[3]
    if raw_hour[1] in string.ascii_lowercase:
        hour = raw_hour[0]
    else:
        hour = raw_hour[0:2]

    return hour, day, month, year

def seperate_explanation(line):
    components = line["explanation"].split("&")
    new_components = []
    explanation = []
    for c in components:
        c = c.replace("#", "")
        c = c.replace("(", "")
        c = c.replace(")", "")
        c = c.replace("'", "")
        new_components = c.split(",")
        new_components[1] = new_components[1].replace(" ", "", 1)

        explanation.append(new_components)
    
    return explanation


def seperate_devices(line):
    component1 = line["use_devices"]
    component2 = line["uncertain_devices"]
    component3 = line["nouse_devices"]

    component1 = component1.split("&")
    component2 = component2.split("&")
    component3 = component3.split("&")

    devices = {
        "use": [],
        "uncertain": [],
        "no": []
    }
    for c1 in component1:
        c1 = c1.replace("#", "")
        devices["use"].append(c1)

    for c2 in component2:
        c2 = c2.replace("#", "")
        devices["uncertain"].append(c2)
    
    for c3 in component3:
        c3 = c3.replace("#", "")
        devices["no"].append(c3)

    return devices

In [53]:
for i in df_meta.index:
    row = {
        "day": "", "month": "", "year": "", "hour": "",
        "client": "", 
        "solar_power_cat": "", "solar_power_num": "", 
        "feat1": "", "feat2": "", "contri1": "", "contri2": "",
        "air conditioner": "", "washing machine": "", "dishwasher": "", "water heater": "", "heater": "",
        "report": ""
    }
    
    line = df_meta.iloc[i]
    hour, day, month, year = seperate_date(line)
    explanation = seperate_explanation(line)
    devices = seperate_devices(line)

    row["day"] = day
    row["month"] = month
    row["year"] = year
    row["hour"] = hour
    row["client"] = line["client"]
    row["solar_power_num"] = str(line["solar_power_num"])
    row["solar_power_cat"] = line["solar_power_cat"]
    count = 1
    for exp in explanation:
        row["feat" + str(count)] = exp[0]
        row["contri" + str(count)] = exp[1]

        count += 1

    level = 0
    for key in devices:
        for item in devices[key]:
            if not item == "":
                row[item] = str(level)
        
        level += 1
    
    f = open("../GenerationCorpus/corpus/" + line["filename"], "r")
    row["report"] = f.read()

    df.loc[len(df)] = row

df.head()

Unnamed: 0,day,month,year,hour,client,solar_power_cat,solar_power_num,feat1,feat2,contri1,contri2,air conditioner,washing machine,dishwasher,water heater,heater,report
0,7,October,2023,15,Ema,high,10.143,dewpoint,,positive contribution,,0.0,,0.0,2.0,,Report for Solar Power Predictions for Ema - O...
1,8,June,2023,16,Ana,medium,2.8,temperature,,negative impact,,2.0,1.0,,,0.0,"Dear Ana,\n\nWe hope this message finds you we..."
2,27,February,2023,14,Ema,high,9.429,dewpoint,temperature,positive contribution,positive contribution,,0.0,0.0,,0.0,"Dear Ema,\n\nWe hope this letter finds you wel..."
3,28,November,2023,15,Ana,very high,13.857,solar radiation,,significant positive contribution,,1.0,0.0,0.0,0.0,,"Dear Ana,\n\nI am pleased to present you with ..."
4,6,April,2024,6,Diana,low,3.429,solar radiation,hourly precipitation,negative impact,significant negative impact,2.0,,0.0,,,"Dear Diana,\n\nWe hope this message finds you ..."


In [54]:
df.to_csv("./nlp_data.csv")

In [55]:
lines = pd.read_csv("./nlp_data.csv")
a = " ".join(str(v) for v in lines.iloc[i].values[:-1])
print(a)

255 17 October 2024 11 Franciso medium 3.067 daily rain nan negative impact nan 2.0 0.0 2.0 nan 1.0


# Encoder-Decoder Model

In [56]:
import tensorflow as tf
import tensorflow_addons as tfa

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from sklearn.model_selection import train_test_split

import unicodedata
import re
import numpy as np
import os
import io
import time

In [57]:
class NMTDataset:
    def __init__(self):
        self.problem_type = 'tokens2report'
        self.inp_lang_tokenizer = None
        self.targ_lang_tokenizer = None


    def unicode_to_ascii(self, s):
        return ''.join(c for c in unicodedata.normalize('NFD', s) if unicodedata.category(c) != 'Mn')

    ## Step 1 and Step 2 
    def preprocess_sentence(self, w):
        w = self.unicode_to_ascii(w.lower().strip())

        # creating a space between a word and the punctuation following it
        # eg: "he is a boy." => "he is a boy ."
        # Reference:- https://stackoverflow.com/questions/3645931/python-padding-punctuation-with-white-spaces-keeping-punctuation
        w = re.sub(r"([?.!,¿])", r" \1 ", w)
        w = re.sub(r'[" "]+', " ", w)

        # replacing everything with space except (a-z, A-Z, ".", "?", "!", ",")
        w = re.sub(r"[^a-zA-Z?.!,¿]+", " ", w)

        w = w.strip()

        # adding a start and an end token to the sentence
        # so that the model know when to start and stop predicting.
        w = '<start> ' + w + ' <end>'
        return w

    def create_dataset(self):
        # path : path to spa-eng.txt file
        # num_examples : Limit the total number of training example for faster training (set num_examples = len(lines) to use full data)
        lines = pd.read_csv("./nlp_data.csv")
        # load dataframe here
        word_pairs = [
            [
                lines.iloc[i].values[-1],
                " ".join(str(v) for v in lines.iloc[i].values[:-1])
            ] for i in lines.index
        ]
        
        print(word_pairs)
        return zip(*word_pairs)

    # Step 3 and Step 4values
    def tokenize(self, lang):
        # lang = list of sentences in a language (in this case first or second sequence)

        # print(len(lang), "example sentence: {}".format(lang[0]))
        lang_tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='', oov_token='<OOV>')
        lang_tokenizer.fit_on_texts(lang)

        ## tf.keras.preprocessing.text.Tokenizer.texts_to_sequences converts string (w1, w2, w3, ......, wn) 
        ## to a list of correspoding integer ids of words (id_w1, id_w2, id_w3, ...., id_wn)
        tensor = lang_tokenizer.texts_to_sequences(lang) 

        ## tf.keras.preprocessing.sequence.pad_sequences takes argument a list of integer id sequences 
        ## and pads the sequences to match the longest sequences in the given input
        tensor = tf.keras.preprocessing.sequence.pad_sequences(tensor, padding='post')

        return tensor, lang_tokenizer

    def load_dataset(self):
        # creating cleaned input, output pairs
        targ_lang, inp_lang = self.create_dataset()

        input_tensor, inp_lang_tokenizer = self.tokenize(inp_lang)
        target_tensor, targ_lang_tokenizer = self.tokenize(targ_lang)

        return input_tensor, target_tensor, inp_lang_tokenizer, targ_lang_tokenizer

    def call(self, BUFFER_SIZE, BATCH_SIZE):
        input_tensor, target_tensor, self.inp_lang_tokenizer, self.targ_lang_tokenizer = self.load_dataset()

        input_tensor_train, input_tensor_val, target_tensor_train, target_tensor_val = train_test_split(input_tensor, target_tensor, test_size=0.2)

        train_dataset = tf.data.Dataset.from_tensor_slices((input_tensor_train, target_tensor_train))
        train_dataset = train_dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

        val_dataset = tf.data.Dataset.from_tensor_slices((input_tensor_val, target_tensor_val))
        val_dataset = val_dataset.batch(BATCH_SIZE, drop_remainder=True)

        return train_dataset, val_dataset, self.inp_lang_tokenizer, self.targ_lang_tokenizer

In [58]:
BUFFER_SIZE = 32000
BATCH_SIZE = 64

dataset_creator = NMTDataset()
train_dataset, val_dataset, inp_lang, targ_lang = dataset_creator.call(BUFFER_SIZE, BATCH_SIZE)
example_input_batch, example_target_batch = next(iter(train_dataset))
example_input_batch.shape, example_target_batch.shape
example_input_batch, example_target_batch
# <start> october ema high . dewpoint nan positive contribution nan . nan . . nan <end>



2023-04-08 22:00:24.085525: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype int32 and shape [204,239]
	 [[{{node Placeholder/_1}}]]
2023-04-08 22:00:24.085764: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype int32 and shape [204,239]
	 [[{{node Placeholder/_1}}]]


(<tf.Tensor: shape=(64, 24), dtype=int32, numpy=
 array([[ 81,  67,  27, ...,   0,   0,   0],
        [311,  68,  52, ...,   0,   0,   0],
        [418,  48,  37, ...,   0,   0,   0],
        ...,
        [359,  50,  59, ...,   0,   0,   0],
        [356,  62,  51, ...,   0,   0,   0],
        [248,  76,  63, ...,   0,   0,   0]], dtype=int32)>,
 <tf.Tensor: shape=(64, 239), dtype=int32, numpy=
 array([[  36,  427,   27, ...,    0,    0,    0],
        [  36,  353,   27, ...,    0,    0,    0],
        [2062, 2063,  219, ...,    0,    0,    0],
        ...,
        [  36,  615,   27, ...,    0,    0,    0],
        [  36,  338,   74, ...,    0,    0,    0],
        [  38,   20, 1518, ...,    0,    0,    0]], dtype=int32)>)

In [59]:
vocab_inp_size = len(inp_lang.word_index)+1
vocab_tar_size = len(targ_lang.word_index)+1
max_length_input = example_input_batch.shape[1]
max_length_output = example_target_batch.shape[1]

embedding_dim = 256
units = 1024
steps_per_epoch = df.shape[0]//BATCH_SIZE

print("max_length_tokens, max_length_reports, vocab_size_tokens, vocab_size_reports")
max_length_input, max_length_output, vocab_inp_size, vocab_tar_size


max_length_tokens, max_length_reports, vocab_size_tokens, vocab_size_reports


(24, 239, 464, 2172)

In [60]:
class Encoder(tf.keras.Model):
  def __init__(self, vocab_size, embedding_dim, enc_units, batch_sz):
    super(Encoder, self).__init__()
    self.batch_sz = batch_sz
    self.enc_units = enc_units
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)

    ##-------- LSTM layer in Encoder ------- ##
    # maybe try a GRU layer
    self.lstm_layer = tf.keras.layers.LSTM(self.enc_units,
                                   return_sequences=True,
                                   return_state=True,
                                   recurrent_initializer='glorot_uniform')

  def call(self, x, hidden):
    x = self.embedding(x)
    output, h, c = self.lstm_layer(x, initial_state = hidden)
    return output, h, c

  def initialize_hidden_state(self):
    return [tf.zeros((self.batch_sz, self.enc_units)), tf.zeros((self.batch_sz, self.enc_units))]


In [61]:
encoder = Encoder(vocab_inp_size, embedding_dim, units, BATCH_SIZE)

# sample input
sample_hidden = encoder.initialize_hidden_state()
sample_output, sample_h, sample_c = encoder(example_input_batch, sample_hidden)
print ('Encoder output shape: (batch size, sequence length, units) {}'.format(sample_output.shape))
print ('Encoder h vecotr shape: (batch size, units) {}'.format(sample_h.shape))
print ('Encoder c vector shape: (batch size, units) {}'.format(sample_c.shape))


Encoder output shape: (batch size, sequence length, units) (64, 24, 1024)
Encoder h vecotr shape: (batch size, units) (64, 1024)
Encoder c vector shape: (batch size, units) (64, 1024)


In [62]:
class Decoder(tf.keras.Model):
  def __init__(self, vocab_size, embedding_dim, dec_units, batch_sz, attention_type='luong'):
    super(Decoder, self).__init__()
    self.batch_sz = batch_sz
    self.dec_units = dec_units
    self.attention_type = attention_type

    # Embedding Layer
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)

    #Final Dense layer on which softmax will be applied
    self.fc = tf.keras.layers.Dense(vocab_size)

    # Define the fundamental cell for decoder recurrent structure
    self.decoder_rnn_cell = tf.keras.layers.LSTMCell(self.dec_units)
    
    # Sampler
    self.sampler = tfa.seq2seq.sampler.TrainingSampler()

    # Create attention mechanism with memory = None
    self.attention_mechanism = self.build_attention_mechanism(self.dec_units, 
                                                              None, self.batch_sz*[max_length_input], self.attention_type)

    # Wrap attention mechanism with the fundamental rnn cell of decoder
    self.rnn_cell = self.build_rnn_cell()

    # Define the decoder with respect to fundamental rnn cell
    self.decoder = tfa.seq2seq.BasicDecoder(self.rnn_cell, sampler=self.sampler, output_layer=self.fc)


  def build_rnn_cell(self):
    rnn_cell = tfa.seq2seq.AttentionWrapper(self.decoder_rnn_cell, 
                                  self.attention_mechanism, attention_layer_size=self.dec_units)
    return rnn_cell

  def build_attention_mechanism(self, dec_units, memory, memory_sequence_length, attention_type='luong'):
    # ------------- #
    # typ: Which sort of attention (Bahdanau, Luong)
    # dec_units: final dimension of attention outputs 
    # memory: encoder hidden states of shape (batch_size, max_length_input, enc_units)
    # memory_sequence_length: 1d array of shape (batch_size) with every element set to max_length_input (for masking purpose)
    
    if(attention_type=='bahdanau'):
      return tfa.seq2seq.BahdanauAttention(units=dec_units, memory=memory, memory_sequence_length=memory_sequence_length)
    else:
      return tfa.seq2seq.LuongAttention(units=dec_units, memory=memory, memory_sequence_length=memory_sequence_length)

  def build_initial_state(self, batch_sz, encoder_state, Dtype):
    decoder_initial_state = self.rnn_cell.get_initial_state(batch_size=batch_sz, dtype=Dtype)
    decoder_initial_state = decoder_initial_state.clone(cell_state=encoder_state)
    return decoder_initial_state


  def call(self, inputs, initial_state):
    x = self.embedding(inputs)
    outputs, _, _ = self.decoder(x, initial_state=initial_state, sequence_length=self.batch_sz*[max_length_output-1])
    return outputs


In [63]:
decoder = Decoder(vocab_tar_size, embedding_dim, units, BATCH_SIZE, 'luong')
sample_x = tf.random.uniform((BATCH_SIZE, max_length_output))
decoder.attention_mechanism.setup_memory(sample_output)
initial_state = decoder.build_initial_state(BATCH_SIZE, [sample_h, sample_c], tf.float32)


sample_decoder_outputs = decoder(sample_x, initial_state)

print("Decoder Outputs Shape: ", sample_decoder_outputs.rnn_output.shape)

Decoder Outputs Shape:  (64, 238, 2172)


In [64]:
optimizer = tf.keras.optimizers.Adam()

def loss_function(real, pred):
  # real shape = (BATCH_SIZE, max_length_output)
  # pred shape = (BATCH_SIZE, max_length_output, tar_vocab_size )
  cross_entropy = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')
  loss = cross_entropy(y_true=real, y_pred=pred)
  mask = tf.logical_not(tf.math.equal(real,0))   #output 0 for y=0 else output 1
  mask = tf.cast(mask, dtype=loss.dtype)  
  loss = mask* loss
  loss = tf.reduce_mean(loss)
  return loss

In [65]:
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                 encoder=encoder,
                                 decoder=decoder)


In [66]:
@tf.function
def train_step(inp, targ, enc_hidden):
  loss = 0

  with tf.GradientTape() as tape:
    enc_output, enc_h, enc_c = encoder(inp, enc_hidden)


    dec_input = targ[ : , :-1 ] # Ignore <end> token
    real = targ[ : , 1: ]         # ignore <start> token

    # Set the AttentionMechanism object with encoder_outputs
    decoder.attention_mechanism.setup_memory(enc_output)

    # Create AttentionWrapperState as initial_state for decoder
    decoder_initial_state = decoder.build_initial_state(BATCH_SIZE, [enc_h, enc_c], tf.float32)
    pred = decoder(dec_input, decoder_initial_state)
    logits = pred.rnn_output
    loss = loss_function(real, logits)

  variables = encoder.trainable_variables + decoder.trainable_variables
  gradients = tape.gradient(loss, variables)
  optimizer.apply_gradients(zip(gradients, variables))

  return loss

In [67]:
EPOCHS = 1

for epoch in range(EPOCHS):
  start = time.time()

  enc_hidden = encoder.initialize_hidden_state()
  total_loss = 0

  for (batch, (inp, targ)) in enumerate(train_dataset.take(steps_per_epoch)):
    batch_loss = train_step(inp, targ, enc_hidden)
    total_loss += batch_loss

    if batch % 100 == 0:
      print('Epoch {} Batch {} Loss {:.4f}'.format(epoch + 1,
                                                   batch,
                                                   batch_loss.numpy()))
  # saving (checkpoint) the model every 2 epochs
  if (epoch + 1) % 2 == 0:
    checkpoint.save(file_prefix = checkpoint_prefix)

  print('Epoch {} Loss {:.4f}'.format(epoch + 1,
                                      total_loss / steps_per_epoch))
  print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))
 

2023-04-08 22:00:32.087487: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype int32 and shape [204,239]
	 [[{{node Placeholder/_1}}]]
2023-04-08 22:00:32.088115: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype int32 and shape [204,239]
	 [[{{node Placeholder/_1}}]]
2023-04-08 22:00:32.337980: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_d

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x7f025735ecb0>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "/home/pedro/Desktop/challenge-3/venv/lib/python3.10/site-packages/tensorflow_addons/seq2seq/decoder.py", line 508, in body
    return (  File "/home/pedro/Desktop/challenge-3/venv/lib/python3.10/site-packages/tensorflow/python/util/nest.py", line 916, in map_structure
    return pack_sequence_as(  File "/home/pedro/Desktop/challenge-3/venv/lib/python3.10/site-packages/tensorflow/python/util/nest.py", line 917, in <listcomp>
    structure[0], [func(*x) for x in entries],  File "/home/pedro/Desktop/challenge-3/venv/lib/python3.10/site-packages/tensorflow_addons/seq2seq/decoder.py", line 506, in <lambda>
    lambda ta, out: ta.write(time, out), outputs_ta, emit  File "/home/pedro/Desktop/challenge-3/venv/lib/python3.10/sit

2023-04-08 22:00:35.470675: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-04-08 22:00:35.472661: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-04-08 22:00:35.473602: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

Epoch 1 Batch 0 Loss 4.5291
Epoch 1 Loss 3.2863
Time taken for 1 epoch 53.25918507575989 sec



In [68]:
def evaluate_sentence(sentence):
  sentence = dataset_creator.preprocess_sentence(sentence)

  inputs = [inp_lang.word_index[i] for i in sentence.split(' ')]
  inputs = tf.keras.preprocessing.sequence.pad_sequences([inputs],
                                                          maxlen=max_length_input,
                                                          padding='post')
  inputs = tf.convert_to_tensor(inputs)
  inference_batch_size = inputs.shape[0]
  print(inputs)

  enc_start_state = [tf.zeros((inference_batch_size, units)), tf.zeros((inference_batch_size,units))]
  enc_out, enc_h, enc_c = encoder(inputs, enc_start_state)

  start_tokens = tf.fill([inference_batch_size], targ_lang.word_index['<start>'])
  end_token = targ_lang.word_index['<end>']

  greedy_sampler = tfa.seq2seq.GreedyEmbeddingSampler()

  # Instantiate BasicDecoder object
  decoder_instance = tfa.seq2seq.BasicDecoder(cell=decoder.rnn_cell, sampler=greedy_sampler, output_layer=decoder.fc)
  # Setup Memory in decoder stack
  decoder.attention_mechanism.setup_memory(enc_out)

  # set decoder_initial_state
  decoder_initial_state = decoder.build_initial_state(inference_batch_size, [enc_h, enc_c], tf.float32)


  ### Since the BasicDecoder wraps around Decoder's rnn cell only, you have to ensure that the inputs to BasicDecoder 
  ### decoding step is output of embedding layer. tfa.seq2seq.GreedyEmbeddingSampler() takes care of this. 
  ### You only need to get the weights of embedding layer, which can be done by decoder.embedding.variables[0] and pass this callabble to BasicDecoder's call() function

  decoder_embedding_matrix = decoder.embedding.variables[0]

  outputs, _, _ = decoder_instance(decoder_embedding_matrix, start_tokens = start_tokens, end_token= end_token, initial_state=decoder_initial_state)
  return outputs.sample_id.numpy()

def translate(sentence):
  result = evaluate_sentence(sentence)
  print(result)
  result = targ_lang.sequences_to_texts(result)
  print('Input: %s' % (sentence))
  print('Predicted translation: {}'.format(result))


In [72]:
# restoring the latest checkpoint in checkpoint_dir
checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))

ValueError: Received incompatible tensor with shape (981, 256) when attempting to restore variable with shape (2172, 256) and name decoder_3/embedding_7/embeddings:0.

In [73]:
# first point
translate(u'0 7 October 2023 15 Ema high 10.143 dewpoint nan positive contribution nan 0.0 nan 0.0 2.0 nan')
# first data point tf.Tensor([[ 4 37 39 11  2 20  3  8  9  3  3  3  5  0  0  0  0  0  0  0  0  0]], shape=(1, 22), dtype=int32)
# 0 9 October 2023 15 Ana high 10.143 dewpoint nan positive contribution nan 0 nan 0 2 nan

KeyError: '<start>'