In [1]:
import numpy as np
import tensorflow as tf

In [2]:
source_texts = ['hello', 'how are you', 'goodbye']
target_texts = ['bonjour', 'comment ça va', 'au revoir']

In [7]:
source_vocab=set(''.join(source_texts))
print(source_vocab)
target_vocab=set(''.join(target_texts))
source_vocab_size=len(source_vocab)
target_vocab_size=len(target_vocab)

{'d', 'g', 'w', 'b', 'l', 'r', ' ', 'u', 'a', 'e', 'y', 'h', 'o'}


In [12]:
source_char_to_int={char:i for i,char in enumerate(source_vocab)}
print(source_char_to_int)
target_char_to_int={char:i for i,char in enumerate(target_vocab)}
source_int_to_char={i:char for i,char in enumerate(source_vocab)}
print(source_int_to_char)
target_int_to_char={i:char for i,char in enumerate(target_vocab)}

{'d': 0, 'g': 1, 'w': 2, 'b': 3, 'l': 4, 'r': 5, ' ': 6, 'u': 7, 'a': 8, 'e': 9, 'y': 10, 'h': 11, 'o': 12}
{0: 'd', 1: 'g', 2: 'w', 3: 'b', 4: 'l', 5: 'r', 6: ' ', 7: 'u', 8: 'a', 9: 'e', 10: 'y', 11: 'h', 12: 'o'}


In [15]:
source_sequence=[[source_char_to_int[char]for char in text]for text in source_texts]
print(source_sequence)
target_sequence=[[target_char_to_int[char]for char in text]for text in target_texts]
print(target_sequence)

[[11, 9, 4, 4, 12], [11, 12, 2, 6, 8, 5, 9, 6, 10, 12, 7], [1, 12, 12, 0, 3, 10, 9]]
[[3, 7, 1, 14, 7, 9, 4], [2, 7, 11, 11, 12, 1, 13, 8, 0, 10, 8, 5, 10], [10, 9, 8, 4, 12, 5, 7, 6, 4]]


In [20]:
max_length=max(len(sen) for sen in source_texts)
print(max_length)
source_sequence=tf.keras.preprocessing.sequence.pad_sequences(source_sequence,maxlen=max_length,padding='post')
print(source_sequence)
target_sequence=tf.keras.preprocessing.sequence.pad_sequences(target_sequence,maxlen=max_length,padding='post')
print(target_sequence)

11
[[11  9  4  4 12  0  0  0  0  0  0]
 [11 12  2  6  8  5  9  6 10 12  7]
 [ 1 12 12  0  3 10  9  0  0  0  0]]
[[ 3  7  1 14  7  9  4  0  0  0  0]
 [11 11 12  1 13  8  0 10  8  5 10]
 [10  9  8  4 12  5  7  6  4  0  0]]


In [46]:
input_shape=(max_length,source_vocab_size)
output_shape=(max_length,target_vocab_size)
model=tf.keras.models.Sequential([tf.keras.layers.Embedding(source_vocab_size,64,input_length=max_length),
                      tf.keras.layers.SimpleRNN(128,return_sequences=True),
                      tf.keras.layers.Dense(64,activation='softmax')
                      ])
model.compile(loss='categorical_crossentropy',optimizer='adam')

In [50]:
target_sequence_one_hot = np.array([tf.keras.utils.to_categorical(seq, num_classes=target_vocab_size) for seq in target_sequence])
print(target_sequence_one_hot)

[[[0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
  [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
  [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
  [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 

In [51]:
model.fit(source_sequence, target_sequence_one_hot, epochs=10)

Epoch 1/10


AttributeError: 'NoneType' object has no attribute 'items'