## Spell Corrector For Turkish Language

In [1]:
# Libraries
from os import listdir                                         # Directory list
import pandas as pd                                            # To see clear description
from sklearn.model_selection import train_test_split           # To split the data into test, training
import numpy as np                                             # Random number
from nltk import tokenize                                      # Sentence tokenizer
import re                                                      # Remove punctuation
import copy                                                    # Deep copy
import tensorflow as tf                                        # Model
import time                                                    # Training time

In [2]:
# Tensorflow version
tf.__version__

'2.4.1'

In [3]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
physical_devices = tf.config.list_physical_devices('GPU') 
tf.config.experimental.set_memory_growth(physical_devices[0], True)

Num GPUs Available:  1


### Prepare the Dataset

In [4]:
# Book File Names
path = 'books/'
files = listdir(path)
files

['Aklından Bir Sayı Tut - John Verdon.txt',
 'Anna Karenina - Lev Nikolayeviç Tolstoy.txt',
 'Aşk ve Gurur - Jane Austen.txt',
 'Beyaz Diş - Jack London.txt',
 'Beyaz Zambaklar Ülkesinde.txt',
 'Bülbülü Öldürmek - Harper Lee.txt',
 "Erkekler Mars'tan, kadınlar Venüs'ten.txt",
 'Eylül - Mehmed Rauf.txt',
 'Gurur ve Önyargı - Jane Austen.txt',
 'Harry Potter ve Felsefe Taşı - J. K. Rowling.txt',
 'Hatasız Düşünme Sanatı - Rolf Dobelli.txt',
 'Karamazov Kardeşler - Fyodor Mihailoviç Dostoyevski.txt',
 'Körlük - José Saramago.txt',
 'Peygamberler Tarihi - Asim Koksal.txt',
 'Rothschild Para İmparatorluğu - George Armstrong.txt',
 'Saatleri Ayarlama Enstitüsü - Ahmet Hamdi Tanpınar.txt',
 'Satranç - Stefan Zweig.txt',
 "Sherlock Holmes'ün Maceraları.txt",
 'Simyacı - Paulo Coelho.txt',
 'Sineklerin Tanrısı - William Golding.txt',
 'Suç ve Ceza - Fyodor Mihailoviç Dostoyevski.txt',
 'Taht Oyunları - George R.R. Martin.txt',
 'Tesla - Anlaşılamamış Dahi - Margaret Cheney.txt',
 'Yeraltından N

In [5]:
# Read book content
books = []
for file in files:
    with open(path + file, encoding="utf8") as book:
        books.append(book.read())
books[0][:800]

"İÇİNDEKİLER\n\n\n\nTanıtım\n\n\n\nÖvgüler\n\n\n\nGiriş\n\n\n\nBirinci Kısım\n\nÖlümcül Anılar\n\n\n\nPolisin sanatı\n\nKusursuz kurban\n\n\n\nCennette bela\n\n\n\nSeni o kadar iyi tanıyorum ki, ne düşündüğünü biliyorum\n\n\n\nKötü olasılıklar\n\n\n\nBoyanmış gül kadar kırmızı kan için\n\n\n\nKara delik\n\n\n\nKaya ve taş\n\n\n\nEşsiz adam\n\n\n\nKusursuz mekan\n\n\n\nEşsiz hizmet\n\n\n\nDürüstlüğün önemi\n\nSuçlu hissetmek gereksiz\n\n\n\nMeydan okuma\n\n\n\nÇatışma\n\n\n\nBaşlangıcın sonu\n\n\n\nİkinci Kısım\n\n\n\nKorkunç Oyunlar\n\n\n\nKan gölü\n\n\n\nHiçbir yere gitmeyen ayak izleri\n\n\n\nDünyanın pislikleri\n\n\n\nBir aile dostu\n\n\n\nÖncelikler\n\n\n\nDüzeltme\n\n\n\nİz bırakmadan\n\n\n\nYılın cinayeti\n\n\n\nGurney'in sorgulanması\n\n\n\nBoş çek\n\n\n\nSheridan'ı tanımak\n\n\n\nOlay yerine dönüş\n\n\n\nGeriye doğru\n\n\n\nBüyük köşk\n\n\n\nBronx'tan gelen telefon\n\n\n\nÜçüncü Kısım\n\n\n\nBaşa Dönüş\n\n\n\nTemizlikçi geliyor\n\n\n\nBerbat"

In [6]:
# Word count for each book
for book_no,book in enumerate(books):
    words = book.split()
    print(str(len(words)) + " words  ==>  " + files[book_no])

93781 words  ==>  Aklından Bir Sayı Tut - John Verdon.txt
254903 words  ==>  Anna Karenina - Lev Nikolayeviç Tolstoy.txt
90445 words  ==>  Aşk ve Gurur - Jane Austen.txt
55548 words  ==>  Beyaz Diş - Jack London.txt
30685 words  ==>  Beyaz Zambaklar Ülkesinde.txt
59758 words  ==>  Bülbülü Öldürmek - Harper Lee.txt
62934 words  ==>  Erkekler Mars'tan, kadınlar Venüs'ten.txt
67901 words  ==>  Eylül - Mehmed Rauf.txt
82955 words  ==>  Gurur ve Önyargı - Jane Austen.txt
55657 words  ==>  Harry Potter ve Felsefe Taşı - J. K. Rowling.txt
35176 words  ==>  Hatasız Düşünme Sanatı - Rolf Dobelli.txt
236642 words  ==>  Karamazov Kardeşler - Fyodor Mihailoviç Dostoyevski.txt
85703 words  ==>  Körlük - José Saramago.txt
123858 words  ==>  Peygamberler Tarihi - Asim Koksal.txt
35892 words  ==>  Rothschild Para İmparatorluğu - George Armstrong.txt
93209 words  ==>  Saatleri Ayarlama Enstitüsü - Ahmet Hamdi Tanpınar.txt
17522 words  ==>  Satranç - Stefan Zweig.txt
464023 words  ==>  Sherlock Holmes'ü

In [7]:
# To have appropriate tokenization
for book_no, book in enumerate(books):
    # Remove extra space
    book = " ".join(book.split())
    # ... -> .
    book = re.sub(r'\.\.\.','.',book)
    
    book = re.sub(r'.”', '.', book)
    books[book_no] = book
    
books[0][:800]

"İÇİNDEKİLER Tanıtım Övgüler Giriş Birinci Kısım Ölümcül Anılar Polisin sanatı Kusursuz kurban Cennette bela Seni o kadar iyi tanıyorum ki, ne düşündüğünü biliyorum Kötü olasılıklar Boyanmış gül kadar kırmızı kan için Kara delik Kaya ve taş Eşsiz adam Kusursuz mekan Eşsiz hizmet Dürüstlüğün önemi Suçlu hissetmek gereksiz Meydan okuma Çatışma Başlangıcın sonu İkinci Kısım Korkunç Oyunlar Kan gölü Hiçbir yere gitmeyen ayak izleri Dünyanın pislikleri Bir aile dostu Öncelikler Düzeltme İz bırakmadan Yılın cinayeti Gurney'in sorgulanması Boş çek Sheridan'ı tanımak Olay yerine dönüş Geriye doğru Büyük köşk Bronx'tan gelen telefon Üçüncü Kısım Başa Dönüş Temizlikçi geliyor Berbat bir gece Karanlık bir gün Işığa doğru sendeleyiş Bir olaydan diğerine Üçlü felaket Zor adam Seninle bir randevumuz var,"

In [8]:
# Split text to sentences.
sentences = []
for book in books:
    # Tokenize
    tokenized_book = tokenize.sent_tokenize(book)
    for sentence in tokenized_book:
        sentences.append(sentence)
print("Total {} sentences.".format(len(sentences)))

Total 308954 sentences.


In [9]:
sentences[70:75]

['Gözler her zaman işin en zor kısmıydı - gözler ve ağız - ama anahtar noktalardı.',
 'Bazen küçücük bir noktanın duruşu ve yoğunluğu üzerinde saatlerce çalışırdı.',
 'O kadar uğraşmasına rağmen bazen çok iyi sonuçlar elde edemezdi.',
 "Yeterince iyi olmadıkları için, bu sonuçlardan Sonya'ya ve tabii ki Madeleine'e bahsetmezdi.",
 'Gözlerin sırrı, gerilimi ve çelişkiyi her şeyden daha iyi yakalayabilmesindeydi.']

In [10]:
# Clean sentences
cleaned_sentences = []
for sentence in sentences:
    # Remove all, hold space and string
    sentence = re.sub(r'[^\w\s]','',sentence)
    sentence = re.sub(r'_','',sentence)
    # Remove extra space
    sentence = " ".join(sentence.split())
    cleaned_sentences.append(sentence)

In [11]:
cleaned_sentences[70:75]

['Gözler her zaman işin en zor kısmıydı gözler ve ağız ama anahtar noktalardı',
 'Bazen küçücük bir noktanın duruşu ve yoğunluğu üzerinde saatlerce çalışırdı',
 'O kadar uğraşmasına rağmen bazen çok iyi sonuçlar elde edemezdi',
 'Yeterince iyi olmadıkları için bu sonuçlardan Sonyaya ve tabii ki Madeleinee bahsetmezdi',
 'Gözlerin sırrı gerilimi ve çelişkiyi her şeyden daha iyi yakalayabilmesindeydi']

In [12]:
# Convert characters to integers
voc2int = {}
ch_count = 0
for sentence in cleaned_sentences:
    for character in sentence:
        if character not in voc2int:
            voc2int[character] = ch_count
            ch_count += 1

# Add special tokens to voc2int
# PAD - Padding
# EOS - End of sentence
# GO  - Start of the sentence
tokens = ['<PAD>','<EOS>','<GO>']
for token in tokens:
    voc2int[token] = ch_count
    ch_count += 1

In [13]:
print('Length of the vocabulary: ' + str(len(voc2int)))
print(sorted(voc2int))

Length of the vocabulary: 98
[' ', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '<EOS>', '<GO>', '<PAD>', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'Â', 'Å', 'Ç', 'É', 'Î', 'Ö', 'Û', 'Ü', 'à', 'á', 'â', 'ä', 'ç', 'è', 'é', 'ê', 'ë', 'í', 'î', 'ó', 'ô', 'ö', 'ù', 'ú', 'û', 'ü', 'Ğ', 'ğ', 'İ', 'ı', 'Ş', 'ş']


In [14]:
# Convert integers to characters
int2voc = {}
for character, value in voc2int.items():
    int2voc[value] = character
print(int2voc[0])

İ


In [15]:
# Convert sentences to integers
def sentence_to_integer(sentence):
    int_sentence = []
    for character in sentence:
        int_sentence.append(voc2int[character])
    return int_sentence

In [16]:
# Convert sentences to integers
sent2int = []

for sentence in cleaned_sentences:
    sent2int.append(sentence_to_integer(sentence))

print(cleaned_sentences[3])
print(sent2int[3])

Not şöyle devam etmektedir Sırlarını nasıl bildiğimi göreceksin
[2, 30, 13, 8, 24, 40, 38, 19, 20, 8, 37, 20, 16, 10, 14, 8, 20, 13, 14, 20, 33, 13, 20, 37, 23, 21, 8, 36, 12, 21, 19, 10, 21, 12, 11, 12, 8, 11, 10, 27, 12, 19, 8, 34, 23, 19, 37, 23, 39, 23, 14, 23, 8, 17, 40, 21, 20, 26, 20, 33, 27, 23, 11]


In [17]:
# Find length of the sentences
lengths = []
for sentence in sent2int:
    lengths.append(len(sentence))
lengths = pd.DataFrame(lengths, columns=["counts"])
lengths.describe()

Unnamed: 0,counts
count,308954.0
mean,67.392036
std,67.771632
min,0.0
25%,28.0
50%,52.0
75%,89.0
max,5586.0


In [18]:
# Limitation of the training data
max_length = 45
min_length = 15

filtered_sentences = []

for sentence in sent2int:
    if len(sentence) <= max_length and len(sentence) >= min_length:
        filtered_sentences.append(sentence)
print("Total filtered sentences to train: " + str(len(filtered_sentences)))

Total filtered sentences to train: 102925


In [19]:
# Split the data into training and testing sentences
train, test = train_test_split(filtered_sentences, test_size = 0.1, random_state = 53)
print("Number of training sentences:", len(train))
print("Number of testing sentences:", len(test))

Number of training sentences: 92632
Number of testing sentences: 10293


In [20]:
# When we sort the sentences the batches will include similar length sentences, so less padding will be used.
# Hence, the model will train faster.
sorted_train = []
sorted_test = []
for i in range(min_length, max_length+1):
    for sentence in train:
        if len(sentence) == i:
            sorted_train.append(sentence)
    for sentence in test:
        if len(sentence) == i:
            sorted_test.append(sentence)

In [21]:
"""
letters = ['A', 'B', 'C', 'Ç', 'D', 'E', 'F', 'G', 'Ğ', 'H', 'I',
           'İ', 'J', 'K', 'L', 'M', 'N', 'O', 'Ö', 'P', 'Q', 'R', 
           'S', 'Ş', 'T', 'U', 'Ü', 'V', 'W', 'X', 'Y', 'Z',
"""
           
letters = ['a', 'b', 'c', 'ç', 'd', 'e', 'f', 'g', 'ğ', 'h', 'ı', 'i', 
           'j', 'k', 'l', 'm', 'n', 'o', 'ö', 'p', 'q', 'r', 's', 
           'ş', 't', 'u', 'ü', 'v', 'w', 'x', 'y' ,'z']

# Creates spelling mistakes by adding, relocating and removing characters
def create_noise(sentence, threshold):
    noisy_sentence = []
    ch = 0
    while ch < len(sentence):
        random = np.random.random()
        # If threshold close to 0 do not change, if it is close to 1 change everything
        if random > threshold:
            noisy_sentence.append(sentence[ch])
        else:
            random = np.random.random()
            # 25% chance characters will swap to each other
            if random >= 0.75:
                # If it is last character
                if ch == (len(sentence) - 1):
                    continue
                else:
                    noisy_sentence.append(sentence[ch+1])
                    noisy_sentence.append(sentence[ch])
                    ch += 1
            # 25% chance to add an extra letter
            elif random >= 0.5:
                extra_letter = np.random.choice(letters, 1)[0]
                noisy_sentence.append(sentence[ch])
                noisy_sentence.append(voc2int[extra_letter])
            # 25% chance the letter is missing
            elif random >= 0.25:
                pass
            # 25% chance change with wrong letter
            elif random >= 0:
                wrong_letter = np.random.choice(letters, 1)[0]
                noisy_sentence.append(voc2int[wrong_letter])
        ch +=1
    return noisy_sentence

In [22]:
# Translate int sentence to string sentence
def translate_sentence(sentence):
    translated_sentence = ""
    for character in sentence:
        translated_sentence += int2voc[character]
    return translated_sentence

In [23]:
# Example mistakes

# 10% chance to make spelling mistake for each letter
threshold = 0.05
for sentence in sorted_train[15000:15005]:
    wrong_sentence = create_noise(sentence, threshold)
    print(translate_sentence(sentence))
    print(translate_sentence(wrong_sentence))

Gene tıraşı uzamıştı
Gene tıraşı uzamrıtşı
Çok oluyor mu geleli
Çok oluyor mu egleli
Işığa gerek yoktu ki
Işığa gerek yoktu ki
Süreyya deli gibiydi
Süreyy adeoli gibiydi
Oysa ne çok şey oldu
Oisa ne çok şey old


In [24]:
# Add padding to the sentences to have same length
def pad_sentence_batch(sentence_batch):
    # Find max length in this batch
    max_sentence = max([len(sentence) for sentence in sentence_batch])
    return [sentence + [voc2int['<PAD>']] * (max_sentence - len(sentence)) for sentence in sentence_batch]

In [25]:
# Batching
def get_batches(sorted_sentences, batch_size, threshold):

    for batch_no in range(len(sorted_sentences)//batch_size):
        start_no = batch_no * batch_size
        sentences_batch = copy.deepcopy(sorted_sentences[start_no:start_no + batch_size])
        sentences_batch_copy = copy.deepcopy(sorted_sentences[start_no:start_no + batch_size])
        
        sentences_batch_noisy = []
        for sentence in sentences_batch:
            sentence = create_noise(sentence, threshold)
            sentence.append(voc2int['<EOS>'])
            sentence.insert(0,voc2int['<GO>'])            
            sentences_batch_noisy.append(sentence)

            
        sentences_batch_final = []
        
        # Add EOS token
        for sentence in sentences_batch_copy:
            sentence.append(voc2int['<EOS>'])
            sentence.insert(0,voc2int['<GO>'])
            sentences_batch_final.append(sentence)
            
        
        pad_sentences_batch = np.array(pad_sentence_batch(sentences_batch_final))
        pad_sentences_noisy_batch = np.array(pad_sentence_batch(sentences_batch_noisy))
        
        yield pad_sentences_noisy_batch, pad_sentences_batch

### Encoder and Decoder Model

In [26]:
class Encoder(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim, enc_units, batch_sz):
        super(Encoder, self).__init__()
        self.batch_sz = batch_sz
        self.enc_units = enc_units
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(self.enc_units,
                                       return_sequences=True,
                                       return_state=True,
                                       recurrent_initializer='glorot_uniform')

    def call(self, x, hidden):
        x = self.embedding(x)
        output, state = self.gru(x, initial_state=hidden)
        return output, state

    def initialize_hidden_state(self):
        return tf.zeros((self.batch_sz, self.enc_units))

In [27]:
vocab_inp_size = len(voc2int)+1
vocab_tar_size = len(voc2int)+1
embedding_dim = 256
units = 1024
batch_size = 64
epochs = 50
threshold = 0.075

In [28]:
example_input_batch, example_target_batch = next(get_batches(sorted_train, batch_size, threshold))

In [29]:
encoder = Encoder(vocab_inp_size, embedding_dim, units, batch_size)

# sample input
sample_hidden = encoder.initialize_hidden_state()
sample_output, sample_hidden = encoder(example_input_batch, sample_hidden)
print('Encoder output shape: (batch size, sequence length, units)', sample_output.shape)
print('Encoder Hidden state shape: (batch size, units)', sample_hidden.shape)


Encoder output shape: (batch size, sequence length, units) (64, 21, 1024)
Encoder Hidden state shape: (batch size, units) (64, 1024)


In [30]:
class BahdanauAttention(tf.keras.layers.Layer):
    def __init__(self, units):
        super(BahdanauAttention, self).__init__()
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)

    def call(self, query, values):
        # query hidden state shape == (batch_size, hidden size)
        # query_with_time_axis shape == (batch_size, 1, hidden size)
        # values shape == (batch_size, max_len, hidden size)
        # we are doing this to broadcast addition along the time axis to calculate the score
        query_with_time_axis = tf.expand_dims(query, 1)

        # score shape == (batch_size, max_length, 1)
        # we get 1 at the last axis because we are applying score to self.V
        # the shape of the tensor before applying self.V is (batch_size, max_length, units)
        score = self.V(tf.nn.tanh(
            self.W1(query_with_time_axis) + self.W2(values)))

        # attention_weights shape == (batch_size, max_length, 1)
        attention_weights = tf.nn.softmax(score, axis=1)

        # context_vector shape after sum == (batch_size, hidden_size)
        context_vector = attention_weights * values
        context_vector = tf.reduce_sum(context_vector, axis=1)

        return context_vector, attention_weights

In [31]:
# sample attention
attention_layer = BahdanauAttention(10)
attention_result, attention_weights = attention_layer(sample_hidden, sample_output)

print("Attention result shape: (batch size, units)", attention_result.shape)
print("Attention weights shape: (batch_size, sequence_length, 1)", attention_weights.shape)

Attention result shape: (batch size, units) (64, 1024)
Attention weights shape: (batch_size, sequence_length, 1) (64, 21, 1)


In [32]:
class Decoder(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim, dec_units, batch_sz):
        super(Decoder, self).__init__()
        self.batch_sz = batch_sz
        self.dec_units = dec_units
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(self.dec_units,
                                       return_sequences=True,
                                       return_state=True,
                                       recurrent_initializer='glorot_uniform')
        self.fc = tf.keras.layers.Dense(vocab_size)

        # used for attention
        self.attention = BahdanauAttention(self.dec_units)

    def call(self, x, hidden, enc_output):
        # enc_output shape == (batch_size, max_length, hidden_size)
        context_vector, attention_weights = self.attention(hidden, enc_output)

        # x shape after passing through embedding == (batch_size, 1, embedding_dim)
        x = self.embedding(x)

        # x shape after concatenation == (batch_size, 1, embedding_dim + hidden_size)
        x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)

        # passing the concatenated vector to the GRU
        output, state = self.gru(x)

        # output shape == (batch_size * 1, hidden_size)
        output = tf.reshape(output, (-1, output.shape[2]))

        # output shape == (batch_size, vocab)
        x = self.fc(output)

        return x, state, attention_weights

In [33]:
# sample output
decoder = Decoder(vocab_tar_size, embedding_dim, units, batch_size)

sample_decoder_output, _, _ = decoder(tf.random.uniform((batch_size, 1)),
                                      sample_hidden, sample_output)

print('Decoder output shape: (batch_size, vocab size)', sample_decoder_output.shape)

Decoder output shape: (batch_size, vocab size) (64, 99)


### Define Optimizer and Loss Function

In [34]:
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0002)
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True,
                                                            reduction='none')

def loss_function(real, pred):
    mask = tf.math.logical_not(tf.math.equal(real, 0))
    loss_ = loss_object(real, pred)

    mask = tf.cast(mask, dtype=loss_.dtype)
    loss_ *= mask

    return tf.reduce_mean(loss_)

### Training

In [35]:
def train_step(inp, targ, enc_hidden):
    loss = 0
    
    with tf.GradientTape() as tape:
        enc_output, enc_hidden = encoder(inp, enc_hidden)

        dec_hidden = enc_hidden

        dec_input = tf.expand_dims([voc2int['<GO>']] * batch_size, 1)

        # Teacher forcing - feeding the target as the next input
        for t in range(1, targ.shape[1]):
            # passing enc_output to the decoder
            predictions, dec_hidden, _ = decoder(dec_input, dec_hidden, enc_output)

            loss += loss_function(targ[:, t], predictions)

            # using teacher forcing
            dec_input = tf.expand_dims(targ[:, t], 1)

    batch_loss = (loss / int(targ.shape[1]))

    variables = encoder.trainable_variables + decoder.trainable_variables

    gradients = tape.gradient(loss, variables)

    optimizer.apply_gradients(zip(gradients, variables))

    return batch_loss

In [36]:
for epoch in range(epochs):
    start_epoch = time.time()
    start_batch = time.time()
    
    enc_hidden = encoder.initialize_hidden_state()
    total_loss = 0
    total_test_loss = 0
    
    total_batch = len(sorted_train)//batch_size -1 
    total_test_batch = len(sorted_test)//batch_size -1 
    
    
    # Train Loss
    
    for (batch, (inp, targ)) in enumerate(get_batches(sorted_train, batch_size, threshold)):
    
        batch_loss = train_step(inp, targ, enc_hidden)
        total_loss += batch_loss

        if batch % 100 == 0:
            end_batch = time.time()
            print(f'Epoch {epoch+1} Batch {batch} Train Loss {batch_loss.numpy():.4f} Time {end_batch - start_batch:.2f} sec') 
            start_batch = time.time()
                    
        # Test Loss
        
        if(batch == total_batch):
        
            for (test_batch, (test_inp, test_targ)) in enumerate(get_batches(sorted_test, batch_size, threshold)):

                test_batch_loss = train_step(test_inp, test_targ, enc_hidden)
                total_test_loss += test_batch_loss 
                
            print(f'\n**Epoch {epoch+1} Test Loss {total_test_loss/total_test_batch:.4f} \n')
    
    
    end_epoch = time.time()
    print(f'**Epoch {epoch+1} Train Loss {total_loss/total_batch:.4f}')
    print(f'Time taken for 1 epoch {end_epoch -start_epoch:.2f} sec\n')

Epoch 1 Batch 0 Train Loss 4.3212 Time 0.52 sec
Epoch 1 Batch 100 Train Loss 2.8294 Time 27.37 sec
Epoch 1 Batch 200 Train Loss 2.4438 Time 31.04 sec
Epoch 1 Batch 300 Train Loss 2.4652 Time 34.60 sec
Epoch 1 Batch 400 Train Loss 2.3584 Time 38.56 sec
Epoch 1 Batch 500 Train Loss 2.3165 Time 42.94 sec
Epoch 1 Batch 600 Train Loss 2.1291 Time 45.05 sec
Epoch 1 Batch 700 Train Loss 2.1496 Time 48.29 sec
Epoch 1 Batch 800 Train Loss 1.9565 Time 54.14 sec
Epoch 1 Batch 900 Train Loss 2.3416 Time 55.83 sec
Epoch 1 Batch 1000 Train Loss 1.9970 Time 59.13 sec
Epoch 1 Batch 1100 Train Loss 1.7601 Time 62.52 sec
Epoch 1 Batch 1200 Train Loss 1.3003 Time 66.11 sec
Epoch 1 Batch 1300 Train Loss 0.7466 Time 71.83 sec
Epoch 1 Batch 1400 Train Loss 0.7217 Time 74.73 sec

**Epoch 1 Test Loss 0.9408 

**Epoch 1 Train Loss 2.0145
Time taken for 1 epoch 831.42 sec

Epoch 2 Batch 0 Train Loss 2.6911 Time 0.27 sec
Epoch 2 Batch 100 Train Loss 2.1131 Time 28.57 sec
Epoch 2 Batch 200 Train Loss 1.9334 Time 

Epoch 10 Batch 900 Train Loss 0.2749 Time 57.53 sec
Epoch 10 Batch 1000 Train Loss 0.2256 Time 62.05 sec
Epoch 10 Batch 1100 Train Loss 0.2323 Time 65.06 sec
Epoch 10 Batch 1200 Train Loss 0.2519 Time 71.15 sec
Epoch 10 Batch 1300 Train Loss 0.2620 Time 76.97 sec
Epoch 10 Batch 1400 Train Loss 1.7715 Time 80.90 sec

**Epoch 10 Test Loss 0.3921 

**Epoch 10 Train Loss 0.3173
Time taken for 1 epoch 890.04 sec

Epoch 11 Batch 0 Train Loss 0.3437 Time 0.28 sec
Epoch 11 Batch 100 Train Loss 0.3001 Time 29.37 sec
Epoch 11 Batch 200 Train Loss 0.2982 Time 32.82 sec
Epoch 11 Batch 300 Train Loss 0.2185 Time 35.43 sec
Epoch 11 Batch 400 Train Loss 0.2594 Time 38.75 sec
Epoch 11 Batch 500 Train Loss 0.2355 Time 41.97 sec
Epoch 11 Batch 600 Train Loss 0.2745 Time 45.91 sec
Epoch 11 Batch 700 Train Loss 0.2747 Time 49.44 sec
Epoch 11 Batch 800 Train Loss 0.2603 Time 53.75 sec
Epoch 11 Batch 900 Train Loss 0.2225 Time 56.21 sec
Epoch 11 Batch 1000 Train Loss 0.2585 Time 59.14 sec
Epoch 11 Batch 110


**Epoch 19 Test Loss 0.2841 

**Epoch 19 Train Loss 0.2478
Time taken for 1 epoch 824.50 sec

Epoch 20 Batch 0 Train Loss 0.2187 Time 0.25 sec
Epoch 20 Batch 100 Train Loss 0.2531 Time 28.54 sec
Epoch 20 Batch 200 Train Loss 0.2077 Time 31.76 sec
Epoch 20 Batch 300 Train Loss 0.1748 Time 35.22 sec
Epoch 20 Batch 400 Train Loss 0.1851 Time 38.38 sec
Epoch 20 Batch 500 Train Loss 0.2369 Time 41.68 sec
Epoch 20 Batch 600 Train Loss 0.2117 Time 45.23 sec
Epoch 20 Batch 700 Train Loss 0.2252 Time 48.82 sec
Epoch 20 Batch 800 Train Loss 0.2333 Time 51.68 sec
Epoch 20 Batch 900 Train Loss 0.2927 Time 55.36 sec
Epoch 20 Batch 1000 Train Loss 0.2131 Time 58.20 sec
Epoch 20 Batch 1100 Train Loss 0.2320 Time 61.75 sec
Epoch 20 Batch 1200 Train Loss 0.2460 Time 65.35 sec
Epoch 20 Batch 1300 Train Loss 0.2806 Time 69.25 sec
Epoch 20 Batch 1400 Train Loss 0.2128 Time 72.99 sec

**Epoch 20 Test Loss 0.2098 

**Epoch 20 Train Loss 0.2278
Time taken for 1 epoch 821.18 sec

Epoch 21 Batch 0 Train Loss 

Epoch 29 Batch 400 Train Loss 0.1981 Time 38.37 sec
Epoch 29 Batch 500 Train Loss 0.1941 Time 41.54 sec
Epoch 29 Batch 600 Train Loss 0.1920 Time 45.41 sec
Epoch 29 Batch 700 Train Loss 0.2202 Time 49.04 sec
Epoch 29 Batch 800 Train Loss 0.1874 Time 52.19 sec
Epoch 29 Batch 900 Train Loss 0.1976 Time 55.84 sec
Epoch 29 Batch 1000 Train Loss 0.1605 Time 58.63 sec
Epoch 29 Batch 1100 Train Loss 0.1763 Time 62.35 sec
Epoch 29 Batch 1200 Train Loss 0.1610 Time 66.02 sec
Epoch 29 Batch 1300 Train Loss 0.2147 Time 69.58 sec
Epoch 29 Batch 1400 Train Loss 0.1877 Time 73.39 sec

**Epoch 29 Test Loss 0.1778 

**Epoch 29 Train Loss 0.1865
Time taken for 1 epoch 826.61 sec

Epoch 30 Batch 0 Train Loss 0.1771 Time 0.27 sec
Epoch 30 Batch 100 Train Loss 0.2242 Time 28.63 sec
Epoch 30 Batch 200 Train Loss 0.1342 Time 31.87 sec
Epoch 30 Batch 300 Train Loss 0.1589 Time 35.20 sec
Epoch 30 Batch 400 Train Loss 0.1699 Time 38.45 sec
Epoch 30 Batch 500 Train Loss 0.1925 Time 41.58 sec
Epoch 30 Batch 600 

Epoch 38 Batch 1000 Train Loss 0.1542 Time 58.75 sec
Epoch 38 Batch 1100 Train Loss 0.1512 Time 61.87 sec
Epoch 38 Batch 1200 Train Loss 0.1774 Time 66.27 sec
Epoch 38 Batch 1300 Train Loss 0.1569 Time 69.97 sec
Epoch 38 Batch 1400 Train Loss 0.1927 Time 73.22 sec

**Epoch 38 Test Loss 0.1533 

**Epoch 38 Train Loss 0.1594
Time taken for 1 epoch 825.85 sec

Epoch 39 Batch 0 Train Loss 0.1003 Time 0.27 sec
Epoch 39 Batch 100 Train Loss 0.1989 Time 28.63 sec
Epoch 39 Batch 200 Train Loss 0.1614 Time 31.79 sec
Epoch 39 Batch 300 Train Loss 0.1288 Time 35.11 sec
Epoch 39 Batch 400 Train Loss 0.1504 Time 38.22 sec
Epoch 39 Batch 500 Train Loss 0.1053 Time 41.63 sec
Epoch 39 Batch 600 Train Loss 0.1493 Time 46.11 sec
Epoch 39 Batch 700 Train Loss 0.1657 Time 48.76 sec
Epoch 39 Batch 800 Train Loss 0.1639 Time 52.15 sec
Epoch 39 Batch 900 Train Loss 0.1460 Time 55.67 sec
Epoch 39 Batch 1000 Train Loss 0.1242 Time 60.12 sec
Epoch 39 Batch 1100 Train Loss 0.1608 Time 62.41 sec
Epoch 39 Batch 12

Epoch 48 Batch 0 Train Loss 0.1377 Time 0.28 sec
Epoch 48 Batch 100 Train Loss 0.1535 Time 28.60 sec
Epoch 48 Batch 200 Train Loss 0.1419 Time 31.86 sec
Epoch 48 Batch 300 Train Loss 0.1372 Time 35.23 sec
Epoch 48 Batch 400 Train Loss 0.1547 Time 38.42 sec
Epoch 48 Batch 500 Train Loss 0.1182 Time 41.57 sec
Epoch 48 Batch 600 Train Loss 0.1186 Time 45.47 sec
Epoch 48 Batch 700 Train Loss 0.1407 Time 48.71 sec
Epoch 48 Batch 800 Train Loss 0.1302 Time 52.11 sec
Epoch 48 Batch 900 Train Loss 0.1562 Time 55.87 sec
Epoch 48 Batch 1000 Train Loss 0.1370 Time 58.71 sec
Epoch 48 Batch 1100 Train Loss 0.1151 Time 62.40 sec
Epoch 48 Batch 1200 Train Loss 0.1562 Time 65.99 sec
Epoch 48 Batch 1300 Train Loss 0.1545 Time 69.48 sec
Epoch 48 Batch 1400 Train Loss 0.1614 Time 73.32 sec

**Epoch 48 Test Loss 0.1361 

**Epoch 48 Train Loss 0.1383
Time taken for 1 epoch 825.75 sec

Epoch 49 Batch 0 Train Loss 0.0901 Time 0.26 sec
Epoch 49 Batch 100 Train Loss 0.1648 Time 28.71 sec
Epoch 49 Batch 200 Tra

In [42]:
import os
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                 encoder=encoder,
                                 decoder=decoder)
#checkpoint.save(file_prefix = checkpoint_prefix)

'./training_checkpoints\\ckpt-1'

In [None]:
checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))

In [62]:
def evaluate(sentence):
    
    sentence = sentence_to_integer(sentence)
    
    #print(sentence)
    
    sentence.insert(0,voc2int['<GO>'])
    sentence.append(voc2int['<EOS>'])
    
    #print(sentence)
    
    inputs = tf.convert_to_tensor([sentence])
    
    #print(inputs)

    result = []

    hidden = [tf.zeros((1, units))]
    enc_out, enc_hidden = encoder(inputs, hidden)

    dec_hidden = enc_hidden
    dec_input = tf.expand_dims([voc2int['<GO>']], 0)

    for t in range(max_length):
        predictions, dec_hidden, _ = decoder(dec_input,dec_hidden,enc_out)
        
        predicted_id = tf.argmax(predictions[0]).numpy()
        
    
        if int2voc[predicted_id] == '<EOS>':
            return result, sentence
        else:
            result.append(predicted_id)
            
        # the predicted ID is fed back into the model
        dec_input = tf.expand_dims([predicted_id], 0)

    return result, sentence

In [63]:
def translate(sentence):
    result, sentence = evaluate(sentence)
    
    #print('Input:', sentence)
    
    print('Predicted translation:', translate_sentence(result))

In [64]:
translate("Bugünn hva çk sıcak")

Predicted translation: Bugün hava çok sıcak


In [66]:
translate("Hyvanları sedi")

Predicted translation: Hayvanları sevdi


In [90]:
translate("Size teşekkr edröm")

Predicted translation: Size teşekkür ederim


In [139]:
translate("AAlışveriş ypmaya gidti")

Predicted translation: Alışveriş yapmaya gitti


In [135]:
translate("gazte")

Predicted translation: gazete
