In [1]:
# Mount Google Drive at /content/drive. Later cells import the project's
# Transformer / CustomSchedule modules and load the tokenizers from
# drive/MyDrive, so this cell has to run first.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install tensorflow_datasets
!pip install tensorflow_text

Collecting tensorflow_text
  Downloading tensorflow_text-2.7.3-cp37-cp37m-manylinux2010_x86_64.whl (4.9 MB)
[K     |████████████████████████████████| 4.9 MB 4.0 MB/s 
Installing collected packages: tensorflow-text
Successfully installed tensorflow-text-2.7.3


In [3]:
import collections
import logging
import os
import pathlib 
import re 
import string
import sys 
import time 

import numpy as np
import matplotlib.pyplot as plt 

import tensorflow_datasets as tfds 
import tensorflow_text as text 
import tensorflow as tf

from drive.MyDrive.transformer.transformer import Transformer
from drive.MyDrive.transformer.scheduler import CustomSchedule

In [4]:
# Only let TensorFlow log records of level ERROR or higher through.
logging.getLogger('tensorflow').setLevel(logging.ERROR)

In [5]:
# Load the TED ru_to_en translation dataset together with its metadata.
# as_supervised=True makes each example a 2-tuple; tokenize_pairs below assumes
# the order is (Russian, English) — TODO confirm against the dataset info.
examples, metadata = tfds.load('ted_hrlr_translate/ru_to_en', with_info=True,
                               as_supervised=True)
# Only the train and validation splits are picked up here.
train_examples, val_examples = examples['train'], examples['validation']

[1mDownloading and preparing dataset ted_hrlr_translate/ru_to_en/1.0.0 (download: 124.94 MiB, generated: Unknown size, total: 124.94 MiB) to /root/tensorflow_datasets/ted_hrlr_translate/ru_to_en/1.0.0...[0m


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Extraction completed...: 0 file [00:00, ? file/s]






0 examples [00:00, ? examples/s]

Shuffling and writing examples to /root/tensorflow_datasets/ted_hrlr_translate/ru_to_en/1.0.0.incompleteVDAMLI/ted_hrlr_translate-train.tfrecord


  0%|          | 0/208106 [00:00<?, ? examples/s]

0 examples [00:00, ? examples/s]

Shuffling and writing examples to /root/tensorflow_datasets/ted_hrlr_translate/ru_to_en/1.0.0.incompleteVDAMLI/ted_hrlr_translate-validation.tfrecord


  0%|          | 0/4805 [00:00<?, ? examples/s]

0 examples [00:00, ? examples/s]

Shuffling and writing examples to /root/tensorflow_datasets/ted_hrlr_translate/ru_to_en/1.0.0.incompleteVDAMLI/ted_hrlr_translate-test.tfrecord


  0%|          | 0/5476 [00:00<?, ? examples/s]

[1mDataset ted_hrlr_translate downloaded and prepared to /root/tensorflow_datasets/ted_hrlr_translate/ru_to_en/1.0.0. Subsequent calls will reuse this data.[0m


In [6]:
# SavedModel that holds the tokenizers, used below as tokenizers.ru and
# tokenizers.en. The path is relative, so it resolves against the current
# working directory — presumably /content on Colab, where Drive was mounted;
# confirm before running from another directory.
model_name = 'drive/MyDrive/ru_en_bert_converter'
tokenizers = tf.saved_model.load(model_name)

In [7]:
def tokenize_pairs(ru, en):
    """Tokenize a batch of Russian/English sentence pairs.

    Each side is run through its own tokenizer and the tokenizer output is
    converted with ``to_tensor()`` (presumably padding ragged rows to a
    dense tensor — confirm against the tokenizer export).

    Args:
        ru: Russian sentences to tokenize.
        en: English sentences to tokenize.

    Returns:
        A ``(ru, en)`` tuple holding the converted token tensors.
    """
    ru_tokens = tokenizers.ru.tokenize(ru).to_tensor()
    en_tokens = tokenizers.en.tokenize(en).to_tensor()
    return ru_tokens, en_tokens

In [8]:
BUFFER_SIZE = 20000  # shuffle buffer size used by make_batches
BATCH_SIZE = 64      # number of sentence pairs per batch in make_batches
EPOCHS = 10          # number of passes over train_batches in the training loop

In [9]:
def make_batches(ds):
  """Turn a dataset of raw sentence pairs into tokenized batches.

  The pipeline caches the raw examples, shuffles them with a buffer of
  BUFFER_SIZE, groups them into batches of BATCH_SIZE, tokenizes each batch
  with tokenize_pairs and finally prefetches.

  Args:
    ds: dataset of sentence pairs accepted by tokenize_pairs.

  Returns:
    The batched, tokenized and prefetched dataset.
  """
  shuffled = ds.cache().shuffle(BUFFER_SIZE)
  batched = shuffled.batch(BATCH_SIZE)
  # Tokenization runs after batching, so it sees whole batches of strings.
  tokenized = batched.map(tokenize_pairs, num_parallel_calls=tf.data.AUTOTUNE)
  return tokenized.prefetch(tf.data.AUTOTUNE)
    
# Batched, tokenized input pipelines for the two splits.
# NOTE(review): val_batches is not consumed by any cell visible in this notebook.
train_batches = make_batches(train_examples)
val_batches = make_batches(val_examples)

In [10]:
# Model hyperparameters; all of them are forwarded to Transformer(...) below.
num_layers = 4
d_model = 128  # also parameterizes the CustomSchedule learning-rate schedule
dff = 512
num_heads = 8
dropout_rate = 0.1  # passed as the Transformer's `rate` argument

In [11]:
# The learning rate comes from the project's CustomSchedule, built from d_model.
learning_rate = CustomSchedule(d_model)

optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98,
                                     epsilon=1e-9)

# from_logits=True: the model output is treated as raw logits.
# reduction='none': the loss is returned unreduced so that loss_function can
# zero out masked positions before averaging.
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction='none')

In [12]:
# Running means of the per-batch loss and accuracy. train_step feeds them and
# the training loop resets them at the start of every epoch.
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.Mean(name='train_accuracy')

In [13]:
# Build the model. Vocabulary sizes are read from the loaded tokenizers:
# Russian for the input side, English for the target side.
transformer = Transformer(
    num_layers=num_layers,
    d_model=d_model,
    num_heads=num_heads,
    dff=dff,
    input_vocab_size=tokenizers.ru.get_vocab_size().numpy(),
    target_vocab_size=tokenizers.en.get_vocab_size().numpy(),
    # NOTE(review): pe_input / pe_target are presumably the maximum positions
    # covered by the positional encodings — confirm in the Transformer module.
    pe_input=1000,
    pe_target=1000,
    rate=dropout_rate)

In [14]:
# Checkpoints are written under ./checkpoints/train (relative to the working
# directory) and track both the model and the optimizer state.
checkpoint_path = "./checkpoints/train"

ckpt = tf.train.Checkpoint(transformer=transformer,
                           optimizer=optimizer)

# Keep only the five most recent checkpoints on disk.
ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_path, max_to_keep=5)

# Resume from the newest checkpoint when one exists, so that re-running the
# notebook continues training instead of silently starting from scratch.
if ckpt_manager.latest_checkpoint:
  ckpt.restore(ckpt_manager.latest_checkpoint)
  print(f'Restored checkpoint {ckpt_manager.latest_checkpoint}')

In [15]:
def loss_function(real, pred):
    """Mean cross-entropy over the target positions whose id is not 0.

    Positions where ``real == 0`` (the value used as padding) are excluded
    from both the sum and the count.

    Args:
        real: integer tensor of target token ids.
        pred: logits produced by the model for those positions.

    Returns:
        A scalar tensor with the masked mean loss. If every position is
        masked the result is 0 rather than NaN.
    """
    mask = tf.math.logical_not(tf.math.equal(real, 0))
    loss_ = loss_object(real, pred)

    # Zero out the loss on masked positions before summing.
    mask = tf.cast(mask, dtype=loss_.dtype)
    loss_ *= mask

    # divide_no_nan returns 0 when the denominator is 0, which guards against
    # a batch that contains only masked positions.
    return tf.math.divide_no_nan(tf.reduce_sum(loss_), tf.reduce_sum(mask))


def accuracy_function(real, pred):
    """Fraction of non-zero target positions whose argmax prediction is right.

    Positions where ``real == 0`` (the value used as padding) count neither
    as hits nor towards the total.

    Args:
        real: integer tensor of target token ids.
        pred: model outputs; the predicted id is the argmax over axis 2.

    Returns:
        A scalar float32 tensor with the masked accuracy. If every position
        is masked the result is 0 rather than NaN.
    """
    accuracies = tf.equal(real, tf.argmax(pred, axis=2))

    # A position only counts as correct when it is also unmasked.
    mask = tf.math.logical_not(tf.math.equal(real, 0))
    accuracies = tf.math.logical_and(mask, accuracies)

    accuracies = tf.cast(accuracies, dtype=tf.float32)
    mask = tf.cast(mask, dtype=tf.float32)
    # divide_no_nan returns 0 when there are no unmasked positions at all.
    return tf.math.divide_no_nan(tf.reduce_sum(accuracies), tf.reduce_sum(mask))

In [16]:
# Input signature for train_step: two int64 tensors, each with two dimensions
# of unspecified size. Leaving the sizes as None lets one traced graph serve
# batches whose batch size and sequence length differ.
train_step_signature = [
    tf.TensorSpec(shape=(None, None), dtype=tf.int64),
    tf.TensorSpec(shape=(None, None), dtype=tf.int64),
]


@tf.function(input_signature=train_step_signature)
def train_step(inp, tar):
  """Run one optimization step on a batch and update the running metrics.

  Args:
    inp: int64 tensor of input token ids.
    tar: int64 tensor of target token ids.
  """
  # The model is fed the target without its last token and is scored against
  # the target without its first token, i.e. the same sequence shifted by one.
  decoder_input, labels = tar[:, :-1], tar[:, 1:]

  with tf.GradientTape() as tape:
    logits = transformer([inp, decoder_input], training=True)
    batch_loss = loss_function(labels, logits)

  variables = transformer.trainable_variables
  grads = tape.gradient(batch_loss, variables)
  optimizer.apply_gradients(zip(grads, variables))

  # Feed the running-mean metrics reported by the training loop.
  train_loss(batch_loss)
  train_accuracy(accuracy_function(labels, logits))

In [17]:
# Train for EPOCHS passes over train_batches, reporting running metrics every
# 50 batches and at the end of each epoch.
for epoch in range(EPOCHS):
  start = time.time()

  # The metrics are running means, so clear them at the start of every epoch.
  train_loss.reset_states()
  train_accuracy.reset_states()

  for (batch, (inp, tar)) in enumerate(train_batches):
    train_step(inp, tar)

    if batch % 50 == 0:
      print(f'Epoch {epoch + 1} Batch {batch} Loss {train_loss.result():.4f} Accuracy {train_accuracy.result():.4f}')

  # Checkpoint every 5th epoch, and always after the last epoch so the final
  # weights are saved even when EPOCHS is not a multiple of 5.
  if (epoch + 1) % 5 == 0 or epoch + 1 == EPOCHS:
    ckpt_save_path = ckpt_manager.save()
    print(f'Saving checkpoint for epoch {epoch+1} at {ckpt_save_path}')

  print(f'Epoch {epoch + 1} Loss {train_loss.result():.4f} Accuracy {train_accuracy.result():.4f}')

  print(f'Time taken for 1 epoch: {time.time() - start:.2f} secs\n')

Epoch 1 Batch 0 Loss 8.9853 Accuracy 0.0006
Epoch 1 Batch 50 Loss 8.9244 Accuracy 0.0049
Epoch 1 Batch 100 Loss 8.8124 Accuracy 0.0225
Epoch 1 Batch 150 Loss 8.6907 Accuracy 0.0284
Epoch 1 Batch 200 Loss 8.5421 Accuracy 0.0315
Epoch 1 Batch 250 Loss 8.3674 Accuracy 0.0333
Epoch 1 Batch 300 Loss 8.1728 Accuracy 0.0344
Epoch 1 Batch 350 Loss 7.9723 Accuracy 0.0352
Epoch 1 Batch 400 Loss 7.7803 Accuracy 0.0383
Epoch 1 Batch 450 Loss 7.6060 Accuracy 0.0471
Epoch 1 Batch 500 Loss 7.4542 Accuracy 0.0568
Epoch 1 Batch 550 Loss 7.3139 Accuracy 0.0663
Epoch 1 Batch 600 Loss 7.1856 Accuracy 0.0753
Epoch 1 Batch 650 Loss 7.0644 Accuracy 0.0834
Epoch 1 Batch 700 Loss 6.9518 Accuracy 0.0908
Epoch 1 Batch 750 Loss 6.8484 Accuracy 0.0975
Epoch 1 Batch 800 Loss 6.7513 Accuracy 0.1039
Epoch 1 Batch 850 Loss 6.6632 Accuracy 0.1100
Epoch 1 Batch 900 Loss 6.5827 Accuracy 0.1157
Epoch 1 Batch 950 Loss 6.5061 Accuracy 0.1214
Epoch 1 Batch 1000 Loss 6.4351 Accuracy 0.1266
Epoch 1 Batch 1050 Loss 6.3706 Accur

In [18]:
class Translator(tf.Module):
  """Translates a Russian sentence into English by greedy decoding.

  At every step the model is run on everything generated so far and the
  highest-scoring next token is appended, until the end token is produced
  or ``max_length`` tokens have been generated.
  """

  def __init__(self, tokenizers, transformer):
    """Store the tokenizers and the model used for decoding.

    Args:
      tokenizers: object exposing ``ru`` and ``en`` tokenizers.
      transformer: model called as ``transformer([enc_in, dec_in], training=False)``.
    """
    # Initialise tf.Module's own state (name / name scope) before assigning
    # the tracked attributes.
    super().__init__()
    self.tokenizers = tokenizers
    self.transformer = transformer

  def __call__(self, sentence, max_length=20):
    """Translate ``sentence`` and return the text and its tokens.

    Args:
      sentence: string ``tf.Tensor``; a scalar is expanded to a batch of one.
      max_length: maximum number of decoding steps.

    Returns:
      A ``(text, tokens)`` tuple: the detokenized translation and the
      looked-up tokens, each taken from the first row of the output.

    Raises:
      AssertionError: if ``sentence`` is not a ``tf.Tensor``.
    """
    assert isinstance(sentence, tf.Tensor)
    if len(sentence.shape) == 0:
      sentence = sentence[tf.newaxis]

    sentence = self.tokenizers.ru.tokenize(sentence).to_tensor()

    encoder_input = sentence

    # Tokenizing an empty string presumably yields just the start and end
    # markers — confirm against the tokenizer export.
    start_end = self.tokenizers.en.tokenize([''])[0]
    start = start_end[0][tf.newaxis]
    end = start_end[1][tf.newaxis]

    # A TensorArray (rather than a Python list) lets the loop below be traced
    # when this method is called from inside a tf.function.
    output_array = tf.TensorArray(dtype=tf.int64, size=0, dynamic_size=True)
    output_array = output_array.write(0, start)

    for i in tf.range(max_length):
      output = tf.transpose(output_array.stack())
      predictions = self.transformer([encoder_input, output], training=False)

      # Keep only the scores for the last position along axis 1.
      predictions = predictions[:, -1:, :]

      predicted_id = tf.argmax(predictions, axis=-1)

      # Append the chosen token; it becomes part of the next step's input.
      output_array = output_array.write(i+1, predicted_id[0])

      if predicted_id == end:
        break

    output = tf.transpose(output_array.stack())
    # Named translated_text so the local does not shadow the imported
    # tensorflow_text alias `text`.
    translated_text = self.tokenizers.en.detokenize(output)[0]

    tokens = self.tokenizers.en.lookup(output)[0]

    return translated_text, tokens

In [19]:
# Combine the loaded tokenizers and the trained model into a callable translator.
translator = Translator(tokenizers, transformer)

In [20]:
def print_translation(sentence, tokens, ground_truth):
  """Print the input, the model's prediction and the reference translation.

  Each line starts with a label padded to 15 characters, followed by ``": "``.

  Args:
    sentence: the source sentence, printed as-is.
    tokens: object whose ``.numpy()`` returns UTF-8 encoded bytes holding the
      predicted translation.
    ground_truth: the reference translation, printed as-is.
  """
  # The label is "Input" without a colon: the format already appends ": ", so
  # the old "Input:" label printed a double colon.
  print(f'{"Input":15s}: {sentence}')
  print(f'{"Prediction":15s}: {tokens.numpy().decode("utf-8")}')
  print(f'{"Ground truth":15s}: {ground_truth}')

In [21]:
# Try the translator on one short sentence.
# NOTE: this unpacks two return values, so it only works while `translator`
# is the Translator instance, i.e. before it is rebound to the export wrapper.
sentence = "Привет."
ground_truth = "Hi."

translated_text, translated_tokens= translator(
    tf.constant(sentence))
print_translation(sentence, translated_text, ground_truth)

Input:         : Привет.
Prediction     : hello .
Ground truth   : Hi.


In [22]:
class ExportTranslator(tf.Module):
  """Wrapper exposing a translator through a fixed string-in, string-out signature."""

  def __init__(self, translator):
    """Store the wrapped translator.

    Args:
      translator: callable accepting ``(sentence, max_length=...)`` and
        returning a ``(text, tokens)`` tuple.
    """
    # Initialise tf.Module's own state (name / name scope) before assigning
    # the tracked attribute.
    super().__init__()
    self.translator = translator

  @tf.function(input_signature=[tf.TensorSpec(shape=[], dtype=tf.string)])
  def __call__(self, sentence):
    """Translate one scalar string and return only the translated text.

    Args:
      sentence: scalar string tensor to translate.

    Returns:
      The first element returned by the wrapped translator (the text).
    """
    # Decode with a limit of 100 steps; the token strings are discarded.
    result, _ = self.translator(sentence, max_length=100)

    return result

In [23]:
# NOTE: this rebinds `translator` from the Translator instance to the export
# wrapper, which returns only the text. The cell is not idempotent: running it
# a second time wraps the wrapper, and the earlier demo cell (which unpacks two
# return values) no longer works after it.
translator = ExportTranslator(translator)

In [24]:
# Call the export wrapper directly with a Python string; .numpy() gives the
# translated text as bytes.
translator("Как твои дела?").numpy()

b'how do you do that ?'

In [25]:
# Export the wrapper as a SavedModel into the ru_eng_translator directory
# (relative to the current working directory).
tf.saved_model.save(translator, export_dir='ru_eng_translator')



In [26]:
# Load the exported SavedModel back from disk to check that it is usable.
reloaded = tf.saved_model.load('ru_eng_translator')

In [28]:
# Call the reloaded model the same way as the in-memory export wrapper.
reloaded("Как дела?").numpy()

b'how does it work ?'