In [30]:
%load_ext autoreload
%autoreload 2

import torch
from torchinfo import summary
from config import en_id_model as mtconf
from dataset import get_tokenizers
from model import build_model
from train import train_model
from utils import TrainCheckpoint, EarlyStopping, ReduceLROnPlateau, TrainingCallback

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [31]:
# Fetch the source/target tokenizers (no dataset passed, retraining disabled)
# and build the model from the shared config.
t_src, t_tgt = get_tokenizers(mtconf, ds_train=None, force_retrain_tokenizer=False)
model = build_model(mtconf, t_src, t_tgt)

# Dummy inputs used only to trace the model for the summary table.
# Random token ids are drawn from each tokenizer's vocabulary; masks are all ones.
n_batch, n_seq = mtconf.batch_size, mtconf.seq_len
dummy_encoder_input = torch.randint(0, t_src.get_vocab_size(), (n_batch, n_seq))
dummy_decoder_input = torch.randint(0, t_tgt.get_vocab_size(), (n_batch, n_seq))
dummy_encoder_mask = torch.ones(n_batch, 1, 1, n_seq, dtype=torch.int)
dummy_decoder_mask = torch.ones(n_batch, 1, n_seq, n_seq, dtype=torch.int)

# Keep `summary(...)` as the last expression so the table is rendered as the cell output.
summary(
    model,
    input_data=[
        dummy_encoder_input,
        dummy_decoder_input,
        dummy_encoder_mask,
        dummy_decoder_mask,
    ],
    col_names=["input_size", "output_size", "num_params", "trainable"],
    depth=10,
    row_settings=["var_names"],
)

tokenizer exist, getting from: .output\tokenizer_en.json
tokenizer exist, getting from: .output\tokenizer_id.json


Layer (type (var_name))                                           Input Shape               Output Shape              Param #                   Trainable
Transformer (Transformer)                                         [10, 225]                 [10, 225, 30000]          --                        True
├─InputEmbedding (src_embed)                                      [10, 225]                 [10, 225, 512]            --                        True
│    └─Embedding (embedding)                                      [10, 225]                 [10, 225, 512]            15,360,000                True
├─PositionalEncoding (src_pos)                                    [10, 225, 512]            [10, 225, 512]            --                        --
│    └─Dropout (dropout)                                          [10, 225, 512]            [10, 225, 512]            --                        --
├─Encoder (encoder)                                               [10, 225, 512]            [10, 225, 512

In [29]:
# Train the model with checkpointing and early stopping.
# NOTE: autoreload is already configured in the first (imports) cell; repeating
# `%load_ext autoreload` here only produced the "already loaded" notice, so the
# duplicate magics were dropped from this cell.

# Checkpoint helper built from the configured output location
# (presumably where model checkpoints are written; confirm in utils.TrainCheckpoint).
tcp = TrainCheckpoint(mtconf.model_output)
# patience=8: presumably the number of non-improving epochs tolerated before
# training stops; confirm in utils.EarlyStopping.
es = EarlyStopping(patience=8)
callback = TrainingCallback(checkpoint=tcp, early_stop=es)

# `history` is only assigned if train_model returns normally; an interrupted run
# (e.g. KeyboardInterrupt) leaves it undefined or stale from a previous run.
history = train_model(mtconf, callback)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
total sentence pair for training: 50000
tokenizer exist, getting from: .output\tokenizer_en.json
tokenizer exist, getting from: .output\tokenizer_id.json
max length of source sentence: 127
max length of target sentence: 222


processing epoch 0: 100%|██████████| 5000/5000 [06:56<00:00, 12.01it/s, loss=5.322] 


Epoch 1 - 0:7:34.3 | train_loss=5.728802 | val_loss=5.280047

* metrics improved from inf to 5.280047


processing epoch 1: 100%|██████████| 5000/5000 [07:03<00:00, 11.81it/s, loss=4.577]


Epoch 2 - 0:7:39.5 | train_loss=4.974816 | val_loss=4.904371

* metrics improved from 5.280047 to 4.904371


processing epoch 2: 100%|██████████| 5000/5000 [06:59<00:00, 11.93it/s, loss=4.055]


Epoch 3 - 0:7:36.1 | train_loss=4.564045 | val_loss=4.700373

* metrics improved from 4.904371 to 4.700373


processing epoch 3: 100%|██████████| 5000/5000 [06:58<00:00, 11.95it/s, loss=4.420]


Epoch 4 - 0:7:35.0 | train_loss=4.255458 | val_loss=4.576805

* metrics improved from 4.700373 to 4.576805


processing epoch 4: 100%|██████████| 5000/5000 [06:58<00:00, 11.94it/s, loss=3.909]


Epoch 5 - 0:7:34.6 | train_loss=3.998574 | val_loss=4.491093

* metrics improved from 4.576805 to 4.491093


processing epoch 5: 100%|██████████| 5000/5000 [06:56<00:00, 12.00it/s, loss=4.467]


Epoch 6 - 0:7:32.5 | train_loss=3.773287 | val_loss=4.456754

* metrics improved from 4.491093 to 4.456754


processing epoch 6: 100%|██████████| 5000/5000 [34:59<00:00,  2.38it/s, loss=4.119]    


Epoch 7 - 0:35:33.8 | train_loss=3.564236 | val_loss=4.433269

* metrics improved from 4.456754 to 4.433269


processing epoch 7: 100%|██████████| 5000/5000 [07:02<00:00, 11.84it/s, loss=2.806]


Epoch 8 - 0:7:40.0 | train_loss=3.374776 | val_loss=4.454077

metrics did not improve from 4.433269


processing epoch 8: 100%|██████████| 5000/5000 [07:00<00:00, 11.90it/s, loss=3.329]


Epoch 9 - 0:7:37.7 | train_loss=3.197216 | val_loss=4.449455

metrics did not improve from 4.433269


processing epoch 9: 100%|██████████| 5000/5000 [07:04<00:00, 11.77it/s, loss=2.856]


Epoch 10 - 0:7:42.4 | train_loss=3.031546 | val_loss=4.475343

metrics did not improve from 4.433269


processing epoch 10: 100%|██████████| 5000/5000 [07:02<00:00, 11.82it/s, loss=2.570]


Epoch 11 - 0:7:37.5 | train_loss=2.869238 | val_loss=4.547474

metrics did not improve from 4.433269


processing epoch 11:  46%|████▌     | 2296/5000 [03:17<03:52, 11.65it/s, loss=2.474]


KeyboardInterrupt: 