# Setup environment

In [None]:
# %%bash
# git clone https://github.com/Paulescu/talking-machines.git
# mv talking-machines/* .
# rm -r talking-machines
# pip install -r requirements_py3.6.txt

In [1]:
# Load IPython's autoreload extension. Mode 2 reloads all imported modules
# before each cell is executed, so edits to the local .py files (data_util,
# model, train) take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import torch

# Choose the compute device once; DEVICE is handed to the data loaders later on.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Tell the reader which device was picked, with a hint for Colab users on CPU.
if DEVICE.type == "cuda":
    print('GPU acceleration is available and will be used :-)')
else:
    print('GPU is not available. If you are using Google Colab, change the runtime to GPU, '
          'otherwise training will take too long.')

GPU is not available. If you are using Google Colab, change the runtime to GPU, otherwise training will take too long.


# Download the data

In [75]:
!sh download_data.sh

--2021-01-27 21:16:43--  https://s3.amazonaws.com/datasets.huggingface.co/personachat/personachat_self_original.json
Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.216.251.30
Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.216.251.30|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 209850483 (200M) [application/json]
Saving to: ‘./data/personachat_self_original.json.1’


2021-01-27 21:17:19 (5.60 MB/s) - ‘./data/personachat_self_original.json.1’ saved [209850483/209850483]



In [53]:
# Run data_util's generator for the train/validation/test files (purpose taken
# from its name; the implementation is not part of this notebook).
# In the recorded run it printed "0 lines removed" twice and the sizes
# "Train set 131,438" and "Test set 7,801".
# NOTE(review): presumably its output is what TrainingDataWrapper reads in the
# next cell — confirm in data_util.
from data_util import generate_train_validation_test_files

generate_train_validation_test_files()

0 lines removed
Train set 131,438
0 lines removed
Test set 7,801


In [72]:
%autoreload 2
from data_util import TrainingDataWrapper, save_vocab

# Dataset objects
dw = TrainingDataWrapper()
train_ds, val_ds, test_ds = dw.get_datasets(
    train_size=13, # 132000,
    val_size=7, # 7801,
    use_glove=True
)
print(f'Train set size: {len(train_ds):,}')
print(f'Validation set size: {len(val_ds):,}')
print('Vocab size: ', dw.vocab_size)

# Save vocab to disk
save_vocab(dw.vocab, f'./checkpoints/vocab_{dw.vocab_size}')

Train set size: 13
Validation set size: 7
Vocab size:  131


In [73]:
# DataLoader objects
# Build one batch iterator per dataset, passing the device chosen earlier.
# NOTE(review): batch_size=2400 although the train set holds 13 examples, and
# the recorded first batch contained 12 of them — presumably the limit is not a
# plain example count (e.g. a token budget); confirm in data_util.
train_iter, val_iter, test_iter = dw.get_dataloaders(
    train_ds, val_ds, test_ds,
    batch_size=2400,
    device=DEVICE
)

# Peek at one training batch as a sanity check. In the recorded run x.src[0]
# printed as a 2-D integer tensor and x.src[1] as a 1-D tensor of 12 decreasing
# integers — presumably padded token ids and per-example lengths; TODO confirm.
x = next(iter(train_iter))
print('Example \n-------')
print(x.src[0])
print(x.src[1])

Example 
-------
tensor([[ 2, 33, 19,  ..., 38,  5,  3],
        [ 2, 33, 19,  ...,  1,  1,  1],
        [ 2, 33, 19,  ...,  1,  1,  1],
        ...,
        [ 2, 33, 19,  ...,  1,  1,  1],
        [ 2, 33, 19,  ...,  1,  1,  1],
        [ 2, 33, 19,  ...,  1,  1,  1]])
tensor([166, 140, 123, 116, 105,  82,  84,  55,  52,  33,  23,  10])


In [74]:
%autoreload 2
from model import Seq2seqRNN, count_parameters

hidden_dim = 256
n_layers = 3
n_directions_encoder = 2
model = Seq2seqRNN(dw.vocab_size,
                   dw.embedding_dim,
                   hidden_dim,
                   n_layers,
                   n_directions_encoder,
                   dropout=0.2,
                   pretrained_embeddings=dw.embeddings,
                   freeze_embeddings=False)

print(f'The model has {count_parameters(model):,} parameters')

The model has 5,353,135 parameters


In [75]:
%autoreload 2
from train import Seq2seqRNNTrainer

trainer = Seq2seqRNNTrainer(model,
                            train_iter,
                            val_iter,
                            learning_rate=3e-4,
                            pad_token_id=dw.pad_token_id,
                            gradient_clip=99999,
                            teacher_forcing=0.5,
#                             checkpoint_dir='/content/drive/MyDrive/chatbot-course/')
                            checkpoint_dir='./checkpoints')
n_epochs = 2
trainer.train(n_epochs)

  0%|          | 0/13 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch: 000, Train loss: 4.8756, Val loss: 4.8790, Train ppl: 131.1, Val ppl: 131.5
checkpoints/bb3317a8-6539-11eb-be89-acbc32b70c09/0.ckpt was saved
checkpoints/bb3317a8-6539-11eb-be89-acbc32b70c09/params.json file was saved


  0%|          | 0/13 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch: 001, Train loss: 4.8641, Val loss: 4.8548, Train ppl: 129.5, Val ppl: 128.4
checkpoints/bb3317a8-6539-11eb-be89-acbc32b70c09/1.ckpt was saved
checkpoints/bb3317a8-6539-11eb-be89-acbc32b70c09/params.json file was saved


In [79]:
# Checkpoint to resume from.
# NOTE(review): the run id is copied by hand from the output of an earlier
# training run, so this cell has to be updated whenever that run is repeated.
# NOTE(review): in the recorded log the first epoch after loading epoch=1 was
# saved as 1.ckpt again, i.e. the loaded checkpoint was overwritten — check how
# Seq2seqRNNTrainer continues its epoch counter after load_checkpoint.
resume_from = {
    'run_id': 'bb3317a8-6539-11eb-be89-acbc32b70c09',
    'epoch': 1,
}
trainer.load_checkpoint(**resume_from)

# Continue training from the restored state.
n_epochs = 2
trainer.train(n_epochs)

  0%|          | 0/13 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch: 000, Train loss: 4.8491, Val loss: 4.8213, Train ppl: 127.6, Val ppl: 124.1
checkpoints/bb3317a8-6539-11eb-be89-acbc32b70c09/1.ckpt was saved
checkpoints/bb3317a8-6539-11eb-be89-acbc32b70c09/params.json file was saved


  0%|          | 0/13 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch: 001, Train loss: 4.8293, Val loss: 4.7955, Train ppl: 125.1, Val ppl: 121.0
checkpoints/bb3317a8-6539-11eb-be89-acbc32b70c09/2.ckpt was saved
checkpoints/bb3317a8-6539-11eb-be89-acbc32b70c09/params.json file was saved


In [80]:
# Train the same trainer object for two more epochs. In the recorded run this
# wrote 3.ckpt and 4.ckpt (plus params.json) under the same run directory.
n_epochs = 2
trainer.train(n_epochs)

  0%|          | 0/13 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch: 000, Train loss: 4.8062, Val loss: 4.7484, Train ppl: 122.3, Val ppl: 115.4
checkpoints/bb3317a8-6539-11eb-be89-acbc32b70c09/3.ckpt was saved
checkpoints/bb3317a8-6539-11eb-be89-acbc32b70c09/params.json file was saved


  0%|          | 0/13 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch: 001, Train loss: 4.7585, Val loss: 4.6679, Train ppl: 116.6, Val ppl: 106.5
checkpoints/bb3317a8-6539-11eb-be89-acbc32b70c09/4.ckpt was saved
checkpoints/bb3317a8-6539-11eb-be89-acbc32b70c09/params.json file was saved
