## Check what GPU you got
Click the Runtime dropdown at the top of the page, then Change Runtime Type and confirm the instance type is GPU.

Check the output of !nvidia-smi to make sure you've been allocated a Tesla P100.

In [0]:
# Print the NVIDIA GPU status table so the GPU allocated to this runtime can be checked.
!nvidia-smi

## Pre-requisites
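Mount Google Drive (where the source code lives), move into the source directory, then install the spaCy language models, torchtext and PyTorch.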

In [0]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

import os
os.chdir("/content/drive/My Drive/Translator/src")
!ls

!python -m spacy download en_core_web_sm
!python -m spacy download fr_core_news_sm

!pip3 install 'torchtext==0.5.0'

!pip3 install torch torchvision


## Build the Vocabs
Build the French and English vocabularies from the training data of the chosen dataset.

In [0]:
%%shell
# Build the French and English vocabulary files from the training data.
#
# `set -eu` stops the script at the first failing command or unset variable.
# Without it the final `echo` always succeeds, so the cell would report
# "Finished" even when build-vocabs.py failed.
set -eu

DATASET=Hansard-Multi30k
TRAIN="../data/${DATASET}/Training/"
MODELS="../models/${DATASET}"

echo "Making the vocabulary"

python3 build-vocabs.py vocab "${TRAIN}" fr "${MODELS}/vocab.inf.5.french.gz" --min-frequency 5
python3 build-vocabs.py vocab "${TRAIN}" en "${MODELS}/vocab.inf.5.english.gz" --min-frequency 5

echo "Finished making the vocabulary"

## Train / Test model from English to French
Train the model with attention from English to French

In [0]:
%%shell
# Train the English -> French model with train.py.
#
# `set -eu` stops the script at the first failing command or unset variable,
# so a failed training run is not followed by a misleading "Finished training".
set -eu

DATASET=Hansard-Multi30k
TRAIN="../data/${DATASET}/Training/"
MODELS="../models/${DATASET}"

echo "Training the model from English to French"

# Recorded results of earlier runs (the command below currently uses
# Num-Layers=2 and Num-Attention-Heads=8).
#
# With Num-Attention-Heads=8, PF-Size=512, Embedding-Size=256, Encoder-Dropout=0.1, Decoder-Dropout=0.1, Batch-Size=64, and
# * Num-Layers=3, we get a BLEU=35.26   Val-Loss=1.44
# * Num-Layers=6, we get a BLEU=0       Val-Loss=7.53
# * Num-Layers=4, we get a BLEU=33.35   Val-Loss=1.65
# * Num-Layers=2, we get a BLEU=34.17   Val-Loss=1.56

# With Num-Layers=3, PF-Size=512, Embedding-Size=256, Encoder-Dropout=0.1, Decoder-Dropout=0.1, Batch-Size=64, and
# * Num-Attention-Heads=16, we get a BLEU=34.73   Val-Loss=1.55
# * Num-Attention-Heads=4, we get a BLEU=????

# The checkpoint is resumed from and saved to the same file.
# NOTE(review): confirm train.py tolerates a missing checkpoint on the first run.
# The last argument deliberately has no trailing backslash, so the command
# cannot swallow the following line.
python3 train.py "${TRAIN}" \
    en "${MODELS}/vocab.inf.5.english.gz" \
    fr "${MODELS}/vocab.inf.5.french.gz" \
    "${MODELS}/model.en.fr.pt" \
    --source-word-embedding-size 256 \
    --target-word-embedding-size 256 \
    --encoder-num-layers 2 \
    --encoder-num-attention-heads 8 \
    --encoder-pf-size 512 \
    --encoder-dropout 0.1 \
    --decoder-num-layers 2 \
    --decoder-num-attention-heads 8 \
    --decoder-pf-size 512 \
    --decoder-dropout 0.1 \
    --patience 3 \
    --train-val-ratio 0.75 \
    --batch-size 64 \
    --seed 1 \
    --device cuda \
    --resume-from-checkpoint "${MODELS}/checkpoint.en.fr.pt" \
    --save-checkpoint-to "${MODELS}/checkpoint.en.fr.pt"

echo "Finished training"

Test the model with attention from English to French

In [0]:
%%shell
# Evaluate the English -> French model with test.py.
#
# `set -eu` stops the script at the first failing command or unset variable,
# so a failed evaluation is not followed by a misleading "Finished testing".
set -eu

DATASET=Hansard-Multi30k
TEST="../data/${DATASET}/Testing/"
MODELS="../models/${DATASET}"

echo "Testing the model from English to French"

# BLEU=34.54    Val-Loss=1.563

# The layer counts are set to 2 to match the training cell that writes
# model.en.fr.pt; this cell previously passed 3, which disagreed with it.
# NOTE(review): presumably test.py rebuilds the network from these flags before
# loading the weights -- keep them in sync with the training command.
python3 test.py "${TEST}" \
    en "${MODELS}/vocab.inf.5.english.gz" \
    fr "${MODELS}/vocab.inf.5.french.gz" \
    "${MODELS}/model.en.fr.pt" \
    --source-word-embedding-size 256 \
    --target-word-embedding-size 256 \
    --encoder-num-layers 2 \
    --encoder-num-attention-heads 8 \
    --encoder-pf-size 512 \
    --encoder-dropout 0.1 \
    --decoder-num-layers 2 \
    --decoder-num-attention-heads 8 \
    --decoder-pf-size 512 \
    --decoder-dropout 0.1 \
    --device cuda \
    --batch-size 64

echo "Finished testing"

## Train / Test model from French to English
Train the model from French to English

In [0]:
%%shell
# Train the French -> English model with train.py.
#
# `set -eu` stops the script at the first failing command or unset variable,
# so a failed training run is not followed by a misleading "Finished training".
set -eu

DATASET=Hansard-Multi30k
TRAIN="../data/${DATASET}/Training/"
MODELS="../models/${DATASET}"

echo "Training the model from French to English"

# Recorded results of earlier runs (the command below currently uses
# Num-Layers=2 and Num-Attention-Heads=8).
#
# With Num-Attention-Heads=8, PF-Size=512, Embedding-Size=256, Encoder-Dropout=0.1, Decoder-Dropout=0.1, Batch-Size=64, and
# * Num-Layers=3, we get a BLEU=34.53   Val-Loss=1.829
# * Num-Layers=5, we get a BLEU=27.89   Val-Loss=2.147
# * Num-Layers=4, we get a BLEU=????    Val-Loss=????
# * Num-Layers=2, we get a BLEU=35.20   Val-Loss=1.776

# With Num-Layers=3, PF-Size=512, Embedding-Size=256, Encoder-Dropout=0.1, Decoder-Dropout=0.1, Batch-Size=64, and
# * Num-Attention-Heads=16, we get a BLEU=29.94   Val-Loss=2.031
# * Num-Attention-Heads=4, we get a  BLEU=????   Val-Loss=????

# The checkpoint is resumed from and saved to the same file.
# NOTE(review): confirm train.py tolerates a missing checkpoint on the first run.
# The last argument deliberately has no trailing backslash, so the command
# cannot swallow the following line.
python3 train.py "${TRAIN}" \
    fr "${MODELS}/vocab.inf.5.french.gz" \
    en "${MODELS}/vocab.inf.5.english.gz" \
    "${MODELS}/model.fr.en.2.8.pt" \
    --source-word-embedding-size 256 \
    --target-word-embedding-size 256 \
    --encoder-num-layers 2 \
    --encoder-num-attention-heads 8 \
    --encoder-pf-size 512 \
    --encoder-dropout 0.1 \
    --decoder-num-layers 2 \
    --decoder-num-attention-heads 8 \
    --decoder-pf-size 512 \
    --decoder-dropout 0.1 \
    --patience 3 \
    --train-val-ratio 0.75 \
    --batch-size 64 \
    --seed 1 \
    --device cuda \
    --resume-from-checkpoint "${MODELS}/checkpoint.fr.en.2.8.pt" \
    --save-checkpoint-to "${MODELS}/checkpoint.fr.en.2.8.pt"

echo "Finished training"

Test the model from French to English

In [0]:
%%shell
# Evaluate the French -> English model with test.py.
#
# `set -eu` stops the script at the first failing command or unset variable,
# so a failed evaluation is not followed by a misleading "Finished testing".
set -eu

DATASET=Hansard-Multi30k
TEST="../data/${DATASET}/Testing/"
MODELS="../models/${DATASET}"

echo "Testing the model from French to English"

# BLEU=34.55 Val-Loss=1.7788

# Architecture flags mirror the French -> English training command
# (2 layers, 8 attention heads). The last argument deliberately has no
# trailing backslash, so the command cannot swallow the following line.
python3 test.py "${TEST}" \
    fr "${MODELS}/vocab.inf.5.french.gz" \
    en "${MODELS}/vocab.inf.5.english.gz" \
    "${MODELS}/model.fr.en.2.8.pt" \
    --source-word-embedding-size 256 \
    --target-word-embedding-size 256 \
    --encoder-num-layers 2 \
    --encoder-num-attention-heads 8 \
    --encoder-pf-size 512 \
    --encoder-dropout 0.1 \
    --decoder-num-layers 2 \
    --decoder-num-attention-heads 8 \
    --decoder-pf-size 512 \
    --decoder-dropout 0.1 \
    --device cuda \
    --batch-size 64

echo "Finished testing"