## Installing PyTorch

In [0]:
# http://pytorch.org/
from os.path import exists
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())
cuda_output = !ldconfig -p|grep cudart.so|sed -e 's/.*\.\([0-9]*\)\.\([0-9]*\)$/cu\1\2/'
accelerator = cuda_output[0] if exists('/dev/nvidia0') else 'cpu'

!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.4.1-{platform}-linux_x86_64.whl torchvision
import torch

## Building Fairseq

In [1]:
# Clone fairseq, install its requirements and build it in development mode.
!git clone https://github.com/pytorch/fairseq.git
import os
# Moves the kernel's working directory into the checkout; the clones and study
# cells below depend on that. Re-running this cell from inside fairseq/ would
# clone a nested copy and descend into it.
os.chdir("fairseq/")
!pip install -r requirements.txt
%run -i 'setup.py' build develop

Cloning into 'fairseq'...
remote: Enumerating objects: 71, done.[K
remote: Counting objects:   1% (1/71)   [Kremote: Counting objects:   2% (2/71)   [Kremote: Counting objects:   4% (3/71)   [Kremote: Counting objects:   5% (4/71)   [Kremote: Counting objects:   7% (5/71)   [Kremote: Counting objects:   8% (6/71)   [Kremote: Counting objects:   9% (7/71)   [Kremote: Counting objects:  11% (8/71)   [Kremote: Counting objects:  12% (9/71)   [Kremote: Counting objects:  14% (10/71)   [Kremote: Counting objects:  15% (11/71)   [Kremote: Counting objects:  16% (12/71)   [Kremote: Counting objects:  18% (13/71)   [Kremote: Counting objects:  19% (14/71)   [Kremote: Counting objects:  21% (15/71)   [Kremote: Counting objects:  22% (16/71)   [Kremote: Counting objects:  23% (17/71)   [Kremote: Counting objects:  25% (18/71)   [Kremote: Counting objects:  26% (19/71)   [Kremote: Counting objects:  28% (20/71)   [Kremote: Counting objects:  29% (21/71)   

## Model for Setswana (en → tn)


### Data

In [2]:
# Corpus repository holding the entn_parallel.* files used below. It is cloned
# into the current directory, which is fairseq/ once the build cell has run,
# hence the fairseq/ukuxhumana/... paths in the study cells.
!git clone https://github.com/LauraMartinus/ukuxhumana.git

Cloning into 'ukuxhumana'...
remote: Enumerating objects: 120, done.[K
remote: Counting objects: 100% (120/120), done.[K
remote: Compressing objects: 100% (101/101), done.[K
remote: Total 1386 (delta 58), reused 50 (delta 19), pack-reused 1266[K
Receiving objects: 100% (1386/1386), 404.63 MiB | 22.34 MiB/s, done.
Resolving deltas: 100% (665/665), done.
Checking out files: 100% (308/308), done.


### Subword

In [3]:
# Provides learn_bpe.py and apply_bpe.py, which the study cells call as
# fairseq/subword-nmt/... (the clone lands in the current directory).
!git clone https://github.com/rsennrich/subword-nmt

Cloning into 'subword-nmt'...
remote: Enumerating objects: 28, done.[K
remote: Counting objects:   3% (1/28)   [Kremote: Counting objects:   7% (2/28)   [Kremote: Counting objects:  10% (3/28)   [Kremote: Counting objects:  14% (4/28)   [Kremote: Counting objects:  17% (5/28)   [Kremote: Counting objects:  21% (6/28)   [Kremote: Counting objects:  25% (7/28)   [Kremote: Counting objects:  28% (8/28)   [Kremote: Counting objects:  32% (9/28)   [Kremote: Counting objects:  35% (10/28)   [Kremote: Counting objects:  39% (11/28)   [Kremote: Counting objects:  42% (12/28)   [Kremote: Counting objects:  46% (13/28)   [Kremote: Counting objects:  50% (14/28)   [Kremote: Counting objects:  53% (15/28)   [Kremote: Counting objects:  57% (16/28)   [Kremote: Counting objects:  60% (17/28)   [Kremote: Counting objects:  64% (18/28)   [Kremote: Counting objects:  67% (19/28)   [Kremote: Counting objects:  71% (20/28)   [Kremote: Counting objects:  75% (21/28

In [0]:
# Debugging aid: list the current working directory, presumably to check where
# the previous cell left the kernel. The chdir below is disabled.
#os.chdir('../')
!ls

## Study: effect of the number of BPE merge operations


**4k**

In [0]:
os.chdir('../')
!cat fairseq/ukuxhumana/clean/en_tn/entn_parallel.train.en fairseq/ukuxhumana/clean/en_tn/entn_parallel.train.tn > combine.txt
!fairseq/subword-nmt/learn_bpe.py -s 4000 <combine.txt> entn.codes

!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.train.en > train.en
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.train.tn > train.tn
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.dev.en > valid.en
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.dev.tn > valid.tn
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.test.en > test.en
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.test.tn > test.tn

os.chdir('fairseq/')
TEXT=".."
%run 'preprocess.py' --source-lang en --target-lang tn --trainpref $TEXT/train --validpref $TEXT/valid --testpref $TEXT/test --destdir data-bin/entn

!mkdir -p checkpoint/fconv
!python train.py data-bin/entn \
    --lr 0.25 --clip-norm 0.1 --dropout 0.2 --max-tokens 4000 \
    --arch fconv --save-dir checkpoint/fconv

# for sentencepiece remove ▁
output = %run 'generate.py' data-bin/entn --path checkpoint/fconv/checkpoint_best.pt --beam 5 --batch-size 128 --remove-bpe --source-lang en --target-lang tn

**8k**

In [0]:
# Debugging aid: list the current working directory, presumably to check where
# the previous cell left the kernel. The chdir below is disabled.
#os.chdir('../')
!ls

In [0]:
os.chdir('../')
!cat fairseq/ukuxhumana/clean/en_tn/entn_parallel.train.en fairseq/ukuxhumana/clean/en_tn/entn_parallel.train.tn > combine.txt
!fairseq/subword-nmt/learn_bpe.py -s 8000 <combine.txt> entn.codes

!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.train.en > train.en
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.train.tn > train.tn
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.dev.en > valid.en
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.dev.tn > valid.tn
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.test.en > test.en
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.test.tn > test.tn

os.chdir('fairseq/')
TEXT=".."
%run 'preprocess.py' --source-lang en --target-lang tn --trainpref $TEXT/train --validpref $TEXT/valid --testpref $TEXT/test --destdir data-bin/entn

!mkdir -p checkpoint/fconv
!python train.py data-bin/entn \
    --lr 0.25 --clip-norm 0.1 --dropout 0.2 --max-tokens 4000 \
    --arch fconv --save-dir checkpoint/fconv

# for sentencepiece remove ▁
output = %run 'generate.py' data-bin/entn --path checkpoint/fconv/checkpoint_best.pt --beam 5 --batch-size 128 --remove-bpe --source-lang en --target-lang tn

**12k**

In [0]:
os.chdir('../')
!cat fairseq/ukuxhumana/clean/en_tn/entn_parallel.train.en fairseq/ukuxhumana/clean/en_tn/entn_parallel.train.tn > combine.txt
!fairseq/subword-nmt/learn_bpe.py -s 12000 <combine.txt> entn.codes

!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.train.en > train.en
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.train.tn > train.tn
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.dev.en > valid.en
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.dev.tn > valid.tn
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.test.en > test.en
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.test.tn > test.tn

os.chdir('fairseq/')
TEXT=".."
%run 'preprocess.py' --source-lang en --target-lang tn --trainpref $TEXT/train --validpref $TEXT/valid --testpref $TEXT/test --destdir data-bin/entn

!mkdir -p checkpoint/fconv
!python train.py data-bin/entn \
    --lr 0.25 --clip-norm 0.1 --dropout 0.2 --max-tokens 4000 \
    --arch fconv --save-dir checkpoint/fconv

# for sentencepiece remove ▁
output = %run 'generate.py' data-bin/entn --path checkpoint/fconv/checkpoint_best.pt --beam 5 --batch-size 128 --remove-bpe --source-lang en --target-lang tn

**16k**

In [0]:
# Debugging aid: list the current working directory, presumably to check where
# the previous cell left the kernel. The chdir below is disabled.
#os.chdir('content/')
!ls

In [0]:
os.chdir('../')
!cat fairseq/ukuxhumana/clean/en_tn/entn_parallel.train.en fairseq/ukuxhumana/clean/en_tn/entn_parallel.train.tn > combine.txt
!fairseq/subword-nmt/learn_bpe.py -s 16000 <combine.txt> entn.codes

!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.train.en > train.en
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.train.tn > train.tn
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.dev.en > valid.en
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.dev.tn > valid.tn
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.test.en > test.en
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.test.tn > test.tn

os.chdir('fairseq/')
TEXT=".."
%run 'preprocess.py' --source-lang en --target-lang tn --trainpref $TEXT/train --validpref $TEXT/valid --testpref $TEXT/test --destdir data-bin/entn

!mkdir -p checkpoint/fconv
!python train.py data-bin/entn \
    --lr 0.25 --clip-norm 0.1 --dropout 0.2 --max-tokens 4000 \
    --arch fconv --save-dir checkpoint/fconv

# for sentencepiece remove ▁
output = %run 'generate.py' data-bin/entn --path checkpoint/fconv/checkpoint_best.pt --beam 5 --batch-size 128 --remove-bpe --source-lang en --target-lang tn

**20k**

In [0]:
# Debugging aid: list the current working directory, presumably to check where
# the previous cell left the kernel.
!ls

In [4]:
os.chdir('../')
!cat fairseq/ukuxhumana/clean/en_tn/entn_parallel.train.en fairseq/ukuxhumana/clean/en_tn/entn_parallel.train.tn > combine.txt
!fairseq/subword-nmt/learn_bpe.py -s 20.000 <combine.txt> entn.codes

!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.train.en > train.en
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.train.tn > train.tn
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.dev.en > valid.en
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.dev.tn > valid.tn
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.test.en > test.en
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.test.tn > test.tn

os.chdir('fairseq/')
TEXT=".."
%run 'preprocess.py' --source-lang en --target-lang tn --trainpref $TEXT/train --validpref $TEXT/valid --testpref $TEXT/test --destdir data-bin/entn

!mkdir -p checkpoint/fconv
!python train.py data-bin/entn \
    --lr 0.25 --clip-norm 0.1 --dropout 0.2 --max-tokens 4000 \
    --arch fconv --save-dir checkpoint/fconv

# for sentencepiece remove ▁
output = %run 'generate.py' data-bin/entn --path checkpoint/fconv/checkpoint_best.pt --beam 5 --batch-size 128 --remove-bpe --source-lang en --target-lang tn

  args.codes = codecs.open(args.codes.name, encoding='utf-8')
  args.codes = codecs.open(args.codes.name, encoding='utf-8')
  args.codes = codecs.open(args.codes.name, encoding='utf-8')
  args.codes = codecs.open(args.codes.name, encoding='utf-8')
  args.codes = codecs.open(args.codes.name, encoding='utf-8')
  args.codes = codecs.open(args.codes.name, encoding='utf-8')
Namespace(alignfile=None, destdir='data-bin/entn', joined_dictionary=False, nwordssrc=-1, nwordstgt=-1, only_source=False, output_format='binary', padding_factor=8, source_lang='en', srcdict=None, target_lang='tn', testpref='../test', tgtdict=None, thresholdsrc=0, thresholdtgt=0, trainpref='../train', validpref='../valid', workers=1)
| [en] Dictionary: 12855 types
| [en] ../train.en: 86706 sents, 1790494 tokens, 0.0% replaced by <unk>
| [en] Dictionary: 12855 types
| [en] ../valid.en: 34162 sents, 436143 tokens, 0.0101% replaced by <unk>
| [en] Dictionary: 12855 types
| [en] ../test.en: 3000 sents, 67207 tokens, 0.0134% 

**24k**

In [0]:
# Debugging aid: list the current working directory, presumably to check where
# the previous cell left the kernel.
!ls

In [0]:
os.chdir('../')
!cat fairseq/ukuxhumana/clean/en_tn/entn_parallel.train.en fairseq/ukuxhumana/clean/en_tn/entn_parallel.train.tn > combine.txt
!fairseq/subword-nmt/learn_bpe.py -s 24000 <combine.txt> entn.codes

!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.train.en > train.en
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.train.tn > train.tn
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.dev.en > valid.en
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.dev.tn > valid.tn
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.test.en > test.en
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.test.tn > test.tn

os.chdir('fairseq/')
TEXT=".."
%run 'preprocess.py' --source-lang en --target-lang tn --trainpref $TEXT/train --validpref $TEXT/valid --testpref $TEXT/test --destdir data-bin/entn

!mkdir -p checkpoint/fconv
!python train.py data-bin/entn \
    --lr 0.25 --clip-norm 0.1 --dropout 0.2 --max-tokens 4000 \
    --arch fconv --save-dir checkpoint/fconv

# for sentencepiece remove ▁
output = %run 'generate.py' data-bin/entn --path checkpoint/fconv/checkpoint_best.pt --beam 5 --batch-size 128 --remove-bpe --source-lang en --target-lang tn

**28k**

In [0]:
os.chdir('../')
!cat fairseq/ukuxhumana/clean/en_tn/entn_parallel.train.en fairseq/ukuxhumana/clean/en_tn/entn_parallel.train.tn > combine.txt
!fairseq/subword-nmt/learn_bpe.py -s 28000 <combine.txt> entn.codes

!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.train.en > train.en
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.train.tn > train.tn
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.dev.en > valid.en
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.dev.tn > valid.tn
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.test.en > test.en
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.test.tn > test.tn

os.chdir('fairseq/')
TEXT=".."
%run 'preprocess.py' --source-lang en --target-lang tn --trainpref $TEXT/train --validpref $TEXT/valid --testpref $TEXT/test --destdir data-bin/entn

!mkdir -p checkpoint/fconv
!python train.py data-bin/entn \
    --lr 0.25 --clip-norm 0.1 --dropout 0.2 --max-tokens 4000 \
    --arch fconv --save-dir checkpoint/fconv

# for sentencepiece remove ▁
output = %run 'generate.py' data-bin/entn --path checkpoint/fconv/checkpoint_best.pt --beam 5 --batch-size 128 --remove-bpe --source-lang en --target-lang tn

**32k**

In [0]:
# Debugging aid: list the current working directory, presumably to check where
# the previous cell left the kernel.
!ls

In [0]:
os.chdir('../')
!cat fairseq/ukuxhumana/clean/en_tn/entn_parallel.train.en fairseq/ukuxhumana/clean/en_tn/entn_parallel.train.tn > combine.txt
!fairseq/subword-nmt/learn_bpe.py -s 32000 <combine.txt> entn.codes

!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.train.en > train.en
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.train.tn > train.tn
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.dev.en > valid.en
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.dev.tn > valid.tn
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.test.en > test.en
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.test.tn > test.tn

os.chdir('fairseq/')
TEXT=".."
%run 'preprocess.py' --source-lang en --target-lang tn --trainpref $TEXT/train --validpref $TEXT/valid --testpref $TEXT/test --destdir data-bin/entn

!mkdir -p checkpoint/fconv
!python train.py data-bin/entn \
    --lr 0.25 --clip-norm 0.1 --dropout 0.2 --max-tokens 4000 \
    --arch fconv --save-dir checkpoint/fconv

# for sentencepiece remove ▁
output = %run 'generate.py' data-bin/entn --path checkpoint/fconv/checkpoint_best.pt --beam 5 --batch-size 128 --remove-bpe --source-lang en --target-lang tn

**36k**

In [0]:
# Debugging aid: list the current working directory, presumably to check where
# the previous cell left the kernel. The chdir below is disabled.
#os.chdir('fairseq/')
!ls

In [0]:
os.chdir('../')
!cat fairseq/ukuxhumana/clean/en_tn/entn_parallel.train.en fairseq/ukuxhumana/clean/en_tn/entn_parallel.train.tn > combine.txt
!fairseq/subword-nmt/learn_bpe.py -s 36000 <combine.txt> entn.codes

!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.train.en > train.en
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.train.tn > train.tn
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.dev.en > valid.en
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.dev.tn > valid.tn
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.test.en > test.en
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.test.tn > test.tn

os.chdir('fairseq/')
TEXT=".."
%run 'preprocess.py' --source-lang en --target-lang tn --trainpref $TEXT/train --validpref $TEXT/valid --testpref $TEXT/test --destdir data-bin/entn

!mkdir -p checkpoint/fconv
!python train.py data-bin/entn \
    --lr 0.25 --clip-norm 0.1 --dropout 0.2 --max-tokens 4000 \
    --arch fconv --save-dir checkpoint/fconv

# for sentencepiece remove ▁
output = %run 'generate.py' data-bin/entn --path checkpoint/fconv/checkpoint_best.pt --beam 5 --batch-size 128 --remove-bpe --source-lang en --target-lang tn

**40k**

In [0]:
# Debugging aid: list the current working directory, presumably to check where
# the previous cell left the kernel.
!ls

In [0]:
os.chdir('../')
!cat fairseq/ukuxhumana/clean/en_tn/entn_parallel.train.en fairseq/ukuxhumana/clean/en_tn/entn_parallel.train.tn > combine.txt
!fairseq/subword-nmt/learn_bpe.py -s 40000 <combine.txt> entn.codes

!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.train.en > train.en
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.train.tn > train.tn
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.dev.en > valid.en
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.dev.tn > valid.tn
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.test.en > test.en
!fairseq/subword-nmt/apply_bpe.py -c entn.codes < fairseq/ukuxhumana/clean/en_tn/entn_parallel.test.tn > test.tn

os.chdir('fairseq/')
TEXT=".."
%run 'preprocess.py' --source-lang en --target-lang tn --trainpref $TEXT/train --validpref $TEXT/valid --testpref $TEXT/test --destdir data-bin/entn

!mkdir -p checkpoint/fconv
!python train.py data-bin/entn \
    --lr 0.25 --clip-norm 0.1 --dropout 0.2 --max-tokens 4000 \
    --arch fconv --save-dir checkpoint/fconv

# for sentencepiece remove ▁
output = %run 'generate.py' data-bin/entn --path checkpoint/fconv/checkpoint_best.pt --beam 5 --batch-size 128 --remove-bpe --source-lang en --target-lang tn

## Results

*   40k: 
*   36k: 
*   32k: 
*   28k: 
*   24k: 
*   20k: 22.09
*   16k: 26.03
*   12k: 25.62
*     8k: 21.28
*     4k: 24.34

