## Installing PyTorch

In [1]:
# http://pytorch.org/
from os.path import exists
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())
cuda_output = !ldconfig -p|grep cudart.so|sed -e 's/.*\.\([0-9]*\)\.\([0-9]*\)$/cu\1\2/'
accelerator = cuda_output[0] if exists('/dev/nvidia0') else 'cpu'

!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.4.1-{platform}-linux_x86_64.whl torchvision
import torch

tcmalloc: large alloc 1073750016 bytes == 0x57a9a000 @  0x7f719d2e92a4 0x591a07 0x5b5d56 0x502e9a 0x506859 0x502209 0x502f3d 0x506859 0x504c28 0x502540 0x502f3d 0x506859 0x504c28 0x502540 0x502f3d 0x506859 0x504c28 0x502540 0x502f3d 0x507641 0x502209 0x502f3d 0x506859 0x504c28 0x502540 0x502f3d 0x507641 0x504c28 0x502540 0x502f3d 0x507641


## Building Fairseq

In [0]:
# Fetch fairseq and make the checkout the working directory for the following cells.
!git clone https://github.com/pytorch/fairseq.git
import os
os.chdir("fairseq/")
# Install fairseq's Python dependencies, then run its setup.py (build + develop) in place.
!pip install -r requirements.txt
%run -i 'setup.py' build develop

## Model for Nso

### Data

In [3]:
# Clone the ukuxhumana repository; the en-nso parallel files used below live under clean/en_nso/.
!git clone https://github.com/LauraMartinus/ukuxhumana.git

Cloning into 'ukuxhumana'...
remote: Enumerating objects: 56, done.[K
remote: Counting objects: 100% (56/56), done.[K
remote: Compressing objects: 100% (48/48), done.[K
remote: Total 1322 (delta 26), reused 21 (delta 8), pack-reused 1266[K
Receiving objects: 100% (1322/1322), 386.31 MiB | 21.64 MiB/s, done.
Resolving deltas: 100% (633/633), done.
Checking out files: 100% (302/302), done.


### Subword

In [4]:
# Clone subword-nmt, which provides the learn_bpe.py / apply_bpe.py scripts used below.
!git clone https://github.com/rsennrich/subword-nmt

Cloning into 'subword-nmt'...
remote: Enumerating objects: 4, done.[K
remote: Counting objects: 100% (4/4), done.[K
remote: Compressing objects: 100% (4/4), done.[K
remote: Total 485 (delta 0), reused 1 (delta 0), pack-reused 481[K
Receiving objects: 100% (485/485), 205.64 KiB | 768.00 KiB/s, done.
Resolving deltas: 100% (287/287), done.


In [5]:
# Sanity check: list the current working directory (the directory change below is disabled).
#os.chdir('../')
!ls

build		      fairseq		multiprocessing_train.py  scripts
CONTRIBUTING.md       fairseq.egg-info	PATENTS			  setup.py
distributed_train.py  fairseq.gif	preprocess.py		  subword-nmt
docs		      generate.py	README.md		  tests
eval_lm.py	      interactive.py	requirements.txt	  train.py
examples	      LICENSE		score.py		  ukuxhumana


In [0]:
!cat ukuxhumana/clean/en_nso/ennso_parallel.train.en ukuxhumana/clean/en_nso/ennso_parallel.train.nso > combine.txt

# Learn a vocabulary using 40,000 merge operations
!fairseq/subword-nmt/learn_bpe.py -s 4000 <combine.txt> ennso.codes

# Apply the vocabulary to the training file
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < ukuxhumana/clean/en_nso/ennso_parallel.train.en > train.en
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < ukuxhumana/clean/en_nso/ennso_parallel.train.nso > train.nso
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < ukuxhumana/clean/en_nso/ennso_parallel.dev.en > valid.en
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < ukuxhumana/clean/en_nso/ennso_parallel.dev.nso > valid.nso
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < ukuxhumana/clean/en_nso/ennso_parallel.test.en > test.en
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < ukuxhumana/clean/en_nso/ennso_parallel.test.nso > test.nso

In [0]:
# Enter the fairseq checkout so preprocess.py / train.py / generate.py can be run by relative path.
os.chdir('fairseq/')

In [0]:
# TEXT is the directory holding the BPE-segmented train/valid/test files (the parent directory).
# preprocess.py builds the en/nso dictionaries and writes the preprocessed data to data-bin/ennso.
TEXT=".."
%run 'preprocess.py' --source-lang en --target-lang nso --trainpref $TEXT/train --validpref $TEXT/valid --testpref $TEXT/test --destdir data-bin/ennso

Namespace(alignfile=None, destdir='data-bin/ennso', joined_dictionary=False, nwordssrc=-1, nwordstgt=-1, only_source=False, output_format='binary', padding_factor=8, source_lang='en', srcdict=None, target_lang='nso', testpref='../test', tgtdict=None, thresholdsrc=0, thresholdtgt=0, trainpref='../train', validpref='../valid', workers=1)
| [en] Dictionary: 2991 types
| [en] ../train.en: 21543 sents, 672811 tokens, 0.0% replaced by <unk>
| [en] Dictionary: 2991 types
| [en] ../valid.en: 6234 sents, 192753 tokens, 0.0223% replaced by <unk>
| [en] Dictionary: 2991 types
| [en] ../test.en: 3000 sents, 69973 tokens, 0.0171% replaced by <unk>
| [nso] Dictionary: 3415 types
| [nso] ../train.nso: 21543 sents, 738066 tokens, 0.0% replaced by <unk>
| [nso] Dictionary: 3415 types
| [nso] ../valid.nso: 6234 sents, 208039 tokens, 0.0269% replaced by <unk>
| [nso] Dictionary: 3415 types
| [nso] ../test.nso: 3000 sents, 78649 tokens, 0.0839% replaced by <unk>
| Wrote preprocessed data to data-bin/ennso

### Train Model

In [0]:
!mkdir -p chckpoint/fconv
!python train.py data-bin/ennso \
    --lr 0.25 --clip-norm 0.1 --dropout 0.2 --max-tokens 4000 \
    --arch fconv --save-dir ckpoint/fconv

# for sentencepiece remove ▁
output = %run 'generate.py' data-bin/ennso --path ckpoint/fconv/checkpoint_best.pt --beam 5 --batch-size 128 --remove-bpe --source-lang en --target-lang nso

### Testing
Generate translations from the test data and calculate the BLEU score.

In [0]:
# Generate translations with the best checkpoint (beam size 5, batches of 128 sentences).
# --remove-bpe strips the BPE segmentation from the output (original note: for sentencepiece remove ▁).
output = %run 'generate.py' data-bin/ennso --path ckpoint/fconv/checkpoint_best.pt --beam 5 --batch-size 128 --remove-bpe --source-lang en --target-lang nso

**8k**

In [0]:
# Move up one directory (the BPE commands in the next cell use paths starting with fairseq/) and list it.
os.chdir('../')
!ls

In [0]:
!cat ukuxhumana/clean/en_nso/ennso_parallel.train.en ukuxhumana/clean/en_nso/ennso_parallel.train.nso > combine.txt
!fairseq/subword-nmt/learn_bpe.py -s 8000 <combine.txt> ennso.codes

!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < ukuxhumana/clean/en_nso/ennso_parallel.train.en > train.en
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < ukuxhumana/clean/en_nso/ennso_parallel.train.nso > train.nso
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < ukuxhumana/clean/en_nso/ennso_parallel.dev.en > valid.en
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < ukuxhumana/clean/en_nso/ennso_parallel.dev.nso > valid.nso
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < ukuxhumana/clean/en_nso/ennso_parallel.test.en > test.en
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < ukuxhumana/clean/en_nso/ennso_parallel.test.nso > test.nso

os.chdir('fairseq/')
TEXT=".."
%run 'preprocess.py' --source-lang en --target-lang nso --trainpref $TEXT/train --validpref $TEXT/valid --testpref $TEXT/test --destdir data-bin/ennso

!mkdir -p checkpoint/fconv
!python train.py data-bin/ennso \
    --lr 0.25 --clip-norm 0.1 --dropout 0.2 --max-tokens 4000 \
    --arch fconv --save-dir checkpoint/fconv

# for sentencepiece remove ▁
output = %run 'generate.py' data-bin/ennso --path checkpoint/fconv/checkpoint_best.pt --beam 5 --batch-size 128 --remove-bpe --source-lang en --target-lang nso

**12k**

In [0]:
os.chdir('../')
!cat ukuxhumana/clean/en_nso/ennso_parallel.train.en ukuxhumana/clean/en_nso/ennso_parallel.train.nso > combine.txt
!fairseq/subword-nmt/learn_bpe.py -s 12000 <combine.txt> ennso.codes

!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < ukuxhumana/clean/en_nso/ennso_parallel.train.en > train.en
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < ukuxhumana/clean/en_nso/ennso_parallel.train.nso > train.nso
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < ukuxhumana/clean/en_nso/ennso_parallel.dev.en > valid.en
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < ukuxhumana/clean/en_nso/ennso_parallel.dev.nso > valid.nso
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < ukuxhumana/clean/en_nso/ennso_parallel.test.en > test.en
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < ukuxhumana/clean/en_nso/ennso_parallel.test.nso > test.nso

os.chdir('fairseq/')
TEXT=".."
%run 'preprocess.py' --source-lang en --target-lang nso --trainpref $TEXT/train --validpref $TEXT/valid --testpref $TEXT/test --destdir data-bin/ennso

!mkdir -p checkpoint/fconv
!python train.py data-bin/ennso \
    --lr 0.25 --clip-norm 0.1 --dropout 0.2 --max-tokens 4000 \
    --arch fconv --save-dir chekpoint/fconv

# for sentencepiece remove ▁
output = %run 'generate.py' data-bin/ennso --path chekpoint/fconv/checkpoint_best.pt --beam 5 --batch-size 128 --remove-bpe --source-lang en --target-lang nso

**16k**

In [0]:
os.chdir('content/')
!ls

combine.txt  ennso.codes  fairseq  sample_data


In [0]:
#os.chdir('../')
!cat fairseq/ukuxhumana/clean/en_nso/ennso_parallel.train.en fairseq/ukuxhumana/clean/en_nso/ennso_parallel.train.nso > combine.txt
!fairseq/subword-nmt/learn_bpe.py -s 16000 <combine.txt> ennso.codes

!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < fairseq/ukuxhumana/clean/en_nso/ennso_parallel.train.en > train.en
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < fairseq/ukuxhumana/clean/en_nso/ennso_parallel.train.nso > train.nso
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < fairseq/ukuxhumana/clean/en_nso/ennso_parallel.dev.en > valid.en
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < fairseq/ukuxhumana/clean/en_nso/ennso_parallel.dev.nso > valid.nso
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < fairseq/ukuxhumana/clean/en_nso/ennso_parallel.test.en > test.en
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < fairseq/ukuxhumana/clean/en_nso/ennso_parallel.test.nso > test.nso

os.chdir('fairseq/')
TEXT=".."
%run 'preprocess.py' --source-lang en --target-lang nso --trainpref $TEXT/train --validpref $TEXT/valid --testpref $TEXT/test --destdir data-bin/ennso

!mkdir -p checkpoint/fconv
!python train.py data-bin/ennso \
    --lr 0.25 --clip-norm 0.1 --dropout 0.2 --max-tokens 4000 \
    --arch fconv --save-dir checkpoint/fconv

# for sentencepiece remove ▁
output = %run 'generate.py' data-bin/ennso --path checkpoint/fconv/checkpoint_best.pt --beam 5 --batch-size 128 --remove-bpe --source-lang en --target-lang nso

**20k**

In [0]:
# Inspect the working directory before starting the next run.
!ls

build		      examples		multiprocessing_train.py  setup.py
checkpoint	      fairseq		PATENTS			  subword-nmt
CONTRIBUTING.md       fairseq.egg-info	preprocess.py		  tests
data-bin	      fairseq.gif	README.md		  train.py
distributed_train.py  generate.py	requirements.txt	  ukuxhumana
docs		      interactive.py	score.py
eval_lm.py	      LICENSE		scripts


In [0]:
os.chdir('../')
!cat fairseq/ukuxhumana/clean/en_nso/ennso_parallel.train.en fairseq/ukuxhumana/clean/en_nso/ennso_parallel.train.nso > combine.txt
!fairseq/subword-nmt/learn_bpe.py -s 20000 <combine.txt> ennso.codes

!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < fairseq/ukuxhumana/clean/en_nso/ennso_parallel.train.en > train.en
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < fairseq/ukuxhumana/clean/en_nso/ennso_parallel.train.nso > train.nso
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < fairseq/ukuxhumana/clean/en_nso/ennso_parallel.dev.en > valid.en
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < fairseq/ukuxhumana/clean/en_nso/ennso_parallel.dev.nso > valid.nso
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < fairseq/ukuxhumana/clean/en_nso/ennso_parallel.test.en > test.en
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < fairseq/ukuxhumana/clean/en_nso/ennso_parallel.test.nso > test.nso

os.chdir('fairseq/')
TEXT=".."
%run 'preprocess.py' --source-lang en --target-lang nso --trainpref $TEXT/train --validpref $TEXT/valid --testpref $TEXT/test --destdir data-bin/ennso

!mkdir -p checkpoint/fconv
!python train.py data-bin/ennso \
    --lr 0.25 --clip-norm 0.1 --dropout 0.2 --max-tokens 4000 \
    --arch fconv --save-dir chckpoint/fconv

# for sentencepiece remove ▁
output = %run 'generate.py' data-bin/ennso --path chckpoint/fconv/checkpoint_best.pt --beam 5 --batch-size 128 --remove-bpe --source-lang en --target-lang nso

**24k**

In [0]:
# Inspect the working directory before starting the next run.
!ls

In [0]:
os.chdir('../')
!cat fairseq/ukuxhumana/clean/en_nso/ennso_parallel.train.en fairseq/ukuxhumana/clean/en_nso/ennso_parallel.train.nso > combine.txt
!fairseq/subword-nmt/learn_bpe.py -s 24000 <combine.txt> ennso.codes

!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < fairseq/ukuxhumana/clean/en_nso/ennso_parallel.train.en > train.en
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < fairseq/ukuxhumana/clean/en_nso/ennso_parallel.train.nso > train.nso
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < fairseq/ukuxhumana/clean/en_nso/ennso_parallel.dev.en > valid.en
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < fairseq/ukuxhumana/clean/en_nso/ennso_parallel.dev.nso > valid.nso
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < fairseq/ukuxhumana/clean/en_nso/ennso_parallel.test.en > test.en
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < fairseq/ukuxhumana/clean/en_nso/ennso_parallel.test.nso > test.nso

os.chdir('fairseq/')
TEXT=".."
%run 'preprocess.py' --source-lang en --target-lang nso --trainpref $TEXT/train --validpref $TEXT/valid --testpref $TEXT/test --destdir data-bin/ennso

!mkdir -p checkpoint/fconv
!python train.py data-bin/ennso \
    --lr 0.25 --clip-norm 0.1 --dropout 0.2 --max-tokens 4000 \
    --arch fconv --save-dir chekpoint/fconv

# for sentencepiece remove ▁
output = %run 'generate.py' data-bin/ennso --path chekpoint/fconv/checkpoint_best.pt --beam 5 --batch-size 128 --remove-bpe --source-lang en --target-lang nso

**28k**

In [0]:
os.chdir('../')
!cat fairseq/ukuxhumana/clean/en_nso/ennso_parallel.train.en fairseq/ukuxhumana/clean/en_nso/ennso_parallel.train.nso > combine.txt
!fairseq/subword-nmt/learn_bpe.py -s 28000 <combine.txt> ennso.codes

!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < fairseq/ukuxhumana/clean/en_nso/ennso_parallel.train.en > train.en
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < fairseq/ukuxhumana/clean/en_nso/ennso_parallel.train.nso > train.nso
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < fairseq/ukuxhumana/clean/en_nso/ennso_parallel.dev.en > valid.en
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < fairseq/ukuxhumana/clean/en_nso/ennso_parallel.dev.nso > valid.nso
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < fairseq/ukuxhumana/clean/en_nso/ennso_parallel.test.en > test.en
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < fairseq/ukuxhumana/clean/en_nso/ennso_parallel.test.nso > test.nso

os.chdir('fairseq/')
TEXT=".."
%run 'preprocess.py' --source-lang en --target-lang nso --trainpref $TEXT/train --validpref $TEXT/valid --testpref $TEXT/test --destdir data-bin/ennso

!mkdir -p checkpoint/fconv
!python train.py data-bin/ennso \
    --lr 0.25 --clip-norm 0.1 --dropout 0.2 --max-tokens 4000 \
    --arch fconv --save-dir checkpoint/fconv

# for sentencepiece remove ▁
output = %run 'generate.py' data-bin/ennso --path checkpoint/fconv/checkpoint_best.pt --beam 5 --batch-size 128 --remove-bpe --source-lang en --target-lang nso

**32k**

In [0]:
# Inspect the working directory before starting the next run.
!ls

build		      examples		multiprocessing_train.py  setup.py
checkpoint	      fairseq		PATENTS			  subword-nmt
CONTRIBUTING.md       fairseq.egg-info	preprocess.py		  tests
data-bin	      fairseq.gif	README.md		  train.py
distributed_train.py  generate.py	requirements.txt	  ukuxhumana
docs		      interactive.py	score.py
eval_lm.py	      LICENSE		scripts


In [0]:
os.chdir('../')
!cat fairseq/ukuxhumana/clean/en_nso/ennso_parallel.train.en fairseq/ukuxhumana/clean/en_nso/ennso_parallel.train.nso > combine.txt
!fairseq/subword-nmt/learn_bpe.py -s 32000 <combine.txt> ennso.codes

!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < fairseq/ukuxhumana/clean/en_nso/ennso_parallel.train.en > train.en
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < fairseq/ukuxhumana/clean/en_nso/ennso_parallel.train.nso > train.nso
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < fairseq/ukuxhumana/clean/en_nso/ennso_parallel.dev.en > valid.en
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < fairseq/ukuxhumana/clean/en_nso/ennso_parallel.dev.nso > valid.nso
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < fairseq/ukuxhumana/clean/en_nso/ennso_parallel.test.en > test.en
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < fairseq/ukuxhumana/clean/en_nso/ennso_parallel.test.nso > test.nso

os.chdir('fairseq/')
TEXT=".."
%run 'preprocess.py' --source-lang en --target-lang nso --trainpref $TEXT/train --validpref $TEXT/valid --testpref $TEXT/test --destdir data-bin/ennso

!mkdir -p chckpoint/fconv
!python train.py data-bin/ennso \
    --lr 0.25 --clip-norm 0.1 --dropout 0.2 --max-tokens 4000 \
    --arch fconv --save-dir chckpoint/fconv

# for sentencepiece remove ▁
output = %run 'generate.py' data-bin/ennso --path chckpoint/fconv/checkpoint_best.pt --beam 5 --batch-size 128 --remove-bpe --source-lang en --target-lang nso

**36k**

In [0]:
# Inspect the working directory before starting the next run (the directory change below is disabled).
#os.chdir('fairseq/')
!ls

In [0]:
os.chdir('../')
!cat fairseq/ukuxhumana/clean/en_nso/ennso_parallel.train.en fairseq/ukuxhumana/clean/en_nso/ennso_parallel.train.nso > combine.txt
!fairseq/subword-nmt/learn_bpe.py -s 36000 <combine.txt> ennso.codes

!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < fairseq/ukuxhumana/clean/en_nso/ennso_parallel.train.en > train.en
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < fairseq/ukuxhumana/clean/en_nso/ennso_parallel.train.nso > train.nso
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < fairseq/ukuxhumana/clean/en_nso/ennso_parallel.dev.en > valid.en
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < fairseq/ukuxhumana/clean/en_nso/ennso_parallel.dev.nso > valid.nso
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < fairseq/ukuxhumana/clean/en_nso/ennso_parallel.test.en > test.en
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < fairseq/ukuxhumana/clean/en_nso/ennso_parallel.test.nso > test.nso

os.chdir('fairseq/')
TEXT=".."
%run 'preprocess.py' --source-lang en --target-lang nso --trainpref $TEXT/train --validpref $TEXT/valid --testpref $TEXT/test --destdir data-bin/ennso

!mkdir -p checkpoint/fconv
!python train.py data-bin/ennso \
    --lr 0.25 --clip-norm 0.1 --dropout 0.2 --max-tokens 4000 \
    --arch fconv --save-dir checkpoint/fconv

# for sentencepiece remove ▁
output = %run 'generate.py' data-bin/ennso --path checkpoint/fconv/checkpoint_best.pt --beam 5 --batch-size 128 --remove-bpe --source-lang en --target-lang nso

**40k**

In [0]:
# Inspect the working directory before starting the next run.
!ls

In [0]:
os.chdir('../')
!cat fairseq/ukuxhumana/clean/en_nso/ennso_parallel.train.en fairseq/ukuxhumana/clean/en_nso/ennso_parallel.train.nso > combine.txt
!fairseq/subword-nmt/learn_bpe.py -s 40000 <combine.txt> ennso.codes

!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < fairseq/ukuxhumana/clean/en_nso/ennso_parallel.train.en > train.en
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < fairseq/ukuxhumana/clean/en_nso/ennso_parallel.train.nso > train.nso
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < fairseq/ukuxhumana/clean/en_nso/ennso_parallel.dev.en > valid.en
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < fairseq/ukuxhumana/clean/en_nso/ennso_parallel.dev.nso > valid.nso
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < fairseq/ukuxhumana/clean/en_nso/ennso_parallel.test.en > test.en
!fairseq/subword-nmt/apply_bpe.py -c ennso.codes < fairseq/ukuxhumana/clean/en_nso/ennso_parallel.test.nso > test.nso

os.chdir('fairseq/')
TEXT=".."
%run 'preprocess.py' --source-lang en --target-lang nso --trainpref $TEXT/train --validpref $TEXT/valid --testpref $TEXT/test --destdir data-bin/ennso

!mkdir -p checkpoint/fconv
!python train.py data-bin/ennso \
    --lr 0.25 --clip-norm 0.1 --dropout 0.2 --max-tokens 4000 \
    --arch fconv --save-dir chckpoint/fconv

# for sentencepiece remove ▁
output = %run 'generate.py' data-bin/ennso --path chckpoint/fconv/checkpoint_best.pt --beam 5 --batch-size 128 --remove-bpe --source-lang en --target-lang nso

## Results

Test-set BLEU score for each number of BPE merge operations:

*   40k: 7.50
*   36k: 8.65
*   32k: 7.27
*   28k: 8.21
*   24k: 6.98
*   20k: 8.68
*   16k: 10.07
*   12k: 10.14
*     8k: 8.73
*     4k: 12.18

