# Train The Model

NOTE: Before executing any cells, **change the runtime type to T4 GPU** (or any more powerful GPU available to you), because training the model is computationally expensive and resource-intensive.

In [1]:
# Mount Google Drive at /content/drive; the project directory used by the
# following cells lives underneath this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [2]:
# Make the project folder on Drive the working directory. Every relative path
# used in later cells (config.yaml, models/, the *.subword.* files) is resolved
# against this directory.
%cd /content/drive/MyDrive/machine_translation

/content/drive/MyDrive/machine_translation


In [3]:
# List the project directory to confirm that the prepared data files,
# the subword models/vocabularies and config.yaml are present.
!ls

bible_verses.csv				MT-Preparation
compute-bleu.py					nagamese.csv
compute-bleu.py.1				nagamese.csv-filtered.ng
config.yaml					nagamese.csv-filtered.ng.subword
english.csv					nagamese.csv-filtered.ng.subword.dev
english.csv-filtered.en				nagamese.csv-filtered.ng.subword.test
english.csv-filtered.en.subword			nagamese.csv-filtered.ng.subword.test.desubword
english.csv-filtered.en.subword.dev		nagamese.csv-filtered.ng.subword.train
english.csv-filtered.en.subword.test		run
english.csv-filtered.en.subword.test.desubword	source.model
english.csv-filtered.en.subword.train		source.vocab
en.translated					target.model
en.translated.desubword				target.vocab
models						train.log


In [4]:
# [Optional] Check the content of the configuration file
# NOTE(review): the printed config lists `src_seq_length: 150` twice; the second
# entry was presumably meant to be `tgt_seq_length: 150` - confirm and fix it in
# config.yaml itself.
!cat config.yaml

# config.yaml

save_data: run

data:
    corpus_1:
        path_src: nagamese.csv-filtered.ng.subword.train
        path_tgt: english.csv-filtered.en.subword.train
        transforms: [filtertoolong]
    valid:
        path_src: nagamese.csv-filtered.ng.subword.dev
        path_tgt: english.csv-filtered.en.subword.dev
        transforms: [filtertoolong]

src_vocab: run/source.vocab
tgt_vocab: run/target.vocab

src_vocab_size: 6000
tgt_vocab_size: 6000

src_seq_length: 150
src_seq_length: 150

src_subword_model: source.model
tgt_subword_model: target.model

log_file: train.log
save_model: models/model.fren

early_stopping: 4

# Save a model checkpoint for each n steps
save_checkpoint_steps: 500  # More frequent checkpoints due to low dataset size

# Limit checkpoints to last n
keep_checkpoint: 3

seed: 2425

# Adjusted training parameters for better efficiency
train_steps: 4000        # Extended for deeper learning over limited dataset
valid_steps: 500          # More frequent validatio

In [5]:
# Find the number of CPUs/cores on the machine
# (`--all` reports every installed processor).
!nproc --all

2


In [6]:
# Check if the GPU is active
!nvidia-smi -h # Check if the GPU is visible to PyTorch


NVIDIA System Management Interface -- v535.104.05

NVSMI provides monitoring information for Tesla and select Quadro devices.
The data is presented in either a plain text or an XML format, via stdout or a file.
NVSMI also provides several management operations for changing the device state.

Note that the functionality of NVSMI is exposed through the NVML C-based
library. See the NVIDIA developer website for more information about NVML.
Python wrappers to NVML are also available.  The output of NVSMI is
not guaranteed to be backwards compatible; NVML and the bindings are backwards
compatible.

http://developer.nvidia.com/nvidia-management-library-nvml/
http://pypi.python.org/pypi/nvidia-ml-py/
Supported products:
- Full Support
    - All Tesla products, starting with the Kepler architecture
    - All Quadro products, starting with the Kepler architecture
    - All GRID products, starting with the Kepler architecture
    - GeForce Titan products, starting with the Kepler architecture
- 

In [7]:
# List the GPUs attached to this runtime, one line per device (name and UUID).
!nvidia-smi --list-gpus

GPU 0: Tesla T4 (UUID: GPU-73c4ef44-53f8-67ba-dfc5-f77896c2686f)


In [8]:

import torch

GPU_INDEX = 0
BYTES_PER_MIB = 1024**2

# Confirm that PyTorch can see a CUDA device and report the device's name.
print(torch.cuda.is_available())
print(torch.cuda.get_device_name(GPU_INDEX))

# mem_get_info returns a (free, total) pair in bytes; both are shown in MiB.
gpu_memory = torch.cuda.mem_get_info(GPU_INDEX)
free_bytes, total_bytes = gpu_memory
print("Free GPU memory:", free_bytes/BYTES_PER_MIB, "out of:", total_bytes/BYTES_PER_MIB)

True
Tesla T4
Free GPU memory: 14999.0625 out of: 15102.0625


In [9]:
# Re-confirm the working directory and its contents before touching any files.
!pwd
!ls

/content/drive/MyDrive/machine_translation
bible_verses.csv				MT-Preparation
compute-bleu.py					nagamese.csv
compute-bleu.py.1				nagamese.csv-filtered.ng
config.yaml					nagamese.csv-filtered.ng.subword
english.csv					nagamese.csv-filtered.ng.subword.dev
english.csv-filtered.en				nagamese.csv-filtered.ng.subword.test
english.csv-filtered.en.subword			nagamese.csv-filtered.ng.subword.test.desubword
english.csv-filtered.en.subword.dev		nagamese.csv-filtered.ng.subword.train
english.csv-filtered.en.subword.test		run
english.csv-filtered.en.subword.test.desubword	source.model
english.csv-filtered.en.subword.train		source.vocab
en.translated					target.model
en.translated.desubword				target.vocab
models						train.log


In [10]:

!rm -rf drive/MyDrive/machine_translation/models/

In [11]:
!pip3 install OpenNMT-py

Collecting OpenNMT-py
  Downloading OpenNMT_py-3.5.1-py3-none-any.whl.metadata (8.8 kB)
Collecting torch<2.3,>=2.1 (from OpenNMT-py)
  Downloading torch-2.2.2-cp310-cp310-manylinux1_x86_64.whl.metadata (26 kB)
Collecting configargparse (from OpenNMT-py)
  Downloading ConfigArgParse-1.7-py3-none-any.whl.metadata (23 kB)
Collecting ctranslate2<5,>=4 (from OpenNMT-py)
  Downloading ctranslate2-4.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)
Collecting waitress (from OpenNMT-py)
  Downloading waitress-3.0.1-py3-none-any.whl.metadata (5.1 kB)
Collecting pyonmttok<2,>=1.37 (from OpenNMT-py)
  Downloading pyonmttok-1.37.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting sacrebleu (from OpenNMT-py)
  Downloading sacrebleu-2.4.3-py3-none-any.whl.metadata (51 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.8/51.8 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting rapidfuzz (from OpenNMT-py)

In [3]:
# Train the NMT model
# All settings (data paths, vocabularies, step counts, checkpointing) come from
# config.yaml in the working directory; per that file, checkpoints are saved
# under models/ with the prefix `model.fren` and the log goes to train.log.
!onmt_train -config config.yaml

[2024-11-09 16:16:16,870 INFO] Parsed 2 corpora from -data.
[2024-11-09 16:16:16,872 INFO] Get special vocabs from Transforms: {'src': [], 'tgt': []}.
[2024-11-09 16:16:17,555 INFO] The first 10 tokens of the vocabs are:['<unk>', '<blank>', '<s>', '</s>', ',', '▁', '▁laga', '.', '"', '▁"']
[2024-11-09 16:16:17,555 INFO] The decoder start token is: <s>
[2024-11-09 16:16:17,555 INFO] Building model...
[2024-11-09 16:16:20,061 INFO] Switching model to float32 for amp/apex_amp
[2024-11-09 16:16:20,061 INFO] Non quantized layer compute is fp16
[2024-11-09 16:16:20,252 INFO] NMTModel(
  (encoder): TransformerEncoder(
    (embeddings): Embeddings(
      (make_embedding): Sequential(
        (emb_luts): Elementwise(
          (0): Embedding(2168, 512, padding_idx=1)
        )
        (pe): PositionalEncoding()
      )
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): ModuleList(
      (0-5): 6 x TransformerEncoderLayer(
        (self_attn): MultiHeadedAttention(
         

In [4]:
# List the project directory again after training (models/ and train.log are
# the outputs configured in config.yaml).
!ls

bible_verses.csv				MT-Preparation
compute-bleu.py					nagamese.csv
compute-bleu.py.1				nagamese.csv-filtered.ng
config.yaml					nagamese.csv-filtered.ng.subword
english.csv					nagamese.csv-filtered.ng.subword.dev
english.csv-filtered.en				nagamese.csv-filtered.ng.subword.test
english.csv-filtered.en.subword			nagamese.csv-filtered.ng.subword.test.desubword
english.csv-filtered.en.subword.dev		nagamese.csv-filtered.ng.subword.train
english.csv-filtered.en.subword.test		run
english.csv-filtered.en.subword.test.desubword	source.model
english.csv-filtered.en.subword.train		source.vocab
en.translated					target.model
en.translated.desubword				target.vocab
models						train.log


In [5]:
# Translate the "subworded" source file of the test dataset
# Change the model name, if needed: it must match a checkpoint that actually
# exists under models/ (config.yaml keeps only the last 3 checkpoints).
# The translations are written to en.translated, decoding runs on GPU 0, and
# -min_length 1 is passed as the minimum output length.
!onmt_translate -model models/model.fren_step_2500.pt -src nagamese.csv-filtered.ng.subword.test -output en.translated -gpu 0 -min_length 1


[2024-11-09 16:31:50,613 INFO] Loading checkpoint from models/model.fren_step_2500.pt
[2024-11-09 16:31:52,205 INFO] Loading data into the model
[2024-11-09 16:32:01,304 INFO] PRED SCORE: -0.4314, PRED PPL: 1.54 NB SENTENCES: 500
Time w/o python interpreter load/terminate:  11.0454421043396
