# Train Music Transformer
Since Fri. May 13th, 2022

Set up training in colab


## Setup


### Ipython



In [3]:
%load_ext autoreload
%autoreload 2



The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Colab



In [4]:
import os
import sys

import torch


if 'google.colab' in sys.modules:
    from google.colab import drive
    drive.mount('/content/drive')

    ! pip3 install sty icecream music21 transformers datasets

    path = os.path.join('drive', 'My Drive', 'Research', 'Music with NLP', 'Symbolic-Music-Generation')
    sys.path.append(path)
    ! ls "{path}"


    import time, os
    os.environ['TZ'] = 'US/Eastern'
    time.tzset()

    if torch.cuda.is_available():
        %env PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128

if torch.cuda.is_available():
    ! nvidia-smi


from icecream import ic

from musicnlp.util import *

ic(u.proj_dir, u.pkg_nm)



Mounted at /content/drive
Collecting sty
  Downloading sty-1.0.4-py3-none-any.whl (11 kB)
Collecting icecream
  Downloading icecream-2.1.2-py2.py3-none-any.whl (8.3 kB)
Collecting transformers
  Downloading transformers-4.18.0-py3-none-any.whl (4.0 MB)
[K     |████████████████████████████████| 4.0 MB 5.5 MB/s 
[?25hCollecting datasets
  Downloading datasets-2.1.0-py3-none-any.whl (325 kB)
[K     |████████████████████████████████| 325 kB 73.4 MB/s 
Collecting executing>=0.3.1
  Downloading executing-0.8.3-py2.py3-none-any.whl (16 kB)
Collecting colorama>=0.3.9
  Downloading colorama-0.4.4-py2.py3-none-any.whl (16 kB)
Collecting asttokens>=2.0.1
  Downloading asttokens-2.0.5-py2.py3-none-any.whl (20 kB)
Collecting sacremoses
  Downloading sacremoses-0.0.49-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 56.6 MB/s 
Collecting tokenizers!=0.11.3,<0.13,>=0.11.1
  Downloading tokenizers-0.12.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)


ic| PATH_BASE: 'drive/My Drive/Research/Music with NLP'
    DIR_PROJ: 'Symbolic-Music-Generation'
    PKG_NM: 'musicnlp'


('drive/My Drive/Research/Music with NLP',
 'Symbolic-Music-Generation',
 'musicnlp')

### code

In [5]:
import transformers

from musicnlp.trainer import train

transformers.set_seed(sconfig('random-seed'))



## Prep for training


In [6]:
md_nm = 'transf-xl'
# md_sz = 'debug'
# md_sz = 'tiny'
md_sz = 'base'


augment_key = False
dnm_909 = 'musicnlp music extraction, dnm=POP909, n=909, ' \
          'meta={mode=melody, prec=5, th=1}, 2022-04-16_20-28-47'
dnm_lmd = 'musicnlp music extraction, dnm=LMD-cleaned-subset, n=10269, ' \
          'meta={mode=melody, prec=5, th=1}, 2022-04-17_11-52-15'
# dnms = [dnm_909, dnm_lmd]
dnms = [dnm_909]


if md_sz == 'debug':
    n = 32
    train_args = dict(
        per_device_train_batch_size=4,
        # save_strategy='no',
        save_strategy='epoch',
        num_train_epochs=64,
    )
    my_train_args = dict(
        logging_strategy='epoch',
        save_epochs=16,
        tqdm='train-only',
        augment_key=augment_key
    )
elif md_sz == 'tiny':
    n = 64
    train_args = dict(
        per_device_train_batch_size=32,
        save_strategy='epoch',
        num_train_epochs=64,
    )
    my_train_args = dict(
        save_epochs=16,
        tqdm='train-only',
        augment_key=augment_key
)
else:
    n = None
    train_args = dict(
        num_train_epochs=64,
        per_device_train_batch_size=32,
    )
    my_train_args = dict(
        fp16=torch.cuda.is_available(),
        logging_strategy='epoch',
        save_epochs=2,
        tqdm='train-only',
        augment_key=augment_key
    )

mdl, tokenizer, trainer = train.get_all_setup(
    model_name=md_nm, model_size=md_sz, dataset_names=dnms, n_sample=n,
    train_args=train_args, my_train_args=my_train_args
)



Downloading builder script:   0%|          | 0.00/1.41k [00:00<?, ?B/s]

## Train


Check log and tensorboard files written



In [None]:
# trainer.train()
checkpoint_path = os.path.join(PATH_BASE, DIR_PROJ, DIR_MDL, 'reformer', '2022-04-17_22-53-41', 'checkpoint-3430')
trainer.train(checkpoint_path)

trainer.save_model(os.path.join(trainer.args.output_dir, 'trained'))


[38;2;0;186;142m2022-04-18 08:56:30[38;2;97;175;239m|[39m[49m[22m[23m[24m[25m[27m[28m[29m[38;2;198;120;221m[ReformerModelWithLMHead Training][38;2;97;175;239m::[38;2;198;120;221mon_train_begin[38;2;97;175;239m::[38;2;198;120;221mtrain.py[38;2;97;175;239m:[38;2;198;120;221m275[38;2;97;175;239m, [39m[49m[22m[23m[24m[25m[27m[28m[29mINFO[39m[49m[22m[23m[24m[25m[27m[28m[29m[38;2;97;175;239m - [39m[49m[22m[23m[24m[25m[27m[28m[29mTraining started with model [35m{[39m[49m[0mmodel name: [34mReformerModelWithLMHead[39m[49m[0m, max length: [34m2048[39m[49m[0m, axial_pos_shape: [34m(32, 64)[39m[49m[0m, n_layer: [34m12[39m[49m[0m, hidden_size: [34m768[39m[49m[0m, ff_size: [34m3072[39m[49m[0m, attention_shape: [34m12x64[39m[49m[0m, parameter_count: [34m39.9M[39m[49m[0m[35m}[39m[49m[0m, {
    [94m"hash_seed"[39;49;00m: [34mnull[39;49;00m,
    [94m"vocab_size"[39;49;00m: [34m462[39;49;00m,
    [94m"attentio

Epoch 11/64:   0%|          | 0/343 [00:00<?, ?it/s]config.num_buckets is not set. Setting config.num_buckets to 64...
config.num_buckets is not set. Setting config.num_buckets to 64...
config.num_buckets is not set. Setting config.num_buckets to 64...
Epoch 11/64: 100%|██████████| 343/343 [38:50<00:00,  5.32s/it]

[38;2;0;186;142m2022-04-18 09:35:32[38;2;97;175;239m|[39m[49m[22m[23m[24m[25m[27m[28m[29m[38;2;198;120;221m[ReformerModelWithLMHead Training][38;2;97;175;239m::[38;2;198;120;221m_log[38;2;97;175;239m::[38;2;198;120;221mtrain.py[38;2;97;175;239m:[38;2;198;120;221m343[38;2;97;175;239m, [39m[49m[22m[23m[24m[25m[27m[28m[29mINFO[39m[49m[22m[23m[24m[25m[27m[28m[29m[38;2;97;175;239m - [39m[49m[22m[23m[24m[25m[27m[28m[29m[35m{[39m[49m[0mstep: [34m 3773/21952[39m[49m[0m, epoch: [34m11.000/64[39m[49m[0m, train/learning_rate: [34m2.953e-04[39m[49m[0m, train/loss: [34m1.5178[39m[49m[0m, train/ntp_acc: [34m55.19[39m[49m[0m, train/ikr: [34m69.73[39m[49m[0m[35m}[39m[49m[0m[39m[49m[22m[23m[24m[25m[27m[28m[29m


Epoch 11/64: 100%|██████████| 343/343 [38:50<00:00,  6.79s/it]


[38;2;0;186;142m2022-04-18 09:35:58[38;2;97;175;239m|[39m[49m[22m[23m[24m[25m[27m[28m[29m[38;2;198;120;221m[ReformerModelWithLMHead Training][38;2;97;175;239m::[38;2;198;120;221m_log[38;2;97;175;239m::[38;2;198;120;221mtrain.py[38;2;97;175;239m:[38;2;198;120;221m343[38;2;97;175;239m, [39m[49m[22m[23m[24m[25m[27m[28m[29mINFO[39m[49m[22m[23m[24m[25m[27m[28m[29m[38;2;97;175;239m - [39m[49m[22m[23m[24m[25m[27m[28m[29m[35m{[39m[49m[0mstep: [34m 3773/21952[39m[49m[0m, epoch: [34m11/64[39m[49m[0m, eval/loss: [34m1.3271[39m[49m[0m, eval/ntp_acc: [34m60.95[39m[49m[0m, eval/ikr: [34m82.95[39m[49m[0m[35m}[39m[49m[0m[39m[49m[22m[23m[24m[25m[27m[28m[29m


Epoch 12/64: 100%|██████████| 343/343 [40:07<00:00,  5.53s/it]

[38;2;0;186;142m2022-04-18 10:16:06[38;2;97;175;239m|[39m[49m[22m[23m[24m[25m[27m[28m[29m[38;2;198;120;221m[ReformerModelWithLMHead Training][38;2;97;175;239m::[38;2;198;120;221m_log[38;2;97;175;239m::[38;2;198;120;221mtrain.py[38;2;97;175;239m:[38;2;198;120;221m343[38;2;97;175;239m, [39m[49m[22m[23m[24m[25m[27m[28m[29mINFO[39m[49m[22m[23m[24m[25m[27m[28m[29m[38;2;97;175;239m - [39m[49m[22m[23m[24m[25m[27m[28m[29m[35m{[39m[49m[0mstep: [34m 4116/21952[39m[49m[0m, epoch: [34m12.000/64[39m[49m[0m, train/learning_rate: [34m2.931e-04[39m[49m[0m, train/loss: [34m1.6636[39m[49m[0m, train/ntp_acc: [34m50.39[39m[49m[0m, train/ikr: [34m86.93[39m[49m[0m[35m}[39m[49m[0m[39m[49m[22m[23m[24m[25m[27m[28m[29m


Epoch 12/64: 100%|██████████| 343/343 [40:09<00:00,  7.02s/it]


[38;2;0;186;142m2022-04-18 10:16:35[38;2;97;175;239m|[39m[49m[22m[23m[24m[25m[27m[28m[29m[38;2;198;120;221m[ReformerModelWithLMHead Training][38;2;97;175;239m::[38;2;198;120;221m_log[38;2;97;175;239m::[38;2;198;120;221mtrain.py[38;2;97;175;239m:[38;2;198;120;221m343[38;2;97;175;239m, [39m[49m[22m[23m[24m[25m[27m[28m[29mINFO[39m[49m[22m[23m[24m[25m[27m[28m[29m[38;2;97;175;239m - [39m[49m[22m[23m[24m[25m[27m[28m[29m[35m{[39m[49m[0mstep: [34m 4116/21952[39m[49m[0m, epoch: [34m12/64[39m[49m[0m, eval/loss: [34m1.281[39m[49m[0m, eval/ntp_acc: [34m62.38[39m[49m[0m, eval/ikr: [34m83.86[39m[49m[0m[35m}[39m[49m[0m[39m[49m[22m[23m[24m[25m[27m[28m[29m


Epoch 13/64: 100%|██████████| 343/343 [40:14<00:00,  5.53s/it]

[38;2;0;186;142m2022-04-18 10:56:49[38;2;97;175;239m|[39m[49m[22m[23m[24m[25m[27m[28m[29m[38;2;198;120;221m[ReformerModelWithLMHead Training][38;2;97;175;239m::[38;2;198;120;221m_log[38;2;97;175;239m::[38;2;198;120;221mtrain.py[38;2;97;175;239m:[38;2;198;120;221m343[38;2;97;175;239m, [39m[49m[22m[23m[24m[25m[27m[28m[29mINFO[39m[49m[22m[23m[24m[25m[27m[28m[29m[38;2;97;175;239m - [39m[49m[22m[23m[24m[25m[27m[28m[29m[35m{[39m[49m[0mstep: [34m 4459/21952[39m[49m[0m, epoch: [34m13.000/64[39m[49m[0m, train/learning_rate: [34m2.904e-04[39m[49m[0m, train/loss: [34m1.4817[39m[49m[0m, train/ntp_acc: [34m55.91[39m[49m[0m, train/ikr: [34m84.02[39m[49m[0m[35m}[39m[49m[0m[39m[49m[22m[23m[24m[25m[27m[28m[29m


Epoch 13/64: 100%|██████████| 343/343 [40:14<00:00,  7.04s/it]


[38;2;0;186;142m2022-04-18 10:57:15[38;2;97;175;239m|[39m[49m[22m[23m[24m[25m[27m[28m[29m[38;2;198;120;221m[ReformerModelWithLMHead Training][38;2;97;175;239m::[38;2;198;120;221m_log[38;2;97;175;239m::[38;2;198;120;221mtrain.py[38;2;97;175;239m:[38;2;198;120;221m343[38;2;97;175;239m, [39m[49m[22m[23m[24m[25m[27m[28m[29mINFO[39m[49m[22m[23m[24m[25m[27m[28m[29m[38;2;97;175;239m - [39m[49m[22m[23m[24m[25m[27m[28m[29m[35m{[39m[49m[0mstep: [34m 4459/21952[39m[49m[0m, epoch: [34m13/64[39m[49m[0m, eval/loss: [34m1.2205[39m[49m[0m, eval/ntp_acc: [34m64.25[39m[49m[0m, eval/ikr: [34m83.58[39m[49m[0m[35m}[39m[49m[0m[39m[49m[22m[23m[24m[25m[27m[28m[29m


Epoch 14/64: 100%|██████████| 343/343 [40:13<00:00,  5.52s/it]

[38;2;0;186;142m2022-04-18 11:37:29[38;2;97;175;239m|[39m[49m[22m[23m[24m[25m[27m[28m[29m[38;2;198;120;221m[ReformerModelWithLMHead Training][38;2;97;175;239m::[38;2;198;120;221m_log[38;2;97;175;239m::[38;2;198;120;221mtrain.py[38;2;97;175;239m:[38;2;198;120;221m343[38;2;97;175;239m, [39m[49m[22m[23m[24m[25m[27m[28m[29mINFO[39m[49m[22m[23m[24m[25m[27m[28m[29m[38;2;97;175;239m - [39m[49m[22m[23m[24m[25m[27m[28m[29m[35m{[39m[49m[0mstep: [34m 4802/21952[39m[49m[0m, epoch: [34m14.000/64[39m[49m[0m, train/learning_rate: [34m2.873e-04[39m[49m[0m, train/loss: [34m1.3332[39m[49m[0m, train/ntp_acc: [34m60.36[39m[49m[0m, train/ikr: [34m87.61[39m[49m[0m[35m}[39m[49m[0m[39m[49m[22m[23m[24m[25m[27m[28m[29m


Epoch 14/64: 100%|██████████| 343/343 [40:16<00:00,  7.04s/it]


[38;2;0;186;142m2022-04-18 11:37:58[38;2;97;175;239m|[39m[49m[22m[23m[24m[25m[27m[28m[29m[38;2;198;120;221m[ReformerModelWithLMHead Training][38;2;97;175;239m::[38;2;198;120;221m_log[38;2;97;175;239m::[38;2;198;120;221mtrain.py[38;2;97;175;239m:[38;2;198;120;221m343[38;2;97;175;239m, [39m[49m[22m[23m[24m[25m[27m[28m[29mINFO[39m[49m[22m[23m[24m[25m[27m[28m[29m[38;2;97;175;239m - [39m[49m[22m[23m[24m[25m[27m[28m[29m[35m{[39m[49m[0mstep: [34m 4802/21952[39m[49m[0m, epoch: [34m14/64[39m[49m[0m, eval/loss: [34m1.1977[39m[49m[0m, eval/ntp_acc: [34m65.39[39m[49m[0m, eval/ikr: [34m82.96[39m[49m[0m[35m}[39m[49m[0m[39m[49m[22m[23m[24m[25m[27m[28m[29m


Epoch 15/64: 100%|██████████| 343/343 [40:17<00:00,  5.52s/it]

[38;2;0;186;142m2022-04-18 12:18:16[38;2;97;175;239m|[39m[49m[22m[23m[24m[25m[27m[28m[29m[38;2;198;120;221m[ReformerModelWithLMHead Training][38;2;97;175;239m::[38;2;198;120;221m_log[38;2;97;175;239m::[38;2;198;120;221mtrain.py[38;2;97;175;239m:[38;2;198;120;221m343[38;2;97;175;239m, [39m[49m[22m[23m[24m[25m[27m[28m[29mINFO[39m[49m[22m[23m[24m[25m[27m[28m[29m[38;2;97;175;239m - [39m[49m[22m[23m[24m[25m[27m[28m[29m[35m{[39m[49m[0mstep: [34m 5145/21952[39m[49m[0m, epoch: [34m15.000/64[39m[49m[0m, train/learning_rate: [34m2.838e-04[39m[49m[0m, train/loss: [34m1.0233[39m[49m[0m, train/ntp_acc: [34m69.06[39m[49m[0m, train/ikr: [34m73.79[39m[49m[0m[35m}[39m[49m[0m[39m[49m[22m[23m[24m[25m[27m[28m[29m


Epoch 15/64: 100%|██████████| 343/343 [40:17<00:00,  7.05s/it]


[38;2;0;186;142m2022-04-18 12:18:41[38;2;97;175;239m|[39m[49m[22m[23m[24m[25m[27m[28m[29m[38;2;198;120;221m[ReformerModelWithLMHead Training][38;2;97;175;239m::[38;2;198;120;221m_log[38;2;97;175;239m::[38;2;198;120;221mtrain.py[38;2;97;175;239m:[38;2;198;120;221m343[38;2;97;175;239m, [39m[49m[22m[23m[24m[25m[27m[28m[29mINFO[39m[49m[22m[23m[24m[25m[27m[28m[29m[38;2;97;175;239m - [39m[49m[22m[23m[24m[25m[27m[28m[29m[35m{[39m[49m[0mstep: [34m 5145/21952[39m[49m[0m, epoch: [34m15/64[39m[49m[0m, eval/loss: [34m1.1572[39m[49m[0m, eval/ntp_acc: [34m66.53[39m[49m[0m, eval/ikr: [34m83.35[39m[49m[0m[35m}[39m[49m[0m[39m[49m[22m[23m[24m[25m[27m[28m[29m


Epoch 16/64: 100%|██████████| 343/343 [40:23<00:00,  5.52s/it]

[38;2;0;186;142m2022-04-18 12:59:05[38;2;97;175;239m|[39m[49m[22m[23m[24m[25m[27m[28m[29m[38;2;198;120;221m[ReformerModelWithLMHead Training][38;2;97;175;239m::[38;2;198;120;221m_log[38;2;97;175;239m::[38;2;198;120;221mtrain.py[38;2;97;175;239m:[38;2;198;120;221m343[38;2;97;175;239m, [39m[49m[22m[23m[24m[25m[27m[28m[29mINFO[39m[49m[22m[23m[24m[25m[27m[28m[29m[38;2;97;175;239m - [39m[49m[22m[23m[24m[25m[27m[28m[29m[35m{[39m[49m[0mstep: [34m 5488/21952[39m[49m[0m, epoch: [34m16.000/64[39m[49m[0m, train/learning_rate: [34m2.799e-04[39m[49m[0m, train/loss: [34m1.1812[39m[49m[0m, train/ntp_acc: [34m65.38[39m[49m[0m, train/ikr: [34m90.24[39m[49m[0m[35m}[39m[49m[0m[39m[49m[22m[23m[24m[25m[27m[28m[29m


Epoch 16/64: 100%|██████████| 343/343 [40:26<00:00,  7.07s/it]


[38;2;0;186;142m2022-04-18 12:59:34[38;2;97;175;239m|[39m[49m[22m[23m[24m[25m[27m[28m[29m[38;2;198;120;221m[ReformerModelWithLMHead Training][38;2;97;175;239m::[38;2;198;120;221m_log[38;2;97;175;239m::[38;2;198;120;221mtrain.py[38;2;97;175;239m:[38;2;198;120;221m343[38;2;97;175;239m, [39m[49m[22m[23m[24m[25m[27m[28m[29mINFO[39m[49m[22m[23m[24m[25m[27m[28m[29m[38;2;97;175;239m - [39m[49m[22m[23m[24m[25m[27m[28m[29m[35m{[39m[49m[0mstep: [34m 5488/21952[39m[49m[0m, epoch: [34m16/64[39m[49m[0m, eval/loss: [34m1.1368[39m[49m[0m, eval/ntp_acc: [34m67.12[39m[49m[0m, eval/ikr: [34m83.76[39m[49m[0m[35m}[39m[49m[0m[39m[49m[22m[23m[24m[25m[27m[28m[29m


Epoch 17/64: 100%|██████████| 343/343 [40:21<00:00,  5.46s/it]

[38;2;0;186;142m2022-04-18 13:39:56[38;2;97;175;239m|[39m[49m[22m[23m[24m[25m[27m[28m[29m[38;2;198;120;221m[ReformerModelWithLMHead Training][38;2;97;175;239m::[38;2;198;120;221m_log[38;2;97;175;239m::[38;2;198;120;221mtrain.py[38;2;97;175;239m:[38;2;198;120;221m343[38;2;97;175;239m, [39m[49m[22m[23m[24m[25m[27m[28m[29mINFO[39m[49m[22m[23m[24m[25m[27m[28m[29m[38;2;97;175;239m - [39m[49m[22m[23m[24m[25m[27m[28m[29m[35m{[39m[49m[0mstep: [34m 5831/21952[39m[49m[0m, epoch: [34m17.000/64[39m[49m[0m, train/learning_rate: [34m2.756e-04[39m[49m[0m, train/loss: [34m0.7849[39m[49m[0m, train/ntp_acc: [34m76.72[39m[49m[0m, train/ikr: [34m84.82[39m[49m[0m[35m}[39m[49m[0m[39m[49m[22m[23m[24m[25m[27m[28m[29m


Epoch 17/64: 100%|██████████| 343/343 [40:21<00:00,  7.06s/it]


[38;2;0;186;142m2022-04-18 13:40:22[38;2;97;175;239m|[39m[49m[22m[23m[24m[25m[27m[28m[29m[38;2;198;120;221m[ReformerModelWithLMHead Training][38;2;97;175;239m::[38;2;198;120;221m_log[38;2;97;175;239m::[38;2;198;120;221mtrain.py[38;2;97;175;239m:[38;2;198;120;221m343[38;2;97;175;239m, [39m[49m[22m[23m[24m[25m[27m[28m[29mINFO[39m[49m[22m[23m[24m[25m[27m[28m[29m[38;2;97;175;239m - [39m[49m[22m[23m[24m[25m[27m[28m[29m[35m{[39m[49m[0mstep: [34m 5831/21952[39m[49m[0m, epoch: [34m17/64[39m[49m[0m, eval/loss: [34m1.1301[39m[49m[0m, eval/ntp_acc: [34m67.41[39m[49m[0m, eval/ikr: [34m81.79[39m[49m[0m[35m}[39m[49m[0m[39m[49m[22m[23m[24m[25m[27m[28m[29m


Epoch 18/64: 100%|██████████| 343/343 [40:20<00:00,  5.53s/it]

[38;2;0;186;142m2022-04-18 14:20:43[38;2;97;175;239m|[39m[49m[22m[23m[24m[25m[27m[28m[29m[38;2;198;120;221m[ReformerModelWithLMHead Training][38;2;97;175;239m::[38;2;198;120;221m_log[38;2;97;175;239m::[38;2;198;120;221mtrain.py[38;2;97;175;239m:[38;2;198;120;221m343[38;2;97;175;239m, [39m[49m[22m[23m[24m[25m[27m[28m[29mINFO[39m[49m[22m[23m[24m[25m[27m[28m[29m[38;2;97;175;239m - [39m[49m[22m[23m[24m[25m[27m[28m[29m[35m{[39m[49m[0mstep: [34m 6174/21952[39m[49m[0m, epoch: [34m18.000/64[39m[49m[0m, train/learning_rate: [34m2.710e-04[39m[49m[0m, train/loss: [34m1.3089[39m[49m[0m, train/ntp_acc: [34m60.67[39m[49m[0m, train/ikr: [34m86.83[39m[49m[0m[35m}[39m[49m[0m[39m[49m[22m[23m[24m[25m[27m[28m[29m


Epoch 18/64: 100%|██████████| 343/343 [40:23<00:00,  7.07s/it]


[38;2;0;186;142m2022-04-18 14:21:13[38;2;97;175;239m|[39m[49m[22m[23m[24m[25m[27m[28m[29m[38;2;198;120;221m[ReformerModelWithLMHead Training][38;2;97;175;239m::[38;2;198;120;221m_log[38;2;97;175;239m::[38;2;198;120;221mtrain.py[38;2;97;175;239m:[38;2;198;120;221m343[38;2;97;175;239m, [39m[49m[22m[23m[24m[25m[27m[28m[29mINFO[39m[49m[22m[23m[24m[25m[27m[28m[29m[38;2;97;175;239m - [39m[49m[22m[23m[24m[25m[27m[28m[29m[35m{[39m[49m[0mstep: [34m 6174/21952[39m[49m[0m, epoch: [34m18/64[39m[49m[0m, eval/loss: [34m1.1163[39m[49m[0m, eval/ntp_acc: [34m67.43[39m[49m[0m, eval/ikr: [34m82.54[39m[49m[0m[35m}[39m[49m[0m[39m[49m[22m[23m[24m[25m[27m[28m[29m


Epoch 19/64:  36%|███▌      | 123/343 [14:28<25:52,  7.06s/it]

In [None]:
ic(trainer.args.output_dir)
ic(os.listdir(trainer.args.output_dir))

