In [None]:
  !pip install wget
  !apt-get install sox libsndfile1 ffmpeg libsox-fmt-mp3
  !pip install unidecode
  !pip install matplotlib>=3.3.2

  ## Install NeMo
  BRANCH = 'v1.10.0'
  !python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]

  ## Grab the config we'll use in this example
  !mkdir configs
  !wget -P configs/ https://raw.githubusercontent.com/NVIDIA/NeMo/$BRANCH/examples/asr/conf/config.yaml

  exit()

In [None]:
# Colab notebook-output helpers (used below to enable the custom widget manager)
from google.colab import output

# NeMo's "core" package
import nemo
# NeMo's ASR collection - this collection contains complete ASR models and
# building blocks (modules) for ASR
import nemo.collections.asr as nemo_asr
from nemo.collections.asr.metrics.wer import word_error_rate
from nemo.utils import logging, exp_manager

# Language tag; it is interpolated into the experiment name when exp_manager is configured.
LANGUAGE = "Fa"
# SAMPLE_RATE = 16000

output.enable_custom_widget_manager()

# Output alphabet for the fine-tuned model: space, Persian letters and diacritics,
# the full stop, and the lowercase Latin letters that occur in the transcripts.
# The order of entries is significant and must not be changed.
train_dev_vocab = [
    ' ', 'ا', 'ی', 'ر', 'د', 'ن', 'و', 'ه', 'م', 'ت', 'ب', 'س', 'ک', 'ش', 'ل', 'ز', '.',
    'خ', 'گ', 'ف', 'ق', 'پ', 'آ', 'ج', 'ع', 'ح', 'چ', 'ص', 'ط', 'غ', 'ض', 'ذ', 'ظ', 'ث', 'ژ', 'ً', 'a',
    'e', 'r', 'i', 's', 'n', 'o', 't', 'c', 'l', 'h', 'd', 'u', 'b', 'm', 'w', 'k', 'x', 'g', 'p', 'y',
    'v', 'f', 'ٌ', 'z', 'q',
]

# Load the pretrained QuartzNet 15x5 checkpoint onto the CPU, then swap its
# vocabulary for the character set above.
PRETRAINED_MODEL_NAME = "stt_en_quartznet15x5"
char_model = nemo_asr.models.ASRModel.from_pretrained(PRETRAINED_MODEL_NAME, map_location='cpu')
char_model.change_vocabulary(new_vocabulary=train_dev_vocab)

In [3]:
import torch
import torch.nn as nn

def enable_bn_se(m):
    """Switch a batch-norm or squeeze-excite module back to trainable mode.

    Intended as a callback for ``Module.apply``. A module is affected when its
    type is exactly ``nn.BatchNorm1d`` or when its class name contains
    ``'SqueezeExcite'``; every other module is left untouched.

    Args:
        m: the module to inspect.

    Returns:
        None. Matching modules are put in training mode and all of their
        parameters get ``requires_grad=True``.
    """
    is_batch_norm = type(m) == nn.BatchNorm1d
    is_squeeze_excite = 'SqueezeExcite' in type(m).__name__
    if not (is_batch_norm or is_squeeze_excite):
        return

    m.train()
    for weight in m.parameters():
        weight.requires_grad_(True)

In [4]:
# Use NeMo's logger here. A plain `import logging` would rebind the notebook-wide
# `logging` name (bound to NeMo's logger by the imports cell) to the stdlib module,
# so these messages would bypass NeMo's logger and later cells would see the wrong object.
from nemo.utils import logging

# When True, the encoder weights are frozen and only the batch-norm / squeeze-excite
# modules selected by `enable_bn_se` are put back into trainable mode.
freeze_encoder = True
if freeze_encoder:
  char_model.encoder.freeze()
  char_model.encoder.apply(enable_bn_se)
  logging.info("Model encoder has been frozen, and batch normalization has been unfrozen")
else:
  char_model.encoder.unfreeze()
  logging.info("Model encoder has been un-frozen")

In [6]:
import copy
# Record the new character set on the model's own config, then take a detached
# deep copy (`cfg`) that the dataset overrides are written into.
# The sample rate is left as configured (a 22050 override is currently disabled).
model_cfg = char_model.cfg
model_cfg.labels = train_dev_vocab
cfg = copy.deepcopy(model_cfg)

In [7]:
from omegaconf import OmegaConf, open_dict

# Manifest locations under the Google Drive mount point.
train_manifest_cleaned = "/content/drive/MyDrive/train_manifest.json"
dev_manifest_cleaned = "/content/drive/MyDrive/dev_manifest.json"
test_manifest_cleaned = "/content/drive/MyDrive/test_manifest_sample.json"

# Training split: the cleaned train and dev manifests are concatenated
# (comma-separated paths). The sample rate is not overridden.
train_ds_overrides = {
  "manifest_filepath": f"{train_manifest_cleaned},{dev_manifest_cleaned}",
  "labels": train_dev_vocab,
  "normalize_transcripts": False,
  "batch_size": 4,
  "num_workers": 0,
  "pin_memory": True,
  "trim_silence": True,
}

# Validation split: the test manifest doubles as validation data, because the
# dev manifest is already consumed by training. The sample rate is not overridden.
validation_ds_overrides = {
  "manifest_filepath": test_manifest_cleaned,
  "labels": train_dev_vocab,
  "normalize_transcripts": False,
  "batch_size": 1,
  "num_workers": 0,
  "pin_memory": True,
  "trim_silence": True,
}

# open_dict lifts struct mode so keys missing from the pretrained config can be added.
with open_dict(cfg):
  for option, value in train_ds_overrides.items():
    cfg.train_ds[option] = value
  for option, value in validation_ds_overrides.items():
    cfg.validation_ds[option] = value

In [None]:
# setup data loaders with new configs
import os

# The manifests configured on `cfg` live under /content/drive, so Google Drive has
# to be mounted before the data loaders are built. Mounting here (only when it is
# not already mounted) keeps the notebook runnable top-to-bottom on a fresh kernel.
if not os.path.isdir('/content/drive/MyDrive'):
  from google.colab import drive
  drive.mount('/content/drive')

char_model.setup_training_data(cfg.train_ds)
char_model.setup_multiple_validation_data(cfg.validation_ds)

In [17]:
# Optimizer and LR-schedule overrides, written in place on the model's config.
optim_cfg = char_model.cfg.optim
with open_dict(optim_cfg):
  optim_cfg.lr = 0.01
  optim_cfg.betas = [0.95, 0.5]  # from paper
  optim_cfg.weight_decay = 0.001  # Original weight decay
  # Warmup is expressed as a fraction of training instead of a fixed step count.
  optim_cfg.sched.warmup_steps = None
  optim_cfg.sched.warmup_ratio = 0.05  # 5 % warmup
  optim_cfg.sched.min_lr = 1e-5

In [None]:
# Mount Google Drive at /content/drive; the manifests and the checkpoint directory
# used elsewhere in this notebook are all located under this mount point.
# NOTE(review): this has to run before any cell that reads from /content/drive.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

In [11]:
# Flags pushed onto the model's error-rate metric object (`_wer`).
# `use_cer` presumably switches the metric to character error rate, and
# `log_prediction` presumably controls logging of sample predictions - confirm in NeMo's WER docs.
use_cer, log_prediction = True, False

wer_metric = char_model._wer
wer_metric.use_cer = use_cer
wer_metric.log_prediction = log_prediction

In [12]:
# Rebuild the spec-augmentation module from the model's current `spec_augment` config.
# The config is used as-is; overrides that were tried and are currently disabled:
#   freq_masks = 2, freq_width = 25, time_masks = 2, time_width = 0.05
spec_augment_cfg = char_model.cfg.spec_augment
char_model.spec_augmentation = char_model.from_config_dict(spec_augment_cfg)

[NeMo I 2022-08-30 07:37:19 audio_preprocessing:491] Numba CUDA SpecAugment kernel is being used


In [18]:
import torch
import pytorch_lightning as ptl

# Train on the GPU when one is visible, otherwise fall back to the CPU.
accelerator = 'gpu' if torch.cuda.is_available() else 'cpu'

EPOCHS = 100  # maximum number of training epochs

# Checkpointing and logging are switched off on the Trainer itself; presumably the
# exp_manager configured in a later cell attaches its own - confirm against NeMo docs.
trainer_options = dict(
    devices=1,
    accelerator=accelerator,
    max_epochs=EPOCHS,
    accumulate_grad_batches=1,
    enable_checkpointing=False,
    logger=False,
    log_every_n_steps=5,
    check_val_every_n_epoch=1,
)
trainer = ptl.Trainer(**trainer_options)

# Attach the trainer to the model
char_model.set_trainer(trainer)

# Re-assign the model's internal config through its public `cfg` setter
char_model.cfg = char_model._cfg

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True, used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


In [None]:
# NEMO_EXPM_VERSION is generally used for multi-node multi-gpu training.
# In notebook environments it is unnecessary and can cause logs of multiple
# training runs to overwrite each other, so it is removed if present.
import os
os.environ.pop('NEMO_EXPM_VERSION', None)

# Checkpoint callback settings: a checkpoint every epoch, none pruned
# (save_top_k = -1), and a .nemo file always saved; "val_wer" is the monitored metric.
checkpoint_params = exp_manager.CallbackParams(
    monitor="val_wer",
    always_save_nemo=True,
    every_n_epochs=1,
    save_top_k=-1,
)

# Experiment layout on Drive. Training resumes from existing checkpoints when
# there are any, and starts from scratch without error when there are none.
config = OmegaConf.structured(
    exp_manager.ExpManagerConfig(
        exp_dir='/content/drive/MyDrive/Model_checkPoints/16000',
        name=f"ASR-Char-Model-Language-{LANGUAGE}",
        checkpoint_callback_params=checkpoint_params,
        resume_if_exists=True,
        resume_ignore_no_checkpoint=True,
    )
)

logdir = exp_manager.exp_manager(trainer, config)

In [None]:
try:
  from google import colab
  COLAB_ENV = True
except (ImportError, ModuleNotFoundError):
  COLAB_ENV = False

# Load the TensorBoard notebook extension
if COLAB_ENV:
  %load_ext tensorboard
  %tensorboard --logdir /content/drive/MyDrive/Model_checkPoints/16000/ASR-Char-Model-Language-Fa
else:
  print("To use tensorboard, please use this notebook in a Google Colab environment.")

In [None]:
# Start fine-tuning with the trainer and experiment manager configured above.
trainer.fit(model=char_model)