In [1]:
import os
import sys
import subprocess

def get_environment():
    """Identify the platform this notebook is running on.

    Detection relies only on the presence of environment variables, which
    are tested in a fixed order; the first match wins:

    1. ``COLAB_GPU``            -> ``"Colab"``
    2. ``KAGGLE_KERNEL_RUN_ID`` -> ``"Kaggle"``
    3. ``HOME`` or ``USERPROFILE`` -> ``"PC"``

    Returns:
        str: ``"Colab"``, ``"Kaggle"``, ``"PC"``, or ``"Unknown"`` when none
        of the variables above is set.
    """
    env = os.environ

    # Google Colab check
    if 'COLAB_GPU' in env:
        return "Colab"

    # Kaggle check
    if 'KAGGLE_KERNEL_RUN_ID' in env:
        return "Kaggle"

    # Local machine check (HOME is typically set on POSIX systems,
    # USERPROFILE on Windows)
    if 'HOME' in env or 'USERPROFILE' in env:
        return "PC"

    # Explicit fallback: previously the function fell off the end and
    # returned None, which surfaced as "Running on: None" in the report.
    return "Unknown"


# Report which platform was detected.
print(f"Running on: {get_environment()}")

# Paths are configured for the local machine only; on any other platform
# PATH and DATA_PATH are left undefined by this cell.
# NOTE(review): DATA_PATH is a hard-coded absolute path — adjust per machine.
if 'PC' == get_environment():
    PATH, DATA_PATH = os.getcwd(), '/mnt/c/TTS'

Running on: PC


In [25]:
from nemo_text_processing.text_normalization.normalize import Normalizer
from nemo.collections.tts.models import AlignerModel
from nemo.collections.tts.torch.tts_tokenizers import EnglishPhonemesTokenizer
from nemo.collections.tts.g2p.models.en_us_arpabet import EnglishG2p

import math
from typing import Optional

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor

from einops import rearrange, repeat

In [6]:
# Build the English text front-end used by the cells below:
#   1. a cased-input text normalizer,
#   2. an ARPABET grapheme-to-phoneme converter (ambiguous words are not ignored),
#   3. a phoneme tokenizer that wraps the G2P converter.
text_normalizer = Normalizer(lang="en", input_case="cased")
arpabet_g2p = EnglishG2p(ignore_ambiguous_words=False)
arpabet_tokenizer = EnglishPhonemesTokenizer(arpabet_g2p)

 NeMo-text-processing :: INFO     :: Creating ClassifyFst grammars.
[NeMo E 2024-11-28 17:45:35 en_us_arpabet:104] Torch distributed needs to be initialized before you initialized EnglishG2p. This class is prone to data access race conditions. Now downloading corpora from global rank 0. If other ranks pass this before rank 0, errors might result.
[NeMo W 2024-11-28 17:45:35 en_us_arpabet:121] English g2p_dict will be used from nltk.corpus.cmudict.dict(), because phoneme_dict_path=None. Note that nltk.corpus.cmudict.dict() has old version (0.6) of CMUDict. You can use the latest official version of CMUDict (0.7b) with additional changes from NVIDIA directly from NeMo using the path scripts/tts_dataset_files/cmudict-0.7b_nv22.10.
[NeMo W 2024-11-28 17:45:36 en_us_arpabet:66] apply_to_oov_word=None, This means that some of words will remain unchanged if they are not handled by any of the rules in self.parse_one_word(). This may be intended if phonemes and chars are both valid inputs, othe

In [24]:
# Run one sample sentence through the front-end: normalize the raw text,
# then derive both the phoneme sequence and the token ids from the
# normalized form.
text = "We are going to evaluate mamba 1"
normalized_text = text_normalizer.normalize(text)
arpabet_phonemes = arpabet_g2p(normalized_text)
arpabet_tokens = arpabet_tokenizer(normalized_text)

# Show every stage, one per line: raw text, normalized text, phonemes, token ids.
for stage_output in (text, normalized_text, arpabet_phonemes, arpabet_tokens):
    print(stage_output)

[NeMo W 2024-11-28 18:19:33 tts_tokenizers:656] Text: [WIY1 AA1R GOW1IH0NG TUW1 evaluat MAA1MBAH0 WAH1N] contains unknown char/phoneme: [e].Original text: [We are going to evaluat mamba one]. Symbol will be skipped.
[NeMo W 2024-11-28 18:19:33 tts_tokenizers:656] Text: [WIY1 AA1R GOW1IH0NG TUW1 evaluat MAA1MBAH0 WAH1N] contains unknown char/phoneme: [v].Original text: [We are going to evaluat mamba one]. Symbol will be skipped.
[NeMo W 2024-11-28 18:19:33 tts_tokenizers:656] Text: [WIY1 AA1R GOW1IH0NG TUW1 evaluat MAA1MBAH0 WAH1N] contains unknown char/phoneme: [a].Original text: [We are going to evaluat mamba one]. Symbol will be skipped.
[NeMo W 2024-11-28 18:19:33 tts_tokenizers:656] Text: [WIY1 AA1R GOW1IH0NG TUW1 evaluat MAA1MBAH0 WAH1N] contains unknown char/phoneme: [l].Original text: [We are going to evaluat mamba one]. Symbol will be skipped.
[NeMo W 2024-11-28 18:19:33 tts_tokenizers:656] Text: [WIY1 AA1R GOW1IH0NG TUW1 evaluat MAA1MBAH0 WAH1N] contains unknown char/phoneme: 

We are going to evaluat mamba 1
We are going to evaluat mamba one
['W', 'IY1', ' ', 'AA1', 'R', ' ', 'G', 'OW1', 'IH0', 'NG', ' ', 'T', 'UW1', ' ', 'e', 'v', 'a', 'l', 'u', 'a', 't', ' ', 'M', 'AA1', 'M', 'B', 'AH0', ' ', 'W', 'AH1', 'N']
[21, 35, 0, 25, 15, 0, 6, 36, 34, 13, 0, 18, 39, 0, 11, 25, 11, 1, 27, 0, 21, 27, 12]


## Model