Skip to content

Commit

Permalink
[TTS] add static method decorator. (NVIDIA#4443)
Browse files Browse the repository at this point in the history
* [TTS] add static method decorator.

Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com>

* remove the protected (leading underscore) prefix from the method name

Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com>

* fixed style error

Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com>
Signed-off-by: David Mosallanezhad <dmosallanezh@nvidia.com>
  • Loading branch information
XuesongYang authored and Davood-M committed Aug 9, 2022
1 parent 47dd577 commit e59478a
Showing 1 changed file with 13 additions and 7 deletions.
20 changes: 13 additions & 7 deletions nemo/collections/tts/models/hifigan.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
from nemo.collections.tts.losses.hifigan_losses import DiscriminatorLoss, FeatureMatchingLoss, GeneratorLoss
from nemo.collections.tts.models.base import Vocoder
from nemo.collections.tts.modules.hifigan_modules import MultiPeriodDiscriminator, MultiScaleDiscriminator
from nemo.collections.tts.torch.data import VocoderDataset
from nemo.core.classes import Exportable
from nemo.core.classes.common import PretrainedModelInfo, typecheck
from nemo.core.neural_types.elements import AudioSignal, MelSpectrogramType
Expand All @@ -40,7 +39,9 @@


class HifiGanModel(Vocoder, Exportable):
"""HiFi-GAN model (https://arxiv.org/abs/2010.05646) that is used to generate audio from mel spectrogram."""
"""
HiFi-GAN model (https://arxiv.org/abs/2010.05646) that is used to generate audio from mel spectrogram.
"""

def __init__(self, cfg: DictConfig, trainer: 'Trainer' = None):
# Convert to Hydra 1.0 compatible DictConfig
Expand Down Expand Up @@ -81,7 +82,8 @@ def _get_max_steps(self):
drop_last=self._train_dl.drop_last,
)

def _get_warmup_steps(self, max_steps, warmup_steps, warmup_ratio):
@staticmethod
def get_warmup_steps(max_steps, warmup_steps, warmup_ratio):
if warmup_steps is not None and warmup_ratio is not None:
raise ValueError(f'Either use warmup_steps or warmup_ratio for scheduler')

Expand Down Expand Up @@ -112,7 +114,7 @@ def configure_optimizers(self):
if max_steps is None or max_steps < 0:
max_steps = self._get_max_steps()

warmup_steps = self._get_warmup_steps(
warmup_steps = HifiGanModel.get_warmup_steps(
max_steps=max_steps,
warmup_steps=sched_config.get("warmup_steps", None),
warmup_ratio=sched_config.get("warmup_ratio", None),
Expand Down Expand Up @@ -344,23 +346,27 @@ def list_available_models(cls) -> 'Optional[Dict[str, str]]':
model = PretrainedModelInfo(
pretrained_model_name="tts_hifigan",
location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/tts_hifigan/versions/1.0.0rc1/files/tts_hifigan.nemo",
description="This model is trained on LJSpeech audio sampled at 22050Hz and mel spectrograms generated from Tacotron2, TalkNet, and FastPitch. This model has been tested on generating female English voices with an American accent.",
description="This model is trained on LJSpeech audio sampled at 22050Hz and mel spectrograms generated from"
" Tacotron2, TalkNet, and FastPitch. This model has been tested on generating female English "
"voices with an American accent.",
class_=cls,
)
list_of_models.append(model)

model = PretrainedModelInfo(
pretrained_model_name="tts_en_lj_hifigan_ft_mixertts",
location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/tts_en_lj_hifigan/versions/1.6.0/files/tts_en_lj_hifigan_ft_mixertts.nemo",
description="This model is trained on LJSpeech audio sampled at 22050Hz and mel spectrograms generated from Mixer-TTS. This model has been tested on generating female English voices with an American accent.",
description="This model is trained on LJSpeech audio sampled at 22050Hz and mel spectrograms generated from"
" Mixer-TTS. This model has been tested on generating female English voices with an American accent.",
class_=cls,
)
list_of_models.append(model)

model = PretrainedModelInfo(
pretrained_model_name="tts_en_lj_hifigan_ft_mixerttsx",
location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/tts_en_lj_hifigan/versions/1.6.0/files/tts_en_lj_hifigan_ft_mixerttsx.nemo",
description="This model is trained on LJSpeech audio sampled at 22050Hz and mel spectrograms generated from Mixer-TTS-X. This model has been tested on generating female English voices with an American accent.",
description="This model is trained on LJSpeech audio sampled at 22050Hz and mel spectrograms generated from"
" Mixer-TTS-X. This model has been tested on generating female English voices with an American accent.",
class_=cls,
)
list_of_models.append(model)
Expand Down

0 comments on commit e59478a

Please sign in to comment.