Skip to content

Commit

Permalink
update branch (#4074)
Browse files Browse the repository at this point in the history
Signed-off-by: ericharper <complex451@gmail.com>

Co-authored-by: Subhankar Ghosh <subhankar2321@gmail.com>
Co-authored-by: Somshubra Majumdar <titu1994@gmail.com>
  • Loading branch information
3 people committed Apr 28, 2022
1 parent f776442 commit 655ff80
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 13 deletions.
7 changes: 3 additions & 4 deletions nemo/collections/tts/modules/fastpitch.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,8 @@ def __init__(
else:
self.speaker_emb = None

- self.register_buffer('max_token_duration', torch.tensor(max_token_duration), persistent=False)
- self.register_buffer('min_token_duration', torch.tensor(0.0), persistent=False)
+ self.max_token_duration = max_token_duration
+ self.min_token_duration = 0

self.pitch_emb = torch.nn.Conv1d(
1,
Expand All @@ -171,7 +171,6 @@ def __init__(
# Store values precomputed from training data for convenience
self.register_buffer('pitch_mean', torch.zeros(1))
self.register_buffer('pitch_std', torch.zeros(1))
- self.register_buffer('zero_emb', torch.zeros(1), persistent=False)

self.proj = torch.nn.Linear(self.decoder.d_model, n_mel_channels, bias=True)

Expand Down Expand Up @@ -225,7 +224,7 @@ def forward(

# Calculate speaker embedding
if self.speaker_emb is None or speaker is None:
- spk_emb = self.zero_emb
+ spk_emb = 0
else:
spk_emb = self.speaker_emb(speaker).unsqueeze(1)

Expand Down
10 changes: 5 additions & 5 deletions tutorials/AudioTranslationSample.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@
"nmt_model = nemo_nlp.models.MTEncDecModel.from_pretrained(model_name='nmt_zh_en_transformer6x6').cuda()\n",
"\n",
"# Spectrogram generator which takes text as an input and produces spectrogram\n",
- "spectrogram_generator = nemo_tts.models.Tacotron2Model.from_pretrained(model_name=\"tts_en_tacotron2\").cuda()\n",
+ "spectrogram_generator = nemo_tts.models.FastPitchModel.from_pretrained(model_name=\"tts_en_fastpitch\").cuda()\n",
"\n",
"# Vocoder model which takes spectrogram and produces actual audio\n",
"vocoder = nemo_tts.models.HifiGanModel.from_pretrained(model_name=\"tts_hifigan\").cuda()"
Expand Down Expand Up @@ -261,7 +261,7 @@
"toc_visible": true
},
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -275,9 +275,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.7"
+ "version": "3.8.12"
}
},
"nbformat": 4,
- "nbformat_minor": 1
- }
+ "nbformat_minor": 4
+ }
8 changes: 4 additions & 4 deletions tutorials/VoiceSwapSample.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@
"punctuation = nemo_nlp.models.PunctuationCapitalizationModel.from_pretrained(model_name='punctuation_en_distilbert').cuda()\n",
"\n",
"# Spectrogram generator which takes text as an input and produces spectrogram\n",
- "spectrogram_generator = nemo_tts.models.Tacotron2Model.from_pretrained(model_name=\"tts_en_tacotron2\").cuda()\n",
+ "spectrogram_generator = nemo_tts.models.FastPitchModel.from_pretrained(model_name=\"tts_en_fastpitch\").cuda()\n",
"\n",
"# Vocoder model which takes spectrogram and produces actual audio\n",
"vocoder = nemo_tts.models.HifiGanModel.from_pretrained(model_name=\"tts_hifigan\").cuda()"
Expand Down Expand Up @@ -306,7 +306,7 @@
"toc_visible": true
},
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -320,9 +320,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.8.5"
+ "version": "3.8.12"
}
},
"nbformat": 4,
- "nbformat_minor": 1
+ "nbformat_minor": 4
}

0 comments on commit 655ff80

Please sign in to comment.