Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added switchable male/female TTS #712

Closed
wants to merge 30 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
1a687d7
feature in tts, convert wav file to mp3
TharukaCkasthuri Dec 21, 2022
f9b3ed5
fixes
TharukaCkasthuri Dec 21, 2022
c95a810
fixes
TharukaCkasthuri Dec 22, 2022
6a68041
fixing test cases
TharukaCkasthuri Dec 22, 2022
0be67e9
test case fixes
TharukaCkasthuri Dec 22, 2022
849d1de
Merge branch 'main' of https://github.com/Jaseci-Labs/jaseci into tts_v0
TharukaCkasthuri Dec 28, 2022
5cadae5
setup file changes
TharukaCkasthuri Dec 28, 2022
35c2888
added vc_tts supports male/female voices
TharukaCkasthuri Dec 28, 2022
9050e60
fixes
TharukaCkasthuri Dec 28, 2022
e775800
fixes
TharukaCkasthuri Dec 28, 2022
5519a44
fixes
TharukaCkasthuri Dec 28, 2022
ce7bc53
fixes
TharukaCkasthuri Dec 28, 2022
006a575
-fixes
TharukaCkasthuri Dec 28, 2022
d330c6f
fixing dependency issues
TharukaCkasthuri Dec 28, 2022
2dbc36b
-fixing dependency issue
TharukaCkasthuri Dec 28, 2022
2b6eadd
added voice cloning
TharukaCkasthuri Dec 29, 2022
8497c0e
adding comments to code
TharukaCkasthuri Dec 29, 2022
d8613af
code engineering
TharukaCkasthuri Dec 29, 2022
81f9df1
remove unnecessary comments
TharukaCkasthuri Dec 29, 2022
af5e2c7
resolving test case issues
TharukaCkasthuri Dec 29, 2022
bd72399
resolving ai it test failures
TharukaCkasthuri Dec 29, 2022
d2d111f
resolving ai it test failures
TharukaCkasthuri Dec 29, 2022
6bf3f06
resolving ai it test failures
TharukaCkasthuri Dec 29, 2022
086ce6a
resolving ai it test failures
TharukaCkasthuri Dec 30, 2022
4405a67
resolving ai kit test failures
TharukaCkasthuri Dec 30, 2022
903b5b0
resolving ai it test failures
TharukaCkasthuri Dec 30, 2022
713595e
resolving ai it test failures
TharukaCkasthuri Dec 30, 2022
9dfd875
resolving ai kit test failures
TharukaCkasthuri Dec 30, 2022
93967b8
resolving ai kit test failures
TharukaCkasthuri Dec 30, 2022
de11abb
resolving ai kit test failures
TharukaCkasthuri Dec 30, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 3 additions & 2 deletions .github/workflows/jaseci-ai-kit-test.yml
Expand Up @@ -22,13 +22,14 @@ jobs:

- uses: actions/checkout@v2

- name: Set up Python 3.8
- name: Set up Python 3.10
uses: actions/setup-python@v2
with:
python-version: "3.8"
python-version: "3.10"
- name: Install dependencies
run: |
sudo apt-get install -y libsndfile1
sudo apt-get install espeak-ng
python -m pip install --upgrade pip
sudo apt install ffmpeg

Expand Down
18 changes: 18 additions & 0 deletions jaseci_ai_kit/jaseci_ai_kit/modules/tts/action_utils.py
Expand Up @@ -13,6 +13,7 @@
from speechbrain.pretrained import Tacotron2 as SpeechBrain
from speechbrain.pretrained import HIFIGAN
from scipy.io.wavfile import write
from pydub import AudioSegment

config = configparser.ConfigParser()
config.read(os.path.join(os.path.dirname(__file__), "config.cfg"))
Expand Down Expand Up @@ -309,3 +310,20 @@ def prediction(input_text, seq2seqmodel, vocorder):
else:
print("No valid vocorder")
return audio_numpy


def wave2mp3(wav_file):
    """
    Convert the given wav file to mp3 and save it under the same base name.

    Parameters:
    -----------
    wav_file: String, path to the source wav file.

    Return:
    -----------
    status: Dictionary. "save_status" is True only when the mp3 export
        completed; "file_name" (the mp3 path) is present on success only.
    """
    status = {"save_status": False}
    # Swap the extension rather than slicing off a fixed three characters, so
    # a path without a three-letter suffix is not truncated.
    mp3_file_name = os.path.splitext(wav_file)[0] + ".mp3"
    try:
        AudioSegment.from_wav(wav_file).export(mp3_file_name, format="mp3")
    except Exception as e:
        # Best effort: the failure is reported through the status dictionary
        # instead of being raised to the caller.
        print(e)
    else:
        # Record success in the dictionary that is actually returned; the
        # previous code only rebound a local and always reported False.
        status["save_status"] = True
        status["file_name"] = mp3_file_name

    return status
Expand Up @@ -30,5 +30,10 @@ walker test_load_vocorder_v1{
report tts.load_vocorder("waveglow");
}

// Reports the result of tts.wav2mp3 for the test.wav file in the tts tests directory.
walker test_wave2mp3{
    can tts.wav2mp3;
    report tts.wav2mp3("jaseci_ai_kit/modules/tts/tests/test.wav");
}



Binary file not shown.
5 changes: 5 additions & 0 deletions jaseci_ai_kit/jaseci_ai_kit/modules/tts/tests/test_tts.py
Expand Up @@ -45,6 +45,11 @@ def test_load_vocorder_v2(self, ret):
def test_load_seq2seq_model_v2(self, ret):
self.assertEqual(ret["success"], True)

@pytest.mark.order(7)
@jac_testcase("tts.jac", "test_wave2mp3")
def test_wave2mp3(self, ret):
    """The test_wave2mp3 walker in tts.jac must come back marked successful."""
    succeeded = ret["success"]
    self.assertEqual(succeeded, True)

@classmethod
def tearDownClass(cls):
super(TestTTSModule, cls).tearDownClass()
Expand Down
13 changes: 13 additions & 0 deletions jaseci_ai_kit/jaseci_ai_kit/modules/tts/tts.py
Expand Up @@ -16,6 +16,7 @@
prediction,
load_seq2seq_model,
load_vocorder_model,
wave2mp3,
)

warnings.filterwarnings("ignore")
Expand Down Expand Up @@ -94,6 +95,18 @@ def save_audio(audio_data: list, path: str = "", rate: int = rate):
raise HTTPException(status_code=500, detail=str(e))


@jaseci_action(act_group=["tts"], allow_remote=True)
def wav2mp3(wav_file: str):
    """
    Action wrapper that converts a wav file to mp3.

    Delegates to wave2mp3 and returns its status dictionary unchanged. Any
    exception that escapes the helper is re-raised as an HTTPException with
    status code 500 and the original message as the detail.
    """
    try:
        return wave2mp3(wav_file=wav_file)
    except Exception as err:
        raise HTTPException(status_code=500, detail=str(err))


if __name__ == "__main__":
print("Text to Speech Synthesizer up and running")
launch_server(port=8000)
Empty file.
80 changes: 80 additions & 0 deletions jaseci_ai_kit/jaseci_ai_kit/modules/vc_tts/action_utils.py
@@ -0,0 +1,80 @@
import os
import time
import subprocess
from pathlib import Path

from TTS.api import TTS
from TTS.utils.synthesizer import Synthesizer
from TTS.utils.manage import ModelManager


def get_models_file_path():
    """Return the path of the models.json file located beside this module."""
    module_dir = Path(__file__).parent
    return module_dir.joinpath("models.json")


# Model identifier handed to load_model_by_name when the module-level
# synthesizer for voice cloning is created.
voice_cloning_model = "tts_models/multilingual/multi-dataset/your_tts"

# Shared model manager, configured from the models.json file that sits next to
# this module; download_model_by_name uses it to download models.
manager = ModelManager(
    models_file=get_models_file_path(), progress_bar=True, verbose=False
)


def download_model_by_name(model_name: str):
    """
    Download the named model and, when it declares one, its default vocoder.

    Parameters:
    -----------
    model_name: String, name of the model.

    Return:
    -----------
    model_path: String, Path to downloaded checkpoint file
    config_path: String, model configuration file path
    vocoder_path: String, vocoder model path (None without a default vocoder)
    vocoder_config_path: String vocoder configuration path (None without a
        default vocoder)
    """
    model_path, config_path, model_item = manager.download_model(model_name)

    # Use .get so an entry that has no "default_vocoder" key at all is handled
    # the same way as one whose value is None, instead of raising KeyError.
    default_vocoder = model_item.get("default_vocoder")
    if default_vocoder is None:
        return model_path, config_path, None, None

    vocoder_path, vocoder_config_path, _ = manager.download_model(default_vocoder)
    return model_path, config_path, vocoder_path, vocoder_config_path


def load_model_by_name(model_name: str, gpu: bool = False):
    """
    Download the named model and wrap it in a Synthesizer.

    Parameters:
    -----------
    model_name: String, name of the model.
    gpu: Boolean, forwarded to the Synthesizer as use_cuda.

    Return:
    -----------
    synthesizer: Synthesizer instance built from the downloaded files.
    """
    (
        tts_checkpoint,
        tts_config,
        vocoder_checkpoint,
        vocoder_config,
    ) = download_model_by_name(model_name)

    # Speaker, language and encoder arguments are passed as None; per the
    # original note, values left as None are fetched from the model itself.
    return Synthesizer(
        tts_checkpoint=tts_checkpoint,
        tts_config_path=tts_config,
        tts_speakers_file=None,
        tts_languages_file=None,
        vocoder_checkpoint=vocoder_checkpoint,
        vocoder_config=vocoder_config,
        encoder_checkpoint=None,
        encoder_config=None,
        use_cuda=gpu,
    )


# Synthesizer for the voice cloning model, created once when this module is
# imported. Importing the module therefore runs download_model_by_name for the
# model (via load_model_by_name); gpu is left at its default of False.
synthesizer = load_model_by_name(voice_cloning_model)