Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature in tts, convert wav file to mp3 #702

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
18 changes: 18 additions & 0 deletions jaseci_ai_kit/jaseci_ai_kit/modules/tts/action_utils.py
Expand Up @@ -13,6 +13,7 @@
from speechbrain.pretrained import Tacotron2 as SpeechBrain
from speechbrain.pretrained import HIFIGAN
from scipy.io.wavfile import write
from pydub import AudioSegment

config = configparser.ConfigParser()
config.read(os.path.join(os.path.dirname(__file__), "config.cfg"))
Expand Down Expand Up @@ -309,3 +310,20 @@ def prediction(input_text, seq2seqmodel, vocorder):
else:
print("No valid vocorder")
return audio_numpy


def wave2mp3(wav_file):
    """
    Saving the given wav file in mp3 format.

    The mp3 is written next to the input, using the same base name with
    an ``.mp3`` extension.

    Parameters
    ----------
    wav_file : str
        Path to the wav file to convert.

    Returns
    -------
    dict
        ``{"save_status": True, "file_name": <mp3 path>}`` when the export
        succeeded, ``{"save_status": False}`` when it failed. Failures are
        printed and reported through the status; they are not raised.
    """
    status = {"save_status": False}
    # Swap the extension instead of slicing off the last three characters,
    # so inputs such as "clip.wave" or a path without an extension still
    # produce a sensible "<base>.mp3" name.
    mp3_file_name = os.path.splitext(wav_file)[0] + ".mp3"
    try:
        AudioSegment.from_wav(wav_file).export(mp3_file_name, format="mp3")
    except Exception as e:
        # Best effort: the caller learns about the failure from the status.
        print(e)
    else:
        # Update the dict itself; rebinding a separate local flag would leave
        # the returned status stuck at False.
        status["save_status"] = True
        status["file_name"] = mp3_file_name

    return status
Expand Up @@ -30,5 +30,10 @@ walker test_load_vocorder_v1{
report tts.load_vocorder("waveglow");
}

// Test walker: loads the tts.wav2mp3 action and reports whatever it returns
// for the wav file at the path given below.
walker test_wave2mp3{
can tts.wav2mp3;
report tts.wav2mp3("jaseci_ai_kit/modules/tts/tests/test.wav");
}



Binary file not shown.
5 changes: 5 additions & 0 deletions jaseci_ai_kit/jaseci_ai_kit/modules/tts/tests/test_tts.py
Expand Up @@ -45,6 +45,11 @@ def test_load_vocorder_v2(self, ret):
def test_load_seq2seq_model_v2(self, ret):
self.assertEqual(ret["success"], True)

@pytest.mark.order(7)
@jac_testcase("tts.jac", "test_wave2mp3")
def test_wave2mp3(self, ret):
    # `ret` is the result of running the "test_wave2mp3" walker from tts.jac;
    # the test only checks that the walker run itself reported success.
    walker_succeeded = ret["success"]
    self.assertEqual(walker_succeeded, True)

@classmethod
def tearDownClass(cls):
super(TestTTSModule, cls).tearDownClass()
Expand Down
13 changes: 13 additions & 0 deletions jaseci_ai_kit/jaseci_ai_kit/modules/tts/tts.py
Expand Up @@ -16,6 +16,7 @@
prediction,
load_seq2seq_model,
load_vocorder_model,
wave2mp3,
)

warnings.filterwarnings("ignore")
Expand Down Expand Up @@ -94,6 +95,18 @@ def save_audio(audio_data: list, path: str = "", rate: int = rate):
raise HTTPException(status_code=500, detail=str(e))


@jaseci_action(act_group=["tts"], allow_remote=True)
def wav2mp3(wav_file: str):
    """
    Action wrapper that converts the given wav file to mp3.

    Delegates to ``wave2mp3`` and returns its status dict unchanged. Any
    exception escaping the helper is re-raised as an HTTP 500 whose detail
    is the exception message.
    """
    try:
        return wave2mp3(wav_file=wav_file)
    except Exception as err:
        raise HTTPException(status_code=500, detail=str(err))


if __name__ == "__main__":
print("Text to Speech Synthesizer up and running")
launch_server(port=8000)
3 changes: 2 additions & 1 deletion jaseci_ai_kit/setup.py
Expand Up @@ -33,12 +33,13 @@ def get_ver():
"beautifulsoup4 >= 4.10.0, < 4.11.0",
"umap-learn==0.5.3",
"hdbscan==0.8.29",
"librosa==0.9.2",
"librosa<=0.9.2",
"protobuf>=3.20.1,<3.21",
"inflect<=6.0.2",
"unidecode==1.3.6",
"soundfile<=0.11.0",
"speechbrain==0.5.13",
"pydub==0.25.1",
],
package_data={
"": ["*.json", "*.cfg", "VERSION", "*.yaml"],
Expand Down