Jaseci-Labs · TharukaCkasthuri · Dec 21, 2022 · Dec 21, 2022 · Dec 22, 2022 · Dec 22, 2022
diff --git a/.github/workflows/jaseci-ai-kit-test.yml b/.github/workflows/jaseci-ai-kit-test.yml
@@ -22,13 +22,14 @@ jobs:
 
     - uses: actions/checkout@v2
 
-    - name: Set up Python 3.8
+    - name: Set up Python 3.10
       uses: actions/setup-python@v2
       with:
-        python-version: "3.8"
+        python-version: "3.10"
     - name: Install dependencies
       run: |
         sudo apt-get install -y libsndfile1
+        sudo apt-get install -y espeak-ng  # -y keeps the CI run non-interactive
         python -m pip install --upgrade pip
         sudo apt install ffmpeg
 

diff --git a/jaseci_ai_kit/jaseci_ai_kit/modules/tts/action_utils.py b/jaseci_ai_kit/jaseci_ai_kit/modules/tts/action_utils.py
@@ -13,6 +13,7 @@
 from speechbrain.pretrained import Tacotron2 as SpeechBrain
 from speechbrain.pretrained import HIFIGAN
 from scipy.io.wavfile import write
+from pydub import AudioSegment
 
 config = configparser.ConfigParser()
 config.read(os.path.join(os.path.dirname(__file__), "config.cfg"))
@@ -309,3 +310,20 @@ def prediction(input_text, seq2seqmodel, vocorder):
         else:
             print("No valid vocorder")
     return audio_numpy
+
+
+def wave2mp3(wav_file):
+    """
+    Save `wav_file` as mp3 beside the original (same name, ".mp3" extension).
+    Returns {"save_status": bool}, plus "file_name" on success; errors are printed.
+    """
+    status = {"save_status": False}
+    mp3_file_name = os.path.splitext(wav_file)[0] + ".mp3"
+    try:
+        AudioSegment.from_wav(wav_file).export(mp3_file_name, format="mp3")
+        status["save_status"] = True
+        status["file_name"] = mp3_file_name
+    except Exception as e:
+        print(e)
+
+    return status
diff --git a/jaseci_ai_kit/jaseci_ai_kit/modules/tts/tests/fixtures/tts.jac b/jaseci_ai_kit/jaseci_ai_kit/modules/tts/tests/fixtures/tts.jac
@@ -30,5 +30,10 @@ walker test_load_vocorder_v1{
     report tts.load_vocorder("waveglow");
 }
 
+walker test_wave2mp3{  // reports the result of tts.wav2mp3 on the test wav
+    can tts.wav2mp3;
+    report tts.wav2mp3("jaseci_ai_kit/modules/tts/tests/test.wav");
+}
+
 
 
diff --git a/jaseci_ai_kit/jaseci_ai_kit/modules/tts/tests/test.wav b/jaseci_ai_kit/jaseci_ai_kit/modules/tts/tests/test.wav
diff --git a/jaseci_ai_kit/jaseci_ai_kit/modules/tts/tests/test_tts.py b/jaseci_ai_kit/jaseci_ai_kit/modules/tts/tests/test_tts.py
@@ -45,6 +45,11 @@ def test_load_vocorder_v2(self, ret):
     def test_load_seq2seq_model_v2(self, ret):
         self.assertEqual(ret["success"], True)
 
+    @pytest.mark.order(7)
+    @jac_testcase("tts.jac", "test_wave2mp3")
+    def test_wave2mp3(self, ret):  # ret presumably holds the walker's result
+        self.assertEqual(ret["success"], True)  # only "success" is checked
+
     @classmethod
     def tearDownClass(cls):
         super(TestTTSModule, cls).tearDownClass()

diff --git a/jaseci_ai_kit/jaseci_ai_kit/modules/tts/tts.py b/jaseci_ai_kit/jaseci_ai_kit/modules/tts/tts.py
@@ -16,6 +16,7 @@
     prediction,
     load_seq2seq_model,
     load_vocorder_model,
+    wave2mp3,
 )
 
 warnings.filterwarnings("ignore")
@@ -94,6 +95,18 @@ def save_audio(audio_data: list, path: str = "", rate: int = rate):
         raise HTTPException(status_code=500, detail=str(e))
 
 
+@jaseci_action(act_group=["tts"], allow_remote=True)
+def wav2mp3(wav_file: str):
+    """
+    Convert the given wav file to mp3 through `wave2mp3` and return the
+    status it produces; any exception is re-raised as an HTTP 500 error.
+    """
+    try:
+        return wave2mp3(wav_file=wav_file)
+    except Exception as err:
+        raise HTTPException(status_code=500, detail=str(err))
+
+
 if __name__ == "__main__":
     print("Text to Speech Synthesizer up and running")
     launch_server(port=8000)
diff --git a/jaseci_ai_kit/jaseci_ai_kit/modules/vc_tts/__init__.py b/jaseci_ai_kit/jaseci_ai_kit/modules/vc_tts/__init__.py
diff --git a/jaseci_ai_kit/jaseci_ai_kit/modules/vc_tts/action_utils.py b/jaseci_ai_kit/jaseci_ai_kit/modules/vc_tts/action_utils.py
@@ -0,0 +1,80 @@
+import os
+import time
+import subprocess
+from pathlib import Path
+
+from TTS.api import TTS
+from TTS.utils.synthesizer import Synthesizer
+from TTS.utils.manage import ModelManager
+
+
+def get_models_file_path():
+    return Path(__file__).with_name("models.json")  # models.json beside this module
+
+
+voice_cloning_model = "tts_models/multilingual/multi-dataset/your_tts"
+# Module-wide ModelManager reading the models.json next to this file.
+manager = ModelManager(
+    models_file=get_models_file_path(), progress_bar=True, verbose=False
+)
+
+
+def download_model_by_name(model_name: str):
+    """
+    Download the named model and, if it declares one, its default vocoder.
+
+    Parameters:
+    -----------
+    model_name: String, name of the model.
+
+    Return:
+    -----------
+    model_path: String, Path to downloaded checkpoint file
+    config_path: String, model configuration file path
+    vocoder_path: String, vocoder model path (None without a default vocoder)
+    vocoder_config_path: String, vocoder configuration path (None likewise)
+    """
+    model_path, config_path, model_item = manager.download_model(model_name)
+    # An entry without a "default_vocoder" key is treated like an explicit null.
+    default_vocoder = model_item.get("default_vocoder")
+    if default_vocoder is None:
+        return model_path, config_path, None, None
+    vocoder_path, vocoder_config_path, _ = manager.download_model(default_vocoder)
+    return model_path, config_path, vocoder_path, vocoder_config_path
+
+
+def load_model_by_name(model_name: str, gpu: bool = False):
+    """
+    Download the named model and build a synthesizer from its files.
+    The model paths are obtained from `download_model_by_name`.
+
+    Parameters:
+    -----------
+    model_name: String, name of the model.
+    gpu: Boolean, if gpu is available;
+        forwarded to the synthesizer as `use_cuda`.
+
+    Return:
+    -----------
+    synthesizer: A class object of synthesizer.
+    """
+    paths = download_model_by_name(model_name)
+    tts_checkpoint, tts_config, vocoder_checkpoint, vocoder_config = paths
+
+    # Speaker, language and encoder arguments are passed as None; per the
+    # original author's note, those values are fetched from the model.
+    return Synthesizer(
+        tts_checkpoint=tts_checkpoint,
+        tts_config_path=tts_config,
+        tts_speakers_file=None,
+        tts_languages_file=None,
+        vocoder_checkpoint=vocoder_checkpoint,
+        vocoder_config=vocoder_config,
+        encoder_checkpoint=None,
+        encoder_config=None,
+        use_cuda=gpu,
+    )
+
+
+# Module-level synthesizer for the voice cloning model, built when imported.
+synthesizer = load_model_by_name(voice_cloning_model)