<a href="https://colab.research.google.com/github/SiegfredLorelle/iskobot/blob/main/iskobot_text_to_speech.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Text to Speech Using the XTTS v2 Model

# Setup

In [32]:
%%capture

# Install the TTS (Text-to-Speech) library directly from PyPI
!pip install TTS

# Set up the model

In [33]:
import torch
from TTS.api import TTS

# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the model (change the model name here to use a different one),
# then move it onto the selected device
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
tts = tts.to(device)

 > tts_models/multilingual/multi-dataset/xtts_v2 is already downloaded.
 > Using model: xtts


  self.speakers = torch.load(speaker_file_path)
  return torch.load(f, map_location=map_location, **kwargs)


# Run model

In [34]:
%%capture

# For preprocessing Japanese text
!pip install cutlet
!apt install mecab libmecab-dev mecab-ipadic-utf8
!pip install mecab-python3 fugashi unidic-lite

In [35]:
from typing import List
import os
import fugashi

def generate_tts_files(texts: dict, speaker_wav: str, languages: List[str], output_dir: str):
    """
    Generates one text-to-speech WAV file per requested language.

    For every code in ``languages`` that has non-empty text in ``texts``, the
    text is synthesized with the notebook-level ``tts`` model and written to
    ``<output_dir>/output_<lang>.wav``. Codes without text are reported and
    skipped. Japanese ("ja") text is split into space-separated tokens with
    fugashi before synthesis.

    Args:
        texts (dict): A dictionary where keys are language codes and values are the corresponding text in that language.
        speaker_wav (str): Path to the speaker's WAV file for voice cloning.
        languages (List[str]): List of language codes to generate, in order.
        output_dir (str): Directory where the output files will be saved; created if missing.
    """
    # exist_ok makes this a no-op when the directory is already there and
    # avoids the separate exists() check
    os.makedirs(output_dir, exist_ok=True)

    for lang in languages:
        text = texts.get(lang, "")
        if not text:
            # Missing key or empty string: nothing to synthesize for this code
            print(f"No text available for language code: {lang}")
            continue

        file_path = os.path.join(output_dir, f"output_{lang}.wav")

        # Preprocess Japanese text if needed
        if lang == "ja":
            tagger = fugashi.Tagger('-Owakati')
            # In wakati mode parse() already returns the tokens as ONE
            # space-separated string. Joining that string again would iterate
            # over its characters and put a space between every character.
            text = tagger.parse(text)

        tts.tts_to_file(
            text=text,
            speaker_wav=speaker_wav,
            language=lang,
            file_path=file_path
        )
        print(f"Generated: {file_path}")

# Texts in different languages.
# Each key is a language code and each value is the text to synthesize in that language.
texts = {
    "en": "Hi, I’m Iskobot—a chatbot designed to assist students with their general and academic inquiries. Let’s learn and grow together!",
    "es": "¡Hola, soy Iskobot, un chatbot diseñado para ayudar a los estudiantes con sus consultas generales y académicas! ¡Aprendamos y crezcamos juntos!",
    "fr": "Salut, je suis Iskobot, un chatbot conçu pour aider les étudiants avec leurs questions générales et académiques. Apprenons et grandissons ensemble !",
    "de": "Hallo, ich bin Iskobot – ein Chatbot, der entwickelt wurde, um Studierende bei ihren allgemeinen und akademischen Anfragen zu unterstützen. Lass uns zusammen lernen und wachsen!",
    "it": "Ciao, sono Iskobot, un chatbot progettato per assistere gli studenti nelle loro richieste generali e accademiche. Impariamo e cresciamo insieme!",
    "pt": "Oi, sou Iskobot, um chatbot projetado para ajudar os estudantes com suas consultas gerais e acadêmicas. Vamos aprender e crescer juntos!",
    "pl": "Cześć, jestem Iskobot, chatbot stworzony, aby pomagać studentom w ich ogólnych i akademickich pytaniach. Uczmy się i rośnijmy razem!",
    "tr": "Merhaba, ben Iskobot—öğrencilere genel ve akademik sorularında yardımcı olmak için tasarlanmış bir sohbet robotuyum. Hadi birlikte öğrenelim ve büyüyelim!",
    "ru": "Привет, я Искобот — чат-бот, созданный для помощи студентам с их общими и академическими вопросами. Давайте учиться и расти вместе!",
    "nl": "Hallo, ik ben Iskobot, een chatbot die is ontworpen om studenten te helpen met hun algemene en academische vragen. Laten we samen leren en groeien!",
    "cs": "Ahoj, jsem Iskobot, chatbot navržený k pomoci studentům s jejich obecným a akademickým dotazům. Pojďme se učit a růst společně!",
    "ar": "مرحبًا، أنا إيسكوبوت، روبوت دردشة مصمم لمساعدة الطلاب في استفساراتهم العامة والأكاديمية. دعونا نتعلم وننمو معًا!",
    "zh-cn": "你好，我是 Iskobot——一个旨在帮助学生解答一般和学术问题的聊天机器人。让我们一起学习和成长吧！",
    "ja": "こんにちは、私は Iskobot です—学生の一般的な質問や学問的な問いに答えるために設計されたチャットボットです。一緒に学び、成長しましょう！",
    "hu": "Szia, én vagyok Iskobot—a chatbot, amely a diákoknak segít általános és akadémiai kérdéseikben. Tanuljunk és növekedjünk együtt!",
    "ko": "안녕하세요, 저는 Iskobot입니다—학생들의 일반적인 질문과 학문적인 질문을 돕기 위해 설계된 챗봇입니다. 함께 배우고 성장해요!",
    "hi": "नमस्ते, मैं इस्कोबोट हूं—एक चैटबॉट जिसे छात्रों की सामान्य और शैक्षणिक प्रश्नों में सहायता करने के लिए डिज़ाइन किया गया है। आइए साथ में सीखें और बढ़ें!"
}

# Reference clip used for voice cloning, and the folder that receives the generated audio
speaker_wav = "/content/iskobot_sample_audio.wav"
output_dir = "/content/tts_outputs"

# Generate every language that has an entry in `texts`, in dictionary order
languages = [*texts]

# Produce one WAV file per language
generate_tts_files(
    texts=texts,
    speaker_wav=speaker_wav,
    languages=languages,
    output_dir=output_dir,
)

 > Text splitted to sentences.
['Hi, I’m Iskobot—a chatbot designed to assist students with their general and academic inquiries.', 'Let’s learn and grow together!']
 > Processing time: 66.63389658927917
 > Real-time factor: 6.7751098374723595
Generated: /content/tts_outputs/output_en.wav
 > Text splitted to sentences.
['¡Hola, soy Iskobot, un chatbot diseñado para ayudar a los estudiantes con sus consultas generales y académicas!', '¡Aprendamos y crezcamos juntos!']
 > Processing time: 71.6438820362091
 > Real-time factor: 6.251375517991052
Generated: /content/tts_outputs/output_es.wav
 > Text splitted to sentences.
['Salut, je suis Iskobot, un chatbot conçu pour aider les étudiants avec leurs questions générales et académiques.', 'Apprenons et grandissons ensemble !']
 > Processing time: 105.4228093624115
 > Real-time factor: 6.362696380510351
Generated: /content/tts_outputs/output_fr.wav
 > Text splitted to sentences.
['Hallo, ich bin Iskobot – ein Chatbot, der entwickelt wurde, um 