<a href="https://colab.research.google.com/github/ASREETHASREEJA5/AI---Tools/blob/main/Podcast_using_Azure.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install azure-cognitiveservices-speech
!pip install python-dotenv
!pip install pydub
!pip install langchain_groq

Collecting azure-cognitiveservices-speech
  Downloading azure_cognitiveservices_speech-1.41.1-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Downloading azure_cognitiveservices_speech-1.41.1-py3-none-manylinux1_x86_64.whl (39.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m39.7/39.7 MB[0m [31m17.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: azure-cognitiveservices-speech
Successfully installed azure-cognitiveservices-speech-1.41.1
Collecting python-dotenv
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.0.1
Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1
Collecting langchain_groq
  Downloading langchain_groq-0.2.3-py3-none-

In [None]:
import os

# Credentials come from the environment so real keys never get saved with the
# notebook; the placeholders below are only used when the variables are unset.
SPEECH_KEY = os.environ.get("SPEECH_KEY", "your key")
SPEECH_REGION = os.environ.get("SPEECH_REGION", "eastus")

Podcast generation with emotions (asynchronous code)

In [None]:
import asyncio
import os
import azure.cognitiveservices.speech as speechsdk
from pydub import AudioSegment
from langchain_groq import ChatGroq
import json

# Groq chat model used to write the podcast script. The API key is read from
# the GROQ_API_KEY environment variable; the placeholder is only a fallback.
llm = ChatGroq(
    temperature=0,
    groq_api_key=os.environ.get("GROQ_API_KEY", 'your_key'),
    model_name="llama-3.1-70b-versatile"
)

async def text_to_speech_async(ssml, output_wav_file):
    """Synthesize an SSML document with Azure Speech and save it as a WAV file.

    Uses the module-level SPEECH_KEY and SPEECH_REGION for authentication.

    Args:
        ssml: Complete SSML document to synthesize.
        output_wav_file: Path of the WAV file to write. Its parent directory is
            created when the path has one.

    Returns:
        output_wav_file once synthesis has completed.

    Raises:
        Exception: Synthesis was canceled; the message carries the reason and
            the error details reported by the service.
        RuntimeError: Synthesis ended with any other unexpected result reason.
    """
    speech_config = speechsdk.SpeechConfig(subscription=SPEECH_KEY, region=SPEECH_REGION)

    # os.makedirs("") raises FileNotFoundError, so only create a directory when
    # the output path actually contains one.
    target_dir = os.path.dirname(output_wav_file)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    audio_config = speechsdk.audio.AudioOutputConfig(filename=output_wav_file)
    synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)

    result_future = synthesizer.speak_ssml_async(ssml)

    # The SDK future's get() blocks, so wait for it on a worker thread.
    result = await asyncio.to_thread(result_future.get)

    if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
        return output_wav_file
    if result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = result.cancellation_details
        raise Exception(f"Speech synthesis canceled: {cancellation_details.reason} - {cancellation_details.error_details}")

    # Never hand back None: callers treat the return value as a readable file path.
    raise RuntimeError(f"Speech synthesis ended with unexpected result: {result.reason}")




async def generate_dialogue_audio(dialogue, voices, output_dir, final_output_file):
    """Synthesize every dialogue line concurrently and merge the clips into one WAV.

    Args:
        dialogue: List of dicts mapping a speaker label ("male" / "female") to
            "<text>, <emotion>". The part after the last comma is used as the
            express-as style; without a comma the style is "neutral".
        voices: Mapping of speaker label to Azure voice name. Speakers missing
            from it fall back to the built-in en-US voices.
        output_dir: Directory that receives the per-line temporary WAV files.
        final_output_file: Path of the merged WAV file.

    Returns:
        The value returned by combine_audio_files for final_output_file.

    Raises:
        The first exception raised by a synthesis task, after all tasks have
        finished and the temporary files have been removed.
    """
    from xml.sax.saxutils import escape, quoteattr

    fallback_voices = {"female": "en-US-JennyNeural"}
    temp_paths = []
    tasks = []

    for idx, line in enumerate(dialogue):
        for speaker, content in line.items():
            text, emotion = content.rsplit(",", 1) if "," in content else (content, "neutral")
            emotion = emotion.strip()

            # Honor the caller's voice table; keep the old hard-coded voices as fallback.
            voice = (voices or {}).get(speaker) or fallback_voices.get(speaker, "en-US-GuyNeural")

            # Escape LLM-written text so characters like & or < cannot break the SSML.
            ssml = f"""
            <speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="http://www.w3.org/2001/mstts" xml:lang="en-US">
                <voice name={quoteattr(voice)}>
                    <mstts:express-as style={quoteattr(emotion)}>
                        {escape(text)}
                    </mstts:express-as>
                </voice>
            </speak>
            """

            temp_audio_file = os.path.join(output_dir, f"temp_dialogue_{idx+1}_{speaker}.wav")
            temp_paths.append(temp_audio_file)
            tasks.append(text_to_speech_async(ssml, temp_audio_file))

    try:
        # Let every task finish before reporting a failure so no worker is still
        # writing a temp file while cleanup runs.
        results = await asyncio.gather(*tasks, return_exceptions=True)
        for outcome in results:
            if isinstance(outcome, BaseException):
                raise outcome
        return combine_audio_files(results, final_output_file)
    finally:
        # Remove temp clips on success and on failure alike.
        for temp_audio_file in temp_paths:
            if os.path.exists(temp_audio_file):
                os.remove(temp_audio_file)


def combine_audio_files(audio_files, final_output_file):
    """Concatenate WAV clips in order and export the result as one WAV file.

    Args:
        audio_files: Iterable of WAV file paths, in playback order.
        final_output_file: Destination path for the merged audio.

    Returns:
        final_output_file, unchanged.
    """
    # Start from an empty segment and append each clip in turn.
    merged = sum(
        (AudioSegment.from_wav(clip_path) for clip_path in audio_files),
        AudioSegment.empty(),
    )
    merged.export(final_output_file, format="wav")
    return final_output_file


async def main():
    """Ask for a question, have the LLM write a 10-line dialogue, and voice it.

    Reads the question from stdin, sends the prompt to the module-level `llm`,
    parses the JSON reply and passes its "dialogue" list to
    generate_dialogue_audio. If the reply cannot be parsed or lacks a required
    field, the problem is printed and the function returns without
    synthesizing anything.
    """
    que = input()
    langCode = "en"
    # NOTE(review): the example below uses the emotion "happy", which is not in
    # the allowed-emotion list given in the same prompt.
    prompt = f"""
You are an AI model designed to generate structured conversational content.
- Please provide an answer to the following question : "{que}",with given langCode"{langCode}" in the specified JSON format.
- Ensure the response includes a title, language code, and dialogue with multiple participants also with emotion which best suites for dialouge at end of each dialogue seperated by ',' with dialouge.
- Only use one of the following emotions: advertisement_upbeat, affectionate, angry, assistant, calm, chat, cheerful, customerservice, depressed, disgruntled, documentary-narration, embarrassed, empathetic, envious, excited, fearful, friendly, gentle, hopeful, lyrical, narration-professional, narration-relaxed, newscast, newscast-casual, newscast-formal, poetry-reading, sad, serious, shouting, sports_commentary, sports_commentary_excited, whispering, terrified, unfriendly.
- Make it engaging, informative, and structured.
- Use the following example as a guide:

Example Format:

{{
    "title": "What is Git and GitHub and Why It Is Used?",
    "langCode": "en",
    "dialogue": [
        {{"male": "I am so excited to be here today!, excited"}},
        {{"female": "That makes me really happy!, happy"}},
        ...
    ]
}}

Now, generate the response for the question: "{que}" in 10 lines
.
"""
    res = llm.invoke(prompt)
    raw_content = res.content

    # Models often wrap the JSON in prose or ``` fences; keep only the outermost {...}.
    start, end = raw_content.find("{"), raw_content.rfind("}")
    json_text = raw_content[start:end + 1] if 0 <= start < end else raw_content

    try:
        formatted_content = json.loads(json_text)
    except json.JSONDecodeError as e:
        print("Error decoding JSON from LLM response:", str(e))
        print("Raw Response Content:", raw_content)
        # Stop here: continuing used to crash with NameError on the unset `data`.
        return

    required_keys = ["title", "langCode", "dialogue"]
    if not isinstance(formatted_content, dict) or not all(key in formatted_content for key in required_keys):
        print("Error: Generated content is missing required fields.")
        print("Generated Content:", json.dumps(formatted_content, indent=4))
        # Return instead of exit(), which would shut down the notebook kernel.
        return
    data = formatted_content["dialogue"]

    voices = {"male": "en-US-JasonNeural", "female": "en-US-JessaNeural"}

    output_dir = "output_directory"  # Specify your output directory
    final_output_file = "final_output.wav"
    print(data)
    final_audio_path = await generate_dialogue_audio(data, voices, output_dir, final_output_file)
    print(f"Final audio saved at {final_audio_path}")

await main()


what is python
[{'male': "Hello, I'm here to talk about Python, cheerful"}, {'female': "I'm excited to learn about it, let's get started!, excited"}, {'male': 'Python is a high-level programming language, calm'}, {'female': "That's right, it's easy to learn and understand, friendly"}, {'male': 'It was created in the late 1980s by Guido van Rossum, serious'}, {'female': "And it's now one of the most popular languages, cheerful"}, {'male': 'Python is used for web development, data analysis, and more, enthusiastic'}, {'female': "It's also used in artificial intelligence and machine learning, gentle"}, {'male': 'The syntax is simple and easy to read, making it perfect for beginners, hopeful'}, {'female': "So, if you're interested in programming, Python is a great place to start!, excited"}]
Final audio saved at final_output.wav


Added features

The user can now choose:
1. Language (language code)
2. Time length (in seconds)
3. Style degree (softer or stronger emotion, in the range 0.01 to 2)






In [None]:
import asyncio
import os
import azure.cognitiveservices.speech as speechsdk
from pydub import AudioSegment
from langchain_groq import ChatGroq
import json

import getpass

# SECURITY: a Groq API key used to be hard-coded here. Treat that key as
# compromised and revoke it. The key is now read from GROQ_API_KEY, with an
# interactive prompt as fallback, so it is never saved in the notebook.
llm = ChatGroq(
    temperature=0,
    groq_api_key=os.environ.get("GROQ_API_KEY") or getpass.getpass("Groq API key: "),
    model_name="llama-3.1-70b-versatile"
)

async def text_to_speech_async(ssml, output_wav_file):
    """Asynchronously converts SSML to speech and saves it to a file.

    Uses the module-level SPEECH_KEY and SPEECH_REGION for authentication.

    Args:
        ssml: Complete SSML document to synthesize.
        output_wav_file: Path of the WAV file to write. Its parent directory is
            created when the path has one.

    Returns:
        output_wav_file once synthesis has completed.

    Raises:
        Exception: Synthesis was canceled; the message carries the reason and
            the error details reported by the service.
        RuntimeError: Synthesis ended with any other unexpected result reason.
    """
    speech_config = speechsdk.SpeechConfig(subscription=SPEECH_KEY, region=SPEECH_REGION)

    # A bare file name has an empty dirname, and os.makedirs("") raises.
    parent_dir = os.path.dirname(output_wav_file)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    audio_config = speechsdk.audio.AudioOutputConfig(filename=output_wav_file)
    synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)

    result_future = synthesizer.speak_ssml_async(ssml)

    # The SDK future's get() blocks, so wait for it on a worker thread.
    result = await asyncio.to_thread(result_future.get)

    if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
        return output_wav_file
    if result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = result.cancellation_details
        raise Exception(f"Speech synthesis canceled: {cancellation_details.reason} - {cancellation_details.error_details}")

    # Never hand back None: callers treat the return value as a readable file path.
    raise RuntimeError(f"Speech synthesis ended with unexpected result: {result.reason}")




async def generate_dialogue_audio(dialogue, voices, output_dir, final_output_file,s_degree):
    """Synthesize every dialogue line concurrently and merge the clips into one WAV.

    Args:
        dialogue: List of dicts mapping a speaker label ("male" / "female") to
            "<text>, <emotion>". The part after the last comma is used as the
            express-as style; without a comma the style is "neutral".
        voices: Mapping with "male" and "female" Azure voice names. Any speaker
            label not present in it uses the "female" voice.
        output_dir: Directory that receives the per-line temporary WAV files.
        final_output_file: Path of the merged WAV file.
        s_degree: Style intensity as a number or numeric string. It is clamped
            to the range 0.01 to 2; None or a blank string keeps the previous
            fixed value of 2.

    Returns:
        The value returned by combine_audio_files for final_output_file.

    Raises:
        ValueError: s_degree is neither blank nor numeric.
        The first exception raised by a synthesis task, after all tasks have
        finished and the temporary files have been removed.
    """
    from xml.sax.saxutils import escape, quoteattr

    # The degree used to be accepted and then ignored (hard-coded "2").
    if s_degree is None or not str(s_degree).strip():
        degree = 2.0
    else:
        degree = min(2.0, max(0.01, float(s_degree)))
    degree_attr = quoteattr(f"{degree:g}")

    temp_paths = []
    tasks = []

    for idx, line in enumerate(dialogue):
        for speaker, content in line.items():
            text, emotion = content.rsplit(",", 1) if "," in content else (content, "neutral")
            emotion = emotion.strip()

            voice = voices[speaker] if speaker in voices else voices["female"]

            # Escape LLM-written text so characters like & or < cannot break the SSML.
            ssml = f"""
            <speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="http://www.w3.org/2001/mstts" xml:lang="en-US">
                <voice name={quoteattr(voice)}>
                    <mstts:express-as style={quoteattr(emotion)} styledegree={degree_attr}>
                        {escape(text)}
                    </mstts:express-as>
                </voice>
            </speak>
            """

            temp_audio_file = os.path.join(output_dir, f"temp_dialogue_{idx+1}_{speaker}.wav")
            temp_paths.append(temp_audio_file)
            tasks.append(text_to_speech_async(ssml, temp_audio_file))

    try:
        # Let every task finish before reporting a failure so no worker is still
        # writing a temp file while cleanup runs.
        results = await asyncio.gather(*tasks, return_exceptions=True)
        for outcome in results:
            if isinstance(outcome, BaseException):
                raise outcome
        return combine_audio_files(results, final_output_file)
    finally:
        # Remove temp clips on success and on failure alike.
        for temp_audio_file in temp_paths:
            if os.path.exists(temp_audio_file):
                os.remove(temp_audio_file)


def combine_audio_files(audio_files, final_output_file):
    """Concatenate WAV clips in order and export the result as one WAV file.

    Args:
        audio_files: Iterable of WAV file paths, in playback order.
        final_output_file: Destination path for the merged audio.

    Returns:
        final_output_file, unchanged.
    """
    # Start from an empty segment and append each clip in turn.
    merged = sum(
        (AudioSegment.from_wav(clip_path) for clip_path in audio_files),
        AudioSegment.empty(),
    )
    merged.export(final_output_file, format="wav")
    return final_output_file


async def main():
    """Collect podcast options, have the LLM write the script, and voice it.

    Reads the question, language code, target length in seconds and style
    degree from stdin. The number of dialogue lines requested is the length
    divided by 5 (at least 1). Invalid input, an unsupported language code, an
    unparseable LLM reply or a reply missing required fields is reported with
    print() and ends the function without synthesizing anything.
    """
    que = input()
    langCode = input("enter langcode:")
    time_r = input("enter time range in seconds:")
    try:
        # Request at least one line, even for durations under five seconds.
        num = max(1, int(time_r) // 5)
    except ValueError:
        print(f"Invalid time range {time_r!r}: expected a whole number of seconds.")
        return
    s_degree = input("enter the degree")

    # NOTE(review): only the ar-EG male voice could be cross-checked against the
    # voice listing captured in this notebook; verify the remaining names against
    # the get_voices_async() output before relying on them.
    voices1 = {
        "en": {"male": "en-US-GuyNeural", "female": "en-US-JennyNeural"},
        "te": {"male": "te-IN-MohanNeural", "female": "te-IN-ShrutiNeural"},
        "hi": {"male": "hi-IN-AmitNeural", "female": "hi-IN-SumanNeural"},
        "es": {"male": "es-ES-ÁngelNeural", "female": "es-ES-ElenaNeural"},
        "fr": {"male": "fr-FR-HenriNeural", "female": "fr-FR-DeniseNeural"},
        "de": {"male": "de-DE-KlausNeural", "female": "de-DE-KatjaNeural"},
        "it": {"male": "it-IT-GiorgioNeural", "female": "it-IT-ElsaNeural"},
        "ja": {"male": "ja-JP-HiroshiNeural", "female": "ja-JP-NatsukiNeural"},
        "ko": {"male": "ko-KR-JayNeural", "female": "ko-KR-SunHiNeural"},
        "pt": {"male": "pt-BR-AntonioNeural", "female": "pt-BR-AmandaNeural"},
        # Fixed: the two zh-CN voices were assigned to the wrong genders.
        "zh": {"male": "zh-CN-YunxiNeural", "female": "zh-CN-XiaoxiaoNeural"},
        # Fixed: male voice now matches the ar-EG entry printed by the voice listing.
        "ar": {"male": "ar-EG-ShakirNeural", "female": "ar-EG-SalmaNeural"},
        "pl": {"male": "pl-PL-RafalNeural", "female": "pl-PL-KarolinaNeural"},
        "tr": {"male": "tr-TR-BerkNeural", "female": "tr-TR-BirceNeural"},
        "sv": {"male": "sv-SE-AlvaNeural", "female": "sv-SE-IlseNeural"},
        "ru": {"male": "ru-RU-DmitryNeural", "female": "ru-RU-TatyanaNeural"},
        "nl": {"male": "nl-NL-KrisNeural", "female": "nl-NL-LauraNeural"},
        "da": {"male": "da-DK-MortenNeural", "female": "da-DK-KarenNeural"},
        "fi": {"male": "fi-FI-KalleNeural", "female": "fi-FI-KaisaNeural"},
        "no": {"male": "no-NO-OleNeural", "female": "no-NO-SofieNeural"},
        "cs": {"male": "cs-CZ-VojtaNeural", "female": "cs-CZ-LuciaNeural"},
        "sk": {"male": "sk-SK-MilanNeural", "female": "sk-SK-SofiaNeural"},
        "he": {"male": "he-IL-RonenNeural", "female": "he-IL-NoaNeural"},
        "ro": {"male": "ro-RO-MateiNeural", "female": "ro-RO-LaviniaNeural"},
        "bn": {"male": "bn-IN-RaviNeural", "female": "bn-IN-KavitaNeural"},
        # The duplicate "ms" entry that followed "uk" was removed (identical value).
        "ms": {"male": "ms-MY-MathewsNeural", "female": "ms-MY-SitiNeural"},
        "ta": {"male": "ta-IN-KamalNeural", "female": "ta-IN-SitaraNeural"},
        "vi": {"male": "vi-VN-HoaiAnNeural", "female": "vi-VN-PhuongNeural"},
        "th": {"male": "th-TH-ThanetNeural", "female": "th-TH-FahNeural"},
        "el": {"male": "el-GR-GiorgosNeural", "female": "el-GR-KaterinaNeural"},
        "hu": {"male": "hu-HU-BalazsNeural", "female": "hu-HU-ZsuzsaNeural"},
        "uk": {"male": "uk-UA-YevhenNeural", "female": "uk-UA-OlenaNeural"},
        "sq": {"male": "sq-AL-ArdianNeural", "female": "sq-AL-DafinaNeural"},
        "hr": {"male": "hr-HR-DamirNeural", "female": "hr-HR-MajaNeural"},
        "sr": {"male": "sr-RS-JovanNeural", "female": "sr-RS-AnaNeural"},
        "lt": {"male": "lt-LT-MindaugasNeural", "female": "lt-LT-MildaNeural"},
        "et": {"male": "et-EE-JaanNeural", "female": "et-EE-KaiaNeural"},
        "lv": {"male": "lv-LV-MatisNeural", "female": "lv-LV-KatrinaNeural"},
        "bg": {"male": "bg-BG-DimitraNeural", "female": "bg-BG-EvgeniaNeural"},
        "zh-HK": {"male": "zh-HK-HarleyNeural", "female": "zh-HK-YunjieNeural"},
        "zh-TW": {"male": "zh-TW-YuYunNeural", "female": "zh-TW-YuYingNeural"},
        "ja-JP": {"male": "ja-JP-HiroshiNeural", "female": "ja-JP-NatsukiNeural"},
        "ko-KR": {"male": "ko-KR-JayNeural", "female": "ko-KR-SunHiNeural"},
        "fi-FI": {"male": "fi-FI-KalleNeural", "female": "fi-FI-KaisaNeural"},
        # Add more languages and voices as needed
    }

    # Validate the language before spending an LLM call on it.
    voices = voices1.get(langCode)
    if voices is None:
        print(f"Unsupported langCode {langCode!r}. Supported codes: {', '.join(sorted(voices1))}")
        return

    # NOTE(review): the example below uses the emotion "happy", which is not in
    # the allowed-emotion list given in the same prompt.
    prompt = f"""
You are an AI model designed to generate podcast content.
- Please provide an answer to the following question : "{que}",with given langCode"{langCode}" in the specified JSON format.
- Ensure the response includes a title, language code, and dialogue with multiple participants also with emotion which best suites for dialouge at end of each dialogue seperated by ',' with dialouge.
- Only use one of the following emotions: advertisement_upbeat, affectionate, angry, assistant, calm, chat, cheerful, customerservice, depressed, disgruntled, documentary-narration, embarrassed, empathetic, envious, excited, fearful, friendly, gentle, hopeful, lyrical, narration-professional, narration-relaxed, newscast, newscast-casual, newscast-formal, poetry-reading, sad, serious, shouting, sports_commentary, sports_commentary_excited, whispering, terrified, unfriendly.
- Make it engaging, informative, and structured.
- Use the following example as a guide:

Example Format:

{{
    "title": "What is Git and GitHub and Why It Is Used?",
    "langCode": "en",
    "dialogue": [
        {{"male": "Todays podcast is about git hub and git, excited"}},
        {{"female": "That makes me really happy!, happy"}},
        ...
    ]
}}

Now, generate the response for the question: "{que}" in {num} lines
.
"""
    res = llm.invoke(prompt)
    raw_content = res.content

    # Models often wrap the JSON in prose or ``` fences; keep only the outermost {...}.
    start, end = raw_content.find("{"), raw_content.rfind("}")
    json_text = raw_content[start:end + 1] if 0 <= start < end else raw_content

    try:
        formatted_content = json.loads(json_text)
    except json.JSONDecodeError as e:
        print("Error decoding JSON from LLM response:", str(e))
        print("Raw Response Content:", raw_content)
        # Stop here: continuing used to crash with NameError on the unset `data`.
        return

    required_keys = ["title", "langCode", "dialogue"]
    if not isinstance(formatted_content, dict) or not all(key in formatted_content for key in required_keys):
        print("Error: Generated content is missing required fields.")
        print("Generated Content:", json.dumps(formatted_content, indent=4))
        # Return instead of exit(), which would shut down the notebook kernel.
        return
    data = formatted_content["dialogue"]

    output_dir = "output_directory"
    final_output_file = "final_output.wav"
    print(data)
    final_audio_path = await generate_dialogue_audio(data, voices, output_dir, final_output_file,s_degree)
    print(f"Final audio saved at {final_audio_path}")
await main()


java
enter langcode:en
enter time range in seconds:50
enter the degree0.01
[{'male': "Hello and welcome to our podcast, today we're going to talk about Java, cheerful"}, {'female': "I'm excited to learn about Java, it's such a popular programming language, excited"}, {'male': 'Yes, Java is widely used for developing large-scale applications, calm'}, {'female': "That's right, and it's also known for its platform independence, friendly"}, {'male': "Java is an object-oriented language, and it's relatively easy to learn, gentle"}, {'female': "I've heard that Java is used in Android app development, is that true?, curious"}, {'male': 'Yes, Java is used for developing Android apps, as well as web applications, serious'}, {'female': "That's really cool, I'm looking forward to learning more about Java, hopeful"}, {'male': "We'll be covering the basics of Java, including variables, data types, and control structures, calm"}, {'female': "I'm ready to get started, let's dive into the world of Jav

In [None]:
"en-IN": {'male': 'en-IN-ArjunNeural', 'female': 'en-IN-AartiNeural'},
"en-GB": {'male': 'en-GB-ThomasNeural', 'female': 'en-GB-MiaNeural'},
"en-US": {'male': 'en-US-Steffan:DragonHDLatestNeural', 'female': 'en-US-Jenny:DragonHDLatestNeural'}

"en-US":{"male": "en-US-JasonNeural", "female": "en-US-JessaNeural"},

Full list of available voices

In [None]:
import azure.cognitiveservices.speech as speechsdk

 # Replace with your Azure Speech Service key
import getpass
import os

# SECURITY: an Azure Speech key used to be hard-coded here. Treat that key as
# compromised and regenerate it. The key is now read from SPEECH_KEY, with an
# interactive prompt as fallback, so it is never saved in the notebook.
SPEECH_KEY = os.environ.get("SPEECH_KEY") or getpass.getpass("Azure Speech key: ")
SPEECH_REGION = os.environ.get("SPEECH_REGION", "eastus")

# Create speech configuration
speech_config = speechsdk.SpeechConfig(subscription=SPEECH_KEY, region=SPEECH_REGION)

# Initialize SpeechSynthesizer
synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config)

# Retrieve available voices
voices = synthesizer.get_voices_async().get()

# Fail loudly on a bad key or region instead of printing an empty table.
if voices.reason != speechsdk.ResultReason.VoicesListRetrieved:
    raise RuntimeError(f"Could not retrieve the voice list: {voices.error_details}")

# Dictionary to store languages and voice names
language_voices = {}

# One entry per locale; a later voice of the same gender replaces the earlier one.
for voice in voices.voices:
    gender = voice.gender.name.lower()  # e.g. "male" or "female"
    language_voices.setdefault(voice.locale, {"male": None, "female": None})[gender] = voice.short_name

print(len(language_voices))
# Display the dictionary
for lang, genders in language_voices.items():
    print(f'"{lang}": {genders},')


154
"af-ZA": {'male': 'af-ZA-WillemNeural', 'female': 'af-ZA-AdriNeural'},
"am-ET": {'male': 'am-ET-AmehaNeural', 'female': 'am-ET-MekdesNeural'},
"ar-AE": {'male': 'ar-AE-HamdanNeural', 'female': 'ar-AE-FatimaNeural'},
"ar-BH": {'male': 'ar-BH-AliNeural', 'female': 'ar-BH-LailaNeural'},
"ar-DZ": {'male': 'ar-DZ-IsmaelNeural', 'female': 'ar-DZ-AminaNeural'},
"ar-EG": {'male': 'ar-EG-ShakirNeural', 'female': 'ar-EG-SalmaNeural'},
"ar-IQ": {'male': 'ar-IQ-BasselNeural', 'female': 'ar-IQ-RanaNeural'},
"ar-JO": {'male': 'ar-JO-TaimNeural', 'female': 'ar-JO-SanaNeural'},
"ar-KW": {'male': 'ar-KW-FahedNeural', 'female': 'ar-KW-NouraNeural'},
"ar-LB": {'male': 'ar-LB-RamiNeural', 'female': 'ar-LB-LaylaNeural'},
"ar-LY": {'male': 'ar-LY-OmarNeural', 'female': 'ar-LY-ImanNeural'},
"ar-MA": {'male': 'ar-MA-JamalNeural', 'female': 'ar-MA-MounaNeural'},
"ar-OM": {'male': 'ar-OM-AbdullahNeural', 'female': 'ar-OM-AyshaNeural'},
"ar-QA": {'male': 'ar-QA-MoazNeural', 'female': 'ar-QA-AmalNeural'},
"ar

Podcast generation for en-US, en-IN, and en-GB

In [None]:
import asyncio
import os
import azure.cognitiveservices.speech as speechsdk
from pydub import AudioSegment
from langchain_groq import ChatGroq
import json

import getpass

# SECURITY: a Groq API key used to be hard-coded here. Treat that key as
# compromised and revoke it. The key is now read from GROQ_API_KEY, with an
# interactive prompt as fallback, so it is never saved in the notebook.
llm = ChatGroq(
    temperature=0,
    groq_api_key=os.environ.get("GROQ_API_KEY") or getpass.getpass("Groq API key: "),
    model_name="llama-3.1-70b-versatile"
)

async def text_to_speech_async(ssml, output_wav_file):
    """Asynchronously converts SSML to speech and saves it to a file.

    Uses the module-level SPEECH_KEY and SPEECH_REGION for authentication.

    Args:
        ssml: Complete SSML document to synthesize.
        output_wav_file: Path of the WAV file to write. Its parent directory is
            created when the path has one.

    Returns:
        output_wav_file once synthesis has completed.

    Raises:
        Exception: Synthesis was canceled; the message carries the reason and
            the error details reported by the service.
        RuntimeError: Synthesis ended with any other unexpected result reason.
    """
    speech_config = speechsdk.SpeechConfig(subscription=SPEECH_KEY, region=SPEECH_REGION)

    # A bare file name has an empty dirname, and os.makedirs("") raises.
    parent_dir = os.path.dirname(output_wav_file)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    audio_config = speechsdk.audio.AudioOutputConfig(filename=output_wav_file)
    synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)

    result_future = synthesizer.speak_ssml_async(ssml)

    # The SDK future's get() blocks, so wait for it on a worker thread.
    result = await asyncio.to_thread(result_future.get)

    if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
        return output_wav_file
    if result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = result.cancellation_details
        raise Exception(f"Speech synthesis canceled: {cancellation_details.reason} - {cancellation_details.error_details}")

    # Never hand back None: callers treat the return value as a readable file path.
    raise RuntimeError(f"Speech synthesis ended with unexpected result: {result.reason}")




async def generate_dialogue_audio(dialogue, voices, output_dir, final_output_file,s_degree):
    """Synthesize every dialogue line concurrently and merge the clips into one WAV.

    Args:
        dialogue: List of dicts mapping a speaker label ("male" / "female") to
            "<text>, <emotion>". The part after the last comma is used as the
            express-as style; without a comma the style is "neutral".
        voices: Mapping with "male" and "female" Azure voice names. Any speaker
            label not present in it uses the "female" voice.
        output_dir: Directory that receives the per-line temporary WAV files.
        final_output_file: Path of the merged WAV file.
        s_degree: Style intensity as a number or numeric string. It is clamped
            to the range 0.01 to 2; None or a blank string keeps the previous
            fixed value of 2.

    Returns:
        The value returned by combine_audio_files for final_output_file.

    Raises:
        ValueError: s_degree is neither blank nor numeric.
        The first exception raised by a synthesis task, after all tasks have
        finished and the temporary files have been removed.
    """
    from xml.sax.saxutils import escape, quoteattr

    # The degree used to be accepted and then ignored (hard-coded "2").
    if s_degree is None or not str(s_degree).strip():
        degree = 2.0
    else:
        degree = min(2.0, max(0.01, float(s_degree)))
    degree_attr = quoteattr(f"{degree:g}")

    temp_paths = []
    tasks = []

    for idx, line in enumerate(dialogue):
        for speaker, content in line.items():
            text, emotion = content.rsplit(",", 1) if "," in content else (content, "neutral")
            emotion = emotion.strip()

            voice = voices[speaker] if speaker in voices else voices["female"]

            # Escape LLM-written text so characters like & or < cannot break the SSML.
            ssml = f"""
            <speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="http://www.w3.org/2001/mstts" xml:lang="en-US">
                <voice name={quoteattr(voice)}>
                    <mstts:express-as style={quoteattr(emotion)} styledegree={degree_attr}>
                        {escape(text)}
                    </mstts:express-as>
                </voice>
            </speak>
            """

            temp_audio_file = os.path.join(output_dir, f"temp_dialogue_{idx+1}_{speaker}.wav")
            temp_paths.append(temp_audio_file)
            tasks.append(text_to_speech_async(ssml, temp_audio_file))

    try:
        # Let every task finish before reporting a failure so no worker is still
        # writing a temp file while cleanup runs.
        results = await asyncio.gather(*tasks, return_exceptions=True)
        for outcome in results:
            if isinstance(outcome, BaseException):
                raise outcome
        return combine_audio_files(results, final_output_file)
    finally:
        # Remove temp clips on success and on failure alike.
        for temp_audio_file in temp_paths:
            if os.path.exists(temp_audio_file):
                os.remove(temp_audio_file)


def combine_audio_files(audio_files, final_output_file):
    """Concatenate WAV clips in order and export the result as one WAV file.

    Args:
        audio_files: Iterable of WAV file paths, in playback order.
        final_output_file: Destination path for the merged audio.

    Returns:
        final_output_file, unchanged.
    """
    # Start from an empty segment and append each clip in turn.
    merged = sum(
        (AudioSegment.from_wav(clip_path) for clip_path in audio_files),
        AudioSegment.empty(),
    )
    merged.export(final_output_file, format="wav")
    return final_output_file


async def main():
    """Collect podcast options, have the LLM write the script, and voice it.

    Reads the question, locale code (en-IN, en-GB or en-US), target length in
    seconds and style degree from stdin. The number of dialogue lines requested
    is the length divided by 5 (at least 1). Invalid input, an unsupported
    locale, an unparseable LLM reply or a reply missing required fields is
    reported with print() and ends the function without synthesizing anything.
    """
    que = input()
    langCode = input("enter langcode:")
    time_r = input("enter time range in seconds:")
    try:
        # Request at least one line, even for durations under five seconds.
        num = max(1, int(time_r) // 5)
    except ValueError:
        print(f"Invalid time range {time_r!r}: expected a whole number of seconds.")
        return
    s_degree = input("enter the degree")

    # NOTE(review): confirm en-US-JasonNeural / en-US-JessaNeural are still
    # offered in your region; check them against the get_voices_async() output.
    voices1 = {
        "en-IN": {'male': 'en-IN-ArjunNeural', 'female': 'en-IN-AartiNeural'},
        "en-GB": {'male': 'en-GB-ThomasNeural', 'female': 'en-GB-MiaNeural'},
        "en-US": {"male": "en-US-JasonNeural", "female": "en-US-JessaNeural"},
        # Add more languages and voices as needed
    }

    # Validate the locale before spending an LLM call on it.
    voices = voices1.get(langCode)
    if voices is None:
        print(f"Unsupported langCode {langCode!r}. Supported codes: {', '.join(sorted(voices1))}")
        return

    # NOTE(review): the example below uses the emotion "happy", which is not in
    # the allowed-emotion list given in the same prompt.
    prompt = f"""
You are an AI model designed to generate podcast content.
- Please provide a podcast script to the following question : "{que}",with given langCode"{langCode}" in the specified JSON format.
- Ensure the response includes a title, language code, and dialogue with multiple participants also with emotion which best suites for dialouge at end of each dialogue seperated
 by ',' with dialouge.
- Only use one of the following emotions: advertisement_upbeat, affectionate, angry, assistant, calm, chat, cheerful, customerservice, depressed, disgruntled, documentary-narration,
 embarrassed, empathetic, envious, excited, fearful, friendly, gentle, hopeful, lyrical, narration-professional, narration-relaxed, newscast, newscast-casual, newscast-formal,
 poetry-reading, sad, serious, shouting, sports_commentary, sports_commentary_excited, whispering, terrified, unfriendly.
- Make it engaging, informative, and structured.
- Use the following example as a guide:

Example Format:

{{
    "title": "What is Git and GitHub and Why It Is Used?",
    "langCode": "en",
    "dialogue": [
        {{"male": "Todays podcast is about git hub and git, excited"}},
        {{"female": "That makes me really happy!, happy"}},
        ...
    ]
}}

Now, generate the response for the question: "{que}" in {num} lines
.
"""
    res = llm.invoke(prompt)
    raw_content = res.content

    # Models often wrap the JSON in prose or ``` fences; keep only the outermost {...}.
    start, end = raw_content.find("{"), raw_content.rfind("}")
    json_text = raw_content[start:end + 1] if 0 <= start < end else raw_content

    try:
        formatted_content = json.loads(json_text)
    except json.JSONDecodeError as e:
        print("Error decoding JSON from LLM response:", str(e))
        print("Raw Response Content:", raw_content)
        # Stop here: continuing used to crash with NameError on the unset `data`.
        return

    required_keys = ["title", "langCode", "dialogue"]
    if not isinstance(formatted_content, dict) or not all(key in formatted_content for key in required_keys):
        print("Error: Generated content is missing required fields.")
        print("Generated Content:", json.dumps(formatted_content, indent=4))
        # Return instead of exit(), which would shut down the notebook kernel.
        return
    data = formatted_content["dialogue"]

    output_dir = "output_directory"
    final_output_file = "final_output.wav"
    print(data)
    final_audio_path = await generate_dialogue_audio(data, voices, output_dir, final_output_file,s_degree)
    print(f"Final audio saved at {final_audio_path}")
await main()


what is python
enter langcode:en-US
enter time range in seconds:50
enter the degree0.01
[{'male': "Welcome to our podcast, today we're going to talk about Python, cheerful"}, {'female': "I'm excited to learn about Python, it's a popular programming language, excited"}, {'male': "That's right, Python is a high-level language that's easy to learn and versatile, calm"}, {'female': 'It was created in the late 1980s by Guido van Rossum, and has since become a favorite among developers, friendly'}, {'male': 'Python is widely used in web development, data analysis, and artificial intelligence, serious'}, {'female': "It's also used in scientific computing, education, and research, gentle"}, {'male': 'One of the key features of Python is its simplicity and readability, making it a great language for beginners, hopeful'}, {'female': 'Python has a large and active community, with many libraries and frameworks available, enthusiastic'}, {'male': 'Some of the most popular applications of Python inc