In [65]:
hr_lines = [
    "Hi John, thanks for joining. Please have a seat.",
    "I know this might not be an easy conversation, but I want to be direct and respectful. After a thorough review, the management has made the decision to end your employment with the company, effective immediately.",
    "Yes. I understand this may come as a shock. This decision was not taken lightly.",
    "I completely understand your concern. The decision was based on a combination of factors, including ongoing feedback regarding project deadlines, communication with the team, and alignment with company goals. This has been discussed over the past few months during your check-ins.",
    "You’re right that we didn’t issue a formal Performance Improvement Plan. However, your last two evaluations did raise several red flags. The management team discussed this and decided not to proceed with a PIP but rather make a direct decision.",
    "That was also a factor. The company is going through changes, and unfortunately, that means reducing roles in some departments.",
    "Your access to company systems will end by the end of today. You’ll receive two months’ severance pay, continued health benefits for 30 days, and we’ll offer outplacement support if you’re interested.",
    "Yes. I’m happy to provide a neutral reference confirming your role and time here. For a more detailed recommendation, I suggest reaching out to your former manager directly.",
    "Just your laptop and access card. You can leave them with IT today or tomorrow. We’ll email a checklist.",
    "I truly wish you all the best, John. If you need anything over the next few days, don’t hesitate to reach out."
]

employee_lines = [
    "Sure, thanks Mary.",
    "…Wait—I'm being let go?",
    "Can I ask why? I thought my performance had been solid lately.",
    "But no one told me I was at risk of being fired. I was never formally warned.",
    "I’m still surprised. I’ve been trying to improve. Is this related to the recent restructuring?",
    "So, what happens now?",
    "Will I be able to get a reference?",
    "Alright. Do I need to return anything?",
    "I see. Well… this is not how I imagined today would go. But I appreciate the clarity.",
    "Thanks, Mary. I’ll take some time to process this."
]
hr_directory = r"E:\Desktop\AI Stack\conversation-resource\hr"
employee_directory = r"E:\Desktop\AI Stack\conversation-resource\employee"

In [66]:
import openai
import os
from dotenv import load_dotenv

load_dotenv()
openai.api_key = os.getenv("OPENAI_COACH_PROJECT_KEY")

In [67]:
import openai
import time


def tts_openai_invoke(input_text, output_path):
    start = time.time()
    response = openai.audio.speech.create(
        model="tts-1",  # or "tts-1-hd"
        voice="alloy",  # or: alloy, echo, fable, onyx, nova, shimmer
        input=input_text
    )

    with open(output_path, "wb") as f:
        f.write(response.content)

    print(f"✅ Audio saved to {os.path.basename(output_path)} within {time.time() - start:.2f} seconds")

In [68]:
import os
from google.cloud import texttospeech
from dotenv import load_dotenv
import time

load_dotenv()

client = texttospeech.TextToSpeechClient()


def tts_google_invoke(input_text, output_path):
    start = time.time()
    synthesis_input = texttospeech.SynthesisInput(text=input_text)

    voice = texttospeech.VoiceSelectionParams(
        language_code="en-US",
        name="en-US-Wavenet-D",
        ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL
    )

    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3
    )

    # call the API
    response = client.synthesize_speech(
        input=synthesis_input,
        voice=voice,
        audio_config=audio_config
    )

    with open(output_path, "wb") as out:
        out.write(response.audio_content)
        print(f"✅ Audio saved to {output_path} within {time.time() - start:.2f} seconds")


In [39]:
# for i, line in enumerate(hr_lines):
#     tts_openai_invoke(input_text=line, output_path=f"{hr_directory}/{i}.mp3")

✅ Audio saved to 0.mp3 within 1.50 seconds
✅ Audio saved to 1.mp3 within 2.52 seconds
✅ Audio saved to 2.mp3 within 2.65 seconds
✅ Audio saved to 3.mp3 within 3.58 seconds
✅ Audio saved to 4.mp3 within 3.45 seconds
✅ Audio saved to 5.mp3 within 3.45 seconds
✅ Audio saved to 6.mp3 within 3.02 seconds
✅ Audio saved to 7.mp3 within 4.07 seconds
✅ Audio saved to 8.mp3 within 2.76 seconds
✅ Audio saved to 9.mp3 within 1.75 seconds


In [40]:
# for i, line in enumerate(hr_lines):
#     tts_google_invoke(input_text=line, output_path=f"{hr_directory}\{i}.mp3")

✅ Audio saved to E:\Desktop\AI Stack\conversation-resource\hr\0.mp3 within 0.52 seconds
✅ Audio saved to E:\Desktop\AI Stack\conversation-resource\hr\1.mp3 within 0.51 seconds
✅ Audio saved to E:\Desktop\AI Stack\conversation-resource\hr\2.mp3 within 0.41 seconds
✅ Audio saved to E:\Desktop\AI Stack\conversation-resource\hr\3.mp3 within 0.60 seconds
✅ Audio saved to E:\Desktop\AI Stack\conversation-resource\hr\4.mp3 within 0.53 seconds
✅ Audio saved to E:\Desktop\AI Stack\conversation-resource\hr\5.mp3 within 0.33 seconds
✅ Audio saved to E:\Desktop\AI Stack\conversation-resource\hr\6.mp3 within 0.63 seconds
✅ Audio saved to E:\Desktop\AI Stack\conversation-resource\hr\7.mp3 within 0.81 seconds
✅ Audio saved to E:\Desktop\AI Stack\conversation-resource\hr\8.mp3 within 0.60 seconds
✅ Audio saved to E:\Desktop\AI Stack\conversation-resource\hr\9.mp3 within 0.43 seconds


In [91]:
import os
from groq import Groq


def stt_groq_invoke(audio_path):
    client = Groq()

    # model = "whisper-large-v3-turbo"
    model = "whisper-large-v3"

    start = time.time()
    with open(audio_path, "rb") as file:
        transcription = client.audio.transcriptions.create(
            file=(audio_path, file.read()),
            model=model,
            response_format="verbose_json",
        )
        return transcription.text
    print(f"✅ Total time convertion mp3: {time.time() - start}")


In [88]:
import pygame

def play_mp3(file_path):
    pygame.mixer.init()
    pygame.mixer.music.load(file_path)
    pygame.mixer.music.play()
    while pygame.mixer.music.get_busy():
        continue


pygame 2.6.1 (SDL 2.28.4, Python 3.10.16)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [92]:
# from playsound import playsound
import time


def simulation():
    for i in range(10):
        hr_mp3_path = rf"{hr_directory}\{i}.mp3"
        print(hr_mp3_path)
        play_mp3(hr_mp3_path)
        text = stt_groq_invoke(hr_mp3_path)
        print(text)
        time.sleep(0.7)
        employee_mp3_path = rf"{employee_directory}\{i}.mp3"
        tts_openai_invoke(employee_lines[i], employee_mp3_path)
        play_mp3(employee_mp3_path)
        time.sleep(0.7)


In [93]:
simulation()

E:\Desktop\AI Stack\conversation-resource\hr\0.mp3
 Hi John, thanks for joining. Please have a seat.
✅ Audio saved to 0.mp3 within 2.82 seconds
E:\Desktop\AI Stack\conversation-resource\hr\1.mp3
 I know this might not be an easy conversation, but I want to be direct and respectful. After a thorough review, the management has made the decision to end your employment with the company, effective immediately.
✅ Audio saved to 1.mp3 within 1.05 seconds
E:\Desktop\AI Stack\conversation-resource\hr\2.mp3
 Yes, I understand this may come as a shock. This decision was not taken lightly.
✅ Audio saved to 2.mp3 within 1.70 seconds
E:\Desktop\AI Stack\conversation-resource\hr\3.mp3
 I completely understand your concern. The decision was based on a combination of factors, including ongoing feedback regarding project deadlines, communication with the team, and alignment with company goals. This has been discussed over the past few months during your check-ins.
✅ Audio saved to 3.mp3 within 2.24 seco