In [3]:
# =====================================
# Step 1: Install lightweight dependencies
# =====================================
!pip install gTTS openai-whisper phonikud

# =====================================
# Step 2: Imports
# =====================================
import os
import subprocess

import whisper
from gtts import gTTS
from IPython.display import Audio, display
from phonikud import phonemize

# Make sure both output folders exist before anything is written into them
for output_dir in ("outputs/audio", "outputs/transcripts"):
    os.makedirs(output_dir, exist_ok=True)

# Load the "small" Whisper checkpoint; it is used for speech-to-text in Step 4
stt = whisper.load_model("small")

# =====================================
# Step 3: Client text in Hebrew
# =====================================
client_text = "שלום, אני רוצה לבטל את המנוי לטלוויזיה."

# Phonemize (optional, for debugging).
# NOTE(review): the recorded output of this line contains consonants only
# (e.g. "ʃˈlm"); phonikud presumably expects text that already carries nikud
# (vowel marks) -- confirm before relying on these phonemes.
print("Client phonemes:", phonemize(client_text))

# Convert client text → audio.
# gTTS writes MP3 data regardless of the file extension, so synthesize to a
# temporary .mp3 and let ffmpeg (already required by Whisper to read audio
# files) transcode it into a real WAV file at the advertised path.
client_audio_path = "outputs/audio/client.wav"
client_mp3_path = "outputs/audio/client.mp3"
gtts_client = gTTS(text=client_text, lang="iw")
gtts_client.save(client_mp3_path)
subprocess.run(
    ["ffmpeg", "-y", "-loglevel", "error", "-i", client_mp3_path, client_audio_path],
    check=True,  # fail loudly instead of leaving a missing/partial WAV behind
)
os.remove(client_mp3_path)  # keep only the WAV in outputs/audio

# Play client audio. A bare `Audio(...)` expression is rendered only when it is
# the last statement of a cell, so display it explicitly.
display(Audio(client_audio_path))

# =====================================
# Step 4: Transcribe client audio
# =====================================
# Run speech-to-text on the client recording, telling Whisper the audio is Hebrew
result = stt.transcribe(client_audio_path, language="he")

# The returned text starts with a space (visible in the recorded "Transcript:"
# output), so trim surrounding whitespace before saving and printing it.
transcript = result["text"].strip()

# Save transcript (UTF-8 so the Hebrew text round-trips correctly)
with open("outputs/transcripts/client.txt", "w", encoding="utf-8") as f:
    f.write(transcript)

print("Transcript:", transcript)

# =====================================
# Step 5: Agent response
# =====================================
agent_response = "שלום, אני מבין. אטפל בבקשת הביטול שלך."

# Phonemes are printed for debugging only.
# NOTE(review): the recorded output of this line contains consonants only;
# phonikud presumably expects text that already carries nikud -- confirm.
print("Agent phonemes:", phonemize(agent_response))

# Convert agent text → audio.
# gTTS writes MP3 data regardless of the file extension, so synthesize to a
# temporary .mp3 and let ffmpeg (already required by Whisper to read audio
# files) transcode it into a real WAV file at the advertised path.
agent_audio_path = "outputs/audio/agent.wav"
agent_mp3_path = "outputs/audio/agent.mp3"
gtts_agent = gTTS(text=agent_response, lang="iw")
gtts_agent.save(agent_mp3_path)
subprocess.run(
    ["ffmpeg", "-y", "-loglevel", "error", "-i", agent_mp3_path, agent_audio_path],
    check=True,  # fail loudly instead of leaving a missing/partial WAV behind
)
os.remove(agent_mp3_path)  # keep only the WAV in outputs/audio

# Save agent transcript (UTF-8 so the Hebrew text round-trips correctly)
with open("outputs/transcripts/agent.txt", "w", encoding="utf-8") as f:
    f.write(agent_response)

# Play agent audio. A bare `Audio(...)` expression is rendered only when it is
# the last statement of a cell, so display it explicitly.
display(Audio(agent_audio_path))

# =====================================
# Step 6: Done
# =====================================
# Emit both closing messages with one call; each still ends up on its own line
print(
    "Simulation complete!",
    "Files saved under outputs/audio and outputs/transcripts.",
    sep="\n",
)


Collecting gTTS
  Downloading gTTS-2.5.4-py3-none-any.whl.metadata (4.1 kB)
Collecting openai-whisper
  Downloading openai_whisper-20250625.tar.gz (803 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m803.2/803.2 kB[0m [31m15.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting click<8.2,>=7.1 (from gTTS)
  Downloading click-8.1.8-py3-none-any.whl.metadata (2.3 kB)
Downloading gTTS-2.5.4-py3-none-any.whl (29 kB)
Downloading click-8.1.8-py3-none-any.whl (98 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.2/98.2 kB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: openai-whisper
  Building wheel for openai-whisper (pyproject.toml) ... [?25l[?25hdone
  Created wheel for openai-whisper: filename=openai_whisper-20250625-py3-none-any.whl size=80397

  4%|▍         | 18.7M/425M [03:10<2:39:00, 42.6kiB/s]

Installing collected packages: click, gTTS, openai-whisper
  Attempting uninstall: click
    Found existing installation: click 8.2.1
    Uninstalling click-8.2.1:
      Successfully uninstalled click-8.2.1
Successfully installed click-8.1.8 gTTS-2.5.4 openai-whisper-20250625



  0%|                                               | 0.00/461M [00:00<?, ?iB/s][A
  1%|▎                                     | 3.80M/461M [00:00<00:12, 39.8MiB/s][A
  3%|█                                     | 12.5M/461M [00:00<00:06, 69.4MiB/s][A
  4%|█▌                                    | 19.1M/461M [00:00<00:08, 54.7MiB/s][A
  5%|██                                    | 25.2M/461M [00:00<00:07, 57.9MiB/s][A
  7%|██▋                                   | 32.4M/461M [00:00<00:07, 63.2MiB/s][A
  8%|███▏                                  | 38.6M/461M [00:00<00:07, 59.6MiB/s][A
 10%|███▋                                  | 44.4M/461M [00:00<00:08, 54.5MiB/s][A
 11%|████▏                                 | 50.8M/461M [00:00<00:07, 57.8MiB/s][A
 12%|████▋                                 | 56.5M/461M [00:01<00:12, 34.9MiB/s][A
 13%|█████                                 | 60.9M/461M [00:01<00:12, 32.8MiB/s][A
 14%|█████▎                                | 64.7M/461M [00:01<00:13, 31.7M

Client phonemes: ʃˈlm, ʔˈnj ʁˈts lˈvtl ʔˈt hˈmnj ltlvˈuz.




Transcript:  שלום, אני רוצה לבטל את המנוי לטלוויזיה
Agent phonemes: ʃˈlm, ʔˈnj mˈvn. ʔˈtfl vˈvkʃt hˈvtl ʃˈlχ.
Simulation complete!
Files saved under outputs/audio and outputs/transcripts.


In [7]:
# Colab-specific file helpers; this import is also what the later download cells rely on
from google.colab import files

# Hand the synthesized client audio to Colab's download helper
client_audio_download = "outputs/audio/client.wav"
files.download(client_audio_download)



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [8]:
# Hand the synthesized agent audio to Colab's download helper
agent_audio_download = "outputs/audio/agent.wav"
files.download(agent_audio_download)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [9]:
# Hand the saved client transcript to Colab's download helper
client_transcript_download = "outputs/transcripts/client.txt"
files.download(client_transcript_download)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [10]:
# Hand the saved agent transcript to Colab's download helper
agent_transcript_download = "outputs/transcripts/agent.txt"
files.download(agent_transcript_download)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>