In [None]:
# Step 1: Install all required packages
# Python packages: python-telegram-bot (bot framework), requests (HTTP calls to
# Hugging Face), nest_asyncio, gTTS (text-to-speech), pydub (audio conversion)
# and Whisper installed straight from its GitHub repository.
# NOTE(review): python-dotenv is installed here but never imported in the code
# visible in this cell.
!pip install python-telegram-bot requests nest_asyncio python-dotenv gTTS pydub git+https://github.com/openai/whisper.git --quiet
# System package; presumably required by pydub/Whisper to decode audio — TODO confirm.
!apt-get install ffmpeg -y

# Step 2: Imports
import os
import whisper
import requests
import nest_asyncio
from telegram import Update
from telegram.ext import ApplicationBuilder, CommandHandler, MessageHandler, ContextTypes, filters
from gtts import gTTS
from pydub import AudioSegment

# Step 3: Patch asyncio via nest_asyncio before the bot starts polling
nest_asyncio.apply()

# Step 4: Load the Whisper model once at startup; every transcription reuses it
_WHISPER_MODEL_NAME = "base"  # checkpoint name handed to whisper.load_model
whisper_model = whisper.load_model(_WHISPER_MODEL_NAME)

# Step 5: Read API tokens from the environment instead of hard-coding them.
# SECURITY: credentials that were previously pasted into this cell must be
# treated as leaked -- revoke and regenerate them. Provide HF_TOKEN and
# TELEGRAM_TOKEN through the process environment before running this cell
# (or load them from a .env file with python-dotenv if preferred).
def _read_token(env_name):
    """Return the stripped value of environment variable *env_name*.

    Returns an empty string and prints a warning when the variable is unset
    or blank, so a missing credential is visible at startup.
    """
    value = os.environ.get(env_name, "").strip()
    if not value:
        print(f"⚠️ Environment variable {env_name} is not set.")
    return value


HF_TOKEN = _read_token("HF_TOKEN")
TELEGRAM_TOKEN = _read_token("TELEGRAM_TOKEN")

HF_MODEL = "HuggingFaceH4/zephyr-7b-beta"
HF_URL = f"https://api-inference.huggingface.co/models/{HF_MODEL}"
HF_HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"}

# Step 6: LLM text generation via Hugging Face
def query_huggingface(prompt, timeout=60):
    """Send *prompt* to the Hugging Face inference endpoint and return the reply text.

    Args:
        prompt: User text inserted into the chat-style prompt.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The text after the last ``<|assistant|>`` marker of the generated
        output, stripped of surrounding whitespace. On a network failure or
        non-200 status the error is printed and an "unavailable" message is
        returned; on an unexpected response shape a "parsing" message is
        returned. This function does not raise for those cases.
    """
    payload = {
        "inputs": f"<|system|>You are a helpful assistant.<|user|>{prompt}<|assistant|>",
        "parameters": {"max_new_tokens": 200, "temperature": 0.7},
    }
    try:
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.post(
            HF_URL, headers=HF_HEADERS, json=payload, timeout=timeout
        )
    except requests.RequestException as exc:
        print("HuggingFace API Error:", str(exc))
        return "⚠️ Sorry, the model is currently unavailable."

    if response.status_code != 200:
        print("HuggingFace API Error:", response.text)
        return "⚠️ Sorry, the model is currently unavailable."

    try:
        generated = response.json()[0]["generated_text"]
        return generated.split("<|assistant|>")[-1].strip()
    except (ValueError, KeyError, IndexError, TypeError, AttributeError):
        # Body was not JSON, or not the expected [{"generated_text": str}] shape.
        return "⚠️ Error parsing response."

# Step 7: Local transcription using Whisper
def transcribe_voice(file_path):
    """Transcribe the audio file at *file_path* with the module-level Whisper model.

    Returns the value stored under the "text" key of the transcription
    result. If anything raises, the error is printed and a fixed failure
    message is returned instead.
    """
    try:
        transcription = whisper_model.transcribe(file_path)["text"]
    except Exception as err:
        print("Whisper Error:", str(err))
        transcription = "⚠️ Local transcription failed."
    return transcription

# Step 8: Convert reply text to voice
def generate_voice(text, filename="response.mp3"):
    """Synthesize *text* with gTTS, save the audio to *filename* and return *filename*."""
    gTTS(text).save(filename)
    return filename

# Step 9: Telegram bot logic
async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Reply to the /start command with a short usage greeting."""
    greeting = "👋 Send me a text or voice message, and I’ll reply with voice!"
    await update.message.reply_text(greeting)

async def handle_text(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Answer a text message with a spoken reply generated by the LLM.

    The incoming text is sent to ``query_huggingface``, the reply is turned
    into an audio file by ``generate_voice`` and sent back as a voice message.
    Any failure is printed and reported to the user instead of propagating.
    """
    # update.message is None for edited messages / channel posts, which the
    # TEXT filter also matches; effective_message covers those cases.
    message = update.effective_message
    if message is None or not message.text:
        return

    user_input = message.text.strip()
    try:
        await context.bot.send_chat_action(chat_id=update.effective_chat.id, action="typing")
        reply = query_huggingface(user_input)
        voice_file = generate_voice(reply)
        # Context manager so the audio file handle is closed after sending.
        with open(voice_file, "rb") as audio:
            await message.reply_voice(voice=audio)
    except Exception as e:
        print("Text handling error:", str(e))
        await message.reply_text("❌ I couldn’t process your message.")

async def handle_voice(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Transcribe a voice message, echo the transcript, and answer with voice.

    Steps: download the voice note as OGG, convert it to MP3 with pydub,
    transcribe it with ``transcribe_voice``, send the transcript back, query
    the LLM with it and reply with the synthesized answer. Any failure
    (including the download) is printed and reported to the user. The
    intermediate OGG/MP3 files are removed afterwards.
    """
    message = update.effective_message
    if message is None or message.voice is None:
        return

    voice_path_ogg = "voice.ogg"
    voice_path_mp3 = "voice.mp3"

    try:
        tg_file = await message.voice.get_file()
        await tg_file.download_to_drive(voice_path_ogg)

        sound = AudioSegment.from_ogg(voice_path_ogg)
        # export() hands back the opened output file; close it to avoid a leak.
        sound.export(voice_path_mp3, format="mp3").close()

        text = transcribe_voice(voice_path_mp3)
        await message.reply_text(f"📝 You said: {text}")

        reply = query_huggingface(text)
        voice_reply = generate_voice(reply)
        with open(voice_reply, "rb") as audio:
            await message.reply_voice(voice=audio)
    except Exception as e:
        print("Voice handling error:", str(e))
        await message.reply_text("❌ I couldn’t process your voice message.")
    finally:
        # Best-effort cleanup; a file may not exist if an earlier step failed.
        for path in (voice_path_ogg, voice_path_mp3):
            try:
                os.remove(path)
            except OSError:
                pass

# Step 10: Launch bot
app = ApplicationBuilder().token(TELEGRAM_TOKEN).build()

# Register handlers in the same order: /start, plain text, then voice notes.
for _handler in (
    CommandHandler("start", start),
    MessageHandler(filters.TEXT & ~filters.COMMAND, handle_text),
    MessageHandler(filters.VOICE, handle_voice),
):
    app.add_handler(_handler)

print("🎙️ Voice-enabled Telegram Bot with Local Whisper is running...")
app.run_polling()


  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyproject.toml) ... done
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m673.5/673.5 kB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m93.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m78.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m42.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.7 MB/s[0m eta [36m0

100%|████████████████████████████████████████| 139M/139M [00:01<00:00, 101MiB/s]


🎙️ Voice-enabled Telegram Bot with Local Whisper is running...
