In [None]:
!pip install --upgrade resemble

Collecting resemble
  Using cached resemble-1.6.0-py3-none-any.whl.metadata (2.9 kB)
Using cached resemble-1.6.0-py3-none-any.whl (8.0 kB)
Installing collected packages: resemble
Successfully installed resemble-1.6.0


In [8]:
pip install resemble

Note: you may need to restart the kernel to use updated packages.



✅ Wake word detected!


In [None]:
import io
import json
import os
import struct
import threading
import time

import numpy as np
import pvporcupine
import requests
import sounddevice as sd
import soundfile as sf

# --- API KEYS ---
# Secrets are read from environment variables so they never live in the notebook
# source, its saved outputs, or version history. Export them before starting the
# kernel. Any key that was ever committed in this file should be treated as
# exposed and rotated with its provider.
def _require_env(name):
    """Return the non-empty value of environment variable ``name``.

    Raises:
        RuntimeError: if the variable is unset or blank, so a missing secret
            fails here with a clear message instead of later inside an API call.
    """
    value = os.environ.get(name, "").strip()
    if not value:
        raise RuntimeError(
            f"Missing required environment variable {name!r}; "
            "set it before running this notebook."
        )
    return value


ACCESS_KEY = _require_env("PICOVOICE_ACCESS_KEY")  # Porcupine wake word
GEMINI_API_KEY = _require_env("GEMINI_API_KEY")
HF_API_TOKEN = _require_env("HF_API_TOKEN")  # Whisper

# --- Resemble.ai ---
from resemble import Resemble
RESEMBLE_API_KEY = _require_env("RESEMBLE_API_KEY")
Resemble.api_key(RESEMBLE_API_KEY)

# Global project and voice UUIDs (filled in by init_resemble, read by speak_resemble)
project_uuid = None
voice_uuid = None

def _first_resemble_uuid(response, kind):
    """Return the ``uuid`` of the first entry in a Resemble list response.

    Args:
        response: Value returned by a ``Resemble.v2.<resource>.all(...)`` call.
        kind: Plural noun ("projects" / "voices") used in the error message.

    Raises:
        Exception: if ``response`` has no non-empty ``items`` list.
    """
    is_mapping = isinstance(response, dict)
    items = response.get("items") if is_mapping else None
    if not items:
        # A response without 'items' used to surface as a bare KeyError('items').
        # Presumably error payloads carry a 'message' field (TODO confirm against
        # the Resemble API); include it when present so the failure is diagnosable.
        detail = response.get("message") if is_mapping else None
        suffix = f" (API message: {detail})" if detail else ""
        raise Exception(f"No {kind} found in Resemble account.{suffix}")
    return items[0]["uuid"]


def init_resemble():
    """Populate the module-level ``project_uuid`` and ``voice_uuid``.

    Lists projects and then voices (each call made with arguments ``(1, 10)``)
    and keeps the UUID of the first entry of each list.

    Raises:
        Exception: if either listing contains no items. In that case neither
            global is modified, so the module is never left half-initialised.
    """
    global project_uuid, voice_uuid
    first_project = _first_resemble_uuid(Resemble.v2.projects.all(1, 10), "projects")
    first_voice = _first_resemble_uuid(Resemble.v2.voices.all(1, 10), "voices")
    # Assign only after both lookups succeeded.
    project_uuid, voice_uuid = first_project, first_voice

# Porcupine wake word setup
# One engine instance, created with the Picovoice access key and the "jarvis" keyword.
porcupine = pvporcupine.create(
    keywords=["jarvis"],
    access_key=ACCESS_KEY,
)
# Shared flag: set to True by audio_callback, consumed and reset by run_ai_pipeline.
wake_word_detected = False

def audio_callback(indata, frames, time_info, status):
    """Feed one captured audio block to Porcupine and flag a wake-word hit.

    Used as the ``callback`` of the input stream opened in
    ``start_wake_word_listener``. ``frames``, ``time_info`` and ``status`` are
    accepted to satisfy the callback signature but are not used here.
    On detection a message is printed and the module-level
    ``wake_word_detected`` flag is set to True.
    """
    global wake_word_detected
    # One "h" (signed 16-bit) format code per entry of ``indata``.
    sample_layout = "h" * len(indata)
    samples = struct.unpack_from(sample_layout, indata)
    # A negative result is treated as "no keyword in this block".
    if porcupine.process(samples) < 0:
        return
    print("\n✅ Wake word detected!")
    wake_word_detected = True

def start_wake_word_listener(stop_event=None):
    """Open the microphone stream that drives ``audio_callback`` and keep it open.

    The stream is mono, int16, and uses Porcupine's own sample rate and frame
    length so that every callback block is exactly one Porcupine frame.

    Args:
        stop_event: Optional ``threading.Event``. When given, the function
            returns (closing the stream) shortly after the event is set, which
            lets a notebook release the microphone instead of leaving the
            stream open for the life of the kernel. With the default ``None``
            the function blocks forever, as before.
    """
    with sd.InputStream(channels=1,
                        samplerate=porcupine.sample_rate,
                        dtype='int16',
                        blocksize=porcupine.frame_length,
                        callback=audio_callback):
        print("🎧 Listening for 'jarvis'... Speak the wake word anytime.")
        # The stream delivers audio on its own; this loop only keeps the
        # `with` block (and therefore the stream) alive.
        while stop_event is None or not stop_event.is_set():
            time.sleep(0.1)

# Whisper transcription
def transcribe_whisper(filename, timeout=60):
    """Transcribe an audio file with the hosted Whisper model.

    The file's raw bytes are POSTed to the Hugging Face inference endpoint for
    ``openai/whisper-large-v3`` and the ``text`` field of the JSON reply is
    returned.

    Args:
        filename: Path of the audio file to upload (sent as ``audio/wav``).
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The transcript string, or the literal ``"Transcription failed."`` when
        the request fails, the reply is not JSON, or it has no ``text`` field.

    Raises:
        OSError: if ``filename`` cannot be opened.
    """
    url = "https://api-inference.huggingface.co/models/openai/whisper-large-v3"
    headers = {
        "Authorization": f"Bearer {HF_API_TOKEN}",
        "Content-Type": "audio/wav"
    }
    with open(filename, "rb") as f:
        audio_data = f.read()
    try:
        # Without a timeout a stalled connection would block the assistant forever.
        response = requests.post(url, headers=headers, data=audio_data, timeout=timeout)
    except requests.RequestException as e:
        # Network-level failures use the same sentinel as parse failures below.
        print("❌ Whisper request failed:", e)
        return "Transcription failed."
    print("📡 Whisper API status:", response.status_code)
    print("📨 Whisper API raw response:", response.text[:300])
    try:
        result = response.json()
        if "text" in result:
            return result["text"]
        else:
            print("❌ No 'text' field found in response.")
            return "Transcription failed."
    except Exception as e:
        print("❌ Error parsing Whisper response:", e)
        return "Transcription failed."

# Gemini AI query
def query_gemini(text, timeout=60):
    """Send ``text`` to Gemini (``gemini-1.5-flash``) and return its reply.

    Args:
        text: Prompt sent as the single text part of the request.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The text of the first part of the first candidate, or the literal
        ``"Gemini response failed."`` when the request fails or the reply does
        not have that shape.
    """
    url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent"
    headers = {
        "Content-Type": "application/json",
        # The key travels in a header rather than the query string so it cannot
        # end up in printed URLs or in exception messages that embed the URL.
        "x-goog-api-key": GEMINI_API_KEY,
    }
    payload = {"contents": [{"parts": [{"text": text}]}]}
    try:
        # Without a timeout a stalled connection would block the assistant forever.
        response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=timeout)
    except requests.RequestException as e:
        print("❌ Gemini request failed:", e)
        return "Gemini response failed."
    print("Gemini API status:", response.status_code)
    try:
        result = response.json()
        if 'candidates' in result:
            return result['candidates'][0]['content']['parts'][0]['text']
        else:
            print("❌ No 'candidates' found in Gemini response.")
            return "Gemini response failed."
    except Exception as e:
        print("❌ Error parsing Gemini response:", e)
        return "Gemini response failed."

# Resemble synchronous clip creation (create_sync) + audio download + playback
def speak_resemble(text, timeout=60):
    """Synthesize ``text`` with Resemble.ai, save the audio and play it.

    Creates a clip with ``Resemble.v2.clips.create_sync`` using the module-level
    ``project_uuid`` / ``voice_uuid``, downloads the clip's ``audio_src`` URL,
    writes the decoded audio to ``resemble_response.wav`` and plays it through
    ``sounddevice``, blocking until playback ends.

    Args:
        text: Text to speak. Empty or whitespace-only text is rejected locally.
        timeout: Seconds to wait for the audio download before giving up.

    Returns:
        None. The handled failures (uninitialized UUIDs, empty text, no audio
        URL in the reply, failed download) print a message and return early.
    """
    if project_uuid is None or voice_uuid is None:
        print("❌ Resemble project or voice UUID not initialized.")
        return

    if not text or not text.strip():
        # Nothing to synthesize; avoid a pointless API call.
        print("❌ No text to synthesize.")
        return

    print("🎤 Sending text to Resemble.ai for speech synthesis...")
    clip = Resemble.v2.clips.create_sync(project_uuid, voice_uuid, text, title="AI_Response")

    # `or {}` guards against 'item' being absent *or* present but null; the
    # plain .get("item", {}) default only covered the absent case.
    item = (clip or {}).get("item") or {}
    audio_url = item.get("audio_src")
    if not audio_url:
        print("❌ No audio URL returned from Resemble.")
        print("Full response:", clip)
        return

    print(f"🔗 Audio URL: {audio_url}")

    try:
        # Without a timeout a stalled download would block the assistant forever.
        audio_resp = requests.get(audio_url, timeout=timeout)
    except requests.RequestException as e:
        print(f"❌ Failed to download audio: {e}")
        return
    if audio_resp.status_code != 200:
        print(f"❌ Failed to download audio: {audio_resp.status_code}")
        return

    # Decode from memory so the same samples can be both saved and played.
    audio_buffer = io.BytesIO(audio_resp.content)
    data, samplerate = sf.read(audio_buffer, dtype='float32')

    # Save audio as .wav
    sf.write("resemble_response.wav", data, samplerate)
    print("✅ Audio saved as 'resemble_response.wav'")

    print("🔊 Playing audio from Resemble.ai...")
    sd.play(data, samplerate)
    sd.wait()  # block until playback has finished
    print("✅ Finished playing audio.")

# Main loop
def run_ai_pipeline(duration=5, fs=44100, poll_interval=1):
    """Wait for the wake word, then run one record → transcribe → answer → speak cycle.

    Polls the module-level ``wake_word_detected`` flag. Once it is set, the flag
    is cleared, ``duration`` seconds of mono audio are recorded to
    ``user_input.wav``, the recording is transcribed, the transcript is sent to
    Gemini and the reply is spoken. The function returns after that single cycle.

    Args:
        duration: Length of the voice recording in seconds.
        fs: Sample rate of the voice recording in Hz.
        poll_interval: Seconds to sleep between checks of the wake-word flag.
    """
    global wake_word_detected
    while True:
        if wake_word_detected:
            wake_word_detected = False
            print("🎤 Wake word received... recording voice...")

            # NOTE(review): this records while the wake-word input stream is
            # presumably still open on another thread; whether two concurrent
            # input streams are allowed depends on the audio backend — confirm.
            recording = sd.rec(int(duration * fs), samplerate=fs, channels=1)
            sd.wait()  # block until the recording is complete
            sf.write("user_input.wav", recording, fs)

            user_text = transcribe_whisper("user_input.wav")
            print("📝 Transcribed:", user_text)

            # transcribe_whisper signals failure with this exact sentinel string.
            # Forwarding it (or an empty transcript) would make Gemini answer the
            # error message and have that answer synthesized and spoken.
            transcript = (user_text or "").strip()
            if not transcript or transcript == "Transcription failed.":
                print("⚠️ Nothing usable was transcribed; skipping Gemini and speech synthesis.")
            else:
                ai_text = query_gemini(user_text)
                print("🤖 Gemini says:", ai_text)

                speak_resemble(ai_text)

            print("👋 Ending assistant after 1 run.")
            break
        time.sleep(poll_interval)

# Initialize and run
# Step 1: resolve the Resemble project/voice UUIDs; report and re-raise on failure
# so the rest of the cell does not run without them.
print("🔧 Initializing Resemble project and voice info...")
try:
    init_resemble()
    print(f"✅ Initialized Project UUID: {project_uuid}")
    print(f"✅ Initialized Voice UUID: {voice_uuid}")
except Exception as init_error:
    print(f"❌ Initialization error: {init_error}")
    raise init_error

# Step 2: run the wake-word listener on a daemon thread so it never keeps the
# process alive on its own.
wake_thread = threading.Thread(target=start_wake_word_listener, daemon=True)
wake_thread.start()

# Step 3: block here until one full assistant cycle has completed.
run_ai_pipeline()


🔧 Initializing Resemble project and voice info...
✅ Initialized Project UUID: 8ca21166
✅ Initialized Voice UUID: 3f5fb9f1
🎧 Listening for 'jarvis'... Speak the wake word anytime.

✅ Wake word detected!
🎤 Wake word received... recording voice...
📡 Whisper API status: 200
📨 Whisper API raw response: {"text":" Tell me something about AIML."}
📝 Transcribed:  Tell me something about AIML.
Gemini API status: 200
🤖 Gemini says: AIML stands for **Artificial Intelligence Markup Language**.  It's a simple XML-based language designed specifically for creating chatbots.  Think of it as a way to define the chatbot's "personality" and knowledge base.

Here's a breakdown of key aspects:

* **XML-based:**  Its structure is based on XML tags, making it relatively easy to read and understand, even for those without extensive programming knowledge.
* **Rule-based:** AIML works primarily on a pattern-matching system.  You define patterns (user inputs) and corresponding templates (bot responses). When a us