In [1]:
# [CELL 1] - Install all required libraries
!pip install -q -U langchain-google-genai langgraph elevenlabs google-genai ffmpeg-python pydub

In [2]:
# [CELL 2] - API Key Configuration
import os
# Placeholder credentials: replace the values with your own keys before running.
# Later cells read them back through os.environ (Gemini and ElevenLabs clients).
# Do not commit or share the notebook with real keys filled in.
os.environ.update(
    {
        "GOOGLE_API_KEY": "YOUR_GOOGLE_API",
        "ELEVEN_API_KEY": "YOUR_ELEVEN_API",
    }
)

In [3]:
# [CELL 3] - Audio Recording Function
from IPython.display import Javascript, display
from google.colab import output
from base64 import b64decode

# JavaScript injected into the cell output by record_audio(). It defines a global
# `record(ms)` function (declared with `var` so it is reachable from a later
# eval_js call) that records the microphone for `ms` milliseconds and resolves to
# the recording encoded as a base64 data URL.
#
# Compared with a bare `new Promise(async resolve => ...)`:
#   * `record` is a plain async function, so a denied microphone permission or a
#     recorder error rejects the promise instead of leaving the caller waiting;
#   * the media stream's tracks are stopped in `finally`, releasing the microphone;
#   * every working variable is block-scoped rather than an implicit global;
#   * the FileReader result is read from `reader.result` (not `e.srcElement`).
RECORD_JS = """
const sleep = time => new Promise(resolve => setTimeout(resolve, time))
const b2text = blob => new Promise((resolve, reject) => {
  const reader = new FileReader()
  reader.onloadend = () => resolve(reader.result)
  reader.onerror = () => reject(reader.error)
  reader.readAsDataURL(blob)
})
var record = async time => {
  const stream = await navigator.mediaDevices.getUserMedia({ audio: true })
  try {
    const recorder = new MediaRecorder(stream)
    const chunks = []
    recorder.ondataavailable = e => chunks.push(e.data)
    const stopped = new Promise((resolve, reject) => {
      recorder.onstop = resolve
      recorder.onerror = e => reject(e.error || e)
    })
    recorder.start()
    await sleep(time)
    recorder.stop()
    await stopped
    return await b2text(new Blob(chunks))
  } finally {
    stream.getTracks().forEach(track => track.stop())
  }
}
"""

def record_audio(sec=5):
    """Record from the browser microphone and save the result to 'audio.wav'.

    Injects RECORD_JS into the cell output, calls its `record(ms)` function via
    `output.eval_js`, and base64-decodes the data URL that comes back.

    Args:
        sec: Recording length in seconds; must be a positive number.

    Returns:
        The path of the written file, always 'audio.wav'.

    Raises:
        ValueError: if `sec` does not describe a positive duration.
        RuntimeError: if the browser did not return a base64 data URL.
    """
    duration_ms = int(float(sec) * 1000)
    if duration_ms <= 0:
        raise ValueError(f"sec must be a positive number of seconds, got {sec!r}")

    print(f"Recording for {sec} seconds... Speak now!")
    display(Javascript(RECORD_JS))
    data_url = output.eval_js('record(%d)' % duration_ms)

    # A data URL has the form "data:<mime>;base64,<payload>"; only the payload
    # after the first comma is audio data.
    _header, comma, payload = (data_url or '').partition(',')
    if not comma or not payload:
        raise RuntimeError("Recording failed: the browser returned no audio data.")

    # NOTE(review): browsers' MediaRecorder usually emits a compressed container
    # (e.g. WebM/Ogg) rather than WAV; the 'audio.wav' name is kept so the
    # returned path stays unchanged for callers - confirm if the format matters.
    with open('audio.wav', 'wb') as f:
        f.write(b64decode(payload))
    return 'audio.wav'

In [4]:
# [CELL 4 - UPDATED] - Stable Brain Setup
import requests
from typing import Annotated, TypedDict
from langchain_core.tools import tool
from langgraph.graph import StateGraph, START, END
from langgraph.graph.message import add_messages
from langchain_google_genai import ChatGoogleGenerativeAI
from langgraph.prebuilt import ToolNode, tools_condition

# 1. Re-define the n8n tool (making sure it's in memory)
@tool
def trigger_automation_task(task_details: str):
    """Use this to send a task to Slack or Google Sheets via n8n."""
    # NOTE: the docstring above is kept verbatim on purpose - langchain's @tool
    # uses it as the tool description shown to the model, so it is runtime text.
    #
    # Posts {"task": task_details} as JSON to the n8n webhook and returns a short
    # status string for the model. Failures are reported in the returned string
    # (never as "Task sent!") so the model does not claim success when the
    # webhook was unreachable or rejected the request.

    # Your specific n8n URL
    N8N_WEBHOOK_URL = "YOUR_N8N_WEBHOOK_LINK"
    # Without a timeout requests waits indefinitely, which would stall the agent.
    REQUEST_TIMEOUT_SECONDS = 10

    try:
        response = requests.post(
            N8N_WEBHOOK_URL,
            json={"task": task_details},
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
    except requests.RequestException as exc:
        # Covers connection errors, timeouts and an invalid/placeholder URL.
        return f"Task NOT sent: could not reach n8n ({exc})"

    if not response.ok:
        # 4xx/5xx: the webhook answered but did not accept the task.
        return f"Task NOT sent! n8n responded with status {response.status_code}"
    return f"Task sent! n8n responded with status {response.status_code}"

tools = [trigger_automation_task]

# 2. Setup State
class State(TypedDict):
    # Schema of the state passed between graph nodes: a single `messages` key.
    # The list is annotated with `add_messages` (from langgraph.graph.message),
    # which LangGraph uses as the reducer for this key - presumably merging each
    # node's returned messages into the existing list rather than replacing it;
    # confirm against the langgraph docs.
    messages: Annotated[list, add_messages]

# 3. Chat model for the graph: gemini-2.5-flash with `tools` bound to it, so the
# model's replies may request a call to the n8n tool.
llm = (
    ChatGoogleGenerativeAI(model="gemini-2.5-flash")
    .bind_tools(tools)
)


def chatbot(state: State):
    """Graph node: run the tool-aware model over the conversation so far.

    Returns a partial state update whose "messages" list holds the single reply
    produced by the model.
    """
    reply = llm.invoke(state["messages"])
    return {"messages": [reply]}

# 4. Re-build and Compile the Graph
graph_builder = StateGraph(State)

# Nodes: the model turn and the executor for the tools it may request.
graph_builder.add_node("chatbot", chatbot)
graph_builder.add_node("tools", ToolNode(tools))

# Edges: execution starts at the chatbot; after each chatbot turn
# `tools_condition` picks the next step, and tool results are always fed back
# to the chatbot.
graph_builder.add_edge(START, "chatbot")
graph_builder.add_conditional_edges("chatbot", tools_condition)
graph_builder.add_edge("tools", "chatbot")

app = graph_builder.compile()

print("Brain successfully updated to stable Gemini 2.5 model.")

Brain successfully updated to stable Gemini 2.5 model.


In [11]:
# [CELL 5 - CORRECTED] - Robust Execution Loop
import google.generativeai as genai
from IPython.display import Audio, display
from elevenlabs.client import ElevenLabs
import os

# Gemini model used for the audio transcription step; also printed in the
# progress message of run_voice_assistant().
MODEL_NAME = "gemini-2.5-flash"

# Authenticate the google.generativeai SDK with the key stored in the
# environment (a missing GOOGLE_API_KEY raises KeyError here), then create the
# model handle that the transcription request is sent through.
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])
model_transcribe = genai.GenerativeModel(MODEL_NAME)

# Spoken when the graph produces no usable text reply.
_FALLBACK_REPLY = "I've processed that for you."

# ElevenLabs settings for the spoken reply.
# NOTE(review): the voice id is intended to be the stock "Rachel" voice and the
# model id the default multilingual model - confirm both against your account.
_ELEVEN_VOICE_ID = "21m00Tcm4TlvDq8ikWAM"
_ELEVEN_MODEL_ID = "eleven_multilingual_v2"


def _content_to_text(content):
    """Flatten a message's `content` (a string or a list of parts) to plain text."""
    if isinstance(content, str):
        return content.strip()
    pieces = []
    for part in content or []:
        if isinstance(part, str):
            pieces.append(part)
        elif isinstance(part, dict) and part.get("type") == "text":
            pieces.append(part.get("text", ""))
    return "".join(pieces).strip()


def _transcribe(audio_path):
    """Upload the recording and return Gemini's transcription, stripped."""
    # Upload the file to Google's temporary storage
    sample_file = genai.upload_file(path=audio_path)
    response = model_transcribe.generate_content([
        "Transcribe this audio exactly. If it's a command, just provide the command text.",
        sample_file
    ])
    return response.text.strip()


def _ask_brain(user_text):
    """Stream the LangGraph app and return the last non-empty text it produced."""
    reply = _FALLBACK_REPLY
    for event in app.stream({"messages": [("user", user_text)]}):
        for value in event.values():
            if not isinstance(value, dict) or not value.get("messages"):
                continue
            text = _content_to_text(getattr(value["messages"][-1], "content", ""))
            # Tool-call turns can carry empty content; they must not wipe out
            # the fallback or an earlier reply.
            if text:
                reply = text
    return reply


def _speak(text):
    """Synthesize `text` with ElevenLabs, save it to response.mp3 and play it."""
    eleven = ElevenLabs(api_key=os.environ["ELEVEN_API_KEY"])
    # `text_to_speech.convert` is used rather than the client-level `generate`
    # helper, which current SDK releases no longer provide (cell 1 installs the
    # latest version with -U). It yields the audio as chunks of bytes.
    audio_chunks = eleven.text_to_speech.convert(
        voice_id=_ELEVEN_VOICE_ID,
        text=text,
        model_id=_ELEVEN_MODEL_ID,
    )
    with open("response.mp3", "wb") as f:
        for chunk in audio_chunks:
            f.write(chunk)
    display(Audio("response.mp3", autoplay=True))


def _error_tip(exc):
    """Return a hint matching the failure, or None when no specific hint applies."""
    message = str(exc)
    if "429" in message or "RESOURCE_EXHAUSTED" in message:
        return ("TIP: Gemini rate limit/quota reached. Wait for the retry delay "
                "shown above, then run the cell again.")
    if "404" in message:
        return "TIP: If you get a 404, double check your API key in Google AI Studio."
    return None


def run_voice_assistant():
    """Run one voice interaction: record, transcribe, decide, and speak the reply.

    Steps:
      1. record 5 seconds from the microphone (record_audio);
      2. transcribe the recording with Gemini;
      3. send the text through the compiled LangGraph `app`;
      4. speak the reply with ElevenLabs and display an audio player.

    Returns:
        None. Progress and results are printed. Any failure is caught and
        reported with a hint instead of raising, so the demo cell always
        finishes.
    """
    try:
        # 1. Capture Mic
        audio_path = record_audio(sec=5)

        # 2. Transcription via Gemini (Multimodal)
        print(f"Gemini ({MODEL_NAME}) is processing audio...")
        user_text = _transcribe(audio_path)
        print(f"You said: {user_text}")
        if not user_text:
            # Nothing intelligible was recorded; don't spend an LLM call on it.
            print("No speech detected. Please try again.")
            return

        # 3. Decision via LangGraph Brain
        print("Brain is thinking...")
        # Note: Ensure you ran the cell defining 'app' (the compiled LangGraph)
        final_text = _ask_brain(user_text)

        # 4. Voice Response via ElevenLabs
        print("Generating voice reply...")
        _speak(final_text)
        print(f"Assistant: {final_text}")

    except Exception as e:
        print(f"An error occurred: {e}")
        tip = _error_tip(e)
        if tip:
            print(tip)

# RUN IT
run_voice_assistant()

Recording for 5 seconds... Speak now!


<IPython.core.display.Javascript object>

Gemini (gemini-2.5-flash) is processing audio...
You said: Task hello world
Brain is thinking...
An error occurred: Error calling model 'gemini-2.5-flash' (RESOURCE_EXHAUSTED): 429 RESOURCE_EXHAUSTED. {'error': {'code': 429, 'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/usage?tab=rate-limit. \n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 20, model: gemini-2.5-flash\nPlease retry in 54.534717328s.', 'status': 'RESOURCE_EXHAUSTED', 'details': [{'@type': 'type.googleapis.com/google.rpc.Help', 'links': [{'description': 'Learn more about Gemini API quotas', 'url': 'https://ai.google.dev/gemini-api/docs/rate-limits'}]}, {'@type': 'type.googleapis.com/google.rpc.QuotaFailure', 'violations': [{'quotaMetric': 'generativelanguage.googleapis.com