In [None]:
# Instala as depend√™ncias necess√°rias se faltarem no ambiente do notebook
try:
    import openai
    import whisper
    import sounddevice
    import pyttsx3
    import dotenv
    print("Depend√™ncias j√° instaladas.")
except ImportError:
    print("Instalando depend√™ncias... isso pode levar alguns minutos.")
    %pip install openai openai-whisper sounddevice scipy pyttsx3 python-dotenv

# Assistente Virtual Híbrido

Este notebook contém um assistente virtual com suporte a comandos locais e inteligência artificial:

- **STT (Speech to Text)**: OpenAI Whisper (Local e gratuito)
- **TTS (Text to Speech)**: pyttsx3
- **IA (Cérebro)**: GLM-4.7-Flash (via Hugging Face Router)
- **Comandos Locais**: Wikipedia, YouTube, Farmácia próxima

In [None]:
import argparse
from typing import Protocol, Optional, Iterable
from dataclasses import dataclass
import os
import urllib.parse
import webbrowser
import time
from dotenv import load_dotenv

load_dotenv()

In [None]:
class SpeechToText(Protocol):
    """Structural interface for any object that can produce the user's next utterance as text."""

    def listen(self, timeout: Optional[float] = None) -> Optional[str]:
        """Return the captured text, or ``None`` when nothing was captured.

        ``timeout`` is an optional float whose exact meaning is left to the implementation.
        """
        ...

class TextToSpeech(Protocol):
    """Structural interface for any object that can deliver a reply to the user."""

    def speak(self, text: str) -> None:
        """Deliver ``text`` to the user; nothing is returned."""
        ...

In [None]:
class WhisperSTT:
    """Speech-to-text that records from the microphone and transcribes with a local Whisper model.

    The heavy third-party modules are imported inside ``__init__`` and kept on the instance,
    so the class can be defined even when they are not installed.
    """

    # Sampling rate (Hz) used both for the recording and for the WAV file handed to Whisper.
    _SAMPLE_RATE = 44100

    def __init__(self, model_size: str = "base", language: str = "pt", duration: int = 5):
        """Load the Whisper model and remember the recording defaults.

        Args:
            model_size: Name passed to ``whisper.load_model``.
            language: Language code passed to ``transcribe``.
            duration: Default recording length in seconds, used when ``listen`` gets no timeout.
        """
        import whisper
        import sounddevice as sd
        import scipy.io.wavfile as wav
        import tempfile
        import os

        self._whisper = whisper
        self._sd = sd
        self._wav = wav
        self._tempfile = tempfile
        self._os = os
        self._model = whisper.load_model(model_size)
        self._language = language
        self._duration = duration

    def listen(self, timeout: Optional[float] = None) -> Optional[str]:
        """Record one utterance and return its transcription.

        Args:
            timeout: Recording length in seconds; falls back to the constructor's ``duration``.

        Returns:
            The stripped transcription, or ``None`` when it is empty or any step failed
            (the error is printed, not raised).
        """
        duration = self._duration if timeout is None else timeout
        fs = self._SAMPLE_RATE
        print(f"\n[Ouvindo...] Fale agora ({duration}s).")

        temp_filename = None
        try:
            recording = self._sd.rec(int(duration * fs), samplerate=fs, channels=1, dtype='int16')
            self._sd.wait()
            print("[Processando...]")

            # delete=False: only the path is needed here; the file is written after this
            # handle is closed and removed in the ``finally`` clause below.
            with self._tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
                temp_filename = f.name

            self._wav.write(temp_filename, fs, recording)
            result = self._model.transcribe(temp_filename, language=self._language, fp16=False)
            text = result["text"].strip()
            return text or None
        except Exception as e:
            print(f"Erro no Whisper: {e}")
            return None
        finally:
            # Runs on success and on failure, so a failed write/transcription no longer
            # leaves the recording behind in the temp directory.
            if temp_filename is not None:
                try:
                    self._os.remove(temp_filename)
                except OSError:
                    # Best-effort cleanup: a file that is already gone or locked is not fatal.
                    pass

class TextInputSTT:
    """Text stand-in for speech recognition: replays scripted lines or reads from the keyboard."""

    def __init__(self, inputs: Optional[Iterable[str]] = None):
        # None selects interactive mode; any iterable (even an empty one) selects scripted
        # mode and is copied into a list that ``listen`` consumes from the front.
        self._inputs = None if inputs is None else list(inputs)

    def listen(self, timeout: Optional[float] = None) -> Optional[str]:
        """Return the next line of input; ``timeout`` is accepted but not used.

        Scripted mode returns the queued lines unchanged, then ``None`` once they run out.
        Interactive mode returns the stripped line typed by the user, or ``None`` on EOF.
        """
        scripted = self._inputs
        if scripted is None:
            try:
                typed = input("\nDigite um comando (ou 'sair'): ")
            except EOFError:
                return None
            return typed.strip()
        return scripted.pop(0) if scripted else None

In [None]:
class Pyttsx3TTS:
    """Text-to-speech backed by a pyttsx3 engine; every reply is also printed."""

    def __init__(self, language: str = "pt-BR", rate: Optional[int] = None):
        """Create the engine, optionally set the speech rate, and pick a matching voice.

        Args:
            language: Language tag of the desired voice (e.g. ``"pt-BR"``).
            rate: Value for the engine's ``"rate"`` property; left untouched when ``None``.
        """
        import pyttsx3
        self._engine = pyttsx3.init()
        self._language = language
        if rate is not None:
            self._engine.setProperty("rate", rate)
        self._select_voice()

    @staticmethod
    def _normalise_tag(raw) -> str:
        """Turn one entry of a voice's ``languages`` list into a lowercase ``xx-yy`` tag."""
        # NOTE(review): some pyttsx3 drivers reportedly expose language entries as bytes with
        # a leading control byte; decode and drop non-printable characters defensively.
        if isinstance(raw, bytes):
            raw = raw.decode("utf-8", errors="ignore")
        printable = "".join(ch for ch in str(raw) if ch.isprintable())
        return printable.strip().lower().replace("_", "-")

    def _select_voice(self) -> None:
        """Select the installed voice that best matches ``self._language``, if any.

        A voice whose language tag (or name) carries the full tag wins immediately;
        otherwise the first voice sharing only the primary subtag (``"pt"`` for ``"pt-BR"``)
        is used. When nothing matches, the engine's default voice is left in place.
        """
        import re

        wanted = self._language.lower().replace("_", "-")
        primary = wanted.split("-")[0]
        if not primary:
            return

        fallback = None
        for voice in self._engine.getProperty("voices") or []:
            name = (getattr(voice, "name", "") or "").lower()
            tags = [self._normalise_tag(raw) for raw in (getattr(voice, "languages", None) or [])]

            if wanted in tags or (len(wanted) > 2 and wanted in name.replace("_", "-")):
                self._engine.setProperty("voice", voice.id)
                return

            if fallback is None:
                # Compare whole name tokens, not substrings, so that e.g. "pt" does not
                # match inside an unrelated word.
                name_tokens = re.split(r"[^a-z0-9]+", name)
                if any(tag.split("-")[0] == primary for tag in tags) or primary in name_tokens:
                    fallback = voice.id

        if fallback is not None:
            self._engine.setProperty("voice", fallback)

    def speak(self, text: str) -> None:
        """Print ``text`` and speak it; audio errors are printed instead of raised."""
        print(f"🤖 Assistente: {text}")
        try:
            self._engine.say(text)
            self._engine.runAndWait()
        except Exception as e:
            print(f"(Erro áudio: {e})")

class SilentTTS:
    """Text-only fallback for text-to-speech: prints the reply instead of speaking it."""

    def speak(self, text: str) -> None:
        """Print ``text`` with the assistant prefix; no audio is produced."""
        # The prefix was mojibake (UTF-8 bytes decoded with the wrong charset); restored.
        print(f"🤖 Assistente (silencioso): {text}")

In [None]:
def _is_transient_ai_error(exc: Exception) -> bool:
    """Tell whether ``exc`` looks like a gateway error or timeout that is worth retrying."""
    if getattr(exc, "status_code", None) in (502, 503, 504):
        return True
    if "timeout" in type(exc).__name__.lower():
        return True
    # Fall back to the message text; lower-cased so "Request timed out." and "Timeout" match.
    message = str(exc).lower()
    return any(marker in message for marker in ("502", "503", "504", "timeout", "timed out"))


def get_glm_response(
    text: str,
    model: str = "zai-org/GLM-4.7-Flash",
    max_retries: int = 3,
    retry_delay: float = 2,
) -> Optional[str]:
    """Ask the chat model for a reply to ``text``, retrying transient failures.

    The API token is read from the ``HF_TOKEN`` environment variable and requests go to the
    Hugging Face router through the OpenAI client.

    Args:
        text: The user's message.
        model: Model identifier sent to the chat-completions endpoint.
        max_retries: Total number of attempts.
        retry_delay: Base delay in seconds; attempt ``k`` (1-based) waits ``retry_delay * k``.

    Returns:
        The model's reply, ``None`` for blank input, or a Portuguese message starting with
        ``"Erro"`` when the token is missing or the request ultimately fails.
    """
    if not text or not text.strip():
        return None

    hf_token = os.getenv("HF_TOKEN")
    if not hf_token or hf_token == "seu_token_hf_aqui":
        return "Erro: HF_TOKEN não configurado no arquivo .env"

    # Import and build the client once instead of on every attempt.
    try:
        from openai import OpenAI

        client = OpenAI(
            base_url="https://router.huggingface.co/v1",
            api_key=hf_token,
            timeout=30.0,
        )
    except Exception as e:
        return f"Erro na IA: {e}"

    messages = [
        {"role": "system", "content": "Você é um assistente virtual útil e conciso. Responda em português."},
        {"role": "user", "content": text},
    ]

    for attempt in range(max_retries):
        try:
            response = client.chat.completions.create(model=model, messages=messages)
            return response.choices[0].message.content
        except Exception as e:
            if _is_transient_ai_error(e) and attempt < max_retries - 1:
                time.sleep(retry_delay * (attempt + 1))
                continue

            error_str = str(e)
            lowered = error_str.lower()
            # An HTML body in the error means a proxy error page, not an API error payload.
            if "<!doctype html" in lowered or "<html" in lowered:
                return "Erro: Servidor da IA indisponível ou em timeout. Tente novamente."
            return f"Erro na IA: {error_str}"
    return "Erro: Falha após várias tentativas."

In [None]:
@dataclass
class ActionResult:
    """Outcome of handling one user utterance."""

    success: bool  # True when a local command ran or the AI produced a reply
    message: str  # text to be shown/spoken to the user
    is_ai: bool = False  # True when ``message`` came from get_glm_response


def _strip_keywords(sentence: str, keywords) -> str:
    """Remove every keyword from ``sentence`` and collapse the leftover whitespace."""
    for keyword in keywords:
        sentence = sentence.replace(keyword, " ")
    return " ".join(sentence.split())


def parse_and_execute(text: str) -> ActionResult:
    """Run the local command named in ``text``, or fall back to the AI.

    Local commands are matched by keyword on the lower-cased text, in this order:
    Wikipedia search, YouTube search, nearby pharmacies. Each opens a browser tab through
    ``webbrowser.open``. Both accented and unaccented Portuguese spellings are recognised.

    Args:
        text: The recognised utterance; ``None``, empty and whitespace-only are rejected.

    Returns:
        An ``ActionResult`` describing what was done; ``is_ai`` is set for AI replies.
    """
    s = (text or "").strip().lower()
    if not s:
        return ActionResult(False, "Nenhum texto reconhecido")

    # Local commands
    if "wikipedia" in s or "wikipédia" in s:
        q = _strip_keywords(s, ("wikipedia", "wikipédia", "pesquisar"))
        url = "https://pt.wikipedia.org/wiki/Special:Search?search=" + urllib.parse.quote_plus(q)
        webbrowser.open(url)
        return ActionResult(True, f"Pesquisando Wikipedia: {q}")

    if any(keyword in s for keyword in ("youtube", "vídeo", "video")):
        q = _strip_keywords(s, ("youtube", "vídeo", "video", "pesquisar"))
        url = "https://www.youtube.com/results?search_query=" + urllib.parse.quote_plus(q)
        webbrowser.open(url)
        return ActionResult(True, f"Pesquisando YouTube: {q}")

    # The accented literal was mojibake and could never match real input; restored.
    if "farmácia" in s or "farmacia" in s:
        webbrowser.open("https://www.google.com/maps/search/farmacia+perto+de+mim")
        return ActionResult(True, "Abrindo farmácias próximas")

    # Not a local command: ask the AI
    ai_response = get_glm_response(text)
    if ai_response:
        return ActionResult(True, ai_response, is_ai=True)

    return ActionResult(False, "Comando não reconhecido e IA indisponível")

In [None]:
class Assistant:
    """Conversation loop: listen, handle the utterance, speak the result."""

    # Utterances (compared lower-cased and stripped) that end the conversation.
    _EXIT_COMMANDS = ("sair", "encerrar", "tchau")

    def __init__(self, stt: "SpeechToText", tts: "TextToSpeech"):
        """Store the speech-to-text and text-to-speech back ends used by ``run``."""
        self._stt = stt
        self._tts = tts

    def run(self, max_empty_turns: Optional[int] = 10):
        """Greet the user and process utterances until an exit command is heard.

        Args:
            max_empty_turns: Number of consecutive empty results from ``listen`` after which
                the loop says goodbye and stops. Without this bound an input source that
                has run dry (it keeps returning ``None``) would make the loop spin forever.
                Pass ``None`` to wait indefinitely.
        """
        self._tts.speak("Olá! Sou seu assistente com IA. Como posso ajudar?")
        empty_turns = 0
        while True:
            text = self._stt.listen()
            if not text:
                empty_turns += 1
                if max_empty_turns is not None and empty_turns >= max_empty_turns:
                    self._tts.speak("Até logo!")
                    break
                continue
            empty_turns = 0  # only *consecutive* empty turns count
            print(f"🎤 Você: {text}")

            if text.lower().strip() in self._EXIT_COMMANDS:
                self._tts.speak("Até logo!")
                break

            result = parse_and_execute(text)
            self._tts.speak(result.message)

In [None]:
# Inside the notebook the assistant reads typed text by default.
stt = TextInputSTT()
try:
    tts = Pyttsx3TTS()
except Exception as e:
    # Audio is optional: report why the speech engine is unavailable and fall back to
    # printing only. ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
    # SystemExit propagate.
    print(f"(TTS indisponível, usando modo silencioso: {e})")
    tts = SilentTTS()

assistant = Assistant(stt, tts)
assistant.run()