# In [None]:
import asyncio
import json
from typing import Dict, List

import numpy as np
import sounddevice as sd
import torch
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage, SystemMessage
from transformers import pipeline
from TTS.api import TTS

# ASGI application object; the WebSocket route in this module is registered on it.
app = FastAPI()

class ChatbotManager:
    """Voice chat backend: VAD -> speech-to-text -> LLM -> text-to-speech.

    One instance holds the loaded models plus the per-user conversation
    history, keyed by user id.

    NOTE: the VAD, STT and TTS calls are synchronous and CPU/GPU bound, so
    they block the event loop while they run even though the methods are
    declared ``async``.
    """

    # Sampling rate (Hz) passed to the VAD. Incoming audio is assumed to be
    # mono at this rate -- TODO confirm against what clients actually send.
    SAMPLE_RATE = 16000
    SYSTEM_PROMPT = "You are a helpful AI assistant."
    # Coqui TTS needs an explicit model; this is the English model used in
    # the Coqui documentation examples.
    TTS_MODEL_NAME = "tts_models/en/ljspeech/tacotron2-DDC"

    def __init__(self):
        """Create the LLM client and load the VAD, STT and TTS models."""
        self.llm = ChatOpenAI()
        # Conversation history per user id, stored as LangChain messages.
        self.sessions: Dict[str, List] = {}
        # For silero-vad, torch.hub.load returns (model, utils); the first
        # utility is get_speech_timestamps, which scans a whole clip.
        self.vad_model, vad_utils = torch.hub.load('snakers4/silero-vad', 'silero_vad')
        self._get_speech_timestamps = vad_utils[0]
        self.stt = pipeline("automatic-speech-recognition")
        self.tts = TTS(model_name=self.TTS_MODEL_NAME)

    async def process_audio(self, audio_data: np.ndarray):
        """Transcribe *audio_data* if it contains speech.

        Args:
            audio_data: Raw audio samples; converted to float32 before use.

        Returns:
            The transcribed text, or ``None`` when the clip is empty, the VAD
            finds no speech, or the transcript is blank.
        """
        # Both the VAD and the ASR pipeline work on float32 samples; JSON
        # payloads arrive as float64 (or ints) by default.
        samples = np.ascontiguousarray(audio_data, dtype=np.float32)
        if samples.size == 0:
            return None

        # VAD processing: an empty segment list means no speech was found.
        speech_segments = self._get_speech_timestamps(
            torch.from_numpy(samples),
            self.vad_model,
            sampling_rate=self.SAMPLE_RATE,
        )
        if not speech_segments:
            return None

        # STT processing: the pipeline returns a dict such as {"text": "..."}.
        transcript = self.stt(samples)["text"].strip()
        return transcript or None

    async def generate_response(self, user_id: str, message: str):
        """Append *message* to the user's history and return the LLM's reply.

        Args:
            user_id: Key identifying the conversation in ``self.sessions``.
            message: The user's utterance as plain text.

        Returns:
            The assistant's reply text.

        Raises:
            Exception: Whatever the LLM call raises; in that case the user's
                message is removed from the history again so a retry does not
                duplicate it.
        """
        history = self.sessions.setdefault(user_id, [])
        history.append(HumanMessage(content=message))

        try:
            # agenerate takes a batch: a list of message lists, one per prompt.
            result = await self.llm.agenerate([
                [SystemMessage(content=self.SYSTEM_PROMPT), *history]
            ])
        except Exception:
            history.pop()
            raise

        generation = result.generations[0][0]
        # Store the AI message itself (not the ChatGeneration wrapper) so the
        # history stays a valid list of messages for the next call.
        history.append(generation.message)
        return generation.text

    async def text_to_speech(self, text: str):
        """Synthesize *text* and return the waveform as a float32 ndarray.

        The TTS engine returns a plain sequence of samples; it is wrapped in
        an array so callers can rely on ndarray methods such as ``tolist()``.
        """
        waveform = self.tts.tts(text)
        return np.asarray(waveform, dtype=np.float32)

# Single module-level manager used by the WebSocket endpoint for every connection.
# It is instantiated at import time, so ChatbotManager.__init__ runs on import.
chatbot = ChatbotManager()

async def _send_error(websocket: WebSocket, detail: str) -> None:
    """Tell the client its last message was rejected, without closing the socket."""
    await websocket.send_json({"type": "error", "message": detail})


async def _send_reply(websocket: WebSocket, user_id: str, message: str) -> None:
    """Generate the assistant's answer to *message* and send it as text plus audio."""
    response = await chatbot.generate_response(user_id, message)
    audio_response = await chatbot.text_to_speech(response)
    await websocket.send_json({
        "type": "response",
        "text": response,
        # np.asarray accepts an ndarray as well as a plain list of samples,
        # and tolist() yields JSON-serializable Python floats either way.
        "audio": np.asarray(audio_response).tolist(),
    })


@app.websocket("/ws/{user_id}")
async def websocket_endpoint(websocket: WebSocket, user_id: str):
    """Serve one chat connection for *user_id*.

    Each incoming JSON message is an object with a ``type`` and a ``content``:

    * ``{"type": "text", "content": "<string>"}`` -- answered directly.
    * ``{"type": "audio", "content": [<samples>]}`` -- transcribed first;
      nothing is sent back when no speech is recognized.

    Replies have the form ``{"type": "response", "text": ..., "audio": [...]}``.
    Malformed messages get ``{"type": "error", "message": ...}`` and the
    connection stays open. Messages with any other ``type`` are ignored.
    """
    await websocket.accept()
    peer_disconnected = False
    try:
        while True:
            data = await websocket.receive_json()

            if not isinstance(data, dict) or "type" not in data:
                await _send_error(websocket, "expected a JSON object with 'type' and 'content'")
                continue

            kind = data["type"]
            content = data.get("content")

            if kind == "text":
                if not isinstance(content, str):
                    await _send_error(websocket, "'content' must be a string for text messages")
                    continue
                await _send_reply(websocket, user_id, content)

            elif kind == "audio":
                # Require a list explicitly: np.asarray(None, dtype=float32)
                # would otherwise silently produce NaN.
                if not isinstance(content, list):
                    await _send_error(websocket, "'content' must be a list of samples for audio messages")
                    continue
                try:
                    audio_data = np.asarray(content, dtype=np.float32)
                except (TypeError, ValueError):
                    await _send_error(websocket, "'content' must contain only numeric samples")
                    continue

                text = await chatbot.process_audio(audio_data)
                if text:
                    await _send_reply(websocket, user_id, text)

    except WebSocketDisconnect:
        # The client closing the connection is the normal way this loop ends.
        peer_disconnected = True
    except Exception as exc:
        print(f"Error: {exc}")
    finally:
        # Closing a socket the peer already closed raises RuntimeError, so
        # only close when we are the side ending the connection.
        if not peer_disconnected:
            try:
                await websocket.close()
            except RuntimeError:
                pass

if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn on all interfaces, port 8000.
    import uvicorn

    server_options = {"host": "0.0.0.0", "port": 8000}
    uvicorn.run(app, **server_options)