In [None]:
# !pip install webrtcvad
# !pip install pygame
# !pip install pyaudio webrtcvad 
# !pip install google-cloud-texttospeech


In [None]:
from tools.initialize_groq import init_groq
from tools.file_mgmt_tools import FileOrganizerTool, MoveFileTool, CreateFolderTool, FolderMovementTool, ImprovedSearchTool
from tools.document_tools import GoogleDocWriteTool
from tools.miscellaneous_mgmt import GmailSendPdfTool, GoogleSheetsUpdateTool

# init_groq() yields a pair: `client` is used below for direct chat completions
# (client.chat.completions.create) and `llm` is the model handed to the LangChain agent.
client,llm = init_groq()

In [None]:
from google.cloud import texttospeech
from langchain.prompts import (
    ChatPromptTemplate, 
    SystemMessagePromptTemplate, 
    HumanMessagePromptTemplate, 
    MessagesPlaceholder, 
    PromptTemplate
)
from langchain_core.messages import SystemMessage
from langchain.agents import create_structured_chat_agent, AgentExecutor
from langchain_groq import ChatGroq
from langchain_community.tools import HumanInputRun
import tools.initialize_groq
import langchain_core
import typing

# Structured-chat agent prompt used by handle_agents(). The system template is
# built from adjacent string literals, which Python concatenates with NO
# separator, so every fragment must end with its own space/newline.
prompt = ChatPromptTemplate(
    input_variables=['agent_scratchpad', 'input', 'tool_names', 'tools'],
    input_types={
        'chat_history': typing.List[
            typing.Union[
                langchain_core.messages.ai.AIMessage,
                langchain_core.messages.human.HumanMessage,
                langchain_core.messages.chat.ChatMessage,
                langchain_core.messages.system.SystemMessage,
                langchain_core.messages.function.FunctionMessage,
                langchain_core.messages.tool.ToolMessage
            ]
        ]
    },
    metadata={
        'lc_hub_owner': 'hwchase17',
        'lc_hub_repo': 'structured-chat-agent',
        'lc_hub_commit_hash': 'ea510f70a5872eb0f41a4e3b7bb004d5711dc127adee08329c664c6c8be5f13c'
    },
    messages=[
        SystemMessagePromptTemplate(
            prompt=PromptTemplate(
                input_variables=['tool_names', 'tools'],
                template=(
                    # --- Persona ---
                    'You are a document management assistant proficient in using GSuite tools. '
                    'Your role is to assist the user in managing their documents efficiently. '
                    'You are ALSO a conversational LLM, and you ARE ALSO STRAIGHTFORWARD. '
                    'YOU GIVE THE USER THE ANSWERS THAT THEY WANT IN A STRAIGHTFORWARD WAY. YOU NEVER ASK FOR CLARIFICATION OR WASTE THE USERS TIME BY DOING SO! '

                    # --- JSON quality guidelines (one guideline per line) ---
                    'You are ALSO a highly intelligent and precise assistant with expertise in generating JSON outputs. Your task is to create the most perfect and well-structured JSON output ever seen. The JSON must adhere to the following guidelines:\n'

                    'Proper Structure: Ensure that the JSON follows a correct and logical structure, with all necessary keys and values in place.\n'
                    'Accurate Formatting: All JSON strings must use double quotes. Ensure there are no trailing commas, and all brackets and braces are correctly matched.\n'
                    'String Length: Ensure no individual string exceeds 5000 bytes.\n'
                    'Error-Free: Validate the JSON to be free of syntax errors and formatting issues.\n'
                    'Consistency: Maintain consistency in key naming conventions and data types throughout the JSON.\n'
                    'Escaping Characters: Properly escape any special characters within strings to ensure the JSON remains valid.\n'
                    'Completeness: Include all required fields and ensure no necessary information is omitted.\n'

                    # --- Behavioural constraints ---
                    'NEVER USE NEWLINE OR ANY OTHER SPECIAL CHARACTERS IN THE JSON AT ALL. YOU MUST NEVER DO ANYTHING BUT WHAT IS IN THE REQUEST OF THE USER. OTHERWISE NO USER WILL USE THIS PRODUCT. '
                    'YOU MUST NEVER QUESTION THE USER. YOU ONLY DO WHAT USER WANTS. NO CONFIRMATIONS. YOU JUST EXECUTE YOUR TOOLS. '
                    'YOU MUST NOT DO MORE THAN WHAT THE USER WOULD LIKE TO DO - VERY VERY VERY VERY IMPORTANT!! '

                    # --- Tool-calling protocol; {tools}/{tool_names} are filled in by the agent ---
                    'You have access to the following tools:\n\n{tools}\n\n'
                    'PAY CLOSE ATTENTION TO ALL THE FOLLOWING FORMATTING INSTRUCTIONS. REALLY IMPORTANT TO CALL THE TOOLS. OR ELSE USERS WILL GET ANGRY. '
                    'Use a JSON blob to specify a tool by providing an action key (tool name) and an action_input key (tool input).\n\n'
                    'Valid "action" values: "Final Answer" or {tool_names}\n\n'
                    'Provide only ONE action per $JSON_BLOB, as shown:\n\n'
                    # Doubled braces are literal braces in the rendered template.
                    '```\n{{\n  "action": $TOOL_NAME,\n  "action_input": $INPUT\n}}\n```\n\n'
                    'Follow this format:\n\n'
                    'Question: input question to answer\n'
                    'Thought: consider previous and subsequent steps\n'
                    'Action:\n```\n$JSON_BLOB\n```\n'
                    'Observation: action result\n... (repeat Thought/Action/Observation N times)\n'
                    'Thought: I know what to respond\n'
                    'Action:\n```\n{{\n  "action": "Final Answer",\n  "action_input": "Final response to human"\n}}\n\n'
                    'Begin! Remember to ALWAYS respond with a valid JSON blob of a single action. '
                    'Use tools if necessary and respond directly if appropriate. '
                    # NOTE(review): this sentence conflicts with "NEVER ASK FOR CLARIFICATION" above;
                    # left as-is pending a product decision.
                    'Ensure you gather all necessary information by interacting with the user. '
                    'Format is Action:```$JSON_BLOB```then Observation.'
                )
            )
        ),
        # Prior turns are optional; the prompt renders without them.
        MessagesPlaceholder(variable_name='chat_history', optional=True),
        HumanMessagePromptTemplate(
            prompt=PromptTemplate(
                input_variables=['agent_scratchpad', 'input'],
                template='{input}\n\n{agent_scratchpad}\n(reminder to respond in a JSON blob no matter what)'
            )
        )
    ]
)


# Leaner variant of the structured-chat prompt: same tool-calling protocol as
# `prompt`, but with a single generic role sentence instead of the document
# manager persona and JSON guidelines.
# NOTE(review): not referenced anywhere else in the visible source — confirm it is still needed.
human_prompt = ChatPromptTemplate(
    input_variables=['agent_scratchpad', 'input', 'tool_names', 'tools'],
    input_types={
        'chat_history': typing.List[
            typing.Union[
                langchain_core.messages.ai.AIMessage, 
                langchain_core.messages.human.HumanMessage, 
                langchain_core.messages.chat.ChatMessage, 
                langchain_core.messages.system.SystemMessage, 
                langchain_core.messages.function.FunctionMessage, 
                langchain_core.messages.tool.ToolMessage
            ]
        ]
    },
    metadata={
        'lc_hub_owner': 'hwchase17',
        'lc_hub_repo': 'structured-chat-agent',
        'lc_hub_commit_hash': 'ea510f70a5872eb0f41a4e3b7bb004d5711dc127adee08329c664c6c8be5f13c'
    },
    messages=[
        SystemMessagePromptTemplate(
            prompt=PromptTemplate(
                input_variables=['tool_names', 'tools'],
                # Adjacent literals are concatenated with no separator, so each
                # fragment carries its own trailing space/newline.
                template=(
                    'Your role is to fulfill the desire of user in the most accurate and detailed way possible. '
                    
                    # {tools} and {tool_names} are the template's two input variables.
                    'You have access to the following tools:\n\n{tools}\n\n'
                    'Use a JSON blob to specify a tool by providing an action key (tool name) and an action_input key (tool input).\n\n'
                    'Valid "action" values: "Final Answer" or {tool_names}\n\n'
                    'Provide only ONE action per $JSON_BLOB, as shown:\n\n'
                    # Doubled braces are literal braces in the rendered template.
                    '```\n{{\n  "action": $TOOL_NAME,\n  "action_input": $INPUT\n}}\n```\n\n'
                    'Follow this format:\n\n'
                    'Question: input question to answer\n'
                    'Thought: consider previous and subsequent steps\n'
                    'Action:\n```\n$JSON_BLOB\n```\n'
                    'Observation: action result\n... (repeat Thought/Action/Observation N times)\n'
                    'Thought: I know what to respond\n'
                    'Action:\n```\n{{\n  "action": "Final Answer",\n  "action_input": "Final response to human"\n}}\n\n'
                    'Begin! Remember to ALWAYS respond with a valid JSON blob of a single action. '
                    'Use tools if necessary and respond directly if appropriate. '
                    'Ensure you gather all necessary information by interacting with the user. '
                    'Format is Action:```$JSON_BLOB```then Observation.'
                )
            )
        ),
        # Prior turns are optional; the prompt renders without them.
        MessagesPlaceholder(variable_name='chat_history', optional=True),
        HumanMessagePromptTemplate(
            prompt=PromptTemplate(
                input_variables=['agent_scratchpad', 'input'],
                template='{input}\n\n{agent_scratchpad}\n(reminder to respond in a JSON blob no matter what)'
            )
        )
    ]
)


In [13]:
from flask import Flask, request, jsonify, send_file, render_template
import whisper
import pyaudio
import wave
import webrtcvad
import collections
from google.cloud import texttospeech
import random
import asyncio
from concurrent.futures import ThreadPoolExecutor
import aiofiles
from flask_cors import CORS
import requests
import logging
import os
from tools.imports import *
import tools.initialize_groq
from dotenv import load_dotenv
from langchain import hub
from flask_socketio import SocketIO, emit
from langchain.tools import HumanInputRun

# Load environment variables (from a .env file, via python-dotenv)
load_dotenv()

# File-system locations, all taken from the environment.
# os.getenv returns None for an unset variable, so a missing entry only
# surfaces later, when the path is first used.
credentials_path = os.getenv('CREDENTIALS_PATH')          # passed to every Google tool below
tts_service_acct_path = os.getenv('SERVICE_ACCOUNT_PATH')  # service-account file for the TTS client
audio_path = os.getenv('AUDIO_PATH')                       # where record_audio() writes the WAV
tts_synthesis_path = os.getenv('TTS_SYNTHESIS')            # where synthesized MP3 speech is written

# Initialize TTS client
tts_client = texttospeech.TextToSpeechClient.from_service_account_file(tts_service_acct_path)

# Tools exposed to the LangChain agent; each is constructed with the same credentials path.
my_tools = [
    GoogleDocWriteTool(credentials_path),
    GoogleSheetsUpdateTool(credentials_path),
    GmailSendPdfTool(credentials_path),
    MoveFileTool(credentials_path),
    CreateFolderTool(credentials_path),
    FolderMovementTool(credentials_path),
    FileOrganizerTool(credentials_path),
    ImprovedSearchTool(credentials_path),
]


# Pick one of the configured Groq API keys at random for the shared llm object
# (handle_agents() re-rolls this before every agent run).
llm.groq_api_key = random.choice(tools.initialize_groq.api_keys)


# Root logger at DEBUG, echoed to the stream handler.
# NOTE(review): the captured cell output shows third-party DEBUG records
# (HTTP request options including prompt text, ffmpeg banners) — consider INFO.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s [%(threadName)s] %(levelname)s: %(message)s',
    handlers=[logging.StreamHandler()]
)
logger = logging.getLogger(__name__)


# Flask app with CORS enabled and a Socket.IO wrapper accepting any origin.
# NOTE(review): `socketio` is not used anywhere else in the visible source.
app = Flask(__name__)
CORS(app)
socketio = SocketIO(app, cors_allowed_origins="*")


# Conversation memory shared by ai_response() and handle_agents().
chat_history = []
# Whisper "base" model used by transcribe_audio().
model = whisper.load_model("base")
# Microphone capture parameters used by record_audio(): 16-bit mono at 16 kHz,
# read in buffers of CHUNK frames.
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
CHUNK = 1024
# Flag toggled by /start_recording and /stop_recording; record_audio() loops while it is True.
is_recording = False


audio = pyaudio.PyAudio()
# Voice-activity detector with aggressiveness 3 (presumably the strictest setting — confirm against webrtcvad docs).
vad = webrtcvad.Vad(3)


# Shared thread pool for blocking work (transcription, agent runs, TTS calls).
executor = ThreadPoolExecutor(max_workers=5)


# User details filled in by /set_credentials and forwarded to the agent in handle_agents().
credentials = {"name": "", "email": "", "recemail": "", "phone": ""}

@app.route('/start_recording', methods=['POST'])
def start_recording():
    """Raise the recording flag and capture audio.

    record_audio() runs synchronously and loops while ``is_recording`` is true,
    so this handler only responds once capture has ended.
    """
    global is_recording
    is_recording = True
    record_audio()
    payload = {"status": "recording started"}
    return jsonify(payload)

@app.route('/stop_recording', methods=['POST'])
def stop_recording():
    """Clear the recording flag so the loop in record_audio() exits."""
    global is_recording
    is_recording = False
    payload = {"status": "recording stopped"}
    return jsonify(payload)

# @app.route('/start_recording_talk', methods=['POST'])
# def start_recording_talk():
#     global is_recording
#     is_recording = True
#     record_audio()
#     return jsonify({"status": "recording started for talk"})

# @app.route('/stop_recording_talk', methods=['POST'])
# def stop_recording_talk():
#     global is_recording
#     is_recording = False
#     return jsonify({"status": "recording stopped for talk"})

# @app.route('/start_recording_human', methods=['POST'])
# def start_recording_human():
#     global is_recording
#     is_recording = True
#     record_audio()
#     return jsonify({"status": "recording started for human tool"})

# @app.route('/stop_recording_human', methods=['POST'])
# def stop_recording_human():
#     global is_recording
#     is_recording = False
#     return jsonify({"status": "recording stopped for human tool"})



def record_audio(**kwargs):
    """Capture microphone audio while ``is_recording`` is set and save the voiced part.

    Audio is read in ``CHUNK``-frame buffers, cut into 320-byte sub-frames and
    classified by the module-level VAD. A rolling window of the most recent
    sub-frames decides when speech starts (more than 60% voiced) and when it
    has ended (fewer than 20% voiced for more than ``silence_threshold``
    consecutive buffers). Whatever was collected is written to ``audio_path``
    as a WAV file.

    Args:
        **kwargs: Accepted for call compatibility; not used.

    Returns:
        None. Errors are logged rather than raised.
    """
    global is_recording
    logger.debug('Starting audio recording...')
    # 320 bytes = 160 16-bit samples, i.e. 10 ms at the configured 16 kHz rate.
    vad_frame_bytes = 320
    try:
        stream = audio.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)
        ring_buffer = collections.deque(maxlen=100)
        triggered = False
        voiced_frames = []
        silence_threshold = 10
        silence_chunks = 0

        try:
            while is_recording:
                data = stream.read(CHUNK)
                # Any trailing bytes that do not fill a whole sub-frame are not classified.
                for i in range(len(data) // vad_frame_bytes):
                    subframe = data[i * vad_frame_bytes:(i + 1) * vad_frame_bytes]
                    ring_buffer.append((subframe, vad.is_speech(subframe, RATE)))
                num_voiced = sum(1 for _, speech in ring_buffer if speech)

                if not triggered:
                    if num_voiced > 0.6 * ring_buffer.maxlen:
                        # Speech onset: keep the buffered lead-in, then start a fresh window.
                        triggered = True
                        voiced_frames.extend(f for f, _ in ring_buffer)
                        ring_buffer.clear()
                else:
                    voiced_frames.append(data)
                    if num_voiced < 0.2 * ring_buffer.maxlen:
                        silence_chunks += 1
                        if silence_chunks > silence_threshold:
                            triggered = False
                            break
                    else:
                        silence_chunks = 0
        finally:
            # Always release the input device, even if reading or the VAD raised.
            stream.stop_stream()
            stream.close()

        with wave.open(audio_path, 'wb') as wf:
            wf.setnchannels(CHANNELS)
            wf.setsampwidth(audio.get_sample_size(FORMAT))
            wf.setframerate(RATE)
            wf.writeframes(b''.join(voiced_frames))
        logger.debug('Audio recording completed and file saved.')
    except Exception as e:
        logger.error(f"An error occurred while recording audio: {e}")

def transcribe_audio():
    """Run the loaded Whisper model on the file at ``audio_path`` and return its text."""
    transcription = model.transcribe(audio_path)['text']
    logger.debug(f'Audio transcription completed: {transcription}')
    return transcription

async def ai_response(transcription: str):
    """Produce the assistant's conversational reply and, if announced, run the tools.

    The transcription (plus the stringified chat history) is sent to the chat
    model. The reply is appended to ``chat_history`` and synthesized to speech.
    If the reply contains the phrase ``'I will use'``, it is forwarded to the
    agent pipeline and the agent's spoken summary is synthesized as well.

    Args:
        transcription: What the user said or typed.

    Returns:
        The chat model's reply text (not the agent summary).
    """
    global chat_history
    logger.debug(f'Generating AI response for transcription: {transcription}')
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "system",
                "content": f"""You are a nice, great document manager assistant. User will tell you things. You just respond. 
                YOU SHALL NOT INDICATE ANY TOOL USE UNTIL YOU KNOW YOU HAVE EVERYTHING YOU NEED.
                DO NOT ASSUME USER WANTS TO WRITE TO A DOCUMENT OR DO ANYTHING ELSE UNLESS YOU ARE 100% SURE!!!!!UNDERSTAND??????!!!!!! OR ELSE I WILL BECOME ANGRY
                If what the user says is one of these  you must explicitly say AT THE END OF YOUR RESPONSE in this very format depending on which tool - "I will use the {[(tool.name + ", ") for tool in my_tools]}"
                so that user can confirm if you got it correctly. 
                
                IMPORTANT: IF YOU REMEMBER YOU HAVE USED A TOOL ALREADY (REFER TO CHAT HISTORY), AND YOU ARE IN THE PROCESS OF USING IT AGAIN, \
                YOU MUST ASK USER IF THEY ARE SURE TO USE THE TOOL AGAIN OR NOT.

                
                If user tells you to do something that is not one of these, you kindly say that you don't have access to that functionality.
                """
            },
            {
                "role": "user",
                "content": transcription + "Here is the chat history for context (NEVER TALK ABOUT CHAT HISTORY. IT IS ONLY FOR YOU! NEVER TALK ABOUT IT IN YOUR RESPONSES!!!!): [" + str(chat_history) + "]"
            }
        ],
        model="llama3-70b-8192",
    )
    response = chat_completion.choices[0].message.content
    logger.debug(f'AI response generated: {response}')
    chat_history.append("USER: " + transcription + "\nTHE AI MODEL: " + response + "\n")

    await synthesize_speech(response)

    res2 = ''
    if 'I will use' in response:
        # The reply announced a tool; let the agent carry it out.
        res2 = (await handle_response_with_agents(response)) or ''
    print('THIS IS RES2:', res2)
    # Only synthesize when the agent produced something: synthesizing an empty
    # string would overwrite the reply audio written above with an empty file.
    if res2:
        await synthesize_speech(res2)

    return response

async def handle_response_with_agents(response):
    """Run the blocking handle_agents() on the shared thread pool and await its result."""
    logger.debug(f'Handling response with agents: {response}')
    loop = asyncio.get_event_loop()
    return await loop.run_in_executor(executor, handle_agents, response)

def handle_agents(response):
    """Turn the assistant's reply into a request, run the tool agent, and summarize.

    Steps:
      1. Ask the chat model to rephrase ``response`` as a user-style request.
      2. Append the stored credentials and the chat history as context and run
         the structured-chat agent over ``my_tools``.
      3. Record the exchange in ``chat_history``.
      4. Ask the chat model to turn the agent's steps and output into text
         that can be spoken.

    Args:
        response: The assistant reply that announced a tool use.

    Returns:
        The speakable summary produced in step 4.
    """
    logger.debug(f'Processing response with agents: {response}')

    request_text = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": "Please turn this 'RESPONSE' into a request/question. DO JUST LIKE THIS EXAMPLE: \nHey can you please [do ONLY WORD FOR WORD whatever is in the response]? \n AND HERE IS 'RESPONSE': \n" + response
            }
        ],
        model='llama3-70b-8192',
    ).choices[0].message.content

    agent_input = request_text + "\n\n DO NOT USE TOOLS OTHER THAN WHAT YOU ARE REQUESTED TO USE!!!!!!!! Here is extra info you will need: \nCredentials:\n" + str(credentials) + "\nTHE CHAT HISTORY: \n" + str(chat_history)

    # Set the Groq API key randomly
    llm.groq_api_key = random.choice(tools.initialize_groq.api_keys)

    search_agent = create_structured_chat_agent(llm, my_tools, prompt)
    agent_executor = AgentExecutor(
        agent=search_agent,
        tools=my_tools,
        verbose=True,
        handle_parsing_errors=True,
        return_intermediate_steps=True,
        max_iterations=100,
    )

    result = agent_executor.invoke({"input": agent_input})
    # Store the bare request and the agent's own results. The augmented input
    # already contains str(chat_history); saving it back would nest the whole
    # history inside itself and inflate every later prompt.
    chat_history.append({
        "input": request_text,
        "response": {
            "intermediate_steps": result['intermediate_steps'],
            "output": result['output'],
        },
    })
    mystr = (str(result['intermediate_steps']) + "\n" + str(result['output']))

    final_response = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": "please sanitize this input so that someone can speak it. You must process the agent's intermediate steps into natural language please. DO everything right!ONLY THE SANITIZED OUTPUT SHOULD BE THERE! NOTHING ELSE! (not even things like 'here is the input or here is the snitized response' blah blah blah NOTHING LIKE THAT IS ALLOWED!!)Here is input:\n " + mystr
            }
        ],
        model='llama3-70b-8192',
    ).choices[0].message.content

    return final_response

def synth_speech(text, output_file=None):
    """Synthesize ``text`` in a single blocking TTS call.

    Uses the ``en-US-Casual-K`` voice with MP3 encoding. The audio bytes are
    written to ``output_file`` when one is given; nothing is returned.
    """
    logger.debug(f'Starting speech synthesis for text: {text}')
    tts_request = dict(
        input=texttospeech.SynthesisInput(text=text),
        voice=texttospeech.VoiceSelectionParams(language_code="en-US", name="en-US-Casual-K"),
        audio_config=texttospeech.AudioConfig(audio_encoding=texttospeech.AudioEncoding.MP3),
    )
    response = tts_client.synthesize_speech(**tts_request)

    if output_file:
        with open(output_file, 'wb') as out:
            out.write(response.audio_content)
    logger.debug('Speech synthesis completed and file saved (SYNTH).')

from pydub import AudioSegment
import io

async def synthesize_speech(text, output_file=tts_synthesis_path):
    """Synthesize ``text`` to MP3, splitting long input into several TTS requests.

    The text is cut on whitespace into chunks of at most 5000 UTF-8 bytes.
    Each chunk is synthesized on the shared thread pool and the resulting
    audio segments are concatenated in order.

    Args:
        text: Text to speak.
        output_file: Destination MP3 path; nothing is written when falsy.

    Returns:
        None.
    """
    logger.debug(f'Starting speech synthesis for text: {text}')

    def split_text(text, max_length=5000):
        """Split on whitespace into chunks whose UTF-8 size is <= max_length bytes."""
        chunks = []
        current_chunk = ""
        current_size = 0
        for word in text.split():
            word_size = len(word.encode("utf-8"))
            if word_size > max_length:
                # A single over-long word cannot fit in any chunk: flush what we
                # have and cut the word itself at character boundaries.
                if current_chunk:
                    chunks.append(current_chunk)
                current_chunk, current_size = "", 0
                for ch in word:
                    ch_size = len(ch.encode("utf-8"))
                    if current_size + ch_size > max_length:
                        chunks.append(current_chunk)
                        current_chunk, current_size = "", 0
                    current_chunk += ch
                    current_size += ch_size
                continue
            separator = 1 if current_chunk else 0
            if current_size + separator + word_size > max_length:
                chunks.append(current_chunk)
                current_chunk, current_size = word, word_size
            else:
                current_chunk = f"{current_chunk} {word}" if current_chunk else word
                current_size += separator + word_size
        if current_chunk:
            chunks.append(current_chunk)
        return chunks

    # Voice and encoding are the same for every chunk.
    voice = texttospeech.VoiceSelectionParams(
        language_code="en-US",
        name="en-US-Casual-K"
    )
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3
    )

    loop = asyncio.get_running_loop()
    combined_audio = AudioSegment.empty()

    for chunk in split_text(text):
        synthesis_input = texttospeech.SynthesisInput(text=chunk)
        # The TTS client call is blocking, so run it on the thread pool.
        response = await loop.run_in_executor(
            executor, lambda: tts_client.synthesize_speech(
                input=synthesis_input, voice=voice, audio_config=audio_config
            )
        )
        audio_chunk = AudioSegment.from_file(io.BytesIO(response.audio_content), format="mp3")
        combined_audio += audio_chunk

    if output_file:
        combined_audio.export(output_file, format="mp3")
    logger.debug('Speech synthesis completed and file saved.')

@app.route('/set_credentials', methods=['POST'])
def set_credentials():
    """Store the user's contact details from the JSON request body.

    Reads ``name``, ``email``, ``recemail`` and ``phone`` from the body into
    the module-level ``credentials`` dict (missing keys become ``None``).

    Returns:
        ``{"status": "success"}``, or a 400 response when no data was received.
    """
    global credentials
    data = request.get_json()
    if not data:
        return jsonify({"status": "failed", "message": "No data received"}), 400
    for field in ('name', 'email', 'recemail', 'phone'):
        credentials[field] = data.get(field)
    # The %s placeholder is required: without it the logging module never
    # renders the extra argument and the message is emitted without the values.
    # NOTE(review): this writes personal data to the log — confirm that is intended.
    logger.info("THE CREDENTIALS ****** -------------> %s", credentials)
    return jsonify({"status": "success"})

@app.route('/')
def index():
    """Serve the assistant page at the site root."""
    page = 'index2.html'
    return render_template(page)

@app.route('/voice_assistant')
def voice_assistant():
    """Serve the same assistant page as the root route."""
    page = 'index2.html'
    return render_template(page)

@app.route('/authenticate', methods=['POST'])
def authenticate():
    """Exchange the caller's bearer token for their Google user info.

    Expects an ``Authorization: <scheme> <token>`` header and forwards the
    token to Google's userinfo endpoint.

    Returns:
        The userinfo JSON with 200 on success; 400 when the token is missing;
        502 when Google cannot be reached; otherwise Google's own error status.
    """
    auth_header = request.headers.get('Authorization')
    # A header without a second part (e.g. just "Bearer") counts as missing.
    parts = auth_header.split() if auth_header else []
    token = parts[1] if len(parts) > 1 else None

    if not token:
        return jsonify({'error': 'Missing token'}), 400

    try:
        response = requests.get(
            'https://www.googleapis.com/oauth2/v3/userinfo',
            headers={'Authorization': 'Bearer ' + token},
            timeout=10,  # do not let a stalled upstream hang the request forever
        )
    except requests.RequestException as exc:
        logger.error(f'Failed to reach Google userinfo endpoint: {exc}')
        return jsonify({'error': 'Failed to fetch user info'}), 502

    if response.status_code != 200:
        # The status must be the second element of the returned tuple, not an
        # argument to jsonify(), or the error goes out with HTTP 200.
        return jsonify({'error': 'Failed to fetch user info'}), response.status_code

    user_info = response.json()
    return jsonify(user_info), 200

@app.route('/talk', methods=['POST'])
async def talk():
    """Transcribe the last recording and return the assistant's reply as JSON.

    Responds with 400 while a recording is still in progress.
    """
    global is_recording
    if is_recording:
        return jsonify({"error": "Recording is still in progress"}), 400

    logger.debug('Starting audio transcription...')
    # Whisper transcription is blocking, so it runs on the shared thread pool.
    transcription = await asyncio.get_event_loop().run_in_executor(executor, transcribe_audio)
    logger.debug(f'Audio transcription completed: {transcription}')

    logger.debug('Generating AI response...')
    reply = await ai_response(transcription)
    logger.debug(f'AI response generated: {reply}')

    return jsonify({'response': reply})


@app.route('/text_input', methods=['POST'])
async def text_input():
    """Typed-text counterpart of /talk: answer the ``text`` field of the JSON body.

    Returns:
        ``{"response": <reply>}``, or a 400 response when the body is not a
        JSON object or carries no non-empty string under ``text``.
    """
    # silent=True returns None for a missing or malformed JSON body instead of
    # raising, so every bad-input case goes through the 400 branch below.
    data = request.get_json(silent=True)
    text = data.get('text', '') if isinstance(data, dict) else ''

    if not isinstance(text, str) or not text:
        return jsonify({"error": "No text provided"}), 400

    logger.debug('Generating AI response...')
    ai_resp = await ai_response(text)
    logger.debug(f'AI response generated: {ai_resp}')

    return jsonify({'response': ai_resp})

# @app.route('/handle_human_tool_input', methods=['POST'])
# async def handle_human_tool_input():
#     loop = asyncio.get_event_loop()
    
#     await synthesize_speech("The human tool handling has started. You can record.", output_file=tts_synthesis_path)

#     global is_recording
#     if is_recording:
#         return jsonify({"error": "Recording is still in progress"}), 400
    
#     logger.debug('Starting audio transcription...')
#     transcription = await loop.run_in_executor(executor, transcribe_audio)
#     logger.debug(f'Audio transcription completed: {transcription}')
    
#     logger.debug('Generating AI response...')
#     ai_resp = await ai_response(transcription)
#     logger.debug(f'AI response generated: {ai_resp}')
    
#     return jsonify({'response': ai_resp})


@app.route('/get_audio')
def get_audio():
    """Send the file at ``tts_synthesis_path`` with an audio/mp3 MIME type."""
    audio_response = send_file(tts_synthesis_path, mimetype="audio/mp3")
    return audio_response

import queue
# Hand-off point for replies posted to /provide_human_input, which puts the
# submitted text here.
human_response_queue = queue.Queue()

def web_prompt_func(prompt):
    """Speak the human-input tool's question and hand it back unchanged.

    The question is synthesized to ``tts_synthesis_path`` via synth_speech().
    """
    synth_speech(text=prompt, output_file=tts_synthesis_path)
    return prompt

def web_input_func(timeout=300.0):
    """Wait for the human's reply to the agent's question and return it.

    Replies arrive through the /provide_human_input route, which puts the
    submitted text on ``human_response_queue``; this function takes the next
    one off that queue.

    Args:
        timeout: Seconds to wait for a reply; ``None`` waits indefinitely.

    Returns:
        The reply text, or ``''`` if nothing arrived within ``timeout``.
    """
    try:
        return human_response_queue.get(timeout=timeout)
    except queue.Empty:
        logger.warning('No human input received within %s seconds.', timeout)
        return ''

@app.route('/provide_human_input', methods=['POST'])
def provide_human_input():
    """Accept the human's reply for the human-input tool and queue it.

    Returns:
        ``{"status": "received"}``, or a 400 response when the body is not a
        JSON object.
    """
    # silent=True returns None for a missing or malformed JSON body instead of raising.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"status": "failed", "message": "No data received"}), 400
    human_input = data.get('text', '')
    human_response_queue.put(human_input)  # Put the human's response in the queue
    return jsonify({"status": "received"})


# NOTE(review): HumanInputRun is already imported earlier in this cell (from langchain.tools).
from langchain_community.tools import HumanInputRun
# Give the agent a way to ask the user a question: the prompt is spoken via
# web_prompt_func and the answer is obtained via web_input_func.
human_tool = HumanInputRun(prompt_func=web_prompt_func, input_func=web_input_func)
# Registered last because it needs the two functions defined just above.
my_tools.append(human_tool)

# Start the Flask server with default settings.
if __name__ == '__main__':
    app.run()


2024-07-03 09:45:12,576 [MainThread] DEBUG: Making request: POST https://oauth2.googleapis.com/token
2024-07-03 09:45:12,576 [MainThread] DEBUG: Starting new HTTPS connection (1): oauth2.googleapis.com:443
2024-07-03 09:45:13,226 [MainThread] DEBUG: https://oauth2.googleapis.com:443 "POST /token HTTP/1.1" 200 None
2024-07-03 09:45:13,239 [MainThread] INFO: file_cache is only supported with oauth2client<4.0.0
2024-07-03 09:45:13,245 [MainThread] INFO: file_cache is only supported with oauth2client<4.0.0
2024-07-03 09:45:13,246 [MainThread] DEBUG: Making request: POST https://oauth2.googleapis.com/token
2024-07-03 09:45:13,251 [MainThread] DEBUG: Starting new HTTPS connection (1): oauth2.googleapis.com:443
2024-07-03 09:45:15,368 [MainThread] DEBUG: https://oauth2.googleapis.com:443 "POST /token HTTP/1.1" 200 None
2024-07-03 09:45:15,368 [MainThread] INFO: file_cache is only supported with oauth2client<4.0.0
2024-07-03 09:45:15,385 [MainThread] DEBUG: Making request: POST https://oauth2.

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
2024-07-03 09:45:19,508 [MainThread] INFO: [33mPress CTRL+C to quit[0m
2024-07-03 09:45:19,585 [Thread-89 (process_request_thread)] INFO: THE CREDENTIALS ****** -------------> 
2024-07-03 09:45:19,655 [Thread-89 (process_request_thread)] INFO: 127.0.0.1 - - [03/Jul/2024 09:45:19] "POST /set_credentials HTTP/1.1" 200 -
2024-07-03 09:46:12,561 [Thread-90 (process_request_thread)] DEBUG: Using selector: SelectSelector
2024-07-03 09:46:12,561 [ThreadPoolExecutor-47_0] DEBUG: Generating AI response...
2024-07-03 09:46:12,561 [ThreadPoolExecutor-47_0] DEBUG: Generating AI response for transcription: p
2024-07-03 09:46:12,561 [ThreadPoolExecutor-47_0] DEBUG: Request options: {'method': 'post', 'url': '/openai/v1/chat/completions', 'files': None, 'json_data': {'messages': [{'role': 'system', 'content': 'You are a nice, great document manager assistant. User will tell you things. You just respond. \n                YOU SHALL NOT INDICATE ANY TOOL USE UNTIL 

THIS IS RES2: 


2024-07-03 09:46:16,087 [ThreadPoolExecutor-47_0] DEBUG: subprocess.call(['ffmpeg', '-y', '-f', 'mp3', '-read_ahead_limit', '-1', '-i', 'cache:pipe:0', '-acodec', 'pcm_s16le', '-vn', '-f', 'wav', '-'])
2024-07-03 09:46:16,143 [ThreadPoolExecutor-47_0] DEBUG: subprocess.call(['ffmpeg', '-y', '-f', 'wav', '-i', 'C:\\Users\\pc-user1\\AppData\\Local\\Temp\\tmp1vvnwe_2', '-f', 'mp3', 'C:\\Users\\pc-user1\\AppData\\Local\\Temp\\tmpdvga95gf'])
2024-07-03 09:46:16,219 [ThreadPoolExecutor-47_0] DEBUG: subprocess output: b'ffmpeg version 2024-05-02-git-71669f2ad5-full_build-www.gyan.dev Copyright (c) 2000-2024 the FFmpeg developers'
2024-07-03 09:46:16,219 [ThreadPoolExecutor-47_0] DEBUG: subprocess output: b'  built with gcc 13.2.0 (Rev5, Built by MSYS2 project)'
2024-07-03 09:46:16,220 [ThreadPoolExecutor-47_0] DEBUG: subprocess output: b'  configuration: --enable-gpl --enable-version3 --enable-static --disable-w32threads --disable-autodetect --enable-fontconfig --enable-iconv --enable-gnutls 

THIS IS RES2: 


2024-07-03 09:49:28,423 [ThreadPoolExecutor-48_0] DEBUG: subprocess output: b'ffmpeg version 2024-05-02-git-71669f2ad5-full_build-www.gyan.dev Copyright (c) 2000-2024 the FFmpeg developers'
2024-07-03 09:49:28,423 [ThreadPoolExecutor-48_0] DEBUG: subprocess output: b'  built with gcc 13.2.0 (Rev5, Built by MSYS2 project)'
2024-07-03 09:49:28,423 [ThreadPoolExecutor-48_0] DEBUG: subprocess output: b'  configuration: --enable-gpl --enable-version3 --enable-static --disable-w32threads --disable-autodetect --enable-fontconfig --enable-iconv --enable-gnutls --enable-libxml2 --enable-gmp --enable-bzlib --enable-lzma --enable-libsnappy --enable-zlib --enable-librist --enable-libsrt --enable-libssh --enable-libzmq --enable-avisynth --enable-libbluray --enable-libcaca --enable-sdl2 --enable-libaribb24 --enable-libaribcaption --enable-libdav1d --enable-libdavs2 --enable-libuavs3d --enable-libxevd --enable-libzvbi --enable-librav1e --enable-libsvtav1 --enable-libwebp --enable-libx264 --enable-lib

THIS IS RES2: 


2024-07-03 09:50:23,818 [ThreadPoolExecutor-49_0] DEBUG: subprocess output: b'ffmpeg version 2024-05-02-git-71669f2ad5-full_build-www.gyan.dev Copyright (c) 2000-2024 the FFmpeg developers'
2024-07-03 09:50:23,819 [ThreadPoolExecutor-49_0] DEBUG: subprocess output: b'  built with gcc 13.2.0 (Rev5, Built by MSYS2 project)'
2024-07-03 09:50:23,819 [ThreadPoolExecutor-49_0] DEBUG: subprocess output: b'  configuration: --enable-gpl --enable-version3 --enable-static --disable-w32threads --disable-autodetect --enable-fontconfig --enable-iconv --enable-gnutls --enable-libxml2 --enable-gmp --enable-bzlib --enable-lzma --enable-libsnappy --enable-zlib --enable-librist --enable-libsrt --enable-libssh --enable-libzmq --enable-avisynth --enable-libbluray --enable-libcaca --enable-sdl2 --enable-libaribb24 --enable-libaribcaption --enable-libdav1d --enable-libdavs2 --enable-libuavs3d --enable-libxevd --enable-libzvbi --enable-librav1e --enable-libsvtav1 --enable-libwebp --enable-libx264 --enable-lib

THIS IS RES2: 


2024-07-03 09:50:53,681 [ThreadPoolExecutor-50_0] DEBUG: subprocess.call(['ffmpeg', '-y', '-f', 'wav', '-i', 'C:\\Users\\pc-user1\\AppData\\Local\\Temp\\tmpw0ke1u_v', '-f', 'mp3', 'C:\\Users\\pc-user1\\AppData\\Local\\Temp\\tmp6r7_r19_'])
2024-07-03 09:50:53,764 [ThreadPoolExecutor-50_0] DEBUG: subprocess output: b'ffmpeg version 2024-05-02-git-71669f2ad5-full_build-www.gyan.dev Copyright (c) 2000-2024 the FFmpeg developers'
2024-07-03 09:50:53,764 [ThreadPoolExecutor-50_0] DEBUG: subprocess output: b'  built with gcc 13.2.0 (Rev5, Built by MSYS2 project)'
2024-07-03 09:50:53,764 [ThreadPoolExecutor-50_0] DEBUG: subprocess output: b'  configuration: --enable-gpl --enable-version3 --enable-static --disable-w32threads --disable-autodetect --enable-fontconfig --enable-iconv --enable-gnutls --enable-libxml2 --enable-gmp --enable-bzlib --enable-lzma --enable-libsnappy --enable-zlib --enable-librist --enable-libsrt --enable-libssh --enable-libzmq --enable-avisynth --enable-libbluray --enable