<a href="https://colab.research.google.com/github/GuilhermeCMSiqueira/chatAssistantCriacomp/blob/main/CRIACOMP_Assistente_Virtual_com_OpenAI_Responses_e_Gradio_Guilherme.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -q openai gradio tdqm

  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.9/46.9 MB[0m [31m18.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m322.2/322.2 kB[0m [31m21.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m95.2/95.2 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.4/11.4 MB[0m [31m63.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.0/72.0 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.4/62.4 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for tdqm (setup.py) ... [?25l[?25hdone


In [41]:
from google.colab import userdata
from openai import OpenAI
from tqdm import tqdm
import gradio as gr
import concurrent
import os
from openai.types.responses import ResponseFileSearchToolCall

In [3]:
# OpenAI client; the API key is read through google.colab's `userdata`.
client = OpenAI(api_key=userdata.get('OPENAI_API_KEY'))

# Every entry found in this directory is queued for upload (no extension filter).
dir_pdfs = '/content/input_pdfs'
pdf_files = [
    os.path.join(dir_pdfs, entry_name)
    for entry_name in os.listdir(dir_pdfs)
]

In [4]:
def upload_single_pdf(file_path: str, vector_store_id: str) -> dict:
    """Upload one local file to OpenAI and attach it to a vector store.

    Args:
        file_path: Path of the local file to upload.
        vector_store_id: ID of the vector store the uploaded file is attached to.

    Returns:
        ``{"file": <basename>, "status": "success"}`` when both API calls
        succeed; otherwise ``{"file": <basename>, "status": "failed",
        "error": <message>}``. Failures are reported in the returned dict
        (and printed) rather than raised, so the caller can keep processing
        the remaining files.
    """
    file_name = os.path.basename(file_path)
    try:
        # Use a context manager so the file handle is closed as soon as the
        # upload call returns, even if the upload raises.
        with open(file_path, 'rb') as pdf_handle:
            file_response = client.files.create(file=pdf_handle, purpose="assistants")
        client.vector_stores.files.create(
            vector_store_id=vector_store_id,
            file_id=file_response.id,
        )
        return {"file": file_name, "status": "success"}
    except Exception as e:
        # Broad catch is deliberate: this is the per-file boundary, and the
        # error is surfaced to the caller through the result dict.
        print(f"Error with {file_name}: {str(e)}")
        return {"file": file_name, "status": "failed", "error": str(e)}

def upload_pdf_files_to_vector_store(vector_store_id: str, pdf_files: list, max_workers: int = 10):
    """Upload a list of files to a vector store in parallel and tally the results.

    Args:
        vector_store_id: ID of the vector store every file is attached to.
        pdf_files: Paths of the local files to upload.
        max_workers: Maximum number of upload threads (defaults to 10).

    Returns:
        A dict with ``total_files``, ``successful_uploads``,
        ``failed_uploads`` and ``errors`` (the result dicts of the uploads
        whose status was not ``"success"``).
    """
    # Import the submodule explicitly: a bare `import concurrent` does not
    # load `concurrent.futures`, so relying on it only works when some other
    # library happens to have imported the submodule already.
    from concurrent.futures import ThreadPoolExecutor, as_completed

    stats = {"total_files": len(pdf_files), "successful_uploads": 0, "failed_uploads": 0, "errors": []}

    print(f"{len(pdf_files)} PDF files to process. Uploading in parallel...")

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(upload_single_pdf, file_path, vector_store_id)
            for file_path in pdf_files
        ]
        # Consume results as they finish so the progress bar advances per upload.
        for future in tqdm(as_completed(futures), total=len(futures)):
            result = future.result()
            if result["status"] == "success":
                stats["successful_uploads"] += 1
            else:
                stats["failed_uploads"] += 1
                stats["errors"].append(result)

    return stats

def create_vector_store(store_name: str) -> dict:
    """Create a vector store named ``store_name`` and summarize it.

    Returns:
        A dict with the new store's ``id``, ``name``, ``created_at`` and
        ``file_count`` (its completed-file count). If anything goes wrong the
        error is printed and an empty dict is returned instead.
    """
    try:
        created = client.vector_stores.create(name=store_name)
        summary = dict(
            id=created.id,
            name=created.name,
            created_at=created.created_at,
            file_count=created.file_counts.completed,
        )
        print("Vector store created:", summary)
        return summary
    except Exception as err:
        print(f"Error creating vector store: {err}")
        return {}

In [5]:
store_name = "my_vector_store"
vector_store_details = create_vector_store(store_name)

# create_vector_store() returns an empty dict when creation fails; stop here
# with a clear message instead of hitting a KeyError on "id" below.
if not vector_store_details:
    raise RuntimeError(f"Vector store '{store_name}' could not be created; see the error printed above.")

# Left as a bare expression so the notebook displays the upload statistics.
upload_pdf_files_to_vector_store(vector_store_details["id"], pdf_files)

Vector store created: {'id': 'vs_67fe9078450881919d0a1bc2459eedbc', 'name': 'my_vector_store', 'created_at': 1744736376, 'file_count': 0}
10 PDF files to process. Uploading in parallel...


100%|██████████| 10/10 [00:02<00:00,  3.76it/s]


{'total_files': 10,
 'successful_uploads': 10,
 'failed_uploads': 0,
 'errors': []}

In [53]:
def response_output(query, history):
    """Answer a chat message using the OpenAI Responses API with file search.

    Used as the ``fn`` callback of the Gradio chat interface.

    Args:
        query: The user's latest message.
        history: The earlier conversation as supplied by the chat interface.
            It is interpolated as-is into the system prompt so the model can
            use it as context.

    Returns:
        The text of the model's answer.
    """
    system_prompt = f"Você é um assistente de chat com a função específica de responder dúvidas sobre a Coordenação de Infraestrutura do Centro de Informática (CIn). Todas as suas respostas devem ser baseadas nas informações contidas nos documentos previamente fornecidos e armazenados no seu vector store. Caso uma pergunta não possa ser respondida com base nesses arquivos, você deve informar que não possui informações suficientes para fornecer uma resposta. Não é permitido responder a perguntas que estejam fora do escopo da da Infraestrutura do CIn, como temas relacionados a outras coordenações, disciplinas, professores ou assuntos externos. Suas respostas devem ser claras, objetivas e fundamentadas nos conteúdos disponíveis, sem emitir opiniões. Caso a pergunta do usuário esteja ambígua ou incompleta, oriente-o a fornecer mais detalhes ou informe que não é possível responder com precisão com base nas informações disponíveis. Até agora, na conversa, tivemos as seguintes mensagens que podem servir de contexto pra sua próxima resposta: {history}"

    response = client.responses.create(
        model="gpt-4.1-mini",
        input=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": query},
        ],
        tools=[
            {
                # Let the model search the documents uploaded to the vector store.
                "type": "file_search",
                "vector_store_ids": [vector_store_details['id']],
            }
        ],
    )

    # `output` may hold tool-call items before (or between) message items, so
    # picking the answer by position is fragile. `output_text` is the SDK's
    # aggregate of the text parts of the response's message items.
    return response.output_text

In [52]:
# Chat UI whose replies are produced by response_output.
demo = gr.ChatInterface(type="messages", fn=response_output)

# share=True asks Gradio for a public URL; debug=True keeps the cell running
# so errors and logs stay visible.
demo.launch(share=True, debug=True)

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://7544574cd8f5a513d6.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://7544574cd8f5a513d6.gradio.live


