Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .env

This file was deleted.

1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.env
2 changes: 2 additions & 0 deletions SQL/instructions_for_creation.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Para criar o banco:
mysql -u root -p -e "CREATE DATABASE IF NOT EXISTS pybot CHARACTER SET utf8mb4;"
10 changes: 10 additions & 0 deletions SQL/schema.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
-- Knowledge-base rows loaded into the search index by engine/database.py,
-- which selects id, title, content and category for rows with active = 1.
CREATE TABLE IF NOT EXISTS knowledge (
id INT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
-- Prepended as the first line of every chunk built from this row.
title VARCHAR(255) NOT NULL,
-- Body text; the loader splits it into overlapping word windows.
content TEXT NOT NULL,
-- Becomes the chunk source label "db:<category>".
category VARCHAR(100) NOT NULL DEFAULT 'geral',
-- Only rows with active = 1 are fetched by the loader.
active TINYINT(1) NOT NULL DEFAULT 1,
created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
-- Refreshed automatically by MySQL on every UPDATE of the row.
updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
-- NOTE(review): presumably supports the loader's "WHERE active = 1" filter — confirm with EXPLAIN.
INDEX idx_active_category (active, category)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
Binary file added __pycache__/app.cpython-314.pyc
Binary file not shown.
Binary file added __pycache__/main.cpython-314.pyc
Binary file not shown.
Binary file modified api/__pycache__/__init__.cpython-314.pyc
Binary file not shown.
Binary file modified api/__pycache__/routes.cpython-314.pyc
Binary file not shown.
24 changes: 24 additions & 0 deletions api/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
from fastapi import APIRouter, HTTPException, Request
from fastapi.responses import HTMLResponse, StreamingResponse

from collections.abc import AsyncGenerator

log = logging.getLogger("kernelbots.api.chat")

router = APIRouter()
Expand Down Expand Up @@ -75,6 +77,28 @@ async def chat(request: Request) -> StreamingResponse:
detail="Campo 'session_id' deve ser string ou omitido.",
)

if user_message.strip().lower() == "/reload":
log.info("🔄 Comando /reload recebido — reconstruindo índice BM25...")
services.search_engine.rebuild()
chunk_count = len(services.search_engine.chunks)
db_count = sum(1 for c in services.search_engine.chunks if c.get("source", "").startswith("db:"))
md_count = chunk_count - db_count
status = (
f"Índice reconstruído: {chunk_count} chunk(s) total "
f"({md_count} de arquivos .md + {db_count} do MySQL)."
)
log.info("✅ /reload concluído — %s", status)

async def _reload_stream() -> AsyncGenerator[str, None]:
yield f"data: {status}\n\n"
yield "data: [DONE]\n\n"

return StreamingResponse(
_reload_stream(),
media_type="text/event-stream",
headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no", "Connection": "keep-alive"},
)

built = services.context_manager.build_messages(
user_message,
discipline_filter=discipline,
Expand Down
Binary file modified app/__pycache__/__init__.cpython-314.pyc
Binary file not shown.
Binary file modified app/__pycache__/factory.cpython-314.pyc
Binary file not shown.
Binary file modified app/__pycache__/state.cpython-314.pyc
Binary file not shown.
Binary file modified core/__pycache__/__init__.cpython-314.pyc
Binary file not shown.
Binary file modified core/__pycache__/config.cpython-314.pyc
Binary file not shown.
Binary file modified core/__pycache__/logging_config.cpython-314.pyc
Binary file not shown.
27 changes: 27 additions & 0 deletions core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ class Settings:
pinned_max_turns: int
pinned_max_chars: int
pinned_weak_score: float
db_host: str
db_port: int
db_name: str
db_user: str
db_password: str

@property
def openrouter_headers(self) -> dict[str, str]:
Expand Down Expand Up @@ -87,6 +92,23 @@ def load(cls) -> Settings:
raise RuntimeError("ACL_PINNED_WEAK_SCORE deve ser um número.") from None
pinned_weak_score = max(0.05, min(0.95, pinned_weak_score))

""" !Credenciais do banco! """

db_host = (os.getenv("DB_HOST") or "").strip()

db_port_raw = (os.getenv("DB_PORT") or "3306").strip()

try:
db_port = int(db_port_raw)
except ValueError:
raise RuntimeError("DB_PORT deve ser um inteiro.") from None

db_name = (os.getenv("DB_NAME") or "").strip()

db_user = (os.getenv("DB_USER") or "").strip()

db_password = (os.getenv("DB_PASSWORD") or "").strip()

return cls(
openrouter_api_key=key,
project_root=project_root,
Expand All @@ -100,4 +122,9 @@ def load(cls) -> Settings:
pinned_max_turns=pinned_max_turns,
pinned_max_chars=pinned_max_chars,
pinned_weak_score=pinned_weak_score,
db_host=db_host,
db_port=db_port,
db_name=db_name,
db_user=db_user,
db_password=db_password,
)
Binary file modified engine/__pycache__/__init__.cpython-314.pyc
Binary file not shown.
Binary file modified engine/__pycache__/chat_provider.cpython-314.pyc
Binary file not shown.
Binary file modified engine/__pycache__/context.cpython-314.pyc
Binary file not shown.
Binary file added engine/__pycache__/database.cpython-314.pyc
Binary file not shown.
Binary file modified engine/__pycache__/pinned_store.cpython-314.pyc
Binary file not shown.
Binary file modified engine/__pycache__/search.cpython-314.pyc
Binary file not shown.
Binary file modified engine/__pycache__/watcher.cpython-314.pyc
Binary file not shown.
84 changes: 84 additions & 0 deletions engine/database.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
"""Fonte de dados MySQL para o índice BM25."""
from __future__ import annotations

import logging
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from core.config import Settings

log = logging.getLogger(f"kernelbots.{__name__}")

DB_CHUNK_WORDS = 500
DB_CHUNK_OVERLAP = 50


def _chunk_text(text: str, title: str, source: str) -> list[dict]:
"""Divide texto em janelas de ~500 palavras com overlap de 50."""
words = text.split()
if not words:
return []
chunks: list[dict] = []
start = 0
while start < len(words):
end = min(start + DB_CHUNK_WORDS, len(words))
chunks.append({
"text": f"{title}\n" + " ".join(words[start:end]),
"source": source,
"discipline": "db",
})
if end == len(words):
break
start += DB_CHUNK_WORDS - DB_CHUNK_OVERLAP
return chunks


def fetch_db_chunks(settings: Settings) -> list[dict]:
    """Load the active ``knowledge`` rows from MySQL as BM25 chunk dicts.

    The function never raises for database problems; it degrades to an empty
    list so the caller can carry on without the MySQL source:

    * host, database name or user is empty -> ``[]`` with a debug log;
    * PyMySQL cannot be imported -> ``[]`` with a warning;
    * any exception while connecting, querying or chunking -> ``[]`` with a
      warning that includes the traceback.

    Args:
        settings: Object exposing ``db_host``, ``db_port``, ``db_name``,
            ``db_user`` and ``db_password``.

    Returns:
        The chunks produced by ``_chunk_text`` for every fetched row, in
        ascending ``id`` order, each tagged with the source ``"db:<category>"``.
    """
    required = (settings.db_host, settings.db_name, settings.db_user)
    if not all(required):
        log.debug("Variáveis DB_* não configuradas — pulando fonte MySQL.")
        return []

    # Imported lazily so a missing driver only disables this data source.
    try:
        import pymysql
        import pymysql.cursors
    except ImportError:
        log.warning("PyMySQL não instalado — fonte MySQL desativada.")
        return []

    query = (
        "SELECT id, title, content, category "
        "FROM knowledge WHERE active = 1 ORDER BY id"
    )

    try:
        connection = pymysql.connect(
            host=settings.db_host,
            port=settings.db_port,
            database=settings.db_name,
            user=settings.db_user,
            password=settings.db_password,
            charset="utf8mb4",
            cursorclass=pymysql.cursors.DictCursor,
            connect_timeout=5,
            read_timeout=10,
        )
        with connection, connection.cursor() as cur:
            cur.execute(query)
            records = cur.fetchall()

        collected: list[dict] = []
        for record in records:
            pieces = _chunk_text(
                record["content"],
                record["title"],
                f"db:{record['category']}",
            )
            collected += pieces
            log.debug(" 🗄 row id=%s '%s' → %s chunk(s)", record["id"], record["title"], len(pieces))

        log.info(" 🗄 MySQL: %s row(s) → %s chunk(s) carregados", len(records), len(collected))
        return collected

    except Exception:
        # Deliberate best-effort boundary: the index must still build from .md files.
        log.warning("⚠ Falha ao conectar ao MySQL — continuando apenas com .md.", exc_info=True)
        return []
26 changes: 21 additions & 5 deletions engine/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
from rank_bm25 import BM25Okapi

from core.config import GlobalContextMode
from core.config import Settings
from engine.database import fetch_db_chunks

log = logging.getLogger(f"kernelbots.{__name__}")

Expand All @@ -26,10 +28,12 @@ def __init__(
content_dir: Path,
score_threshold: float,
global_context_mode: GlobalContextMode = "geral",
settings: Settings | None = None, # <-- adicionar
) -> None:
self._content_dir = content_dir.resolve()
self._score_threshold = score_threshold
self._global_context_mode: GlobalContextMode = global_context_mode
self._settings = settings
self._lock = threading.RLock()
self._silos: dict[str, dict[str, Any]] = {}
self._discipline_ids: frozenset[str] = frozenset()
Expand Down Expand Up @@ -151,18 +155,27 @@ def rebuild(self) -> None:
log.warning(
"⚠ Nenhum .md indexado — BM25 desativado. Modo assistente geral ativo."
)

# --- chunks do MySQL (silo "db") ---
db_chunks: list[dict] = []
if self._settings is not None:
db_chunks = fetch_db_chunks(self._settings)
if db_chunks:
tokenized_db = [self._tokenize(c["text"]) for c in db_chunks]
new_silos["db"] = {"chunks": db_chunks, "bm25": BM25Okapi(tokenized_db)}
all_chunks.extend(db_chunks)

elapsed = (time.perf_counter() - t0) * 1000
with self._lock:
self._discipline_ids = discipline_ids
self._silos = new_silos
self._all_chunks = all_chunks

db_count = len(db_chunks)
md_count = len(all_chunks) - db_count
log.info(
"✅ Índice BM25 por silo pronto — %s chunk(s) | %s silo(s) | rebuild em %.1fms",
len(all_chunks),
len(new_silos),
elapsed,
"✅ Índice BM25 por silo pronto — %s chunk(s) (%s .md + %s MySQL) | %s silo(s) | rebuild em %.1fms",
len(all_chunks), md_count, db_count, len(new_silos), elapsed,
)

def normalize_discipline(self, raw: str | None) -> str | None:
Expand Down Expand Up @@ -225,7 +238,10 @@ def search(
return self._hits_in_silo(nd, query, top_k)

if self._global_context_mode == "geral":
return self._hits_in_silo("geral", query, top_k)
hits = self._hits_in_silo("geral", query, top_k)
hits += self._hits_in_silo("db", query, top_k)
hits.sort(key=lambda h: h["score"], reverse=True)
return hits[:top_k]

merged: list[dict] = []
for silo in sorted(self._silos.keys()):
Expand Down
1 change: 1 addition & 0 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
settings.content_dir,
settings.bm25_score_threshold,
settings.global_context_mode,
settings=settings,
)
observer = start_content_observer(search_engine, settings.content_dir)

Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ python-dotenv
jinja2
rank-bm25
watchdog
pytest
pytest
PyMySQL
2 changes: 2 additions & 0 deletions templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
<code>content/doc</code>), <code>/python</code>, <code>/visualizacao-sql</code>,
<code>/projeto-bloco</code>, <code>/planejamento-curso-carreira</code> (RAG só na disciplina).</p>
<div class="cmd-pills">
<span class="cmd-pill">/reload</span>
<span class="cmd-pill">/python o que são listas?</span>
<span class="cmd-pill">/visualizacao-sql explique GROUP BY</span>
<span class="cmd-pill">/projeto-bloco resuma o pipeline</span>
Expand All @@ -48,6 +49,7 @@
<span>Enter</span> envia · <span>Shift+Enter</span> nova linha ·
<span>/python</span> · <span>/visualizacao-sql</span> · <span>/projeto-bloco</span> ·
<span>/planejamento-curso-carreira</span> · <span>/doc</span> · <span>/content</span>
· <span>/reload</span> para reconstruir o índice
</div>
<div class="input-row">
<textarea id="message-input" rows="1" placeholder="Digite sua mensagem..." autocomplete="off"
Expand Down