diff --git a/.env.example b/.env.example index 6b1ab04..5d0055f 100644 --- a/.env.example +++ b/.env.example @@ -4,4 +4,10 @@ VECTOR_DB_ENV= POSTGRES_URI= SECRET_KEY= # Comma-separated list of allowed CORS origins, e.g. http://localhost:3000,https://yourdomain.com -CORS_ALLOW_ORIGINS= \ No newline at end of file +CORS_ALLOW_ORIGINS= + +# Web search configuration +WEB_SEARCH_ENGINE=tavily +TAVILY_API_KEY=your_tavily_api_key_here +MAX_FETCH_CONCURRENCY=4 +DEFAULT_TOP_K_RESULTS=8 \ No newline at end of file diff --git a/.gitignore b/.gitignore index 583d9e7..c936975 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ authormaton/ experimentalCode/.env .env + # Ignore Python cache __pycache__/ diff --git a/api/indexing_router.py b/api/indexing_router.py index e2861c8..58b7e08 100644 --- a/api/indexing_router.py +++ b/api/indexing_router.py @@ -4,7 +4,7 @@ from fastapi import APIRouter, HTTPException, status, Request from pydantic import BaseModel from config.settings import settings -from services.vector_db_service import VectorDBService +from services.vector_db_service import VectorDBClient as VectorDBService from services.embedding_service import embed_texts_batched from services.chunking_service import chunk_text from services.parsing_service import extract_text_from_pdf, extract_text_from_docx diff --git a/api/main.py b/api/main.py index af0c939..25f60a1 100644 --- a/api/main.py +++ b/api/main.py @@ -46,8 +46,10 @@ def read_root(): # Register routers from api.endpoints.upload import router as upload_router from api.endpoints.internal import router as internal_router +from api.endpoints.web_answering import router as web_answering_router app.include_router(upload_router, prefix="/upload") app.include_router(internal_router) +app.include_router(web_answering_router, prefix="/internal", tags=["websearch"]) app.include_router(indexing_router) @app.get("/health") diff --git a/requirements.txt b/requirements.txt index 4e168c8..06d7131 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ python-multipart>=0.0.6,<1.0.0 fastapi>=0.110.0,<1.0.0 uvicorn[standard]>=0.29.0,<1.0.0 pytest>=8.2.0,<9.0.0 -httpx>=0.27.0,<1.0.0 +httpx[http2]>=0.27.0,<1.0.0 python-dotenv>=1.0.0,<2.0.0 PyPDF2>=3.0.0,<4.0.0 requests>=2.31.0,<3.0.0 @@ -13,4 +13,6 @@ pinecone-client>=3.0.0,<4.0.0 weaviate-client>=4.4.0,<5.0.0 transformers>=4.40.0,<5.0.0 torch>=2.2.0,<3.0.0 -pydantic>=2.6.0,<3.0.0 \ No newline at end of file +pydantic>=2.6.0,<3.0.0 +trafilatura>=1.6.0,<2.0.0 +numpy>=1.26.0,<2.0.0 \ No newline at end of file