diff --git a/engine/query_executor.py b/engine/query_executor.py index 0020293..7cba03a 100644 --- a/engine/query_executor.py +++ b/engine/query_executor.py @@ -6,13 +6,13 @@ """ import logging -from typing import Dict, Any, Optional, Union +from typing import Any, Dict, Optional, Union from langchain_core.messages import HumanMessage -from llm_utils.graph_utils.enriched_graph import builder as enriched_builder -from llm_utils.graph_utils.basic_graph import builder as basic_builder -from llm_utils.llm_response_parser import LLMResponseParser +from utils.llm.graph_utils.basic_graph import builder as basic_builder +from utils.llm.graph_utils.enriched_graph import builder as enriched_builder +from utils.llm.llm_response_parser import LLMResponseParser logger = logging.getLogger(__name__) diff --git a/interface/app_pages/graph_builder.py b/interface/app_pages/graph_builder.py index 916d290..4792992 100644 --- a/interface/app_pages/graph_builder.py +++ b/interface/app_pages/graph_builder.py @@ -13,7 +13,7 @@ import streamlit as st from langgraph.graph import END, StateGraph -from llm_utils.graph_utils.base import ( +from utils.llm.graph_utils.base import ( CONTEXT_ENRICHMENT, GET_TABLE_INFO, PROFILE_EXTRACTION, diff --git a/interface/core/config/settings.py b/interface/core/config/settings.py index 5e15ba4..9b4eeb2 100644 --- a/interface/core/config/settings.py +++ b/interface/core/config/settings.py @@ -3,19 +3,18 @@ """ import os -from typing import Any, Dict, Optional from pathlib import Path +from typing import Any, Dict, Optional try: import streamlit as st # type: ignore except Exception: # pragma: no cover - streamlit may not be present in non-UI contexts st = None # type: ignore -from llm_utils.tools import set_gms_server +from utils.llm.tools import set_gms_server from .models import Config - DEFAULT_DATAHUB_SERVER = "http://localhost:8080" DEFAULT_VECTORDB_TYPE = os.getenv("VECTORDB_TYPE", "faiss").lower() DEFAULT_VECTORDB_LOCATION = os.getenv("VECTORDB_LOCATION", "") @@ -202,7 +201,7 @@ def update_llm_settings(*, provider: str, values: dict[str, str | None]) -> None """Update chat LLM settings from UI into process env and session. This function mirrors the environment-variable based configuration consumed by - llm_utils.llm.factory.get_llm(). Only sets provided keys; missing values are left as-is. + utils.llm.core.factory.get_llm(). Only sets provided keys; missing values are left as-is. """ provider_norm = (provider or "").lower() if provider_norm not in { @@ -229,7 +228,7 @@ def update_llm_settings(*, provider: str, values: dict[str, str | None]) -> None def update_embedding_settings(*, provider: str, values: dict[str, str | None]) -> None: """Update Embeddings settings from UI into process env and session. - Mirrors env vars consumed by llm_utils.llm.factory.get_embeddings(). + Mirrors env vars consumed by utils.llm.core.factory.get_embeddings(). 
""" provider_norm = (provider or "").lower() if provider_norm not in { diff --git a/interface/core/result_renderer.py b/interface/core/result_renderer.py index e96b062..e085735 100644 --- a/interface/core/result_renderer.py +++ b/interface/core/result_renderer.py @@ -11,8 +11,8 @@ from langchain_core.messages import AIMessage from infra.observability.token_usage import TokenUtils -from llm_utils.llm_response_parser import LLMResponseParser from utils.databases import DatabaseFactory +from utils.llm.llm_response_parser import LLMResponseParser from viz.display_chart import DisplayChart diff --git a/interface/core/session_utils.py b/interface/core/session_utils.py index 4791a2e..5071fbd 100644 --- a/interface/core/session_utils.py +++ b/interface/core/session_utils.py @@ -24,9 +24,9 @@ def init_graph(use_enriched: bool) -> str: """ builder_module = ( - "llm_utils.graph_utils.enriched_graph" + "utils.llm.graph_utils.enriched_graph" if use_enriched - else "llm_utils.graph_utils.basic_graph" + else "utils.llm.graph_utils.basic_graph" ) builder = __import__(builder_module, fromlist=["builder"]).builder diff --git a/llm_utils/__init__.py b/llm_utils/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/llm_utils/output_parser/__init__.py b/llm_utils/output_parser/__init__.py deleted file mode 100644 index 5a4f0b0..0000000 --- a/llm_utils/output_parser/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -""" -출력 파서 모듈 패키지 초기화. - -이 패키지는 LLM의 구조화 출력 모델과 파서들을 포함합니다. -""" diff --git a/pyproject.toml b/pyproject.toml index 9213be5..573955c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -68,7 +68,6 @@ include = [ packages = [ "cli", "interface", - "llm_utils", "engine", "infra", "viz", diff --git a/test/test_llm_utils/test_llm_response_parser.py b/test/test_llm_utils/test_llm_response_parser.py index 15f215d..ed10acd 100644 --- a/test/test_llm_utils/test_llm_response_parser.py +++ b/test/test_llm_utils/test_llm_response_parser.py @@ -9,7 +9,7 @@ import unittest -from llm_utils.llm_response_parser import LLMResponseParser +from utils.llm.llm_response_parser import LLMResponseParser class TestLLMResponseParser(unittest.TestCase): diff --git a/llm_utils/README.md b/utils/llm/README.md similarity index 99% rename from llm_utils/README.md rename to utils/llm/README.md index 182e35c..2113d38 100644 --- a/llm_utils/README.md +++ b/utils/llm/README.md @@ -1,4 +1,4 @@ -## llm_utils 개요 +## utils.llm 개요 Lang2SQL 파이프라인에서 LLM, 검색(RAG), 그래프 워크플로우, DB 실행, 시각화 등 보조 유틸리티를 모아둔 패키지입니다. 이 문서는 depth(계층)별로 기능과 통합 흐름을 정리합니다. 
diff --git a/llm_utils/chains.py b/utils/llm/chains.py similarity index 93% rename from llm_utils/chains.py rename to utils/llm/chains.py index ac0a854..bb2ab2e 100644 --- a/llm_utils/chains.py +++ b/utils/llm/chains.py @@ -8,21 +8,13 @@ - Question Gate (SQL 적합성 분류) """ -import os -from langchain_core.prompts import ( - ChatPromptTemplate, - SystemMessagePromptTemplate, -) +from langchain_core.prompts import ChatPromptTemplate, SystemMessagePromptTemplate from pydantic import BaseModel, Field -from llm_utils.output_parser.question_suitability import QuestionSuitability -from llm_utils.output_parser.document_suitability import ( - DocumentSuitabilityList, -) - -from llm_utils.llm import get_llm from prompt.template_loader import get_prompt_template - +from utils.llm.core import get_llm +from utils.llm.output_parser.document_suitability import DocumentSuitabilityList +from utils.llm.output_parser.question_suitability import QuestionSuitability llm = get_llm() diff --git a/llm_utils/llm/__init__.py b/utils/llm/core/__init__.py similarity index 94% rename from llm_utils/llm/__init__.py rename to utils/llm/core/__init__.py index 55c09fb..a842cfb 100644 --- a/llm_utils/llm/__init__.py +++ b/utils/llm/core/__init__.py @@ -1,18 +1,18 @@ -from .factory import ( - get_llm, - get_llm_openai, - get_llm_azure, - get_llm_bedrock, - get_llm_gemini, - get_llm_ollama, - get_llm_huggingface, +from utils.llm.core.factory import ( get_embeddings, - get_embeddings_openai, get_embeddings_azure, get_embeddings_bedrock, get_embeddings_gemini, - get_embeddings_ollama, get_embeddings_huggingface, + get_embeddings_ollama, + get_embeddings_openai, + get_llm, + get_llm_azure, + get_llm_bedrock, + get_llm_gemini, + get_llm_huggingface, + get_llm_ollama, + get_llm_openai, ) __all__ = [ diff --git a/llm_utils/llm/factory.py b/utils/llm/core/factory.py similarity index 98% rename from llm_utils/llm/factory.py rename to utils/llm/core/factory.py index d12cb99..3164220 100644 --- a/llm_utils/llm/factory.py +++ b/utils/llm/core/factory.py @@ -2,7 +2,7 @@ from typing import Optional from langchain.llms.base import BaseLanguageModel -from langchain_aws import ChatBedrockConverse, BedrockEmbeddings +from langchain_aws import BedrockEmbeddings, ChatBedrockConverse from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings from langchain_huggingface import ( ChatHuggingFace, @@ -11,9 +11,9 @@ ) from langchain_ollama import ChatOllama, OllamaEmbeddings from langchain_openai import ( + AzureChatOpenAI, AzureOpenAIEmbeddings, ChatOpenAI, - AzureChatOpenAI, OpenAIEmbeddings, ) diff --git a/llm_utils/graph_utils/__init__.py b/utils/llm/graph_utils/__init__.py similarity index 95% rename from llm_utils/graph_utils/__init__.py rename to utils/llm/graph_utils/__init__.py index 678f637..c2012ad 100644 --- a/llm_utils/graph_utils/__init__.py +++ b/utils/llm/graph_utils/__init__.py @@ -4,16 +4,16 @@ 이 패키지는 Lang2SQL의 워크플로우 그래프 구성과 관련된 모듈들을 포함합니다. 
""" -from .base import ( - QueryMakerState, +from utils.llm.graph_utils.base import ( + CONTEXT_ENRICHMENT, GET_TABLE_INFO, - QUERY_MAKER, PROFILE_EXTRACTION, - CONTEXT_ENRICHMENT, + QUERY_MAKER, + QueryMakerState, + context_enrichment_node, get_table_info_node, - query_maker_node, profile_extraction_node, - context_enrichment_node, + query_maker_node, ) from .basic_graph import builder as basic_builder diff --git a/llm_utils/graph_utils/base.py b/utils/llm/graph_utils/base.py similarity index 98% rename from llm_utils/graph_utils/base.py rename to utils/llm/graph_utils/base.py index 54edfcc..41a4d49 100644 --- a/llm_utils/graph_utils/base.py +++ b/utils/llm/graph_utils/base.py @@ -1,18 +1,16 @@ import json -from typing_extensions import TypedDict, Annotated from langgraph.graph.message import add_messages +from typing_extensions import Annotated, TypedDict - -from llm_utils.chains import ( - query_maker_chain, +from utils.llm.chains import ( + document_suitability_chain, profile_extraction_chain, query_enrichment_chain, + query_maker_chain, question_gate_chain, - document_suitability_chain, ) - -from llm_utils.retrieval import search_tables +from utils.llm.retrieval import search_tables # 노드 식별자 정의 QUESTION_GATE = "question_gate" diff --git a/llm_utils/graph_utils/basic_graph.py b/utils/llm/graph_utils/basic_graph.py similarity index 92% rename from llm_utils/graph_utils/basic_graph.py rename to utils/llm/graph_utils/basic_graph.py index 0a1b35f..e7c7ade 100644 --- a/llm_utils/graph_utils/basic_graph.py +++ b/utils/llm/graph_utils/basic_graph.py @@ -1,23 +1,22 @@ -import json +""" +기본 워크플로우를 위한 StateGraph 구성입니다. +GET_TABLE_INFO -> QUERY_MAKER 순서로 실행됩니다. +""" -from langgraph.graph import StateGraph, END -from llm_utils.graph_utils.base import ( - QueryMakerState, - QUESTION_GATE, - GET_TABLE_INFO, +from langgraph.graph import END, StateGraph + +from utils.llm.graph_utils.base import ( EVALUATE_DOCUMENT_SUITABILITY, + GET_TABLE_INFO, QUERY_MAKER, - question_gate_node, - get_table_info_node, + QUESTION_GATE, + QueryMakerState, document_suitability_node, + get_table_info_node, query_maker_node, + question_gate_node, ) -""" -기본 워크플로우를 위한 StateGraph 구성입니다. -GET_TABLE_INFO -> QUERY_MAKER 순서로 실행됩니다. -""" - # StateGraph 생성 및 구성 builder = StateGraph(QueryMakerState) builder.set_entry_point(QUESTION_GATE) diff --git a/llm_utils/graph_utils/enriched_graph.py b/utils/llm/graph_utils/enriched_graph.py similarity index 90% rename from llm_utils/graph_utils/enriched_graph.py rename to utils/llm/graph_utils/enriched_graph.py index 17e9f36..703726d 100644 --- a/llm_utils/graph_utils/enriched_graph.py +++ b/utils/llm/graph_utils/enriched_graph.py @@ -1,27 +1,26 @@ -import json +""" +기본 워크플로우에 '프로파일 추출(PROFILE_EXTRACTION)'과 '컨텍스트 보강(CONTEXT_ENRICHMENT)'를 +추가한 확장된 그래프입니다. +""" -from langgraph.graph import StateGraph, END -from llm_utils.graph_utils.base import ( - QueryMakerState, - QUESTION_GATE, - GET_TABLE_INFO, +from langgraph.graph import END, StateGraph + +from utils.llm.graph_utils.base import ( + CONTEXT_ENRICHMENT, EVALUATE_DOCUMENT_SUITABILITY, + GET_TABLE_INFO, PROFILE_EXTRACTION, - CONTEXT_ENRICHMENT, QUERY_MAKER, - question_gate_node, - get_table_info_node, + QUESTION_GATE, + QueryMakerState, + context_enrichment_node, document_suitability_node, + get_table_info_node, profile_extraction_node, - context_enrichment_node, query_maker_node, + question_gate_node, ) -""" -기본 워크플로우에 '프로파일 추출(PROFILE_EXTRACTION)'과 '컨텍스트 보강(CONTEXT_ENRICHMENT)'를 -추가한 확장된 그래프입니다. 
-""" - # StateGraph 생성 및 구성 builder = StateGraph(QueryMakerState) builder.set_entry_point(QUESTION_GATE) diff --git a/llm_utils/graph_utils/profile_utils.py b/utils/llm/graph_utils/profile_utils.py similarity index 100% rename from llm_utils/graph_utils/profile_utils.py rename to utils/llm/graph_utils/profile_utils.py diff --git a/llm_utils/llm_response_parser.py b/utils/llm/llm_response_parser.py similarity index 100% rename from llm_utils/llm_response_parser.py rename to utils/llm/llm_response_parser.py diff --git a/llm_utils/output_parser/document_suitability.py b/utils/llm/output_parser/document_suitability.py similarity index 97% rename from llm_utils/output_parser/document_suitability.py rename to utils/llm/output_parser/document_suitability.py index dacdb6e..7b4c11a 100644 --- a/llm_utils/output_parser/document_suitability.py +++ b/utils/llm/output_parser/document_suitability.py @@ -5,7 +5,8 @@ 최상위는 테이블명(string) -> 평가 객체 매핑을 담는 Root 모델입니다. """ -from typing import Dict, List +from typing import List + from pydantic import BaseModel, Field diff --git a/llm_utils/output_parser/question_suitability.py b/utils/llm/output_parser/question_suitability.py similarity index 100% rename from llm_utils/output_parser/question_suitability.py rename to utils/llm/output_parser/question_suitability.py diff --git a/llm_utils/retrieval.py b/utils/llm/retrieval.py similarity index 96% rename from llm_utils/retrieval.py rename to utils/llm/retrieval.py index 39f219c..0b5d916 100644 --- a/llm_utils/retrieval.py +++ b/utils/llm/retrieval.py @@ -1,12 +1,11 @@ import os -from langchain_community.vectorstores import FAISS -from langchain_openai import OpenAIEmbeddings + from langchain.retrievers import ContextualCompressionRetriever from langchain.retrievers.document_compressors import CrossEncoderReranker from langchain_community.cross_encoders import HuggingFaceCrossEncoder from transformers import AutoModelForSequenceClassification, AutoTokenizer -from llm_utils.vectordb import get_vector_db +from utils.llm.vectordb import get_vector_db def load_reranker_model(device: str = "cpu"): diff --git a/llm_utils/tools/__init__.py b/utils/llm/tools/__init__.py similarity index 80% rename from llm_utils/tools/__init__.py rename to utils/llm/tools/__init__.py index 0d71c0b..d7ab34a 100644 --- a/llm_utils/tools/__init__.py +++ b/utils/llm/tools/__init__.py @@ -1,7 +1,7 @@ -from .datahub import ( - set_gms_server, +from utils.llm.tools.datahub import ( get_info_from_db, get_metadata_from_db, + set_gms_server, ) __all__ = [ diff --git a/llm_utils/tools/datahub.py b/utils/llm/tools/datahub.py similarity index 100% rename from llm_utils/tools/datahub.py rename to utils/llm/tools/datahub.py diff --git a/llm_utils/vectordb/__init__.py b/utils/llm/vectordb/__init__.py similarity index 70% rename from llm_utils/vectordb/__init__.py rename to utils/llm/vectordb/__init__.py index 31674ad..6265b0f 100644 --- a/llm_utils/vectordb/__init__.py +++ b/utils/llm/vectordb/__init__.py @@ -2,6 +2,6 @@ VectorDB 모듈 - FAISS와 pgvector를 지원하는 벡터 데이터베이스 추상화 """ -from .factory import get_vector_db +from utils.llm.vectordb.factory import get_vector_db __all__ = ["get_vector_db"] diff --git a/llm_utils/vectordb/factory.py b/utils/llm/vectordb/factory.py similarity index 91% rename from llm_utils/vectordb/factory.py rename to utils/llm/vectordb/factory.py index b09ae1d..942a443 100644 --- a/llm_utils/vectordb/factory.py +++ b/utils/llm/vectordb/factory.py @@ -5,8 +5,8 @@ import os from typing import Optional -from llm_utils.vectordb.faiss_db import 
get_faiss_vector_db -from llm_utils.vectordb.pgvector_db import get_pgvector_db +from utils.llm.vectordb.faiss_db import get_faiss_vector_db +from utils.llm.vectordb.pgvector_db import get_pgvector_db def get_vector_db( diff --git a/llm_utils/vectordb/faiss_db.py b/utils/llm/vectordb/faiss_db.py similarity index 90% rename from llm_utils/vectordb/faiss_db.py rename to utils/llm/vectordb/faiss_db.py index 02fc2d1..d4754a5 100644 --- a/llm_utils/vectordb/faiss_db.py +++ b/utils/llm/vectordb/faiss_db.py @@ -7,8 +7,8 @@ from langchain_community.vectorstores import FAISS -from llm_utils.llm import get_embeddings -from llm_utils.tools import get_info_from_db +from utils.llm.core import get_embeddings +from utils.llm.tools import get_info_from_db def get_faiss_vector_db(vectordb_path: Optional[str] = None): diff --git a/llm_utils/vectordb/pgvector_db.py b/utils/llm/vectordb/pgvector_db.py similarity index 95% rename from llm_utils/vectordb/pgvector_db.py rename to utils/llm/vectordb/pgvector_db.py index d5e7536..d03f034 100644 --- a/llm_utils/vectordb/pgvector_db.py +++ b/utils/llm/vectordb/pgvector_db.py @@ -4,12 +4,12 @@ import os from typing import Optional + import psycopg2 -from sqlalchemy.orm import Session from langchain_postgres.vectorstores import PGVector -from llm_utils.tools import get_info_from_db -from llm_utils.llm import get_embeddings +from utils.llm.core import get_embeddings +from utils.llm.tools import get_info_from_db def _check_collection_exists(connection_string: str, collection_name: str) -> bool:
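For downstream callers, a minimal sketch of how imports change with this rename, assuming the public APIs are otherwise untouched (modules move from llm_utils.* to utils.llm.*, and the llm_utils.llm factory package becomes utils.llm.core):

# Old layout (before this change):
# from llm_utils.llm import get_llm, get_embeddings
# from llm_utils.graph_utils.enriched_graph import builder as enriched_builder
# from llm_utils.llm_response_parser import LLMResponseParser

# New layout (after this change):
from utils.llm.core import get_llm, get_embeddings
from utils.llm.graph_utils.enriched_graph import builder as enriched_builder
from utils.llm.llm_response_parser import LLMResponseParser

llm = get_llm()  # provider/env-var configuration is unchanged; only the module path moved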