# --- 로컬 DB 연결 세팅 ---------------------------------------------
pip install -q python-dotenv sqlalchemy psycopg2-binary pgvector

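.env에 추가 (아래 셀들이 DATABASE_URL을 읽으므로 반드시 함께 설정)
# LLM API KEY
ANTHROPIC_API_KEY=
DEBUG=true
# PostgreSQL 접속 URL (SQLAlchemy 형식, 값은 환경에 맞게 변경)
DATABASE_URL=postgresql+psycopg2://USER:PASSWORD@localhost:5432/DBNAME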

In [None]:
from dotenv import load_dotenv
import os, sqlalchemy as sa

load_dotenv()  # load key=value pairs from .env into the process environment

# Fail fast with an explicit message: os.getenv returns None when the variable
# is unset, and handing None to create_engine produces an error that does not
# name the missing setting.
database_url = os.getenv("DATABASE_URL")
if not database_url:
    raise RuntimeError(
        "DATABASE_URL is not set; define it in .env or the environment "
        "before creating the engine."
    )

# pool_pre_ping=True asks SQLAlchemy to test pooled connections on checkout.
engine = sa.create_engine(database_url, pool_pre_ping=True)

# Quick connectivity test: run a trivial query and check that it yields 1.
with engine.begin() as conn:
    print("DB 연결 OK:", conn.execute(sa.text("SELECT 1")).scalar() == 1)

pip install -q langchain-pgvector sentence_transformers

In [None]:
from langchain_pgvector import PGVector
from langchain.embeddings import HuggingFaceEmbeddings

# Sentence-embedding model used for both stored vectors and queries;
# normalize_embeddings=True is forwarded to the encoder so vectors come back
# unit-length.
emb = HuggingFaceEmbeddings(
    model_name="nlpai-lab/KURE-v1",
    encode_kwargs={"normalize_embeddings": True},
)

# Derive the vector width from the model instead of hard-coding it. The
# previous literal (768) has to match what the model actually emits, and
# KURE-v1 is believed to produce 1024-dimensional vectors -- probing once
# keeps the store in sync with whichever model is configured above.
embedding_dim = len(emb.embed_query("dimension probe"))

# NOTE(review): the keyword names below are kept exactly as written; confirm
# them against the installed PGVector class, since they differ between
# LangChain vector-store packages.
store = PGVector(
    connection_string=os.getenv("DATABASE_URL"),
    collection_name="members",          # table name (per the original note)
    embedding_function=emb,
    id_column_name="member_id",
    vector_column_name="embedding",     # pgvector column
    embedding_dimension=embedding_dim,
)
# Retriever configured with k=200 results per query.
retriever = store.as_retriever(search_kwargs={"k": 200})

In [None]:
def get_members_by_meta(
    age_from: int,
    age_to: int,
    gender: str,
    limit: int = 100,
) -> list[int]:
    """Return a random sample of member ids matching an age range and gender.

    Args:
        age_from: Inclusive lower bound on ``age``.
        age_to: Exclusive upper bound on ``age``.
        gender: Value compared for equality with the ``gender`` column
            (the original note gives 'M' / 'F').
        limit: Maximum number of ids to return.

    Returns:
        ``member_id`` values from the ``members`` table, in the random order
        chosen by the database.
    """
    sql = """
      SELECT member_id
      FROM   members
      WHERE  age >= :lo AND age < :hi AND gender = :g
      ORDER  BY random()
      LIMIT  :lim
    """
    # All caller-supplied values travel as bind parameters, never as SQL text.
    bind_params = {"lo": age_from, "hi": age_to, "g": gender, "lim": limit}
    statement = sa.text(sql)

    with engine.begin() as conn:
        result = conn.execute(statement, bind_params)
        # Each row holds a single column; unpack it while the connection is open.
        return [member_id for (member_id,) in result]

In [None]:
# (1) Metadata filter only: sample up to 100 ids with 20 <= age < 30, gender "M".
mids = get_members_by_meta(age_from=20, age_to=30, gender="M", limit=100)
print("메타쿼리 결과(샘플 5):", mids[:5], "| 총", len(mids))

# (2) Vector search + metadata filter (hybrid example).
# The retriever returns its candidates first; the age/gender predicate is then
# applied in Python and the survivors are capped at 100, so fewer than 100 may
# remain.
query = "운동하는 20대 남자 회원 100명"
docs = retriever.get_relevant_documents(query)
docs = [
    doc
    for doc in docs
    if 20 <= doc.metadata["age"] < 30 and doc.metadata["gender"] == "M"
][:100]
print(
    "하이브리드 결과(샘플 5):",
    [doc.metadata["member_id"] for doc in docs[:5]],
    "| 총",
    len(docs),
)