# In [88]:
# 시스템 모듈
import os
from dotenv import load_dotenv

# 랭체인 모듈
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnableLambda, RunnablePassthrough
from langchain.chat_models import ChatOpenAI

# 추가 모듈
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Load environment variables (e.g. from a local .env file)
load_dotenv()

# Vector DB setup: open the FAISS index saved under DB_INDEX
DB_INDEX = "MANUAL_DB"
embeddings = OpenAIEmbeddings()
# NOTE(review): allow_dangerous_deserialization=True opts in to loading the
# pickled part of the saved index; only point DB_INDEX at a folder you trust.
vectorstore = FAISS.load_local(
    DB_INDEX,
    embeddings,
    allow_dangerous_deserialization=True,
)
# Retriever that asks the vector store for the top 4 matches per query
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

# LLM setup: gpt-4o, temperature 0, streaming enabled
llm = ChatOpenAI(model_name="gpt-4o", temperature=0, streaming=True)

# Prompt template: a system message carrying the retrieved context, followed
# by the user's question. Placeholders: {context} and {question}.
prompt = ChatPromptTemplate.from_messages(
    [
        # System message
        (
            "system",
            """
    Answer the question using ONLY the following context.
    If you don't know the answer just say you don't know. DON'T make anything up.

    Context: {context}
    """,
        ),
        # User message
        ("human", "{question}"),
    ]
)

# Embedding helper
def get_embeddings(texts, embeddings_model):
    """Embed a single text or a batch of texts.

    Args:
        texts: Either one text (embedded as a query) or a list/tuple of
            texts (embedded as documents).
        embeddings_model: Object providing ``embed_query(text)`` and
            ``embed_documents(list_of_texts)``.

    Returns:
        Whatever ``embed_documents`` returns for a list/tuple input,
        otherwise whatever ``embed_query`` returns.
    """
    # Treat tuples like lists so a batch is never mistaken for a single query;
    # hand the model a real list, which is what embed_documents is called with
    # for list input.
    if isinstance(texts, (list, tuple)):
        return embeddings_model.embed_documents(list(texts))
    return embeddings_model.embed_query(texts)

# Similarity helper
def calculate_cosine_similarity(query_embedding, doc_embeddings):
    """Return the cosine similarity of one query vector to each document vector.

    Args:
        query_embedding: A single embedding vector.
        doc_embeddings: A sequence of embedding vectors, one per document.

    Returns:
        The first (and only) row of the pairwise similarity matrix, i.e. one
        score per entry of ``doc_embeddings``.
    """
    # cosine_similarity works on 2-D inputs, so wrap the query as a 1-row matrix
    query_matrix = [query_embedding]
    similarity_matrix = cosine_similarity(query_matrix, doc_embeddings)
    return similarity_matrix[0]

# Format retrieved documents and attach similarity scores
def formatting_docs_with_scores(docs, query, embeddings_model):
    """Render retrieved documents as one context string annotated with scores.

    Args:
        docs: Documents exposing ``page_content`` (the text) and ``metadata``
            (a mapping; its optional ``"source"`` entry is shown in the output).
        query: Query text the documents are scored against.
        embeddings_model: Embedding object passed through to
            ``get_embeddings`` for both the query and the document texts.

    Returns:
        One ``Source / Score / content`` section per document, separated by
        blank lines, or an empty string when ``docs`` is empty.
    """
    # Nothing to score: skip the embedding calls and avoid asking for a
    # similarity against an empty set of document vectors.
    if not docs:
        return ""

    # Embed the query and every document text
    query_embedding = get_embeddings(query, embeddings_model)
    doc_texts = [doc.page_content for doc in docs]
    doc_embeddings = get_embeddings(doc_texts, embeddings_model)

    # One similarity score per document, in the same order as ``docs``
    similarities = calculate_cosine_similarity(query_embedding, doc_embeddings)

    # Pair each document with its score
    context = []
    for doc, similarity in zip(docs, similarities):
        source = doc.metadata.get("source", "")
        content = doc.page_content
        context.append(f"Source: {source}\nScore: {similarity}\n\n{content}")

    return "\n\n".join(context)

# Chain setup
def _format_retrieved(inputs):
    """Format the retrieved docs, scoring them against the actual question.

    ``inputs`` is the mapping produced by the step below: ``"docs"`` holds the
    retriever output and ``"question"`` the original chain input.
    """
    return formatting_docs_with_scores(inputs["docs"], inputs["question"], embeddings)


# Retrieve documents for the question and keep the question alongside them, so
# the similarity scores refer to the same text that was searched for (rather
# than a fixed, unrelated query string).
_context_with_scores = {
    "docs": retriever,
    "question": RunnablePassthrough(),
} | RunnableLambda(_format_retrieved)

chain = {
    "context": _context_with_scores,
    "question": RunnablePassthrough(),
} | prompt | llm

response = chain.invoke("사용자 추가하는 방법")
