In [15]:
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings


def make_chroma_db(documents):
    # Chunking
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    docs = splitter.split_documents(documents)

    # 벡터 저장소 만들기
    db = Chroma.from_documents(docs, OpenAIEmbeddings(), persist_directory="chroma_db")
    return db

def get_top5_docs_from_db(query):
    db = Chroma(persist_directory="chroma_db", embedding_function=OpenAIEmbeddings())
    retriever = db.as_retriever(search_kwargs={"k": 5}) # 상위 5개만 추출하도록 설정

    return retriever.get_relevant_documents(query)


In [11]:
from openai import OpenAI

# OpenAI API 키 설정 (환경변수 또는 직접 입력)
client = OpenAI()

# 🔍 GPT를 사용해 요약 생성
def summarize_with_gpt(content: str, file_path: str, max_chars: int = 1500) -> str:
    prompt = f"""
    다음은 '{file_path}'라는 파일의 코드입니다. 
    이 파일의 목적이 무엇인지, 어떤 기능이 있고 어떤 문제를 해결하는지 간단히 요약해 주세요. 
    \n\n```python\n{content[:max_chars]}\n```\n\n요약:"""
    
    try:
        response = client.chat.completions.create(
            model="gpt-4.1",  # 또는 gpt-3.5-turbo
            messages=[{"role": "user", "content": prompt}],
            temperature=0.3,
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        print(f"❌ GPT 요약 실패 ({file_path}): {e}")
        return "요약 실패"

In [12]:
import sys
sys.path.append(r"C:\Users\USER\Desktop\GitHub\3rd_project")  # chahae 폴더의 상위 폴더

from chahae.github_repo_viewer import main
from dotenv import load_dotenv
import os

load_dotenv()
documents = main(os.environ.get("GITHUB_TOKEN"))


[Git] Git이 설치되어 있습니다: C:\Program Files\Git\cmd\git.EXE
[오류] 올바른 GitHub 저장소 URL을 입력해주세요.
예시: https://github.com/octocat/Hello-World
또는: https://github.com/octocat/Hello-World/blob/main/README.md
[오류] 올바른 GitHub 저장소 URL을 입력해주세요.
예시: https://github.com/octocat/Hello-World
또는: https://github.com/octocat/Hello-World/blob/main/README.md

[정보] 저장소 소유자: AnsirH
[정보] 저장소 이름: LANGCHAIN

[정보] 전체 저장소 내용을 가져오는 중...


In [13]:
chroma_db = make_chroma_db(documents)

In [16]:
query = "prompt template 관련 코드"
results = get_top5_docs_from_db(query)
results

[Document(metadata={'file_path': '03_PromptTemplate.ipynb', 'sha': '5f590d28b92557296352981f3f6250dd0259b346', 'source': 'https://github.com/AnsirH/LANGCHAIN/blob/main/03_PromptTemplate.ipynb', 'type': 'file', 'file_name': '03_PromptTemplate.ipynb', 'size': 16527}, page_content='"# 템플릿 문자열 정의\\n",\n    "template_str = (\\n",\n    "    \\"당신은 최고 수준의 마케팅 카피라이터입니다.\\\\n\\"\\n",\n    "    \\"아래 제품의 매력적인 홍보 문구를 100자 이내로 작성해주세요.\\\\n\\\\n\\"\\n",\n    "    \\"제품 명: {product_name}\\\\n\\"\\n",\n    ")\\n",\n    "\\n",\n    "# 템플릿 객체 생성\\n",\n    "product_prompt = PromptTemplate.from_template(template_str)\\n",\n    "\\n",\n    "# 프롬프트에 제품 이름을 삽입\\n",\n    "product_name = \\"스마트폰\\"\\n",\n    "formatted_prompt = product_prompt.format(product_name=product_name)\\n",\n    "# 프롬프트 출력\\n",\n    "print(formatted_prompt)"'),
 Document(metadata={'file_name': '03_PromptTemplate.ipynb', 'type': 'file', 'size': 16527, 'sha': '5f590d28b92557296352981f3f6250dd0259b346', 'source': 'https://github.com/Ansir

In [17]:
from langchain.chat_models import ChatOpenAI
from langchain.chains.qa_with_sources import load_qa_with_sources_chain


# 4. LLM 준비
llm = ChatOpenAI(model_name="gpt-4", temperature=0)

# 5. 문서를 기반으로 설명 생성 (Chain 사용)
qa_chain = load_qa_with_sources_chain(llm, chain_type="stuff")
result = qa_chain({"input_documents": results, "question": query}, return_only_outputs=True)

# 6. 출력
print("📝 설명 결과:\n", result["output_text"])
# print("\n📚 참고된 문서 정보:\n", result["sources"])

  llm = ChatOpenAI(model_name="gpt-4", temperature=0)
See also the following migration guides for replacements based on `chain_type`:
stuff: https://python.langchain.com/docs/versions/migrating_chains/stuff_docs_chain
map_reduce: https://python.langchain.com/docs/versions/migrating_chains/map_reduce_chain
refine: https://python.langchain.com/docs/versions/migrating_chains/refine_chain
map_rerank: https://python.langchain.com/docs/versions/migrating_chains/map_rerank_docs_chain

  qa_chain = load_qa_with_sources_chain(llm, chain_type="stuff")
  result = qa_chain({"input_documents": results, "question": query}, return_only_outputs=True)


📝 설명 결과:
 The code related to the prompt template includes defining a template string, creating a template object, inserting a product name into the prompt, and printing the formatted prompt. It also includes setting up an output parser, using the template to complete a sentence, and printing the response. There are also examples of creating a PartialPromptTemplate, which is a new template that partially fills in part of the template. Other examples include formatting the prompt value through formatting and printing the prompt.
SOURCES: https://github.com/AnsirH/LANGCHAIN/blob/main/03_PromptTemplate.ipynb, https://github.com/AnsirH/LANGCHAIN/blob/main/01_LCEL.ipynb


In [None]:
result

{'output_text': '하노이탑 코드 경로는 다음과 같습니다: [바로가기](https://github.com/wonwookim/coding_test_study/tree/main/week_2)\nSOURCES: https://github.com/hwangchahae/coding_test_study/blob/main/README.md'}