In [1]:
import os
import time
from openai import OpenAI
from dotenv import load_dotenv

load_dotenv()

False

Define Loader Class

In [2]:
from typing import List
import requests

from langchain.docstore.document import Document
from langchain.document_loaders.base import BaseLoader


# BaseLoader : LangChain_Core Class
class WikidocsLoader(BaseLoader):
    def __init__(self, book_id: int, base_url="https://wikidocs.net", **kwargs):
        super().__init__(**kwargs)
        self.book_id = book_id
        self.base_url = base_url
        self.headers = {"Content-Type": "application/json"}

    def load(self) -> List[Document]:
        toc = self._get_toc(self.book_id)
        pages = []
        for item in toc:
            page_id = item["id"]
            page_data = self._get_page(page_id)
            document = Document(
                title=page_data["subject"],
                page_content=page_data["content"],
                metadata={
                    'id': page_id,
                    'source': f"{self.base_url}/{page_id}",
                    'title': page_data["subject"]
                }
            )
            pages.append(document)

        return pages

    # Get book index by book ID
    def _get_toc(self, book_id):
        url = f"{self.base_url}/api/v1/toc/{book_id}"
        response = requests.get(url, headers=self.headers)
        if response.status_code == 200:
            return response.json()
        else:
            raise ValueError("Failed to get table of contents")

    # Get page contents by book ID
    def _get_page(self, page_id):
        url = f"{self.base_url}/api/v1/page/{page_id}"
        response = requests.get(url, headers=self.headers)
        if response.status_code == 200:
            return response.json()
        else:
            raise ValueError("Failed to get page")

In [3]:
book_id = 14316  # 생성AI 프로그래밍 트러블슈팅 가이드
loader = WikidocsLoader(book_id)
documents = loader.load()

In [4]:
documents

[Document(metadata={'id': 231844, 'source': 'https://wikidocs.net/231844', 'title': '1. OpenAI 관련 문제해결'}, page_content='OpenAI-Python 깃허브: <https://github.com/openai/openai-python>\n\n[파이썬 openai 패키지 릴리스 이력](https://pypi.org/project/openai/#history)\n\n마이그레이션 가이드:\n\n- OpenAI [v1.0.0 Migration Guide](https://github.com/openai/openai-python/discussions/742)\n- Azure [OpenAI Python API 라이브러리 1.x로 마이그레이션](https://learn.microsoft.com/ko-kr/azure/ai-services/openai/how-to/migration?tabs=python-new%2Cdalle-fix)\n'),
 Document(metadata={'id': 239781, 'source': 'https://wikidocs.net/239781', 'title': '1.1. OpenAI 관련 기본적인 문제 해결'}, page_content='.'),
 Document(metadata={'id': 231848, 'source': 'https://wikidocs.net/231848', 'title': "ImportError: cannot import name 'OpenAI' from 'openai'"}, page_content="## 문제\n\nopenai==0.28을 설치한 채로 다음을 실행하면,\n\n```python\nfrom openai import OpenAI\n```\n\n다음 오류가 발생한다.\n\n```\nImportError: cannot import name 'OpenAI' from 'openai'\n```\n\n## 해결\n\n[최신 버전을 설치](2

In [6]:
# Create Index
from langchain_openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS

text_splitter = CharacterTextSplitter(chunk_size=600, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

Created a chunk of size 1008, which is longer than the specified 600
Created a chunk of size 622, which is longer than the specified 600
Created a chunk of size 850, which is longer than the specified 600
Created a chunk of size 744, which is longer than the specified 600
Created a chunk of size 1247, which is longer than the specified 600
Created a chunk of size 869, which is longer than the specified 600
Created a chunk of size 681, which is longer than the specified 600
Created a chunk of size 654, which is longer than the specified 600
Created a chunk of size 1300, which is longer than the specified 600
Created a chunk of size 1100, which is longer than the specified 600
Created a chunk of size 1087, which is longer than the specified 600
Created a chunk of size 947, which is longer than the specified 600
Created a chunk of size 1854, which is longer than the specified 600
Created a chunk of size 738, which is longer than the specified 600


In [20]:
docs

[Document(metadata={'id': 231844, 'source': 'https://wikidocs.net/231844', 'title': '1. OpenAI 관련 문제해결'}, page_content='OpenAI-Python 깃허브: <https://github.com/openai/openai-python>\n\n[파이썬 openai 패키지 릴리스 이력](https://pypi.org/project/openai/#history)\n\n마이그레이션 가이드:\n\n- OpenAI [v1.0.0 Migration Guide](https://github.com/openai/openai-python/discussions/742)\n- Azure [OpenAI Python API 라이브러리 1.x로 마이그레이션](https://learn.microsoft.com/ko-kr/azure/ai-services/openai/how-to/migration?tabs=python-new%2Cdalle-fix)'),
 Document(metadata={'id': 239781, 'source': 'https://wikidocs.net/239781', 'title': '1.1. OpenAI 관련 기본적인 문제 해결'}, page_content='.'),
 Document(metadata={'id': 231848, 'source': 'https://wikidocs.net/231848', 'title': "ImportError: cannot import name 'OpenAI' from 'openai'"}, page_content="## 문제\n\nopenai==0.28을 설치한 채로 다음을 실행하면,\n\n```python\nfrom openai import OpenAI\n```\n\n다음 오류가 발생한다.\n\n```\nImportError: cannot import name 'OpenAI' from 'openai'\n```\n\n## 해결\n\n[최신 버전을 설치](229

In [7]:
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

In [8]:
search_index = FAISS.from_documents(docs, embeddings)

Retrieval QA with Sources

In [12]:
from langchain.chains import RetrievalQAWithSourcesChain
from langchain_openai import OpenAI

retrieval_qa_with_sources_chain = RetrievalQAWithSourcesChain.from_chain_type(
    OpenAI(temperature=0), chain_type="stuff", retriever=search_index.as_retriever()
)

In [13]:
def retrieval_qa_with_sources(question):
    response = retrieval_qa_with_sources_chain.invoke(
        {"question": question}, return_only_outputs=True
    )
    if response["sources"]:
        return response["answer"] + "출처: " + response["sources"]
    else:
        return response["answer"]

In [14]:
print(retrieval_qa_with_sources("openai 패키지 구버전과 최신 버전 설치 방법"))

 openai 패키지의 구버전(0.28)과 최신 버전의 설치 방법은 다음과 같다: 
- 구버전(0.28)으로 고정: `pip install -U openai==0.28` (https://wikidocs.net/229554#installing-openai-0.28)
- 최신 버전 설치: `pip install -U openai` (https://wikidocs.net/229554#installing-latest-openai-package)
- 코드 수정하여 신버전의 패키지 사용: openai>=1.0.0에서는 코드를 다음과 같이 수정하면 오류나 경고가 뜨지 않고 잘 실행된다: 
```python
import os
import openai

openai.api_key = os.environ["OPENAI_API_KEY"]

response = openai.[[MARK]]chat.completions[[/MARK]].create(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "user", "content": "hello"},
    ],
)
```
(https://wikidocs.net/231865


In [15]:
print(retrieval_qa_with_sources("langchain_community.llms.openai.OpenAI 경고가 떠요"))

 The class `langchain_community.llms.openai.OpenAI` was deprecated in langchain-community 0.0.10 and will be removed in 0.2.0. An updated version of the class exists in the langchain-openai package and should be used instead. To use it run `pip install -U langchain-openai` and import as `from langchain_openai import OpenAI`.
출처: https://wikidocs.net/231843, https://wikidocs.net/235770, https://wikidocs.net/233334


QA with sources

In [16]:
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.prompts import PromptTemplate
from langchain_openai import OpenAI

template = """Given the following extracted parts of a long document and a question, create a final answer with references ("SOURCES").
If you don't know the answer, just say that you don't know. Don't try to make up an answer.
ALWAYS return a "SOURCES" part in your answer.
Respond in Korean.

QUESTION: {question}
=========
{summaries}
=========
FINAL ANSWER IN KOREAN:"""

PROMPT = PromptTemplate(template=template, input_variables=["summaries", "question"])

qa_with_sources_chain = load_qa_with_sources_chain(
    OpenAI(temperature=0),
    chain_type="stuff",
    prompt=PROMPT
)

See also the following migration guides for replacements based on `chain_type`:
stuff: https://python.langchain.com/docs/versions/migrating_chains/stuff_docs_chain
map_reduce: https://python.langchain.com/docs/versions/migrating_chains/map_reduce_chain
refine: https://python.langchain.com/docs/versions/migrating_chains/refine_chain
map_rerank: https://python.langchain.com/docs/versions/migrating_chains/map_rerank_docs_chain

  qa_with_sources_chain = load_qa_with_sources_chain(


In [17]:
def qa_with_sources(question):
    return qa_with_sources_chain.invoke(
        {
            "input_documents": search_index.similarity_search(question, k=3),
            "question": question,
        },
        return_only_outputs=True,
    )["output_text"]

In [18]:
print(qa_with_sources('openai 패키지 구버전과 최신 버전 설치 방법'))

 openai 패키지를 설치하는 방법은 두 가지가 있습니다. 첫 번째 방법은 구버전인 0.28로 고정하는 것이고, 두 번째 방법은 최신 버전으로 설치하는 것입니다. 구버전으로 고정하려면 `pip install -U openai==0.28` 명령을 실행하면 됩니다. 최신 버전으로 설치하려면 `pip install -U openai` 명령을 실행하면 됩니다. 하지만 최신 버전에서는 코드를 수정해야 합니다. 따라서 옵션 2를 선택하면 됩니다. 이때 코드를 수정하는 방법은 두 가지가 있습니다. 첫 번째 방법은 다운그레이드하는 것이고, 두 번째 방법은 코드를 수정하는 것입니다. 다운그레이드하는 방법은 [https://wikidocs.net/229554#installing-openai-0.28](https://wikidocs.net


In [19]:
print(qa_with_sources('langchain_community.llms.openai.OpenAI 경고가 떠요'))


