In [1]:
from langchain.llms.ollama import Ollama
from langchain.chat_models import ChatOllama
from langchain.callbacks import StreamingStdOutCallbackHandler

from langchain.document_loaders import TextLoader
from langchain.document_loaders import UnstructuredFileLoader #txt,pef,docx,jpg 등 다양한거 다 들고올 수 있음
from langchain.text_splitter import RecursiveCharacterTextSplitter # 문서 분할용

from langchain.embeddings import OllamaEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import Chroma
from langchain.storage import LocalFileStore

# File-backed byte store rooted at ./.cache/ (handed to CacheBackedEmbeddings further down).
cache_dir = LocalFileStore("./.cache/")

# RAG (Retrieval Augmented Generation) chat model.
# Other local models that were tried: "gemma:latest", "llama2:latest".
chat = ChatOllama(
    callbacks=[StreamingStdOutCallbackHandler()],  # stream the reply to stdout while it is generated
    streaming=True,
    temperature=0.1,
    model="mistral:latest",
)

## Data Loaders and Splitters
# Source document for the examples below.
loader = UnstructuredFileLoader("./files/mid_text_en.txt")

# Character-count based splitter; the separator list is left at the library
# default (an explicit `separators="\n"` override was tried and disabled).
splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=100,  # carry a little of the previous chunk over into the next one
    chunk_size=2600,
)

# Quick manual checks:
# loader.load()
# print(loader.load_and_split(text_splitter=splitter))

## Tiktoken
# Token-based variant: chunk_size / chunk_overlap are counted with a tiktoken
# encoder instead of in characters.
# NOTE(review): tiktoken's tokenizer presumably differs from the Ollama model's
# own tokenizer, so these counts are approximate - confirm if limits matter.
splitter2 = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    # `separators` takes a list of strings. A bare "\n" would only work by
    # accident, because iterating a one-character string yields that character.
    separators=["\n"],
    chunk_size=2600,
    chunk_overlap=100,
)
# Author's note (translated): models have a limit, so splitting by text length
# is preferable to splitting by tiktoken units.

## Vectors
# Embedding client built with the OllamaEmbeddings defaults (no model is specified here).
embedder = OllamaEmbeddings()

# vector1 = embedder.embed_query("Hi")
# print(len(vector1))
# vector2 = embedder.embed_documents([
#     "hi",
#     "how",
#     "are",
#     "you longer sentences because",
# ])
# print(len(vector2), len(vector2[0])) # 4 vectors, each with 4096 dimensions

## Embeddings should not be recomputed on every run; cache them and keep them in a vector store.
# Options: Chroma (used here - local), FAISS, Pinecone (cloud), and other vector stores.
# loader2 = UnstructuredFileLoader("./files/mid_text_ko.txt")
# docs = loader2.load_and_split(text_splitter=splitter2)
# cached_embeddings = CacheBackedEmbeddings.from_bytes_store(embedder, cache_dir)
# vectorstore = Chroma.from_documents(docs, embedder)
# vectorstore = Chroma.from_documents(docs, cached_embeddings)

In [None]:
# results = vectorstore.similarity_search("Tell me the type of weather.") # search the vector store
# print(len(results))
# results

In [None]:
# from langchain.chains import RetrievalQA

# RetrievalQA is Legacy
# chain = RetrievalQA.from_chain_type(
#     llm=chat,
#     chain_type="stuff",
#     retriever=vectorstore.as_retriever(),
# )
# chain.run("Tell me the type of weather.")

In [2]:
# Stuff LCEL Chain
from langchain.vectorstores import FAISS
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough

# Load the source text and split it with the tiktoken-based splitter.
docs = loader.load_and_split(text_splitter=splitter2)

embeddings = OllamaEmbeddings()

# Namespace the on-disk cache by embedding model. Without a namespace the cache
# key depends on the text alone, so vectors cached under one model would be
# silently reused after switching to another. ":" (as in "mistral:latest") is
# replaced because the namespace is prefixed to every key and the file store
# turns keys into file names.
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir,
    namespace=embeddings.model.replace(":", "_"),
)

vectorstore = FAISS.from_documents(docs, cached_embeddings)

retriever = vectorstore.as_retriever()
retriver = retriever  # backward-compatible alias for the earlier misspelled name


def format_docs(documents):
    """Join the text of the retrieved documents into one context string.

    Without this step the prompt receives the ``repr`` of a list of
    ``Document`` objects (escaped newlines plus metadata) instead of plain text.
    """
    return "\n\n".join(document.page_content for document in documents)


prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant. Answer questions using only the following context. If you don't know the answer just say you don't know, don't make it up:\n\n{context}",
        ),
        ("human", "{question}"),
    ]
)

# "Stuff" chain: the question goes to the retriever, the retrieved text is
# placed into {context}, and the question itself is passed through unchanged.
chain = (
    {
        # A plain function piped after a runnable is coerced into a runnable step.
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    | prompt
    | chat
)

chain.invoke("Tell me the type of weather.")

 The text provides information about various types of clouds and their associated weather conditions. However, it does not explicitly state the type of weather in the given text. The text mentions different cloud types such as cumulus humilis, stratus, nimbostratus, cumulus mediocris, cumulus congestus, cumulonimbus calvus, and cumulonimbus capillatus, and their associated weather phenomena like rain or snow. But it does not specify a particular type of weather in the text as a whole.

AIMessage(content=' The text provides information about various types of clouds and their associated weather conditions. However, it does not explicitly state the type of weather in the given text. The text mentions different cloud types such as cumulus humilis, stratus, nimbostratus, cumulus mediocris, cumulus congestus, cumulonimbus calvus, and cumulonimbus capillatus, and their associated weather phenomena like rain or snow. But it does not specify a particular type of weather in the text as a whole.')

In [1]:
# Map Reduce LCEL Chain - well suited to large numbers of documents
from langchain.chat_models import ChatOllama
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OllamaEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda

# Chat model used for both the map step and the final (reduce) step.
llm = ChatOllama(
    model="mistral:latest",
    temperature=0.1,
)

# File-backed byte store that persists computed embeddings between runs.
cache_dir = LocalFileStore("./.cache/")

# Token-counted chunks, split on newlines.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=1600,
    chunk_overlap=100,
)
loader = UnstructuredFileLoader("./files/George-Orwell 1984 part1.txt")

docs = loader.load_and_split(text_splitter=splitter)

embeddings = OllamaEmbeddings()

# Namespace the cache by embedding model. Without a namespace the cache key
# depends on the text alone, so vectors cached under one model would be
# silently reused after switching to another. ":" is replaced because the
# namespace is prefixed to every key and the file store turns keys into file names.
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir,
    namespace=embeddings.model.replace(":", "_"),
)

# Embedding uncached chunks calls the Ollama /api/embeddings endpoint, so the
# Ollama server (localhost:11434 in the recorded run) must be running before
# this cell is executed; otherwise it fails with a connection-refused error.
vectorstore = FAISS.from_documents(docs, cached_embeddings)

retriever = vectorstore.as_retriever()

# Map-step prompt: asks the model to quote, verbatim, whatever part of one
# document portion ({context}) is relevant to the user's {question}.
# The system text is whitespace-sensitive; keep the literal exactly as written.
map_doc_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
            Use the following portion of a long document to see if any of the text is relevant to answer the question. Return any relevant text verbatim. If there is no relevant text, return : ''
            -------
            {context}
            """,
        ),
        ("human", "{question}"),
    ]
)

# Rendered map prompt piped straight into the chat model.
map_doc_chain = map_doc_prompt | llm


def map_docs(inputs):
    """Run the per-document extraction chain and merge its answers.

    ``inputs`` is a mapping with two keys: ``"documents"`` (items exposing
    ``page_content``) and ``"question"``. ``map_doc_chain`` is invoked once per
    document, in order, and the ``content`` of every response is joined with a
    blank line between entries.
    """
    documents, question = inputs["documents"], inputs["question"]
    extracted = []
    for document in documents:
        response = map_doc_chain.invoke(
            {"context": document.page_content, "question": question}
        )
        extracted.append(response.content)
    return "\n\n".join(extracted)


# Map step: look up documents for the question with the retriever, forward the
# question unchanged, then hand both to map_docs.
map_chain = (
    {"documents": retriever, "question": RunnablePassthrough()}
    | RunnableLambda(map_docs)
)

# Reduce-step prompt: {context} is filled with the output of the map step and
# {question} with the user's question.
# The system text is whitespace-sensitive; keep the literal exactly as written.
final_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
            Given the following extracted parts of a long document and a question, create a final answer. 
            If you don't know the answer, just say that you don't know. Don't try to make up an answer.
            ------
            {context}
            """,
        ),
        ("human", "{question}"),
    ]
)

# Full map-reduce pipeline: the map step produces {context}, the question is
# passed through unchanged, and the final prompt is answered by the chat model.
chain = (
    {
        "context": map_chain,
        "question": RunnablePassthrough(),
    }
    | final_prompt
    | llm
)

chain.invoke("How many ministries are mentioned")

ValueError: Error raised by inference endpoint: HTTPConnectionPool(host='localhost', port=11434): Max retries exceeded with url: /api/embeddings (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x0000024F8C11C8D0>: Failed to establish a new connection: [WinError 10061] 대상 컴퓨터에서 연결을 거부했으므로 연결하지 못했습니다'))