In [1]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.llms import GPT4All
from langchain_core.chat_history import BaseChatMessageHistory

from langchain_core.prompts.base import BasePromptTemplate
from langchain_core.prompts import MessagesPlaceholder

from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import ChatPromptTemplate

from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

# from langchain.retrievers import ContextualCompressionRetriever
# from langchain.retrievers.document_compressors import CrossEncoderReranker
# from langchain_community.cross_encoders import HuggingFaceCrossEncoder
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough




In [2]:

# Source PDF; the path is relative, so it resolves against the kernel's working directory.
file_path = "Huyen C. Designing Machine Learning Systems...2022.pdf"

# Read the PDF and split it into the document chunks that get indexed below.
loader = PyPDFLoader(file_path)
pages = loader.load_and_split()

In [3]:
def pretty_print_docs(docs):
    """Print each document's text under a numbered heading.

    Args:
        docs: Iterable of objects exposing a string ``page_content`` attribute.

    Documents are numbered from 1 and separated by a 100-character dashed
    rule on its own line. Nothing is returned; the text goes to stdout.
    """
    divider = "\n" + "-" * 100 + "\n"
    sections = []
    for number, doc in enumerate(docs, start=1):
        sections.append(f"Document {number}:\n\n" + doc.page_content)
    print(divider.join(sections))

In [4]:
# Embedding model used to vectorize the PDF chunks when the FAISS index is built;
# constructed with no arguments, i.e. whatever defaults the installed version provides.
gpt4all_embd = GPT4AllEmbeddings()

In [5]:
# Build the FAISS vector index from the loaded chunks using the async constructor
# (top-level `await` is valid in a notebook cell, not in a plain .py script).
db = await FAISS.afrom_documents(pages, gpt4all_embd)
# Expose the index through the retriever interface with its default search settings.
retriever = db.as_retriever()

In [6]:
# model = HuggingFaceCrossEncoder(model_name="BAAI/bge-reranker-base")
# compressor = CrossEncoderReranker(model=model, top_n=3)

# compression_retriever = ContextualCompressionRetriever(
#     base_compressor=compressor, base_retriever=retriever
# )

In [7]:
# query = "What is MLOps?"
# docs = retriever.invoke(query)
# pretty_print_docs(docs)

In [8]:
# Local GPT4All model shared by every chain below.
# The Windows path must be a raw string: in a normal literal, "\G" and "\P" are
# invalid escape sequences (SyntaxWarning on recent Python, planned to become an
# error). The raw literal yields exactly the same path value.
model = GPT4All(model=r"E:\GPT4ALL\Phi-3-mini-4k-instruct.Q4_0.gguf", n_threads=8)

In [9]:
# system = """You are an expert about a set of software for building LLM-powered applications called LangChain, LangGraph, LangServe, and LangSmith.

# LangChain is a Python framework that provides a large set of integrations that can easily be composed to build LLM applications.
# LangGraph is a Python package built on top of LangChain that makes it easy to build stateful, multi-actor LLM applications.
# LangServe is a Python package built on top of LangChain that makes it easy to deploy a LangChain application as a REST API.
# LangSmith is a platform that makes it easy to trace and test LLM applications.

# Answer the user question as best you can. Answer as though you were writing a tutorial that addressed the user question."""

# prompt = ChatPromptTemplate.from_messages(
#     [
#         ("system", system),
#         ("human", "{question}"),
#     ]
# )

# qa_no_context = prompt | model | StrOutputParser()

# hyde_chain = RunnablePassthrough.assign(hypothetical_document=qa_no_context)

In [10]:
# In-memory chat histories keyed by session id; contents are lost when the kernel restarts.
history = {}


def maintain_history(session_id):
    """Return the chat history for ``session_id``, creating it on first use.

    Args:
        session_id: Hashable key identifying the conversation.

    Returns:
        The history object stored for this session. Repeated calls with the
        same id return the same object, so messages accumulate across turns.
    """
    if session_id in history:
        return history[session_id]
    session_history = ChatMessageHistory()
    history[session_id] = session_history
    return session_history


In [11]:
# System instruction for the query-rewriting step. It is built from adjacent
# string literals so that every sentence ends with an explicit space; the earlier
# backslash line-continuations glued sentences together ("history.Do not ...").
contextual_query = (
    "Given the query and the message history, create a new query which can be "
    "interpreted without the chat history. "
    "Do not add any extra information other than what is available in the chat "
    "history and existing query. "
    "The resultant query must not exceed 50 words."
)

# Prompt layout: instruction, then the prior turns, then the latest user message.
contextual_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextual_query),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

# Retriever that first has the LLM rewrite the question using the chat history
# and then searches the vector store with the rewritten query.
contextual_retriever = create_history_aware_retriever(model, retriever, contextual_prompt)

In [14]:
# Smoke-test the history-aware retriever on its own. No "chat_history" key is
# supplied, so this exercises the first-turn case of a conversation.
contextual_retriever.invoke({"input": "What is the need for MLOps?"})

[Document(metadata={'source': 'Huyen C. Designing Machine Learning Systems...2022.pdf', 'page': 18}, page_content='T H E  R E L A T I O N S H I P  B E T W E E N  M L O P S  A N D  M L\nS Y S T E M S  D E S I G N\nOps in MLOps comes from DevOps, short for Developments and Operations. T o\noperationalize something means to bring it into production, which includes deploying,\nmonitoring, and maintaining it. MLOps is a set of tools and best practices for bringing\nML into production.\nML systems design takes a system approach to MLOps, which means that it considers\nan ML system holistically to ensure that all the components and their stakeholders can\nwork together to satisfy the specified objectives and requirements.\nFigur e 1-1. Differ ent components of an ML system. “ML algorithms” is usually what people think of\nwhen they say machine learning, but it’ s only a small part of the entir e system.\nThere are many excellent books about various ML algorithms. This book\ndoesn’ t cover any

In [12]:
# HyDE retriever: the LLM drafts a short hypothetical answer, and that draft
# (rather than the raw question) is used as the vector-store search text.
hyde_system_prompt = """ 
Given the query, create a short hypothetical response for it under 100 words.
"""

hyde_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", hyde_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}")
    ]
)

# create_retrieval_chain(retriever, combine_docs_chain) accepts exactly two
# arguments and builds a retrieve-then-answer chain, so it cannot place an LLM in
# front of a retriever (the previous three-argument call raised TypeError).
# Compose the steps directly instead:
#   prompt -> LLM -> plain string -> vector-store retriever.
# Input is a dict with "input" and "chat_history"; output is the retrieved documents.
# The base `retriever` is used because it takes a query string, whereas
# `contextual_retriever` expects a dict.
hyde_retriever = hyde_prompt | model | StrOutputParser() | retriever

In [18]:
# System prompt for the answering step. `{context}` is the template variable the
# retrieved text is substituted into. The literal mixes backslash line
# continuations with an explicit "\n", so its whitespace is significant.
qa_system_prompt = """ Given the query and its context, create a short and concise response. \
 context: {context}\
 \n
 Assistant: 
"""


# Prompt layout: instruction (with context), prior turns, then the latest user message.
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}")
    ]
)

# Chain that formats the retrieved documents into the prompt and calls the LLM.
qa_chain = create_stuff_documents_chain(model, qa_prompt)

# Full RAG pipeline: history-aware retrieval followed by the answering chain.
rag_chain = create_retrieval_chain(contextual_retriever, qa_chain)

In [19]:
# Wrap the RAG chain with per-session memory: maintain_history(session_id)
# supplies the history store, the user turn is read from "input", earlier turns
# are passed in as "chat_history", and the "answer" field of the result is the
# part recorded as the assistant's reply.
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    maintain_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer"
)

In [None]:
''' 
Retriever + History Aware -> Mod Prompt -> Retriever -> Context -> LLM

Retriever + History Aware -> Mod Prompt -> HyDE Retriever -> Retriever -> Context -> LLM
'''

In [None]:
gen_response = []
async for events in conversational_rag_chain.astream_events(
        {"input": "What is the need for MLOps?"},
    config={
        "configurable": {"session_id": "4"}
    }, version="v1"
):
    kind = events["event"]
    if kind == 'on_llm_stream':
        data_chunk = events['data']['chunk']
        # if len(data_chunk.strip()) > 0:
        gen_response.append(data_chunk)
        print("".join(gen_response))
    #{'event': 'on_llm_stream', 'name': 'GPT4All', 'run_id': '87927e82-97ce-4033-9cb4-886edd4b332c', 'tags': ['seq:step:3'], 'metadata': {'session_id': '1', 'ls_provider': 'gpt4all', 'ls_model_type': 'llm', 'ls_model_name': 'E:\\GPT4ALL\\Phi-3-mini-4k-instruct.Q4_0.gguf', 'ls_max_tokens': 200}, 'data': {'chunk': 'n'}, 'parent_ids': []}



In [None]:
gen_response = []
async for events in conversational_rag_chain.astream_events(
        {"input": "What are some tools to implement it?"},
    config={
        "configurable": {"session_id": "4"}
    }, version="v1"
):
    kind = events["event"]
    if kind == 'on_llm_stream':
        data_chunk = events['data']['chunk']
        # if len(data_chunk.strip()) > 0:
        gen_response.append(data_chunk)
        print("".join(gen_response))