In [1]:
from langchain_ollama import OllamaLLM
from langchain_core.messages import AIMessage , HumanMessage , BaseMessage
from langchain_core.prompts import ChatPromptTemplate
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START , StateGraph
from langchain_community.document_loaders.text import TextLoader
from langchain_community.document_loaders.pdf import PDFMinerLoader
from langchain_community.document_loaders import DirectoryLoader , Docx2txtLoader
from langchain.vectorstores import FAISS
from typing_extensions import TypedDict , List , Annotated
from typing import Sequence
from langgraph.graph.message import add_messages
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_core.vectorstores import InMemoryVectorStore
# from FlagEmbedding import BGEM3FlagModel
from pprint import pp

In [None]:
# Ollama-served language model shared by the summarisation and answering calls.
# The keyword arguments are forwarded unchanged to OllamaLLM.
llm = OllamaLLM(
    model="gemma3",  # name of a model already loaded in Ollama
    num_gpu=-1,
    temperature=1.0,
    top_k=64,
    top_p=0.95,
    min_p=0.0,
    num_predict=500,
    # base_url='0.0.0.0:11444',  # uncomment to target a non-default Ollama endpoint
)

In [3]:
# embed = BGEM3FlagModel('BAAI/bge-m3', use_fp16=True)

# vector_store = InMemoryVectorStore(embed)

In [4]:
# Embedding model read from a local directory; encoded vectors are normalised.
embed = HuggingFaceEmbeddings(
    model_name=r"/media/j/msmj/models/bge m3 embedder/BAAIbge-m3",
    encode_kwargs={"normalize_embeddings": True},
)

# Vector store held in RAM and backed by the embedder above.
# NOTE(review): the cells shown in this notebook retrieve through a FAISS index
# instead, so this store looks unused -- confirm before removing it.
vector_store = InMemoryVectorStore(embed)

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Load every file under sources/ that matches **/*.txt, using TextLoader per file.
txt_loader = DirectoryLoader(
    path="sources/",
    glob="**/*.txt",
    loader_cls=TextLoader,
    use_multithreading=True,
)
# `docs` starts here; the following loader cells append to this same list.
docs = txt_loader.load()

In [6]:
# Load every file under sources/ that matches **/*.docx, using Docx2txtLoader per file.
docx_loader = DirectoryLoader(
    path="sources/",
    glob="**/*.docx",
    loader_cls=Docx2txtLoader,
    use_multithreading=True,
)
# Append in place to the shared `docs` list.
# NOTE: re-running this cell appends the same documents a second time.
docs.extend(docx_loader.load())

In [7]:
# Load every file under sources/ that matches **/*.pdf, using PDFMinerLoader per file.
pdf_loader = DirectoryLoader(
    path="sources/",
    glob="**/*.pdf",
    loader_cls=PDFMinerLoader,
    use_multithreading=True,
)
# Append in place to the shared `docs` list.
# NOTE: re-running this cell appends the same documents a second time.
docs.extend(pdf_loader.load())

In [8]:
# Split every loaded document into chunks of at most 1000 characters,
# with 200 characters of overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
all_splits = text_splitter.split_documents(docs)

# Embed the chunks into a FAISS vector store, then expose it as a retriever.
vectorized_doc = FAISS.from_documents(documents=all_splits, embedding=embed)
retriever = vectorized_doc.as_retriever()

In [None]:
# template = """if user said hi or hello, say hi or hello back to him.
# Use the following pieces of context to answer the question at the end.
# It's important to pay attention to the chat history and summarized documents.
# Your answer must be based on these pieces of context, chat history,summarized documents, and your own knowledge base.
# If you don't know the answer from the provided context, feel free to use your internal knowledge to answer.
# Use three sentences maximum and keep the answer as concise as possible.
# If the user asks about his previous questions, respond **only using the chat history below** and **do not use the current question's context or documents**.

# context:
# {context}

# summarized documents:
# {summarized_doc}

# chat history:
# {chat_history}

# Question: {input}

# Helpful Answer:"""

# # Create a prompt template
# prompt = ChatPromptTemplate.from_template(template)

In [None]:
# Prompt text for the answering step. The four placeholders -- {context},
# {summarized_doc}, {chat_history} and {input} -- are filled in by
# `prompt.invoke(...)` inside `answering`.
# The model is told to answer only from the supplied material, to admit when it
# does not know, and to keep the reply to three sentences at most.
template = """if user said hi or hello, say hi or hello back to him.
Use the following pieces of context to answer the question at the end.
It's important to pay attention to the chat history and summarized documents.
Your answer must be based solely on these pieces of context, chat history, and summarized documents.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible.

If the user asks about his previous questions, respond **only using the chat history** and **do not use the current question's context or documents**.

context:
{context}

summarized documents:
{summarized_doc}

chat history:
{chat_history}

Question: {input}

Helpful Answer:"""

# Build the prompt template object from the raw template string
prompt = ChatPromptTemplate.from_template(template)

In [None]:
# Graph state shared between invocations of the same thread.
class State(TypedDict):
    """State carried through the graph."""

    # Conversation messages; `add_messages` is attached as the channel reducer.
    messages: Annotated[Sequence[BaseMessage], add_messages]
    # History of retrieved context: one single-item list per answered question.
    all_docs: List[List[str]]



# making ['a' , 'b' , 'c' , 'd' , 'e' , 'f'] into [('a' , 'b') , ('c' , 'd') , ('e' , 'f')]
def making_pairs(messages_list):
    """Group a flat sequence into consecutive, non-overlapping 2-tuples.

    Args:
        messages_list: an indexable sequence (e.g. a list of messages ordered
            question, answer, question, answer, ...).

    Returns:
        A list of ``(item[0], item[1]), (item[2], item[3]), ...`` tuples.
        If the sequence has an odd number of items, the trailing unpaired
        item is dropped instead of raising ``IndexError``. An empty sequence
        yields an empty list.
    """
    # Stop at len - 1 so that `first + 1` is always a valid index.
    last_pair_start = len(messages_list) - 1
    return [
        (messages_list[first], messages_list[first + 1])
        for first in range(0, last_pair_start, 2)
    ]



def answering(state : State):
    """Answer the latest message in ``state`` using retrieved context.

    Steps performed:
      1. Retrieve documents for the newest message and join them into one string.
      2. Render the (up to five) preceding question/answer pairs as chat history.
      3. Ask the LLM to summarise the (up to three) context strings retrieved on
         earlier turns; when there are none yet, the current context is
         summarised instead.
      4. Fill the prompt template and ask the LLM for the final answer.

    Args:
        state: graph state. ``state['messages'][-1]`` is treated as the current
            question. ``state['all_docs']`` may be missing or ``None`` on the
            first run.

    Returns:
        A partial state update with the new ``AIMessage`` under ``'messages'``
        and the retrieval history (including this turn) under ``'all_docs'``.
    """
    history_window = 10  # messages before the current question -> at most 5 Q/A pairs
    summary_window = 3   # number of earlier retrievals fed to the summary step

    question = state['messages'][-1].content

    # Retrieve context for the question and merge it into a single string.
    relevant_docs = retriever.invoke(question)
    integrated_relevant_docs = "\n\n".join(doc.page_content for doc in relevant_docs)

    # Work on a copy: the incoming state object is left unmodified and the
    # updated history is handed back through the return value only.
    previous_docs = list(state.get('all_docs') or [])
    all_docs = previous_docs + [[integrated_relevant_docs]]

    # Pair up the messages that precede the current question (question, answer).
    QA_pairs = making_pairs(state['messages'][-(history_window + 1):-1])
    # NOTE(review): "Qestion" is the label the prompt has always used; it is kept
    # unchanged here so the text sent to the model does not change.
    chat_history = "".join(
        f"Qestion {i} : {Q.content}\nAnswer {i} : {A.content}\n\n"
        for i, (Q, A) in enumerate(QA_pairs, start=1)
    )

    # Summarise earlier retrievals (the latest one is passed verbatim as
    # `context`). The text is built outside the f-string for two reasons:
    # reusing the same quote character inside an f-string expression is only
    # accepted from Python 3.12 on, and formatting the nested list directly
    # would send its repr (brackets, escaped newlines) to the model.
    earlier_docs = previous_docs[-summary_window:]
    if earlier_docs:
        text_to_summarize = "\n\n".join(
            text for entry in earlier_docs for text in entry
        )
    else:
        text_to_summarize = integrated_relevant_docs
    summarized_docs = llm.invoke(f'concisely summarize these documents: {text_to_summarize}')

    # Fill the question, retrieved context, summary and history into the prompt.
    msg = prompt.invoke({'input' : question,
                         'context' : integrated_relevant_docs,
                         'summarized_doc' : summarized_docs,
                         'chat_history' : chat_history})

    ans = llm.invoke(msg.to_messages())
    # `all_docs` is returned so that the retrieval history persists in the state.
    return {'messages' : [AIMessage(ans)] , 'all_docs' : all_docs}



# In-memory checkpointer: lets the compiled graph remember earlier interactions.
memory = MemorySaver()

# Single-node graph: execution enters at START and runs the "search" node,
# which is implemented by `answering`.
graph_builder = StateGraph(state_schema=State)
graph_builder.add_node("search", answering)
graph_builder.add_edge(START, "search")

graph = graph_builder.compile(checkpointer=memory)

In [None]:
# The thread_id links this call to the stored history of the same conversation.
configuration = {"configurable": {"thread_id": "J"}}

# Put the question to ask here.
query = "this is where you gotta put your questions in"
input_msg = [HumanMessage(query)]

output = graph.invoke({"messages": input_msg}, config=configuration)

In [None]:
# Pretty-print the full message list returned by the graph.
pp(output["messages"])

In [None]:
# Show the last two messages (second-to-last first, then the last one).
for position in (-2, -1):
    output["messages"][position].pretty_print()