In [1]:
from langchain.retrievers import MultiVectorRetriever
from langchain.storage._lc_store import create_kv_docstore
from langchain.storage import InMemoryStore, LocalFileStore
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
from langchain_community.vectorstores import Chroma
import os
import sys
from operator import itemgetter
from typing import List
from langchain.docstore.document import Document


# Azure OpenAI connection settings.
# Credentials must never be committed inside a notebook: the API key is read
# from the environment (shell export, .env loader, secrets manager, ...).
# NOTE(review): a key was previously hard-coded in this cell; treat it as
# leaked and rotate it.
# setdefault() keeps an endpoint that was already configured outside the notebook.
os.environ.setdefault("AZURE_OPENAI_ENDPOINT", "https://dalle3-swo.openai.azure.com/")
if not os.environ.get("AZURE_OPENAI_API_KEY"):
    # Warn early so a missing key is obvious before any client is used.
    print(
        "WARNING: AZURE_OPENAI_API_KEY is not set; export it before running this notebook.",
        file=sys.stderr,
    )


# Every Azure OpenAI deployment used here is pinned to the same API version.
_AZURE_API_VERSION = "2024-02-15-preview"

# Chat models, one client per Azure deployment (all with temperature=0).
gpt_4o = AzureChatOpenAI(
    azure_deployment="gpt-4o",
    openai_api_version=_AZURE_API_VERSION,
    temperature=0,
)
gpt_35_turbo_16k = AzureChatOpenAI(
    azure_deployment="gpt-35-turbo-16k",
    openai_api_version=_AZURE_API_VERSION,
    temperature=0,
)
smart_llm = AzureChatOpenAI(
    azure_deployment="gpt-4",
    openai_api_version=_AZURE_API_VERSION,
    temperature=0,
)

# Embedding models.
embeddings = AzureOpenAIEmbeddings(
    azure_deployment="text-embedding-ada-002",
    openai_api_version=_AZURE_API_VERSION,
)
embeddings_3_large = AzureOpenAIEmbeddings(
    azure_deployment="text-embedding-3-large",
    openai_api_version=_AZURE_API_VERSION,
)

# Re-open the persisted Chroma collection; it is queried with the
# text-embedding-3-large client.
# NOTE(review): "zie" in the name is presumably a typo for "size"; the name is
# kept because other cells may reference it.
vectorstore_chunk_zie_400 = Chroma(
    embedding_function=embeddings_3_large,
    persist_directory="./chroma_db/su_embedding_400_large_with_source",
)

# Parent documents live in a file-backed store, wrapped as a document store.
fs = LocalFileStore("./parent_document_store/su_embedding_large_with_source")
store = create_kv_docstore(fs)

# Retriever combining the vector store and the parent-document store (k=2).
parent_document_retriever = MultiVectorRetriever(
    docstore=store,
    vectorstore=vectorstore_chunk_zie_400,
    search_kwargs={"k": 2},
)


def format_docs_with_sources(docs: List[Document]) -> str:
    """Render retrieved documents as the context text inserted into the prompt.

    Each document becomes a "Source Name / Information" entry and entries are
    separated by a blank line.

    Args:
        docs: Documents to render. ``metadata["file_name"]`` and
            ``metadata["page"]`` are used when present; a missing file name is
            shown as "Unknown source" and a missing page is simply omitted
            instead of raising ``KeyError``.

    Returns:
        The joined entries, or an empty string when ``docs`` is empty.
    """
    # Every line of an entry carries an 8-space indent, including a trailing
    # indent-only line. This layout is preserved deliberately so the prompt
    # text stays stable for documents that have full metadata.
    indent = " " * 8
    entries = []
    for doc in docs:
        metadata = doc.metadata or {}
        source = metadata.get("file_name", "Unknown source")
        page = metadata.get("page")
        # `is not None` rather than truthiness: page 0 is a valid page number.
        if page is not None:
            source = f"{source} - Page {page}"
        entries.append(
            f"{indent}Source Name: {source}\n"
            f"{indent}Information: {doc.page_content}\n"
            f"{indent}"
        )
    return "\n\n".join(entries)


# System prompt template for the answer-generation step.
# - `{context}` is a template variable; the chains in this notebook fill it
#   with the text produced by format_docs_with_sources.
# - `{{Answer here}}` uses doubled braces so it is emitted literally instead
#   of being treated as a template variable.
# NOTE(review): the text contains typos ("deliminated", "serivec", "usefull").
# They are left untouched here because editing the prompt changes model input;
# fix them deliberately and re-evaluate the answers afterwards.
system_prompt = """
As an AI assistant specializing in student support, your task is to provide concise and comprehensive answers to specific questions based on the provided context. 
The context is a list of sources. Each source includes source name and information.
You MUST follow instruction deliminated by ###.

###
Instructions:

1. Begin by reading the context carefully.
2. Answer the question based on the information in the context.
3. If you don’t know the answer, say "Sorry, the documents do not mention about this information. Please contact the Student Information Office via studentservice@buv.edu.vn for further support. Thank you". Do not fabricate responses. And Do not make up references
4. Keep your answer as succinct as possible, but ensure it includes all relevant information from the context. For examples: 
    - if students ask about a department or services, you should answer not only department name or serivec name, but also service link and department contact such as email, phone, ... if those information have in the context. 
    - if context does not have specific answer, but contain reference information such as reference link, reference contact point, support contact point and so on. Then you should show it up.
    - if context contains advices for specific student's action, you should show it up.
5. Always include the source name from the context for each fact you use in the response in the following format: 
```
{{Answer here}} 

Sources:
- Source name 1
- Source name 2
....
- Source name n
```
### 

--- Start Context:
{context}
--- End Context

Note that if the previous conversations contains usefull information, you can response based on provided context and those information too. 
Only answer in English.
"""

In [2]:
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import Runnable

# Instruction for the query-rewriting step: turn a follow-up question into one
# that can be understood without the preceding conversation.
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question which might reference "
    "context in the chat history, formulate a standalone question which can be "
    "understood without the chat history. Do NOT answer the question, just "
    "reformulate it if needed and otherwise return it as is."
)

# Message order: rewriting instruction, prior turns, then the new question.
contextualize_q_prompt = ChatPromptTemplate.from_messages([
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
])

# gpt-4o rewrites the question before it is sent to the parent-document retriever.
parent_document_with_history_aware_retriever = create_history_aware_retriever(
    llm=gpt_4o,
    retriever=parent_document_retriever,
    prompt=contextualize_q_prompt,
)

In [4]:
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
# Answer-generation prompt: instructions (with the retrieved context), the
# prior turns, then the user's new question.
qa_prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
])

# Retrieve with the history-aware retriever, then flatten the documents into
# the "Source Name / Information" text block.
custom_retriever_chain: Runnable = (
    parent_document_with_history_aware_retriever
    | RunnableLambda(format_docs_with_sources)
)

# Add `context` to the incoming dict, render the prompt, call the model and
# parse the reply into a plain string.
rag_chain_with_parent_retriever_with_sources: Runnable = (
    RunnablePassthrough.assign(context=custom_retriever_chain)
    | qa_prompt
    | gpt_35_turbo_16k
    | StrOutputParser()
)

In [5]:
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_community.chat_message_histories import ChatMessageHistory

# One in-memory history object; the factory below returns it for every
# session id, so all sessions in this notebook share the same conversation.
demo_ephemeral_chat_history = ChatMessageHistory()

chain_with_message_history: Runnable = RunnableWithMessageHistory(
    runnable=rag_chain_with_parent_retriever_with_sources,
    get_session_history=lambda session_id: demo_ephemeral_chat_history,
    history_messages_key="chat_history",
    input_messages_key="input",
)

In [6]:
from langchain_core.runnables import RunnablePassthrough


def trim_messages(chain_input):
    """Keep only the two most recent messages in the shared chat history.

    Used as a chain step that runs before the history-aware chain, so the
    prompt only carries the latest messages.

    Args:
        chain_input: The chain's input; accepted because the function is
            invoked as a runnable step, but not used.

    Returns:
        bool: True if older messages were dropped, False if the history
        already held two messages or fewer (nothing is modified then).
    """
    # Snapshot first: working on a copy means the messages to keep survive
    # clear() even if the history implementation empties its list in place.
    recent_messages = list(demo_ephemeral_chat_history.messages)
    if len(recent_messages) <= 2:
        return False

    demo_ephemeral_chat_history.clear()
    for message in recent_messages[-2:]:
        demo_ephemeral_chat_history.add_message(message)
    return True


# Trim the stored history first (the result is attached to the input as
# "messages_trimmed"), then run the history-aware chain.
chain_with_trimming = RunnablePassthrough.assign(
    messages_trimmed=RunnableLambda(trim_messages)
) | chain_with_message_history

In [7]:
def chain_test(
    query,
    system_prompt=system_prompt,
    instruction_in_system=True,
    llm=gpt_35_turbo_16k,
    context_aware_retriever_llm=gpt_35_turbo_16k,
    memory=demo_ephemeral_chat_history,
):
    """Build a fresh history-aware RAG chain and answer a single query with it.

    Lets a test vary the prompt, the role the instructions are sent under, the
    answering model, the query-rewriting model and the chat history.

    Args:
        query: The user question; passed to the chain as ``input``.
        system_prompt: Prompt template containing a ``{context}`` variable.
        instruction_in_system: If True the instructions are sent as a
            ``system`` message, otherwise as a ``human`` message.
        llm: Chat model that writes the final answer.
        context_aware_retriever_llm: Chat model that rewrites the question
            into a standalone one before retrieval.
        memory: Chat history used for this call. It is trimmed to its last two
            messages before the chain runs, so it is mutated.

    Returns:
        The chain output after ``StrOutputParser`` (the answer text).

    Note:
        The defaults are bound when this function is defined; re-run the
        defining cell after changing the module-level objects.
    """
    # --- history-aware retriever -----------------------------------------
    contextualize_q_system_prompt = (
        "Given a chat history and the latest user question "
        "which might reference context in the chat history, "
        "formulate a standalone question which can be understood "
        "without the chat history. Do NOT answer the question, "
        "just reformulate it if needed and otherwise return it as is."
    )
    contextualize_q_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", contextualize_q_system_prompt),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ]
    )
    history_aware_retriever = create_history_aware_retriever(
        context_aware_retriever_llm, parent_document_retriever, contextualize_q_prompt
    )

    # --- answer chain -----------------------------------------------------
    # The two prompt variants differ only in the role of the first message.
    instruction_role = "system" if instruction_in_system else "human"
    qa_prompt = ChatPromptTemplate.from_messages(
        [
            (instruction_role, system_prompt),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ]
    )

    retrieve_and_format = history_aware_retriever | format_docs_with_sources
    rag_chain = (
        RunnablePassthrough.assign(context=retrieve_and_format)
        | qa_prompt
        | llm
        | StrOutputParser()
    )

    chain_with_history = RunnableWithMessageHistory(
        rag_chain,
        lambda session_id: memory,
        input_messages_key="input",
        history_messages_key="chat_history",
    )

    def _trim_memory(chain_input):
        """Keep only the two latest messages of ``memory``; report if trimmed."""
        # Operate on the `memory` argument (not the module-level history) so a
        # caller-supplied history is the one that gets trimmed. Copy first so
        # the kept messages survive clear() regardless of how it is implemented.
        recent_messages = list(memory.messages)
        if len(recent_messages) <= 2:
            return False

        memory.clear()
        for message in recent_messages[-2:]:
            memory.add_message(message)
        return True

    chain_with_follow_up = (
        RunnablePassthrough.assign(messages_trimmed=_trim_memory)
        | chain_with_history
    )

    # A session id must be supplied, but the history factory above ignores it.
    return chain_with_follow_up.invoke(
        {"input": query},
        {"configurable": {"session_id": "unused"}},
    )

In [8]:
# Reset the shared chat history so the next test starts with no prior turns.
demo_ephemeral_chat_history.clear()

In [17]:
# Inspect the stored messages (bare expression, so the notebook displays it).
demo_ephemeral_chat_history.messages

[]

In [13]:
# Sample user questions for manually exercising chain_test.
questions = [
    "I want to register to study late at the university",
    "Is my reason valid for approval?",
    "What should I do to be absent for a driving test session?",
]

In [14]:
# Ask the first sample question with the instructions sent as a human message:
# gpt-4o rewrites the query, gpt-35-turbo-16k writes the answer.
a = chain_test(
    query=questions[0],
    system_prompt=system_prompt,
    instruction_in_system=False,
    llm=gpt_35_turbo_16k,
    context_aware_retriever_llm=gpt_4o,
)

Parent run 6c8e15a5-e950-49c2-94fd-e6f9a96d18b3 not found for run 3431bacb-169c-4139-8531-00e301b623cb. Treating as a root run.


In [15]:
# Show the raw answer string (displayed as a repr, so newlines appear as \n).
a

'Yes, you can register to study at the university outside of regular operating hours. The university allows students to access classrooms and facilities for self-study purposes during both operating and out-of-hours time. \n\nTo register for accessing campus facilities during operating hours:\n- Check the availability of accessible rooms.\n- Each student can use rooms for a maximum of 1 hour per day and 2 hours per week.\n- Register directly at the Student Information Office on level 2.\n\nTo register for accessing campus facilities outside of operating hours:\n- Check the availability of accessible rooms (room 3-5, 3-6, 3-7, 3-8, 3-11, 3-12) and functional rooms (approval required from the relevant Discipline Lead).\n- Submit the Out-of-hours Access Agreement form at the Student Information Office before 4:00 PM, Monday to Friday.\n- For functional rooms, seek confirmation from the relevant Discipline Lead before submitting the form.\n\nPlease note that the approval for accessing regi

In [20]:
# Print the answer so newlines render as line breaks.
# NOTE(review): the saved output of this cell differs from the value of `a`
# displayed by the earlier cell, so the cells were executed at different
# times; restart the kernel and run all cells to get consistent outputs.
print(a)

Sorry, the documents do not mention about this information. Please contact the Student Information Office via studentservice@buv.edu.vn for further support. Thank you.


# Test retrievers

In [29]:
# Query the parent-document retriever directly, bypassing the chat chain.
# NOTE(review): this rebinds `a`, which previously held the answer string;
# a dedicated name would avoid confusing the two.
a = parent_document_retriever.invoke("Can I retake this exam?")

In [30]:
# Look at the first retrieved document.
# NOTE(review): the saved output starts with "[Document(", i.e. a list, which
# does not match `a[0]`; the output looks stale and should be regenerated.
a[0]

[Document(page_content='How are retakes at BUV regulated? / Quy định học lại ở BUV như thế nào?\n\nAnswer / Câu trả lời:\n\nIf the External Examination Board decides that student needs to retake a module, students will need to study that module again with all achieved marked wiped clean. The student will also need to pay the restudy fee for the module(s) & ensure attendance. Their mark will not be capped for the first retake attempt, but any second attempt is capped at 40% for undergraduate level. On the other hand, a student’s Academic Misconduct history is retained on file and is taken into account on further academic conduct instances.\n\nNếu Hội đồng Khảo thí quyết định sinh viên cần phải học lại một hoặc nhiều môn, toàn bộ điểm số trước đó sinh viên đã đạt được trong môn học sẽ bị xóa khỏi hệ thống và sinh viên sẽ cần học lại môn học từ đầu khi có cơ hội kế tiếp, đảm bảo chuyên cần và thanh toán phí học lại. Khi học lại, điểm sẽ được tính như bình thường. Nếu sinh viên thi trượt v