In [1]:
import os
from openai import OpenAI
from dotenv import load_dotenv
load_dotenv()

client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

In [2]:
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

PDF_DOCS_PATH = "./pdf_docs"
PDF_DOCS_CHROMA_PATH = "./chroma_data"
EMBEDDING_MODEL = "text-embedding-3-small"

loader = PyPDFDirectoryLoader(PDF_DOCS_PATH)
documents = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=40)
final_documents = text_splitter.split_documents(documents)

pdf_docs_vector_db = Chroma.from_documents(
    final_documents,
    OpenAIEmbeddings(model=EMBEDDING_MODEL),
    persist_directory=PDF_DOCS_CHROMA_PATH
)

In [247]:
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate

from langchain.chains import create_retrieval_chain, create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder
from langchain.chains.combine_documents import create_stuff_documents_chain

from langchain_core.chat_history import BaseChatMessageHistory
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory
# from langchain_core.messages import AIMessage, HumanMessage


chat_model = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

pdf_docs_retriever = pdf_docs_vector_db.as_retriever(k=5)

contextualize_system_prompt = """
Given a chat history and the latest user question which might reference context in the chat history,
formulate a standalone question which can be understood without the chat history.
Do NOT answer the question, just reformulate it if needed and otherwise return it as is."
"""

contextualize_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}")
    ]
)

history_aware_pdf_docs_retriever = create_history_aware_retriever(
    chat_model, pdf_docs_retriever, contextualize_prompt
)

pdf_docs_template_str = """
Your job is to answer questions about mentioned person's business, occupation, career, professional skills.
Only answer person-related questions.
Be as detailed as possible but don't make up any information that's not from the context.
If you don't know an answer, say you don't know.

{context}
"""

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", pdf_docs_template_str),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}")
    ]
)

qa_chain = create_stuff_documents_chain(chat_model, prompt)

pdf_docs_chain = create_retrieval_chain(history_aware_pdf_docs_retriever, qa_chain)

store = {}

def get_session_history(session_id: str) -> BaseChatMessageHistory:
    if session_id not in store:
        store[session_id] = ChatMessageHistory()
    return store.get(session_id)
        

pdf_docs_rag_chain = RunnableWithMessageHistory(
    pdf_docs_chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)

    
# chat_history = list()


# def update_history(history, request, reply):
#     history.extend([
#         HumanMessage(content=request),
#         AIMessage(content=reply.get("answer"))
#     ])
#     return history

In [242]:
query = "What is Denis's doing for living?"

In [248]:
# res = pdf_docs_chain.invoke({"input": query, "chat_history": chat_history})
res = pdf_docs_rag_chain.invoke({"input": query}, config={"configurable": {"session_id": "abc123"}})

Parent run 3180e265-091c-4ebe-a516-bfaadc96b14f not found for run 8f3d46ec-da47-4010-a2c8-16f14db71c62. Treating as a root run.


In [249]:
res.get("answer")

'Denis Korolev is a highly qualified software engineer. He is involved in various aspects of software development, including web development, AI development, edtech development, and system design. His primary occupation is in the field of software engineering.'

In [177]:
print(update_history(chat_history, query, res))

[HumanMessage(content="What is Denis's doing for living?"), AIMessage(content='Denis Korolev is a highly qualified software engineer. He is involved in various aspects of software development, including web development, AI development, edtech development, and system design. His primary occupation is working in the field of software engineering.')]


In [250]:
query2 = "What is his major field?"

In [251]:
# res2 = pdf_docs_chain.invoke({"input": query2, "chat_history": chat_history})
res2 = pdf_docs_rag_chain.invoke({"input": query2}, config={"configurable": {"session_id": "abc123"}})

Parent run ba1c41fc-f911-4a14-9d65-1916b50a3d73 not found for run d3216e9b-8492-4e80-9ebd-23829ffaf82e. Treating as a root run.


In [252]:
res2.get("answer")

"Denis Korolev's major field is software engineering, with a focus on web development, AI development, edtech development, and system design. He is highly skilled and experienced in these areas, making software engineering his primary expertise."

In [181]:
print(update_history(chat_history, query2, res2))

[HumanMessage(content="What is Denis's doing for living?"), AIMessage(content='Denis Korolev is a highly qualified software engineer. He is involved in various aspects of software development, including web development, AI development, edtech development, and system design. His primary occupation is working in the field of software engineering.'), HumanMessage(content='What is his major field?'), AIMessage(content="Denis Korolev's major field is software engineering, with a focus on web development, AI development, edtech development, and system design. He is highly skilled in these areas and has a wide range of expertise in software development.")]


In [253]:
query3 = 'I will call Denis a "TECH_MAN" during our conversation, so for you "TECH_MAN" equals "Denis Korolev"'

In [254]:
# res3 = pdf_docs_chain.invoke({"input": query3, "chat_history": chat_history})
res3 = pdf_docs_rag_chain.invoke({"input": query3}, config={"configurable": {"session_id": "abc123"}})

Parent run f444073f-d384-4a2b-a2aa-83b6c81053b5 not found for run 96988bb6-b30e-41f0-9314-bce635fdc5d5. Treating as a root run.


In [255]:
res3.get("answer")

'Got it! When you refer to "TECH_MAN," I will understand that you are talking about Denis Korolev. Feel free to ask any questions you have about him or his professional background.'

In [186]:
print(update_history(chat_history, query3, res3))

[HumanMessage(content="What is Denis's doing for living?"), AIMessage(content='Denis Korolev is a highly qualified software engineer. He is involved in various aspects of software development, including web development, AI development, edtech development, and system design. His primary occupation is working in the field of software engineering.'), HumanMessage(content='What is his major field?'), AIMessage(content="Denis Korolev's major field is software engineering, with a focus on web development, AI development, edtech development, and system design. He is highly skilled in these areas and has a wide range of expertise in software development."), HumanMessage(content='I will call Denis a "TECH_MAN" during our conversation, so for you "TECH_MAN" equals "Denis Korolev"'), AIMessage(content='Got it! When you refer to "TECH_MAN," I will understand that you are talking about Denis Korolev. Feel free to ask any questions you have about him.')]


In [256]:
query4 = "Now tell me what do you know about Ilya's occupation?"

In [257]:
# res4 = pdf_docs_chain.invoke({"input": query4, "chat_history": chat_history})
res4 = pdf_docs_rag_chain.invoke({"input": query4}, config={"configurable": {"session_id": "abc123"}})

Parent run cc87c360-2bab-4312-aba8-32bfdd21ec05 not found for run 2d8fcf4b-98dc-4e05-9b3c-72087ae538d6. Treating as a root run.


In [258]:
res4.get("answer")

'Ilya Pischalnikov is a mega businessman who owns several companies. Some of the companies he is running include "Notishop," "TouchIP," "SuperBot72," "DevTrix&Co," and "IPGazMyas." His occupation primarily revolves around managing and overseeing these businesses, which contribute to his significant net worth and ranking on the Forbes list.'

In [190]:
print(update_history(chat_history, query4, res4))

[HumanMessage(content="What is Denis's doing for living?"), AIMessage(content='Denis Korolev is a highly qualified software engineer. He is involved in various aspects of software development, including web development, AI development, edtech development, and system design. His primary occupation is working in the field of software engineering.'), HumanMessage(content='What is his major field?'), AIMessage(content="Denis Korolev's major field is software engineering, with a focus on web development, AI development, edtech development, and system design. He is highly skilled in these areas and has a wide range of expertise in software development."), HumanMessage(content='I will call Denis a "TECH_MAN" during our conversation, so for you "TECH_MAN" equals "Denis Korolev"'), AIMessage(content='Got it! When you refer to "TECH_MAN," I will understand that you are talking about Denis Korolev. Feel free to ask any questions you have about him.'), HumanMessage(content="Now tell me what do you

In [259]:
query5 = "Wow, he seems like a busy man. Probably, he works hard and then he plays hard, does he?"

In [260]:
# res5 = pdf_docs_chain.invoke({"input": query5, "chat_history": chat_history})
res5 = pdf_docs_rag_chain.invoke({"input": query5}, config={"configurable": {"session_id": "abc123"}})

Parent run 972146c1-bafc-4dae-9ba6-56ccc871c63d not found for run 879f371a-de03-4837-93a7-a2e91a5811a4. Treating as a root run.


In [261]:
res5.get("answer")

"As a successful businessman with multiple companies under his ownership, Ilya Pischalnikov likely has a busy schedule managing his various ventures. While it's common for high-profile individuals like him to work hard to maintain and grow their businesses, it's also possible that he enjoys some leisure time or activities outside of work to relax and recharge. However, specific details about his work-life balance or personal activities would require more information or direct insight into his lifestyle."

In [194]:
print(update_history(chat_history, query5, res5))

[HumanMessage(content="What is Denis's doing for living?"), AIMessage(content='Denis Korolev is a highly qualified software engineer. He is involved in various aspects of software development, including web development, AI development, edtech development, and system design. His primary occupation is working in the field of software engineering.'), HumanMessage(content='What is his major field?'), AIMessage(content="Denis Korolev's major field is software engineering, with a focus on web development, AI development, edtech development, and system design. He is highly skilled in these areas and has a wide range of expertise in software development."), HumanMessage(content='I will call Denis a "TECH_MAN" during our conversation, so for you "TECH_MAN" equals "Denis Korolev"'), AIMessage(content='Got it! When you refer to "TECH_MAN," I will understand that you are talking about Denis Korolev. Feel free to ask any questions you have about him.'), HumanMessage(content="Now tell me what do you

In [262]:
query6 = 'I wll call Ilya a "BIZ_MAN" during our conversation, so for you "BIZ_MAN" equals "Ilya Pischalnikov".'

In [263]:
# res6 = pdf_docs_chain.invoke({"input": query6, "chat_history": chat_history})
res6 = pdf_docs_rag_chain.invoke({"input": query6}, config={"configurable": {"session_id": "abc123"}})

Parent run ff9d39da-c658-4386-81d8-c939fca186cf not found for run 34d9ec20-1dee-4ef8-b3a5-8ba423a7d82c. Treating as a root run.


In [264]:
res6.get("answer")

'Understood! When you refer to "BIZ_MAN," I will understand that you are talking about Ilya Pischalnikov. Feel free to ask any questions you have about him or his business endeavors.'

In [198]:
print(update_history(chat_history, query6, res6))

[HumanMessage(content="What is Denis's doing for living?"), AIMessage(content='Denis Korolev is a highly qualified software engineer. He is involved in various aspects of software development, including web development, AI development, edtech development, and system design. His primary occupation is working in the field of software engineering.'), HumanMessage(content='What is his major field?'), AIMessage(content="Denis Korolev's major field is software engineering, with a focus on web development, AI development, edtech development, and system design. He is highly skilled in these areas and has a wide range of expertise in software development."), HumanMessage(content='I will call Denis a "TECH_MAN" during our conversation, so for you "TECH_MAN" equals "Denis Korolev"'), AIMessage(content='Got it! When you refer to "TECH_MAN," I will understand that you are talking about Denis Korolev. Feel free to ask any questions you have about him.'), HumanMessage(content="Now tell me what do you

In [265]:
query7 = 'Now, talking about "TECH_MAN", does he somehow cooperate with "BIZ_MAN"? Do you have any information that they have worked together on any projects? Use "rule1" and "rule2" in your answer.'

In [266]:
# res7 = pdf_docs_chain.invoke({"input": query7, "chat_history": chat_history})
res7 = pdf_docs_rag_chain.invoke({"input": query7}, config={"configurable": {"session_id": "abc123"}})

Parent run 7a4bc5a7-edc4-441b-85db-2e7701893dba not found for run 3cccd27f-b42d-44af-bb5a-ed6e8d45367a. Treating as a root run.


In [267]:
res7.get("answer")

'I don\'t have any information about "TECH_MAN" (Denis Korolev) and "BIZ_MAN" (Ilya Pischalnikov) working together on any projects. Therefore, based on "rule1" (information provided) and "rule2" (no additional context), there is no known collaboration or projects involving both individuals at this time.'

In [202]:
print(update_history(chat_history, query7, res7))

[HumanMessage(content="What is Denis's doing for living?"), AIMessage(content='Denis Korolev is a highly qualified software engineer. He is involved in various aspects of software development, including web development, AI development, edtech development, and system design. His primary occupation is working in the field of software engineering.'), HumanMessage(content='What is his major field?'), AIMessage(content="Denis Korolev's major field is software engineering, with a focus on web development, AI development, edtech development, and system design. He is highly skilled in these areas and has a wide range of expertise in software development."), HumanMessage(content='I will call Denis a "TECH_MAN" during our conversation, so for you "TECH_MAN" equals "Denis Korolev"'), AIMessage(content='Got it! When you refer to "TECH_MAN," I will understand that you are talking about Denis Korolev. Feel free to ask any questions you have about him.'), HumanMessage(content="Now tell me what do you

In [268]:
query8 = 'Pleas check if company which belongs to "TECH_MAN" did ever provide service for company which belongs to "BIZ_MAN"? I mean like B2B service"?'

In [269]:
# res8 = pdf_docs_chain.invoke({"input": query8, "chat_history": chat_history})
res8 = pdf_docs_rag_chain.invoke({"input": query8}, config={"configurable": {"session_id": "abc123"}})

Parent run 5c0384ea-0f77-456e-a9bb-b956c1e764eb not found for run d63fba6b-4f02-4f6c-8ef9-ab0795483fba. Treating as a root run.


In [270]:
res8.get("answer")

'Based on the information provided, there is no indication or specific details regarding whether the companies owned by "TECH_MAN" (Denis Korolev) have ever provided services to the companies owned by "BIZ_MAN" (Ilya Pischalnikov) in a business-to-business (B2B) capacity. Without additional context or specific examples of such interactions, it is not possible to confirm any direct B2B service provision between their respective companies.'

In [206]:
print(update_history(chat_history, query8, res8))

[HumanMessage(content="What is Denis's doing for living?"), AIMessage(content='Denis Korolev is a highly qualified software engineer. He is involved in various aspects of software development, including web development, AI development, edtech development, and system design. His primary occupation is working in the field of software engineering.'), HumanMessage(content='What is his major field?'), AIMessage(content="Denis Korolev's major field is software engineering, with a focus on web development, AI development, edtech development, and system design. He is highly skilled in these areas and has a wide range of expertise in software development."), HumanMessage(content='I will call Denis a "TECH_MAN" during our conversation, so for you "TECH_MAN" equals "Denis Korolev"'), AIMessage(content='Got it! When you refer to "TECH_MAN," I will understand that you are talking about Denis Korolev. Feel free to ask any questions you have about him.'), HumanMessage(content="Now tell me what do you

In [271]:
query9 = 'Is there any information about any company which belongs to "TECH_MAN" that it provides B2B service?'

In [272]:
# res9 = pdf_docs_chain.invoke({"input": query9, "chat_history": chat_history})
res9 = pdf_docs_rag_chain.invoke({"input": query9}, config={"configurable": {"session_id": "abc123"}})

Parent run dd3aee08-2147-4a2a-8856-f7562af56126 not found for run 303ae8a8-7a87-46a1-9de4-5dce98f8963e. Treating as a root run.


In [273]:
res9.get("answer")

'Yes, there is information that one of the companies owned by "TECH_MAN" (Denis Korolev) provides high-quality B2B services for clients from all over the world. Denis Korolev\'s expertise in software engineering likely enables his company to offer specialized B2B services in areas such as web development, AI development, edtech development, and system design to cater to the needs of various clients globally.'

In [278]:
res9.get("context")

[Document(page_content='companies that he is running:\n- «Notishop»\n- «TouchIP»\n- «SuperBot72»\n- «DevTrix&Co»\n- «IPGazMyas»', metadata={'page': 0, 'source': 'pdf_docs/ip_story.pdf'}),
 Document(page_content='which provides a high quality B2B service for clients from all over the world.', metadata={'page': 0, 'source': 'pdf_docs/dk_story.pdf'}),
 Document(page_content='Ilya Pischalnikov is mega businessman who owns quite a few companies. This is only few of the \ncompanies that he is running:\n- «Notishop»\n- «TouchIP»\n- «SuperBot72»\n- «DevTrix&Co»\n- «IPGazMyas»', metadata={'page': 0, 'source': 'pdf_docs/ip_story.pdf'}),
 Document(page_content='Ilya Pischalnikov is mega businessman who owns quite a few companies. This is only few of the \ncompanies that he is running:\n- «Notishop»\n- «TouchIP»\n- «SuperBot72»\n- «DevTrix&Co»\n- «IPGazMyas»', metadata={'page': 0, 'source': 'pdf_docs/ip_story.pdf'})]

In [227]:
print(update_history(chat_history, query9, res9))

[HumanMessage(content="What is Denis's doing for living?"), AIMessage(content='Denis Korolev is a highly qualified software engineer. He is involved in various aspects of software development, including web development, AI development, edtech development, and system design. His primary occupation is working in the field of software engineering.'), HumanMessage(content='What is his major field?'), AIMessage(content="Denis Korolev's major field is software engineering, with a focus on web development, AI development, edtech development, and system design. He is highly skilled in these areas and has a wide range of expertise in software development."), HumanMessage(content='I will call Denis a "TECH_MAN" during our conversation, so for you "TECH_MAN" equals "Denis Korolev"'), AIMessage(content='Got it! When you refer to "TECH_MAN," I will understand that you are talking about Denis Korolev. Feel free to ask any questions you have about him.'), HumanMessage(content="Now tell me what do you

In [274]:
query10 = 'What is the name of this one company which belongs to "TECH_MAN" which provides high-quality B2B service?'

In [275]:
# res10 = pdf_docs_chain.invoke({"input": query10, "chat_history": chat_history})
res10 = pdf_docs_rag_chain.invoke({"input": query10}, config={"configurable": {"session_id": "abc123"}})

Parent run 6bd82244-fff8-45d5-b16b-7338392fc99d not found for run 3ca97d91-3162-4623-8d94-79677e6cbeda. Treating as a root run.


In [279]:
res10.get("answer")

'The company owned by "TECH_MAN" (Denis Korolev) that provides high-quality B2B services is called "DevTrix&Co." This company likely specializes in software engineering services, including web development, AI development, edtech development, and system design, catering to clients worldwide.'

In [281]:
res10.get("chat_history")

[HumanMessage(content="What is Denis's doing for living?"),
 AIMessage(content='Denis Korolev is a highly qualified software engineer. He is involved in various aspects of software development, including web development, AI development, edtech development, and system design. His primary occupation is in the field of software engineering.'),
 HumanMessage(content='What is his major field?'),
 AIMessage(content="Denis Korolev's major field is software engineering, with a focus on web development, AI development, edtech development, and system design. He is highly skilled and experienced in these areas, making software engineering his primary expertise."),
 HumanMessage(content='I will call Denis a "TECH_MAN" during our conversation, so for you "TECH_MAN" equals "Denis Korolev"'),
 AIMessage(content='Got it! When you refer to "TECH_MAN," I will understand that you are talking about Denis Korolev. Feel free to ask any questions you have about him or his professional background.'),
 HumanMe

In [282]:
len(res10.get("chat_history"))

18