In [None]:
# !pip install -qU langchain-openai langchain_chroma

In [2]:
import os

import openai
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFDirectoryLoader, PyPDFLoader
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [3]:
# Read the OpenAI credential from the environment; the value is None when the
# variable is not set. Never paste the key into the notebook itself.
openai_api_key = os.environ.get("OPENAI_API_KEY")

In [4]:
# Confirm the key is configured without echoing it: notebook outputs are saved
# with the file, so displaying the raw value leaks the secret to every reader.
# Any key that was ever rendered in this cell's output should be treated as
# compromised and rotated.
bool(openai_api_key)

True

In [5]:
# Module-level key for any code that talks to the `openai` package directly.
# NOTE(review): ChatOpenAI presumably picks the key up from OPENAI_API_KEY on
# its own -- confirm whether this assignment is still required.
openai.api_key = openai_api_key

# Chat model used both to rewrite follow-up questions and to generate answers.
# ChatOpenAI is imported in the notebook's import cell.
llm = ChatOpenAI(model="gpt-4o-mini")

In [6]:
# Directory (relative to the notebook's working directory) holding the source PDFs.
folder_path = "Database"

In [7]:
# Build one loader that covers every PDF in `folder_path`.
#
# The earlier per-file loop rebound `loader` on each iteration, so only the last
# directory entry was ever loaded; it also handed non-PDF files to the PDF
# parser, opened each file in text mode without using the handle, and left
# `loader` undefined when the folder was empty.
if not os.path.isdir(folder_path):
    # Fail fast, as os.listdir() did for a missing folder, with a clearer message.
    raise FileNotFoundError(f"PDF folder not found: {folder_path!r}")

# glob="*.pdf" restricts the scan to files directly inside the folder, matching
# the original top-level directory walk. The pattern is matched by pathlib, so
# on case-sensitive filesystems files named "*.PDF" are not picked up.
loader = PyPDFDirectoryLoader(folder_path, glob="*.pdf")

In [8]:
# Alternative to scanning the folder: load one specific PDF instead.
# loader = PyPDFLoader(
#     "Manual_Goods_2024.pdf",
# )

In [9]:
# Run the loader; the resulting documents are fed to the text splitter below.
docs = loader.load()

In [10]:
# Chunking configuration: chunks of at most 1000 characters, with 200 characters
# of overlap between consecutive chunks so context is not cut at a boundary.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)

In [11]:
# Split the loaded documents into chunks with the splitter configured above.
splits = text_splitter.split_documents(docs)

In [12]:
# Maximum number of chunks sent to the vector store per call.
# NOTE(review): 166 presumably mirrors the Chroma max batch size seen on this
# machine -- confirm before raising it.
batch_size = 166  # or any size less than the limit

# Break the split documents into batches of at most `batch_size`.
chunks = [splits[i:i + batch_size] for i in range(0, len(splits), batch_size)]

if not chunks:
    # Without this guard `vectorstore` would be empty and every later retrieval
    # would silently return nothing.
    raise ValueError(
        "No document chunks to index; check that the PDFs were loaded and split."
    )

# Create the store once and add each batch to it. Calling
# Chroma.from_documents() per batch rebinds `vectorstore` every iteration, so
# whether earlier batches remain searchable depends on Chroma's internal
# client/collection sharing; it also rebuilds the embedding client each time.
vectorstore = Chroma(embedding_function=OpenAIEmbeddings())
for chunk in chunks:
    vectorstore.add_documents(chunk)

In [13]:
# Wrap the vector store (built from the PDF chunks) in a retriever for the chains below.
retriever = vectorstore.as_retriever()
# The prompt is defined locally in the following cells instead of being pulled from the hub:
# prompt = hub.pull('rlm/rag-prompt')

In [14]:
# System instructions for the answering model. The sentences are joined with
# single spaces, then a blank line separates them from the `{context}` slot that
# the documents chain fills with the retrieved text.
system_prompt = "\n\n".join(
    [
        " ".join(
            [
                "You are an assistant for question-answering tasks.",
                "Use the following pieces of retrieved context to answer the question.",
                "If you don't know the answer, say that you don't know.",
                "Use three sentences maximum and keep the answer concise.",
            ]
        ),
        "{context}",
    ]
)

In [15]:
# Single-turn prompt: system instructions followed by the user's question.
_single_turn_messages = [("system", system_prompt), ("human", "{input}")]
prompt = ChatPromptTemplate.from_messages(_single_turn_messages)
del _single_turn_messages  # keep the notebook namespace unchanged

In [16]:
# Single-turn answering chain built from the model and the single-turn prompt.
# NOTE: this name is rebound further down to the chat-history-aware variant.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

In [17]:
# Single-turn RAG chain: plain retriever + answering chain.
# NOTE: this name is rebound further down to the chat-history-aware variant.
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answer_chain,
)

In [18]:
# Instructions for rewriting a follow-up question into a standalone one, so the
# retriever can be queried without access to the conversation.
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question which might reference "
    "context in the chat history, formulate a standalone question which can be "
    "understood without the chat history. Do NOT answer the question, just "
    "reformulate it if needed and otherwise return it as is."
)

# Rewrite prompt: instructions, then the prior turns, then the new question.
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
    ]
)

# Retriever built from the model, the base retriever and the rewrite prompt.
history_aware_retriever = create_history_aware_retriever(
    llm=llm,
    retriever=retriever,
    prompt=contextualize_q_prompt,
)

In [19]:
# Answering prompt for multi-turn chat: the same system instructions as the
# single-turn prompt, with the conversation so far inserted before the question.
_chat_messages = [
    ("system", system_prompt),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
]
qa_prompt = ChatPromptTemplate.from_messages(_chat_messages)
del _chat_messages  # keep the notebook namespace unchanged

In [20]:
# Rebind the answering chain to the chat-history-aware prompt (replaces the
# single-turn chain defined earlier under the same name).
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=qa_prompt)

In [21]:
# Final conversational RAG chain: history-aware retriever + history-aware
# answering chain (replaces the single-turn chain defined earlier under this name).
rag_chain = create_retrieval_chain(
    retriever=history_aware_retriever,
    combine_docs_chain=question_answer_chain,
)

In [27]:
# Start a new conversation: there are no prior turns for the first question.
chat_history = []

question = "Tell me about thermodynamics?"

# First turn. The chain's result is indexed by "answer" for the generated text.
ai_msg_1 = rag_chain.invoke(
    {
        "input": question,
        "chat_history": chat_history,
    }
)
print(ai_msg_1["answer"])

Thermodynamics is the branch of physics that deals with the relationships between heat, work, temperature, and energy. It is governed by four fundamental laws that describe how energy is transferred and transformed in physical systems. The principles of thermodynamics are essential for understanding various processes in engineering, chemistry, and physics.


In [28]:
# Record the first exchange so the next question can refer back to it.
# Re-running this cell appends the same pair again; reset `chat_history` first.
_first_turn = (
    HumanMessage(content=question),
    AIMessage(content=ai_msg_1["answer"]),
)
chat_history.extend(_first_turn)
del _first_turn  # keep the notebook namespace unchanged

In [38]:
# Follow-up question for the second turn of the conversation.
second_question = (
    "can you tell me about properties of real gases "
    "and how are they different from ideal gas?"
)

In [39]:
# Second turn: same chain, now with the first exchange supplied as history.
ai_msg_2 = rag_chain.invoke(
    {
        "input": second_question,
        "chat_history": chat_history,
    }
)

In [40]:
# Show only the generated answer text from the second response.
print(ai_msg_2["answer"])

Real gases exhibit behaviors that deviate from the ideal gas law due to interactions between gas molecules and the volume occupied by the molecules themselves. Unlike ideal gases, which assume no interactions and that gas particles occupy no volume, real gases experience attractive and repulsive forces, especially at high pressures and low temperatures. Additionally, real gases can condense into liquids, which does not occur in ideal gas behavior, highlighting the limitations of the ideal gas model.
