In [67]:
import os
from dotenv import load_dotenv
from langchain_openai import  AzureChatOpenAI, OpenAIEmbeddings,AzureOpenAIEmbeddings
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import  RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate,MessagesPlaceholder
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain,create_history_aware_retriever
from langchain_community.vectorstores import FAISS
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.tools.retriever import create_retriever_tool

# Pull project-specific settings from the local .env file into the process
# environment, then re-export them under the names the libraries look up.
load_dotenv()

# target variable -> source variable (as named in .env / the shell).
_ENV_ALIASES = {
    "OPENAI_API_TYPE": "AL_OPENAI_API_TYPE",
    "OPENAI_API_VERSION": "AL_OPENAI_API_VERSION",
    "AZURE_OPENAI_ENDPOINT": "AL_AZURE_OPENAI_ENDPOINT",
    "OPENAI_API_KEY": "AL_OPENAI_API_KEY",
    "DEPLOYMENT_NAME": "AL_DEPLOYMENT_NAME",
    "LANGCHAIN_PROJECT": "AL_LANGCHAIN_PROJECT",
    # Same name on both sides: this only verifies that the token is present.
    "HF_TOKEN": "HF_TOKEN",
}

# Validate everything up front. Assigning None into os.environ raises an
# unhelpful "str expected, not NoneType" TypeError and would leave the
# environment partially configured.
_missing_env = [src for src in _ENV_ALIASES.values() if os.getenv(src) is None]
if _missing_env:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_env)
        + ". Define them in .env or in the shell before running this notebook."
    )

for _target, _source in _ENV_ALIASES.items():
    os.environ[_target] = os.environ[_source]

os.environ["LANGSMITH_TRACING"] = "true"

In [68]:
# Chat model built with no explicit arguments, so its configuration has to
# come from the environment variables exported in the setup cell.
# NOTE(review): DEPLOYMENT_NAME is exported above but is not passed here —
# confirm the client actually picks up the intended deployment.
llm = AzureChatOpenAI()

In [69]:

# Embedding model used to vectorise the document chunks.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Load the PDFs found in the "Requirements" directory (path is relative to
# the notebook's working directory).
loader = PyPDFDirectoryLoader("Requirements")
docs = loader.load()
if not docs:
    # Fail early with an actionable message; an empty corpus would otherwise
    # presumably surface as an opaque error while building the vector index.
    raise ValueError(
        "No documents were loaded from the 'Requirements' directory; "
        "check that it exists relative to the working directory and contains PDF files."
    )

# Split into overlapping chunks so each embedded passage stays small.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunksDocuments = text_splitter.split_documents(docs)

# In-memory FAISS index over the chunks; k=1 means a query returns only the
# single most similar chunk.
vector_store_db = FAISS.from_documents(chunksDocuments, embeddings)
retriever = vector_store_db.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 1},
)

# Tool wrapper around the retriever.
# NOTE(review): not referenced by any cell visible here — confirm it is still needed.
retriever_tool = create_retriever_tool(
    retriever,
    "PhaseFinder",
    "Search phases in the document",
)



In [70]:

# System instruction for the question-rewriting step: turn a follow-up that
# leans on earlier turns into a self-contained question, without answering it.
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question which might "
    "reference context in the chat history, formulate a standalone "
    "question which can be understood without the chat history. "
    "Do NOT answer the question, just reformulate it if needed and "
    "otherwise return it as is."
)
# Prompt for the rewriting step: system instruction, then the prior turns
# (injected under "chat_history"), then the latest user input.
contextualize_q_prompt = ChatPromptTemplate.from_messages([
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
])
# System instruction for the answering step. The "{context}" placeholder is
# separated from the instructions by a blank line.
system_prompt = "\n\n".join([
    (
        "You are an assistant for question-answering tasks. Use the "
        "following pieces of retrieved context to answer the question. "
        "If you don't know the answer, say that you don't know. "
        "Use three sentences maximum and keep the answer concise."
    ),
    "{context}",
])
# Prompt for the answering step: system instruction (which carries the
# "{context}" slot), the prior turns, then the user's question.
qa_prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
])

In [71]:
# Step 1: retriever that receives the contextualising prompt and the LLM so
# the question can be rewritten from chat history before searching.
history_aware_retriever = create_history_aware_retriever(
    llm=llm,
    retriever=retriever,
    prompt=contextualize_q_prompt,
)

# Step 2: chain that feeds the retrieved documents into the QA prompt.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=qa_prompt)

# Step 3: retrieval followed by answer generation.
rag_chain = create_retrieval_chain(
    retriever=history_aware_retriever,
    combine_docs_chain=question_answer_chain,
)

In [72]:
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

# In-memory registry of chat histories, keyed by session id.
store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history for ``session_id``.

    A new, empty ``ChatMessageHistory`` is created and stored in ``store``
    the first time a session id is seen; later calls return that same object.
    """
    try:
        return store[session_id]
    except KeyError:
        history = ChatMessageHistory()
        store[session_id] = history
        return history
        
# The bare chat model wrapped with per-session history (no retrieval step).
# NOTE(review): not referenced by any cell visible here — confirm it is still needed.
with_message_history = RunnableWithMessageHistory(
    llm,
    get_session_history,
)

In [73]:
# RAG chain wrapped with per-session memory. The key names tell the wrapper
# where the user input arrives ("input"), under which key the prior turns are
# supplied ("chat_history"), and which output field holds the reply ("answer").
conversational_rag_chain = RunnableWithMessageHistory(
    runnable=rag_chain,
    get_session_history=get_session_history,
    input_messages_key="input",
    output_messages_key="answer",
    history_messages_key="chat_history",
)

In [74]:
# First turn of session "session-1": get_session_history creates the
# "session-1" entry in `store` the first time this id is seen.
conversational_rag_chain.invoke(
    {"input": "means any software programme or code which does"},
    config={"configurable": {"session_id": "session-1"}},
)["answer"]

'Malicious Software means any software programme or code which does, or which is intended to, destroy, interfere with, corrupt or cause undesired effects on program files, hardware, network, data hosting/storage facilities, databases, information and/or communication technology systems, data or other information, executable code or application software macros or programs.'

In [76]:
# Second turn, reusing session id "session-1" so the history object already
# held in `store` for that id is used again.
conversational_rag_chain.invoke(
    {"input": "means the Operator's IT/Information Security Policy"},
    config={
        "configurable": {"session_id": "session-1"},
    },
)["answer"]

"Security Policy means the Operator's IT/Information Security Policy as may be notified to the Manufacturer from time to time."