In [1]:
import os
from dotenv import load_dotenv
from langchain_openai import  AzureChatOpenAI, OpenAIEmbeddings,AzureOpenAIEmbeddings
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import  RecursiveCharacterTextSplitter,CharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate,MessagesPlaceholder
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain,create_history_aware_retriever
from langchain_community.vectorstores import FAISS
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.tools.retriever import create_retriever_tool
from  langchain_community.embeddings import OllamaEmbeddings
from PyPDF2 import PdfReader

load_dotenv()

# Project-specific "AL_*" settings (from the shell or the .env file) are
# re-exported under the variable names used by the rest of this notebook.
# Keys are the target names, values are the source names.
_ENV_ALIASES = {
    "OPENAI_API_TYPE": "AL_OPENAI_API_TYPE",
    "OPENAI_API_VERSION": "AL_OPENAI_API_VERSION",
    "AZURE_OPENAI_ENDPOINT": "AL_AZURE_OPENAI_ENDPOINT",
    "OPENAI_API_KEY": "AL_OPENAI_API_KEY",
    "DEPLOYMENT_NAME": "AL_DEPLOYMENT_NAME",
    "LANGCHAIN_PROJECT": "AL_LANGCHAIN_PROJECT",
}

# Assigning None to os.environ fails with an opaque
# "TypeError: str expected, not NoneType", so validate everything up front
# and report every missing setting in a single readable error.
_missing_settings = [
    source for source in _ENV_ALIASES.values() if os.getenv(source) is None
]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_settings)
        + ". Define them in the shell or in the .env file."
    )

os.environ.update(
    {target: os.environ[source] for target, source in _ENV_ALIASES.items()}
)

os.environ["LANGCHAIN_TRACING_V2"] = "true"

# HF_TOKEN is intentionally not re-assigned: when it is defined it is already
# present in os.environ under that exact name, so copying it onto itself
# changed nothing and could only crash the cell when the token was absent.

In [2]:
# Chat model shared by the question-rewriting and answering chains below.
# Endpoint, API key and API version come from the environment variables set
# in the first cell. The deployment is passed explicitly because the
# DEPLOYMENT_NAME variable exported there is not otherwise read by any
# visible cell; if it is unset, None keeps the client's default behaviour.
llm = AzureChatOpenAI(azure_deployment=os.environ.get("DEPLOYMENT_NAME"))

In [3]:
# Extract the text of every page of the PDF and split it into overlapping chunks.

pdfReader = PdfReader('./Requirements/SET MSA Schedule 23_updated.pdf')

# Pages that yield no text are skipped. The remaining pages are joined with a
# newline so the last word of one page is not glued to the first word of the
# next, and so the "\n"-based splitter below can break at page boundaries.
page_texts = [page.extract_text() for page in pdfReader.pages]
raw_text = "\n".join(text for text in page_texts if text)

text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
)
texts = text_splitter.split_text(raw_text)

# Fail here with a clear message instead of later, when the vector store is
# built from an empty list of chunks.
if not texts:
    raise ValueError(
        "No text could be extracted from the PDF; it may contain only scanned images."
    )

In [4]:
# Embed every text chunk with the Ollama "mxbai-embed-large" model and index
# the resulting vectors in FAISS.
embeddings = OllamaEmbeddings(model="mxbai-embed-large")
vector_store_db = FAISS.from_texts(texts, embeddings)

# Similarity search configured to return a single chunk per query (k=1).
retriever = vector_store_db.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 1},
)

# Persist the index under "faiss_index" so it can be reloaded later.
vector_store_db.save_local("faiss_index")

In [5]:
# Reload the index saved under "faiss_index", reusing the existing
# `embeddings` object, and rebuild the retriever on top of it.
# NOTE: allow_dangerous_deserialization=True permits pickle-based loading;
# only point this at index folders written by this notebook itself.
vector_store_db_from_local = FAISS.load_local(
    "faiss_index",
    embeddings,
    allow_dangerous_deserialization=True,
)
retriever = vector_store_db_from_local.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 1},
)

In [68]:
# Experimental alternative: index the documents under "Requirements" using
# AzureOpenAIEmbeddings instead of the Ollama embeddings.
# NOTE(review): the recorded run of this cell ended with
# "BadRequestError: Unsupported data type". The cause is not visible in this
# notebook -- presumably the Azure embeddings configuration; confirm before
# relying on this cell.
# NOTE(review): this cell rebinds `embeddings`, `text_splitter` and
# `vector_store_db`, which earlier cells also define, so running it changes
# what those names refer to for any cell executed afterwards.
# Disabled alternative: ##embeddings=OllamaEmbeddings(model="mxbai-embed-large")
embeddings=AzureOpenAIEmbeddings()

# Load the documents from the "Requirements" directory and split them into
# chunks (chunk_size=1000, chunk_overlap=200).
loader=PyPDFDirectoryLoader("Requirements")
docs=loader.load()
text_splitter=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200)
chunksDocuments=text_splitter.split_documents(docs)
# Disabled: ##result = [chunksDocument.dict()['page_content'] for chunksDocument in chunksDocuments]
# Build a FAISS index from the chunks using the embeddings object above.
vector_store_db=FAISS.from_documents(chunksDocuments,embeddings)
# Disabled: the retriever and retriever tool below are not created, so the
# `retriever` defined by an earlier cell stays in effect.
##retriever=vector_store_db.as_retriever(search_type="similarity",search_kwargs={"k":1})
##retriever_tool=create_retriever_tool(retriever,"PhaseFinder","Search phases in the document")

BadRequestError: Unsupported data type

In [6]:

# System instructions for the query-rewriting step: turn a follow-up question
# into a standalone one without answering it.
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question which might "
    "reference context in the chat history, formulate a standalone "
    "question which can be understood without the chat history. "
    "Do NOT answer the question, just reformulate it if needed and "
    "otherwise return it as is."
)
# Prompt for the query-rewriting step: the system instructions, then the
# conversation so far, then the newest user message.
contextualize_q_prompt = ChatPromptTemplate.from_messages([
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
])
# System instructions for the answering step. The trailing {context}
# placeholder is filled in by the chain that uses this prompt.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, say that you don't know. "
    "Use three sentences maximum and keep the answer concise."
    "\n\n{context}"
)
# Prompt for the answering step: the system instructions (which carry the
# {context} placeholder), then the conversation so far, then the user's input.
qa_prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
])

In [7]:
# Step 1: retriever that uses the LLM and the contextualize prompt together
# with the chat history before querying the vector store.
history_aware_retriever = create_history_aware_retriever(
    llm,
    retriever,
    contextualize_q_prompt,
)

# Step 2: chain that passes the retrieved documents to the LLM via qa_prompt.
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

# Full pipeline: retrieval followed by answer generation.
rag_chain = create_retrieval_chain(
    history_aware_retriever,
    question_answer_chain,
)

In [8]:
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

# In-memory registry of conversations, keyed by session id.
store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the history stored for ``session_id``, creating an empty one on first use."""
    try:
        return store[session_id]
    except KeyError:
        history = store[session_id] = ChatMessageHistory()
        return history


# The bare LLM (no retrieval) wrapped with per-session message history.
with_message_history = RunnableWithMessageHistory(llm, get_session_history)

In [9]:
# Wrap the RAG chain with per-session message history:
#   - "input" is the key holding the user's message,
#   - "answer" is the output key recorded as the reply,
#   - "chat_history" is the key under which past messages are supplied.
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    input_messages_key="input",
    output_messages_key="answer",
    history_messages_key="chat_history",
)

In [10]:
from langchain_core.messages import AIMessage,HumanMessage,SystemMessage
# Local transcript of the exchange. It is not passed to the chain, which is
# given only the question and a session id.
chat_history = []

question = "tell about Security Policy in this document "
response = conversational_rag_chain.invoke(
    {"input": question},
    # The session id passed to get_session_history is "session-1".
    config={"configurable": {"session_id": "session-1"}},
)

# Record the user's question followed by the chain's answer.
chat_history += [
    HumanMessage(content=question),
    AIMessage(content=response["answer"]),
]

print(response["answer"])


The Security Policy in this document requires that all IT and OT Systems, including associated systems, hardware, and firmware, adhere to its requirements. It also mandates limiting access to network and information systems to authorized personnel who need access for their job duties, with regular reviews of access rights. Additionally, all personnel with access to these systems must be under a contractual duty of confidence.


In [11]:
# Follow-up question sent under the same session id ("session-1") as the
# previous question.
question = "The Manufacturer shall:"
response = conversational_rag_chain.invoke(
    {"input": question},
    config={"configurable": {"session_id": "session-1"}},
)

# Record the user's question followed by the chain's answer in the local
# transcript.
chat_history += [
    HumanMessage(content=question),
    AIMessage(content=response["answer"]),
]
print(response["answer"])

The Manufacturer shall have sole responsibility for the maintenance of the IT and OT Systems, ensuring timely updates, patches, and upgrades to maintain service availability, integrity, and protection of networks, communications systems, and data. The Manufacturer must comply with legal and regulatory requirements, best industry practices, the latest technological developments, threat intelligence, specific sections of the Functional Specification, and any reasonable security guidelines or instructions provided by the Operator.


In [25]:
# Reset the conversation. The chain is invoked with only the question and a
# session id, and its history lives in `store` (via get_session_history),
# not in the local `chat_history` list. Both must be cleared for the next
# question in "session-1" to start without prior context.
chat_history.clear()
get_session_history("session-1").clear()