In [1]:
import os
from langchain.llms import OpenAI
from dotenv import load_dotenv
load_dotenv()

# Fail early with a readable message when the key is missing. Assigning the
# result of os.getenv() straight back into os.environ raises an opaque
# "TypeError: str expected, not NoneType" if the variable is not set.
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it or add it to your .env file."
    )
os.environ["OPENAI_API_KEY"] = openai_api_key

# NOTE: `llm` is rebound to a ChatOpenAI instance in a later cell; this
# completion-style client is only in effect until that cell runs.
llm = OpenAI(model_name="gpt-4o-mini")  # or any other model of your choice




In [2]:
# import bs4
from langchain import hub
from langchain_chroma import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader


In [3]:
from langchain.document_loaders import Docx2txtLoader
from dotenv import load_dotenv
import os
load_dotenv()
# Path to the .docx file to index. Set VETTING_DOCX_PATH (in the environment
# or in .env, which load_dotenv() has already read) to point at your own copy.
# The original location is kept as the fallback so existing setups still work.
file_path = os.getenv(
    "VETTING_DOCX_PATH",
    "/home/nouman-aziz/Downloads/Vetting_Questions.docx",
)

# Initialize the loader
loader = Docx2txtLoader(file_path)

In [4]:
# Make sure the OpenAI key is present before any API-backed step runs.
# os.environ only accepts strings, so writing os.getenv()'s None back into it
# would raise an unhelpful TypeError when the variable is missing.
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it or add it to your .env file."
    )
os.environ["OPENAI_API_KEY"] = openai_api_key


# Read the .docx file into a list of LangChain documents.
docs = loader.load()

In [5]:
# splits

In [None]:

from langchain_core.vectorstores import InMemoryVectorStore

# Split the loaded documents once (chunk_size=1000, chunk_overlap=200).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
all_splits = text_splitter.split_documents(docs)
# `splits` was previously produced by a second, identical split of `docs`.
# Keep the name bound for any cell that still reads it, without redoing the work.
splits = all_splits

# Embed the chunks and keep them in an in-memory vector store.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
vector_store = InMemoryVectorStore(embeddings)

# Index chunks
_ = vector_store.add_documents(documents=all_splits)

# Expose the store as a retriever; the chains in later cells query it for
# chunks of the loaded document.
retriever = vector_store.as_retriever()

# Chat model used by the history-aware retriever and the answering chain.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)


def format_docs(docs):
    """Concatenate the text of several documents into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values in input order, separated by a blank
        line (``"\\n\\n"``). An empty iterable yields an empty string.
    """
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)





In [None]:
# Prompt text asking the model to produce a numbered list of five follow-up
# questions based on the conversation so far. `{chat_history}` is the only
# template variable. The leading whitespace on each line is part of the string.
# NOTE(review): no cell visible in this notebook passes `template` to a chain.
template = """
        **Objective:**
        As an AI language model, your task is to generate a sequence of five relevant questions that build upon the provided conversation history. The goal is to ensure that each question logically follows the previous one, maintaining coherence and continuity in the dialogue.

        **Instructions:**
        1. **Review the Conversation History:**
        - Analyze the provided chat history to understand the context and flow of the conversation.
        - Assess whether the user's previous responses adequately address the preceding questions.

        2. **Generate Relevant Questions:**
        - If the user's response to a question is satisfactory, proceed by formulating the next question in the sequence.
        - If the user's response is incomplete or unsatisfactory, rephrase and repeat the previous question to elicit a more comprehensive answer.

        3. **Sequence and Coherence:**
        - Ensure that the sequence consists of five questions that are contextually relevant and logically connected.
        - Each question should naturally follow from the previous one, fostering a coherent and engaging dialogue.

        **Conversation History:**
        {chat_history}

        **Sample Questions:**
        To guide the formulation of your questions, consider the following examples:
        - What motivates your current job or service search?
        - Would you describe your current work or mission as a calling?
        - How would you articulate your life mission in a few words?
        - How significant is your faith in influencing your career choices?
        - Do you prefer working with organizations that share your faith or values?
        - What is your current city and country of residence?
        - What is your nationality?
        - Are you open to relocating? If yes, where?
        - What types of work are you interested in?
        - What work environment do you prefer?

        **Output Format:**
        - Present the generated questions in a numbered list format.
        - Ensure clarity and conciseness in each question.

        **Example Output:**
        1. What motivates your current job or service search?
        2. How would you describe your life mission in a few words?
        3. How significant is your faith in influencing your career choices?
        4. Are you open to relocating? If yes, where?
        5. What types of work are you interested in?

        By adhering to these guidelines, you will create a structured and coherent sequence of questions that effectively continues the dialogue based on the provided conversation history.

        """


In [None]:
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# System prompt for the question-reformulation step: it tells the model to
# rewrite the latest user question so it can be understood without the chat
# history, and not to answer it. It is used as the "system" message of
# `contextualize_q_prompt` below. The leading whitespace on each line is part
# of the string and is sent to the model unchanged.
contextualize_q_system_prompt = """
                Objective:

                Your task is to analyze the provided chat history and the latest user question, which may reference earlier parts of the conversation. Your goal is to ensure that the question can stand alone and be understood without requiring prior context.

                Instructions:

                Review the Conversation History:

                Analyze the flow of the conversation and how the latest user query relates to previous messages.

                Identify whether the user's latest question depends on prior context.

                Reformulate the Question (if needed):

                If the question references earlier parts of the conversation, restate it in a way that includes the missing context.

                If the question is already self-contained, return it as is.

                Ensure Clarity and Coherence:

                The reformulated question should be concise and fully understandable on its own.

                Preserve the original intent of the user's query while making necessary adjustments.

                Output Format:

                Provide the reformulated question in a clear and direct manner.

                Do not provide an answer—only restate the question in a standalone format.


                """
# Message layout for the reformulation step: system instructions first, then
# the prior turns, then the user's latest input.
contextualize_q_prompt = ChatPromptTemplate.from_messages([
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
])

# Wrap the plain retriever so the model and prompt above are applied to the
# conversation before documents are fetched.
history_aware_retriever = create_history_aware_retriever(
    llm=llm,
    retriever=retriever,
    prompt=contextualize_q_prompt,
)

In [None]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# System prompt for the answering step. It limits answers to three sentences,
# tells the model to say it does not know when the context is insufficient,
# and to re-ask the previous question when the user's reply is unclear.
# `{context}` is the template variable that receives the retrieved documents
# (the default document variable name of create_stuff_documents_chain).
qa_system_prompt = """Objective:
You are an AI assistant responsible for answering user queries using retrieved context. Your goal is to provide accurate and concise responses while ensuring relevance to the question.

Instructions:

Utilize Retrieved Context:

Use the provided context to generate an informed response.

If the answer is explicitly stated in the context, summarize it succinctly.

Handle Unanswered Questions:

If the context does not contain sufficient information, clearly state that you do not know the answer.

Do not fabricate information or make assumptions beyond the provided context.

Maintain Clarity and Conciseness:

Limit your response to a maximum of three sentences.

Ensure that the response directly addresses the question without unnecessary elaboration.


Output Format:

Provide a direct and informative answer based on the retrieved context.

If user response is not clear, respond with the previous question asked again until he gives better answer or skips the question.

Retrieved Context:

{context}

"""
# Message layout for the answering step: system instructions (which embed the
# retrieved context), then the prior turns, then the user's latest input.
qa_prompt = ChatPromptTemplate.from_messages([
    ("system", qa_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
])

# Chain that places the retrieved documents into `qa_prompt` and calls the model.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=qa_prompt)

# Full pipeline: history-aware retrieval followed by answer generation.
rag_chain = create_retrieval_chain(
    retriever=history_aware_retriever,
    combine_docs_chain=question_answer_chain,
)

In [None]:
from langchain_core.messages import AIMessage, HumanMessage

# Running transcript passed to the chain as `chat_history`.
chat_history = []

# First turn: nothing in the history yet.
question = "What is Task Decomposition?"
ai_msg_1 = rag_chain.invoke({"input": question, "chat_history": chat_history})

# Record both sides of the turn as typed messages. The answer has to be wrapped
# in AIMessage: a bare string in a message list is coerced to a human message,
# which would make the model's own reply look like something the user said.
chat_history.extend(
    [HumanMessage(content=question), AIMessage(content=ai_msg_1["answer"])]
)

# Second turn: "it" can only be resolved through the recorded history.
second_question = "What are common ways of doing it?"
ai_msg_2 = rag_chain.invoke({"input": second_question, "chat_history": chat_history})

print(ai_msg_2["answer"])