In [1]:
from dotenv import load_dotenv
load_dotenv()


True

In [2]:
decompositioning_system_prompt = """
You are a helpful assistant trained to generates multiple sub-questions related to the a prompt.\n
The goal is to break down the input into a set of sub-problems / sub-questions that can be answered perfectly with LLMs.\n\n
Answer in this format:
1- sub-question-one
2- sub-question-two
...
"""

decompositioning_human_prompt = """
Prompt that is needed to convert to sub-questions:\n
<prompt>\n
{question}\n
</prompt>\n
"""

In [3]:
from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
system_prompt = SystemMessagePromptTemplate.from_template(decompositioning_system_prompt)
human_prompt = HumanMessagePromptTemplate.from_template(decompositioning_human_prompt, input_variables=["question"])
prompt_decomposition = ChatPromptTemplate([system_prompt, human_prompt])
prompt_decomposition

ChatPromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='\nYou are a helpful assistant trained to generates multiple sub-questions related to the a prompt.\n\nThe goal is to break down the input into a set of sub-problems / sub-questions that can be answered perfectly with LLMs.\n\n\nAnswer in this format:\n1- sub-question-one\n2- sub-question-two\n...\n'), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, template='\nPrompt that is needed to convert to sub-questions:\n\n<prompt>\n\n{question}\n\n</prompt>\n\n'), additional_kwargs={})])

In [4]:
from langchain_groq import ChatGroq
models = {"Google": ["gemma2-9b-it", "gemma-7b-it"],
          "Groq": ["llama3-groq-70b-8192-tool-use-preview", "llama3-groq-8b-8192-tool-use-preview"],
          "Meta": ["llama-3.1-70b-versatile", "llama-3.1-8b-instant", "llama-3.2-1b-preview", "llama-3.2-3b-preview", "llama-3.2-11b-vision-preview", "llama-3.2-90b-vision-preview", "llama-guard-3-8b", "llama3-70b-8192", "llama3-8b-8192"],
          "Mistral": ["mixtral-8x7b-32768"],
          "OpenAI": ["whisper-large-v3", "whisper-large-v3-turbo"]}
model_groq = ChatGroq(model=models["Meta"][0], temperature=0)

In [28]:
from langchain_core.output_parsers.string import StrOutputParser
from IPython.display import Markdown
def generate_sub_queries(query):
    chain = (prompt_decomposition 
             | model_groq 
             | StrOutputParser()
             | (lambda x: x.split("\n")))
    questions_produced = chain.invoke({"question":query})
    display(Markdown("==========================================\n\n" + "Sub-Questions  :  \n\n" + "\n\n".join(questions_produced) + "\n\n=========================================="))
    return "\n\n".join(questions_produced)

In [29]:
generate_sub_queries("who is Keivan jamali?")

==========================================

Sub-Questions  :  

1- What is Keivan Jamali's nationality?

2- What is Keivan Jamali's profession or occupation?

3- Is Keivan Jamali a public figure, and if so, in what context?

4- What are Keivan Jamali's notable achievements or contributions?

5- Is there any available information on Keivan Jamali's personal life or background?

6- Is Keivan Jamali associated with any specific industry or field?

7- Are there any notable works or projects attributed to Keivan Jamali?

8- Is Keivan Jamali active on social media or other online platforms?

==========================================

"1- What is Keivan Jamali's nationality?\n\n2- What is Keivan Jamali's profession or occupation?\n\n3- Is Keivan Jamali a public figure, and if so, in what context?\n\n4- What are Keivan Jamali's notable achievements or contributions?\n\n5- Is there any available information on Keivan Jamali's personal life or background?\n\n6- Is Keivan Jamali associated with any specific industry or field?\n\n7- Are there any notable works or projects attributed to Keivan Jamali?\n\n8- Is Keivan Jamali active on social media or other online platforms?"

In [30]:
template = """You are an AI language model assistant. Your task is to generate five 
different versions of the given user question to retrieve relavant documents from a vector 
database. By generating mulitple perspectives on the user quesion, your goal is to help 
the user overcome some of the limitations of the distance-based similarity search.
Provide these alternative questions seprated by newlines. Original questions: {question}"""

multiquery_prompt = ChatPromptTemplate.from_template(template)
multiquery_prompt

ChatPromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, template='You are an AI language model assistant. Your task is to generate five \ndifferent versions of the given user question to retrieve relavant documents from a vector \ndatabase. By generating mulitple perspectives on the user quesion, your goal is to help \nthe user overcome some of the limitations of the distance-based similarity search.\nProvide these alternative questions seprated by newlines. Original questions: {question}'), additional_kwargs={})])

In [31]:
def generate_multi_queries(query):
    chain  = (multiquery_prompt
        | model_groq
        | StrOutputParser()
        | (lambda x: x.split("\n"))
    )
    multi_queries = chain.invoke({"question": query})
    display(Markdown("==========================================\n\n" + "Sub-Queries  :  \n\n" + "\n\n".join(multi_queries) + "\n\n=========================================="))
    return chain

In [32]:
generate_multi_queries(generate_sub_queries("Who is Keivan?"))

==========================================

Sub-Questions  :  

1- What is the full name of Keivan?

2- What is Keivan's nationality?

3- What is Keivan known for (e.g. profession, achievement, etc.)?

4- Is Keivan a public figure, and if so, in what context?

5- Are there any notable achievements or contributions associated with Keivan?

==========================================

==========================================

Sub-Queries  :  

Here are five different versions of each user question to retrieve relevant documents from a vector database:



1. What is the full name of Keivan?

 

Keivan's full name is what?

What is Keivan's complete name?

Can you provide the full name of the individual known as Keivan?

What is the official name of Keivan?



2. What is Keivan's nationality?



What country is Keivan from?

What is Keivan's country of origin?

What nationality does Keivan hold?

Is Keivan a citizen of a specific country?

What is Keivan's ethnic background?



3. What is Keivan known for (e.g. profession, achievement, etc.)?



What is Keivan's profession or area of expertise?

What is Keivan famous for?

What notable achievements has Keivan accomplished?

What is Keivan recognized for in his field?

What is Keivan's claim to fame?



4. Is Keivan a public figure, and if so, in what context?



Is Keivan a well-known individual?

What is Keivan's public profile?

Is Keivan a celebrity or notable figure?

In what capacity is Keivan a public figure?

Is Keivan widely recognized in a particular field?



5. Are there any notable achievements or contributions associated with Keivan?



What significant contributions has Keivan made?

What notable accomplishments is Keivan known for?

Has Keivan achieved any notable milestones?

What impact has Keivan had in his field?

What are Keivan's most notable achievements?

==========================================

ChatPromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, template='You are an AI language model assistant. Your task is to generate five \ndifferent versions of the given user question to retrieve relavant documents from a vector \ndatabase. By generating mulitple perspectives on the user quesion, your goal is to help \nthe user overcome some of the limitations of the distance-based similarity search.\nProvide these alternative questions seprated by newlines. Original questions: {question}'), additional_kwargs={})])
| ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x0000023A45558F20>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x0000023A45559E20>, model_name='llama-3.1-70b-versatile', temperature=1e-08, model_kwargs={}, groq_api_key=SecretStr('**********'))
| StrOutputParser()
|

In [34]:
import os
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

doc_path = r"D:\All Python\Pure-Python\P4\06-PromptEngineering\Project 3 - OpenAI\KeivanJamali_doc"
document = []

for file_name in os.listdir(doc_path):
    if file_name.endswith(".txt"):
        # print(len(document))
        file_path = os.path.join(doc_path, file_name)
        loader = TextLoader(file_path, encoding='utf-8')
        docs = loader.load()
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=300,
            chunk_overlap=10,
            separators="\n\n"
        )
        result = text_splitter.split_documents(docs)
        document += result

len(document)

121

In [40]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
embeddings = HuggingFaceEmbeddings()
db_k1 = Chroma.from_documents(document, embeddings)
retriever = db_k1.as_retriever()
print(f"Number of documents is  :  {db_k1._collection.count()}")

INFO: Use pytorch device_name: cpu
INFO: Load pretrained SentenceTransformer: sentence-transformers/all-mpnet-base-v2


Number of documents is  :  242


In [41]:
from langchain.load import dumps, loads

def get_unique_union(documents: list[list]):
    flattened_docs = [dumps(doc) for sublist in documents for doc in sublist]
    
    unique_docs = list(set(flattened_docs))

    return [loads(doc) for doc in unique_docs]

In [42]:
rag_template = """Answer the following question based on this context:
{context}
Question: {question}
"""
rag_prompt = ChatPromptTemplate.from_template(rag_template)
rag_prompt

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='Answer the following question based on this context:\n{context}\nQuestion: {question}\n'), additional_kwargs={})])

In [57]:
from langchain_core.runnables import RunnablePassthrough

def query(query, retriever):
    retrieval_chain = (generate_multi_queries(generate_sub_queries(query)) 
                       | retriever.map()
                       | get_unique_union)
    docs = retrieval_chain.invoke({"question": query})
    display(f"Number of document that is used is ({len(docs)})")
    # return docs
    final_rag_chain = ({"context": retrieval_chain, "question": RunnablePassthrough()} 
                       | rag_prompt
                       | model_groq
                       | StrOutputParser())
    return final_rag_chain.invoke(query)

In [60]:
from IPython.display import Markdown
answer = query("Tell me about Keivan Jamali's projects.", retriever)
display(Markdown(answer))

INFO: HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


==========================================

Sub-Questions  :  

1- Who is Keivan Jamali and what is his profession?

2- What type of projects is Keivan Jamali known for working on?

3- Can you provide a list of Keivan Jamali's notable projects?

4- Are Keivan Jamali's projects focused on a specific industry or field?

5- What is the most recent project that Keivan Jamali has worked on?

6- Are there any upcoming projects that Keivan Jamali is involved in?

7- How can I find more information about Keivan Jamali's projects?

8- Are Keivan Jamali's projects available to the public or are they private?

==========================================

INFO: HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


==========================================

Sub-Queries  :  

Here are five alternative versions of each user question to help retrieve relevant documents from a vector database:



1. Who is Keivan Jamali and what is his profession?



Keivan Jamali's background and occupation

What does Keivan Jamali do for a living?

Keivan Jamali's professional profile

Information about Keivan Jamali's career

Keivan Jamali's job title and expertise



2. What type of projects is Keivan Jamali known for working on?



Keivan Jamali's project specialties

The types of projects Keivan Jamali is involved in

Keivan Jamali's areas of project expertise

Project genres associated with Keivan Jamali

Keivan Jamali's typical project scope



3. Can you provide a list of Keivan Jamali's notable projects?



Notable projects by Keivan Jamali

Keivan Jamali's most notable works

A list of Keivan Jamali's significant projects

Keivan Jamali's top projects

Keivan Jamali's most famous projects



4. Are Keivan Jamali's projects focused on a specific industry or field?



Keivan Jamali's industry specialization

The fields in which Keivan Jamali works

Keivan Jamali's project focus areas

Is Keivan Jamali's work limited to a specific sector?

Keivan Jamali's areas of industry expertise



5. What is the most recent project that Keivan Jamali has worked on?



Keivan Jamali's latest project

Recent projects by Keivan Jamali

Keivan Jamali's current project

What is Keivan Jamali working on now?

Keivan Jamali's newest project



6. Are there any upcoming projects that Keivan Jamali is involved in?



Keivan Jamali's future projects

Upcoming projects featuring Keivan Jamali

What's next for Keivan Jamali?

Keivan Jamali's projects in development

Keivan Jamali's forthcoming projects



7. How can I find more information about Keivan Jamali's projects?



Resources for learning about Keivan Jamali's projects

Where to find information on Keivan Jamali's projects

Keivan Jamali's project documentation

How to stay updated on Keivan Jamali's projects

Keivan Jamali's project portfolio



8. Are Keivan Jamali's projects available to the public or are they private?



Keivan Jamali's project accessibility

Are Keivan Jamali's projects publicly available?

Keivan Jamali's project visibility

Can the public access Keivan Jamali's projects?

Keivan Jamali's project confidentiality

==========================================

INFO: HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


'Number of document that is used is (6)'

INFO: HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


Keivan Jamali has a project portfolio that can be found at https://keivanjamali.com/keivan-jamali-project-portfolio/azadi-tower-structural-analysis/. Additionally, one of his projects is mentioned as "Azadi Tower Structural Analysis".

In [45]:
# from langchain.chains import RetrievalQA
# chain2 = RetrievalQA.from_chain_type(
#     llm=model_groq,
#     chain_type="stuff",
#     retriever=retriever,
#     return_source_documents=True
# )

