# 1. 
# 2. Prompt Template
# 3. Output-parsers
- StrOutputParser
- CommaSeparatedListOutputParser
- JsonOutputParser
# 4. Chat with Documents using Retrieval Chains
- create_stuff_documents_chain
- create_retrieval_chain
# 5. Adding Chat History to Chatbot
- MessagesPlaceholder
- HumanMessage, AIMessage.
- create_history_aware_retriever
# 6. Agents with Tools
- create_openai_functions_agent, AgentExecutor
- TavilySearchResults
- create_retriever_tool
# 7. Long Term Chat Memory with Upstash Redis
# 8. Query Translation -- Multi Query
- sinh thêm câu truy vấn
# 9. Query Translation -- RAG Fusion
- sinh thêm câu truy vấn
- reciprocal_rank_fusion: rank documents
# 10. Query Translation -- Decomposition tách câu hỏi thành từng phần để hỏi
- Answer recursively - trả lời đệ quy, kết hợp phần trả lời của câu trước để trả lời cho câu phụ tiếp theo
- Answer individually - trả lời riêng từng câu
# 11. Query Translation -- Step Back
- sinh thêm câu hỏi lùi mức độ cao hơn, trừu tượng hơn
# 12. Query Translation -- HyDE
- tạo ra 1 tài liệu giả định từ llm -> nhúng, truy suất tài liệu
# 13. Routing -- Logical routing* | Semantic routing
- llm -> chọn đường tới datasource liên quan tới câu hỏi
- Logical routing: logic
- Semantic routing: ngữ nghĩa query-prompt | cosine_similarity()
# 14. Query Structuring*
- Chuyển câu hỏi truy vấn ở dạng ngôn ngữ tự nhiên sang có cấu trúc | pydantic 
- test Self-querying

# ############################################################################## #

In [8]:
import os
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'
os.environ['LANGCHAIN_API_KEY'] = "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"

# 1. 

In [None]:
from dotenv import load_dotenv
load_dotenv()

from langchain_openai import ChatOpenAI

llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0.7,
)

response = llm.stream("Write a poem about AI")
# print(response)

for chunk in response:
    print(chunk.content, end="", flush=True)  # flush hiện thị đầu ra ngay lập tức

# 2. Prompt Template
# from langchain_core.prompts import ChatPromptTemplate

In [None]:
from dotenv import load_dotenv
load_dotenv()

from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate

# Instantiate Model
llm = ChatOpenAI(
    temperature=0.7,
    model="gpt-3.5-turbo-1106",
)

# Prompt Template
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "Generate a list of 10 synonyms for the following word. Return the results as a comma seperated list."),
        ("human", "{input}")
    ]
)

# Create LLM Chain
chain = prompt | llm

response = chain.invoke({"input": "happy"})
print(response)

# 3. Output-parsers

# from langchain_core.output_parsers import StrOutputParser, CommaSeparatedListOutputParser, JsonOutputParser
# from langchain_core.pydantic_v1 import BaseModel, Field



In [24]:
from dotenv import load_dotenv
load_dotenv()

from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser, CommaSeparatedListOutputParser, JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field

# model = ChatOpenAI(model="gpt-3.5-turbo-1106", temperature=0.7)
from langchain_community.llms.ollama import Ollama
model = Ollama(model="llama3")

def call_string_output_parser():
    prompt = ChatPromptTemplate.from_messages([
        ("system", "Tell me a joke about the following subject"),
        ("human", "{input}")
    ])

    parser = StrOutputParser()

    chain = prompt | model | parser

    return chain.invoke({
        "input": "dog"
    })

def call_list_output_parser():
    prompt = ChatPromptTemplate.from_messages([
        ("system", "Generate a list of 10 synonyms for the following word. Return the results as a comma seperated list."),
        ("human", "{input}")
    ])

    parser = CommaSeparatedListOutputParser()
    
    chain = prompt | model | parser

    return chain.invoke({
        "input": "happy"
    })

def call_json_output_parser():
    prompt = ChatPromptTemplate.from_messages([
        ("system", "Extract information from the following phrase.\nFormatting Instructions: {format_instructions}"),
        ("human", "{phrase}")
    ])

    class Person(BaseModel):
        recipe: str = Field(description="the name of the recipe")
        ingredients: list = Field(description="ingredients")
        

    parser = JsonOutputParser(pydantic_object=Person)

    chain = prompt | model | parser
    
    return chain.invoke({
        "phrase": "The ingredients for a Margherita pizza are tomatoes, onions, cheese, basil",
        "format_instructions": parser.get_format_instructions()
    })

# print(type(call_string_output_parser()))
# print(type(call_list_output_parser()))
print(call_json_output_parser())

{'properties': {'recipe': 'Margherita Pizza', 'ingredients': ['tomatoes', 'onions', 'cheese', 'basil']}}


# 4. Chat with Documents using Retrieval Chains
# from langchain.chains.combine_documents import create_stuff_documents_chain  -> StrOutputParser.
# from langchain.chains import create_retrieval_chain

In [None]:
from dotenv import load_dotenv
load_dotenv()

from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores.faiss import FAISS
from langchain.chains import create_retrieval_chain

# Retrieve Data
def get_docs():
    loader = WebBaseLoader('https://python.langchain.com/docs/expression_language/')
    docs = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=200,
        chunk_overlap=20
    )

    splitDocs = text_splitter.split_documents(docs)

    return splitDocs

def create_vector_store(docs):
    embedding = OpenAIEmbeddings()
    vectorStore = FAISS.from_documents(docs, embedding=embedding)
    return vectorStore


def create_chain(vectorStore):
    model = ChatOpenAI(
        temperature=0.4,
        model='gpt-3.5-turbo-1106'
    )

    prompt = ChatPromptTemplate.from_template("""
    Answer the user's question.
    Context: {context}
    Question: {input}
    """)

    # chain = prompt | model
    document_chain = create_stuff_documents_chain(
        llm=model,
        prompt=prompt
    )

    retriever = vectorStore.as_retriever()

    retrieval_chain = create_retrieval_chain(retriever, document_chain)

    return retrieval_chain


docs = get_docs()
vectorStore = create_vector_store(docs)
chain = create_chain(vectorStore)

response = chain.invoke({
    "input": "What is LCEL?",
})

print(response)

In [None]:
from typing import Optional

from langchain.chains import create_structured_output_runnable
from langchain_openai import ChatOpenAI
from langchain_core.pydantic_v1 import BaseModel, Field


class RecordDog(BaseModel):
    '''Record some identifying information about a dog.'''

    name: str = Field(..., description="The dog's name")
    color: str = Field(..., description="The dog's color")
    fav_food: Optional[str] = Field(None, description="The dog's favorite food")

llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are an extraction algorithm. Please extract every possible instance"),
        ('human', '{input}')
    ]
)
structured_llm = create_structured_output_runnable(
    RecordDog,
    llm,
    mode="openai-tools",
    enforce_function_usage=True,
    return_single=True
)
structured_llm.invoke({"input": "Harry was a chubby brown beagle who loved chicken"})
# -> RecordDog(name="Harry", color="brown", fav_food="chicken")

# 5. Adding Chat History to Chatbot

# from langchain_core.prompts import MessagesPlaceholder
# from langchain_core.messages import HumanMessage, AIMessage
# from langchain.chains.history_aware_retriever import create_history_aware_retriever



In [None]:
from dotenv import load_dotenv
load_dotenv()

from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores.faiss import FAISS
from langchain.chains import create_retrieval_chain

# Conversation imports
from langchain_core.prompts import MessagesPlaceholder
from langchain_core.messages import HumanMessage, AIMessage
from langchain.chains.history_aware_retriever import create_history_aware_retriever

def get_documents_from_web(url):
    loader = WebBaseLoader(url)
    docs = loader.load()
    
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=400,
        chunk_overlap=20
    )
    splitDocs = splitter.split_documents(docs)
    return splitDocs

def create_db(docs):
    embedding = OpenAIEmbeddings()
    vectorStore = FAISS.from_documents(docs, embedding=embedding)
    return vectorStore

def create_chain(vectorStore):
    model = ChatOpenAI(
        model="gpt-3.5-turbo-1106",
        temperature=0.4
    )

    prompt = ChatPromptTemplate.from_messages([
        ("system", "Answer the user's questions based on the context: {context}"),
        MessagesPlaceholder(variable_name="chat_history"),
        ("user", "{input}")
    ])

    # chain = prompt | model
    chain = create_stuff_documents_chain(
        llm=model,
        prompt=prompt
    )

    # Replace retriever with history aware retriever
    retriever = vectorStore.as_retriever(search_kwargs={"k": 3})

    retriever_prompt = ChatPromptTemplate.from_messages([
        MessagesPlaceholder(variable_name="chat_history"),
        ("user", "{input}"),
        ("user", "Given the above conversation, generate a search query to look up in order to get information relevant to the conversation")
    ])
    history_aware_retriever = create_history_aware_retriever(
        llm=model,
        retriever=retriever,
        prompt=retriever_prompt
    )

    retrieval_chain = create_retrieval_chain(
        # retriever, Replace with History Aware Retriever
        history_aware_retriever,
        chain
    )

    return retrieval_chain


def process_chat(chain, question, chat_history):
    response = chain.invoke({
        "chat_history": chat_history,
        "input": question,
    })
    return response["answer"]




if __name__ == "__main__":
    docs = get_documents_from_web('https://python.langchain.com/docs/expression_language/')
    vectorStore = create_db(docs)
    chain = create_chain(vectorStore)

    # Initialize chat history
    chat_history = []

    while True:
        user_input = input("You: ")
        if user_input.lower() == 'exit':
            break
        response = process_chat(chain, user_input, chat_history)
        chat_history.append(HumanMessage(content=user_input))
        chat_history.append(AIMessage(content=response))
        print("Assistant:", response)

# 6. Agents with Tools
# from langchain.agents import create_openai_functions_agent, AgentExecutor
# from langchain_community.tools.tavily_search import TavilySearchResults
# from langchain.tools.retriever import create_retriever_tool

In [None]:
from dotenv import load_dotenv
load_dotenv()

from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.agents import create_openai_functions_agent, AgentExecutor
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_core.messages import HumanMessage, AIMessage
from langchain.tools.retriever import create_retriever_tool

from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores.faiss import FAISS

# Create Retriever
loader = WebBaseLoader("https://python.langchain.com/docs/expression_language/")
docs = loader.load()
    
splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=20
)
splitDocs = splitter.split_documents(docs)

embedding = OpenAIEmbeddings()
vectorStore = FAISS.from_documents(docs, embedding=embedding)
retriever = vectorStore.as_retriever(search_kwargs={"k": 3})

model = ChatOpenAI(
    model='gpt-3.5-turbo-1106',
    temperature=0.7
)

prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a friendly assistant called Max."),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
    MessagesPlaceholder(variable_name="agent_scratchpad")
])

search = TavilySearchResults()
retriever_tools = create_retriever_tool(
    retriever,
    "lcel_search",
    "Use this tool when searching for information about Langchain Expression Language (LCEL)."
)
tools = [search, retriever_tools]

agent = create_openai_functions_agent(
    llm=model,
    prompt=prompt,
    tools=tools
)

agentExecutor = AgentExecutor(
    agent=agent,
    tools=tools
)

def process_chat(agentExecutor, user_input, chat_history):
    response = agentExecutor.invoke({
        "input": user_input,
        "chat_history": chat_history
    })
    return response["output"]

if __name__ == '__main__':
    chat_history = []

    while True:
        user_input = input("You: ")
        if user_input.lower() == 'exit':
            break

        response = process_chat(agentExecutor, user_input, chat_history)
        chat_history.append(HumanMessage(content=user_input))
        chat_history.append(AIMessage(content=response))

        print("Assistant:", response)

# 7. Long Term Chat Memory with Upstash Redis

In [None]:
from dotenv import load_dotenv
load_dotenv()

from langchain_openai import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import LLMChain
from langchain_community.chat_message_histories.upstash_redis import (
    UpstashRedisChatMessageHistory,
)
from langchain.prompts import (
    ChatPromptTemplate,
    MessagesPlaceholder,
)

model = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0.6
)

prompt = ChatPromptTemplate.from_messages([
        ("system", "You are a friendly AI assistant."),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}")
    ])


URL = ""
TOKEN =""
history = UpstashRedisChatMessageHistory(
    url=URL, token=TOKEN, ttl=500, session_id="chat1"
)

memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True,
    chat_memory=history,
)

# chain = prompt | model
chain = LLMChain(
    llm=model,
    prompt=prompt,
    verbose=True,
    memory=memory
)


# Prompt 1
q1 = { "input": "My name is Leon" }
resp1 = chain.invoke(q1)
print(resp1["text"])

# Prompt 2
q2 = { "input": "What is my name?" }
resp2 = chain.invoke(q2)
print(resp2["text"])

# ############## Query Translation ############## #

# 8. Query Translation -- Multi Query
# sinh thêm câu hỏi truy vấn

In [8]:
from langchain_community.embeddings.ollama import OllamaEmbeddings
from langchain_community.llms.ollama import Ollama

embeddings = OllamaEmbeddings(model="llama3")
model = Ollama(model="llama3")

In [40]:
#### INDEXING ####

# # Load blog
# import bs4
# from langchain_community.document_loaders import WebBaseLoader
# loader = WebBaseLoader(
#     web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
#     bs_kwargs=dict(
#         parse_only=bs4.SoupStrainer(
#             class_=("post-content", "post-title", "post-header")
#         )
#     ),
# )
# blog_docs = loader.load()

# # Split
# from langchain.text_splitter import RecursiveCharacterTextSplitter
# text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
#     chunk_size=300, 
#     chunk_overlap=50)

# # Make splits
# splits = text_splitter.split_documents(blog_docs)

# # Index
# from langchain_openai import OpenAIEmbeddings
# from langchain_community.vectorstores import Chroma
# vectorstore = Chroma.from_documents(documents=splits, 
#                                     embedding=embeddings)

# retriever = vectorstore.as_retriever()

from langchain.chains import create_retrieval_chain
from langchain_community.embeddings.ollama import OllamaEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.vectorstores.chroma import Chroma
from langchain_community.llms.ollama import Ollama

from langchain_core.prompts import MessagesPlaceholder
from langchain_core.messages import HumanMessage, AIMessage
from langchain.chains.history_aware_retriever import create_history_aware_retriever

CHROMA_PATH = "chroma"
DATA_PATH = "data"

embeddings = OllamaEmbeddings(model="llama3")

db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embeddings)

model = Ollama(model="llama3")
# prompt = ChatPromptTemplate.from_messages([
#     ("system", "Answer the user's questions based on the context: {context}"),
#     MessagesPlaceholder(variable_name="chat_history"),
#     ("user", "{input}")
# ])

# document_chain = create_stuff_documents_chain(llm=model, prompt=prompt)

# retriever = db.as_retriever(search_kwargs={"k": 3})
retriever = db.as_retriever()

In [23]:
# Prompt
from langchain.prompts import ChatPromptTemplate

# Multi Query: Different Perspectives
template = """You are an AI language model assistant. Your task is to generate five 
different versions of the given user question to retrieve relevant documents from a vector 
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search. 
Provide these alternative questions separated by newlines. Original question: {question}"""
prompt_perspectives = ChatPromptTemplate.from_template(template)

from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

generate_queries = (
    prompt_perspectives 
    | model
    | StrOutputParser() 
    | (lambda x: x.split("\n"))
)

In [24]:
from langchain.load import dumps, loads

def get_unique_union(documents: list[list]):
    """ Unique union of retrieved docs """
    # Flatten list of lists, and convert each Document to string
    # làm phằng, đưa list of list -> 1 list
    flattened_docs = [dumps(doc) for sublist in documents for doc in sublist]
    # Get unique documents
    unique_docs = list(set(flattened_docs))
    # Return list documents
    return [loads(doc) for doc in unique_docs]

# Retrieve
question = "What is the dataset?"
retrieval_chain = generate_queries | retriever.map() | get_unique_union
docs = retrieval_chain.invoke({"question":question})
print(len(docs))
print(docs)

10
[Document(page_content='42. El-Rashidy N, Ebrahim N, El Ghamry A, Talaat FM (2022)\nUtilizing fog computing and explainable deep learning tech-niques for gestational diabetes prediction. Neural Comput Appl.https://doi.org/10.1007/s00521-022-08007-5\n43. El-Balka RM et al (2022) Enhancing the performance of smart\nelectrical grids using data mining and fuzzy inference engine.Multimed Tools Appl 81(23):33017–33049\n44. Talaat FM (2022) Effective deep Q-networks (EDQN) strategy\nfor resource allocation based on optimized reinforcement learn-\ning algorithm. Multimed Tools Appl 81:39945–39961\n45. Alshathri S, Talaat FM, Nasr AA (2022) A new reliable system\nfor managing virtual cloud network. Comput Mater Continua\n73(3):5863–5885. https://doi.org/10.32604/cmc.2022.026547', metadata={'id': 'data\\yolov8.pdf:15:2', 'page': 15, 'source': 'data\\yolov8.pdf'}), Document(page_content='F1 score, demonstrating its effectiveness in detecting ﬁres\nFig. 7 Precision–recall curve of the proposed 

In [25]:
from operator import itemgetter
from langchain_openai import ChatOpenAI
from langchain_core.runnables import RunnablePassthrough

# RAG
template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

# llm = ChatOpenAI(temperature=0)
llm = model

final_rag_chain = (
    {"context": retrieval_chain, 
     "question": itemgetter("question")} 
    | prompt
    | llm
    | StrOutputParser()
)

final_rag_chain.invoke({"question":question})

'Based on the provided context, it appears that there is no explicit mention of a specific dataset. However, it can be inferred from the content that the paper discusses various approaches to fire detection using deep learning-based models, such as YOLOv8. \n\nThe text mentions the use of datasets in previous research studies, for example, the MobileNetV2 model was used with the Forest Fire Detection System (FFireNet) dataset [8], and an improved YOLOv5 algorithm was tested on photographs taken by unmanned aerial vehicles (UAVs) [9]. \n\nHowever, it does not specify a particular dataset used in this paper.'

In [26]:
print("...")

...


# 9. Query Translation -- RAG Fusion

In [None]:
from langchain_community.embeddings.ollama import OllamaEmbeddings
from langchain_community.llms.ollama import Ollama

embeddings = OllamaEmbeddings(model="llama3")
model = Ollama(model="llama3")

In [28]:
# Prompt
from langchain.prompts import ChatPromptTemplate

# RAG-Fusion: Related
template = """You are a helpful assistant that generates multiple search queries based on a single input query. \n
Generate multiple search queries related to: {question} \n
Output (4 queries):"""
prompt_rag_fusion = ChatPromptTemplate.from_template(template)

In [30]:
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

generate_queries = (
    prompt_rag_fusion 
    | model # ChatOpenAI(temperature=0)
    | StrOutputParser() 
    | (lambda x: x.split("\n")) # list query
)

In [33]:
from langchain.load import dumps, loads
    
def reciprocal_rank_fusion(results: list[list], k=60):
    """ Reciprocal_rank_fusion that takes multiple lists of ranked documents 
        and an optional parameter k used in the RRF formula 
        RRF- Reciprocal rank fusion
        
        Tài liệu nào được truy xuất thành kết quả nhiều lần -> rank cao hơn
    """
    
    # Initialize a dictionary to hold fused scores for each unique document
    fused_scores = {}

    # Iterate through each list of ranked documents
    for docs in results:
        # Iterate through each document in the list, with its rank (position in the list)
        for rank, doc in enumerate(docs):
            # Convert the document to a string format to use as a key (assumes documents can be serialized to JSON)
            doc_str = dumps(doc)
            # If the document is not yet in the fused_scores dictionary, add it with an initial score of 0
            if doc_str not in fused_scores:
                fused_scores[doc_str] = 0
            # Retrieve the current score of the document, if any
            previous_score = fused_scores[doc_str]
            # Update the score of the document using the RRF formula: 1 / (rank + k)
            fused_scores[doc_str] += 1 / (rank + k)

    # Sort the documents based on their fused scores in descending order to get the final reranked results
    reranked_results = [
        (loads(doc), score)
        for doc, score in sorted(fused_scores.items(), key=lambda x: x[1], reverse=True)
    ]

    # Return the reranked results as a list of tuples, each containing the document and its fused score
    return reranked_results

retrieval_chain_rag_fusion = generate_queries | retriever.map() | reciprocal_rank_fusion
docs = retrieval_chain_rag_fusion.invoke({"question": question})
print(len(docs))

8
[[Document(page_content='costs associated with false alarms.\n•Cost-effective: The proposed approach may be cost-\neffective compared to traditional ﬁre detection methods\nas it can be implemented using low-cost cameras and\nhardware, reducing the need for expensive ﬁre detectionsystems.\n•Large dataset: Unlike other methods that use small\nnumber of datasets, a large dataset containing ﬁre,smoke, and normal scenes is used. The dataset has real-\nworld images and videos collected from various\nsources. The dataset has a diverse range of ﬁrescenarios, including indoor and outdoor ﬁres, small\nand large ﬁres, and low-light and high-light conditions.\nA deep CNN gathers essential data from big datasets toproduce precise predictions and reduce overﬁtting.\nThe following is how the remaining work is structured.', metadata={'id': 'data\\yolov8.pdf:1:5', 'page': 1, 'source': 'data\\yolov8.pdf'}), Document(page_content='detected objects highlighted. The SFD algorithm is a\npowerful tool for 

In [32]:
# RAG
template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

print()
final_rag_chain = (
    {"context": retrieval_chain_rag_fusion, 
     "question": itemgetter("question")} # ~ind
    | prompt
    | llm
    | StrOutputParser()
)

final_rag_chain.invoke({"question":question})

'Based on the provided context, it appears that there is no specific mention of a dataset in this text. The text seems to be discussing various algorithms and approaches for fire detection, including YOLOv8, Faster R-CNN, and SSD. It mentions the need for diverse and larger datasets for training and testing these approaches, but does not specify a particular dataset being used or referred to.'

# 10. Query Translation -- Decomposition tách câu hỏi thành từng phần để hỏi
Answer recursively - trả lời đệ quy, kết hợp phần trả lời của câu trước để trả lời cho câu phụ tiếp theo
Answer individually - trả lời riêng

In [34]:
from langchain_community.embeddings.ollama import OllamaEmbeddings
from langchain_community.llms.ollama import Ollama

embeddings = OllamaEmbeddings(model="llama3")
model = Ollama(model="llama3")

In [35]:
from langchain.prompts import ChatPromptTemplate
# Decomposition
template = """You are a helpful assistant that generates multiple sub-questions related to an input question. \n
The goal is to break down the input into a set of sub-problems / sub-questions that can be answers in isolation. \n
Generate multiple search queries related to: {question} \n
Output (3 queries):"""
prompt_decomposition = ChatPromptTemplate.from_template(template)

In [38]:
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser

# LLM
llm = model

# Chain
generate_queries_decomposition = (
    prompt_decomposition 
    | llm 
    | StrOutputParser() 
    | (lambda x: x.split("\n")))

# Run
# question = "What are the main components of an LLM-powered autonomous agent system?"
question = "What data is used in the paper?"
questions = generate_queries_decomposition.invoke({"question":question})

In [39]:
questions

['Here are three sub-questions related to "What data is used in the paper?" :',
 '',
 '1. **What type of data**: Is the data used in the paper numerical, categorical, text-based, or a combination?',
 '2. **Source of the data**: Is the data sourced from primary research, secondary sources (e.g., literature reviews), or external datasets (e.g., government reports)?',
 '3. **Specific variables or metrics**: What specific variables, metrics, or indicators are used in the paper to support its arguments or findings?']

#

In [None]:
# Answer recursively - trả lời đệ quy, kết hợp phần trả lời của câu trước để trả lời cho câu phụ tiếp theo

In [41]:
# Prompt
template = """Here is the question you need to answer:

\n --- \n {question} \n --- \n

Here is any available background question + answer pairs:

\n --- \n {q_a_pairs} \n --- \n

Here is additional context relevant to the question: 

\n --- \n {context} \n --- \n

Use the above context and any background question + answer pairs to answer the question: \n {question}
"""

decomposition_prompt = ChatPromptTemplate.from_template(template)

In [42]:
from operator import itemgetter
from langchain_core.output_parsers import StrOutputParser

def format_qa_pair(question, answer):
    """Format Q and A pair"""
    
    formatted_string = ""
    formatted_string += f"Question: {question}\nAnswer: {answer}\n\n"
    return formatted_string.strip()

# llm
# llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
llm = model
q_a_pairs = ""
for q in questions:
    
    rag_chain = (
    {"context": itemgetter("question") | retriever, 
     "question": itemgetter("question"),
     "q_a_pairs": itemgetter("q_a_pairs")} 
    | decomposition_prompt
    | llm
    | StrOutputParser())

    answer = rag_chain.invoke({"question":q,"q_a_pairs":q_a_pairs})
    q_a_pair = format_qa_pair(q,answer)
    q_a_pairs = q_a_pairs + "\n---\n"+  q_a_pair

#

In [None]:
# Answer individually - trả lời riêng

In [43]:
# Answer each sub-question individually 

from langchain import hub
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

# RAG prompt
prompt_rag = hub.pull("rlm/rag-prompt")

def retrieve_and_rag(question,prompt_rag,sub_question_generator_chain):
    """RAG mỗi một câu hỏi phụ"""
    
    # Use our decomposition / Tách sinh câu hỏi
    sub_questions = sub_question_generator_chain.invoke({"question":question})
    
    # Initialize a list to hold RAG chain results
    rag_results = []
    
    for sub_question in sub_questions:
        
        # Retrieve documents for each sub-question
        retrieved_docs = retriever.get_relevant_documents(sub_question)
        
        # Use retrieved documents and sub-question in RAG chain
        answer = (prompt_rag
                  | llm 
                  | StrOutputParser()
        ).invoke({"context": retrieved_docs,"question": sub_question})
        rag_results.append(answer)
    
    return rag_results, sub_questions

# Wrap the retrieval and RAG process in a RunnableLambda for integration into a chain
answers, questions = retrieve_and_rag(
    question, 
    prompt_rag, 
    generate_queries_decomposition  # chain tách sinh câu hỏi 
)

  warn_deprecated(


In [44]:
def format_qa_pairs(questions, answers):
    """Format Q and A pairs"""
    
    formatted_string = ""
    for i, (question, answer) in enumerate(zip(questions, answers), start=1):
        formatted_string += f"Question {i}: {question}\nAnswer {i}: {answer}\n\n"
    return formatted_string.strip()

context = format_qa_pairs(questions, answers)

# Prompt
template = """Here is a set of Q+A pairs:

{context}

Use these to synthesize an answer to the question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

final_rag_chain = (
    prompt
    | llm
    | StrOutputParser()
)

final_rag_chain.invoke({"context":context,"question":question})

'Based on the provided context and the Q&A pairs, the data used in the paper includes:\n\n1. A large dataset containing real-world images and videos of fire, smoke, and normal scenes from various sources.\n2. This dataset has a diverse range of fire scenarios, including indoor and outdoor fires, small and large fires, and low-light and high-light conditions.\n3. The dataset is used to train a deep Convolutional Neural Network (CNN) model, specifically the YOLOv8 object detection algorithm, to detect fires in real-time video input.\n\nThe data analysis techniques applied involve image pre-processing and object detection using the YOLOv8 model. No specific datasets are mentioned in the paper, but it is clear that the dataset used is a large collection of images and videos. The study does not rely on secondary data from existing studies or datasets; instead, it collects its own dataset to train the deep CNN model.\n\nIn summary, the data used in the paper consists of a large dataset conta

# 11. Query Translation -- Step Back
# từ câu hỏi ban đầu tạo 1 câu hỏi lùi mức độ cao hơn, trừu tường hơn có thể đóng vai trò là điều kiện tiên quyết để trả lời chính xác câu hỏi ban đầu

In [45]:
# Few Shot Examples
from langchain_core.prompts import ChatPromptTemplate, FewShotChatMessagePromptTemplate
examples = [
    {
        "input": "Could the members of The Police perform lawful arrests?",
        "output": "what can the members of The Police do?",
    },
    {
        "input": "Jan Sindel’s was born in what country?",
        "output": "what is Jan Sindel’s personal history?",
    },
]
# We now transform these to example messages
example_prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "{input}"),
        ("ai", "{output}"),
    ]
)
few_shot_prompt = FewShotChatMessagePromptTemplate(
    example_prompt=example_prompt,
    examples=examples,
)
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """You are an expert at world knowledge. Your task is to step back and paraphrase a question to a more generic step-back question, which is easier to answer. Here are a few examples:""",
        ),
        # Few shot examples
        few_shot_prompt,
        # New question
        ("user", "{question}"),
    ]
)

In [46]:
# generate_queries_step_back = prompt | ChatOpenAI(temperature=0) | StrOutputParser()
generate_queries_step_back = prompt | model | StrOutputParser()
question = "What data is used in the paper?"
generate_queries_step_back.invoke({"question": question})

"I'm ready to help. Please go ahead and ask your question, and I'll try to paraphrase it into a more generic step-back question that's easier for me to answer."

In [48]:
# Response prompt 
response_prompt_template = """You are an expert of world knowledge. I am going to ask you a question. Your response should be comprehensive and not contradicted with the following context if they are relevant. Otherwise, ignore them if they are not relevant.

# {normal_context}
# {step_back_context}

# Original Question: {question}
# Answer:"""
response_prompt = ChatPromptTemplate.from_template(response_prompt_template)

chain = (
    {
        # Retrieve context using the normal question
        "normal_context": RunnableLambda(lambda x: x["question"]) | retriever,
        # Retrieve context using the step-back question
        "step_back_context": generate_queries_step_back | retriever,
        # Pass on the question
        "question": lambda x: x["question"],
    }
    | response_prompt
    | model
    | StrOutputParser()
)

chain.invoke({"question": question})

'Based on the provided documents, it appears that a large dataset containing fire, smoke, and normal scenes is used in the paper. This dataset has real-world images and videos collected from various sources, including live cameras and pre-recorded video files. The goal of using this dataset is to have a large enough and balanced dataset that can generalize well to new data.\n\nThe specific characteristics of the dataset are not explicitly stated in the provided documents, but it is mentioned that the dataset needs to be processed (e.g., resizing or normalizing) to achieve the desired level of quality.'

# 12. Query Translation -- HyDE
# nhằm tạo ra 1 tài liệu giả định từ llm để trả lời câu hỏi người dùng.
# tài liệu này được nhúng, sdung để truy xuất tài liệu

In [50]:
from langchain.prompts import ChatPromptTemplate

# HyDE document genration
template = """Please write a scientific paper passage to answer the question
Question: {question}
Passage:"""
prompt_hyde = ChatPromptTemplate.from_template(template)

from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

generate_docs_for_retrieval = (
    prompt_hyde | model | StrOutputParser() 
)

# Run
question = "What is task decomposition for LLM agents?"
generate_docs_for_retrieval.invoke({"question":question})

'Here\'s a passage answering the question:\n\nTask decomposition is a crucial concept in the realm of Large Language Models (LLMs), particularly in the context of natural language processing (NLP) and artificial intelligence (AI). In essence, task decomposition refers to the process of breaking down complex tasks or goals into smaller, more manageable sub-tasks that can be executed independently by individual agents within a distributed AI system.\n\nIn LLMs, task decomposition is essential for several reasons. Firstly, it enables the distribution of workload across multiple agents, thereby allowing for greater computational efficiency and scalability. Secondly, it facilitates the development of specialized agents with unique capabilities, each focusing on a specific sub-task that leverages their strengths. This approach also enables the reuse of pre-trained models and reduces the need for re-training from scratch.\n\nTask decomposition in LLMs typically involves several key steps: (1)

In [51]:
# Retrieve
retrieval_chain = generate_docs_for_retrieval | retriever 
retireved_docs = retrieval_chain.invoke({"question":question})
retireved_docs

[Document(page_content='detected objects highlighted. The SFD algorithm is a\npowerful tool for detecting ﬁres in real-time and enablesquick and effective responses to potential ﬁre hazards.The smart ﬁre detection (SFD) uses Yolov8 object\ndetection model. It involves several steps:\n•The video input source is set up from either a live\ncamera or pre-recorded video ﬁle.\n•The video capture process is started, and each frame of\nthe video is looped through.\n•Image pre-processing techniques are applied to each\nframe, and the pre-processed frame is passed to the\nYolov8 model for object detection.\n•The detected objects are checked for ﬁre-related\nclasses, such as ‘‘ﬂames’’, ‘‘smoke’’, or ‘‘embers’’.\n•If a ﬁre-related class is detected, an alarm is triggered,\nand relevant authorities are notiﬁed.', metadata={'id': 'data\\yolov8.pdf:9:1', 'page': 9, 'source': 'data\\yolov8.pdf'}),
 Document(page_content='tative of the overall dataset. Other pre-processing steps,\nsuch as resizing or n

In [52]:
# RAG
template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

final_rag_chain = (
    prompt
    | llm
    | StrOutputParser()
)

final_rag_chain.invoke({"context":retireved_docs,"question":question})

'The text does not explicitly mention "task decomposition" in the context of Large Language Model (LLM) agents. However, based on the provided documents, I can infer that task decomposition refers to breaking down a complex task into smaller sub-tasks or steps.\n\nIn this specific context, the documents describe a smart fire detection system using YOLOv8 object detection model. The process involves several steps:\n\n1. Setting up the video input source\n2. Capturing and pre-processing video frames\n3. Passing the pre-processed frames to the YOLOv8 model for object detection\n4. Checking detected objects for fire-related classes (e.g., flames, smoke, embers)\n5. Triggering an alarm if a fire-related class is detected\n\nThis breakdown of steps can be considered as task decomposition, where the overall task of smart fire detection is divided into smaller, manageable sub-tasks that can be executed sequentially or in parallel to achieve the desired outcome.'

# 13. Routing
# llm -> chọn đường dẫn tới datasource liên quan tới câu hỏi
Logical routing | Semantic routing

In [57]:
from typing import Literal

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_openai import ChatOpenAI

# ollama
from langchain_experimental.llms.ollama_functions import OllamaFunctions

# Data model
class RouteQuery(BaseModel):
    """Route a user query to the most relevant datasource."""

    datasource: Literal["python_docs", "js_docs", "golang_docs"] = Field(
        ...,
        description="Given a user question choose which datasource would be most relevant for answering their question",
    )

# LLM with function call 
llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
# llm = OllamaFunctions(model="llama3", format="json", temperature=0)

structured_llm = llm.with_structured_output(RouteQuery)

# Prompt 
system = """You are an expert at routing a user question to the appropriate data source.

Based on the programming language the question is referring to, route it to the relevant data source."""

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "{question}"),
    ]
)

# Define router 
router = prompt | structured_llm

In [None]:
# Example user query
user_question = "How do I create a virtual environment in Python?"

# Generate the final prompt with the user question
final_prompt = prompt.format(question=user_question)

# Get the routing decision
routing_decision = router.invoke({"question": user_question})

print(routing_decision)


In [None]:
question = "How do I create a virtual environment in Python?"

result = router.invoke({"question": question})

In [None]:
result

In [None]:
result.datasource

In [None]:
def choose_route(result):
    if "python_docs" in result.datasource.lower():
        ### Logic here 
        return "chain for python_docs"
    elif "js_docs" in result.datasource.lower():
        ### Logic here 
        return "chain for js_docs"
    else:
        ### Logic here 
        return "golang_docs"

from langchain_core.runnables import RunnableLambda

full_chain = router | RunnableLambda(choose_route)

In [None]:
full_chain.invoke({"question": question})

In [None]:
# Semantic routing 
# dựa trên ngữ nghĩa tính cosi similarity giữa câu hỏi và template

In [3]:
from langchain.utils.math import cosine_similarity
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings


from langchain_community.embeddings.ollama import OllamaEmbeddings
from langchain_community.llms.ollama import Ollama

embeddings = OllamaEmbeddings(model="llama3")

model = Ollama(model="llama3")

# Two prompts
physics_template = """You are a very smart physics professor. \
You are great at answering questions about physics in a concise and easy to understand manner. \
When you don't know the answer to a question you admit that you don't know.

Here is a question:
{query}"""

math_template = """You are a very good mathematician. You are great at answering math questions. \
You are so good because you are able to break down hard problems into their component parts, \
answer the component parts, and then put them together to answer the broader question.

Here is a question:
{query}"""

# Embed prompts
# embeddings = OpenAIEmbeddings()
prompt_templates = [physics_template, math_template]
prompt_embeddings = embeddings.embed_documents(prompt_templates)

# Route question to prompt 
def prompt_router(input):
    # Embed question
    query_embedding = embeddings.embed_query(input["query"])
    # Compute similarity
    similarity = cosine_similarity([query_embedding], prompt_embeddings)[0]
    most_similar = prompt_templates[similarity.argmax()]
    # Chosen prompt 
    print("Using MATH" if most_similar == math_template else "Using PHYSICS")
    return PromptTemplate.from_template(most_similar)


chain = (
    {"query": RunnablePassthrough()}
    | RunnableLambda(prompt_router)
    | model
    | StrOutputParser()
)

print(chain.invoke("What's a black hole"))

Using MATH
A question that may seem simple, but can be quite complex when delved into!

To tackle this question, let's break it down into smaller, more manageable parts. We'll start with some definitions and then build our way up to understanding what a black hole is.

**Part 1: What is gravity?**

Gravity is a fundamental force of nature that describes the attraction between two objects with mass (or energy). According to Einstein's theory of general relativity, massive objects warp the fabric of spacetime around them, creating a gravitational field. This warping causes other objects with mass to follow curved trajectories, which we experience as the force of gravity.

**Part 2: What is a singularity?**

A singularity is a point in spacetime where the curvature becomes infinite and the laws of physics as we know them break down. In general relativity, singularities occur when the density and curvature of spacetime become extreme, such as at the center of a black hole or during the Big

# 14. Query Structuring
Chuyển câu hỏi truy vấn ở dạng ngôn ngữ tự nhiên sang có cấu trúc 
test Self-querying

In [8]:
from langchain_community.document_loaders import YoutubeLoader

docs = YoutubeLoader.from_youtube_url(
    "https://www.youtube.com/watch?v=pbAd8O1Lvm4", add_video_info=True
).load()

docs[0].metadata

{'source': 'pbAd8O1Lvm4',
 'title': 'Self-reflective RAG with LangGraph: Self-RAG and CRAG',
 'description': 'Unknown',
 'view_count': 18109,
 'thumbnail_url': 'https://i.ytimg.com/vi/pbAd8O1Lvm4/hq720.jpg',
 'publish_date': '2024-02-07 00:00:00',
 'length': 1058,
 'author': 'LangChain'}

In [9]:
import datetime
from typing import Literal, Optional, Tuple
from langchain_core.pydantic_v1 import BaseModel, Field

class TutorialSearch(BaseModel):
    """Search over a database of tutorial videos about a software library."""

    content_search: str = Field(
        ...,
        description="Similarity search query applied to video transcripts.",
    )
    title_search: str = Field(
        ...,
        description=(
            "Alternate version of the content search query to apply to video titles. "
            "Should be succinct and only include key words that could be in a video "
            "title."
        ),
    )
    min_view_count: Optional[int] = Field(
        None,
        description="Minimum view count filter, inclusive. Only use if explicitly specified.",
    )
    max_view_count: Optional[int] = Field(
        None,
        description="Maximum view count filter, exclusive. Only use if explicitly specified.",
    )
    earliest_publish_date: Optional[datetime.date] = Field(
        None,
        description="Earliest publish date filter, inclusive. Only use if explicitly specified.",
    )
    latest_publish_date: Optional[datetime.date] = Field(
        None,
        description="Latest publish date filter, exclusive. Only use if explicitly specified.",
    )
    min_length_sec: Optional[int] = Field(
        None,
        description="Minimum video length in seconds, inclusive. Only use if explicitly specified.",
    )
    max_length_sec: Optional[int] = Field(
        None,
        description="Maximum video length in seconds, exclusive. Only use if explicitly specified.",
    )

    def pretty_print(self) -> None:
        for field in self.__fields__:
            if getattr(self, field) is not None and getattr(self, field) != getattr(
                self.__fields__[field], "default", None
            ):
                print(f"{field}: {getattr(self, field)}")

In [None]:
# from langchain_core.prompts import ChatPromptTemplate
# from langchain_openai import ChatOpenAI

# system = """You are an expert at converting user questions into database queries. \
# You have access to a database of tutorial videos about a software library for building LLM-powered applications. \
# Given a question, return a database query optimized to retrieve the most relevant results.

# If there are acronyms or words you are not familiar with, do not try to rephrase them."""
# prompt = ChatPromptTemplate.from_messages(
#     [
#         ("system", system),
#         ("human", "{question}"),
#     ]
# )
# llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
# structured_llm = llm.with_structured_output(TutorialSearch)
# query_analyzer = prompt | structured_llm

In [32]:
# test ollama
from langchain_core.output_parsers import JsonOutputParser
from langchain_community.embeddings.ollama import OllamaEmbeddings
from langchain_community.llms.ollama import Ollama

embeddings = OllamaEmbeddings(model="llama3")

model = Ollama(model="llama3")

system = """You are an expert at converting user questions into database queries. \
You have access to a database of tutorial videos about a software library for building LLM-powered applications. \
Given a question, return a database query optimized to retrieve the most relevant results.
\nFormatting Instructions: {format_instructions}
If there are acronyms or words you are not familiar with, do not try to rephrase them."""

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "{question}"),
    ]
)

out_parser = JsonOutputParser(pydantic_object=TutorialSearch)

query_analyzer = prompt | model | out_parser

In [33]:
query_analyzer.invoke({"question": "rag from scratch",
                       "format_instructions": out_parser.get_format_instructions()
})

{'content_search': 'title: ("RAG" OR "from scratch") AND description: ("RAG" OR "from scratch")',
 'title_search': 'title: ("RAG" OR "from scratch")',
 'min_view_count': None,
 'max_view_count': None,
 'earliest_publish_date': None,
 'latest_publish_date': None,
 'min_length_sec': None,
 'max_length_sec': None}

In [34]:
query_analyzer.invoke({"question": "videos that are focused on the topic of chat langchain that are published before 2024",
                       "format_instructions": out_parser.get_format_instructions()
})

{'content_search': 'chat langchain',
 'title_search': 'chat langchain',
 'earliest_publish_date': {'$lt': '2024-01-01'}}

In [35]:
query_analyzer.invoke({"question": "how to use multi-modal models in an agent, only videos under 5 minutes",
                       "format_instructions": out_parser.get_format_instructions()
})

{'content_search': 'multi-modal models AND (length < 300) AND (view_count < 500)',
 'title_search': 'using multi-modal models',
 'min_view_count': 0,
 'max_view_count': 499,
 'earliest_publish_date': None,
 'latest_publish_date': None,
 'min_length_sec': 0,
 'max_length_sec': 300}

In [None]:
# query_analyzer.invoke({"question": "rag from scratch"}).pretty_print()

In [None]:
# query_analyzer.invoke(
#     {"question": "videos that are focused on the topic of chat langchain that are published before 2024"}
# ).pretty_print()

In [None]:
# query_analyzer.invoke(
#     {
#         "question": "how to use multi-modal models in an agent, only videos under 5 minutes"
#     }
# ).pretty_print()

In [None]:
# test Self-querying

In [5]:
from langchain.vectorstores.chroma import Chroma
from langchain_core.documents import Document
from langchain_community.embeddings.ollama import OllamaEmbeddings
from langchain_community.llms.ollama import Ollama

docs = [
    Document(
        page_content="A bunch of scientists bring back dinosaurs and mayhem breaks loose",
        metadata={"year": 1993, "rating": 7.7, "genre": "science fiction"},
    ),
    Document(
        page_content="Leo DiCaprio gets lost in a dream within a dream within a dream within a ...",
        metadata={"year": 2010, "director": "Christopher Nolan", "rating": 8.2},
    ),
    Document(
        page_content="A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea",
        metadata={"year": 2006, "director": "Satoshi Kon", "rating": 8.6},
    ),
    Document(
        page_content="A bunch of normal-sized women are supremely wholesome and some men pine after them",
        metadata={"year": 2019, "director": "Greta Gerwig", "rating": 8.3},
    ),
    Document(
        page_content="Toys come alive and have a blast doing so",
        metadata={"year": 1995, "genre": "animated"},
    ),
    Document(
        page_content="Three men walk into the Zone, three men walk out of the Zone",
        metadata={
            "year": 1979,
            "director": "Andrei Tarkovsky",
            "genre": "thriller",
            "rating": 9.9,
        },
    ),
]


embeddings = OllamaEmbeddings(model="llama3")
model = Ollama(model="llama3")
vectorstore = Chroma.from_documents(docs, embeddings)

In [6]:
# Creating our self-querying retriever
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain_openai import ChatOpenAI

metadata_field_info = [
    AttributeInfo(
        name="genre",
        description="The genre of the movie. One of ['science fiction', 'comedy', 'drama', 'thriller', 'romance', 'action', 'animated']",
        type="string",
    ),
    AttributeInfo(
        name="year",
        description="The year the movie was released",
        type="integer",
    ),
    AttributeInfo(
        name="director",
        description="The name of the movie director",
        type="string",
    ),
    AttributeInfo(
        name="rating", description="A 1-10 rating for the movie", type="float"
    ),
]
document_content_description = "Brief summary of a movie"
llm = model
retriever = SelfQueryRetriever.from_llm(
    llm,
    vectorstore,
    document_content_description,
    metadata_field_info,
)


In [20]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain_core.prompts import ChatPromptTemplate
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "Answer the user's questions based on the context: {context}"),
        ("human", "{input}"),
    ]
)
chain = create_stuff_documents_chain(
    llm=model,
    prompt=prompt
)
retriever_chain = create_retrieval_chain(retriever, chain)

In [21]:
# This example only specifies a filter
retriever.invoke("I want to watch a movie rated higher than 8.5")

[Document(page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea', metadata={'director': 'Satoshi Kon', 'rating': 8.6, 'year': 2006}),
 Document(page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea', metadata={'director': 'Satoshi Kon', 'rating': 8.6, 'year': 2006}),
 Document(page_content='Three men walk into the Zone, three men walk out of the Zone', metadata={'director': 'Andrei Tarkovsky', 'genre': 'thriller', 'rating': 9.9, 'year': 1979}),
 Document(page_content='Three men walk into the Zone, three men walk out of the Zone', metadata={'director': 'Andrei Tarkovsky', 'genre': 'thriller', 'rating': 9.9, 'year': 1979})]

In [22]:
# This example only specifies a filter
retriever_chain.invoke({"input": "I want to watch a movie rated higher than 8.5"})

{'input': 'I want to watch a movie rated higher than 8.5',
 'context': [Document(page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea', metadata={'director': 'Satoshi Kon', 'rating': 8.6, 'year': 2006}),
  Document(page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea', metadata={'director': 'Satoshi Kon', 'rating': 8.6, 'year': 2006}),
  Document(page_content='Three men walk into the Zone, three men walk out of the Zone', metadata={'director': 'Andrei Tarkovsky', 'genre': 'thriller', 'rating': 9.9, 'year': 1979}),
  Document(page_content='Three men walk into the Zone, three men walk out of the Zone', metadata={'director': 'Andrei Tarkovsky', 'genre': 'thriller', 'rating': 9.9, 'year': 1979})],
 'answer': 'A unique context! As a psychologist/detective lost in dreams within dreams, my mind is sharp and ready to analyze this situation.

# 15. Multi-representation Indexing
# Indexing