In [None]:
import os
from langchain_core.documents import Document

# Sample corpus: one paragraph per unrelated topic, keyed by the id that is
# stored under "id" in the metadata of the resulting document.
paragraphs_by_id = {
    "1000": "Climate change refers to significant changes in global temperatures and weather patterns over time. While climate has changed throughout Earth's history, the rapid warming seen today is primarily driven by the increase in greenhouse gases, such as carbon dioxide and methane, due to human activities like deforestation, industrial processes, and the burning of fossil fuels. This warming has numerous effects on the environment, including more frequent and severe heatwaves, droughts, and heavy rainfall events, which can lead to disasters like wildfires and floods. Additionally, rising sea levels caused by the melting of polar ice and the expansion of warmer sea waters pose significant threats to coastal regions. Changes in climate also affect biodiversity, as species must either adapt to new conditions, migrate, or face extinction. The social and economic impacts are equally significant, affecting agriculture, health, water supplies, and even triggering human displacement. Addressing climate change requires global cooperation to reduce greenhouse gas emissions, alongside efforts to adapt to its inevitable impacts and invest in sustainable technologies.",
    "1001": "Thanos is one of the most iconic and powerful villains in the Marvel Universe, known for his obsession with balancing the universe by any means necessary, including mass genocide. Originating from the moon Titan, he is a member of the race known as the Eternals, possessing immense strength, intelligence, and mastery of mystical forces. Thanos is driven by his infatuation with the physical embodiment of Death, to whom he aims to prove his love by erasing half of all life in the universe. His quest to achieve this goal involves a relentless search for the Infinity Stones—artifacts of incredible power that, when combined in the Infinity Gauntlet, give him the ability to manipulate reality, time, space, and the very fabric of existence. His complex character is marked not only by his malevolence but also by a philosophical underpinning that he believes justifies his catastrophic actions. This chilling conviction makes him a unique villain who not only challenges the physical might of heroes like the Avengers but also poses profound moral and ethical dilemmas.",
    "1002": "Mercedes-Benz is a globally recognized German automobile brand known for its luxury vehicles, buses, coaches, and trucks. Founded in 1926 under the name Daimler-Benz, it originated from the merging of Karl Benz's and Gottlieb Daimler's companies, who independently developed the first gasoline-powered automobiles in the late 19th century. The brand is synonymous with quality, innovation, and sophistication, often pioneering advancements in safety, comfort, and technology. Mercedes-Benz was the first to introduce innovations such as the anti-lock braking system (ABS), electronic stability program (ESP), and PRE-SAFE systems, which have set industry standards. The company places a significant emphasis on producing vehicles that offer a blend of high performance, exceptional luxury, and aesthetic design. With a reputation built on more than a century of excellence, Mercedes-Benz continues to be at the forefront of automotive development, including new initiatives in electric vehicles and sustainable mobility solutions, aiming to shape the future of transportation while maintaining its legacy of luxury and quality.",
}

# Wrap every paragraph in a Document, keeping the order of the mapping above.
documents = [
    Document(page_content=paragraph, metadata={"id": doc_id})
    for doc_id, paragraph in paragraphs_by_id.items()
]

In [None]:
from langchain_aws import BedrockEmbeddings

# Embedding function used by the vector store.
# Profile, region and model id are read from the environment. The model id
# falls back to a Titan *embedding* model: a text-generation id such as
# "amazon.titan-text-express-v1" is not an embedding model and cannot be used
# to vectorize text.
bedrock_embeddings = BedrockEmbeddings(
    credentials_profile_name=os.getenv("AWS_CREDENTIALS_PROFILE_NAME"),
    region_name=os.getenv("AWS_REGION_NAME", "eu-central-1"),
    model_id=os.getenv("AWS_EMBEDDING_MODEL_ID", "amazon.titan-embed-text-v1"),
)

In [None]:
from langchain_aws.chat_models import ChatBedrock

# Options handed to the model unchanged through `model_kwargs`.
llm_model_kwargs = {"temperature": 0.0, "maxTokenCount": 8192}

# Bedrock chat model; profile, region and model id come from the environment,
# with defaults for the region and the model id.
bedrock_llm = ChatBedrock(
    model_id=os.getenv("AWS_LANGUAGE_MODEL_ID", "amazon.titan-text-express-v1"),
    region_name=os.getenv("AWS_REGION_NAME", "eu-central-1"),
    credentials_profile_name=os.getenv("AWS_CREDENTIALS_PROFILE_NAME"),
    model_kwargs=llm_model_kwargs,
)

In [None]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

# Prompt that asks for a one-sentence summary of the text bound to {doc}.
summary_prompt = ChatPromptTemplate.from_template(
    """Summarize the following paragraph in one sentence, including important key words:
        
        ---
        {doc}
        ---
        
        Assistant:
        """
)

# Summarization pipeline for the parent documents:
# Document -> {"doc": page_content} -> prompt -> LLM -> plain string.
chain = (
    {"doc": lambda parent: parent.page_content}
    | summary_prompt
    | bedrock_llm
    | StrOutputParser()
)

# One summary per parent document, in the same order as `documents`.
summaries = chain.batch(documents)

In [None]:
import chromadb
from chromadb.config import Settings
from langchain_community.vectorstores import Chroma

# Name of the Chroma collection that holds the summary embeddings.
COLLECTION_NAME = "test_multivec_retriever"

# HTTP client for a local chromadb server. No host or port is passed, so the
# client's own defaults apply; resets are allowed through the settings object.
chroma_settings = Settings(allow_reset=True)
chroma_client = chromadb.HttpClient(settings=chroma_settings)

# Vector store that indexes the child chunks (here: the summaries), embedded
# with the Bedrock embedding function.
vectorstore = Chroma(
    collection_name=COLLECTION_NAME,
    embedding_function=bedrock_embeddings,
    client=chroma_client,
)

In [None]:
# Start from an empty collection so entries left over from earlier runs are
# not retrieved again. Deleting the collection invalidates the handle held by
# the existing `vectorstore`, so the store is rebuilt immediately; otherwise
# the later add_documents() call would target a collection that no longer
# exists when the notebook is run top to bottom.
chroma_client.delete_collection(COLLECTION_NAME)
vectorstore = Chroma(
    client=chroma_client,
    collection_name=COLLECTION_NAME,
    embedding_function=bedrock_embeddings,
)

In [None]:
from langchain.storage.redis import RedisStore
from redis import Redis

# Connection to the Redis server on localhost:6379.
redis_client = Redis(port=6379, host='localhost')

# Storage layer for the parent documents, backed by that Redis connection.
doc_store = RedisStore(client=redis_client)

In [None]:
from langchain.retrievers import MultiVectorRetriever

# Metadata key under which a summary document records the id of its parent.
ID_KEY = "parent_doc_id"

# Retriever combining the summary vector store with the parent-document
# store; the vector search is limited to a single hit (k=1).
retriever = MultiVectorRetriever(
    id_key=ID_KEY,
    byte_store=doc_store,
    vectorstore=vectorstore,
    search_kwargs={"k": 1},
)

# Ids of the parent documents, in the same order as `documents`.
parent_doc_ids = [parent.metadata['id'] for parent in documents]

# Build one summary document per parent. Each one
# - contains the summary text
# - carries a "parent_doc_id" metadata entry corresponding to the id of the
#   parent document
summary_docs = [
    Document(page_content=summary_text, metadata={ID_KEY: parent_id})
    for parent_id, summary_text in zip(parent_doc_ids, summaries)
]

# Add the embeddings of the summaries to the vector store.
retriever.vectorstore.add_documents(summary_docs)

# Store each parent document under its id in the document store.
id_document_pairs = list(zip(parent_doc_ids, documents))
retriever.docstore.mset(id_document_pairs)

In [None]:
# The query must be non-empty: an empty string gives the similarity search
# nothing to match against and may be rejected by the embedding model.
query = "Which villain is searching for the Infinity Stones?"

# Perform a similarity search on the embeddings of the summaries and show the
# best-matching summary.
sub_docs = vectorstore.similarity_search(query=query, k=1)
print(sub_docs[0].page_content)

print("------------------------------------")

# Run the same query through the retriever and display the page content of
# the first document it returns.
retrieved_full_docs = retriever.invoke(query)
retrieved_full_docs[0].page_content

In [None]:
from langchain_community.chat_models.ollama import ChatOllama
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

def format_docs(docs):
    """Join the page content of the retrieved documents into one string.

    Without this step the prompt would receive the Python repr of the list of
    Document objects (metadata included) instead of plain context text.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Local chat model served by Ollama that answers the question.
ollama_llm = ChatOllama(model="mistral:7b", temperature=0)

query = "What innovations did Mercedes Benz introduce?"

# RAG prompt pulled from the LangChain hub; the chain below fills its
# "context" and "question" inputs.
prompt = hub.pull("rlm/rag-prompt")

# question -> {context: formatted retrieved docs, question} -> prompt -> LLM -> str
chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | ollama_llm
    | StrOutputParser()
)

response = chain.invoke(query)
response