In [1]:
!pip install -U \
  huggingface_hub \
  sentence-transformers \
  langchain \
  langchain-core \
  langchain-huggingface \
  langchain-community \
  docarray

Collecting huggingface_hub
  Using cached huggingface_hub-1.3.7-py3-none-any.whl.metadata (13 kB)


In [2]:
import os
from getpass import getpass
from langchain_groq import ChatGroq

# Reuse a key that is already set in the environment; otherwise prompt for it
# with getpass so the value is not echoed into the notebook output.
groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    groq_api_key = getpass("Enter your GROQ API key: ")
os.environ["GROQ_API_KEY"] = groq_api_key

Enter your GROQ API key: ··········


In [3]:
# Chat model used as the generation step of the chain.
# temperature=0.0 is passed to keep sampling as deterministic as the backend allows.
llm = ChatGroq(
    model="llama-3.3-70b-versatile",
    temperature=0.0,
)

# LCEL RunnableParallel and RunnablePassthrough

**RunnableParallel** — allows us to run multiple Runnable instances in parallel. It acts almost like a Y-fork in the chain.

**RunnablePassthrough** — allows us to pass through a variable to the next Runnable without modification.

In [4]:
from langchain_huggingface import HuggingFaceEmbeddings

# Embedding model shared by both in-memory vector stores below.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-l6-v2"

embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [5]:
from langchain_community.vectorstores import DocArrayInMemorySearch

# The facts needed to answer the demo question are deliberately split across
# two separate stores, so both must be queried to get the full answer.
texts_a = [
    "half the info is here",
    "DeepSeek-V3 was released in December 2024",
]
texts_b = [
    "the other half of the info is here",
    "the DeepSeek-V3 LLM is a mixture of experts model with 671B parameters",
]

vecstore_a = DocArrayInMemorySearch.from_texts(texts_a, embedding=embeddings)
vecstore_b = DocArrayInMemorySearch.from_texts(texts_b, embedding=embeddings)


In [6]:
# System prompt template. {context_a} and {context_b} are placeholders that
# are filled with the output of the two retrievers when the prompt is formatted.
prompt_str = (
    "Using the context provided, answer the user's question.\n"
    "Context:\n"
    "{context_a}\n"
    "{context_b}\n"
)

In [8]:
from langchain_core.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate

In [9]:
# System message carries the instructions plus both retrieved contexts;
# the human message carries the user's question verbatim.
system_message = SystemMessagePromptTemplate.from_template(prompt_str)
human_message = HumanMessagePromptTemplate.from_template("{question}")

prompt = ChatPromptTemplate.from_messages([system_message, human_message])

In [10]:
from langchain_core.runnables import RunnablePassthrough, RunnableParallel

# One retriever per vector store.
retriever_a = vecstore_a.as_retriever()
retriever_b = vecstore_b.as_retriever()

# Fan the incoming question out to three branches: each retriever receives it
# and produces a context entry, while RunnablePassthrough forwards the question
# itself unchanged. The result is a dict with the keys the prompt expects.
retrieval = RunnableParallel(
    context_a=retriever_a,
    context_b=retriever_b,
    question=RunnablePassthrough(),
)

In [12]:
from langchain_core.output_parsers import StrOutputParser

# Final chain step: turns the chat model's response into a plain string
# (the chain outputs shown further down are bare strings).
output_parser = StrOutputParser()

In [13]:
# Full LCEL pipeline, composed left to right with the `|` operator.
chain = (
    retrieval          # question -> {"context_a", "context_b", "question"}
    | prompt           # fill the chat prompt template
    | llm              # generate the answer
    | output_parser    # reduce the model response to a string
)

In [16]:
# Answering this needs both stores: the release date lives in store A and
# the architecture lives in store B.
question = "what architecture does the model DeepSeek released in december use?"
result = chain.invoke(question)

result

'The DeepSeek-V3 model, released in December 2024, uses a Mixture of Experts (MoE) architecture with 671B parameters.'

In [18]:
# Control question: neither store mentions Llama 3.3, so this exercises how the
# chain responds when the retrieved context does not contain the answer.
question = "what architecture does the model llama 3.3 released in december use?"
chain.invoke(question)

'The provided context does not mention a model called "Llama 3.3" or its architecture. However, it does mention "DeepSeek-V3", which is described as "a mixture of experts model with 671B parameters" and was released in December 2024. If you\'re looking for information on a different model, the context does not provide that information.'