In [1]:
# INGEST DATA
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import DeepLake
from langchain_text_splitters import RecursiveCharacterTextSplitter

embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

db = DeepLake(
    dataset_path="./deeplake_dataset",
    embedding=embeddings,
    read_only=False
)

texts = [
    "Napoleon Bonaparte was born on 15 August 1769.",
    "Michael Jeffrey Jordan was born on 17 February 1963."
]

splitter = RecursiveCharacterTextSplitter()
docs = splitter.create_documents(texts)

db.add_documents(docs)

print("✅ Data Added")




Loading weights:   0%|          | 0/103 [00:00<?, ?it/s]

[1mBertModel LOAD REPORT[0m from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m


Deep Lake Dataset in ./deeplake_dataset already exists, loading from the storage


  db = DeepLake(
Creating 2 embeddings in 1 batches of size 2:: 100%|█████████████████████████████████████| 1/1 [00:00<00:00,  6.33it/s]

Dataset(path='./deeplake_dataset', tensors=['embedding', 'id', 'metadata', 'text'])

  tensor      htype     shape     dtype  compression
  -------    -------   -------   -------  ------- 
 embedding  embedding  (6, 384)  float32   None   
    id        text      (6, 1)     str     None   
 metadata     json      (6, 1)     str     None   
   text       text      (6, 1)     str     None   
✅ Data Added





In [2]:
# QUERY RAG

from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

llm = ChatGroq(
    model="llama-3.1-8b-instant",
    temperature=0
)

db = DeepLake(
    dataset_path="./deeplake_dataset",
    embedding=embeddings,
    read_only=True
)

retriever = db.as_retriever()

prompt = ChatPromptTemplate.from_template(
    """Answer using only context:

{context}

Question: {question}
"""
)

rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
)

response = rag_chain.invoke("When was Michael Jordan born?")
print(response.content)


Deep Lake Dataset in ./deeplake_dataset already exists, loading from the storage
17 February 1963.
