In [1]:
!pip install -q \
langchain \
langchain-community \
langchain-groq \
langchain-text-splitters \
deeplake \
sentence-transformers \
tiktoken

In [3]:
from langchain_community.vectorstores import DeepLake
from langchain_groq import ChatGroq
from langchain_community.embeddings import HuggingFaceEmbeddings

print("All imports working ✅")

All imports working ✅


In [4]:
!pip install -q activeloop

ERROR: Could not find a version that satisfies the requirement activeloop (from versions: none)
ERROR: No matching distribution found for activeloop


In [5]:
!pip install -U deeplake



In [None]:
import os

os.environ["GROQ_API_KEY"] = ""
os.environ["ACTIVELOOP_TOKEN"] = ""

In [13]:
# ================================
# Imports
# ================================

from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import DeepLake
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough


# ================================
# Step 1: Create Sample Text File
# ================================

text = """
Google is opening access to its AI language model PaLM.
It aims to compete with OpenAI's GPT models.
Google is launching APIs and enterprise AI tools.
PaLM is a large language model similar to GPT.
"""

with open("my_file.txt", "w") as f:
    f.write(text)


# ================================
# Step 2: Load Document
# ================================

loader = TextLoader("my_file.txt")
documents = loader.load()


# ================================
# Step 3: Split Text into Chunks
# ================================

text_splitter = CharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=20
)

docs = text_splitter.split_documents(documents)


# ================================
# Step 4: Create Embeddings
# ================================

embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)


# ================================
# Step 5: Create Deep Lake Dataset
# ================================

dataset_path = "hub://chalamalasettyakashmadhukar/groq_rag_demo"

db = DeepLake(
    dataset_path=dataset_path,
    embedding=embeddings,
    overwrite=True
)

db.add_documents(docs)

print("Deep Lake dataset created successfully ✅")


# ================================
# Step 6: Create Retriever
# ================================

retriever = db.as_retriever()


# ================================
# Step 7: Create Groq LLM
# ================================

llm = ChatGroq(
    model="llama-3.3-70b-versatile",
    temperature=0
)


# ================================
# Step 8: Create Prompt Template
# ================================

prompt = ChatPromptTemplate.from_template(
    """Answer the question using only the context below.

Context:
{context}

Question:
{question}
"""
)


# ================================
# Step 9: Build Modern RAG Chain
# ================================

rag_chain = (
    {
        "context": retriever,
        "question": RunnablePassthrough()
    }
    | prompt
    | llm
    | StrOutputParser()
)


# ================================
# Step 10: Ask Question
# ================================

query = "How is Google competing with OpenAI?"
response = rag_chain.invoke(query)

print("\nAnswer:\n")
print(response)

Loading weights:   0%|          | 0/103 [00:00<?, ?it/s]

[1mBertModel LOAD REPORT[0m from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m


Your Deep Lake dataset has been successfully created!


Creating 1 embeddings in 1 batches of size 1:: 100%|█████████████████████████████████████| 1/1 [00:23<00:00, 23.51s/it]


Dataset(path='hub://chalamalasettyakashmadhukar/groq_rag_demo', tensors=['text', 'metadata', 'embedding', 'id'])

  tensor      htype     shape     dtype  compression
  -------    -------   -------   -------  ------- 
   text       text      (1, 1)     str     None   
 metadata     json      (1, 1)     str     None   
 embedding  embedding  (1, 384)  float32   None   
    id        text      (1, 1)     str     None   
Deep Lake dataset created successfully ✅

Answer:

Google is competing with OpenAI by opening access to its AI language model PaLM and launching APIs and enterprise AI tools, aiming to rival OpenAI's GPT models.
