In [None]:
import getpass
import os

from groq import Groq

# SECURITY: never hardcode API keys in a notebook. The key that used to be
# written literally in this cell must be treated as leaked and revoked.
# The key is now taken from the GROQ_API_KEY environment variable; if it is
# missing, the user is prompted once (input is not echoed) and the value is
# stored in the process environment for the rest of the session.
if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass.getpass("Enter your GROQ_API_KEY: ")

# No key is passed explicitly: the client is expected to pick it up from the
# GROQ_API_KEY environment variable set above.
client = Groq()

topic = "Retrieval Augmented Generation"
prompt_text = f"Explain {topic} in simple words."

# Single-turn chat completion: one user message, no system prompt.
response = client.chat.completions.create(
    model="llama-3.1-8b-instant",
    messages=[{"role": "user", "content": prompt_text}]
)

# The reply text lives on the message of the first returned choice.
print(response.choices[0].message.content)



In [None]:
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate

# Same question as the raw-SDK cell, expressed as a LangChain pipeline:
# a prompt template with a {topic} placeholder piped into the chat model.
model = ChatGroq(model="llama-3.1-8b-instant")
prompt = ChatPromptTemplate.from_template("Explain {topic} in simple words.")
chain = prompt | model

# Fill the placeholder, run the pipeline, and show only the reply text.
reply = chain.invoke({"topic": "Retrieval Augmented Generation"})
print(reply.content)


In [14]:
from langchain_community.embeddings import HuggingFaceEmbeddings

# Embedding model shared by the vector store and the ad-hoc embed_query calls.
# NOTE(review): the truncated output under this cell echoes this constructor,
# which looks like a deprecation warning for this import path — confirm and
# consider migrating to the maintained HuggingFace integration package.
embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


  embedder = HuggingFaceEmbeddings(


In [15]:
# Toy knowledge base: four one-sentence strings that are chunked, embedded
# and indexed by the following cells.
docs = [
    "Retrieval Augmented Generation (RAG) combines external knowledge with LLM generation.",
    "Vector databases store embeddings that represent meaning instead of exact words.",
    "Chunking splits large documents into smaller pieces to improve retrieval accuracy.",
    "Embeddings are numerical representations of text that capture semantic meaning."
]


In [30]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Deliberately tiny chunks (size 50, overlap 20) so the effect of chunking and
# overlap is visible even on one-sentence inputs.
splitter = RecursiveCharacterTextSplitter(chunk_size=50, chunk_overlap=20)

# Turn the raw strings into Document chunks; the bare last expression makes
# the notebook display the resulting list.
documents = splitter.create_documents(texts=docs)
documents


[Document(metadata={}, page_content='Retrieval Augmented Generation (RAG) combines'),
 Document(metadata={}, page_content='(RAG) combines external knowledge with LLM'),
 Document(metadata={}, page_content='knowledge with LLM generation.'),
 Document(metadata={}, page_content='Vector databases store embeddings that represent'),
 Document(metadata={}, page_content='that represent meaning instead of exact words.'),
 Document(metadata={}, page_content='Chunking splits large documents into smaller'),
 Document(metadata={}, page_content='into smaller pieces to improve retrieval'),
 Document(metadata={}, page_content='improve retrieval accuracy.'),
 Document(metadata={}, page_content='Embeddings are numerical representations of text'),
 Document(metadata={}, page_content='of text that capture semantic meaning.')]

In [18]:
from langchain_community.vectorstores import Chroma

# Embed every chunk with `embedder` and index the vectors in a Chroma store
# configured with the ./chroma_db directory.
# NOTE(review): presumably re-running this cell against an existing
# ./chroma_db adds the same chunks again — verify, and clear the directory
# before a fresh run if duplicates show up in retrieval.
vectorstore = Chroma.from_documents(
    documents=documents,
    embedding=embedder,
    persist_directory="./chroma_db",
)


In [19]:
# Expose the vector store through the retriever interface (default settings)
# so it can be used as a step in the RAG chain.
retriever = vectorstore.as_retriever()


In [23]:
# Prompt skeleton for the RAG step: {context} receives the retrieved material
# and {question} the user's query.
RAG_TEMPLATE = """
Use the following context to answer the question:

Context:
{context}

Question:
{question}

Answer in a simple way.
"""

rag_prompt = ChatPromptTemplate.from_template(RAG_TEMPLATE)

# Chat model that generates the final answer.
llm = ChatGroq(model="llama-3.1-8b-instant")

In [24]:
from langchain_core.runnables import RunnablePassthrough

def _format_docs(retrieved):
    """Join the text of retrieved documents into one prompt-ready string.

    Args:
        retrieved: Iterable of objects exposing a ``page_content`` attribute
            (the Document objects returned by the retriever).

    Returns:
        The ``page_content`` values separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in retrieved)


# The input question fans out to two branches:
#   - "context": retrieve matching chunks, then flatten them to plain text.
#     Without the formatting step the prompt would receive the Python repr of
#     the Document list (metadata and class names included) instead of text.
#   - "question": passed through unchanged.
# The resulting dict fills rag_prompt, whose output is sent to the chat model.
rag_chain = (
    {"context": retriever | _format_docs, "question": RunnablePassthrough()}
    | rag_prompt
    | llm
)


In [25]:
# Run the full RAG pipeline for one question and print only the answer text.
# Note: this rebinds `response`, which an earlier cell used for the raw Groq
# SDK result.
response = rag_chain.invoke("What is RAG?")
print(response.content)


RAG combines external knowledge with a Large Language Model (LLM) generation to create new information.


In [26]:
# Ask two more questions through the same chain, printing each answer in turn.
for question in ("What is an embedding?", "Why do we chunk documents?"):
    print(rag_chain.invoke(question).content)


An embedding is a numerical representation of text that captures its meaning.
We chunk documents to improve retrieval accuracy.


In [27]:
# Inspect a raw query embedding: its dimensionality on the first line and
# the first 10 components on the second.
vec = embedder.embed_query("What is RAG?")
print(len(vec), vec[:10], sep="\n")


384
[-0.06957074254751205, 0.09520000964403152, 0.016021398827433586, 0.00680147111415863, -0.08840498328208923, 0.014204839244484901, 0.05402772128582001, 0.0456368550658226, -0.03192177787423134, -0.029563739895820618]


In [28]:
import numpy as np

# Embed two differently worded sentences about the same idea.
v1 = embedder.embed_query("RAG uses retrieval to help LLMs answer better.")
v2 = embedder.embed_query("Retrieval augmented generation combines docs and models.")

# Cosine similarity = dot product divided by the product of the vector norms.
dot_product = np.dot(v1, v2)
norm_product = np.linalg.norm(v1) * np.linalg.norm(v2)
cosine = dot_product / norm_product
print("Cosine similarity:", cosine)


Cosine similarity: 0.3026949104640774
