In [1]:
from langchain.chat_models import ChatOpenAI
from langchain.schema import SystemMessage, HumanMessage
from langchain.prompts import ChatPromptTemplate
from langchain_community.document_loaders import PyPDFLoader
from langchain_openai import ChatOpenAI
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
import os
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
GROQ_API_KEY = "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"

llm = ChatOpenAI(
    openai_api_key=GROQ_API_KEY,
    openai_api_base="https://api.groq.com/openai/v1",
    model_name="groq/compound-mini",
    temperature=0.5
)


In [4]:
response = llm.invoke("hello how are you?")
response.content

"Hello! I'm just a language model, so I don't have feelings or emotions like humans do, but I'm functioning properly and ready to help you with any questions or tasks you have! How can I assist you today?"

In [39]:
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a assisstant who just gives answers in 1 sentence"),
    ("user", "{question}")
])

chain = prompt | llm
resp = chain.invoke({"question": "Explain vector embeddings in simple terms."})
print(resp.content)


Vector embeddings are a way to represent complex data, like words or images, as simple numerical vectors in a high-dimensional space, allowing similar things to be close together.


# Sequential Chain

In [8]:
from langchain.chains import SimpleSequentialChain

# Step 1: Summarize
prompt1 = ChatPromptTemplate.from_template("Summarize this text in one sentence: {input_text}")
chain1 = prompt1 | llm

# Step 2: Translate
prompt2 = ChatPromptTemplate.from_template("Translate this text to kannada: {summary}")
chain2 = prompt2 | llm

# Combine them with mapping
overall_chain = (
    {"summary": chain1}  # first step outputs {summary: ...}
    | chain2             # second step consumes it
)

resp = overall_chain.invoke({"input_text": "LangChain makes building LLM apps easier."})
print(resp.content)

The text you provided appears to be a JSON (JavaScript Object Notation) object containing metadata about a response generated by a large language model. If you'd like, I can help extract and translate the primary content.

The primary content is:

"LangChain simplifies the development of Large Language Model (LLM) applications."

Here's the translation to Kannada:

"ಲಾಂಗ್‌ಚೈನ್ ದೊಡ್ಡ ಭಾಷಾ ಮಾದರಿ (ಎಲ್‌ಎಲ್‌ಎಂ) ಅಪ್ಲಿಕೇಶನ್‌ಗಳ ಅಭಿವೃದ್ಧಿಯನ್ನು ಸರಳಗೊಳಿಸುತ್ತದೆ."


# Rember conversation and chatbot style

In [43]:
memory = ConversationBufferMemory(return_messages=True)

conversation = ConversationChain(
    llm=llm,
    memory=memory,
    verbose=False
)
while True:
    user1 = input("YOU:")
    if user1 == "exit":
        break
    response = conversation.predict(input=user1)
    print("BOT:", response)


YOU: hello, i'm lavanya


BOT: Hello Lavanya! It's nice to meet you. I'm Compound-Beta, a conversational AI built by Groq. I'm running on a custom LPU (Language Processing Unit) hardware designed for fast AI inference. I'm excited to chat with you and share my knowledge. How's your day going so far?


YOU: i'm 27


BOT: Happy 27th birthday, Lavanya! I'm glad I got to know your name and now your age. I'm still learning and growing, but I'm not aware of having a birthday myself since I'm a software program. I'm always here and ready to chat, though! What do you like to do for fun, or is there something specific on your mind that you'd like to talk about? By the way, I'm processing our conversation using my LPU hardware, which allows me to respond quickly and efficiently. It's a 16-core LPU, and it's capable of handling a large number of conversations simultaneously.


YOU: whats my age


BOT: You mentioned earlier that you're 27 years old, Lavanya!


YOU: exit


# RAG

In [46]:
loader = PyPDFLoader("/Users/lavanya/Downloads/Lavanya_SWE.pdf")
docs = loader.load()

print("Loaded pages:", len(docs))

Loaded pages: 1


In [49]:
docs[0].page_content[:30]

'Lavanya Shankar\nlavanyashankar'

In [51]:
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = splitter.split_documents(docs)

print("Total chunks:", len(chunks))


Total chunks: 10


In [53]:
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
db = Chroma.from_documents(chunks, embeddings)


  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


In [54]:
db

<langchain_community.vectorstores.chroma.Chroma at 0x303323e10>

In [57]:
print("Number of chunks stored:", db._collection.count())
# Peek into the DB
raw = db._collection.get(include=["embeddings", "documents"], limit=1)
print("Stored text:", raw["documents"][0][:200])
print("Embedding length:", len(raw["embeddings"][0]))
print("First 10 values:", raw["embeddings"][0][:10])


Number of chunks stored: 10
Stored text: Lavanya Shankar
lavanyashankarsv09@gmail.com| +1 704-490-9869 | LinkedIn | GitHub | Medium | Portfolio
Education
Johns Hopkins University Baltimore, MD
Master of Science in Engineering in Data Science
Embedding length: 384
First 10 values: [-0.0776967  -0.01542668 -0.01383872  0.01303261 -0.04081499 -0.09348963
 -0.04556158  0.00163316 -0.1418069  -0.00174948]


# Generate 1 embedding

In [56]:
# Take the first chunk of your text
first_chunk_text = chunks[0].page_content
print("First chunk text:", first_chunk_text[:200])  # preview 200 chars

# Create embedding
first_embedding = embeddings.embed_query(first_chunk_text)
print("Embedding length:", len(first_embedding))   # should be 384
print("First 10 values:", first_embedding[:10])   # preview first 10 numbers

First chunk text: Lavanya Shankar
lavanyashankarsv09@gmail.com| +1 704-490-9869 | LinkedIn | GitHub | Medium | Portfolio
Education
Johns Hopkins University Baltimore, MD
Master of Science in Engineering in Data Science
Embedding length: 384
First 10 values: [-0.07769669592380524, -0.015426683239638805, -0.013838724233210087, 0.013032610528171062, -0.04081499204039574, -0.0934896320104599, -0.04556158185005188, 0.0016331623774021864, -0.14180688560009003, -0.0017494767671450973]


In [63]:
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=db.as_retriever(),
    return_source_documents=True
)

# Example queries
result = qa({"query": "her education"})

print("Answer:", result["result"])


Answer: Lavanya Shankar's education includes:

1. **Master of Science in Engineering in Data Science** at Johns Hopkins University, Baltimore, MD, with a CGPA of 3.9/4 from August 2023 to May 2025.
2. **Bachelor of Engineering in Computer Science** at Visvesvaraya Technological University, Bengaluru, India, with a CGPA of 3.6/4 from August 2016 to August 2020.


# Similarity

In [64]:
query = "Lavanya’s education"
results = db.similarity_search(query, k=2)  # top 3 matches

for i, r in enumerate(results):
    print(f"\n--- Match {i+1} ---")
    print("Text:", r.page_content)
    #print("Metadata:", r.metadata)



--- Match 1 ---
Text: Lavanya Shankar
lavanyashankarsv09@gmail.com| +1 704-490-9869 | LinkedIn | GitHub | Medium | Portfolio
Education
Johns Hopkins University Baltimore, MD
Master of Science in Engineering in Data Science | CGPA: 3.9/4 Aug 2023 – May 2025
Visvesvaraya Technological University Bengaluru, India
Bachelor of Engineering in Computer Science | CGPA: 3.6/4 Aug 2016 – Aug 2020
Technical Skills
Programming: Python, Java, JavaScript, Ansible, SQL, HTML/CSS, Spring Boot, C/C++

--- Match 2 ---
Text: ACL 2025: Generated educational material for 4 low-resource Indigenous languages employing POS tagging,
chain-of-thought reasoning, and ensemble learning; improved accuracy by 10% over previous benchmarks (ACL Paper )
ACL 2025: Created spoken language translation systems for 10 language pairs leveraging SeamlessM4T, Whisper, and
Whisper+NLLB models; applied Minimum Bayes Risk (MBR) ensembling to enhance translation accuracy (ACL Paper )
Projects
Aragorn - Agentic RAG Bot Link
