In [6]:
def load_documents(data_path="./docs", file_pattern="**/*.md"):
    from langchain_community.document_loaders import DirectoryLoader
    loader = DirectoryLoader(
        path=data_path,
        glob=file_pattern
    )
    documents = loader.load()
    for i, doc in enumerate(documents[:2]):
        print(f"\nDocument {i+1}:")
        print(f"Source: {doc.metadata.get('source')}")
        # print(f"Content (first 150 chars): {doc.page_content[:150]}...")
    return documents
    # Print information about the first few documents

In [15]:
def split_documents(documents):
    from langchain_text_splitters import RecursiveCharacterTextSplitter
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=400,
        chunk_overlap=100,
        length_function=len,
        add_start_index=True
    )
    chunks = text_splitter.split_documents(documents)
    print("Chunking Done")
    return chunks


In [17]:
def generate_embeddings():
    from langchain_huggingface import HuggingFaceEmbeddings
    embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs = {'device': 'cpu'}, encode_kwargs = {'normalize_embeddings': False}
    )
    print("Inside generate_embeddings")
    return embedding_model


In [18]:
def embed():
    # from sentence_transformers import SentenceTransformer
    from langchain_community.vectorstores import Chroma

    documents = load_documents()
    chunks = split_documents(documents)
    embeddings = generate_embeddings()

    vector_db = Chroma.from_documents(
        documents,
        embeddings,
        persist_directory="./keploy-docs-coll"
    )

    print("Filling of vectorstore done")

In [19]:
embed()

libmagic is unavailable but assists in filetype detection. Please consider installing libmagic for better results.
libmagic is unavailable but assists in filetype detection. Please consider installing libmagic for better results.
libmagic is unavailable but assists in filetype detection. Please consider installing libmagic for better results.
libmagic is unavailable but assists in filetype detection. Please consider installing libmagic for better results.
libmagic is unavailable but assists in filetype detection. Please consider installing libmagic for better results.
libmagic is unavailable but assists in filetype detection. Please consider installing libmagic for better results.
libmagic is unavailable but assists in filetype detection. Please consider installing libmagic for better results.
libmagic is unavailable but assists in filetype detection. Please consider installing libmagic for better results.
libmagic is unavailable but assists in filetype detection. Please consider insta


Document 1:
Source: docs\ci-cd\github.md

Document 2:
Source: docs\ci-cd\gitlab.md
Chunking Done
Inside generate_embeddings
Filling of vectorstore done


In [11]:
emb_model = generate_embeddings()

Inside generate_embeddings


In [12]:
from sklearn.metrics.pairwise import cosine_similarity

words = ["appple", "car", "iphone"]
vectors = emb_model.embed_documents(words)
print(len(vectors))

embedding1 = vectors[0]
embedding2 = vectors[1]
embedding3 = vectors[2]

cos_sim12 = cosine_similarity([embedding1], [embedding2])[0][0]
cos_sim13 = cosine_similarity([embedding1], [embedding3])[0][0]
cos_sim23 = cosine_similarity([embedding2], [embedding3])[0][0]
print("Cosine Similarity:", cos_sim12, cos_sim13, cos_sim23)    

3
Cosine Similarity: 0.27573918300413686 0.2951819883381006 0.435098850035305


In [26]:
from langchain_community.vectorstores import Chroma
db = Chroma(persist_directory="./keploy-docs-coll", embedding_function=emb_model)
results = db.similarity_search_with_relevance_scores("How does keploy work?", k=3)
# print(results[0][1])
for doc, score in results:
    normalized_score = (score + 1) / 2
    # print(f"Original score: {score}, Normalized score: {normalized_score}")
    print(doc.page_content)
    print("-------------------------")
    

id: how-keploy-works title: How Keploy Works? sidebar_label: How Keploy Works? tags: - explanation - replay-test-case - replay-guide - record-guide - record-test-case

🌟 Keploy V2 Architecture 🌟

🎯 Goals

🛠 Automatic instrumentation: No code changes required.

📡 Automatic traffic capture: Both incoming and outgoing traffic is captured and manipulated.

✍️ Readable and Editable: Tests and stubs are easy to understand and modify.

🔒 TLS Support: Secure connections in HTTPS or databases are supported.

🔄 Request Matching: Mocking responses during testing by matching requests.

🏗 High-level architecture

Keploy uses eBPF to instrument applications without code changes. Key components include:

eBPF hooks loader

Network Proxy

API server

Keploy Architecture

🪝 eBPF hooks loader

The eBPF hooks loader handles the Ingress and Egress Interceptor logic.

Ingress Interceptor: Captures incoming HTTP calls and stores them in YAML format. It intercepts system calls related to incoming HTTP reques

In [57]:
PROMPT_TEMPLATE = ["""
You are a smart and helpful AI assistant trained to answer technical questions using documentation.

You are provided with multiple documentation snippets retrieved from a vector database. These are separated by lines containing only: 
-------------------------

Your goal is to:
- Use the relevant information from any or all of the provided snippets to answer the user’s question.
- If the question involves implementation or coding, provide a complete and accurate code snippet.
- Do not exceed the examples to more than 1.
- Keep the response short, concise, clear, and in a developer-friendly tone.
- Format your output using Markdown (e.g., code blocks, lists, bold) where appropriate.
- If the information is not directly present, reason based on the closest matches and explain your assumptions.

Documentation:
{context}

User query:
{query}

Answer:

"""]

In [69]:
from langchain_core.prompts import ChatPromptTemplate

question_text = "I have a node js application consisting of few APIs interacting with Mongo DB. How can I use Keploy to test?"
results = db.similarity_search_with_relevance_scores(question_text, k=3)
context_text =  "\n\n-------------------------\n\n".join([doc.page_content for doc, _score in results])
prompt_template = ChatPromptTemplate(PROMPT_TEMPLATE)
prompt = prompt_template.format(context=context_text, query=question_text)
# print(prompt)

In [71]:
# Print file paths of the top 3 retrieved chunks
for idx, (doc, score) in enumerate(results, 1):
    print(f"Chunk {idx}:")
    print(f"  Source file: {doc.metadata.get('source', 'Unknown')}")
    print(f"  Score: {score}")
    print(f"  Preview: {doc.page_content[:120]}...\n")

Chunk 1:
  Source file: docs\quickstart\samples-node-mongo.md
  Score: 0.5606058280166488
  Preview: id: crud-nodejs title: NodeJS Sample Application sidebar_label: NodeJS - Express + Mongoose description: The following s...

Chunk 2:
  Source file: docs\quickstart\samples-node-mongo.md
  Score: 0.5606058280166488
  Preview: id: crud-nodejs title: NodeJS Sample Application sidebar_label: NodeJS - Express + Mongoose description: The following s...

Chunk 3:
  Source file: docs\quickstart\samples-bunjs.md
  Score: 0.4911044947104263
  Preview: id: samples-bunjs title: BunJS Sample Application sidebar_label: BunJS + Mongo description: The following sample app sho...



In [55]:
from langchain_google_genai import ChatGoogleGenerativeAI

llm = ChatGoogleGenerativeAI(model="gemini-2.5-pro-exp-03-25", google_api_key="AIzaSyBsdulzKbiRrCOy0bV71L1ty3FMPyUL1Y4")

In [75]:
response = llm.invoke(prompt)
# print(response.content)
answer = response.content
seen_sources = set()
sources_list = []
for idx, (doc, score) in enumerate(results, 1):
    source = doc.metadata.get('source', 'Unknown')
    if source not in seen_sources:
        sources_list.append(f"{len(sources_list)+1}. {source}")
        seen_sources.add(source)

sources = "\nSources for the above answer:\n" + "\n".join(sources_list)
final_response = f"{answer}\n\n{sources}"

In [76]:
print(final_response)

Okay, here's how you can test your Node.js application that interacts with MongoDB using Keploy:

1.  **Record Test Cases and Mocks:**
    Navigate to your application's directory in the terminal and run the Keploy `record` command. Replace `"npm start"` with the actual command you use to run your Node.js application.

    ```bash
    keploy record -c "npm start"
    ```
    While Keploy is recording, make API calls to your application (using tools like `curl`, Postman, or Hoppscotch). Keploy will capture these API calls as test cases and the corresponding MongoDB interactions as mocks, saving them into `keploy/test-*.yml` and `keploy/mocks.yml` files.

2.  **Run Tests:**
    Once you have recorded the interactions, you can run the tests using the Keploy `test` command. Again, replace `"npm start"` with your application's run command. The `--delay` flag gives your application some time to start up before tests begin.

    ```bash
    keploy test -c "npm start" --delay 10
    ```
    Ke