In [1]:
!pip install chromadb google-generativeai pandas

Collecting chromadb
  Downloading chromadb-1.1.1-cp39-abi3-macosx_11_0_arm64.whl.metadata (7.2 kB)
Collecting google-generativeai
  Downloading google_generativeai-0.8.5-py3-none-any.whl.metadata (3.9 kB)
Collecting pandas
  Downloading pandas-2.3.3-cp313-cp313-macosx_11_0_arm64.whl.metadata (91 kB)
Collecting build>=1.0.3 (from chromadb)
  Using cached build-1.3.0-py3-none-any.whl.metadata (5.6 kB)
Collecting pydantic>=1.9 (from chromadb)
  Downloading pydantic-2.11.10-py3-none-any.whl.metadata (68 kB)
Collecting pybase64>=1.4.1 (from chromadb)
  Using cached pybase64-1.4.2-cp313-cp313-macosx_11_0_arm64.whl.metadata (8.7 kB)
Collecting uvicorn>=0.18.3 (from uvicorn[standard]>=0.18.3->chromadb)
  Downloading uvicorn-0.37.0-py3-none-any.whl.metadata (6.6 kB)
Collecting numpy>=1.22.5 (from chromadb)
  Downloading numpy-2.3.3-cp313-cp313-macosx_14_0_arm64.whl.metadata (62 kB)
Collecting posthog<6.0.0,>=2.4.0 (from chromadb)
  Using cached posthog-5.4.0-py3-none-any.whl.metadata (5.7 kB)
C

In [2]:
import json
import pandas as pd
import chromadb
import google.generativeai as genai
import time



  from .autonotebook import tqdm as notebook_tqdm


In [3]:
genai.configure(api_key="AIzaSyCJp_Gy_5uDyJDuPwkL3BQSC-lr_z4Begs") # Or set GOOGLE_API_KEY environment variable


In [None]:
def load_and_prepare_docs(filepath="codebase_map.jsonl"):
    """Loads the JSONL file and formats each entry for embedding."""
    documents = []
    metadata = []
    ids = []
    with open(filepath, 'r') as f:
        for i, line in enumerate(f):
            data = json.loads(line)
            content = ""
            if data['type'] == 'function':
                func = data['function']
                content = f"File: {data['file_path']}\nType: function\nSignature: {func['signature']}\nBody: {func['body']}"
            elif data['type'] == 'struct':
                struct = data['struct']
                fields_str = "\n".join([f"  {field['name']} {field['type']} `{field.get('tag', '')}`" for field in struct['fields']])
                content = f"File: {data['file_path']}\nType: struct\nName: {struct['name']}\nFields:\n{fields_str}"

            documents.append(content)
            metadata.append(data)
            ids.append(str(i + 1)) # ChromaDB requires string IDs
    return documents, metadata, ids

print("Loading and preparing documents...")
documents, metadata, ids = load_and_prepare_docs()
print(f"Loaded {len(documents)} documents.")
# Example of a prepared document:
# print(documents[5])

Loading and preparing documents...
Loaded 99 documents.


Embed and Store in VectorDB

In [5]:
# Initialize ChromaDB client. This creates a persistent DB in the 'chroma_db' directory.
client = chromadb.PersistentClient(path="./chroma_db")

# Create a collection to store your codebase embeddings
# If it already exists, you can retrieve it instead of creating it
collection_name = "gocodebase"
if collection_name in [c.name for c in client.list_collections()]:
    collection = client.get_collection(name=collection_name)
    print(f"Using existing collection: '{collection_name}'")
else:
    collection = client.create_collection(name=collection_name)
    print(f"Creating new collection: '{collection_name}'")

    print("Embedding and indexing the codebase... This may take a moment.")
    # Embed the documents in batches (safer for API limits)
    batch_size = 10
    for i in range(0, len(documents), batch_size):
        batch_docs = documents[i:i+batch_size]
        batch_ids = ids[i:i+batch_size]
        batch_meta = metadata[i:i+batch_size]

        # Using Google's embedding model
        response = genai.embed_content(
            model="models/text-embedding-004",
            content=batch_docs,
            task_type="RETRIEVAL_DOCUMENT"
        )
        embeddings = response['embedding']

        collection.add(
            embeddings=embeddings,
            documents=batch_docs,
            metadatas=batch_meta,
            ids=batch_ids
        )
        print(f"Indexed batch {i//batch_size + 1}...")
        time.sleep(10)

print("Codebase successfully indexed in ChromaDB.")

Creating new collection: 'gocodebase'
Embedding and indexing the codebase... This may take a moment.


E0000 00:00:1759641093.862375 3116590 alts_credentials.cc:93] ALTS creds ignored. Not running on GCP and untrusted ALTS is not enabled.


ValueError: Expected metadata value to be a str, int, float, bool, SparseVector, or None, got {'name': 'Chirp', 'fields': [{'name': 'ID', 'type': 'uuid.UUID', 'tag': 'json:"id"'}, {'name': 'CreatedAt', 'type': 'time.Time', 'tag': 'json:"created_at"'}, {'name': 'UpdatedAt', 'type': 'time.Time', 'tag': 'json:"updated_at"'}, {'name': 'UserID', 'type': 'uuid.UUID', 'tag': 'json:"user_id"'}, {'name': 'Body', 'type': 'string', 'tag': 'json:"body"'}]} which is a dict in add.

Take request, find relevent snippets from chroma nad pass to generative model

In [None]:
def query_rag(query: str, n_results: int = 5):
    """Performs the RAG process: query -> retrieve -> augment -> generate."""

    # 1. Retrieve relevant code snippets
    # Embed the user's query
    query_embedding_response = genai.embed_content(
        model="models/text-embedding-004",
        content=query,
        task_type="RETRIEVAL_QUERY"
    )

    # Search ChromaDB for the most similar documents
    results = collection.query(
        query_embeddings=[query_embedding_response['embedding']],
        n_results=n_results
    )

    retrieved_docs = results['documents'][0]
    context = "\n---\n".join(retrieved_docs)

    # 2. Augment: Create a prompt for the generative model
    prompt = f"""You are an expert Go programmer. Your task is to help a user modify their codebase.
Use the following relevant code snippets from the codebase as context to provide a complete and accurate answer.Acknowledge the lack of code snippets if there is none.

**CONTEXT FROM THE CODEBASE:**
---
{context}
---

**USER'S REQUEST:**
"{query}"

**YOUR TASK:**
Based on the user's request and the provided context, generate the necessary code changes.
- If a struct needs modification, show the new struct definition.
- If a function needs to be changed, provide the complete, updated function body.
- If new functions are needed, write them.
- Provide a brief, clear explanation of the changes you made.
- Present the final output in Go code blocks.
"""

    # 3. Generate the response
    model = genai.GenerativeModel('gemini-2.0-flash-lite')
    response = model.generate_content(prompt)

    return response.text

Giving request


In [None]:
user_request = """
I need to add a 'likes' count to the Chirp model.
It should be an integer and default to 0.

Then, update the 'handlerChirpsCreate' function. After creating a chirp,
the response should include this new 'likes' field.
"""

# Get the suggested code change
suggested_change = query_rag(user_request)

# Print the result
print("--- SUGGESTED CODE CHANGE ---")
print(suggested_change)

--- SUGGESTED CODE CHANGE ---
Okay, I'm ready to help you add the 'likes' count to your Chirp model and update the `handlerChirpsCreate` function. Since no code snippets were provided, I'll create a basic example based on common Go web server patterns.

Here's the code with the requested changes:

```go
package main

import (
	"encoding/json"
	"fmt"
	"log"
	"net/http"
	"time"
)

// Chirp Model with likes
type Chirp struct {
	ID        int       `json:"id"`
	Author    string    `json:"author"`
	Body      string    `json:"body"`
	CreatedAt time.Time `json:"created_at"`
	Likes     int       `json:"likes"` // New field
}

// In-memory storage (replace with a database in a real application)
var chirps []Chirp

// handlerChirpsCreate handles POST requests to create a chirp.
func handlerChirpsCreate(w http.ResponseWriter, r *http.Request) {
	type parameters struct {
		Author string `json:"author"`
		Body   string `json:"body"`
	}

	decoder := json.NewDecoder(r.Body)
	params := parameters{}
	e

In [None]:
import google.generativeai as genai

# Make sure your key is configured via one of the methods above
# genai.configure(api_key="...")

# This loop will print available models if your key is valid
# It will fail with the same error if the key is still invalid
print("Verifying API Key by listing available models:")
for m in genai.list_models():
  if 'embedContent' in m.supported_generation_methods:
    print(m.name)

Verifying API Key by listing available models:
models/embedding-001
models/text-embedding-004
models/gemini-embedding-exp-03-07
models/gemini-embedding-exp
models/gemini-embedding-001
