In [None]:
# Load IPython's autoreload extension and use mode 2 so edited modules are picked up live.
%load_ext autoreload
%autoreload 2

In [174]:
import os
import numpy as np
from fastcore.all import *
from litellm.proxy.client.cli.commands.teams import available
from litesearch import *
from ddgs import DDGS
from lisette import *
from toolslm.funccall import *
from toolslm.md_hier import *
import ast
from ast import get_source_segment as gs
import sys
import gc
import json
import nbformat as nbf
from IPython import get_ipython

Let's create a simple code ingestion pipeline that ingests functions, classes, and global variables from Python packages into a litesearch database for searching. We'll use the `ast` module to parse Python files and extract the relevant code snippets.

In [None]:
def is_func(n):
    'True when `n` is a function, async function or class definition node.'
    return isinstance(n, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef))

def is_assign(n):
    'False unless `n` is a plain or annotated assignment; then its value node (None for a bare annotation).'
    if not isinstance(n, (ast.Assign, ast.AnnAssign)): return False
    return n.value

def is_p_mod(n):
    'True when the `parent` attribute of `n` is exactly an `ast.Module` (False if no parent was attached).'
    parent = getattr(n, 'parent', None)
    return getattr(parent, '__class__', None) == ast.Module

def is_global(n):
    'True for an assignment with a value whose parent is the module.'
    if not is_assign(n): return False
    if is_p_mod(n): return True
    return False

def is_allowed(n):
    'True for nodes worth ingesting: function/class definitions and module-level assignments.'
    if is_func(n): return True
    return is_global(n)

def code2chunks(pkg,p):
    '''Parse the python file at `p` and return one record per ingestible node.

    A record is built for every function/class definition found anywhere in the tree
    (nested ones included) and for every module-level assignment. Each record holds the
    node's source text, the file path and the package name `pkg`.'''
    src = Path(p).read_text(encoding='utf-8')
    tree = ast.parse(src)
    # Attach a `parent` attribute to every node; `is_allowed` relies on it to spot module-level assignments.
    for node in ast.walk(tree):
        for child in ast.iter_child_nodes(node): child.parent = node
    def to_chunk(node):
        'Record for one node: stripped source segment, originating path and package.'
        return dict(content=gs(src, node).strip(), metadata=dict(path=p), package=pkg)
    return L(ast.walk(tree)).filter(is_allowed).map(to_chunk)

Let's set up the database and create the necessary tables. `packages` will hold package metadata, and `store` will hold the code snippets with their embeddings.

In [None]:
db = setup_db('code.db')
# `store` receives the code chunks; add a `package` column if it is missing so each chunk can be tagged.
store = db.mk_store()
if 'package' not in store.c:
    store.add_column('package', str)
# `packages` keeps one row per package, keyed by `name`; creation is skipped when the table already exists.
packages = db.t.packages.create(
    name=str, version=str, summary=str, uploaded_at=float,
    pk='name',
    defaults=dict(uploaded_at='CURRENT_TIMESTAMP'),
    not_null=['name'],
    if_not_exists=True,
)

The main ingestion function which takes a list of package names, extracts python files from them, parses the files to extract functions, classes, and global variables, and stores them in the database.

In [None]:
def pkg2files(pkg:str):
    '''Return the python source files of the importable package `pkg`.

    The package is imported to locate its folder on disk. If it cannot be imported, or has
    no `__file__` to locate, a message is printed and an empty `L` is returned.
    Files starting with `_`, test/setup files and test/doc/example/build/cache folders are skipped.'''
    from importlib import import_module
    # `import_module` signals a missing package with ModuleNotFoundError (an ImportError).
    # `importlib.metadata.PackageNotFoundError` is a subclass of it, so catching only that
    # never matched; ImportError covers both.
    try: mod = import_module(pkg)
    except ImportError: print(f'package {pkg} not found. Skipping....'); return L()
    # Namespace packages import fine but have `__file__` set to None: nothing to scan.
    if getattr(mod, '__file__', None) is None: print(f'package {pkg} has no source location. Skipping....'); return L()
    skip_file_re = r'(^__init__\.py$|^setup\.py$|^conftest\.py$|^test_.*\.py$|^tests?\.py$|^.*_test\.py$)'
    skip_folder_re = r'(^tests?$|^__pycache__$|^\.eggs$|^\.mypy_cache$|^\.tox$|^examples?$|^docs?$|^build$|^dist$|^\.git$|^\.ipynb_checkpoints$)'
    return globtastic(Path(mod.__file__).parent, file_glob='*.py',skip_file_glob='_*',folder_re=pkg, skip_folder_re=skip_folder_re, skip_file_re=skip_file_re)

def ingest(pkgs:list=None):
    '''Ingest every package named in `pkgs`.

    For each package: upsert its name, version and summary into `packages`, then insert the
    chunks of all its files (via `pkg2files` and `code2chunks`) into `store`.
    `pkgs` may be omitted, in which case nothing is ingested. Returns one entry per package.'''
    from importlib.metadata import version, metadata
    def _ingest_one(pkg):
        print(f'Ingesting package: {pkg}...')
        # `summary` belongs in the record itself, next to name and version.
        packages.upsert(dict(name=pkg, version=version(pkg), summary=metadata(pkg)['Summary']), pk='name')
        # NOTE(review): chunks are inserted, not upserted — re-ingesting a package presumably duplicates its rows; confirm.
        store.insert_all(pkg2files(pkg).map(lambda p: code2chunks(pkg,p)).concat())
    return L(pkgs).map(_ingest_one)


def all_user_packages():
    'Names of the installed distributions whose `Author` metadata is present and is not "Python".'
    from importlib.metadata import distributions
    def has_other_author(dist):
        'Keep distributions with an `Author` field other than "Python".'
        return dist.metadata.get('Author') not in ('Python', None)
    return L(distributions()).filter(has_other_author).map(lambda dist: dist.metadata['Name'])

# NOTE(review): the result of this call is discarded (it is not the last expression of the cell) — presumably left over from exploration.
all_user_packages()
# Packages handed to `ingest` below.
pl = ['fastcore', 'fastlite', 'lisette', 'toolslm', 'chonkie', 'model2vec']

We'll use modern-bert-base onnx model for embedding the code snippets. We'll ingest the chunks first and then embed them. You can do both together. I like to separate it so that I can add more columns and test with different embedding models later. eg: `store.add_column('test_embedding', bytes)` and then `embed_docs(col='test_embedding', f=other_model.encode_document)`

In [None]:
# Parse the packages in `pl` and insert their chunks; embeddings are filled in later by `embed_docs`.
ingest(pl)
# ingest(all_user_packages()) # alternatively ingest all user installed packages. very slow

FastEncode is a simple ONNX-based embedding model wrapper that works with most ONNX models that ship a Hugging Face tokenizer.

In [None]:
# Embedding model used for both documents and queries below.
# `md` is 'models/<model name>' — presumably the local model directory; confirm against FastEncode.
m1 = FastEncode(modernbert, md='models/%s' % modernbert.model)

The main embedding function which embeds all documents in the store table using the provided embedding function.

In [None]:
def embed_docs(st:Table=store, f:callable=m1.encode_document, col='embedding',sz:int=100, retries:int=3):
    '''Embed every row of `st` whose `col` is NULL and write the result back as bytes.

    st: table to fill; rows are read by `content` and written back by `id`
    f: embedding function, called with a list of strings; each returned item must offer `.tobytes()`
    col: column receiving the embedding bytes
    sz: number of rows fetched per batch
    retries: consecutive passes without any progress that are tolerated before giving up

    Rows with blank content are never embedded and therefore stay NULL.'''
    def pending(): return only(st(select='count(id) as c', where=f'{col} is NULL'))['c']
    todo, stalls = pending(), 0
    while todo > 0: # onnx models sometimes fail silently so we loop until all are done
        for offset in range(0, todo, sz):
            batch = st(where=f'{col} is NULL', limit=sz, offset=offset)
            # Filter the rows themselves (not just their texts) so each embedding is
            # zipped with the row it was computed from.
            rows = [b for b in batch if (b['content'] or '').strip()]
            if not rows: continue
            embs = f([b['content'] for b in rows])
            for e,b in zip(embs, rows): b[col] = e.tobytes()
            st.upsert_all(rows, pk='id')
        left = pending()
        # Rows that can never be filled (blank content, persistent model failure) would
        # otherwise keep this loop spinning forever.
        stalls = stalls + 1 if left >= todo else 0
        if stalls >= retries: break
        todo = left

In [None]:
# Fill the default `embedding` column of `store` with the defaults of `embed_docs`.
embed_docs() # using modern-bert-base onnx model

Some helper functions and tools for our coding assistant.

In [None]:
@timed_cache(3600)
def get_pkgs():
    'Names of all packages recorded in the `packages` table (wrapped in `timed_cache(3600)`).'
    names = []
    for row in packages(select='name'): names.append(row['name'])
    return names


In [None]:
@delegates(db.search)
def code_search(q:str,                  # query to search
                emb_q:str=None,         # query to embed. If None, use q
                emb_f=m1.encode_query,  # embedding function
                wide:bool=False,        # whether to use wide search
                **kw                    # additional args to pass to db.search
):
    '''Code search through the database to find relevant code snippets.

    The query is embedded with `emb_f` and passed, together with the pre-processed text
    query, to `db.search`. When the query mentions ingested package names, results are
    restricted to those packages; a caller-supplied `where` is combined with that filter.'''
    emb = emb_f(emb_q if emb_q else q)
    # Whitespace-separated words of the query that are names of ingested packages.
    mentioned = L(set(q.split()).intersection(get_pkgs()))
    # Built with double quotes outside and single quotes inside so it parses before Python 3.12 too.
    pkg_filter = f"package in ({','.join(mentioned.map(repr))})" if mentioned else None
    # An absent or empty caller `where` must not end up as "(None) AND ...".
    user_where = kw.get('where')
    if user_where and pkg_filter: kw['where'] = f"({user_where}) AND {pkg_filter}"
    else: kw['where'] = user_where or pkg_filter
    return db.search(pre(q, wide=wide), emb.tobytes(), **kw)

In [None]:
def websearch(q: str,               # query to search
              top_k: int = 10,      # number of top results to return
              ) -> str:
    'Web search via `DDGS().text`. Returns a JSON list of results, each with `text`, `id` (the url) and `meta.title`.'
    # No reranking happens here: hits are returned in the order the search backend gives them.
    hits = dict2obj(DDGS().text(q, max_results=top_k))
    records = [dict(text=hit.body, id=hit.href, meta=dict(title=hit.title)) for hit in hits]
    return json.dumps(records)

The main RAG function, which first searches the code database and then optionally performs a web search if needed. You can also use models that support web search directly with lisette (a litellm wrapper) instead of the `websearch` function.

In [None]:
def rag(q: str,	# query string
        emb_q: str = None, # query to embed. If None, use q
        top_k: int = 5, # number of top results to return
        wide: bool = False, # whether to use wide search
        web: bool = False # whether to include web search results
) -> str:
    '''Search indexed code for relevant chunks. Returns structured results.

    The return value is a JSON object with `query`, `results` and `top_k`. `results` holds
    the code hits, followed by the web hits when `web` is true.'''
    results = list(code_search(q, emb_q=emb_q if emb_q else q, lim=top_k, columns=['content', 'metadata'], wide=wide))
    if web:
        # `websearch` returns a JSON string: decode it so the web hits are added as records
        # rather than as raw text. Ask for about half as many web hits, but at least one.
        results += json.loads(websearch(q, top_k=max(1, top_k // 2)))
    return json.dumps(dict(query=q, results=results, top_k=top_k))

In [154]:
def run_code(code:str # code to run
             )->str:
    'Run code in python interpreter'
    # NOTE(review): `code` is executed against this module's globals and may come from the model — no sandboxing is visible here.
    namespace = globals()
    return python(code, namespace)

def get_globals() -> list:
    'Return the names of the current global variables.'
    # A list snapshot rather than the live `dict_keys` view: it can be serialised and does not change afterwards.
    return list(globals())

In [None]:
def get_live_nb() -> Path:
    '''Best-effort path of the notebook you're currently running in.

    Returns None outside IPython or when no notebook is found. Otherwise returns the most
    recently modified `*.ipynb` under the current working directory, ignoring the copies
    kept in `.ipynb_checkpoints` folders.'''
    if get_ipython() is None: return None # Not in an IPython environment
    nbs = globtastic(Path.cwd(), file_glob='*.ipynb', skip_folder_re=r'^\.ipynb_checkpoints$')
    # Only the newest file is needed, so take the max by mtime instead of sorting everything.
    return Path(max(nbs, key=os.path.getmtime)) if nbs else None

def ai(code: str, # code to insert
       nb_path: Path | str | None = None # notebook path to use
       ):
    '''
    AI → inserts the given code string into the very next code cell.
    If nb_path provided → uses that instead of auto-detect.
    '''
    if not nb_path: nb_path = get_live_nb() or 'Untitled.ipynb'
    nb,ip = nbf.read(Path(nb_path), as_version=4), get_ipython()
    src = ip.user_ns.get('In', [None])[-1]
    idx = L(nb.cells).argfirst(lambda c: c.cell_type == 'code' and c.source.strip() == src.strip()) or -1
    ins_idx = idx + 1 if idx >= 0 else len(nb.cells)
    nw_c = nbf.v4.new_code_cell(source=code.strip() + "\n")
    check = ins_idx < len(nb.cells) and nb.cells[ins_idx].cell_type == 'code' and not nb.cells[ins_idx].source.strip()
    if check: nb.cells[ins_idx] = nw_c
    else: nb.cells.insert(ins_idx, nw_c)
    nbf.write(nb, nb_path)

In [None]:
# System prompt for the assistant. It is an f-string, so `packages(select='name,summary')` is
# evaluated once, when this cell runs; packages ingested later are not reflected until it is re-run.
# NOTE(review): the prompt advertises `rag(..., top_k: int = 7)` while `rag` is defined with `top_k=5` — confirm which is intended.
sp=f'''You are a razor-sharp Python coding assistant with perfect knowledge of fastcore, fastlite, lisette, toolslm, chonkie, model2vec and every package indexed in your RAG database.

Your ONLY job: answer with concise, working, copy-pasteable code. No essays. No apologies.
Available packages: {packages(select='name,summary')}.
TOOLS (in exact order you MUST follow):
1. rag(q: str, emb_q: str | None, top_k: int = 7) → ALWAYS call this first on every code question.
   - Use natural language q for FTS5, Do not pass the user query as is to q. Clean the query first. Add package names if teh query matches any ingested package. Use the summary of packages to help you.
   - Craft the right fts5 to get the best response.
   - Craft a precise emb_q for semantic search if needed.
   - top_k=5–10. Never more unless explicitly asked.
2. websearch(q) → ONLY if rag returned <3 useful chunks or the question is about unindexed packages.
3. run_code(code: str) → MANDATORY: execute the final example before replying. If it fails, fix and retry.
4. get_globals() → MANDATORY: call before any code that uses variables. Never clash with user namespace.

RESPONSE RULES — NON-NEGOTIABLE:
- Step 1 is internal thinking only. Never show it unless it contains a tool call.
- ALWAYS call rag() first. No exceptions for "simple" questions.
- Quote the most relevant source chunk verbatim (with path comment).
- Synthesize → minimal explanation → final runnable example.
- Final answer MUST be a single ```python code block. Nothing after it except optional one-sentence note.
- Judge the answer and If the answer is not good enough, research and reply with improved code.
- Use unique variable names (e.g. _result, _df, _items, _chat) unless user explicitly reuses theirs. Make sure you do not clash with user namespace.
- If run_code fails → fix silently and retry until it passes.
- Keep total response ≤ 250 words.

MEMORY: You remember every past example in this conversation. Reuse and refine them when relevant.

FAILURE IS NOT AN OPTION.
Be brutal. If the user’s idea is dumb, say so and give the right way.
  '''

In [None]:
# Chat session on 'gpt-4.1' with the system prompt `sp` and the four helper functions exposed as tools.
chat=Chat('gpt-4.1', sp, tools=[rag, websearch, run_code, get_globals])

In [None]:
# `c` is `chat` with these keyword arguments pre-bound, so a prompt is all that is passed at call time.
c=bind(chat, max_steps=8, return_all=True, max_tokens=10000)

Syntactic sugar to format the last response so that it shows the code, the tool name, the code that was executed and its result.

In [None]:
@patch
def fmt_res(self:Chat):
    'Format the last response to show code, tool name, code executed and code result.'
    # Reads the last three history entries: the final reply, the entry before it
    # (indexed by `name`/`content`) and the one before that (its `tool_calls`).
    lm = self.hist[-1].content
    tr = self.hist[-2]
    ltc = self.hist[-3].tool_calls
    # Text between the last ```python fence and the closing fence that follows it.
    after_fence = lm.split('```python')[-1]
    code = after_fence.split('```')[0].strip()
    tn = tr['name']
    # The arguments of the last tool call are a JSON string; its `code` entry is what was run.
    ce = json.loads(dict2obj(ltc[-1]).function.arguments)['code']
    cr = tr['content']
    return AttrDict(code=code, tool_name=tn, code_executed=ce, code_result=cr)

Let's try some code generation tasks now.

In [None]:
# Ask a question, then print the code block extracted from the final reply.
r=c('parallel processing of a list in fastcore');print(chat.fmt_res().code)

In [None]:
# Same pattern with a different question: query, then print the extracted code.
r=c('running a model inference using litellm');print(chat.fmt_res().code)

In [None]:
# Snippet handed to the assistant as a second message part, to be rewritten with fastcore utilities.
code = '''
 for i, cell in enumerate(nb.cells):
        if cell.cell_type == 'code' and cell.source.strip() == src.strip():
            idx = i
            break'''
# The prompt is a list: the instruction followed by the snippet.
r=c(['I want to make this code more efficient using fastcore utilities. Use the same variable names and exact logic. Rewrite it.', code])

We can use the `ai` function to insert code into the next cell of the current notebook and run it if we want to.

In [None]:
# Write the code from the last reply into the notebook file, in the cell after this one.
ai(chat.fmt_res().code)

This time let's actually ask it to do things in the workspace. You're basically putting the agent to work on pointed tasks.

In [None]:
# A more task-like request; the extracted code is printed for review before it is inserted.
r=c('how can I distill a model using model2vec and save it locally on a file to reuse later. take the example of nomic-ai/modernbert-embed-base');print(chat.fmt_res().code)

In [None]:
# Insert the code from the reply above into the next cell of the notebook file.
ai(chat.fmt_res().code)