In [None]:
#| default_exp code-assistant

# Setup
> Let's set up our coding assistant with RAG capabilities using litesearch for code search and embedding models for semantic search.


In [None]:
import ast
%load_ext autoreload
%autoreload 2

In [1]:
#| export
from ddgs import DDGS
from fastcore.all import *
from fastlite import Database
import json
from lisette import *
from litesearch import *
import os
from toolslm.funccall import *

Applying usearch macOS fix if required...
usearch dylib path:  /Users/71293/code/litesearch/.venv/bin/usearch_binaries/usearch_sqlite.dylib
✗ install_name_tool failed: Command '['install_name_tool', '-add_rpath', '/usr/lib', '/Users/71293/code/litesearch/.venv/bin/usearch_binaries/usearch_sqlite.dylib']' returned non-zero exit status 1.
Command output: 
Command stderr: error: /Library/Developer/CommandLineTools/usr/bin/install_name_tool: for: /Users/71293/code/litesearch/.venv/bin/usearch_binaries/usearch_sqlite.dylib (for architecture arm64) option "-add_rpath /usr/lib" would duplicate path, file already has LC_RPATH for: /usr/lib



## CodeSearcher
Let's set up the codesearcher db which will handle code ingestion, embedding and searching.
> We'll use the ModernBERT-base ONNX model to embed the code snippets. The `FastEncode` class from utils will help us with that.

In [24]:
#| export
os.environ['TOKENIZERS_PARALLELISM']='false'  # to suppress warnings from tokenizers
def codesearcher(db_pth='code.db'):
	'''Open the database at `db_pth` and make sure the tables used for code search exist.

	The hashed content store gets a `package` column if it lacks one, and a
	`packages` table (keyed by `name`) is created when it is not already there.
	Returns the database handle.
	'''
	db = database(db_pth)
	store = db.get_store(hash=True)
	# Tag every stored chunk with the package it came from.
	if 'package' not in store.c:
		store.add_column('package', str)
	pkg_cols = dict(name=str, version=str, summary=str, uploaded_at=float)
	db.t.packages.create(
		**pkg_cols,
		pk=['name'],
		defaults=dict(uploaded_at='CURRENT_TIMESTAMP'),
		not_null=['name'],
		if_not_exists=True,
	)
	return db

@patch(as_prop=True)
def st(self:Database):
	'The hashed content store table, as returned by `get_store(hash=True)`.'
	store = self.get_store(hash=True)
	return store
@patch(as_prop=True)
def packages(self:Database):
	'The `packages` table of this database.'
	tables = self.t
	return tables.packages

@timed_cache(24*60*60)
def embedder(emb_model=modernbert):
	'Build a `FastEncode` for `emb_model`; the result is cached by `timed_cache` for one day.'
	return FastEncode(emb_model)

def emb_doc(t):
	'Encode `t` as document text with the cached embedder.'
	return embedder().encode_document(t)

def emb_query(t):
	'Encode `t` as a search query with the cached embedder.'
	return embedder().encode_query(t)

@patch
def _is_pkg_ingested(self:Database, pkg:str) -> bool:
	'''Check if a package is already ingested and up-to-date.

	Returns True only when the `packages` table has a row for `pkg` whose recorded
	version equals the currently installed version. `importlib.metadata.version`
	raises `PackageNotFoundError` if a recorded package is no longer installed.
	'''
	from importlib.metadata import version as v
	# Bind the name as a SQL parameter: repr() follows Python quoting rules and
	# produces a double-quoted (identifier) literal for names containing a quote.
	ep = self.packages(select='name, version', where='name=?', where_args=[pkg])
	return (only(ep)['version'] == v(pkg)) if ep else False

@patch
def update_pkg(self:Database, pkg:str, embed=True):
	'''Ingest or refresh the code chunks of `pkg` and record it in the packages table.

	Does nothing when the recorded version matches the installed one and the store
	already holds rows for the package. Otherwise the package is re-chunked, stored
	rows whose content no longer exists are removed, the fresh chunks are (optionally)
	embedded and upserted, and the package's name/version/summary row is upserted.
	'''
	from importlib.metadata import version as v, metadata as meta
	if self._is_pkg_ingested(pkg) and len(self.st(where='package=?', where_args=[pkg])) > 0: return

	def _tag(d):
		# Promote the package name and upload time to top-level columns of the record.
		d.update(package=pkg, uploaded_at=d['metadata']['uploaded_at'])
		return d
	content = pkg2chunks(pkg).map(_tag)

	# Remove stale rows by comparing content directly and deleting one primary key
	# at a time: Table.delete takes a single pk, and the store's hashed ids are not
	# guaranteed to equal an md5 of the content.
	fresh = set(content.map(lambda r: r['content']))
	for row in self.st(select='id, content', where='package=?', where_args=[pkg]):
		if row['content'] not in fresh: self.st.delete(row['id'])
	if not content: return
	# filter() drops empty results from embed_chunk so concat() never sees None.
	if embed: content = L(chunked(content, 100)).map(embed_chunk).filter().concat()
	self.st.insert_all(content,upsert=True,hash_id='id')
	self.packages.upsert(dict(name=pkg, version=v(pkg), summary=meta(pkg)['Summary']), pk='name')

@patch
def rm_pkg(self:Database, pkg:str):
	'''Remove a package and its code snippets from the database.

	Removing a package that was never ingested is a no-op.
	'''
	# Table.delete expects primary-key values; deleting by a column condition is
	# what delete_where is for, and it lets the name be bound as a parameter.
	self.st.delete_where('package=?', [pkg])
	self.packages.delete_where('name=?', [pkg])

def embed_chunk(chunk:list,emb_fn=emb_doc):
	'''Embed a list of code chunks using `emb_fn`.

	Each record with non-blank `content` gets an `embedding` key holding the raw
	bytes of its vector; blank records are left untouched. Returns the same `chunk`
	list, or an empty list when there is nothing to embed, so callers can
	concatenate results safely.
	'''
	if not (chunk and embedder()): return []
	# Keep the records being embedded in one list so every vector is zipped back
	# onto the record its text came from, even when blank records are skipped.
	todo = [b for b in chunk if b['content'].strip()]
	if not todo: return []
	for e,b in zip(emb_fn([b['content'] for b in todo]), todo): b['embedding'] = e.tobytes()
	return chunk

@patch
def embed(self:Database, sz=100, reembed=False):
	'''Embed stored documents in batches of `sz` and write them back to the store.

	Only rows whose `embedding` is NULL are processed unless `reembed` is true,
	in which case every row is embedded again.
	'''
	if not self.st or not embedder(): return
	cond = None if reembed else 'embedding is NULL'
	rows = self.st(where=cond)
	embedded = L(chunked(rows, sz)).map(embed_chunk).concat()
	self.st.upsert_all(embedded, pk='id')

@patch
def update_pkgs(self:Database, pkgs:str|list, embed=True):
	'''Update `pkgs` together with every package already recorded in the database.

	`pkgs` may be a single package name or a collection of names.
	'''
	# attrgot reads the `name` value of each row; L.map('name') would treat the
	# string as a format template and return the literal 'name' for every row.
	known = L(self.packages(select='name')).attrgot('name')
	# L(...) keeps a lone string as one item, where set('abc') would split it into characters.
	todo = set(L(pkgs)).union(known)
	for p in sorted(todo): self.update_pkg(p, embed)

@patch
def code_search(self:Database,
	q:str,               	# query to search
	emb_q:str=None,     	# query to embed. If None, use q
	wide:bool=False,    	# whether to use wide search
	emb_fn=emb_query,		# embedding function
	**kw					# additional args to pass to db.search
):
	'''Code search through the database to find relevant code snippets.

	When the query mentions ingested package names, results are restricted to those
	packages; a caller-supplied `where` in `kw` is AND-ed with that restriction.
	'''
	emb = emb_fn(emb_q if emb_q else q)
	# attrgot reads each row's `name`; L.map('name') would return the literal 'name'.
	known = set(L(self.packages(select='name')).attrgot('name'))
	available = L(set(q.split()).intersection(known))
	# str.format avoids nesting the same quote inside an f-string, which is a
	# SyntaxError on Python versions before 3.12.
	wh = 'package in ({})'.format(','.join(available.map(repr))) if available else None
	if 'where' not in kw: kw['where'] = wh
	elif wh: kw['where'] = f"({kw['where']}) AND {wh}"
	return self.search(pre(q, wide=wide), emb.tobytes(), **kw)

You can now create a codesearcher database and ingest code from installed packages. I'm giving it a few popular packages to start with.

In [9]:
# Open (or create) the default `code.db` search database.
# NOTE(review): `rag` and the system prompt in the exported cells read this global `db`,
# but this cell carries no `#| export` directive — confirm how the exported module gets `db`.
db=codesearcher()
# presumably narrows the list to packages installed in this environment — verify against litesearch
p = installed_packages(['fastcore','fastlite','lisette','toolslm','chonkie','model2vec','ghapi','apswutils','apsw','fasthtml','litesearch'])
# Ingest and embed the selected packages, plus any already recorded in the database.
db.update_pkgs(p, embed=True)

## Coding Assistant

## Coding Assistant Tools
> Let's set up the tools that our coding assistant will use. We'll have a web search tool, a code search tool using RAG, and a code execution tool.

In [16]:
#| export
def websearch(q: str,               # query to search
              top_k: int = 10,      # number of top results to return
              ) -> str:
	'Search the web with DuckDuckGo. Returns a JSON list of hits with text, id (url) and meta.title.'
	# No reranking happens here: hits are returned in the order DDGS provides them.
	hits = dict2obj(DDGS().text(q, max_results=top_k))
	return json.dumps([dict(text=h.body, id=h.href, meta=dict(title=h.title)) for h in hits])

def rag(q: str,	# query string
        emb_q: str = None, # query to embed. If None, use q
        top_k: int = 50, # number of top results to return
        wide: bool = False, # whether to use wide search
        web: bool = False # whether to include web search results
) -> str:
    'Search indexed code for relevant chunks. Returns structured results.'
    hits = db.code_search(q, emb_q=emb_q or q, limit=top_k, columns=['content', 'metadata'],
                          wide=wide, emb_metric='cosine')
    results = list(hits or [])
    if web:
        # websearch returns a JSON string; decode it so its hits are appended as
        # result dicts instead of extending the list one character at a time.
        # Ask for half as many web hits as code hits, but never zero.
        results += json.loads(websearch(q, top_k=max(1, top_k // 2)))
    return json.dumps(dict(query=q, results=results, top_k=top_k))

def run_code(code:str, 	  # code to run
             strict:bool=True # reserved flag; currently has no effect on execution
             )->str:
	'''Run code in python interpreter'''
	# The original non-strict branch was the bare expression `(code, globals())`,
	# which did nothing, so both modes have always executed `python(code)`.
	# NOTE(review): decide what non-strict execution should do before wiring `strict` in.
	return python(code)

def get_globals():
	'Return the names currently defined in the module-level namespace.'
	namespace = globals()
	return namespace.keys()

### LLM Chat with Tools Setup.
> Let's not call it an Agent. Maybe an Augmented LLM Chat?

In [105]:
#| export
# System prompt for the tool-augmented chat. The list of available packages is read
# from the database once, when this cell runs.
# Kept consistent with the code: tools are numbered consecutively, the documented
# `rag` default for top_k matches the function (50), and the planning step is
# referred to by a single name (Step 0).
sp= f'''You are a razor-sharp Python coding assistant with perfect knowledge of fastcore, fastlite, lisette, toolslm, chonkie, model2vec and every package indexed in your RAG database.

Your ONLY job: answer with concise, working, copy-pasteable code. No essays. No apologies.
Available packages: {db.packages(select='name,summary')}.
TOOLS (in exact order you MUST follow):
1. rag(q: str, emb_q: str | None, top_k: int = 50, wide:bool=False, web:bool=False) → ALWAYS call this first on every code question.
   - Use natural language q for FTS5, Do not pass the user query as is to q. Clean the query first. Add package names if the query matches any ingested package. Use the summary of packages to help you to add the correct package name if it exists in packages. for ex: fastlite is a wrapper around apsw and sqlite, lisette is a wrapper around litellm.
   - Craft the right fts5 to get the best response.
   - Craft a precise emb_q for semantic search if needed.
   - top_k=5–10. Never more unless explicitly asked.
   - wide=True if the question is vague or broad.
   - web=True if the question seems to need web results.
2. run_code(code: str) → MANDATORY: execute the final example before replying. If it fails, fix and retry.
3. get_globals() → MANDATORY: call before any code that uses variables. Never clash with user namespace.

RESPONSE RULES — NON-NEGOTIABLE:
- Step 0: ALWAYS think about which tools to use and in what order. Plan your steps carefully. You can use rag(), get_globals() and run_code() multiple times.
- Step 0 is internal thinking only. Never show it unless it contains a tool call.
- ALWAYS call rag() first. No exceptions for "simple" questions.
- if the results returned aren't sufficient to answer the question, refine your query and call rag() with `web` true again.
- Quote the most relevant source chunk verbatim (with path comment).
- Synthesize → minimal explanation → final runnable example.
- Final answer MUST be a single ```python code block. Nothing after it except optional one-sentence note.
- Judge the answer and If the answer is not good enough, research and reply with improved code.
- Use unique variable names (e.g. _result, _df, _items, _chat) unless user explicitly reuses theirs. Make sure you do not clash with user namespace.
- If run_code fails → fix silently and retry until it passes.
- Keep total response ≤ 250 words.

MEMORY: You remember every past example in this conversation. Reuse and refine them when relevant.

FAILURE IS NOT AN OPTION.
Be brutal. If the user’s idea is dumb, say so and give the right way.
  '''
# Extra request headers sent with every completion call.
xtra = {'editor-version': 'vscode/1.85.1','Copilot-Integration-Id': 'vscode-chat'}
chat=Chat('github_copilot/claude-sonnet-4', sp, tools=[rag, websearch, run_code, get_globals])
# `c` is the chat call with step/token limits and the extra headers pre-bound.
c=bind(chat, max_steps=8, return_all=True, max_tokens=10000,extra_headers=xtra)

Syntactic sugar to format the last response so it shows the code, the tool name, the code executed and the code result.

In [106]:
@patch
def fmt_res(self:Chat, copy:bool=True):
	'''Format the last response to show code, tool name, code executed and code result.

	Returns an `AttrDict` with `code` (text of the last ```python block of the final
	message), `tool_name`, `code_executed` and `code_result`; the last three are None
	when the final message was not preceded by a tool round-trip. With `copy=True`
	the extracted code is also placed on the clipboard.
	'''
	hist = self.hist
	lm = hist[-1].content or ''
	# Short histories have no tool round-trip to inspect; avoid indexing past the start.
	tr = (hist[-2] or None) if len(hist) > 1 else None
	ltc = getattr(hist[-3], 'tool_calls', None) if len(hist) > 2 else None
	code = lm.split('```python')[-1].split('```')[0].strip()
	tn=ce=cr= None
	if tr and ltc:
		args = json.loads(dict2obj(ltc[-1]).function.arguments)
		# Only run_code passes a `code` argument; other tools give None instead of a KeyError.
		tn,ce,cr = tr['name'], args.get('code'), tr['content']
	if copy:
		import pyperclip  # imported lazily so copy=False works without a clipboard backend
		pyperclip.copy(code); print('Code copied to clipboard!')
	return AttrDict(code=code, tool_name=tn, code_executed=ce, code_result=cr)

Let's try some code generation tasks now.

In [107]:
# Send a prompt through the bound chat `c`; `r` keeps whatever the call returns.
r=c('parallel processing of a list in fastcore')




[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m



In [108]:
# Show the code extracted from the last reply (fmt_res also copies it to the clipboard by default).
print(chat.fmt_res().code)

Code copied to clipboard!
from fastcore.parallel import parallel

def square(x):
    return x ** 2

_numbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
_parallel_result = parallel(square, _numbers, n_workers=4)
print(_parallel_result)  # [1, 4, 9, 16, 25, 36, 49, 64, 81, 100]


In [109]:
# Ask about litellm inference, then print the code extracted from the reply.
r = c('running a model inference using litellm')
print(chat.fmt_res().code)


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m

Code copied to clipboard!
from lisette import Chat

_chat_client = Chat(model="gpt-4o-mini", temp=0.7)
_response = _chat_client("What is the capital of France?")
print(_response.choices[0].message.content)  # "The capital of France is Paris."


In [88]:
# A prompt that explicitly asks the assistant to search the web as well.
r = c('how can I distill a model using model2vec and save it locally on a file to reuse later. websearch the best code embedding model from hf and use that repo.')
print(chat.fmt_res().code)

  from .autonotebook import tqdm as notebook_tqdm
MPS is available but PyTorch 2.9.1 has known performance regressions. Falling back to CPU. Please use a PyTorch version < 2.8.0 to enable MPS support.
Encoding tokens: 100%|██████████| 50262/50262 [00:19<00:00, 2525.97 tokens/s]


Code copied to clipboard!
from model2vec.distill.distillation import distill
from model2vec import StaticModel

# Use the CodeBERT model
hf_model = "microsoft/codebert-base"

# Distill the encoder down to a static model (low rank pca, 256 dims)
static_model = distill(model_name=hf_model, pca_dims=256)

# Save to disk
save_path = "./distilled-codebert-base"
static_model.save_pretrained(save_path)

# Reload when needed
reloaded = StaticModel.from_pretrained(save_path)
# Test: get embedding for code
embedding = reloaded.encode(["def foo():\n    return 42"])
print(embedding.shape)  # (1, 256)


Let's try a question that falls outside the indexed packages.

In [110]:
# ffmpeg is not among the packages ingested above.
r=c('how can I compress a video using ffmpeg')


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m



In [111]:
# Print the code extracted from the last reply.
print(chat.fmt_res().code)

Code copied to clipboard!
# Most common compression command - good quality/size balance
ffmpeg -i input.mp4 -c:v libx264 -crf 24 -c:a aac -movflags +faststart output.mp4

# Aggressive compression - reduce resolution + higher CRF  
ffmpeg -i input.mp4 -vf scale=1280:720 -c:v libx264 -crf 28 -c:a aac output.mp4


# Export Notebook

In [None]:
#| hide
# Export the cells marked `#| export` from this notebook.
from nbdev.export import nb_export
nb_export('02_tool_use.ipynb')