In [None]:
#| default_exp core

# llm_ctx
> Source code for llm_ctx

In [None]:
#| export
from fastcore.utils import *
from fastcore.xml import *
from fastcore.script import *
import httpx

We'll use an `llms.txt` file for FastHTML for our examples.

In [None]:
# Read the sample `llms.txt` (resolved relative to the current working directory) into a string
path = Path('llms.txt')
txt = path.read_text()

In [None]:
#| export
def _parse_links(content):
    link_pat = r'^\s*-\s*\[(.+?)\]\((.+?)\)(?:\s*:\s*(.*))?$'
    return [dict(zip(['title','url','info'], m.groups())) 
            for m in re.finditer(link_pat, content, re.M)]

def parse_llm_txt(md):
    "Parse llms.txt-formatted markdown `md` into an object with `title`, `summary` and `sections`"
    # A missing H1 title still raises `IndexError`, exactly as before
    title = re.findall(r'^# (.+)$', md, re.M)[0]
    # The blockquote summary is optional in the llms.txt format, so fall back to an
    # empty string instead of raising when the file has none
    summaries = re.findall(r'^> (.+)$', md, re.M)
    summary = summaries[0] if summaries else ''
    # Splitting on a pattern with one capture group yields
    # [preamble, heading1, body1, heading2, body2, ...]; `[1:]` drops the preamble
    section_pat = r'^## (.+)$'
    sec_spl = re.split(section_pat, md, flags=re.MULTILINE)[1:]
    # Map each H2 heading to the list of links found in the text below it
    sections = {t.strip():_parse_links(c)
                for t,c in zip(sec_spl[::2], sec_spl[1::2])}
    return dict2obj(dict(title=title.strip(), summary=summary.strip(), sections=sections))

The returned `dict` contains the summary and title of the file...

In [None]:
# Parse the sample file, then show its H1 title and blockquote summary
d = parse_llm_txt(txt)
d.title, d.summary

('FastHTML',
 'FastHTML is a python library which brings together Starlette, Uvicorn, HTMX, and fastcore\'s `FT` "FastTags" into a library for creating server-rendered hypermedia applications. The `FastHTML` class itself inherits from `Starlette`, and adds decorator-based routing with many additions, Beforeware, automatic `FT` to HTML rendering, and much more. Although parts of its API are inspired by FastAPI, it is *not* compatible with FastAPI syntax and is not targeted at creating API services. FastHTML includes support for Pico CSS and the fastlite sqlite library, although using both are optional; sqlite can be used directly or via the fastsql library, and any CSS framework can be used. FastHTML is compatible with web components and any vanilla JS library, but not with React, Vue, or Svelte. Support for the Surreal and css-scope-inline libraries are also included, but both are optional.')

...along with each of the H2-defined sections.

In [None]:
# The keys of `d.sections` are the H2 headings of the file
list(d.sections)

['Docs', 'Examples', 'Optional']

Each section contains a list of URLs and optional extra info on each one.

In [None]:
# Each entry is a link with `title`, `url` and `info` keys
d.sections.Optional

(#1) [{'title': 'Starlette documentation', 'url': 'https://gist.githubusercontent.com/jph00/809e4a4808d4510be0e3dc9565e9cbd3/raw/9b717589ca44cedc8aaf00b2b8cacef922964c0f/starlette-sml.md', 'info': 'A subset of the Starlette documentation useful for FastHTML development'}]

In [None]:
#| export
# Shorthand constructors that pre-bind the first argument of `ft` ('sections' / 'project');
# attributes and children are supplied at call time, e.g. `Project(title=..., summary=...)(*sections)`
Sections = partial(ft, 'sections')
Project = partial(ft, 'project')

In [None]:
#| export
def Doc(url, **kw):
    "Create a `Doc` FT object with the text retrieved from `url` as the child, and `kw` as attrs."
    # Matches a line that consists solely of an HTML comment, e.g. `<!-- note -->`
    re_comment = re.compile('^<!--.*-->$', flags=re.MULTILINE)
    # httpx does not follow redirects by default and does not raise on error statuses.
    # Follow redirects and fail loudly on a non-2xx response, so an error page or an
    # empty redirect body is never silently embedded as the document text.
    resp = httpx.get(url, follow_redirects=True)
    resp.raise_for_status()
    # Drop comment-only lines and keep everything else in order
    txt = [o for o in resp.text.splitlines() if not re_comment.search(o)]
    return ft('doc', '\n'.join(txt), **kw)

In [None]:
#| export
def Section(nm, items):
    "Build an FT object tagged `nm` whose children are the `Doc` objects made from each entry of `items`."
    docs = []
    # Every item is unpacked into `Doc` as keyword arguments
    for item in items: docs.append(Doc(**item))
    return ft(nm, *docs)

In [None]:
#| export
def mk_ctx(d, optional=True):
    "Build a `Project` holding one `Section` per H2 part of `d`; pass `optional=False` to leave out the 'Optional' section."
    # Name of the section to leave out; the empty string is used when nothing should be dropped
    excluded = 'Optional' if not optional else ''
    sections = []
    for name,links in d.sections.items():
        if name==excluded: continue
        sections.append(Section(name, links))
    project = Project(title=d.title, summary=d.summary)
    return project(*sections)

In [None]:
# Build the full context; every linked document is downloaded via `Doc`, so this needs network access
ctx = mk_ctx(d)

In [None]:
# Third document of the first section; the link's `title` and `info` are read back as attributes
d0 = ctx.children[0].children[2]
d0.title,d0.info

('FastHTML quick start', 'A brief overview of many FastHTML features')

In [None]:
# The downloaded text is the document's first child; preview its first 180 characters
d0.children[0][:180]

'# Web Devs Quickstart\n\n\n\n<div>\n\n> **Note**\n>\n> We’re going to be adding more to this document, so check back\n> frequently for updates.\n\n</div>\n\n## Installation\n\n``` bash\npip instal'

In [None]:
#| export
def get_sizes(ctx):
    "Map each section tag in `ctx` to a dict of document title -> length of that document's first child"
    sizes = {}
    for section in ctx.children:
        tag = section.tag
        per_doc = {}
        for doc in section.children:
            title = doc.title
            per_doc[title] = len(doc.children[0])
        sizes[tag] = per_doc
    return sizes

In [None]:
# Length in characters of each downloaded document, grouped by section tag
get_sizes(ctx)

{'docs': {'Surreal': 17720,
  'CSS Scope Inline': 7836,
  'FastHTML quick start': 19364,
  'HTMX reference': 26427},
 'examples': {'Todo list application': 18588, 'Websockets application': 693},
 'optional': {'Starlette documentation': 48331}}

In [None]:
# Serialize the context to XML and save it; `write_text` returns the number of characters written
Path('fasthtml.md').write_text(to_xml(ctx))

151269

In [None]:
#| export
@call_parse
def llms_txt2ctx(
    fname:str, # File name to read
    optional:bool_arg=True # Include 'optional' section?
):
    "Print a `Project` with a `Section` for each H2 part in file read from `fname`, optionally skipping the 'optional' section."
    # Parse the file named by `fname`. Reading a module-level `d` instead only works inside
    # the notebook session and ignores the argument entirely.
    d = parse_llm_txt(Path(fname).read_text())
    # Reuse `mk_ctx` so the CLI and the library API build the context the same way
    ctx = mk_ctx(d, optional=optional)
    print(to_xml(ctx))

In [None]:
from contextlib import redirect_stdout

In [None]:
# Capture what the CLI function prints into a file. `__wrapped__` is used to call it
# directly with Python arguments (presumably the function underneath `call_parse`).
with open('fasthtml-noopt.md', 'w') as out, redirect_stdout(out):
    llms_txt2ctx.__wrapped__('llms.txt', optional=False)