In [None]:
#|default_exp core

# AI-Magic

## Setup

- Retrieval:
  - Include `-i URL` on the `%%ai` line to fetch that URL and add all of its content to the context
    - Optionally print the extracted URL
    - `-f` force downloading rather than using cache
    - Use 'Distributing Jupyter Extensions as Python Packages' as an example
  - `%ai add_docs URL {-n name} {-d depth}` to grab and embed website and stick it in a local retrieval DB
  - `%ai -r name` to do RAG over that bit of the embedding DB
    - Optionally print the retrieved context
- `%ai reset` should take a parameter to set the default config, such as image size and max RAG context
- Images

In [None]:
#|hide
import os
# os.environ['ANTHROPIC_LOG'] = 'debug'

from IPython.core.magic import register_line_cell_magic

In [None]:
#| export
import xml.etree.ElementTree as ET, json

from anthropic import Anthropic
from IPython.display import Markdown,Javascript,clear_output
from io import BytesIO
from html import unescape

from fastcore.utils import *

In [None]:
#| export
# Claude 3 model ids, listed opus, sonnet, haiku; code in this notebook selects from the tuple by index (e.g. `models[-1]`)
models = 'claude-3-opus-20240229','claude-3-sonnet-20240229','claude-3-haiku-20240307'

In [None]:
model = models[1]

In [None]:
ip = get_ipython()

In [None]:
#| export
def set_next_cell(ip, text, code=True, replace=False, execute=False):
    "Write a `set_next_input` payload to `ip` so a cell holding `text` is created (or replaced) below the active one"
    # Non-code (markdown) cells are always flagged for execution
    run = execute if code else True
    payload = {'source': 'set_next_input', 'replace': replace, 'execute': run,
               'text': text, 'ctype': 'code' if code else 'markdown'}
    ip.payload_manager.write_payload(payload)

## Claude helpers

In [None]:
#| export
def mk_msg(content, role='user', **kw):
    "Build the `dict` for one Claude message: `role`, `content`, plus any extra `kw` entries"
    msg = {'role': role, 'content': content}
    msg.update(kw)
    return msg

In [None]:
#| export
def mk_msgs(msgs, **kw):
    "Turn string items of `msgs` into messages whose role alternates user/assistant by position; non-string items pass through unchanged"
    roles = 'user','assistant'
    res = []
    for idx,msg in enumerate(msgs):
        # The position in `msgs` (not the count of strings seen) decides the role
        res.append(mk_msg(msg, roles[idx%2], **kw) if isinstance(msg,str) else msg)
    return res

In [None]:
#| export
def contents(r):
    "Helper to get the whitespace-stripped text of the first content block of Claude response `r`"
    first_block = r.content[0]
    return first_block.text.strip()

In [None]:
#| export
class AnthClient:
    def __init__(self, model, cli=None):
        "Basic Anthropic messages client for `model`, using `cli` if given or a new `Anthropic()` otherwise"
        client = cli or Anthropic()
        self.m = client.messages
        self.model = model

    def _req(self, msgs, sp, temp, maxtok, stop):
        "Keyword arguments shared by the blocking and streaming requests"
        return dict(model=self.model, messages=mk_msgs(msgs), max_tokens=maxtok,
                    system=sp, temperature=temp, stop_sequences=stop)

    def __call__(self, msgs, sp='', temp=0, maxtok=4096, stop=None):
        "Send `msgs` with system prompt `sp` and return the result of `messages.create`"
        return self.m.create(**self._req(msgs, sp, temp, maxtok, stop))

    def stream(self, msgs, sp='', temp=0, maxtok=4096, stop=None):
        "Send `msgs` with system prompt `sp` and yield the items of the stream's `text_stream`"
        with self.m.stream(**self._req(msgs, sp, temp, maxtok, stop)) as resp:
            yield from resp.text_stream

In [None]:
c = AnthClient(models[-1])

In [None]:
#| export
def _mk_sysp(pre, has_nb):
    res = 'You are a helpful assistant with deep expertise in many topics, being used inside a Jupyter Notebook environment'
    if not pre: return res + '. You provide concise yet complete answers, with no summary or restatement of the question background.'
    if pre[0]=='-': return res + ". Provide concise step by step instructions to complete the task as a markdown bulleted list. Don't provide much detail about each step -- the user will be going through each step in a notebook one at a time, so they will ask for more detail when they get there."
    lang = pre[3:]
    res = res + f', and are a skilled {lang} coder. You provide, using a markdown fenced block with no additional explanation, {lang} code that fully completes the task. Your code will be run in an existing notebook.'
    if has_nb:
        res += "\nThe user has just run the cells shown in `nb_cells` in the notebook you are using, so don't repeat that context."
    return res

In [None]:
#| export
def hl_md(s, lang='xml'):
    "Wrap `s` in a markdown fenced block tagged `lang` and return it as a `Markdown` object"
    fence = '```'
    return Markdown(f'{fence}{lang}\n{s}\n{fence}')

In [None]:
#| export
def to_xml(node, hl=False):
    "Serialize `node`, a `(tag, children, attrs)` tuple, to an indented XML string (syntax highlighted if `hl`)"
    def _build(name, children, attrib):
        elem = ET.Element(name, attrib=attrib)
        if isinstance(children, list):
            # A list holds nested `(tag, children, attrs)` tuples
            for child in children: elem.append(_build(*child))
        elif children is not None: elem.text = str(children)
        return elem

    tree = _build(*node)
    ET.indent(tree)
    xml = ET.tostring(tree, encoding='unicode')
    if hl: return hl_md(xml)
    return xml

In [None]:
#| export
def t(tag, c=None, **kw):
    "Build the `(tag, children, attrs)` tuple that `to_xml` consumes; leading underscores are dropped from attr names (e.g. `_class`)"
    attrs = {}
    for name,val in kw.items(): attrs[name.lstrip('_')] = str(val)
    return (tag, c, attrs)

In [None]:
#| export
# Define one module-level helper per name in `tags` (e.g. `div(...)`, `p(...)`), each being `t` with the tag pre-filled
g = globals()
tags = 'div','img','h1','h2','h3','h4','h5','p','hr','span','html'
for o in tags: g[o] = partial(t, o)

In [None]:
a = html([
    p('This is a paragraph'),
    hr(),
    t('x-custom', foo='bar'),
    img(src='http://example.prg'),
    div([
        h1('This is a header'),
        h2('This is a sub-header', style='k:v'),
    ], _class='foo')
])

In [None]:
to_xml(a, True)

```xml
<html>
  <p>This is a paragraph</p>
  <hr />
  <x-custom foo="bar" />
  <img src="http://example.prg" />
  <div class="foo">
    <h1>This is a header</h1>
    <h2 style="k:v">This is a sub-header</h2>
  </div>
</html>
```

In [None]:
#|export
def json_to_xml(d, rnm):
    "Convert JSON-like data `d` (nested dicts, lists and scalars) to an indented XML string rooted at tag `rnm`"
    def _fill(elem, data):
        # Dict keys become child tags, list entries become `item` children, anything else becomes text
        if isinstance(data, dict): children = data.items()
        elif isinstance(data, list): children = (('item', o) for o in data)
        else:
            elem.text = str(data)
            return
        for tag,val in children: _fill(ET.SubElement(elem, tag), val)

    root = ET.Element(rnm)
    _fill(root, d)
    ET.indent(root)
    return ET.tostring(root, encoding='unicode')

In [None]:
#| export
def _get_output(o):
    "Node for a single cell output dict `o`: `error` for errors, `output` for stream/result/display outputs"
    kind = o['output_type']
    if kind=='error': return t('error', o['tb'], evalue=o['evalue'])
    # Stream outputs carry their content under `text`; results and display data under `data`
    if kind=='stream': return t('output', o['text'], type=kind)
    if kind in ('execute_result','display_data'): return t('output', o['data'], type=kind)
    raise Exception(o)

def _cellxml(c):
    "Node for notebook cell `c`: its `source` plus an `outputs` child when the cell has any outputs"
    children = [t('source', c['source'])]
    out_nodes = [_get_output(o) for o in c.get('outputs', [])]
    if out_nodes: children.append(t('outputs', out_nodes))
    return t('cell', children, type=c['cell_type'])

In [None]:
#| export
def _mk_ctx(vs, cells, ns=None):
    "XML `context` block for Claude, built from variable names `vs` (looked up in `ns`, or this module's globals) and nb `cells`"
    if not (vs or cells): return ''
    parts = []
    if vs:
        ns = ns or globals()
        parts.append(t('variables', [t(nm, ns[nm], type=type(ns[nm]).__name__) for nm in vs]))
    if cells: parts.append(t('nb_cells', [_cellxml(c) for c in cells]))
    xml = to_xml(t('context', parts))
    if '&' in xml:
        # The serialized XML contains escaped entities, so prepend a note asking for them to be decoded
        parts.insert(0, t('info', 'If this XML context contains entities, they should be decoded.'))
        xml = to_xml(t('context', parts))
    return xml+'\n' if xml else ''

In [None]:
b,c = 'bb',1

In [None]:
print('</code>')
Markdown('*nice*')

</code>


*nice*

In [None]:
# TODO: check on outputs

In [None]:
#| eval: false
cc = nbmeta['idx']
hl_md(_mk_ctx(['b','c'], nbmeta['cells'][cc-2:cc]))

```xml
<context>
  <info>If this XML context contains entities, they should be decoded.</info>
  <variables>
    <b type="str">bb</b>
    <c type="int">1</c>
  </variables>
  <nb_cells>
    <cell type="code">
      <source>print('&lt;/code&gt;')
Markdown('*nice*')</source>
      <outputs>
        <output type="stream">&lt;/code&gt;
</output>
        <output type="execute_result">*nice*</output>
      </outputs>
    </cell>
    <cell type="code">
      <source># TODO: check on outputs</source>
    </cell>
  </nb_cells>
</context>

```

In [None]:
#| export
def first_match(lst, f, default=None):
    "Index of the first item of `lst` for which predicate `f` is truthy, or `default` if there is none"
    for idx,item in enumerate(lst):
        if f(item): return idx
    return default

In [None]:
#| export
def last_match(lst, f, default=None):
    "Index of the last item of `lst` for which predicate `f` is truthy, or `default` if there is none"
    idx = len(lst)
    # Walk backwards from the end so the first hit is the last match
    while idx > 0:
        idx -= 1
        if f(lst[idx]): return idx
    return default

In [None]:
#| hide_input
# Mock `nbmeta` so the cells below can still run when the nbextension is not installed
# NOTE(review): presumably the nbextension supplies the real `nbmeta` when present -- confirm
nbmeta = dict(idx=45, name='00_core.ipynb', cells=[{'cell_type': 'code', 'source': '%%ai\n0', 'outputs': []}])

# Temporary no-op `ai` and `aip` line/cell magics, so the dummy test cells below run without doing anything
@register_line_cell_magic
def ai(*args, **kw): pass

@register_line_cell_magic
def aip(*args, **kw): pass

## Dummy cells for testing

In [None]:
%ai reset

We have extracted the following snippet from inside a web page's Javascript, for further processing:

In [None]:
snip = '13], "nums":[1,4,6], "chars":'

In [None]:
snip

'13], "nums":[1,4,6], "chars":'

In [None]:
%%aip 0
Extract the 'nums' section of `snip`.

In [None]:
nums = re.search(r'"nums":\[[^\]]*\]', snip).group()

In [None]:
nums

'"nums":[1,4,6]'

In [None]:
%%aip 0
Set `nlist` to the numeric list in `nums`

In [None]:
nlist = json.loads('{'+nums+'}')['nums']

In [None]:
nlist

[1, 4, 6]

In [None]:
%ai skip
# This useless cell won't be included in the dialog
'asdfsadsf'

'asdfsadsf'

In [None]:
%%ai 0
Get the sum of squares of `nlist`

## Prompt and dialog creation

In [None]:
#|export
def get_cells(nbm, offset=1):
    """Cells from notebook metadata `nbm` that belong to the current dialog.

    Takes `nbm['cells']` up to index `nbm['idx'] + offset` (exclusive), drops everything up to and including
    the last cell starting with `%ai reset`, and removes cells whose first line is `%ai skip`.
    """
    cells = nbm['cells'][:nbm['idx'] + offset]
    lm = last_match(cells, lambda o: o.get('source','').startswith('%ai reset'))
    # `lm` is an index: compare with None so that a reset in the very first cell (index 0) is honoured
    if lm is not None: cells = cells[lm+1:]
    # Match exactly `skip` (optionally followed by spaces/tabs) at the end of the first line
    return [o for o in cells if not re.match(r'%ai +skip[ \t]*$', o.get('source',''), flags=re.MULTILINE)]

In [None]:
cells = get_cells(nbmeta, -2)

In [None]:
#| eval: false
m = first_match(cells, lambda o: o.get('source','').startswith('%%ai'))
aic = cells[m]
rep = cells[m+1] if m<len(cells)-1 else None

In [None]:
#| eval: false
aic,rep

({'cell_type': 'code',
  'source': "%%aip 0\nExtract the 'nums' section of `snip`.",
  'outputs': []},
 {'cell_type': 'code',
  'source': 'nums = re.search(r\'"nums":\\[[^\\]]*\\]\', snip).group()',
  'outputs': []})

In [None]:
#| export
# Template for the user message: context (if any), then instructions, then the task
_pp = '''{context_goes_here}<instructions>
{instructions_go_here}
</instructions>

<task>
{prompt_goes_here}
</task>'''

# Instructions used when the magic asks for code
_code_pp = '''Write code I can run to complete the `task` below. You are not expected to access the internet or run code -- please provide code that I will run in my notebook.

Write code using expert-level concise code with no comments, and using minimal vertical space (including using the ternary `if` op as appropriate).'''
# Instructions used for the prose magics
_prose_pp = 'Complete the `task` below. Answer concisely and with no summary or background unless asked for specifically -- I will ask for additional details or examples if I need them.'

In [None]:
#| export
def _mk_prompt(aic, rep, cells, expand=True, ns=None):
    """Prompt for AI cell `aic` and optional reply `rep`, with nb cell context `cells`.

    The first line of `aic['source']` is the magic and the rest is the task. Names written as $`variable` are
    looked up in `ns` and added to the context when `expand` is true; the `$` marker is always removed from the
    task text. Returns a list holding the user message, followed by an assistant message built from `rep` if given.
    """
    if ns is None:
        # `_mk_ctx` indexes `ns` by variable name, so pass the shell's user namespace rather than the shell itself.
        # With no shell, leave `ns` as None so `_mk_ctx` applies its own fallback.
        shell = get_ipython()
        if shell is not None: ns = shell.user_ns
    magic,*prompt = aic['source'].split('\n')
    prompt = '\n'.join(prompt).strip()
    words = magic.split()
    cmd = words[0] if words else ''  # a blank first line has no command
    inst = _prose_pp if cmd in ('%%ai','%%aio') else _code_pp
    var_nms = re.findall(r'\$`(\w+)`', prompt) if expand else []
    prompt = re.sub(r'\$(`\w+`)', r'\1', prompt)
    ctx = _mk_ctx(var_nms, cells, ns)
    fullp = _pp.format(prompt_goes_here=prompt, context_goes_here=ctx, instructions_go_here=inst)
    res = [mk_msg(fullp)]
    if rep:
        src = rep['source'].strip()
        # Code replies are fenced so they read as a code block in the assistant turn
        if rep['cell_type']=='code': src = f'```\n{src}\n```'
        res.append(mk_msg(src, role='assistant'))
    return res

In [None]:
#| eval: false
user,asst = _mk_prompt(aic, rep, cells[:m])
hl_md(user['content'])

```xml
<context>
  <nb_cells>
    <cell type="markdown">
      <source>We have extracted the following snippet from inside a web page's Javascript, for further processing:</source>
    </cell>
    <cell type="code">
      <source>snip = '13], "nums":[1,4,6], "chars":'</source>
    </cell>
    <cell type="code">
      <source>snip</source>
      <outputs>
        <output type="execute_result">'13], "nums":[1,4,6], "chars":'</output>
      </outputs>
    </cell>
  </nb_cells>
</context>
<instructions>
Write code I can run to complete the `task` below. You are not expected to access the internet or run code -- please provide code that I will run in my notebook.

Write code using expert-level concise code with no comments, and using minimal vertical space (including using the ternary `if` op as appropriate).
</instructions>

<task>
Extract the 'nums' section of `snip`.
</task>
```

In [None]:
#| eval: false
Markdown(asst['content'])

```
nums = re.search(r'"nums":\[[^\]]*\]', snip).group()
```

In [None]:
#| export
def _mk_dialog(cells, ns=None):
    "Split `cells` into groups based on ai magics, and create Claude dialog messages (a flat list) via `_mk_prompt`, passing `ns` through"
    res = []
    while True:
        # Index of the next cell starting with `%%ai`; falls back to index 0 when there is none
        m = first_match(cells, lambda o: o.get('source','').startswith('%%ai'), 0)
        aic = cells[m]
        # The cell directly after the ai cell, if any, is used as its reply
        rep = cells[m+1] if m<len(cells)-1 else None
        # Cells before the ai cell become the context for this prompt
        res += _mk_prompt(aic, rep, cells[:m], ns=ns)
        # Continue from the reply cell (so it is part of the next group), or from the ai cell if it had no reply
        cells = cells[m+1 if rep else m:]
        # Stop once fewer than two cells remain
        if len(cells)<2: break
    return res

In [None]:
dialog = _mk_dialog(cells)

In [None]:
#|export
def _show_dialog(dialog):
    for o in dialog: print('- ', o['role'], ':\n', o['content'],'\n----', sep='')

In [None]:
# _show_dialog(dialog)

## Magic

In [None]:
#| export
class AiMagic:
    def __init__(self, model, shell=None):
        "Backend functionality for `create_magic`, using Claude `model`"
        self.c = AnthClient(model)
        self.usage,self.shell = [],shell or get_ipython()

    def __call__(self, x): return self.cell(cell=x)

    def _command(self, name):
        "Bound method to run for the single-word request `name`, or `None` if `name` is not a command"
        # Only public callables defined on the class count as commands. This keeps instance attributes
        # (`c`, `shell`, `usage`), dunder methods and `cell` itself from being invoked by a one-word prompt.
        if name.startswith('_') or name=='cell': return None
        meth = getattr(type(self), name, None)
        return getattr(self, name) if callable(meth) else None

    def cell(self, line='', cell='', pre='', temp=0.5):
        """Run the magic for request text `cell`, streaming Claude's reply and placing it in the next cell.

        Does nothing when `line` is '0' or `cell` is empty. `pre` is the assistant prefill (a code-fence
        opener for the code magics), `temp` the sampling temperature, and `line=='-t'` also prints the dialog.
        """
        if line=='0' or not cell: return
        is_code = pre.startswith('```')
        cells = get_cells(self.shell.user_ns['nbmeta'])
        if len(cell.split())==1:
            # A one-word request may name a command method, which is called with the cells before the dialog is built
            meth = self._command(cell.strip())
            if meth: meth(cells)
        chat = _mk_dialog(cells, ns=self.shell.user_ns)
        sp = _mk_sysp(pre, '<nb_cells' in chat[-1]['content'])
        if pre: chat.append(mk_msg(pre.strip(), role='assistant'))

        cts = pre
        # For fenced prefills drop the opening fence line, keeping any following lines (e.g. `%%bash`)
        if pre.startswith('`'): cts = ''.join(pre.splitlines(True)[1:])
        display(Markdown(cts))
        clear_output(wait=True)
        try:
            for chunk in self.c.stream(chat, sp=sp, stop=['```\n'] if is_code else None, temp=temp):
                cts += chunk
                display(Markdown(cts))
                clear_output(wait=True)
        except KeyboardInterrupt: pass  # interrupting keeps whatever has been streamed so far
        display('Done') # (I need this or the next clear_output is ignored :shrug:)
        clear_output() # Remove once fully done
        cts = cts.strip() # Remove superfluous line ending
        if line=='-t': _show_dialog(chat)
        set_next_cell(self.shell, cts, code=is_code)

In [None]:
#| export
def create_magic(model=None, sysp=None, nm='ai', shell=None):
    "Register the `nm` family of line/cell magics on `shell`, backed by Claude `model` (`sysp` is accepted but currently unused)"
    model = model or models[-1]
    nm = str(nm)
    shell = shell or get_ipython()
    r = AiMagic(model, shell)

    # Front-end snippet that adds a 'magic_markdown' highlight mode for cells starting with `%%ai`
    js = '''require(['base/js/namespace', 'notebook/js/codecell'], function(j, c) {
    c.CodeCell.options_default.highlight_modes['magic_markdown'] = {'reg':['^%%ai']} ;
    j.notebook.get_cells().forEach(function(cell){
        if (cell.cell_type === 'code'){ cell.auto_highlight(); }
    });
});'''
    display(Javascript(js))
    clear_output()

    def run_magic(line, cell=None, pre=''): return r.cell(line, cell, pre)
    # Magic-name suffix -> assistant prefill that selects the reply style
    variants = {'': '', 'p': '```python', 's': '```\n%%bash\n', 'j': '```\n%%javascript\n', 'o': '- '}
    for suffix,pre in variants.items():
        shell.register_magic_function(partial(run_magic, pre=pre), 'line_cell', nm+suffix)

In [None]:
create_magic(models[-1])

In [None]:
#|export
def replace_req(cells, replace):
    "Replace the request text of the last cell in `cells` with `replace`, keeping its first (magic) line; edits the cell in place"
    cell = cells[-1]
    # Split without line endings so the join adds exactly one newline; an empty source keeps an empty first line
    lines = cell['source'].splitlines()
    st = lines[0] if lines else ''
    cell['source'] = '\n'.join([st,replace])

## Patches and examples

In [None]:
#|export
@patch
def explain(self:AiMagic, cells):
    "Command: swap the request in the last of `cells` for one asking for an explanation"
    replace_req(cells, "Explain clearly and concisely what's happening here.")

In [None]:
#|export
@patch
def fix(self:AiMagic, cells):
    "Command: swap the request in the last of `cells` for one asking for a fix to the most recent error"
    replace_req(cells, "Provide code to fix the most recent error in the notebook.")

In [None]:
%ai reset

In [None]:
a,b = 7,5

In [None]:
%%aip 0
Set `minval` to the minimum of two values `a` and `b`

In [None]:
minval = a if a < b else b

In [None]:
%%ai 0
explain

The code `minval = a if a < b else b` uses a ternary conditional expression to set the value of `minval` to the minimum of `a` and `b`. 

The ternary operator takes the form `value_if_true if condition else value_if_false`. In this case, `a if a < b` evaluates to `a` if `a` is less than `b`, and `b` otherwise.

## Export -

In [None]:
#|hide
#|eval: false
from nbdev.doclinks import nbdev_export
nbdev_export()