## Notebook Converter 1

In [2]:
CHILL_BROWN='#948979'
SOLARIZED_BACKGROUND_COLOR='#fdf4e0'

import json
import re
from pygments import highlight
from pygments.lexers import PythonLexer
from pygments.formatters import SvgFormatter
from pygments.styles import get_style_by_name
import xml.etree.ElementTree as ET

def extract_cell_data(cell):
    """Extract code, outputs, and execution count from a Jupyter notebook cell."""
    if cell.get('cell_type') != 'code':
        return None, None, None
    
    # Extract source code
    source = cell.get('source', [])
    if isinstance(source, list):
        code = ''.join(source)
    else:
        code = source
    
    # Extract execution count
    execution_count = cell.get('execution_count')
    
    # Extract outputs
    outputs = cell.get('outputs', [])
    output_text = []
    
    for output in outputs:
        if 'data' in output:
            # Handle different output types
            if 'text/plain' in output['data']:
                plain_text = output['data']['text/plain']
                if isinstance(plain_text, list):
                    output_text.extend(plain_text)
                else:
                    output_text.append(plain_text)
        elif 'text' in output:
            # Handle direct text output
            text_content = output['text']
            if isinstance(text_content, list):
                output_text.extend(text_content)
            else:
                output_text.append(text_content)
    
    return code.strip(), '\n'.join(output_text).strip(), execution_count

def create_text_element(text, x, y, font_family='Menlo', font_size=12, fill='black', font_weight='normal'):
    """Create an SVG text element."""
    text_elem = ET.Element('text')
    text_elem.set('x', str(x))
    text_elem.set('y', str(y))
    text_elem.set('font-family', font_family)
    text_elem.set('font-size', f'{font_size}px')
    text_elem.set('fill', fill)
    text_elem.set('font-weight', font_weight)
    text_elem.text = text
    return text_elem

def create_multiline_text(text, x, y, font_family='Menlo', font_size=12, fill='black', line_height=1.4):
    """Create SVG text elements for multiline text."""
    lines = text.split('\n')
    text_group = ET.Element('g')
    
    for i, line in enumerate(lines):
        if line.strip():  # Only create elements for non-empty lines
            text_elem = create_text_element(
                line, x, y + (i * font_size * line_height), 
                font_family, font_size, fill
            )
            text_group.append(text_elem)
    
    return text_group, len(lines) * font_size * line_height

def create_cell_svg_group(cell_data, y_offset, font_family='Menlo', font_size=12, max_width=800):
    """
    Create an SVG group element for a single cell at a given y offset.
    
    Args:
        cell_data (dict): Jupyter cell dictionary
        y_offset (float): Y position to start this cell
        font_family (str): Font family to use
        font_size (int): Font size in pixels
        max_width (int): Maximum width for the cell
    
    Returns:
        tuple: (svg_group_element, height_used)
    """
    
    code, output_text, execution_count = extract_cell_data(cell_data)
    
    if code is None:
        return None, 0
    
    # Create lexer and formatter for Python code
    lexer = PythonLexer()
    formatter = SvgFormatter(
        style='solarized-light',
        fontfamily=font_family,
        fontsize=f'{font_size}px',
        linenos=False,
        noclasses=True,
        nobackground=True
    )
    
    # Generate highlighted code SVG
    code_svg = highlight(code, lexer, formatter)
    code_root = ET.fromstring(code_svg)
    
    # Create group for this cell
    cell_group = ET.Element('g')
    
    # Constants for layout
    margin = 20
    execution_count_width = 60
    cell_padding = 12
    line_height = 1.4
    cell_spacing = 0  # Space between cells
    
    # Calculate dimensions
    code_lines = len(code.split('\n'))
    code_height = code_lines * font_size * line_height
    
    # Start position within this cell
    current_y = y_offset
    
    # Execution count for input
    exec_count_text = f"[{execution_count or ' '}]:"
    exec_count_elem = create_text_element(
        exec_count_text, margin, current_y + font_size,
        font_family, font_size, CHILL_BROWN, 'normal'
    )
    cell_group.append(exec_count_elem)
    
    # Input code area with background
    code_x = margin + execution_count_width
    code_y = current_y
    
    # Create background rectangle for input code
    code_width = min(max_width - code_x - margin, max(400, len(max(code.split('\n'), key=len)) * font_size * 0.6))
    input_bg = ET.Element('rect')
    input_bg.set('x', str(code_x - cell_padding))
    input_bg.set('y', str(code_y - cell_padding//2))
    input_bg.set('width', str(code_width + 2 * cell_padding))
    input_bg.set('height', str(code_height + cell_padding))
    input_bg.set('fill', SOLARIZED_BACKGROUND_COLOR)  
    input_bg.set('stroke', CHILL_BROWN) 
    input_bg.set('opacity', '0.2') 
    input_bg.set('stroke-width', '1')
    cell_group.append(input_bg)
    
    # Add the syntax-highlighted code
    code_group = ET.Element('g')
    code_group.set('transform', f'translate({code_x}, {code_y})')
    
    # Extract the highlighted content from pygments SVG
    for elem in code_root:
        if elem.tag.endswith('g') or elem.tag.endswith('text'):
            code_group.append(elem)
    
    cell_group.append(code_group)
    
    # Move to output section
    input_output_spacing = cell_padding // 2  # SW Noodling
    current_y += code_height + input_output_spacing
    
    # Handle outputs if they exist
    if output_text and output_text.strip():
        # Execution count for output
        output_exec_count = create_text_element(
            exec_count_text, margin, current_y + font_size,
            font_family, font_size, CHILL_BROWN, 'normal'
        )
        cell_group.append(output_exec_count)
        
        # Output text
        output_group, output_height = create_multiline_text(
            output_text, code_x, current_y + font_size,
            font_family, font_size, '#333'
        )
        cell_group.append(output_group)
        
        current_y += output_height + cell_padding  # Add some space after output
    
    # NOW calculate the total cell height
    cell_height = current_y - y_offset
    
    # Add spacing after cell (this controls spacing between different cells)
    cell_height += cell_spacing  # This is your 0 value
        
    return cell_group, cell_height

# def create_svg_from_cell(cell_data, output_path=None, font_family='Menlo', font_size=12):
#     """
#     Convert Jupyter cell (with input/output) to SVG with syntax highlighting.
#     (Keeping original function for backward compatibility)
#     """
    
#     code, output_text, execution_count = extract_cell_data(cell_data)
    
#     if code is None:
#         return None
    
#     # Create lexer and formatter for Python code
#     lexer = PythonLexer()
#     formatter = SvgFormatter(
#         style='solarized-light',
#         fontfamily=font_family,
#         fontsize=f'{font_size}px',
#         linenos=False,
#         noclasses=True,
#         nobackground=True
#     )
    
#     # Generate highlighted code SVG
#     code_svg = highlight(code, lexer, formatter)
#     code_root = ET.fromstring(code_svg)
    
#     # Create the main SVG container
#     svg = ET.Element('svg')
#     svg.set('xmlns', 'http://www.w3.org/2000/svg')
    
#     # Constants for layout
#     margin = 20
#     execution_count_width = 60
#     cell_padding = 12
#     line_height = 1.4
    
#     # Calculate dimensions
#     code_lines = len(code.split('\n'))
#     code_height = code_lines * font_size * line_height
    
#     # Start position
#     current_y = margin
    
#     # Execution count for input
#     exec_count_text = f"[{execution_count or ' '}]:"
#     exec_count_elem = create_text_element(
#         exec_count_text, margin, current_y + font_size,
#         font_family, font_size, CHILL_BROWN, 'normal'
#     )
#     svg.append(exec_count_elem)
    
#     # Input code area with background
#     code_x = margin + execution_count_width
#     code_y = current_y
    
#     # Create background rectangle for input code
#     input_bg = ET.Element('rect')
#     input_bg.set('x', str(code_x - cell_padding))
#     input_bg.set('y', str(code_y - cell_padding//2))
#     input_bg.set('width', str(max(400, len(max(code.split('\n'), key=len)) * font_size * 0.6) + 2 * cell_padding))
#     input_bg.set('height', str(code_height + cell_padding))
#     input_bg.set('fill', SOLARIZED_BACKGROUND_COLOR)  
#     input_bg.set('stroke', CHILL_BROWN) 
#     input_bg.set('opacity', '0.2') 
#     input_bg.set('stroke-width', '1')
#     svg.append(input_bg)
    
#     # Add the syntax-highlighted code
#     code_group = ET.Element('g')
#     code_group.set('transform', f'translate({code_x}, {code_y})')
    
#     # Extract the highlighted content from pygments SVG
#     for elem in code_root:
#         if elem.tag.endswith('g') or elem.tag.endswith('text'):
#             code_group.append(elem)
    
#     svg.append(code_group)
    
#     # Move to output section
#     current_y += code_height + cell_padding * 2
    
#     # Handle outputs if they exist
#     total_height = current_y
#     svg_width = max(600, code_x + 400)
    
#     if output_text and output_text.strip():
#         # Execution count for output
#         output_exec_count = create_text_element(
#             exec_count_text, margin, current_y + font_size,
#             font_family, font_size, CHILL_BROWN, 'normal'
#         )
#         svg.append(output_exec_count)
        
#         # Output text
#         output_group, output_height = create_multiline_text(
#             output_text, code_x, current_y + font_size,
#             font_family, font_size, '#333'
#         )
#         svg.append(output_group)
        
#         total_height = current_y + output_height + margin
    
#     # Set final SVG dimensions
#     svg.set('width', f'{svg_width}px')
#     svg.set('height', f'{total_height}px')
#     svg.set('viewBox', f'0 0 {svg_width} {total_height}')
    
#     # Add overall background
#     overall_bg = ET.Element('rect')
#     overall_bg.set('x', '0')
#     overall_bg.set('y', '0')
#     overall_bg.set('width', str(svg_width))
#     overall_bg.set('height', str(total_height))
#     overall_bg.set('fill', '#ffffff')
#     svg.insert(0, overall_bg)
    
#     # Convert to string
#     svg_string = ET.tostring(svg, encoding='unicode')
#     svg_string = '<?xml version="1.0" encoding="UTF-8"?>\n' + svg_string
    
#     if output_path:
#         with open(output_path, 'w', encoding='utf-8') as f:
#             f.write(svg_string)
#         print(f"SVG saved to: {output_path}")
    
#     return svg_string

# def process_jupyter_cell(cell_data, output_path=None):
#     """
#     Process a single Jupyter cell and convert to SVG.
#     (Keeping original function for backward compatibility)
#     """
#     code, output_text, execution_count = extract_cell_data(cell_data)
#     if code is None or not code.strip():
#         print("Cell is empty or not a code cell")
#         return None
    
#     return create_svg_from_cell(cell_data, output_path)

def process_jupyter_notebook(notebook_path, output_path='notebook.svg', font_family='Menlo', font_size=12, max_width=900):
    """
    Process an entire Jupyter notebook and convert all code cells to a single SVG file.
    
    Args:
        notebook_path (str): Path to the .ipynb file
        output_path (str): Path to save the combined SVG file
        font_family (str): Font family to use
        font_size (int): Font size in pixels
        max_width (int): Maximum width of the SVG
    
    Returns:
        str: SVG content as string
    """
    import os
    
    with open(notebook_path, 'r', encoding='utf-8') as f:
        notebook = json.load(f)
    
    # Create the main SVG container
    svg = ET.Element('svg')
    svg.set('xmlns', 'http://www.w3.org/2000/svg')
    
    margin = 20
    current_y = margin
    actual_width = 0
    
    # Process each code cell
    code_cell_count = 0
    for i, cell in enumerate(notebook.get('cells', [])):
        if cell.get('cell_type') == 'code':
            code, _, _ = extract_cell_data(cell)
            if code and code.strip():
                code_cell_count += 1
                
                # Create SVG group for this cell
                cell_group, cell_height = create_cell_svg_group(
                    cell, current_y, font_family, font_size, max_width
                )
                
                if cell_group is not None:
                    svg.append(cell_group)
                    current_y += cell_height
                    
                    # Update actual width based on content
                    # This is a rough estimate - you might want to make this more precise
                    code_lines = code.split('\n')
                    max_line_length = max(len(line) for line in code_lines) if code_lines else 0
                    estimated_width = margin + 60 + max_line_length * font_size * 0.6 + 40
                    actual_width = max(actual_width, min(estimated_width, max_width))
    
    if code_cell_count == 0:
        print("No code cells found in notebook")
        return None
    
    # Final dimensions
    total_height = current_y + margin
    final_width = max(600, actual_width)
    
    svg.set('width', f'{final_width}px')
    svg.set('height', f'{total_height}px')
    svg.set('viewBox', f'0 0 {final_width} {total_height}')
    
    # Add overall background
    overall_bg = ET.Element('rect')
    overall_bg.set('x', '0')
    overall_bg.set('y', '0')
    overall_bg.set('width', str(final_width))
    overall_bg.set('height', str(total_height))
    overall_bg.set('fill', '#ffffff')
    svg.insert(0, overall_bg)
    
    # Convert to string
    svg_string = ET.tostring(svg, encoding='unicode')
    svg_string = '<?xml version="1.0" encoding="UTF-8"?>\n' + svg_string
    
    # Save to file
    output_dir = os.path.dirname(output_path) if os.path.dirname(output_path) else '.'
    os.makedirs(output_dir, exist_ok=True)
    
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(svg_string)
    
    print(f"Combined notebook SVG saved to: {output_path}")
    print(f"Processed {code_cell_count} code cells")
    
    return svg_string

In [3]:
# out_path='/Users/stephen/Stephencwelch Dropbox/welch_labs/ai_book/3_backprop_2/graphics/book_chapter_3_1.svg'
# process_jupyter_cell(example_cell_with_output, out_path)

In [4]:
in_path="book_chapter_3_1.ipynb"
out_path='/Users/stephen/Stephencwelch Dropbox/welch_labs/ai_book/3_backprop_2/graphics/book_chapter_3_1.svg'
process_jupyter_notebook(in_path, out_path)

Combined notebook SVG saved to: /Users/stephen/Stephencwelch Dropbox/welch_labs/ai_book/3_backprop_2/graphics/book_chapter_3_1.svg
Processed 8 code cells


'<?xml version="1.0" encoding="UTF-8"?>\n<svg xmlns:ns0="http://www.w3.org/2000/svg" xmlns="http://www.w3.org/2000/svg" width="681.6px" height="863.1999999999999px" viewBox="0 0 681.6 863.1999999999999"><rect x="0" y="0" width="681.6" height="863.1999999999999" fill="#ffffff" /><g><text x="20" y="32" font-family="Menlo" font-size="12px" fill="#948979" font-weight="normal">[1]:</text><rect x="68" y="14" width="456.0" height="62.4" fill="#fdf4e0" stroke="#948979" opacity="0.2" stroke-width="1" /><g transform="translate(80, 20)"><ns0:g font-family="Menlo" font-size="12px">\n<ns0:text x="0" y="12" xml:space="preserve"><ns0:tspan fill="#cb4b16">import</ns0:tspan><ns0:tspan fill="#657b83">\xa0</ns0:tspan><ns0:tspan fill="#268bd2">torch</ns0:tspan><ns0:tspan fill="#657b83" /></ns0:text>\n<ns0:text x="0" y="29" xml:space="preserve"><ns0:tspan fill="#657b83" /><ns0:tspan fill="#cb4b16">from</ns0:tspan><ns0:tspan fill="#657b83">\xa0</ns0:tspan><ns0:tspan fill="#268bd2">transformers</ns0:tspan><n

In [None]:
# Your example cell data
sample_cell =   {
   "cell_type": "code",
   "execution_count": 2,
   "id": "d49540bb-6a89-4db7-83f5-8f230f7a5f54",
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
    "device='cuda'\n",
    "\n",
    "model_id = \"meta-llama/Llama-3.2-1B\"\n",
    "\n",
    "model = AutoModelForCausalLM.from_pretrained(model_id).to(device)\n",
    "tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
    "\n",
    "prompt = \"The capital of France is\"\n",
    "inputs = tokenizer(prompt, return_tensors=\"pt\").to(device)\n",
    "\n",
    "with torch.no_grad():\n",
    "    outputs = model(inputs['input_ids'])\n",
    "my_probs=torch.nn.functional.softmax(outputs.logits, dim=-1)\n",
    "\n",
    "for i in torch.argsort(my_probs[0,-1, :].detach().cpu(), descending=True)[:5]:\n",
    "    print(i, round(my_probs[0, -1, i].item(),5), tokenizer.decode([i]))"
   ]
  }

example_cell_with_output = {
    "cell_type": "code",
    "execution_count": 11,
    "id": "5932bd35-8716-4d26-b4a5-57fb66826236",
    "metadata": {},
    "outputs": [
        {
            "data": {
                "text/plain": [
                    "tensor([[128000,    791,   6864,    315,   9822,    374]], device='cuda:0')"
                ]
            },
            "execution_count": 11,
            "metadata": {},
            "output_type": "execute_result"
        }
    ],
    "source": [
            "prompt = \"The capital of France is\"\n",
            "inputs = tokenizer(prompt, return_tensors=\"pt\").to(device)\n",
            "inputs['input_ids']"
    ]
}

In [None]:
CHILL_BROWN='#948979'
SOLARIZED_BACKGROUND_COLOR='#fdf4e0'

import json
import re
from pygments import highlight
from pygments.lexers import PythonLexer
from pygments.formatters import SvgFormatter
from pygments.styles import get_style_by_name
import xml.etree.ElementTree as ET

def extract_cell_data(cell):
    """Extract code, outputs, and execution count from a Jupyter notebook cell."""
    if cell.get('cell_type') != 'code':
        return None, None, None
    
    # Extract source code
    source = cell.get('source', [])
    if isinstance(source, list):
        code = ''.join(source)
    else:
        code = source
    
    # Extract execution count
    execution_count = cell.get('execution_count')
    
    # Extract outputs
    outputs = cell.get('outputs', [])
    output_text = []
    
    for output in outputs:
        if 'data' in output:
            # Handle different output types
            if 'text/plain' in output['data']:
                plain_text = output['data']['text/plain']
                if isinstance(plain_text, list):
                    output_text.extend(plain_text)
                else:
                    output_text.append(plain_text)
        elif 'text' in output:
            # Handle direct text output
            text_content = output['text']
            if isinstance(text_content, list):
                output_text.extend(text_content)
            else:
                output_text.append(text_content)
    
    return code.strip(), '\n'.join(output_text).strip(), execution_count

def create_text_element(text, x, y, font_family='Menlo', font_size=12, fill='black', font_weight='normal'):
    """Create an SVG text element."""
    text_elem = ET.Element('text')
    text_elem.set('x', str(x))
    text_elem.set('y', str(y))
    text_elem.set('font-family', font_family)
    text_elem.set('font-size', f'{font_size}px')
    text_elem.set('fill', fill)
    text_elem.set('font-weight', font_weight)
    text_elem.text = text
    return text_elem

def create_multiline_text(text, x, y, font_family='Menlo', font_size=12, fill='black', line_height=1.4):
    """Create SVG text elements for multiline text."""
    lines = text.split('\n')
    text_group = ET.Element('g')
    
    for i, line in enumerate(lines):
        if line.strip():  # Only create elements for non-empty lines
            text_elem = create_text_element(
                line, x, y + (i * font_size * line_height), 
                font_family, font_size, fill
            )
            text_group.append(text_elem)
    
    return text_group, len(lines) * font_size * line_height

def create_cell_svg_group(cell_data, y_offset, font_family='Menlo', font_size=12, max_width=800):
    """
    Create an SVG group element for a single cell at a given y offset.
    
    Args:
        cell_data (dict): Jupyter cell dictionary
        y_offset (float): Y position to start this cell
        font_family (str): Font family to use
        font_size (int): Font size in pixels
        max_width (int): Maximum width for the cell
    
    Returns:
        tuple: (svg_group_element, height_used)
    """
    
    code, output_text, execution_count = extract_cell_data(cell_data)
    
    if code is None:
        return None, 0
    
    # Create lexer and formatter for Python code
    lexer = PythonLexer()
    formatter = SvgFormatter(
        style='solarized-light',
        fontfamily=font_family,
        fontsize=f'{font_size}px',
        linenos=False,
        noclasses=True,
        nobackground=True
    )
    
    # Generate highlighted code SVG
    code_svg = highlight(code, lexer, formatter)
    code_root = ET.fromstring(code_svg)
    
    # Create group for this cell
    cell_group = ET.Element('g')
    
    # Constants for layout
    margin = 20
    execution_count_width = 60
    cell_padding = 12
    line_height = 1.4
    cell_spacing = 0  # Space between cells
    
    # Calculate dimensions
    code_lines = len(code.split('\n'))
    code_height = code_lines * font_size * line_height
    
    # Start position within this cell
    current_y = y_offset
    
    # Execution count for input
    exec_count_text = f"[{execution_count or ' '}]:"
    exec_count_elem = create_text_element(
        exec_count_text, margin, current_y + font_size,
        font_family, font_size, CHILL_BROWN, 'normal'
    )
    cell_group.append(exec_count_elem)
    
    # Input code area with background
    code_x = margin + execution_count_width
    code_y = current_y
    
    # Create background rectangle for input code
    code_width = min(max_width - code_x - margin, max(400, len(max(code.split('\n'), key=len)) * font_size * 0.6))
    input_bg = ET.Element('rect')
    input_bg.set('x', str(code_x - cell_padding))
    input_bg.set('y', str(code_y - cell_padding//2))
    input_bg.set('width', str(code_width + 2 * cell_padding))
    input_bg.set('height', str(code_height + cell_padding))
    input_bg.set('fill', SOLARIZED_BACKGROUND_COLOR)  
    input_bg.set('stroke', CHILL_BROWN) 
    input_bg.set('opacity', '0.2') 
    input_bg.set('stroke-width', '1')
    cell_group.append(input_bg)
    
    # Add the syntax-highlighted code
    code_group = ET.Element('g')
    code_group.set('transform', f'translate({code_x}, {code_y})')
    
    # Extract the highlighted content from pygments SVG
    for elem in code_root:
        if elem.tag.endswith('g') or elem.tag.endswith('text'):
            code_group.append(elem)
    
    cell_group.append(code_group)
    
    # Move to output section
    input_output_spacing = cell_padding // 2  # SW Noodling
    current_y += code_height + input_output_spacing
    
    # Handle outputs if they exist
    if output_text and output_text.strip():
        # Execution count for output
        output_exec_count = create_text_element(
            exec_count_text, margin, current_y + font_size,
            font_family, font_size, CHILL_BROWN, 'normal'
        )
        cell_group.append(output_exec_count)
        
        # Output text
        output_group, output_height = create_multiline_text(
            output_text, code_x, current_y + font_size,
            font_family, font_size, '#333'
        )
        cell_group.append(output_group)
        
        current_y += output_height + cell_padding  # Add some space after output
    
    # NOW calculate the total cell height
    cell_height = current_y - y_offset
    
    # Add spacing after cell (this controls spacing between different cells)
    cell_height += cell_spacing  # This is your 0 value
        
    return cell_group, cell_height

def process_jupyter_notebook(notebook_path, output_path='notebook.svg', font_family='Menlo', font_size=12, max_width=900):
    """
    Process an entire Jupyter notebook and convert all code cells to a single SVG file.
    
    Args:
        notebook_path (str): Path to the .ipynb file
        output_path (str): Path to save the combined SVG file
        font_family (str): Font family to use
        font_size (int): Font size in pixels
        max_width (int): Maximum width of the SVG
    
    Returns:
        str: SVG content as string
    """
    import os
    
    with open(notebook_path, 'r', encoding='utf-8') as f:
        notebook = json.load(f)
    
    # Create the main SVG container
    svg = ET.Element('svg')
    svg.set('xmlns', 'http://www.w3.org/2000/svg')
    
    margin = 20
    current_y = margin
    actual_width = 0
    
    # Process each code cell
    code_cell_count = 0
    for i, cell in enumerate(notebook.get('cells', [])):
        if cell.get('cell_type') == 'code':
            code, _, _ = extract_cell_data(cell)
            if code and code.strip():
                code_cell_count += 1
                
                # Create SVG group for this cell
                cell_group, cell_height = create_cell_svg_group(
                    cell, current_y, font_family, font_size, max_width
                )
                
                if cell_group is not None:
                    svg.append(cell_group)
                    current_y += cell_height
                    
                    # Update actual width based on content
                    # This is a rough estimate - you might want to make this more precise
                    code_lines = code.split('\n')
                    max_line_length = max(len(line) for line in code_lines) if code_lines else 0
                    estimated_width = margin + 60 + max_line_length * font_size * 0.6 + 40
                    actual_width = max(actual_width, min(estimated_width, max_width))
    
    if code_cell_count == 0:
        print("No code cells found in notebook")
        return None
    
    # Final dimensions
    total_height = current_y + margin
    final_width = max(600, actual_width)
    
    svg.set('width', f'{final_width}px')
    svg.set('height', f'{total_height}px')
    svg.set('viewBox', f'0 0 {final_width} {total_height}')
    
    # Add overall background
    overall_bg = ET.Element('rect')
    overall_bg.set('x', '0')
    overall_bg.set('y', '0')
    overall_bg.set('width', str(final_width))
    overall_bg.set('height', str(total_height))
    overall_bg.set('fill', '#ffffff')
    svg.insert(0, overall_bg)
    
    # Convert to string
    svg_string = ET.tostring(svg, encoding='unicode')
    svg_string = '<?xml version="1.0" encoding="UTF-8"?>\n' + svg_string
    
    # Save to file
    output_dir = os.path.dirname(output_path) if os.path.dirname(output_path) else '.'
    os.makedirs(output_dir, exist_ok=True)
    
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(svg_string)
    
    print(f"Combined notebook SVG saved to: {output_path}")
    print(f"Processed {code_cell_count} code cells")
    
    return svg_string

In [None]:
CHILL_BROWN='#948979'
SOLARIZED_BACKGROUND_COLOR='#fdf4e0'

import json
import re
from pygments import highlight
from pygments.lexers import PythonLexer
from pygments.formatters import SvgFormatter
from pygments.styles import get_style_by_name
import xml.etree.ElementTree as ET

def extract_cell_data(cell):
    """Extract code, outputs, and execution count from a Jupyter notebook cell."""
    if cell.get('cell_type') != 'code':
        return None, None, None
    
    # Extract source code
    source = cell.get('source', [])
    if isinstance(source, list):
        code = ''.join(source)
    else:
        code = source
    
    # Extract execution count
    execution_count = cell.get('execution_count')
    
    # Extract outputs
    outputs = cell.get('outputs', [])
    output_text = []
    
    for output in outputs:
        if 'data' in output:
            # Handle different output types
            if 'text/plain' in output['data']:
                plain_text = output['data']['text/plain']
                if isinstance(plain_text, list):
                    output_text.extend(plain_text)
                else:
                    output_text.append(plain_text)
        elif 'text' in output:
            # Handle direct text output
            text_content = output['text']
            if isinstance(text_content, list):
                output_text.extend(text_content)
            else:
                output_text.append(text_content)
    
    return code.strip(), '\n'.join(output_text).strip(), execution_count

def create_text_element(text, x, y, font_family='Menlo', font_size=12, fill='black', font_weight='normal'):
    """Create an SVG text element."""
    text_elem = ET.Element('text')
    text_elem.set('x', str(x))
    text_elem.set('y', str(y))
    text_elem.set('font-family', font_family)
    text_elem.set('font-size', f'{font_size}px')
    text_elem.set('fill', fill)
    text_elem.set('font-weight', font_weight)
    text_elem.text = text
    return text_elem

def create_multiline_text(text, x, y, font_family='Menlo', font_size=12, fill='black', line_height=1.4):
    """Create SVG text elements for multiline text."""
    lines = text.split('\n')
    text_group = ET.Element('g')
    
    for i, line in enumerate(lines):
        if line.strip():  # Only create elements for non-empty lines
            text_elem = create_text_element(
                line, x, y + (i * font_size * line_height), 
                font_family, font_size, fill
            )
            text_group.append(text_elem)
    
    return text_group, len(lines) * font_size * line_height

def create_svg_from_cell(cell_data, output_path=None, font_family='Menlo', font_size=12):
    """
    Convert Jupyter cell (with input/output) to SVG with syntax highlighting.
    
    Args:
        cell_data (dict): Jupyter cell dictionary
        output_path (str): Path to save SVG file (optional)
        font_family (str): Font family to use
        font_size (int): Font size in pixels
    
    Returns:
        str: SVG content as string
    """
    
    code, output_text, execution_count = extract_cell_data(cell_data)
    
    if code is None:
        return None
    
    # Create lexer and formatter for Python code
    lexer = PythonLexer()
    formatter = SvgFormatter(
        style='solarized-light',
        fontfamily=font_family,
        fontsize=f'{font_size}px',
        linenos=False,
        noclasses=True,
        nobackground=True  # We'll add our own background
    )
    
    # Generate highlighted code SVG
    code_svg = highlight(code, lexer, formatter)
    code_root = ET.fromstring(code_svg)
    
    # Create the main SVG container
    svg = ET.Element('svg')
    svg.set('xmlns', 'http://www.w3.org/2000/svg')
    
    # Constants for layout
    margin = 20
    execution_count_width = 60
    cell_padding = 12
    line_height = 1.4
    
    # Calculate dimensions
    code_lines = len(code.split('\n'))
    code_height = code_lines * font_size * line_height
    
    # Start position
    current_y = margin
    
    # Execution count for input
    exec_count_text = f"[{execution_count or ' '}]:"
    exec_count_elem = create_text_element(
        exec_count_text, margin, current_y + font_size,
        font_family, font_size, CHILL_BROWN, 'normal'
    )
    svg.append(exec_count_elem)
    
    # Input code area with background
    code_x = margin + execution_count_width
    code_y = current_y
    
    # Create background rectangle for input code (like Jupyter's light gray background)
    input_bg = ET.Element('rect')
    input_bg.set('x', str(code_x - cell_padding))
    input_bg.set('y', str(code_y - cell_padding//2))
    input_bg.set('width', str(max(400, len(max(code.split('\n'), key=len)) * font_size * 0.6) + 2 * cell_padding))
    input_bg.set('height', str(code_height + cell_padding))
    input_bg.set('fill', SOLARIZED_BACKGROUND_COLOR)  
    input_bg.set('stroke', CHILL_BROWN) 
    input_bg.set('opacity', '0.2') 
    input_bg.set('stroke-width', '1')
    svg.append(input_bg)
    
    # Add the syntax-highlighted code
    # We need to extract the content from the pygments SVG and position it
    code_group = ET.Element('g')
    code_group.set('transform', f'translate({code_x}, {code_y})') #{code_y + font_size})')
    
    # Extract the highlighted content from pygments SVG
    for elem in code_root:
        if elem.tag.endswith('g') or elem.tag.endswith('text'):
            code_group.append(elem)
    
    svg.append(code_group)
    
    # Move to output section
    current_y += code_height + cell_padding * 2
    
    # Handle outputs if they exist
    total_height = current_y
    svg_width = max(600, code_x + 400)
    
    if output_text and output_text.strip():
        # Execution count for output
        output_exec_count = create_text_element(
            exec_count_text, margin, current_y + font_size,
            font_family, font_size, CHILL_BROWN, 'normal'
        )
        svg.append(output_exec_count)
        
        # Output text (no background, just plain text)
        output_group, output_height = create_multiline_text(
            output_text, code_x, current_y + font_size,
            font_family, font_size, '#333'
        )
        svg.append(output_group)
        
        total_height = current_y + output_height + margin
    
    # Set final SVG dimensions
    svg.set('width', f'{svg_width}px')
    svg.set('height', f'{total_height}px')
    svg.set('viewBox', f'0 0 {svg_width} {total_height}')
    
    # Add overall background
    overall_bg = ET.Element('rect')
    overall_bg.set('x', '0')
    overall_bg.set('y', '0')
    overall_bg.set('width', str(svg_width))
    overall_bg.set('height', str(total_height))
    overall_bg.set('fill', '#ffffff')
    svg.insert(0, overall_bg)
    
    # Convert to string
    svg_string = ET.tostring(svg, encoding='unicode')
    svg_string = '<?xml version="1.0" encoding="UTF-8"?>\n' + svg_string
    
    if output_path:
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(svg_string)
        print(f"SVG saved to: {output_path}")
    
    return svg_string

def process_jupyter_cell(cell_data, output_path=None):
    """
    Process a single Jupyter cell and convert to SVG.
    
    Args:
        cell_data (dict): Jupyter cell dictionary
        output_path (str): Path to save SVG file
    
    Returns:
        str: SVG content or None if not a code cell
    """
    code, output_text, execution_count = extract_cell_data(cell_data)
    if code is None or not code.strip():
        print("Cell is empty or not a code cell")
        return None
    
    return create_svg_from_cell(cell_data, output_path)

def process_jupyter_notebook(notebook_path, output_dir='./svg_output'):
    """
    Process an entire Jupyter notebook and convert code cells to SVG files.
    
    Args:
        notebook_path (str): Path to the .ipynb file
        output_dir (str): Directory to save SVG files
    """
    import os
    
    with open(notebook_path, 'r', encoding='utf-8') as f:
        notebook = json.load(f)
    
    os.makedirs(output_dir, exist_ok=True)
    
    code_cell_count = 0
    for i, cell in enumerate(notebook.get('cells', [])):
        if cell.get('cell_type') == 'code':
            code, _, _ = extract_cell_data(cell)
            if code and code.strip():
                code_cell_count += 1
                output_path = os.path.join(output_dir, f'cell_{code_cell_count:02d}.svg')
                process_jupyter_cell(cell, output_path)

In [41]:
converter = JupyterToSVGConverter(
    font_family='Menlo',
    font_size=13,
    line_height=1.4,
    page_width=800
)

In [None]:
import json
import re
from pygments import highlight
from pygments.lexers import PythonLexer
from pygments.formatters import SvgFormatter
from pygments.styles import get_style_by_name
import xml.etree.ElementTree as ET

def extract_cell_data(cell):
    """Extract code, outputs, and execution count from a Jupyter notebook cell."""
    if cell.get('cell_type') != 'code':
        return None, None, None
    
    # Extract source code
    source = cell.get('source', [])
    if isinstance(source, list):
        code = ''.join(source)
    else:
        code = source
    
    # Extract execution count
    execution_count = cell.get('execution_count')
    
    # Extract outputs
    outputs = cell.get('outputs', [])
    output_text = []
    
    for output in outputs:
        if 'data' in output:
            # Handle different output types
            if 'text/plain' in output['data']:
                plain_text = output['data']['text/plain']
                if isinstance(plain_text, list):
                    output_text.extend(plain_text)
                else:
                    output_text.append(plain_text)
        elif 'text' in output:
            # Handle direct text output
            text_content = output['text']
            if isinstance(text_content, list):
                output_text.extend(text_content)
            else:
                output_text.append(text_content)
    
    return code.strip(), '\n'.join(output_text).strip(), execution_count

def create_text_element(text, x, y, font_family='Menlo', font_size=12, fill='black', font_weight='normal'):
    """Create an SVG text element."""
    text_elem = ET.Element('text')
    text_elem.set('x', str(x))
    text_elem.set('y', str(y))
    text_elem.set('font-family', font_family)
    text_elem.set('font-size', f'{font_size}px')
    text_elem.set('fill', fill)
    text_elem.set('font-weight', font_weight)
    text_elem.text = text
    return text_elem

def create_multiline_text(text, x, y, font_family='Menlo', font_size=12, fill='black', line_height=1.4):
    """Create SVG text elements for multiline text."""
    lines = text.split('\n')
    text_group = ET.Element('g')
    
    for i, line in enumerate(lines):
        if line.strip():  # Only create elements for non-empty lines
            text_elem = create_text_element(
                line, x, y + (i * font_size * line_height), 
                font_family, font_size, fill
            )
            text_group.append(text_elem)
    
    return text_group, len(lines) * font_size * line_height

def create_svg_from_cell(cell_data, output_path=None, font_family='Menlo', font_size=12):
    """
    Convert Jupyter cell (with input/output) to SVG with syntax highlighting.
    
    Args:
        cell_data (dict): Jupyter cell dictionary
        output_path (str): Path to save SVG file (optional)
        font_family (str): Font family to use
        font_size (int): Font size in pixels
    
    Returns:
        str: SVG content as string
    """
    
    code, output_text, execution_count = extract_cell_data(cell_data)
    
    if code is None:
        return None
    
    # Create lexer and formatter for Python code
    lexer = PythonLexer()
    formatter = SvgFormatter(
        style='solarized-light',
        fontfamily=font_family,
        fontsize=f'{font_size}px',
        linenos=False,
        noclasses=True,
        nobackground=True  # We'll add our own background
    )
    
    # Generate highlighted code SVG
    code_svg = highlight(code, lexer, formatter)
    code_root = ET.fromstring(code_svg)
    
    # Create the main SVG container
    svg = ET.Element('svg')
    svg.set('xmlns', 'http://www.w3.org/2000/svg')
    
    # Constants for layout
    margin = 20
    execution_count_width = 60
    cell_padding = 12
    line_height = 1.4
    
    # Calculate dimensions
    code_lines = len(code.split('\n'))
    code_height = code_lines * font_size * line_height
    
    # Start position
    current_y = margin
    
    # Execution count for input
    exec_count_text = f"[{execution_count or ' '}]:"
    exec_count_elem = create_text_element(
        exec_count_text, margin, current_y + font_size,
        font_family, font_size, '#666', 'normal'
    )
    svg.append(exec_count_elem)
    
    # Input code area with background
    code_x = margin + execution_count_width
    code_y = current_y
    
    # Create background rectangle for input code (like Jupyter's light gray background)
    input_bg = ET.Element('rect')
    input_bg.set('x', str(code_x - cell_padding))
    input_bg.set('y', str(code_y - cell_padding//2))
    input_bg.set('width', str(max(400, len(max(code.split('\n'), key=len)) * font_size * 0.6) + 2 * cell_padding))
    input_bg.set('height', str(code_height + cell_padding))
    input_bg.set('fill', '#f7f7f7')  # Light gray background like Jupyter
    input_bg.set('stroke', '#e1e4e5')
    input_bg.set('stroke-width', '1')
    svg.append(input_bg)
    
    # Add the syntax-highlighted code
    # We need to extract the content from the pygments SVG and position it
    code_group = ET.Element('g')
    code_group.set('transform', f'translate({code_x}, {code_y})') #f'translate({code_x}, {code_y + font_size})')
    
    # Extract the highlighted content from pygments SVG
    for elem in code_root:
        if elem.tag.endswith('g') or elem.tag.endswith('text'):
            code_group.append(elem)
    
    svg.append(code_group)
    
    # Move to output section
    current_y += code_height + cell_padding * 2
    
    # Handle outputs if they exist
    total_height = current_y
    svg_width = max(600, code_x + 400)
    
    if output_text and output_text.strip():
        # Execution count for output
        output_exec_count = create_text_element(
            exec_count_text, margin, current_y + font_size,
            font_family, font_size, '#666', 'normal'
        )
        svg.append(output_exec_count)
        
        # Output text (no background, just plain text)
        output_group, output_height = create_multiline_text(
            output_text, code_x, current_y + font_size,
            font_family, font_size, '#333'
        )
        svg.append(output_group)
        
        total_height = current_y + output_height + margin
    
    # Set final SVG dimensions
    svg.set('width', f'{svg_width}px')
    svg.set('height', f'{total_height}px')
    svg.set('viewBox', f'0 0 {svg_width} {total_height}')
    
    # Add overall background
    overall_bg = ET.Element('rect')
    overall_bg.set('x', '0')
    overall_bg.set('y', '0')
    overall_bg.set('width', str(svg_width))
    overall_bg.set('height', str(total_height))
    overall_bg.set('fill', '#ffffff')
    svg.insert(0, overall_bg)
    
    # Convert to string
    svg_string = ET.tostring(svg, encoding='unicode')
    svg_string = '<?xml version="1.0" encoding="UTF-8"?>\n' + svg_string
    
    if output_path:
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(svg_string)
        print(f"SVG saved to: {output_path}")
    
    return svg_string

def process_jupyter_cell(cell_data, output_path=None):
    """
    Process a single Jupyter cell and convert to SVG.
    
    Args:
        cell_data (dict): Jupyter cell dictionary
        output_path (str): Path to save SVG file
    
    Returns:
        str: SVG content or None if not a code cell
    """
    code, output_text, execution_count = extract_cell_data(cell_data)
    if code is None or not code.strip():
        print("Cell is empty or not a code cell")
        return None
    
    return create_svg_from_cell(cell_data, output_path)

def process_jupyter_notebook(notebook_path, output_dir='./svg_output'):
    """
    Process an entire Jupyter notebook and convert code cells to SVG files.
    
    Args:
        notebook_path (str): Path to the .ipynb file
        output_dir (str): Directory to save SVG files
    """
    import os
    
    with open(notebook_path, 'r', encoding='utf-8') as f:
        notebook = json.load(f)
    
    os.makedirs(output_dir, exist_ok=True)
    
    code_cell_count = 0
    for i, cell in enumerate(notebook.get('cells', [])):
        if cell.get('cell_type') == 'code':
            code, _, _ = extract_cell_data(cell)
            if code and code.strip():
                code_cell_count += 1
                output_path = os.path.join(output_dir, f'cell_{code_cell_count:02d}.svg')
                process_jupyter_cell(cell, output_path)

# Example usage
# if __name__ == "__main__":
#     # Example cell with input and output (your provided cell)
#     example_cell_with_output = {
#         "cell_type": "code",
#         "execution_count": 11,
#         "id": "5932bd35-8716-4d26-b4a5-57fb66826236",
#         "metadata": {},
#         "outputs": [
#             {
#                 "data": {
#                     "text/plain": [
#                         "tensor([[128000,    791,   6864,    315,   9822,    374]], device='cuda:0')"
#                     ]
#                 },
#                 "execution_count": 11,
#                 "metadata": {},
#                 "output_type": "execute_result"
#             }
#         ],
#         "source": [
#             "inputs['input_ids']"
#         ]
#     }
    
#     # Process the example cell
#     svg_content = process_jupyter_cell(example_cell_with_output, "example_cell_with_output.svg")
#     print("Example cell with output processed!")
    
#     # Uncomment the line below to process an entire notebook
#     # process_jupyter_notebook("your_notebook.ipynb")

In [14]:
sample_code = """
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
device='cuda'

model_id = "meta-llama/Llama-3.2-1B"

model = AutoModelForCausalLM.from_pretrained(model_id).to(device)
tokenizer = AutoTokenizer.from_pretrained(model_id)

prompt = "The capital of France is"
inputs = tokenizer(prompt, return_tensors="pt").to(device)

with torch.no_grad():
    outputs = model(inputs['input_ids'])
my_probs=torch.nn.functional.softmax(outputs.logits, dim=-1)

for i in torch.argsort(my_probs[0,-1, :].detach().cpu(), descending=True)[:5]:
    print(i, round(my_probs[0, -1, i].item(),5), tokenizer.decode([i]))

"""

In [11]:
code_cell_to_svg(sample_code, output_file=out_path)

SVG saved to /Users/stephen/Stephencwelch Dropbox/welch_labs/ai_book/3_backprop_2/graphics/book_chapter_3_1.svg


'<?xml version="1.0"?>\n<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN" "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd">\n<svg xmlns="http://www.w3.org/2000/svg">\n<g font-family="Menlo, Monaco, Consolas, monospace" font-size="13px">\n<text x="0" y="13" xml:space="preserve"><tspan fill="#657b83">i</tspan><tspan fill="#657b83"></tspan></text>\n<text x="0" y="31" xml:space="preserve"><tspan fill="#657b83"></tspan><tspan fill="#cb4b16">import</tspan><tspan fill="#657b83">&#160;</tspan><tspan fill="#268bd2">torch</tspan><tspan fill="#657b83"></tspan></text>\n<text x="0" y="49" xml:space="preserve"><tspan fill="#657b83"></tspan><tspan fill="#cb4b16">from</tspan><tspan fill="#657b83">&#160;</tspan><tspan fill="#268bd2">transformers</tspan><tspan fill="#657b83">&#160;</tspan><tspan fill="#cb4b16">import</tspan><tspan fill="#657b83">&#160;</tspan><tspan fill="#657b83">AutoModelForCausalLM</tspan><tspan fill="#657b83">,</tspan><tspan fill="#657b83">&#160;</tspan><tspan fill="#657b83">

In [4]:
jupyter_notebook_to_svg(in_path, out_path)

TypeError: '>=' not supported between instances of 'str' and 'int'

In [18]:
print("Debugging tokenization for method calls:")
test_line = "model = AutoModelForCausalLM.from_pretrained(model_id).to(device)"
tokenize_and_format(test_line) #, debug=True)

Debugging tokenization for method calls:


[{'text': 'model', 'color': '#000000', 'is_whitespace': False},
 {'text': ' ', 'color': '#000000', 'is_whitespace': True},
 {'text': '=', 'color': '#000000', 'is_whitespace': False},
 {'text': ' ', 'color': '#000000', 'is_whitespace': True},
 {'text': 'AutoModelForCausalLM', 'color': '#000000', 'is_whitespace': False},
 {'text': '.', 'color': '#000000', 'is_whitespace': False},
 {'text': 'from_pretrained', 'color': '#000000', 'is_whitespace': False},
 {'text': '(', 'color': '#000000', 'is_whitespace': False},
 {'text': 'model_id', 'color': '#000000', 'is_whitespace': False},
 {'text': ')', 'color': '#000000', 'is_whitespace': False},
 {'text': '.', 'color': '#000000', 'is_whitespace': False},
 {'text': 'to', 'color': '#000000', 'is_whitespace': False},
 {'text': '(', 'color': '#000000', 'is_whitespace': False},
 {'text': 'device', 'color': '#000000', 'is_whitespace': False},
 {'text': ')', 'color': '#000000', 'is_whitespace': False},
 {'text': '\n', 'color': '#000000', 'is_whitespace':

In [20]:
# Direct Pygments tokenization test
from pygments.lexers import PythonLexer
lexer = PythonLexer()
tokens = list(lexer.get_tokens(test_line))

print("Raw Pygments tokens:")
for token_type, value in tokens:
    if value.strip():  # Only show non-whitespace tokens
        print(f"  '{value}' -> {token_type}")

tokenize_and_format(test_line)

Raw Pygments tokens:
  'model' -> Token.Name
  '=' -> Token.Operator
  'AutoModelForCausalLM' -> Token.Name
  '.' -> Token.Operator
  'from_pretrained' -> Token.Name
  '(' -> Token.Punctuation
  'model_id' -> Token.Name
  ')' -> Token.Punctuation
  '.' -> Token.Operator
  'to' -> Token.Name
  '(' -> Token.Punctuation
  'device' -> Token.Name
  ')' -> Token.Punctuation


[{'text': 'model', 'color': '#000000', 'is_whitespace': False},
 {'text': ' ', 'color': '#000000', 'is_whitespace': True},
 {'text': '=', 'color': '#000000', 'is_whitespace': False},
 {'text': ' ', 'color': '#000000', 'is_whitespace': True},
 {'text': 'AutoModelForCausalLM', 'color': '#000000', 'is_whitespace': False},
 {'text': '.', 'color': '#000000', 'is_whitespace': False},
 {'text': 'from_pretrained', 'color': '#000000', 'is_whitespace': False},
 {'text': '(', 'color': '#000000', 'is_whitespace': False},
 {'text': 'model_id', 'color': '#000000', 'is_whitespace': False},
 {'text': ')', 'color': '#000000', 'is_whitespace': False},
 {'text': '.', 'color': '#000000', 'is_whitespace': False},
 {'text': 'to', 'color': '#000000', 'is_whitespace': False},
 {'text': '(', 'color': '#000000', 'is_whitespace': False},
 {'text': 'device', 'color': '#000000', 'is_whitespace': False},
 {'text': ')', 'color': '#000000', 'is_whitespace': False},
 {'text': '\n', 'color': '#000000', 'is_whitespace':

Debugging tokenization for method calls:


[{'text': 'model', 'color': '#000000', 'is_whitespace': False},
 {'text': ' ', 'color': '#000000', 'is_whitespace': True},
 {'text': '=', 'color': '#000000', 'is_whitespace': False},
 {'text': ' ', 'color': '#000000', 'is_whitespace': True},
 {'text': 'AutoModelForCausalLM', 'color': '#000000', 'is_whitespace': False},
 {'text': '.', 'color': '#000000', 'is_whitespace': False},
 {'text': 'from_pretrained', 'color': '#000000', 'is_whitespace': False},
 {'text': '(', 'color': '#000000', 'is_whitespace': False},
 {'text': 'model_id', 'color': '#000000', 'is_whitespace': False},
 {'text': ')', 'color': '#000000', 'is_whitespace': False},
 {'text': '.', 'color': '#000000', 'is_whitespace': False},
 {'text': 'to', 'color': '#000000', 'is_whitespace': False},
 {'text': '(', 'color': '#000000', 'is_whitespace': False},
 {'text': 'device', 'color': '#000000', 'is_whitespace': False},
 {'text': ')', 'color': '#000000', 'is_whitespace': False},
 {'text': '\n', 'color': '#000000', 'is_whitespace':

---

In [11]:
# ipynb_to_svg_pure.py (can also be pasted into a notebook cell)
import os, argparse, textwrap, xml.etree.ElementTree as ET
from typing import List, Tuple, Optional
import nbformat
from markdown import markdown
from pygments import highlight
from pygments.lexers import get_lexer_by_name, guess_lexer
from pygments.util import ClassNotFound
from pygments.formatters import SvgFormatter

# --- layout/theme ---
CODE_SIZE = 13.0
TEXT_SIZE = 16.0
LINE_PAD  = 6.0
CELL_PAD  = 14.0
GUTTER_W  = 80.0
RADIUS    = 6.0

def _lexer(code: str, lang: Optional[str]):
    if lang:
        try: return get_lexer_by_name(lang.lower())
        except ClassNotFound: pass
    try: return guess_lexer(code)
    except ClassNotFound: return get_lexer_by_name("text")

def _svg_fragment_for_code(code: str, dark: bool, code_font_stack: str) -> tuple[str, float, float]:
    style = "default" if not dark else "native"  # close to Jupyter classic / dark
    fmt = SvgFormatter(
        style=style,
        fontfamily=code_font_stack,
        fontsize=f"{CODE_SIZE}px",   # <-- pass a string, not a float
    )
    svg = highlight(code, _lexer(code, "python"), fmt)

    # Pygments returns a full <svg>. Extract inner content and size.
    import xml.etree.ElementTree as ET
    root = ET.fromstring(svg)
    w_attr = root.attrib.get("width", "0").replace("px", "").strip()
    h_attr = root.attrib.get("height", "0").replace("px", "").strip()
    w = float(w_attr or 0)
    h = float(h_attr or 0)
    inner = "".join(ET.tostring(child, encoding="unicode") for child in root)
    return inner, w, h


def _svg_box(x, y, w, h, fill, stroke):
    return f'<rect x="{x:.2f}" y="{y:.2f}" width="{w:.2f}" height="{h:.2f}" rx="{RADIUS}" ry="{RADIUS}" fill="{fill}" stroke="{stroke}" />'

def _svg_text(x, y, txt, size, family, fill, anchor="start"):
    txt = (txt.replace("&","&amp;").replace("<","&lt;").replace(">","&gt;"))
    return f'<text x="{x:.2f}" y="{y:.2f}" font-size="{size}" font-family="{family}" fill="{fill}" text-anchor="{anchor}">{txt}</text>'

def _plainify_html(md_html: str) -> List[str]:
    # ultra-simple HTML→text for maximum editability in Illustrator
    text = md_html
    for tag in ["<p>", "</p>", "<em>", "</em>", "<strong>", "</strong>", "<code>", "</code>",
                "<ul>", "</ul>", "<ol>", "</ol>", "<li>", "</li>", "<br>", "<br/>"]:
        text = text.replace(tag, " ")
    text = " ".join(text.split())
    return textwrap.wrap(text, width=90)

def render_notebook(nb_path: str, width: int, font_stack: str, code_font_stack: str, dark: bool) -> str:
    nb = nbformat.read(nb_path, as_version=4)

    # Theme (approx Jupyter classic / dark)
    if not dark:
        bg, fg, subfg, border = "#ffffff", "#111", "#555", "#d7d7d7"
        prompt_in, prompt_out = "#303f9f", "#388e3c"
        code_bg, out_bg = "#f7f7f7", "#fbfbfb"
        stderr_bg, stderr_fg = "#fff3f3", "#a40000"
    else:
        bg, fg, subfg, border = "#111215", "#e6e6e6", "#b5b5b5", "#2a2c33"
        prompt_in, prompt_out = "#8ab4f8", "#98c379"
        code_bg, out_bg = "#1a1c22", "#16181d"
        stderr_bg, stderr_fg = "#2a0f11", "#ff6b6b"

    x0, w = 0.0, float(width)
    y = 36.0
    out = [f'<rect x="0" y="0" width="{w:.0f}" height="999999" fill="{bg}" />']  # temp big bg

    # Title
    out.append(_svg_text(x0 + 4, y, os.path.basename(nb_path), TEXT_SIZE, font_stack, fg))
    y += TEXT_SIZE + 14

    for cell in nb.get("cells", []):
        ct = cell.get("cell_type")
        if ct == "markdown":
            y += CELL_PAD
            md = markdown(cell.get("source",""), extensions=["extra","sane_lists","tables"])
            for line in _plainify_html(md):
                out.append(_svg_text(x0 + 4, y, line, TEXT_SIZE, font_stack, fg))
                y += TEXT_SIZE + 2
            y += LINE_PAD
            out.append(f'<line x1="{x0}" y1="{y:.2f}" x2="{x0+w:.2f}" y2="{y:.2f}" stroke="{border}" />')
            continue

        if ct == "code":
            src = cell.get("source","")
            exec_count = cell.get("execution_count")
            in_label = f'In [{exec_count if exec_count is not None else " "}]:'

            y += CELL_PAD
            out.append(_svg_text(x0 + GUTTER_W - 8, y + CODE_SIZE, in_label, CODE_SIZE, code_font_stack, prompt_in, anchor="end"))

            frag, cw, ch = _svg_fragment_for_code(src, dark, code_font_stack)
            code_x = x0 + GUTTER_W + 12
            out.append(_svg_box(code_x - 8, y - 8, w - GUTTER_W - 24, ch + 16, code_bg, border))
            out.append(f'<svg x="{code_x:.2f}" y="{y:.2f}" width="{cw:.2f}" height="{ch:.2f}" viewBox="0 0 {cw:.2f} {ch:.2f}">{frag}</svg>')
            y += ch + LINE_PAD

            # text outputs only
            outputs = []
            for o in cell.get("outputs", []):
                ot = o.get("output_type")
                if ot == "stream":
                    outputs.append(("stderr" if o.get("name") == "stderr" else "stdout", o.get("text","")))
                elif ot in ("execute_result","display_data"):
                    data = o.get("data", {})
                    if "text/plain" in data:
                        outputs.append(("result", data["text/plain"]))
                elif ot == "error":
                    tb = o.get("traceback") or []
                    outputs.append(("stderr", "\n".join(tb) if tb else f"{o.get('ename','Error')}: {o.get('evalue','')}"))

            if outputs:
                out.append(_svg_text(x0 + GUTTER_W - 8, y + CODE_SIZE, f'Out[{exec_count if exec_count is not None else " "}]:',
                                     CODE_SIZE, code_font_stack, prompt_out, anchor="end"))
                for kind, text in outputs:
                    lines = (text or "").splitlines() or [""]
                    block_h = CODE_SIZE * 1.25 * max(1, len(lines))
                    bgc = stderr_bg if kind == "stderr" else out_bg
                    fgc = stderr_fg if kind == "stderr" else fg
                    out.append(_svg_box(x0 + GUTTER_W + 12 - 8, y - 8, w - GUTTER_W - 24, block_h + 16, bgc, border))
                    ty = y + CODE_SIZE
                    for ln in lines:
                        safe = ln.replace("&","&amp;").replace("<","&lt;").replace(">","&gt;")
                        out.append(f'<text x="{x0 + GUTTER_W + 12:.2f}" y="{ty:.2f}" font-size="{CODE_SIZE}" font-family="{code_font_stack}" fill="{fgc}">{safe}</text>')
                        ty += CODE_SIZE * 1.25
                    y += block_h + LINE_PAD

            out.append(f'<line x1="{x0}" y1="{y:.2f}" x2="{x0+w:.2f}" y2="{y:.2f}" stroke="{border}" />')

    total_h = y + 36.0
    # finalize SVG
    svg = f'<svg xmlns="http://www.w3.org/2000/svg" width="{w:.0f}" height="{total_h:.0f}" viewBox="0 0 {w:.0f} {total_h:.0f}">' + "".join(out)
    svg = svg.replace('height="999999"', f'height="{total_h:.0f}"') + '</svg>'
    return svg

def ipynb_to_svg(ipynb_path, out_path=None, width=920,
                 font="Segoe UI, Helvetica Neue, Arial, sans-serif",
                 code_font="Menlo, Monaco, Consolas, \'Source Code Pro\', monospace",
                 dark=False):
    svg = render_notebook(ipynb_path, width, font, code_font, dark)
    out = out_path or os.path.splitext(ipynb_path)[0] + ".svg"
    with open(out, "w", encoding="utf-8") as f: f.write(svg)
    return out

# if __name__ == "__main__":
#     ap = argparse.ArgumentParser()
#     ap.add_argument("notebook")
#     ap.add_argument("-o","--output")
#     ap.add_argument("--width", type=int, default=920)
#     ap.add_argument("--font", default="Segoe UI, Helvetica Neue, Arial, sans-serif")
#     ap.add_argument("--code-font", default="Menlo, Monaco, Consolas, 'Source Code Pro', monospace")
#     ap.add_argument("--dark", action="store_true")
#     a = ap.parse_args()
#     out = ipynb_to_svg(a.notebook, a.output, a.width, a.font, a.code_font, a.dark)
#     print("Wrote SVG →", out)


In [12]:
out_path='/Users/stephen/Stephencwelch Dropbox/welch_labs/ai_book/3_backprop_2/graphics/book_chapter_3_1.svg'
ipynb_to_svg("book_chapter_3_1.ipynb", out_path=out_path, width=920,
             font="Source Sans 3, Segoe UI, Helvetica Neue, Arial, sans-serif",
             code_font="Source Code Pro, Menlo, Monaco, Consolas, monospace",
             dark=False)

'/Users/stephen/Stephencwelch Dropbox/welch_labs/ai_book/3_backprop_2/graphics/book_chapter_3_1.svg'

In [None]:

with open(out_path, "wb") as f:
    f.write(svg_bytes)

In [6]:
converter = NotebookToSVG()
converter.convert_notebook('book_chapter_3_1.ipynb', '/Users/stephen/Stephencwelch Dropbox/welch_labs/ai_book/3_backprop_2/graphics/book_chapter_3_1.svg')

Created: /Users/stephen/Stephencwelch Dropbox/welch_labs/ai_book/3_backprop_2/graphics/book_chapter_3_1.svg/cell_001.svg
Created: /Users/stephen/Stephencwelch Dropbox/welch_labs/ai_book/3_backprop_2/graphics/book_chapter_3_1.svg/cell_002.svg
Created: /Users/stephen/Stephencwelch Dropbox/welch_labs/ai_book/3_backprop_2/graphics/book_chapter_3_1.svg/cell_003.svg
Created: /Users/stephen/Stephencwelch Dropbox/welch_labs/ai_book/3_backprop_2/graphics/book_chapter_3_1.svg/cell_004.svg
Created: /Users/stephen/Stephencwelch Dropbox/welch_labs/ai_book/3_backprop_2/graphics/book_chapter_3_1.svg/cell_005.svg
Created: /Users/stephen/Stephencwelch Dropbox/welch_labs/ai_book/3_backprop_2/graphics/book_chapter_3_1.svg/cell_006.svg
Created: /Users/stephen/Stephencwelch Dropbox/welch_labs/ai_book/3_backprop_2/graphics/book_chapter_3_1.svg/cell_007.svg
Created: /Users/stephen/Stephencwelch Dropbox/welch_labs/ai_book/3_backprop_2/graphics/book_chapter_3_1.svg/cell_008.svg


[PosixPath('/Users/stephen/Stephencwelch Dropbox/welch_labs/ai_book/3_backprop_2/graphics/book_chapter_3_1.svg/cell_001.svg'),
 PosixPath('/Users/stephen/Stephencwelch Dropbox/welch_labs/ai_book/3_backprop_2/graphics/book_chapter_3_1.svg/cell_002.svg'),
 PosixPath('/Users/stephen/Stephencwelch Dropbox/welch_labs/ai_book/3_backprop_2/graphics/book_chapter_3_1.svg/cell_003.svg'),
 PosixPath('/Users/stephen/Stephencwelch Dropbox/welch_labs/ai_book/3_backprop_2/graphics/book_chapter_3_1.svg/cell_004.svg'),
 PosixPath('/Users/stephen/Stephencwelch Dropbox/welch_labs/ai_book/3_backprop_2/graphics/book_chapter_3_1.svg/cell_005.svg'),
 PosixPath('/Users/stephen/Stephencwelch Dropbox/welch_labs/ai_book/3_backprop_2/graphics/book_chapter_3_1.svg/cell_006.svg'),
 PosixPath('/Users/stephen/Stephencwelch Dropbox/welch_labs/ai_book/3_backprop_2/graphics/book_chapter_3_1.svg/cell_007.svg'),
 PosixPath('/Users/stephen/Stephencwelch Dropbox/welch_labs/ai_book/3_backprop_2/graphics/book_chapter_3_1.svg/

In [5]:
!ls

European Cities 1D [Part 1].ipynb  llama_learning_animation_6.py
European Cities 2D [Part 1].ipynb  llama_learning_animation_7.py
Hackin 1.ipynb                     llama_learning_animation_8.py
Hackin 2.ipynb                     llama_learning_animation_9.py
Hackin 3.ipynb                     map_of_language_1.ipynb
Hacking 4.ipynb                    network_pranav_pr_1.py
Hacking 5.ipynb                    notebook_converter_1.ipynb
Llama Exporting 1.ipynb            p12_quick_zoom.py
Llama Exporting 2.ipynb            p22_24.py
Llama Exporting 3.ipynb            p22_24_cleaner_transition.py
Llama Exporting 4.ipynb            p26_28.py
[34m__pycache__[m[m                        p32.py
book_chapter_3_1.ipynb             p32_40.py
book_llama_learning_animation_1.py p40_44.py
llama_learning_animation_1.py      p44_46.py
llama_learning_animation_10.py     p44_46_old.py
llama_learning_animation_11.py     p48_49.py
llama_learning_animation_12.py     p48_49_old.py
llama_learning_animatio