## Notebook Converter 1

In [13]:
#!/usr/bin/env python3
"""
Jupyter Notebook to SVG Converter
Converts .ipynb files to publication-ready SVG files that maintain the notebook appearance
"""

import nbformat
import svgwrite
from pygments import highlight
from pygments.lexers import PythonLexer
from pygments.formatters import get_formatter_by_name
import re
import json
from pathlib import Path
from typing import List, Tuple, Dict, Any

class JupyterSVGConverter:
    """Render the code cells of a Jupyter notebook as one SVG image.

    Each code cell is drawn as a bordered input box with an ``In [n]:``
    prompt and syntax-highlighted source, followed (when the cell has
    outputs) by an ``Out[n]:`` prompt and the plain-text output.  Markdown
    cells and non-text outputs (images, HTML, errors) are skipped.

    Cells are numbered sequentially in notebook order, not by their stored
    execution counts.
    """

    # --- Vertical layout constants (px) -------------------------------
    # They are shared by the drawing code and by _calculate_total_height so
    # the declared SVG height always matches what is actually drawn.
    TOP_MARGIN = 20          # blank space above the first cell
    BOTTOM_MARGIN = 20       # blank space below the last cell
    INPUT_OUTPUT_GAP = 10    # gap below every input box
    OUTPUT_TEXT_OFFSET = 20  # from top of output area to first text baseline
    OUTPUT_BOTTOM_GAP = 10   # gap below the last output line

    # --- Horizontal layout constants (px) -----------------------------
    PROMPT_X = 10            # left edge of the In/Out prompts
    CELL_X = 60              # left edge of the input box
    CODE_X = 80              # left edge of the source text
    OUTPUT_X = 85            # left edge of the output text
    CHAR_WIDTH_RATIO = 0.6   # rough glyph width of a monospace font / font size

    # One Pygments token: <span class="CLS">TEXT</span> (TEXT is HTML-escaped,
    # so it never contains a raw '<').
    _SPAN_RE = re.compile(r'<span class="([^"]*)"[^>]*>([^<]*)</span>')
    # ANSI CSI sequences such as colour codes ("\x1b[34m", "\x1b[m").
    _ANSI_RE = re.compile(r'\x1b\[[0-?]*[ -/]*[@-~]')
    # Control characters that are not allowed in XML 1.0 documents
    # (tab, newline and carriage return are legal and therefore kept).
    _CONTROL_RE = re.compile(r'[\x00-\x08\x0b\x0c\x0e-\x1f]')

    def __init__(self,
                 font_family: str = "Monaco, 'Courier New', monospace",
                 font_size: int = 12,
                 line_height: float = 1.4,
                 cell_padding: int = 12,
                 cell_spacing: int = 20,
                 page_width: int = 800):
        """Configure fonts, spacing and page width.

        Args:
            font_family: CSS font-family used for all text.
            font_size: Font size in pixels.
            line_height: Line pitch as a multiple of ``font_size``.
            cell_padding: Padding (px) above and below the code in an input box.
            cell_spacing: Vertical gap (px) after each cell.
            page_width: Width of the SVG in pixels.
        """
        self.font_family = font_family
        self.font_size = font_size
        self.line_height = line_height
        self.cell_padding = cell_padding
        self.cell_spacing = cell_spacing
        self.page_width = page_width

        # Colors matching Jupyter's default theme
        self.colors = {
            'input_bg': '#f7f7f7',
            'input_border': '#cfcfcf',
            'output_bg': '#ffffff',
            'prompt_color': '#303f9f',
            'text_color': '#000000',
            'output_text': '#000000'
        }

        # Pygments produces HTML spans whose class is the token's short name;
        # nowrap=True leaves out the surrounding <div>/<pre>.
        self.lexer = PythonLexer()
        self.formatter = get_formatter_by_name('html', style='default', nowrap=True)

        # Map those short names to fill colours.
        self._build_syntax_colors()

    def _build_syntax_colors(self):
        """Populate ``self.syntax_colors`` (Pygments short name -> hex colour).

        The table is hard-coded after the Pygments 'default' style; nothing is
        read from Pygments at runtime.  Classes missing from the table fall
        back to the 'default' entry when a token is rendered.
        """
        self.syntax_colors = {
            # Keywords
            'k': '#008000',      # Keyword (def, class, if, etc.)
            'kc': '#008000',     # Keyword.Constant (True, False, None)
            'kd': '#008000',     # Keyword.Declaration
            'kn': '#008000',     # Keyword.Namespace (import, from)
            'kp': '#008000',     # Keyword.Pseudo
            'kr': '#008000',     # Keyword.Reserved
            'kt': '#b00040',     # Keyword.Type (has its own colour in 'default')

            # Names
            'n': '#000000',      # Name
            'na': '#7d9029',     # Name.Attribute
            'nb': '#008000',     # Name.Builtin (len, print, etc.)
            'bp': '#008000',     # Name.Builtin.Pseudo (self, cls)
            'nc': '#0000ff',     # Name.Class
            'no': '#880000',     # Name.Constant
            'nd': '#aa22ff',     # Name.Decorator (@property, etc.)
            'ne': '#d2413a',     # Name.Exception
            'nf': '#0000ff',     # Name.Function
            'fm': '#0000ff',     # Name.Function.Magic (__init__, etc.)
            'ni': '#999999',     # Name.Entity
            'nl': '#a0a000',     # Name.Label
            'nn': '#0000ff',     # Name.Namespace
            'nt': '#008000',     # Name.Tag
            'nv': '#19177c',     # Name.Variable
            'vc': '#19177c',     # Name.Variable.Class
            'vg': '#19177c',     # Name.Variable.Global
            'vi': '#19177c',     # Name.Variable.Instance
            'vm': '#19177c',     # Name.Variable.Magic (__name__, etc.)
            'nx': '#000000',     # Name.Other

            # Literals
            'l': '#666666',      # Literal
            'ld': '#ba2121',     # Literal.Date

            # Strings
            's': '#ba2121',      # String
            'sa': '#ba2121',     # String.Affix
            'sb': '#ba2121',     # String.Backtick
            'sc': '#ba2121',     # String.Char
            'dl': '#ba2121',     # String.Delimiter
            'sd': '#ba2121',     # String.Doc (docstrings)
            'se': '#bb6622',     # String.Escape
            'sh': '#ba2121',     # String.Heredoc
            'si': '#bb6688',     # String.Interpol
            'sx': '#008000',     # String.Other
            'sr': '#bb6688',     # String.Regex
            'ss': '#19177c',     # String.Symbol
            's1': '#ba2121',     # String.Single
            's2': '#ba2121',     # String.Double

            # Numbers
            'm': '#666666',      # Number
            'mb': '#666666',     # Number.Bin
            'mf': '#666666',     # Number.Float
            'mh': '#666666',     # Number.Hex
            'mi': '#666666',     # Number.Integer
            'il': '#666666',     # Number.Integer.Long
            'mo': '#666666',     # Number.Oct

            # Operators
            'o': '#666666',      # Operator
            'ow': '#aa22ff',     # Operator.Word (and, or, not, in, is)

            # Punctuation
            'p': '#000000',      # Punctuation

            # Comments
            'c': '#408080',      # Comment
            'ch': '#408080',     # Comment.Hashbang
            'cm': '#408080',     # Comment.Multiline
            'cp': '#bc7a00',     # Comment.Preproc
            'cpf': '#408080',    # Comment.PreprocFile
            'cs': '#408080',     # Comment.Special
            'c1': '#408080',     # Comment.Single

            # Generic (for diffs, etc.)
            'g': '#000000',      # Generic
            'gd': '#a00000',     # Generic.Deleted
            'ge': '#000000',     # Generic.Emph
            'gr': '#ff0000',     # Generic.Error
            'gh': '#000080',     # Generic.Heading
            'gi': '#00a000',     # Generic.Inserted
            'go': '#888888',     # Generic.Output
            'gp': '#000080',     # Generic.Prompt
            'gs': '#000000',     # Generic.Strong
            'gu': '#800080',     # Generic.Subheading
            'gt': '#0044dd',     # Generic.Traceback

            # Others
            'w': '#bbbbbb',      # Text.Whitespace
            'default': '#000000'  # Default text
        }

    def convert_notebook(self, notebook_path: str, output_path: str = None) -> str:
        """Convert a notebook file to an SVG file.

        Args:
            notebook_path: Path of the ``.ipynb`` file to read.
            output_path: Destination SVG path.  When ``None``, the notebook
                path with its suffix replaced by ``.svg`` is used.

        Returns:
            The path of the written SVG file, as a string.
        """
        with open(notebook_path, 'r', encoding='utf-8') as f:
            notebook = nbformat.read(f, as_version=4)

        if output_path is None:
            output_path = Path(notebook_path).with_suffix('.svg')

        svg_content = self._create_svg_document(notebook)

        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(svg_content)

        print(f"Converted {notebook_path} to {output_path}")
        return str(output_path)

    def _create_svg_document(self, notebook) -> str:
        """Lay out every code cell of ``notebook`` and return the SVG markup."""
        total_height = self._calculate_total_height(notebook)

        dwg = svgwrite.Drawing(size=(f"{self.page_width}px", f"{total_height}px"))
        dwg.defs.add(dwg.style(self._get_css_styles()))

        current_y = self.TOP_MARGIN
        execution_count = 0

        for cell in notebook.cells:
            if cell.cell_type == 'code':
                execution_count += 1
                current_y = self._add_code_cell(dwg, cell, current_y, execution_count)
            # Markdown (and raw) cells are not rendered.

        return dwg.tostring()

    def _calculate_total_height(self, notebook) -> int:
        """Return the SVG height (px) needed to draw all code cells.

        The arithmetic mirrors _add_code_cell and _add_output step by step
        (same constants, same line counting), so the result is the final
        layout position plus BOTTOM_MARGIN rather than a rough estimate.
        """
        row_height = self.font_size * self.line_height
        height = self.TOP_MARGIN

        for cell in notebook.cells:
            if cell.cell_type != 'code':
                continue

            # Input box plus the gap that always follows it.
            height += len(cell.source.split('\n')) * row_height + self.cell_padding * 2
            height += self.INPUT_OUTPUT_GAP

            # The output area is drawn whenever the cell has any outputs,
            # even if none of them carries text.
            outputs = getattr(cell, 'outputs', None)
            if outputs:
                height += (self.OUTPUT_TEXT_OFFSET
                           + len(self._output_lines(outputs)) * row_height
                           + self.OUTPUT_BOTTOM_GAP)

            height += self.cell_spacing

        return int(height + self.BOTTOM_MARGIN)

    def _add_code_cell(self, dwg, cell, y_position: float, execution_count: int) -> float:
        """Draw one code cell (input box, prompt, source, outputs).

        Args:
            dwg: The svgwrite drawing to add elements to.
            cell: Notebook code cell; ``cell.source`` and ``cell.outputs`` are read.
            y_position: Top of the cell in pixels.
            execution_count: Number shown in the ``In``/``Out`` prompts.

        Returns:
            The y position at which the next cell should start.
        """
        row_height = self.font_size * self.line_height
        line_count = len(cell.source.split('\n'))
        input_height = line_count * row_height + self.cell_padding * 2

        # Input cell background
        dwg.add(dwg.rect(
            insert=(self.CELL_X, y_position),
            size=(self.page_width - 80, input_height),
            fill=self.colors['input_bg'],
            stroke=self.colors['input_border'],
            stroke_width=1,
            rx=3  # Rounded corners
        ))

        # The prompt shares the baseline of the first source line so both
        # stay aligned for any font_size / cell_padding.
        first_baseline = y_position + self.cell_padding + self.font_size
        dwg.add(dwg.text(
            f"In [{execution_count}]:",
            insert=(self.PROMPT_X, first_baseline),
            fill=self.colors['prompt_color'],
            font_family=self.font_family,
            font_size=f"{self.font_size}px",
            font_weight="bold"
        ))

        # Source, highlighted as a whole so multi-line strings keep their
        # colour, then drawn line by line (blank lines only advance y).
        line_y = first_baseline
        for html_line in self._highlight_source_lines(cell.source):
            for element in self._render_highlighted_html(html_line, self.CODE_X, line_y):
                dwg.add(element)
            line_y += row_height

        y_position += input_height + self.INPUT_OUTPUT_GAP

        outputs = getattr(cell, 'outputs', None)
        if outputs:
            y_position = self._add_output(dwg, outputs, y_position, execution_count)

        return y_position + self.cell_spacing

    def _highlight_source_lines(self, source: str) -> List[str]:
        """Return Pygments HTML for ``source``, one entry per source line.

        The list always has ``len(source.split('\\n'))`` entries; blank lines
        map to ``''``.  The cell is highlighted in one pass because lexing
        line by line loses the context of triple-quoted strings.
        """
        line_count = len(source.split('\n'))

        # Pygments lexers drop leading/trailing newlines by default; do the
        # same explicitly so the removed leading lines can be re-inserted.
        body = source.strip('\n')
        if not body.strip():
            return [''] * line_count
        leading_blank = len(source) - len(source.lstrip('\n'))

        markup = highlight(body, self.lexer, self.formatter)
        # The HTML formatter closes every span at each line break, so each
        # piece is self-contained; the piece after the final newline is empty.
        body_html = markup.split('\n')[:body.count('\n') + 1]

        html_lines = [''] * leading_blank + body_html
        html_lines += [''] * (line_count - len(html_lines))
        return html_lines[:line_count]

    def _create_highlighted_line(self, code_line: str, x_pos: int, y_pos: int) -> List:
        """Create SVG text elements for one isolated line of code.

        The line is lexed on its own, so it cannot know about a multi-line
        string it may be part of; _add_code_cell therefore highlights whole
        cells instead and this method is kept for single-line use.

        Returns:
            A list of svgwrite text elements (empty for a blank line).
        """
        if not code_line.strip():
            return []

        highlighted_html = highlight(code_line, self.lexer, self.formatter)
        return self._render_highlighted_html(highlighted_html, x_pos, y_pos)

    def _render_highlighted_html(self, highlighted_html: str, x_pos: float, y_pos: float) -> List:
        """Turn one line of Pygments HTML into positioned SVG text elements.

        Each token becomes its own text element, placed by assuming every
        character is ``font_size * CHAR_WIDTH_RATIO`` pixels wide.
        """
        import html as html_lib  # local: a method parameter elsewhere is named ``html``

        char_width = self.font_size * self.CHAR_WIDTH_RATIO
        fallback = self.syntax_colors['default']
        elements = []
        cursor_x = x_pos

        for raw_text, token_class in self._parse_highlighted_html(highlighted_html):
            token_text = html_lib.unescape(raw_text)
            visible = token_text.lstrip()

            # Whitespace-only tokens draw nothing; they only move the cursor.
            if visible:
                # Leading whitespace would be collapsed by the SVG renderer,
                # so account for it in the x offset instead.
                indent = len(token_text) - len(visible)
                elements.append(svgwrite.text.Text(
                    visible,
                    insert=(cursor_x + indent * char_width, y_pos),
                    fill=self.syntax_colors.get(token_class, fallback),
                    font_family=self.font_family,
                    font_size=f"{self.font_size}px"
                ))

            cursor_x += len(token_text) * char_width

        return elements

    def _parse_highlighted_html(self, html: str) -> List[Tuple[str, str]]:
        """Split Pygments HTML into ``(text, css_class)`` tokens.

        Text outside any ``<span>`` gets the class ``'default'``.  The text is
        returned still HTML-escaped (callers unescape it).  The trailing
        newline that ``highlight`` appends is not reported as a token.
        """
        markup = html.rstrip('\n')
        tokens = []
        cursor = 0

        for match in self._SPAN_RE.finditer(markup):
            # Unstyled text between the previous span and this one
            if match.start() > cursor:
                tokens.append((markup[cursor:match.start()], 'default'))

            tokens.append((match.group(2), match.group(1)))
            cursor = match.end()

        # Unstyled text after the last span (or the whole line if no spans)
        if cursor < len(markup):
            tokens.append((markup[cursor:], 'default'))

        return tokens

    def _sanitize_text(self, text: str) -> str:
        """Remove ANSI escape sequences and XML-illegal control characters.

        Captured terminal output often contains colour codes; the ESC byte
        they start with may not appear in an XML document, so leaving it in
        would make the SVG unparseable.
        """
        return self._CONTROL_RE.sub('', self._ANSI_RE.sub('', text))

    def _output_text(self, output):
        """Return the plain text of one cell output, or ``None`` if it has none.

        Reads ``output['text']`` (stream outputs) or
        ``output['data']['text/plain']``; a list of strings is joined.
        """
        if 'text' in output:
            payload = output['text']
        elif 'data' in output and 'text/plain' in output['data']:
            payload = output['data']['text/plain']
        else:
            return None  # images, HTML, errors, ... are not rendered

        if isinstance(payload, (list, tuple)):
            payload = ''.join(payload)
        return self._sanitize_text(str(payload))

    def _output_lines(self, outputs) -> List[str]:
        """Return every output line in drawing order.

        Surrounding whitespace of each output is stripped first.  Blank lines
        are kept because they still take up one row in the layout.
        """
        lines = []
        for output in outputs:
            text = self._output_text(output)
            if text is not None:
                lines.extend(text.strip().split('\n'))
        return lines

    def _add_output(self, dwg, outputs, y_position: float, execution_count: int) -> float:
        """Draw the ``Out[n]:`` prompt and the text outputs of a cell.

        Args:
            dwg: The svgwrite drawing to add elements to.
            outputs: The cell's outputs (only textual ones are drawn).
            y_position: Top of the output area in pixels.
            execution_count: Number shown in the prompt.

        Returns:
            The y position just below the output area.
        """
        row_height = self.font_size * self.line_height
        text_y = y_position + self.OUTPUT_TEXT_OFFSET

        # Prompt on the baseline of the first output line
        dwg.add(dwg.text(
            f"Out[{execution_count}]:",
            insert=(self.PROMPT_X, text_y),
            fill=self.colors['prompt_color'],
            font_family=self.font_family,
            font_size=f"{self.font_size}px",
            font_weight="bold"
        ))

        for line in self._output_lines(outputs):
            if line.strip():  # blank lines only advance the cursor
                dwg.add(dwg.text(
                    line,
                    insert=(self.OUTPUT_X, text_y),
                    fill=self.colors['output_text'],
                    font_family=self.font_family,
                    font_size=f"{self.font_size}px"
                ))
            text_y += row_height

        return text_y + self.OUTPUT_BOTTOM_GAP

    def _get_css_styles(self) -> str:
        """Return the CSS placed in the SVG ``<defs>``: default font for all text."""
        return f"""
        text {{
            font-family: {self.font_family};
            font-size: {self.font_size}px;
        }}
        """

def main():
    """Build a converter with default settings and print a usage reminder."""
    converter = JupyterSVGConverter()

    # To convert a notebook, call for example:
    # converter.convert_notebook('example.ipynb', 'example.svg')

    banner = (
        "Jupyter to SVG Converter ready!",
        "Usage: converter.convert_notebook('notebook.ipynb', 'output.svg')",
    )
    for message in banner:
        print(message)


if __name__ == "__main__":
    main()

Jupyter to SVG Converter ready!
Usage: converter.convert_notebook('notebook.ipynb', 'output.svg')


In [14]:
# Convert the chapter notebook with the default converter settings; the
# returned output path is the cell's displayed value.
converter = JupyterSVGConverter()
converter.convert_notebook(
    notebook_path='book_chapter_3_1.ipynb',
    output_path='book_chapter_3_1.svg',
)

Converted book_chapter_3_1.ipynb to book_chapter_3_1.svg


'book_chapter_3_1.svg'

In [5]:
!ls

European Cities 1D [Part 1].ipynb  llama_learning_animation_6.py
European Cities 2D [Part 1].ipynb  llama_learning_animation_7.py
Hackin 1.ipynb                     llama_learning_animation_8.py
Hackin 2.ipynb                     llama_learning_animation_9.py
Hackin 3.ipynb                     map_of_language_1.ipynb
Hacking 4.ipynb                    network_pranav_pr_1.py
Hacking 5.ipynb                    notebook_converter_1.ipynb
Llama Exporting 1.ipynb            p12_quick_zoom.py
Llama Exporting 2.ipynb            p22_24.py
Llama Exporting 3.ipynb            p22_24_cleaner_transition.py
Llama Exporting 4.ipynb            p26_28.py
__pycache__                        p32.py
book_chapter_3_1.ipynb             p32_40.py
book_llama_learning_animation_1.py p40_44.py
llama_learning_animation_1.py      p44_46.py
llama_learning_animation_10.py     p44_46_old.py
llama_learning_animation_11.py     p48_49.py
llama_learning_animation_12.py     p48_49_old.py
llama_learning_animatio