# Pandoc Sidecar Demo

This notebook demonstrates how to use the pandoc/extra CLI to render LaTeX files.

## Prerequisites

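Before running this demo, ensure you have the following installed:
- Docker and Docker Compose (for the containerized `pandoc-extra` service)
- pandoc and a LaTeX distribution that provides `pdflatex` (for the local CLI sections)
- Minikube and kubectl (for the Kubernetes deployment section)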

## 1. Verify the pandoc service is running and test basic functionality

In [13]:
import subprocess
import os
import sys

In [12]:
# Ask the pandoc binary inside the `pandoc-extra` compose service for its
# version banner. check=True makes a failing container raise instead of
# passing silently; the command is launched from the parent directory
# (presumably where the compose file lives -- confirm).
version_cmd = ['docker-compose', 'run', '--rm', 'pandoc-extra', 'pandoc', '--version']
result = subprocess.run(
    version_cmd,
    cwd='..',
    check=True,
    text=True,
    capture_output=True,
)
print(result.stdout)

pandoc 3.7.0.2
Features: +server +lua
Scripting engine: Lua 5.4
User data directory: /usr/local/share/pandoc
Copyright (C) 2006-2024 John MacFarlane. Web: https://pandoc.org
This is free software; see the source for copying conditions. There is no
warranty, not even for merchantability or fitness for a particular purpose.



## 2. Check pandoc CLI availability

In [None]:
# Probe for a locally installed pandoc binary and report its version line.
try:
    result = subprocess.run(
        ['pandoc', '--version'],
        check=True,
        text=True,
        capture_output=True,
    )
except FileNotFoundError:
    # No `pandoc` executable could be launched at all.
    print("✗ Pandoc not installed or not in PATH")
except subprocess.CalledProcessError:
    # pandoc was launched but exited with a non-zero status.
    print("✗ Pandoc not found or error occurred")
else:
    print("✓ Pandoc is available")
    print("Version info:")
    # Everything before the first newline is the version line.
    print(result.stdout.partition('\n')[0])

## 3. Check if input LaTeX file exists

In [None]:
# Locate the sample LaTeX document that the rendering cells convert.
input_file = "test/data/basic_example.tex"
if os.path.exists(input_file):
    print(f"✓ Input file found: {input_file}")

    # Show the source for reference. Decode explicitly as UTF-8: pandoc
    # treats its input as UTF-8, so the platform default encoding could
    # mis-decode (or fail on) non-ASCII characters in the document.
    with open(input_file, 'r', encoding='utf-8') as f:
        content = f.read()
    print("\nLaTeX content:")
    print(content)
else:
    print(f"✗ Input file not found: {input_file}")

## 4. Render LaTeX to PDF

In [None]:
# Convert the LaTeX source to PDF with the local pandoc CLI (pdflatex engine).
output_pdf = "output/basic_example.pdf"

# The destination directory must exist before pandoc writes into it.
os.makedirs("output", exist_ok=True)

try:
    pdf_cmd = ['pandoc', input_file, '-o', output_pdf, '--pdf-engine=pdflatex']
    result = subprocess.run(pdf_cmd, check=True, text=True, capture_output=True)

    print(f"✓ PDF rendered successfully: {output_pdf}")
    if os.path.exists(output_pdf):
        pdf_bytes = os.path.getsize(output_pdf)
        print(f"  File size: {pdf_bytes} bytes")

    # A successful run may still have written diagnostics to stderr.
    if result.stderr:
        print("Warnings/Info:")
        print(result.stderr)

except FileNotFoundError:
    print("✗ Pandoc not found. Please install pandoc first.")
except subprocess.CalledProcessError as e:
    # pandoc exited non-zero; surface whatever it wrote to stderr.
    print(f"✗ Error rendering PDF: {e}")
    if e.stderr:
        print("Error details:")
        print(e.stderr)

## 5. Render LaTeX to HTML

In [None]:
# Render LaTeX to standalone HTML (math rendered via MathJax) using the pandoc CLI.
output_html = "output/basic_example.html"

# Create the output directory here too, so this cell does not silently
# depend on the PDF cell having been run first.
os.makedirs("output", exist_ok=True)

try:
    # Use pandoc CLI to convert LaTeX to HTML
    cmd = ['pandoc', input_file, '-o', output_html, '--standalone', '--mathjax']
    result = subprocess.run(cmd, capture_output=True, text=True, check=True)

    print(f"✓ HTML rendered successfully: {output_html}")
    if os.path.exists(output_html):
        file_size = os.path.getsize(output_html)
        print(f"  File size: {file_size} bytes")

        # pandoc writes UTF-8, so decode explicitly instead of relying on
        # the platform default encoding.
        with open(output_html, 'r', encoding='utf-8') as f:
            html_content = f.read()
        # A real newline here; the doubled backslash used previously
        # printed the two characters "\n" literally.
        print("\nHTML preview (first 300 characters):")
        if len(html_content) > 300:
            print(html_content[:300] + "...")
        else:
            print(html_content)

    # A successful run may still have written diagnostics to stderr.
    if result.stderr:
        print("Warnings/Info:")
        print(result.stderr)

except subprocess.CalledProcessError as e:
    print(f"✗ Error rendering HTML: {e}")
    if e.stderr:
        print("Error details:")
        print(e.stderr)
except FileNotFoundError:
    # Raised when no `pandoc` executable can be launched; handled the same
    # way as in the PDF cell.
    print("✗ Pandoc not found. Please install pandoc first.")

## 6. Render LaTeX to Markdown

In [None]:
# Render LaTeX to Markdown using the pandoc CLI.
output_md = "output/basic_example.md"

# Create the output directory here too, so this cell does not silently
# depend on the PDF cell having been run first.
os.makedirs("output", exist_ok=True)

try:
    # Use pandoc CLI to convert LaTeX to Markdown
    cmd = ['pandoc', input_file, '-o', output_md, '--from=latex', '--to=markdown']
    result = subprocess.run(cmd, capture_output=True, text=True, check=True)

    print(f"✓ Markdown rendered successfully: {output_md}")
    if os.path.exists(output_md):
        file_size = os.path.getsize(output_md)
        print(f"  File size: {file_size} bytes")

        # pandoc writes UTF-8, so decode explicitly instead of relying on
        # the platform default encoding.
        with open(output_md, 'r', encoding='utf-8') as f:
            md_content = f.read()
        # A real newline here; the doubled backslash used previously
        # printed the two characters "\n" literally.
        print("\nMarkdown content:")
        print(md_content)

    # A successful run may still have written diagnostics to stderr.
    if result.stderr:
        print("Warnings/Info:")
        print(result.stderr)

except subprocess.CalledProcessError as e:
    print(f"✗ Error rendering Markdown: {e}")
    if e.stderr:
        print("Error details:")
        print(e.stderr)
except FileNotFoundError:
    # Raised when no `pandoc` executable can be launched; handled the same
    # way as in the PDF cell.
    print("✗ Pandoc not found. Please install pandoc first.")

## 7. List all generated output files

In [None]:
# Summarize the contents of output/: every regular file with its size,
# in name order. Sub-directories are skipped.
if not os.path.exists("output"):
    print("No output directory found")
else:
    print("Generated output files:")
    for name in sorted(os.listdir("output")):
        path = os.path.join("output", name)
        if not os.path.isfile(path):
            continue
        size = os.path.getsize(path)
        print(f"  📄 {name} ({size:,} bytes)")

## 8. Test with Docker Compose pandoc service

In [None]:
# Use docker-compose to run pandoc commands
# This uses the pandoc service defined in docker-compose.yaml

def run_pandoc_compose(input_file, output_file, extra_args=None):
    """Convert a host file with pandoc running in the docker-compose service.

    The conversion is a three-step round trip:

    1. ``docker cp`` the input file into ``/input/`` of the container
       named ``pandoc``.
    2. Run pandoc through ``docker-compose exec -T pandoc-extra`` (launched
       from the parent directory), writing to ``/output/<basename>``.
    3. If pandoc succeeded, ``docker cp`` the result back to ``output_file``.

    Args:
        input_file: Host path of the document to convert.
        output_file: Host path where the converted document is written.
            Only its basename is used inside the container.
        extra_args: Optional iterable of additional pandoc command-line
            arguments appended after the output option.

    Returns:
        The ``subprocess.CompletedProcess`` of the pandoc invocation. A
        pandoc failure is NOT raised; inspect ``returncode`` / ``stderr``.

    Raises:
        subprocess.CalledProcessError: if either ``docker cp`` step fails.
    """
    # Accept any iterable (list, tuple, ...) and never alias the caller's list.
    extra_args = list(extra_args) if extra_args else []

    # Copy input file into the container.
    # NOTE(review): the container is addressed as `pandoc` here but the
    # compose service is `pandoc-extra` -- confirm the container_name.
    copy_cmd = ['docker', 'cp', input_file, 'pandoc:/input/']
    subprocess.run(copy_cmd, capture_output=True, text=True, check=True)

    # Build pandoc command; inside the container only basenames are used.
    input_basename = os.path.basename(input_file)
    output_basename = os.path.basename(output_file)

    cmd = [
        'docker-compose', 'exec', '-T', 'pandoc-extra',
        'pandoc', f'/input/{input_basename}',
        '-o', f'/output/{output_basename}'
    ] + extra_args

    result = subprocess.run(cmd, capture_output=True, text=True, cwd='..')

    # Copy output file back to the host only when pandoc succeeded.
    if result.returncode == 0:
        # `docker cp` needs the destination's parent directory to exist on
        # the host; create it so the helper does not depend on an earlier
        # cell having made it.
        host_dir = os.path.dirname(output_file)
        if host_dir:
            os.makedirs(host_dir, exist_ok=True)

        copy_cmd = ['docker', 'cp', f'pandoc:/output/{output_basename}', output_file]
        subprocess.run(copy_cmd, capture_output=True, text=True, check=True)

    return result

# Test multiple formats with Docker Compose.
# Each job is (label used in messages, host output path, extra pandoc args).
docker_pdf = "output/docker_compose_example.pdf"
docker_html = "output/docker_compose_example.html"
docker_docx = "output/docker_compose_example.docx"

compose_jobs = [
    ("PDF", docker_pdf, ['--pdf-engine=pdflatex']),
    ("HTML", docker_html, ['--standalone', '--mathjax']),
    ("DOCX", docker_docx, None),  # no extra arguments for DOCX
]

try:
    print("Testing pandoc service with multiple output formats...")

    for label, target, pandoc_args in compose_jobs:
        result = run_pandoc_compose(input_file, target, pandoc_args)

        if result.returncode == 0:
            print(f"✓ Docker Compose {label} rendered: {target}")
            if os.path.exists(target):
                file_size = os.path.getsize(target)
                print(f"  File size: {file_size:,} bytes")
        else:
            # pandoc itself failed; the helper returns instead of raising.
            print(f"✗ Docker Compose {label} failed: {result.stderr}")

except subprocess.CalledProcessError as e:
    # A `docker cp` step failed. Its stderr is not part of str(e), so print
    # it explicitly, as the other rendering cells do.
    print(f"✗ Docker Compose test failed: {e}")
    if e.stderr:
        print("Error details:")
        print(e.stderr)
except Exception as e:
    # Best-effort fallback (e.g. the docker executable is missing).
    print(f"✗ Docker Compose test failed: {e}")

## Minikube Deployment Instructions

### Set up Minikube for the pandoc/extra:3.7 deployment

In [None]:
# Make sure a Minikube cluster is up (starting one if necessary), then
# report the installed Minikube version.
try:
    # `minikube status` is allowed to fail: a non-zero exit just means
    # "not running", which is handled below.
    status = subprocess.run(['minikube', 'status'], text=True, capture_output=True)
    cluster_up = status.returncode == 0 and 'Running' in status.stdout

    if not cluster_up:
        print("Starting Minikube...")
        subprocess.run(['minikube', 'start'], check=True, text=True, capture_output=True)
        print("✓ Minikube started successfully")
    else:
        print("✓ Minikube is already running")
        print(status.stdout)

    result = subprocess.run(['minikube', 'version'], check=True, text=True, capture_output=True)
    # Third whitespace-separated token of the output; assumes it begins
    # with "minikube version: <version>" -- confirm against your release.
    version_tokens = result.stdout.split()
    print(f"Minikube version: {version_tokens[2]}")

except FileNotFoundError:
    print("✗ Minikube not found. Please install Minikube first.")
except subprocess.CalledProcessError as e:
    print(f"✗ Minikube error: {e}")
    if e.stderr:
        print("Error details:", e.stderr.strip())

### Create the Kubernetes Pod and Service manifest for pandoc/extra:3.7

In [None]:
# Manifest file written by this cell; the deploy and cleanup cells pass it
# to `kubectl -f`.
k8s_config_file = "k8s-pandoc-deployment.yaml"

# Two YAML documents: a Pod running the pandoc/extra:3.7 image (kept alive
# with `sleep infinity`, with an emptyDir volume mounted at /data) and a
# ClusterIP Service selecting that pod by its `app` label.
k8s_config = '''apiVersion: v1
kind: Pod
metadata:
  name: pandoc-extra-pod
  labels:
    app: pandoc-extra
spec:
  containers:
  - name: pandoc-container
    image: pandoc/extra:3.7
    command: ["sleep", "infinity"]  # Keep container running
    volumeMounts:
    - name: shared-data
      mountPath: /data
    resources:
      requests:
        memory: "256Mi"
        cpu: "250m"
      limits:
        memory: "512Mi"
        cpu: "500m"
  volumes:
  - name: shared-data
    emptyDir: {}
---
apiVersion: v1
kind: Service
metadata:
  name: pandoc-service
spec:
  selector:
    app: pandoc-extra
  ports:
  - port: 80
    targetPort: 8080
    protocol: TCP
  type: ClusterIP
'''

# Persist the manifest next to the notebook, then echo it for the reader.
with open(k8s_config_file, 'w') as manifest:
    manifest.write(k8s_config)

print(f"✓ Created Kubernetes configuration: {k8s_config_file}")
print("Configuration contents:")
print(k8s_config)

### Deploy to Minikube and test

In [None]:
# Deploy the manifest to Minikube and smoke-test pandoc inside the pod.

# Upper bound on how long to wait for the pod to become Ready.
POD_READY_TIMEOUT = "120s"

try:
    # Apply the Kubernetes configuration
    print("Deploying pandoc/extra:3.7 to Minikube...")
    result = subprocess.run(['kubectl', 'apply', '-f', k8s_config_file],
                            capture_output=True, text=True, check=True)
    print("✓ Deployment applied successfully")
    print(result.stdout)

    # Block until the pod reports Ready rather than sleeping for a fixed
    # few seconds: the first image pull can take far longer than that.
    # Deliberately not check=True -- on timeout we still want to fall
    # through and show the pod status below.
    print("Waiting for pod to start...")
    wait_result = subprocess.run(
        ['kubectl', 'wait', '--for=condition=Ready', 'pod/pandoc-extra-pod',
         f'--timeout={POD_READY_TIMEOUT}'],
        capture_output=True, text=True)
    if wait_result.returncode != 0:
        print(f"Pod not Ready after {POD_READY_TIMEOUT}: {wait_result.stderr.strip()}")

    # Check pod status
    result = subprocess.run(['kubectl', 'get', 'pods', '-l', 'app=pandoc-extra'],
                            capture_output=True, text=True, check=True)
    print("Pod status:")
    print(result.stdout)

    # Test pandoc in the pod (failure is reported, not raised)
    print("\nTesting pandoc in Kubernetes pod...")
    result = subprocess.run(['kubectl', 'exec', 'pandoc-extra-pod', '--', 'pandoc', '--version'],
                            capture_output=True, text=True)

    if result.returncode == 0:
        print("✓ Pandoc working in Kubernetes pod")
        # Split on a real newline so only the first (version) line is shown;
        # the doubled backslash used previously never matched, so the whole
        # banner was printed.
        print("Version:", result.stdout.split('\n')[0])
    else:
        print("Pod might still be starting. Check status with: kubectl get pods")

    # Show service status
    result = subprocess.run(['kubectl', 'get', 'services', 'pandoc-service'],
                            capture_output=True, text=True)
    if result.returncode == 0:
        print("\nService status:")
        print(result.stdout)

except subprocess.CalledProcessError as e:
    print(f"✗ Kubernetes deployment error: {e}")
    if e.stderr:
        print("Error details:", e.stderr.strip())
except FileNotFoundError:
    print("✗ kubectl not found. Please install kubectl first.")

### Cleanup Kubernetes resources (optional)

In [None]:
# Cleanup Kubernetes resources when done.
# Nothing is deleted by default: set the `cleanup` flag below to True and
# re-run this cell to delete the resources and the generated manifest file.

cleanup = False  # Set to True to perform cleanup

if cleanup:
    try:
        print("Cleaning up Kubernetes resources...")

        # Delete everything that was created from the manifest
        result = subprocess.run(['kubectl', 'delete', '-f', k8s_config_file],
                                capture_output=True, text=True, check=True)
        print("✓ Kubernetes resources deleted")
        print(result.stdout)

        # Remove the config file (guarded, so a FileNotFoundError below can
        # only mean that kubectl itself is missing)
        if os.path.exists(k8s_config_file):
            os.remove(k8s_config_file)
            print(f"✓ Removed {k8s_config_file}")

    except subprocess.CalledProcessError as e:
        print(f"✗ Cleanup error: {e}")
        if e.stderr:
            print("Error details:", e.stderr.strip())
    except FileNotFoundError:
        # Same handling as the deployment cell when kubectl cannot be launched.
        print("✗ kubectl not found. Please install kubectl first.")
else:
    print("Cleanup skipped. Set cleanup=True to remove Kubernetes resources.")
    print(f"To manually cleanup later, run: kubectl delete -f {k8s_config_file}")