# In [0]:
import os
import re
import time
from urllib.parse import urlparse

import requests

def export_and_download_confluence_page(confluence_base_url, page_id, username, password, timeout=120):
    """
    Export a Confluence page as PDF and download it

    The export is a two-request exchange: a GET on the ``flyingpdf`` export
    action must answer with a 302 redirect whose ``Location`` header points at
    the generated file, which is then fetched with a second GET. Both requests
    use HTTP basic auth.

    Args:
        confluence_base_url: Base URL of your Confluence instance (a trailing
            slash is tolerated)
        page_id: ID of the page to export
        username: Confluence username
        password: Confluence password
        timeout: Seconds to wait on each HTTP request before giving up

    Returns:
        PDF content as bytes

    Raises:
        Exception: If the export request does not answer with 302, the
            redirect has no Location header, or the download does not
            answer with 200.
        requests.exceptions.RequestException: On connection errors or when
            a request exceeds ``timeout``.
    """
    # Normalise once so joins below never produce "//" after the base URL.
    base_url = confluence_base_url.rstrip("/")

    # Step 1: Call the export URL to initiate PDF generation
    export_url = f"{base_url}/flyingpdf/pdfpageexport.action?pageId={page_id}"

    headers = {
        "X-Atlassian-Token": "no-check"
    }

    # Redirects are not followed automatically: the Location header is
    # inspected and resolved by hand in step 2. Without a timeout requests
    # would wait indefinitely on a stalled server.
    response = requests.get(
        export_url,
        headers=headers,
        auth=(username, password),
        allow_redirects=False,
        timeout=timeout
    )

    if response.status_code != 302:
        raise Exception(f"Failed to initiate PDF export: {response.status_code}, {response.text}")

    # Step 2: Extract the download location from the response headers
    if 'location' not in response.headers:
        raise Exception("No download location found in response headers")

    download_path = response.headers['location']

    # A Location with a host part is used as-is; anything else is treated as
    # a path under the base URL, with exactly one "/" between the two.
    # NOTE(review): this assumes a relative Location is relative to
    # confluence_base_url rather than to the server root - confirm against
    # the target Confluence instance.
    if urlparse(download_path).netloc:
        download_url = download_path
    else:
        relative_path = download_path.lstrip("/")
        download_url = f"{base_url}/{relative_path}"

    # Step 3: Download the PDF file
    pdf_response = requests.get(
        download_url,
        auth=(username, password),
        timeout=timeout
    )

    if pdf_response.status_code != 200:
        raise Exception(f"Failed to download PDF: {pdf_response.status_code}, {pdf_response.text}")

    return pdf_response.content

def store_pdf_in_uc_volume(pdf_content, catalog_name, schema_name, volume_name, file_path):
    """
    Store PDF content in a Unity Catalog volume

    The target is ``/Volumes/<catalog>/<schema>/<volume>/<file_path>``. Any
    missing parent directories of the target are created first, so a nested
    ``file_path`` such as ``exports/page_1.pdf`` works on a fresh volume.

    Args:
        pdf_content: PDF content as bytes
        catalog_name: UC catalog name
        schema_name: UC schema name
        volume_name: UC volume name
        file_path: Path within the volume where the PDF should be stored

    Returns:
        The full path of the written file

    Raises:
        OSError: If the directories cannot be created or the file cannot
            be written.
    """
    # Construct the full path to the file in the UC volume
    volume_path = f"/Volumes/{catalog_name}/{schema_name}/{volume_name}/{file_path}"

    # open(..., 'wb') does not create intermediate directories; without this
    # the write fails with FileNotFoundError when the folder is new.
    os.makedirs(os.path.dirname(volume_path), exist_ok=True)

    # Write the PDF content to the volume
    with open(volume_path, 'wb') as f:
        f.write(pdf_content)

    return volume_path

# Main function to export Confluence pages and store them in UC Volume
def export_confluence_pages_to_uc_volume(confluence_base_url, page_ids,
                                        confluence_username, confluence_password,
                                        catalog_name, schema_name, volume_name,
                                        base_path="confluence_exports"):
    """
    Export a batch of Confluence pages as PDFs into a UC Volume

    Each page is downloaded and written to
    ``<base_path>/page_<page_id>.pdf`` inside the volume. A page that fails
    is reported on stdout and skipped; the remaining pages are still
    processed.

    Args:
        confluence_base_url: Base URL of your Confluence instance
        page_ids: List of page IDs to export
        confluence_username: Confluence username
        confluence_password: Confluence password
        catalog_name: UC catalog name
        schema_name: UC schema name
        volume_name: UC volume name
        base_path: Base path within the volume to store the exports

    Returns:
        List of volume paths for the pages that were exported successfully
    """

    def _export_one(single_page_id):
        # Download the PDF for one page, then persist it in the volume.
        pdf_bytes = export_and_download_confluence_page(
            confluence_base_url, single_page_id, confluence_username, confluence_password
        )
        return store_pdf_in_uc_volume(
            pdf_bytes,
            catalog_name,
            schema_name,
            volume_name,
            f"{base_path}/page_{single_page_id}.pdf",
        )

    saved_paths = []

    for page_id in page_ids:
        try:
            volume_path = _export_one(page_id)
            saved_paths.append(volume_path)
            print(f"Successfully exported page {page_id} to {volume_path}")
        except Exception as e:
            # Best effort: report the failure and carry on with the next page.
            print(f"Failed to export page {page_id}: {str(e)}")

    return saved_paths

# Example usage
# NOTE(review): the export request is sent to
# "<confluence_base_url>/flyingpdf/pdfpageexport.action" - confirm that this
# space URL is the right prefix for that endpoint on this instance.
# Source page: https://databricks.atlassian.net/wiki/spaces/FE/pages/1657504546/Databricks+Field+Workspaces
confluence_base_url = "https://databricks.atlassian.net/wiki/spaces/FE/"
page_ids = ["1657504546"]  # List of page IDs to export
confluence_username = "theodore.kop@databricks.com"
# Take the credential from the environment so the real secret never has to be
# saved in the notebook; the masked placeholder is used only when the
# CONFLUENCE_PASSWORD variable is not set.
confluence_password = os.environ.get("CONFLUENCE_PASSWORD", "******************")
catalog_name = "theodore_kop_personal"
schema_name = "bcp"
volume_name = "documentation_exports"

# Execute the export; pages that fail are reported and left out of the result
exported_files = export_confluence_pages_to_uc_volume(
    confluence_base_url,
    page_ids,
    confluence_username,
    confluence_password,
    catalog_name,
    schema_name,
    volume_name
)

print(f"Exported {len(exported_files)} files to UC Volume")
