# 08 - Fabric Ontology REST API Client

**Epic:** F5 - Fabric Ontology Integration  
**Feature:** F5.2 - Fabric Ontology REST API Client  
**Priority:** P1

## Purpose

Upload the generated Ontology definition to Fabric using the Ontology REST API. Creates a new Ontology item or updates an existing one.

## Input

- Ontology definition JSON from `Files/ontology_definitions/` (output of notebook 07)

## Output

- Created/updated Ontology item in Fabric workspace
- Ontology ID saved to `Files/config/ontology_config.json`

## API Operations

| Operation | Endpoint | Purpose |
|-----------|----------|----------|
| Create Ontology | POST /v1/workspaces/{workspaceId}/ontologies | Create new Ontology item |
| Get Definition | POST /v1/workspaces/{workspaceId}/ontologies/{ontologyId}/getDefinition | Retrieve current definition |
| Update Definition | POST /v1/workspaces/{workspaceId}/ontologies/{ontologyId}/updateDefinition | Push entity types, properties, relationships |

## Setup

In [None]:
import json
import os
import time
import requests
from datetime import datetime
from typing import Optional

In [None]:
# Fabric notebookutils for authentication
from notebookutils import mssparkutils

## Configuration

In [None]:
# Fabric API configuration
# Base URL that api_request() prefixes to every relative endpoint
FABRIC_API_BASE = "https://api.fabric.microsoft.com"
# API version; used as the first path segment when endpoints are built
FABRIC_API_VERSION = "v1"

# Ontology configuration
# Name must: start with letter, <90 chars, only letters/numbers/underscores
# The display name is also the key used to look up an existing ontology
ONTOLOGY_DISPLAY_NAME = "RDF_Translated_Ontology"
# Description sent only when a new ontology item is created
ONTOLOGY_DESCRIPTION = "Auto-generated ontology from RDF translation pipeline"

# Paths
# Directory scanned for ontology_definition_*.json files
DEFINITIONS_DIR = "/lakehouse/default/Files/ontology_definitions"
# Directory where ontology_config.json is written
CONFIG_DIR = "/lakehouse/default/Files/config"

# Retry configuration
# Total number of attempts api_request() makes (not "retries after the first")
MAX_RETRIES = 3
# Seconds to sleep between attempts; also the fallback when Retry-After is absent
RETRY_DELAY_SECONDS = 5
# Seconds to sleep between polls of a long-running operation
LRO_POLL_INTERVAL_SECONDS = 2
# Polling budget in seconds before wait_for_lro() raises TimeoutError
LRO_MAX_WAIT_SECONDS = 300

## Get Workspace and Lakehouse IDs

In [None]:
# Workspace ID is read straight from the Fabric runtime context
workspace_id = mssparkutils.runtime.context.get("currentWorkspaceId")

# The lakehouse ID is resolved by trying three sources in order; the first
# one that yields a truthy value wins.
lakehouse_id = None

# Source 1: the "trident.lakehouse.id" Spark setting
try:
    lakehouse_id = spark.conf.get("trident.lakehouse.id")
except Exception:
    # Setting unavailable - fall through to the next source
    pass

# Source 2: first entry returned by the lakehouse listing
# NOTE(review): assumes the first listed lakehouse is the attached one - confirm
if not lakehouse_id:
    try:
        lakehouses = mssparkutils.lakehouse.list()
        lakehouse_id = lakehouses[0].id if lakehouses else lakehouse_id
    except Exception:
        lakehouse_id = None

# Source 3: the "spark.lakehouse.default.id" Spark setting (None when unset)
if not lakehouse_id:
    try:
        lakehouse_id = spark.conf.get("spark.lakehouse.default.id", None)
    except Exception:
        pass

# Nothing worked: stop the notebook with an actionable message
if not lakehouse_id:
    raise ValueError(
        "Could not determine lakehouse ID. Please ensure a lakehouse is attached to this notebook."
    )

print(f"Workspace ID: {workspace_id}")
print(f"Lakehouse ID: {lakehouse_id}")

## Authentication

In [None]:
def get_fabric_token() -> str:
    """
    Request an access token for the Fabric API audience.

    The token is obtained through ``mssparkutils.credentials.getToken``.
    NOTE(review): which identity backs the token (user vs. workspace) is
    decided by the runtime and is not visible here.

    Returns:
        The token string returned by notebookutils.
    """
    return mssparkutils.credentials.getToken("https://api.fabric.microsoft.com")


def get_headers() -> dict:
    """
    Build the HTTP headers for a Fabric API call.

    A fresh token is requested on every call, so callers that build headers
    per request always send a current bearer token.

    Returns:
        Dict with the ``Authorization`` bearer header and a JSON content type.
    """
    return {
        "Authorization": f"Bearer {get_fabric_token()}",
        "Content-Type": "application/json",
    }


# Smoke-test authentication up front so a credentials problem surfaces here
# rather than in the middle of an API call. Only the token length is printed,
# never the token itself.
try:
    token = get_fabric_token()
    print(f"Successfully obtained token (length: {len(token)})")
except Exception as auth_error:
    print(f"Authentication failed: {auth_error}")
    # Re-raise so the notebook run stops at this cell
    raise

## API Helper Functions

In [None]:
def _retry_after_seconds(header_value, default):
    """Return the Retry-After delay in seconds, or *default* if it is unusable.

    Retry-After may be absent or may carry an HTTP-date instead of an integer;
    in both cases the configured default delay is used rather than raising.
    Negative values are clamped to zero because time.sleep() rejects them.
    """
    try:
        seconds = int(header_value)
    except (TypeError, ValueError):
        return default
    return max(seconds, 0)


def api_request(
    method: str,
    endpoint: str,
    data: Optional[dict] = None,
    params: Optional[dict] = None
) -> requests.Response:
    """
    Make an API request with retry logic.

    Up to MAX_RETRIES attempts are made (always at least one). An attempt is
    retried when the server answers 429 or 5xx, or when ``requests`` raises a
    transport-level error. Fresh auth headers are built for every attempt.

    Args:
        method: HTTP method (GET, POST, PATCH, DELETE)
        endpoint: API endpoint (relative to FABRIC_API_BASE); a leading
            slash is tolerated
        data: Request body (dict), sent as JSON
        params: Query parameters

    Returns:
        Response object. If every attempt ended in 429/5xx, the last such
        response is returned so the caller can inspect its status code.

    Raises:
        requests.exceptions.RequestException: if the final attempt fails at
            the transport level.
    """
    # Strip a leading slash so "/v1/..." and "v1/..." build the same URL
    url = f"{FABRIC_API_BASE}/{endpoint.lstrip('/')}"

    # Guarantee at least one attempt even if MAX_RETRIES is misconfigured
    attempts = max(MAX_RETRIES, 1)
    response = None

    for attempt in range(attempts):
        is_last_attempt = attempt == attempts - 1

        try:
            headers = get_headers()

            response = requests.request(
                method=method,
                url=url,
                headers=headers,
                json=data,
                params=params,
                timeout=60
            )
        except requests.exceptions.RequestException as e:
            print(f"Request failed (attempt {attempt + 1}/{attempts}): {e}")
            if is_last_attempt:
                raise
            time.sleep(RETRY_DELAY_SECONDS)
            continue

        retryable = response.status_code == 429 or response.status_code >= 500
        if not retryable:
            return response

        if is_last_attempt:
            # No point sleeping when there is nothing left to retry
            print(f"Giving up after {attempts} attempts (last status {response.status_code})")
            break

        if response.status_code == 429:  # Rate limited
            retry_after = _retry_after_seconds(
                response.headers.get("Retry-After"), RETRY_DELAY_SECONDS
            )
            print(f"Rate limited. Waiting {retry_after} seconds...")
            time.sleep(retry_after)
        else:  # Server error
            print(f"Server error {response.status_code}. Retrying in {RETRY_DELAY_SECONDS}s...")
            time.sleep(RETRY_DELAY_SECONDS)

    return response


def wait_for_lro(operation_url: str) -> dict:
    """
    Wait for a long-running operation to complete.

    The operation URL is polled every LRO_POLL_INTERVAL_SECONDS until the
    reported status is terminal or LRO_MAX_WAIT_SECONDS have elapsed. Fresh
    auth headers are built for every poll.

    Args:
        operation_url: URL from Location header of 202 response

    Returns:
        The JSON body of the poll that reported "Succeeded" or "Completed".
        This is the operation state as returned by the operation URL; callers
        that need the created resource must look it up separately.

    Raises:
        Exception: if the operation reports "Failed" or "Cancelled".
        TimeoutError: if no terminal status is seen within the time budget.
    """
    start_time = time.time()
    # Remembered so a timeout can say what the service was last doing
    last_observation = "no response received"

    while time.time() - start_time < LRO_MAX_WAIT_SECONDS:
        headers = get_headers()
        response = requests.get(operation_url, headers=headers, timeout=60)

        if response.status_code == 200:
            result = response.json()
            status = result.get("status", "Unknown")

            if status in ("Succeeded", "Completed"):
                print("Operation completed successfully")
                return result
            if status in ("Failed", "Cancelled"):
                error = result.get("error", {})
                raise Exception(f"Operation {status}: {error}")

            print(f"Operation status: {status}")
            last_observation = f"status {status}"
        else:
            # Surface non-200 polls instead of ignoring them, so an auth or
            # URL problem is visible in the output. Polling continues because
            # the failure may be transient.
            print(f"Polling returned HTTP {response.status_code}; will retry")
            last_observation = f"HTTP {response.status_code}"

        time.sleep(LRO_POLL_INTERVAL_SECONDS)

    raise TimeoutError(
        f"Operation timed out after {LRO_MAX_WAIT_SECONDS} seconds "
        f"(last observed: {last_observation})"
    )

## Ontology API Functions

In [None]:
def list_ontologies(workspace_id: str) -> list:
    """
    List all ontologies in a workspace.

    Follows the ``continuationToken`` returned by the service so that every
    page is collected, not just the first one.
    NOTE(review): assumes the list response uses the ``value`` /
    ``continuationToken`` paging shape - confirm against the API reference.

    Args:
        workspace_id: ID of the workspace to list.

    Returns:
        List of ontology dicts. If a page request fails, the failure is
        printed and the items collected so far are returned (an empty list
        when the very first request fails).
    """
    endpoint = f"{FABRIC_API_VERSION}/workspaces/{workspace_id}/ontologies"

    ontologies = []
    continuation_token = None

    while True:
        # First page is requested without query parameters
        params = {"continuationToken": continuation_token} if continuation_token else None
        response = api_request("GET", endpoint, params=params)

        if response.status_code != 200:
            print(f"Failed to list ontologies: {response.status_code}")
            print(response.text)
            return ontologies

        page = response.json()
        ontologies.extend(page.get("value", []))

        next_token = page.get("continuationToken")
        # Stop when there is no further page; the equality check guards
        # against looping forever if the service repeats a token.
        if not next_token or next_token == continuation_token:
            return ontologies
        continuation_token = next_token


def get_ontology(workspace_id: str, ontology_id: str) -> Optional[dict]:
    """
    Fetch the metadata of a single ontology.

    Args:
        workspace_id: ID of the workspace that owns the ontology.
        ontology_id: ID of the ontology to fetch.

    Returns:
        The parsed JSON body on HTTP 200, otherwise None.
    """
    response = api_request(
        "GET",
        f"{FABRIC_API_VERSION}/workspaces/{workspace_id}/ontologies/{ontology_id}",
    )
    return response.json() if response.status_code == 200 else None


def find_ontology_by_name(workspace_id: str, display_name: str) -> Optional[dict]:
    """
    Look up an ontology in the workspace by its exact display name.

    Args:
        workspace_id: ID of the workspace to search.
        display_name: Display name to match (exact, case-sensitive comparison).

    Returns:
        The first listed ontology whose ``displayName`` equals *display_name*,
        or None when there is no match.
    """
    return next(
        (
            candidate
            for candidate in list_ontologies(workspace_id)
            if candidate.get("displayName") == display_name
        ),
        None,
    )


def create_ontology(workspace_id: str, display_name: str, description: str = "") -> dict:
    """
    Create a new Ontology item in the workspace.

    Args:
        workspace_id: ID of the target workspace.
        display_name: Display name for the new ontology.
        description: Optional description text.

    Returns:
        On HTTP 201, the JSON body of the create response.
        On HTTP 202 with a Location header, whatever wait_for_lro() returns
        for that operation (which may not contain the ontology ``id``).

    Raises:
        Exception: for any other outcome, including a 202 without a
            Location header.
    """
    response = api_request(
        "POST",
        f"{FABRIC_API_VERSION}/workspaces/{workspace_id}/ontologies",
        data={"displayName": display_name, "description": description},
    )
    status_code = response.status_code

    # Synchronous creation: the body is the new item
    if status_code == 201:
        created = response.json()
        print(f"Created ontology: {created.get('id')}")
        return created

    # Asynchronous creation: poll the operation named by the Location header
    if status_code == 202:
        operation_url = response.headers.get("Location")
        if operation_url:
            return wait_for_lro(operation_url)

    # Anything else (or a 202 that cannot be polled) is a failure
    print(f"Failed to create ontology: {response.status_code}")
    print(response.text)
    raise Exception(f"Create ontology failed: {response.status_code}")

In [None]:
def get_ontology_definition(workspace_id: str, ontology_id: str) -> dict:
    """
    Request the current definition of an ontology.

    Args:
        workspace_id: ID of the workspace that owns the ontology.
        ontology_id: ID of the ontology.

    Returns:
        On HTTP 200, the JSON body of the response.
        On HTTP 202 with a Location header, whatever wait_for_lro() returns
        for that operation.
        Otherwise an empty dict, after printing the status code and body.
    """
    response = api_request(
        "POST",
        f"{FABRIC_API_VERSION}/workspaces/{workspace_id}/ontologies/{ontology_id}/getDefinition",
    )
    status_code = response.status_code

    if status_code == 200:
        return response.json()

    if status_code == 202:
        operation_url = response.headers.get("Location")
        if operation_url:
            return wait_for_lro(operation_url)

    # Best-effort: report the failure and hand back an empty result
    print(f"Failed to get definition: {response.status_code}")
    print(response.text)
    return {}


def update_ontology_definition(workspace_id: str, ontology_id: str, definition_parts: list) -> dict:
    """
    Update the ontology definition with new entity types, properties, and relationships.

    Args:
        workspace_id: ID of the workspace
        ontology_id: ID of the ontology to update
        definition_parts: List of definition parts (each with path, payload, payloadType)

    Returns:
        On HTTP 200, the parsed JSON body, or an empty dict when the response
        has no body. On HTTP 202 with a Location header, whatever
        wait_for_lro() returns for that operation.

    Raises:
        Exception: for any other outcome, including a 202 without a
            Location header.
    """
    endpoint = f"{FABRIC_API_VERSION}/workspaces/{workspace_id}/ontologies/{ontology_id}/updateDefinition"

    data = {
        "definition": {
            "parts": definition_parts
        }
    }

    response = api_request("POST", endpoint, data=data)

    if response.status_code == 200:
        print("Definition updated successfully")
        # A successful update may come back without a body; parsing an empty
        # body would raise and turn a success into a failure.
        return response.json() if response.content.strip() else {}
    elif response.status_code == 202:
        # Long-running operation
        operation_url = response.headers.get("Location")
        if operation_url:
            print("Update is async. Polling for completion...")
            return wait_for_lro(operation_url)

    print(f"Failed to update definition: {response.status_code}")
    print(response.text)
    raise Exception(f"Update definition failed: {response.status_code} - {response.text}")

## Load Definition from File

In [None]:
def get_latest_definition_file() -> str:
    """
    Locate the newest ontology definition file in DEFINITIONS_DIR.

    Candidates are files named ``ontology_definition_*.json``. "Newest" means
    the lexicographically greatest file name, which matches chronological
    order as long as the names embed a YYYYMMDD_HHMMSS timestamp.

    Returns:
        Full path of the selected file.

    Raises:
        FileNotFoundError: if no candidate file exists.
    """
    candidates = [
        name
        for name in os.listdir(DEFINITIONS_DIR)
        if name.startswith("ontology_definition_") and name.endswith(".json")
    ]

    if not candidates:
        raise FileNotFoundError(f"No definition files found in {DEFINITIONS_DIR}")

    # Greatest name == most recent timestamp
    return os.path.join(DEFINITIONS_DIR, max(candidates))


def load_definition(file_path: str) -> dict:
    """
    Load ontology definition from JSON file.

    The file is always decoded as UTF-8 so the result does not depend on the
    platform's default text encoding.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The parsed JSON content.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)


# base64 is needed by both inspection sections below, so import it once up
# front instead of inside the loop body.
import base64

# Load the latest definition
definition_file = get_latest_definition_file()
print(f"Loading definition from: {definition_file}")

full_definition = load_definition(definition_file)
metadata = full_definition.get("metadata", {})
definition_parts = full_definition.get("definition", {}).get("parts", [])

print(f"\nDefinition metadata:")
print(f"  Generated: {metadata.get('generated_at', 'unknown')}")
print(f"  Entity types: {metadata.get('entity_type_count', 0)}")
print(f"  Relationship types: {metadata.get('relationship_type_count', 0)}")
print(f"  Total parts: {len(definition_parts)}")

# Debug: Inspect actual part content (first three parts only)
print(f"\n--- Debug: Inspecting definition structure ---")
for part in definition_parts[:3]:
    print(f"\nPath: {part.get('path')}")
    # Decode and show content
    payload = part.get('payload', '')
    try:
        decoded = base64.b64decode(payload).decode('utf-8')
    except (TypeError, ValueError):
        # Not valid base64 / not UTF-8 text / not a string at all:
        # fall back to showing the declared payload type.
        print(f"Payload type: {part.get('payloadType')}")
    else:
        print(f"Content preview: {decoded[:300]}...")

# Check if any entity type has properties with 'valueType'
entity_parts = [p for p in definition_parts if "EntityTypes" in p.get("path", "") and "definition.json" in p.get("path", "")]
if entity_parts:
    print(f"\n--- Checking entity type property format ---")
    sample = entity_parts[0]
    try:
        decoded = json.loads(base64.b64decode(sample['payload']).decode('utf-8'))
    except (KeyError, TypeError, ValueError) as decode_error:
        # This section is diagnostic only; an undecodable sample should not
        # stop the notebook before the upload step.
        print(f"Could not decode sample entity part: {decode_error}")
    else:
        print(f"Sample entity: {decoded.get('name')}")
        if decoded.get('properties'):
            first_prop = decoded['properties'][0]
            print(f"First property keys: {list(first_prop.keys())}")
            print(f"First property: {first_prop}")

## Create or Get Ontology

In [None]:
# Reuse an ontology with the configured display name if one exists;
# otherwise create it.
existing_ontology = find_ontology_by_name(workspace_id, ONTOLOGY_DISPLAY_NAME)

if not existing_ontology:
    print(f"Creating new ontology: {ONTOLOGY_DISPLAY_NAME}")
    result = create_ontology(workspace_id, ONTOLOGY_DISPLAY_NAME, ONTOLOGY_DESCRIPTION)

    if "id" not in result:
        # The create call went through the long-running-operation path, whose
        # result does not carry the item ID - resolve it by display name.
        created_ontology = find_ontology_by_name(workspace_id, ONTOLOGY_DISPLAY_NAME)
        if not created_ontology:
            raise Exception("Ontology creation completed but could not find the created ontology")
        ontology_id = created_ontology["id"]
    else:
        # Synchronous create: the response body is the new item
        ontology_id = result["id"]

    print(f"Created ontology: {ontology_id}")
else:
    ontology_id = existing_ontology["id"]
    print(f"Found existing ontology: {ontology_id}")
    print(f"  Display Name: {existing_ontology.get('displayName')}")
    print(f"  Description: {existing_ontology.get('description')}")

## Upload Definition

In [None]:
# Push every loaded definition part to the ontology in a single
# updateDefinition call.
print(f"Uploading definition to ontology {ontology_id}...")
print(f"  Parts to upload: {len(definition_parts)}")

try:
    result = update_ontology_definition(workspace_id, ontology_id, definition_parts)
    print("\nDefinition upload complete!")
except Exception as upload_error:
    print(f"\nDefinition upload failed: {upload_error}")
    # Re-raise so later cells do not run against a failed upload
    raise

## Verify Definition

In [None]:
# Verify the definition was uploaded correctly
print("Verifying uploaded definition...")

# get_ontology_definition() returns one of three shapes:
#   - the definition payload itself (synchronous HTTP 200),
#   - the long-running-operation state with a "status" field (HTTP 202),
#   - an empty dict when the request failed.
result = get_ontology_definition(workspace_id, ontology_id)

status = result.get("status", "Unknown")
if "definition" in result:
    # Synchronous answer: there is no "status" field, so report on the
    # returned parts instead of flagging a bogus "Unknown" status.
    # NOTE(review): assumes the payload shape {"definition": {"parts": [...]}} - confirm
    returned_parts = (result.get("definition") or {}).get("parts", [])
    print(f"\n✓ Ontology definition retrieved from Fabric!")
    print(f"  Parts returned by service: {len(returned_parts)}")
    print(f"\nExpected content:")
    print(f"  Entity types: {metadata.get('entity_type_count', 'N/A')}")
    print(f"  Relationship types: {metadata.get('relationship_type_count', 'N/A')}")
    print(f"  Total parts uploaded: {len(definition_parts)}")
elif status == "Succeeded":
    print(f"\n✓ Ontology definition upload verified!")
    print(f"  Status: {status}")
    print(f"  Completed: {result.get('lastUpdatedTimeUtc', 'N/A')}")
    print(f"\nExpected content:")
    print(f"  Entity types: {metadata.get('entity_type_count', 'N/A')}")
    print(f"  Relationship types: {metadata.get('relationship_type_count', 'N/A')}")
    print(f"  Total parts uploaded: {len(definition_parts)}")
else:
    print(f"\n⚠ Unexpected status: {status}")
    print(f"  Error: {result.get('error', 'None')}")

print("\n→ Check Fabric portal to view the ontology entity types")

## Save Configuration

In [None]:
# Persist the IDs and counts that downstream notebooks need.
os.makedirs(CONFIG_DIR, exist_ok=True)

config = dict(
    ontology_id=ontology_id,
    workspace_id=workspace_id,
    lakehouse_id=lakehouse_id,
    display_name=ONTOLOGY_DISPLAY_NAME,
    # Local wall-clock time of this run, ISO 8601 without timezone
    updated_at=datetime.now().isoformat(),
    definition_file=definition_file,
    entity_type_count=metadata.get("entity_type_count", 0),
    relationship_type_count=metadata.get("relationship_type_count", 0),
)

config_path = os.path.join(CONFIG_DIR, "ontology_config.json")
with open(config_path, 'w') as f:
    f.write(json.dumps(config, indent=2))

print(f"Saved configuration to: {config_path}")

## Summary

In [None]:
# Final report: header banner, identifiers, counts, then next steps.
print("=" * 60, "Fabric Ontology API Client Complete", "=" * 60, sep="\n")
print(
    f"\nOntology ID: {ontology_id}",
    f"Workspace ID: {workspace_id}",
    f"Display Name: {ONTOLOGY_DISPLAY_NAME}",
    sep="\n",
)
print(
    f"\nEntity Types: {metadata.get('entity_type_count', 0)}",
    f"Relationship Types: {metadata.get('relationship_type_count', 0)}",
    sep="\n",
)

print("\n" + "=" * 60, "Next Steps:", "=" * 60, sep="\n")
print(
    "1. Run F5.3 (Data Binding) to connect gold tables to entity types",
    "2. Go to Fabric portal → Ontology → View your ontology",
    "3. Once data is bound, query via Fabric Graph!",
    sep="\n",
)