# Fabric Pipeline & Dataflow Monitoring

This notebook collects **Pipeline Runs** and **Dataflow Runs** from the Fabric REST APIs and sends them to Azure Log Analytics.

In [None]:
# === Updated Framework Integration ===
# Load the consolidated fabricla_connector framework that the later cells use.
import sys
import os

# Make the framework importable without installing it: append
# "<parent of the current working directory>/src" to the module search path.
# NOTE(review): this presumably expects the notebook to be started from a
# direct child folder of the repository root -- confirm for your layout.
sys.path.append(os.path.join(os.path.dirname(os.getcwd()), 'src'))

# Import the framework components
from fabricla_connector.config import get_config
from fabricla_connector.api import FabricAPIClient
from fabricla_connector.collectors import PipelineCollector, DataflowCollector
from fabricla_connector.workflows import collect_and_ingest_pipeline_data_enhanced
from fabricla_connector.ingestion import FabricIngestion
from fabricla_connector.utils import within_lookback_minutes, create_time_window

# Only reached when every import above succeeded.
print("✅ FabricLA Connector framework loaded successfully")

In [None]:
# === Framework-Based Configuration ===
# Parameters cell: every user-tunable setting of the notebook is defined here.
from dotenv import load_dotenv

# Load environment variables (python-dotenv reads them from a .env file)
load_dotenv()

# Get configuration through the framework
config = get_config()

# Workspace and item configuration
workspace_id = config.get("FABRIC_WORKSPACE_ID")

# List only the items you want to collect. An empty list makes the collection
# cell skip that item type.
pipeline_item_ids = [
    # Replace these sample GUIDs with your own pipeline item IDs:
    "fffa88df-ed23-45ba-bb2e-803f0089dc39",
    "92243a88-c144-4749-8eac-e2dd8e7f9b31",
]
dataflow_item_ids = [
    # Replace these sample GUIDs with your own dataflow item IDs:
    "bc5c92b0-d58a-487b-8692-965e69345792",
    "065696af-4621-4538-953c-65899053ae24",
]

# === CONFIGURATION MODES ===
# Exactly one option should be active; the other two stay commented out.
# OPTION 1: Bulk Ingestion (Historical Data Collection)
lookback_minutes = 43200  # 30 days (43200 / 1440) for comprehensive bulk load
collect_activity_runs = True  # Also fetch activity runs for every pipeline run

# OPTION 2: Incremental Collection (Regular Monitoring)
# lookback_minutes = 1200  # 20 hours for regular incremental collection
# collect_activity_runs = True  # Enabled for detailed activity monitoring

# OPTION 3: Activity Runs Backfill (After bulk completion)
# lookback_minutes = 10080  # 7 days for recent activity runs
# collect_activity_runs = True  # Enabled for detailed data

# DCR / Logs Ingestion API settings from environment variables.
# The host is stored WITHOUT a scheme: later cells prepend "https://" themselves.
dcr_endpoint_host = config.get("DCR_ENDPOINT_HOST")
dcr_immutable_id = config.get("DCR_IMMUTABLE_ID")

# Stream names must match your DCR configuration and map to the LA tables we created
stream_pipeline = "Custom-FabricPipelineRun_CL"
stream_activity = "Custom-FabricPipelineActivityRun_CL"
stream_dataflow = "Custom-FabricDataflowRun_CL"

# === Authentication Configuration ===
# Service principal credentials from environment variables
tenant_id = config.get("FABRIC_TENANT_ID")
client_id = config.get("FABRIC_APP_ID")
client_secret_env = config.get("FABRIC_APP_SECRET")

# Key Vault configuration (optional)
use_key_vault = False  # Set to True to use Key Vault
use_managed_identity = False  # Set to True when running on Azure resources (VM, Container App, etc.)
# NOTE(review): the fallback URI below points at one specific vault; anyone who
# enables Key Vault without setting AZURE_KEY_VAULT_URI would target that vault.
# Consider dropping the default -- confirm nothing relies on it first.
key_vault_uri = config.get("AZURE_KEY_VAULT_URI", "https://kaydemokeyvault.vault.azure.net/")
key_vault_secret_name = config.get("AZURE_KEY_VAULT_SECRET_NAME", "FabricServicePrincipal")

# Validate that every setting the notebook cannot run without is present.
# Missing values are only reported here; the cell does not stop execution.
required_vars = ["FABRIC_WORKSPACE_ID", "FABRIC_TENANT_ID", "FABRIC_APP_ID", "DCR_ENDPOINT_HOST", "DCR_IMMUTABLE_ID"]
missing = [var for var in required_vars if not config.get(var)]

if missing:
    print(f"❌ Missing required environment variables: {', '.join(missing)}")
    print("   Please check your .env file or environment configuration")
else:
    print("✅ All required environment variables loaded successfully")

# Managed identity is a valid way to authenticate too, so warn only when none
# of the three supported methods (secret, Key Vault, managed identity) is set.
if not (client_secret_env or use_key_vault or use_managed_identity):
    print("⚠️  Warning: No authentication method configured")
    print("   Either set FABRIC_APP_SECRET, configure Key Vault, or enable managed identity")

# Echo the effective settings so a run can be audited from the cell output.
print("\n📊 Data Collection Configuration:")
print(f"  Workspace ID: {workspace_id or 'Not set'}")
print(f"  Lookback Window: {lookback_minutes:,} minutes ({lookback_minutes/1440:.1f} days)")
print(f"  Collect Activity Runs: {collect_activity_runs}")
print(f"  Pipeline Items: {len(pipeline_item_ids)}")
print(f"  Dataflow Items: {len(dataflow_item_ids)}")

print("\n🔗 Azure Monitor Configuration:")
print(f"  DCR Endpoint: {dcr_endpoint_host or 'Not set'}")
print(f"  DCR Immutable ID: {dcr_immutable_id or 'Not set'}")
print("  Stream Names:")
print(f"    - Pipeline: {stream_pipeline}")
print(f"    - Activity: {stream_activity}")
print(f"    - Dataflow: {stream_dataflow}")

# Store configuration for use in other cells
ingestion_config = {
    "dcr_endpoint_host": dcr_endpoint_host,
    "dcr_immutable_id": dcr_immutable_id,
    "stream_pipeline": stream_pipeline,
    "stream_activity": stream_activity,
    "stream_dataflow": stream_dataflow,
}

In [None]:
# === Framework-Based Authentication ===
# Build the Fabric API client and the ingestion client, then prove that tokens
# for both the Fabric API and Azure Monitor can be acquired.
from azure.identity import DefaultAzureCredential

print("🔐 Setting up authentication...")

try:
    # Fail early with a clear message instead of building "https://None".
    if not dcr_endpoint_host or not dcr_immutable_id:
        raise ValueError(
            "DCR_ENDPOINT_HOST and DCR_IMMUTABLE_ID must be set before the ingestion client can be created"
        )

    # Accept a bare host name as well as a full URL, so a value that already
    # carries a scheme does not become "https://https://...".
    if dcr_endpoint_host.startswith(("https://", "http://")):
        dcr_endpoint_url = dcr_endpoint_host
    else:
        dcr_endpoint_url = f"https://{dcr_endpoint_host}"

    # Create API client using framework
    fabric_client = FabricAPIClient()

    # Initialize ingestion client with the DCR parameters
    ingestion_client = FabricIngestion(
        endpoint_host=dcr_endpoint_url,
        dcr_id=dcr_immutable_id,
        stream_name=stream_pipeline  # Default stream, can be overridden per call
    )

    print("✅ Framework components initialized successfully")
    print(f"   DCR Endpoint: {dcr_endpoint_url}")
    print(f"   DCR ID: {dcr_immutable_id[:20]}...")

    # Test authentication against both resources the notebook talks to
    credential = DefaultAzureCredential()
    fabric_token = credential.get_token("https://api.fabric.microsoft.com/.default")
    monitor_token = credential.get_token("https://monitor.azure.com/.default")

    # Never echo any part of a bearer token: notebook output is saved with the
    # file and is routinely shared or committed.
    print("✅ Authentication tokens acquired successfully")
    print("   Fabric token: acquired (value hidden)")
    print("   Monitor token: acquired (value hidden)")

except Exception as e:
    # Report, show the traceback, then re-raise so later cells do not run
    # against half-initialized clients.
    print(f"❌ Authentication or framework initialization failed: {e}")
    print("   Please check your credentials and configuration")
    import traceback
    traceback.print_exc()
    raise

In [None]:
# === Environment Variable Validation ===
# Report which .env files exist and which required/optional environment
# variables are set, without ever revealing credential values.

import os

# Variables whose values are credentials; not even a prefix may be printed.
SENSITIVE_ENV_VARS = {"FABRIC_APP_SECRET"}


def display_env_value(var_name, value):
    """Return a rendering of an environment variable that is safe to print.

    Args:
        var_name: Name of the environment variable.
        value: Its non-empty string value.

    Returns:
        A fixed mask for variables listed in SENSITIVE_ENV_VARS; otherwise the
        value itself, cut to its first 8 characters plus "..." when longer.
    """
    if var_name in SENSITIVE_ENV_VARS:
        return "******** (hidden)"
    return value[:8] + "..." if len(value) > 8 else value


# Check if .env file exists in current directory or parent directories
env_files_found = [
    path for path in ('.env', '../.env', '../../.env') if os.path.exists(path)
]

print("🔍 Environment File Detection:")
if env_files_found:
    print(f"   Found .env files: {', '.join(env_files_found)}")
else:
    print("   No .env files found in current or parent directories")
    print("   Make sure to copy .env.example to .env and fill in your values")

# Detailed environment variable status: (variable name, human description)
required_vars = [
    ("FABRIC_TENANT_ID", "Azure tenant ID"),
    ("FABRIC_APP_ID", "Service principal client ID"),
    ("FABRIC_APP_SECRET", "Service principal client secret"),
    ("FABRIC_WORKSPACE_ID", "Fabric workspace ID"),
    ("DCR_ENDPOINT_HOST", "Data Collection Rule endpoint host"),
    ("DCR_IMMUTABLE_ID", "Data Collection Rule immutable ID"),
]

optional_vars = [
    ("LOG_ANALYTICS_WORKSPACE_ID", "Log Analytics workspace ID"),
    ("AZURE_SUBSCRIPTION_ID", "Azure subscription ID"),
    ("FABRIC_RUNTIME_VERSION", "Fabric runtime version"),
    ("AZURE_KEY_VAULT_URI", "Azure Key Vault URI"),
    ("AZURE_KEY_VAULT_SECRET_NAME", "Key Vault secret name"),
]

print("\n📋 Required Environment Variables:")
for var_name, description in required_vars:
    value = os.getenv(var_name)
    if value:
        display_value = display_env_value(var_name, value)
        print(f"   ✅ {var_name}: {display_value}")
    else:
        print(f"   ❌ {var_name}: Not set - {description}")

print("\n📋 Optional Environment Variables:")
for var_name, description in optional_vars:
    value = os.getenv(var_name)
    if value:
        display_value = display_env_value(var_name, value)
        print(f"   ✅ {var_name}: {display_value}")
    else:
        print(f"   ⚪ {var_name}: Not set - {description}")

print("\n💡 Next Steps:")
print("   1. Ensure all required variables are set in your .env file")
print("   2. Run the parameters cell above to load configuration")
print("   3. Proceed with authentication and data collection")

In [None]:
# === Fabric Runtime Detection ===
# Detect whether the notebook runs inside Microsoft Fabric and, when the
# workspace ID is still unset, try to fill it in from the Fabric context.

import sys
import os

# The parameters cell normally defines workspace_id. Fall back to None so this
# cell can also run on its own instead of failing with a NameError.
workspace_id = globals().get("workspace_id")

# Detect if we're running in Fabric: notebookutils is importable only there.
running_in_fabric = False
try:
    import notebookutils
    running_in_fabric = True
    print("🏭 Running in Microsoft Fabric environment")
    print(f"   Fabric notebook utilities available: {notebookutils is not None}")

    # Best effort: a failure here must not stop the notebook, but it is
    # reported rather than silently swallowed.
    # NOTE(review): getSecret is called with "FabricWorkspace" as the vault and
    # "WorkspaceId" as the secret name -- confirm such a secret really exists.
    try:
        fabric_workspace_info = notebookutils.credentials.getSecret("FabricWorkspace", "WorkspaceId")
        if fabric_workspace_info and not workspace_id:
            workspace_id = fabric_workspace_info
            print(f"   Using Fabric workspace context: {workspace_id[:8]}...")
    except Exception as exc:
        print(f"   ℹ️  Workspace ID not available from Fabric secrets: {exc}")

except ImportError:
    print("💻 Running in local development environment")
    print("   Loading configuration from .env file")

# Check for semantic-link-sempy (available in Fabric)
try:
    import sempy.fabric as fabric
    print("✅ Semantic Link available - can use Fabric workspace functions")

    # Get current workspace if not set
    if not workspace_id:
        try:
            current_workspace = fabric.get_workspace_id()
            if current_workspace:
                workspace_id = current_workspace
                print(f"   Auto-detected workspace ID: {workspace_id[:8]}...")
        except Exception as exc:
            print(f"   ℹ️  Could not auto-detect workspace ID: {exc}")

except ImportError:
    if running_in_fabric:
        print("⚠️  Semantic Link not available in this Fabric runtime")
    else:
        print("ℹ️  Semantic Link not available (local environment)")

# Fabric-specific authentication options
if running_in_fabric:
    print("\n🔐 Fabric Authentication Options:")
    print("   1. Use Fabric workspace identity (recommended)")
    print("   2. Use Key Vault with workspace managed identity")
    print("   3. Set credentials in parameters cell")
    print("   4. Use environment variables (if .env file uploaded)")

    # notebookutils is guaranteed to be imported on this branch.
    if hasattr(notebookutils, 'credentials'):
        print("   ✅ Fabric credential utilities available")
    else:
        print("   ⚠️  Fabric credential utilities not available")
else:
    print("\n🔐 Local Development Authentication:")
    print("   Using environment variables from .env file")

print("\n📍 Current Configuration:")
print(f"   Runtime Environment: {'Fabric' if running_in_fabric else 'Local'}")
print(f"   Workspace ID: {workspace_id[:8] + '...' if workspace_id else 'Not set'}")
print(f"   Python Version: {sys.version.split()[0]}")
print(f"   Working Directory: {os.getcwd()}")

In [None]:
# === Alternative: Using Individual Framework Components ===
# This cell shows how to use framework components individually for custom scenarios.
# The example is kept in a string literal so it is displayed but never executed.
# It uses the same configuration keys (DCR_ENDPOINT_HOST / DCR_IMMUTABLE_ID) and
# the same "https://" endpoint form as the live cells of this notebook.

"""
# For advanced users who want granular control:

# 1. Individual collectors
pipeline_collector = PipelineCollector(fabric_client)
dataflow_collector = DataflowCollector(fabric_client)

# 2. Manual collection with custom filtering
pipeline_runs = pipeline_collector.collect_pipeline_runs(
    workspace_id=workspace_id,
    pipeline_id="your-pipeline-id",
    lookback_minutes=lookback_minutes
)

# 3. Custom ingestion with specific configuration
custom_ingestion = FabricIngestion(
    endpoint_host=f"https://{config.get('DCR_ENDPOINT_HOST')}",
    dcr_id=config.get("DCR_IMMUTABLE_ID"),
    stream_name="Custom-MyCustomTable_CL"
)

# 4. Enhanced ingestion with troubleshooting
result = custom_ingestion.ingest_enhanced(
    records=pipeline_runs,
    troubleshoot=True
)

# The framework provides flexibility for both simple workflows and advanced scenarios
"""

print("💡 Framework components available for custom scenarios:")
print("   - FabricAPIClient: Low-level Fabric REST API access")
print("   - PipelineCollector: Focused pipeline data collection")
print("   - DataflowCollector: Focused dataflow data collection")
print("   - FabricIngestion: Enhanced ingestion with retry logic")
print("   - Workflows: High-level orchestration functions")
print("   - Utils: Helper functions for date/time, chunking, validation")

In [None]:
# === Data Collection and Ingestion ===
# Set up the run summary and the collectors used by the rest of this cell.
import datetime as dt
import json

print("🚀 Starting pipeline and dataflow data collection...")
print("=" * 60)

# Initialize summary for tracking results. Later sections add one entry per
# record type ("pipeline_runs", "activity_runs", "dataflow_runs").
summary = {
    # Timezone-aware replacement for the deprecated datetime.utcnow(); the
    # "+00:00" suffix is rewritten to "Z" so the stored format is unchanged.
    "collection_timestamp": dt.datetime.now(dt.timezone.utc).isoformat().replace("+00:00", "Z"),
    "lookback_minutes": lookback_minutes,
    "workspace_id": workspace_id,
    "configuration": {
        "dcr_endpoint_host": dcr_endpoint_host,
        "dcr_immutable_id": dcr_immutable_id,
        "pipeline_items": len(pipeline_item_ids),
        "dataflow_items": len(dataflow_item_ids),
    },
}

# Initialize collectors on top of the authenticated Fabric client
pipeline_collector = PipelineCollector(fabric_client)
dataflow_collector = DataflowCollector(fabric_client)

# === COLLECT PIPELINE DATA ===
# Gather pipeline runs (and, when enabled, their activity runs) for every
# configured pipeline. Failures are reported and skipped, never fatal.
pipeline_rows = []
activity_rows = []

if pipeline_item_ids:
    print(f"\n📋 Collecting pipeline data for {len(pipeline_item_ids)} items...")

    for pipeline_id in pipeline_item_ids:
        print(f"   Collecting from pipeline: {pipeline_id}")

        try:
            # Collect pipeline runs
            runs = pipeline_collector.collect_pipeline_runs(
                workspace_id=workspace_id,
                pipeline_id=pipeline_id,
                lookback_minutes=lookback_minutes
            )
            pipeline_rows.extend(runs)
            print(f"     Found {len(runs)} pipeline runs")

            # Collect activity runs if enabled
            if collect_activity_runs:
                for run in runs:
                    run_id = run.get('id')
                    if not run_id:
                        continue
                    # Isolate failures per run: one bad lookup must not drop
                    # the activity data of every remaining run of this pipeline.
                    try:
                        activities = pipeline_collector.collect_activity_runs(
                            workspace_id=workspace_id,
                            pipeline_run_id=run_id,
                            lookback_minutes=lookback_minutes
                        )
                        activity_rows.extend(activities)
                        print(f"     Found {len(activities)} activity runs for pipeline run {run_id[:8]}...")
                    except Exception as e:
                        print(f"     ❌ Error collecting activity runs for pipeline run {run_id}: {e}")

        except Exception as e:
            print(f"     ❌ Error collecting from pipeline {pipeline_id}: {e}")

    print(f"✅ Pipeline collection completed: {len(pipeline_rows)} runs, {len(activity_rows)} activities")
else:
    print("⏭️  No pipeline IDs configured - skipping pipeline collection")

# === COLLECT DATAFLOW DATA ===
# Accumulates the runs of every configured dataflow; a failing dataflow is
# reported and skipped so the remaining ones are still collected.
dataflow_rows = []

if not dataflow_item_ids:
    print("⏭️  No dataflow IDs configured - skipping dataflow collection")
else:
    print(f"\n🔄 Collecting dataflow data for {len(dataflow_item_ids)} items...")

    for dataflow_id in dataflow_item_ids:
        print(f"   Collecting from dataflow: {dataflow_id}")

        try:
            dataflow_runs = dataflow_collector.collect_dataflow_runs(
                workspace_id=workspace_id,
                dataflow_id=dataflow_id,
                lookback_minutes=lookback_minutes,
            )
            dataflow_rows += dataflow_runs
            print(f"     Found {len(dataflow_runs)} dataflow runs")
        except Exception as e:
            print(f"     ❌ Error collecting from dataflow {dataflow_id}: {e}")

    print(f"✅ Dataflow collection completed: {len(dataflow_rows)} runs")

# === INGEST DATA TO AZURE MONITOR ===
print("\n📤 Starting data ingestion to Azure Monitor...")


def _ingest_records(label, records, stream_name, summary_key):
    """Send one record type to its DCR stream and record the outcome.

    Args:
        label: Lower-case record type used in messages ("pipeline", ...).
        records: Collected rows to ingest; nothing happens when empty.
        stream_name: DCR stream the rows are sent to.
        summary_key: Key under which the outcome is stored in ``summary``.

    Returns:
        The result returned by ``ingest_enhanced``, or None when there was
        nothing to ingest or the ingestion raised.
    """
    if not records:
        return None

    print(f"   Ingesting {len(records)} {label} runs...")
    try:
        # Accept a bare host as well as a full URL so a configured value that
        # already carries a scheme does not become "https://https://...".
        host = str(dcr_endpoint_host)
        endpoint_url = host if host.startswith(("https://", "http://")) else f"https://{host}"

        client = FabricIngestion(
            endpoint_host=endpoint_url,
            dcr_id=dcr_immutable_id,
            stream_name=stream_name
        )
        result = client.ingest_enhanced(
            records=records,
            troubleshoot=True
        )
        summary[summary_key] = {
            "collected": len(records),
            "ingested": result.get("successful_records", 0),
            "failed": result.get("failed_records", 0),
            "success_rate": result.get("success_rate", 0)
        }
        print(f"     ✅ {label.capitalize()} runs: {result.get('successful_records', 0)}/{len(records)} ingested")
        return result
    except Exception as e:
        # Keep going with the other record types; the error is preserved in
        # the summary for the troubleshooting cell.
        print(f"     ❌ {label.capitalize()} ingestion failed: {e}")
        summary[summary_key] = {"collected": len(records), "ingested": 0, "error": str(e)}
        return None


# One call per record type, each to its own stream
pipeline_result = _ingest_records("pipeline", pipeline_rows, stream_pipeline, "pipeline_runs")
activity_result = _ingest_records("activity", activity_rows, stream_activity, "activity_runs")
dataflow_result = _ingest_records("dataflow", dataflow_rows, stream_dataflow, "dataflow_runs")

# === SUMMARY REPORT ===
# Totals across the three record types, followed by the full summary as JSON.
total_collected = sum(len(rows) for rows in (pipeline_rows, activity_rows, dataflow_rows))

# Ingested count per record type; a type without a summary entry counts as 0.
ingested_counts = {
    key: summary.get(key, {}).get("ingested", 0)
    for key in ("pipeline_runs", "activity_runs", "dataflow_runs")
}
total_ingested = sum(ingested_counts.values())

banner = "=" * 60
print("\n" + banner)
print("✅ DATA COLLECTION AND INGESTION COMPLETED")
print(banner)
print(f"📊 Total records collected: {total_collected}")
print(f"📤 Total records ingested: {total_ingested}")
print(f"📋 Pipeline runs: {ingested_counts['pipeline_runs']}")
print(f"🔄 Activity runs: {ingested_counts['activity_runs']}")
print(f"💧 Dataflow runs: {ingested_counts['dataflow_runs']}")

if total_ingested <= 0:
    print("\n⚠️  No records ingested - check your configuration and logs above")
else:
    print("\n🎯 Data should appear in Log Analytics within 5-15 minutes")
    print(f"   Tables: {stream_pipeline}, {stream_activity}, {stream_dataflow}")

print("\n📄 Detailed summary available in 'summary' variable")
print(json.dumps(summary, indent=2, default=str))

In [None]:
# === Enhanced Troubleshooting with Framework ===
import json
import datetime as dt

print("🔍 ENHANCED TROUBLESHOOTING REPORT")
print("=" * 50)

# 1. Collection & ingestion summary (needs the data collection cell to have run)
if 'summary' not in locals():
    print("❌ No summary data available - collection may have failed")
    print("   Make sure to run the data collection cell first")
else:
    print("\n1. COLLECTION & INGESTION SUMMARY:")
    print(json.dumps(summary, indent=2, default=str))

    # Totals over the three record types; absent entries count as zero
    run_kinds = ("pipeline_runs", "activity_runs", "dataflow_runs")
    total_collected = sum(summary.get(kind, {}).get("collected", 0) for kind in run_kinds)
    total_ingested = sum(summary.get(kind, {}).get("ingested", 0) for kind in run_kinds)

    # Guard the division: nothing collected means a 0% success rate
    success_pct = (total_ingested / total_collected * 100) if total_collected > 0 else 0

    print("\n📊 Summary Statistics:")
    print(f"   Total collected: {total_collected}")
    print(f"   Total ingested: {total_ingested}")
    print(f"   Success rate: {success_pct:.1f}%")

    if total_ingested != 0:
        print(f"\n✅ {total_ingested} records were successfully ingested to DCR")
    else:
        print("\n❌ No data was ingested - this explains why tables are empty!")
        print("🔍 Possible causes:")
        for cause in (
            "No recent runs in the lookback window",
            "Empty item ID lists",
            "API authentication issues",
            "DCR configuration problems",
            "Network connectivity issues",
        ):
            print(f"   - {cause}")

def stream_to_table(stream_name):
    """Return the Log Analytics table name for a DCR stream name.

    The table name is the stream name without its leading "Custom-" prefix,
    e.g. "Custom-FabricPipelineRun_CL" -> "FabricPipelineRun_CL". Names without
    that prefix are returned unchanged.
    """
    prefix = "Custom-"
    if stream_name.startswith(prefix):
        return stream_name[len(prefix):]
    return stream_name


# 2. Configuration verification
print("\n2. CONFIGURATION VERIFICATION:")
# Check every name printed below, not just the two DCR settings, so a partly
# initialised notebook gets the hint instead of a NameError.
if all(
    name in globals()
    for name in ('dcr_endpoint_host', 'dcr_immutable_id', 'workspace_id', 'lookback_minutes')
):
    print(f"   DCR Endpoint: https://{dcr_endpoint_host}")
    print(f"   DCR Immutable ID: {dcr_immutable_id}")
    print(f"   Workspace ID: {workspace_id}")
    print(f"   Lookback Window: {lookback_minutes} minutes ({lookback_minutes/1440:.1f} days)")
else:
    print("   ❌ Configuration variables not found - run configuration cell first")

# 3. Stream and table mapping, derived from the configured stream names so the
# report stays correct when a stream is renamed.
configured_streams = [
    globals()[name]
    for name in ('stream_pipeline', 'stream_activity', 'stream_dataflow')
    if name in globals()
]

print("\n3. LOG ANALYTICS TABLE MAPPING:")
print("   Expected tables in your Log Analytics workspace:")
for stream in configured_streams:
    print(f"   - {stream_to_table(stream)}")

print("\n   Stream to Table Mapping:")
for stream in configured_streams:
    print(f"   {stream} → {stream_to_table(stream)}")

# 4. KQL queries the user can paste into Log Analytics to verify arrival.
# An empty entry prints a blank separator line.
kql_help_lines = [
    "\n4. KQL VERIFICATION QUERIES:",
    "   Run these queries in your Log Analytics workspace:",
    "   ```kql",
    "   // Check table row counts",
    "   FabricPipelineRun_CL | count",
    "   FabricPipelineActivityRun_CL | count",
    "   FabricDataflowRun_CL | count",
    "",
    "   // Check recent data (last 24 hours)",
    "   FabricPipelineRun_CL | where TimeGenerated > ago(24h) | take 10",
    "   FabricPipelineActivityRun_CL | where TimeGenerated > ago(24h) | take 10",
    "   FabricDataflowRun_CL | where TimeGenerated > ago(24h) | take 10",
    "",
    "   // Check data freshness",
    "   FabricPipelineRun_CL | summarize max(TimeGenerated)",
    "   FabricDataflowRun_CL | summarize max(TimeGenerated)",
    "   ```",
]

# 5. Reminder of the error-handling features the framework advertises
framework_help_lines = [
    "\n5. FRAMEWORK TROUBLESHOOTING:",
    "   The framework provides enhanced error handling:",
    "   - Automatic retry with exponential backoff",
    "   - Size-aware batching (950KB JSON limit)",
    "   - Detailed HTTP status code handling",
    "   - Comprehensive error reporting",
]

for help_line in kql_help_lines + framework_help_lines:
    print(help_line)

def sample_field_lines(record, limit=5):
    """Return printable preview lines for the first ``limit`` fields of a record.

    Args:
        record: Mapping of field name to value (one collected run).
        limit: Maximum number of fields to list.

    Returns:
        One "     key: value" line per listed field, followed by a
        "... and N more fields" line only when fields were actually left out,
        so short records never report a zero or negative remainder.
    """
    lines = [f"     {key}: {value}" for key, value in list(record.items())[:limit]]
    hidden = len(record) - limit
    if hidden > 0:
        lines.append(f"   ... and {hidden} more fields")
    return lines


# 6. Sample data inspection: show the shape of the first collected record
if 'pipeline_rows' in locals() and pipeline_rows:
    print("\n6. SAMPLE PIPELINE DATA:")
    sample = pipeline_rows[0]
    print("   First pipeline run structure:")
    for line in sample_field_lines(sample):
        print(line)

if 'dataflow_rows' in locals() and dataflow_rows:
    print("\n   SAMPLE DATAFLOW DATA:")
    sample = dataflow_rows[0]
    print("   First dataflow run structure:")
    for line in sample_field_lines(sample):
        print(line)

# 7. Next steps based on results (only meaningful once a summary exists)
print("\n7. RECOMMENDED NEXT STEPS:")
if 'summary' in locals():
    if total_ingested > 0:
        print("   ✅ Data was ingested successfully")
        print("   ⏰ Wait 10-15 minutes for Log Analytics processing")
        print("   🔍 Use the KQL queries above to verify data arrival")
        print("   📊 Check TimeGenerated column for data freshness")
    else:
        print("   ❌ No data was ingested - investigate issues:")
        print("   1. Verify workspace ID and item IDs are correct")
        print("   2. Check authentication and permissions")
        print("   3. Verify DCR endpoint and immutable ID")
        print("   4. Test with a smaller lookback window")
        print("   5. Check if items have runs in the specified time window")

# 8. Environment validation: report presence only, never the values
print("\n8. ENVIRONMENT VALIDATION:")
print("   Required environment variables status:")
required_env_vars = ["DCR_ENDPOINT_HOST", "DCR_IMMUTABLE_ID", "FABRIC_WORKSPACE_ID", "FABRIC_TENANT_ID", "FABRIC_APP_ID"]
for var in required_env_vars:
    # Without a loaded config every variable is reported as missing
    value = config.get(var) if 'config' in locals() else None
    status = "✅" if value else "❌"
    print(f"   {status} {var}: {'Set' if value else 'Missing'}")

# Timezone-aware replacement for the deprecated datetime.utcnow(); the printed
# format is unchanged.
completed_at = dt.datetime.now(dt.timezone.utc)
print(f"\n⏰ Troubleshooting completed at: {completed_at.strftime('%Y-%m-%d %H:%M:%S')} UTC")
print("💡 If issues persist, check the detailed error messages in the collection cell output above")