# Environment Validation for Fabric Log Analytics Collectors

This notebook validates that your environment is properly configured to run the Fabric monitoring notebooks.

## What this notebook checks:
1. ✅ Environment file (.env) exists and is readable
2. ✅ All required environment variables are set
3. ✅ Required Python packages are installed
4. ✅ Azure authentication works
5. ✅ Fabric API connectivity
6. ✅ Azure Monitor Log Analytics connectivity

## How to use this notebook:
1. Copy `.env.example` to `.env`
2. Fill in your credentials in the `.env` file
3. Run this notebook to validate everything works

In [None]:
# Install the third-party packages that the cells below import
# (--quiet keeps pip's output out of the notebook).
%pip install --quiet msal requests azure-identity azure-keyvault-secrets python-dotenv

In [None]:
import os
import sys
from pathlib import Path
import importlib.util

# Colors for output
class Colors:
    """ANSI escape sequences used by the print_* helpers to colour output."""

    # Foreground colours.
    RED = '\033[91m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    BLUE = '\033[94m'

    # Text attributes: BOLD switches bold on, END resets all styling.
    BOLD = '\033[1m'
    END = '\033[0m'

def print_success(message):
    """Print ``message`` in green with a leading check-mark emoji."""
    line = f"{Colors.GREEN}✅ {message}{Colors.END}"
    print(line)

def print_error(message):
    """Print ``message`` in red with a leading cross-mark emoji."""
    line = f"{Colors.RED}❌ {message}{Colors.END}"
    print(line)

def print_warning(message):
    """Print ``message`` in yellow with a leading warning-sign emoji."""
    line = f"{Colors.YELLOW}⚠️  {message}{Colors.END}"
    print(line)

def print_info(message):
    """Print ``message`` in blue with a leading information emoji."""
    line = f"{Colors.BLUE}ℹ️  {message}{Colors.END}"
    print(line)

def print_header(message):
    """Print ``message`` in bold blue between two 60-character ``=`` rules.

    A blank line is emitted before the first rule to separate sections.
    """
    style = f"{Colors.BOLD}{Colors.BLUE}"
    rule = '=' * 60
    print(f"\n{style}{rule}{Colors.END}")
    print(f"{style}{message}{Colors.END}")
    print(f"{style}{rule}{Colors.END}")

# Print the notebook banner and a one-line description before any check runs.
print_header("🔍 FABRIC MONITORING ENVIRONMENT VALIDATION")
print("This notebook will validate your environment setup for Fabric monitoring.")

In [None]:
print_header("1️⃣ CHECKING ENVIRONMENT FILE")

# Candidate locations, searched in order: the working directory first, then
# up to two parent directories.
env_file_paths = ['.env', '../.env', '../../.env']
env_file_found = None

for path in env_file_paths:
    if not os.path.exists(path):
        continue
    # Existence alone is not enough: a directory named ".env" or a file
    # without read permission cannot be loaded, so it must not count as found.
    if os.path.isfile(path) and os.access(path, os.R_OK):
        env_file_found = path
        break
    print_warning(f"Ignoring {path}: it exists but is not a readable file")

if env_file_found:
    print_success(f"Environment file found: {env_file_found}")

    # The template is expected next to the .env file. Appending the suffix
    # (instead of str.replace) cannot touch any other ".env" in the path.
    example_path = f"{env_file_found}.example"
    if os.path.exists(example_path):
        print_info(f"Example file found: {example_path}")
    else:
        print_warning("No .env.example file found")
else:
    print_error("No .env file found!")
    print_info("Please copy .env.example to .env and fill in your credentials")

    # Point the user at the first template that exists, if any.
    for path in ['.env.example', '../.env.example', '../../.env.example']:
        if os.path.exists(path):
            print_info(f"Template available: {path}")
            break

In [None]:
print_header("2️⃣ LOADING AND VALIDATING ENVIRONMENT VARIABLES")

# Make sure python-dotenv is importable, installing it on the fly if needed.
try:
    from dotenv import load_dotenv
except ImportError:
    print_error("python-dotenv not installed. Installing now...")
    import subprocess
    subprocess.check_call([sys.executable, "-m", "pip", "install", "python-dotenv"])
    from dotenv import load_dotenv
    print_success("python-dotenv installed")

# load_dotenv() returns False when no .env file was found or the file had no
# entries, so only claim success when something was actually loaded.
if load_dotenv():
    print_success("Environment variables loaded from .env file")
else:
    print_warning("No values were loaded from a .env file; checking variables already present in the environment")


def _mask_value(var_name, value):
    """Return a preview of ``value`` that is safe to show in notebook output.

    Variables whose name marks them as credentials are hidden completely;
    other values are truncated to their first 8 characters.
    """
    sensitive_markers = ("SECRET", "PASSWORD", "TOKEN")
    if any(marker in var_name.upper() for marker in sensitive_markers):
        return "******** (hidden)"
    return value[:8] + "..." if len(value) > 8 else value


# Required environment variables
required_vars = {
    "FABRIC_TENANT_ID": "Azure tenant ID",
    "FABRIC_APP_ID": "Service principal client ID",
    "FABRIC_APP_SECRET": "Service principal client secret",
    "DCR_ENDPOINT_HOST": "Data Collection Rule endpoint host",
    "DCR_IMMUTABLE_ID": "Data Collection Rule immutable ID"
}

# Optional but recommended variables
optional_vars = {
    "FABRIC_WORKSPACE_ID": "Default Fabric workspace ID",
    "LOG_ANALYTICS_WORKSPACE_ID": "Log Analytics workspace ID",
    "AZURE_SUBSCRIPTION_ID": "Azure subscription ID",
    "AZURE_RESOURCE_GROUP": "Azure resource group name"
}

# Names collected here are read by the later cells to decide whether the
# authentication and connectivity tests can run.
print("\n📋 Required Variables:")
missing_required = []
for var_name, description in required_vars.items():
    value = os.getenv(var_name)
    if value:
        print_success(f"{var_name}: {_mask_value(var_name, value)}")
    else:
        print_error(f"{var_name}: Not set - {description}")
        missing_required.append(var_name)

print("\n📋 Optional Variables:")
for var_name, description in optional_vars.items():
    value = os.getenv(var_name)
    if value:
        print_success(f"{var_name}: {_mask_value(var_name, value)}")
    else:
        print_warning(f"{var_name}: Not set - {description}")

if missing_required:
    print_error(f"\n❌ Missing {len(missing_required)} required environment variables")
    print_info("Please update your .env file with the missing variables")
else:
    print_success("\n✅ All required environment variables are set")

In [None]:
print_header("3️⃣ CHECKING REQUIRED PACKAGES")

# Keys are importable module names (e.g. 'dotenv' for the python-dotenv
# distribution), values are short descriptions shown in the report.
# A module from this table that cannot be imported is reported as an error.
required_packages = {
    'msal': 'Microsoft Authentication Library',
    'requests': 'HTTP library for API calls',
    'azure.identity': 'Azure Identity library',
    'azure.keyvault.secrets': 'Azure Key Vault secrets',
    'dotenv': 'Environment variable management'
}

# Same layout as above; a missing module here only produces a warning.
optional_packages = {
    'azure.mgmt.monitor': 'Azure Monitor management (for capacity monitoring)',
    'pandas': 'Data manipulation (for data processing)',
    'numpy': 'Numerical computing (for data processing)'
}

def check_package(package_name):
    """Report whether ``package_name`` can be imported.

    The module is actually imported (not merely located), so a package that
    is installed but raises ImportError while loading is reported as missing.

    Args:
        package_name: Absolute module name, either plain (``"msal"``) or
            dotted (``"azure.identity"``).

    Returns:
        True if the import succeeds; False if the name is empty or the
        import raises ImportError.
    """
    # import_module("") raises ValueError rather than ImportError; an empty
    # name can never refer to an installed package.
    if not package_name:
        return False
    try:
        # import_module handles plain and dotted names alike, so there is no
        # need to branch on the presence of a ".".
        importlib.import_module(package_name)
    except ImportError:
        return False
    return True

# Required packages: report each one and remember those that fail to import.
print("\n📦 Required Packages:")
missing_packages = []
for pkg_name, pkg_purpose in required_packages.items():
    if not check_package(pkg_name):
        print_error(f"{pkg_name}: Not installed - {pkg_purpose}")
        missing_packages.append(pkg_name)
        continue
    print_success(f"{pkg_name}: {pkg_purpose}")

# Optional packages: a failed import is only a warning and is not recorded.
print("\n📦 Optional Packages:")
for pkg_name, pkg_purpose in optional_packages.items():
    if check_package(pkg_name):
        print_success(f"{pkg_name}: {pkg_purpose}")
        continue
    print_warning(f"{pkg_name}: Not installed - {pkg_purpose}")

# Overall verdict for this section, with the install command when needed.
if not missing_packages:
    print_success("\n✅ All required packages are installed")
else:
    print_error(f"\n❌ Missing {len(missing_packages)} required packages")
    print_info("Run: %pip install msal requests azure-identity azure-keyvault-secrets python-dotenv")

In [None]:
print_header("4️⃣ TESTING AZURE AUTHENTICATION")

# Start from an empty result. The later cells look for "access_token" in
# `result`; without this reset, a token left in the kernel by an earlier run
# would be picked up when this run is skipped or fails before assigning it.
result = {}

if missing_required:
    print_warning("Skipping authentication test due to missing environment variables")
else:
    try:
        import msal

        # Get credentials from environment
        tenant_id = os.getenv("FABRIC_TENANT_ID")
        client_id = os.getenv("FABRIC_APP_ID")
        client_secret = os.getenv("FABRIC_APP_SECRET")

        print_info("Testing Azure AD authentication...")

        # Create MSAL app (kept in `app`; the Azure Monitor cell reuses it)
        app = msal.ConfidentialClientApplication(
            client_id=client_id,
            client_credential=client_secret,
            authority=f"https://login.microsoftonline.com/{tenant_id}"
        )

        # The application was just created, so its token cache is empty and a
        # silent lookup cannot succeed; request the token directly using the
        # client-credentials flow.
        result = app.acquire_token_for_client(
            scopes=["https://api.fabric.microsoft.com/.default"]
        ) or {}

        if "access_token" in result:
            print_success("Azure AD authentication successful")
            print_success("Fabric API token acquired")

            # Report the remaining lifetime; warn below five minutes.
            expires_in = result.get('expires_in', 0)
            if expires_in > 300:  # More than 5 minutes
                print_success(f"Token valid for {expires_in//60} more minutes")
            else:
                print_warning(f"Token expires soon ({expires_in} seconds)")

        else:
            print_error("Failed to acquire access token")
            if "error" in result:
                print_error(f"Error: {result['error']}")
                print_error(f"Description: {result.get('error_description', 'No description')}")

    except Exception as e:
        # Top-level boundary of the cell: report the failure instead of
        # aborting the notebook run.
        print_error(f"Authentication test failed: {str(e)}")
        print_info("Check your service principal credentials and permissions")

In [None]:
print_header("5️⃣ TESTING FABRIC API CONNECTIVITY")

if missing_required or 'result' not in locals() or "access_token" not in result:
    print_warning("Skipping Fabric API test due to authentication issues")
else:
    try:
        import requests

        # Test Fabric API connectivity
        headers = {
            "Authorization": f"Bearer {result['access_token']}",
            "Content-Type": "application/json"
        }

        print_info("Testing Fabric API connectivity...")

        # The workspaces listing is paginated: each page may carry a
        # "continuationUri" pointing at the next one. Follow it so that the
        # FABRIC_WORKSPACE_ID check below sees every accessible workspace.
        # The page cap only protects against an endless continuation chain.
        workspaces = []
        next_page_url = "https://api.fabric.microsoft.com/v1/workspaces"
        max_pages = 100
        for _ in range(max_pages):
            response = requests.get(next_page_url, headers=headers, timeout=30)
            if response.status_code != 200:
                break
            page = response.json()
            workspaces.extend(page.get('value', []))
            next_page_url = page.get('continuationUri')
            if not next_page_url:
                break
        else:
            print_warning(f"Stopped after {max_pages} pages; the workspace list may be incomplete")

        # `response` is the last page fetched (or the first failing one).
        if response.status_code == 200:
            print_success("Fabric API connectivity successful")
            print_success(f"Found {len(workspaces)} accessible workspaces")

            # Show first few workspaces
            if workspaces:
                print_info("Sample workspaces:")
                for i, ws in enumerate(workspaces[:3]):
                    short_id = str(ws.get('id') or 'No ID')[:8]
                    print(f"   {i+1}. {ws.get('displayName', 'Unknown')} ({short_id}...)")

            # Check if FABRIC_WORKSPACE_ID is valid. GUIDs are compared
            # case-insensitively and without surrounding whitespace.
            workspace_id = os.getenv("FABRIC_WORKSPACE_ID")
            if workspace_id:
                wanted_id = workspace_id.strip().lower()
                matching_ws = [
                    ws for ws in workspaces
                    if str(ws.get('id') or '').lower() == wanted_id
                ]
                if matching_ws:
                    print_success(f"FABRIC_WORKSPACE_ID matches workspace: {matching_ws[0].get('displayName')}")
                else:
                    print_warning("FABRIC_WORKSPACE_ID not found in accessible workspaces")

        elif response.status_code == 403:
            print_error("Access denied - check service principal permissions")
            print_info("Required permission: Fabric.ReadAll or Workspace.Read.All")
        else:
            print_error(f"API call failed with status {response.status_code}")
            print_error(f"Response: {response.text[:200]}...")

    except ImportError as e:
        # Must precede the clause below: if `import requests` itself failed,
        # evaluating `requests.exceptions.Timeout` would raise NameError.
        print_error(f"Fabric API test failed: {str(e)}")
        print_info("Run: %pip install requests")
    except requests.exceptions.Timeout:
        print_error("Fabric API request timed out")
        print_info("Check your network connectivity")
    except Exception as e:
        print_error(f"Fabric API test failed: {str(e)}")

In [None]:
print_header("6️⃣ TESTING AZURE MONITOR CONNECTIVITY")

dcr_endpoint = os.getenv("DCR_ENDPOINT_HOST")
dcr_id = os.getenv("DCR_IMMUTABLE_ID")

if not dcr_endpoint or not dcr_id:
    print_warning("Skipping Azure Monitor test - DCR configuration missing")
    print_info("Set DCR_ENDPOINT_HOST and DCR_IMMUTABLE_ID in your .env file")
elif missing_required or 'result' not in locals() or "access_token" not in result:
    print_warning("Skipping Azure Monitor test due to authentication issues")
else:
    try:
        # Get Azure Monitor token, reusing the MSAL application created by
        # the authentication cell.
        monitor_result = app.acquire_token_for_client(
            scopes=["https://monitor.azure.com/.default"]
        )

        if "access_token" not in monitor_result:
            print_error("Failed to get Azure Monitor token")
            if "error" in monitor_result:
                print_error(f"Error: {monitor_result['error']}")
                print_error(f"Description: {monitor_result.get('error_description', 'No description')}")
        else:
            print_success("Azure Monitor token acquired")

            # Test DCR endpoint connectivity (without sending data)
            import socket
            from urllib.parse import urlsplit

            print_info(f"Testing DCR endpoint connectivity: {dcr_endpoint}")

            # Only set once DNS resolution and the TCP connection both worked,
            # so the final verdict cannot contradict the errors printed above it.
            endpoint_reachable = False

            try:
                # Extract the hostname. The value may be a bare host or a full
                # URL; prefixing "//" lets urlsplit parse a bare host, and
                # .hostname drops any scheme, port, or path.
                endpoint_text = dcr_endpoint.strip()
                if '://' not in endpoint_text:
                    endpoint_text = f"//{endpoint_text}"
                hostname = urlsplit(endpoint_text).hostname or dcr_endpoint.strip()

                # Test DNS resolution
                ip = socket.gethostbyname(hostname)
                print_success(f"DNS resolution successful: {hostname} -> {ip}")

                # Test port connectivity; the context manager closes the
                # socket even if connect_ex raises.
                with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
                    sock.settimeout(10)
                    result_port = sock.connect_ex((hostname, 443))

                if result_port == 0:
                    print_success("DCR endpoint is reachable on port 443")
                    endpoint_reachable = True
                else:
                    print_error("DCR endpoint is not reachable on port 443")

            except socket.gaierror:
                print_error(f"DNS resolution failed for {hostname}")
                print_info("Check your DCR_ENDPOINT_HOST value")
            except Exception as e:
                print_warning(f"Network test failed: {str(e)}")

            print_info(f"DCR Immutable ID: {dcr_id[:8]}...")
            if endpoint_reachable:
                print_success("Azure Monitor configuration appears valid")
            else:
                print_warning("Azure Monitor configuration could not be verified - the DCR endpoint test did not pass")
            print_info("Note: Actual data ingestion test would require sending sample data")

    except Exception as e:
        print_error(f"Azure Monitor test failed: {str(e)}")

In [None]:
print_header("📊 VALIDATION SUMMARY")

# Number of checks listed in the notebook introduction.
total_checks = 6
issues = []

# Environment file check
if not env_file_found:
    issues.append("No .env file found")

# Environment variables check
if missing_required:
    issues.append(f"{len(missing_required)} required environment variables missing")

# Packages check
if 'missing_packages' in locals() and missing_packages:
    issues.append(f"{len(missing_packages)} required packages missing")

# Authentication check: only a dict that carries an access token counts as
# success; a missing, empty, or non-dict `result` is a failure.
auth_result = locals().get('result')
auth_ok = isinstance(auth_result, dict) and "access_token" in auth_result
if not auth_ok:
    issues.append("Azure authentication failed")

# The two connectivity cells only run their tests when authentication worked
# and no required variable is missing; in that case their outcome must feed
# the verdict too, otherwise a 403 from the Fabric API or an unreachable DCR
# endpoint would still end in "ALL CHECKS PASSED".
# NOTE(review): these variables live in the kernel between runs, so after a
# partial re-run they may describe an earlier attempt - re-run all cells.
if auth_ok and not missing_required:
    # Fabric API check: the cell stores its HTTP response in `response`.
    fabric_response = locals().get('response')
    if getattr(fabric_response, 'status_code', None) != 200:
        issues.append("Fabric API connectivity check failed")

    # Azure Monitor check: `monitor_result` holds the token response and
    # `result_port` the connect_ex() return code (0 means connected).
    monitor_token = locals().get('monitor_result')
    if not (isinstance(monitor_token, dict) and "access_token" in monitor_token):
        issues.append("Azure Monitor token could not be acquired")
    elif locals().get('result_port') != 0:
        issues.append("DCR endpoint connectivity check failed")

# Summary
if not issues:
    print_success("🎉 ALL CHECKS PASSED!")
    print_success("Your environment is ready for Fabric monitoring")
    print_info("You can now run the collector notebooks:")
    print_info("  • fabric_LA_collector.ipynb - Pipeline & Dataflow monitoring")
    print_info("  • fabric_user_activity_collector.ipynb - User activity tracking")
    print_info("  • fabric_dataset_refresh_collector.ipynb - Dataset refresh monitoring")
    print_info("  • fabric_capacity_utilization_collector.ipynb - Capacity monitoring")
else:
    print_error(f"❌ {len(issues)} ISSUES FOUND:")
    for i, issue in enumerate(issues, 1):
        print_error(f"   {i}. {issue}")

    print_info("\n🔧 NEXT STEPS:")
    print_info("1. Fix the issues listed above")
    print_info("2. Re-run this validation notebook")
    print_info("3. Proceed with the collector notebooks once all checks pass")

print_info("\n📚 Documentation:")
print_info("  • README.md - Setup instructions")
print_info("  • .env.example - Environment variable template")
print_info("  • requirements.txt - Python package dependencies")