#!/usr/bin/env python3
"""
Step 6b: Inference on Snowpark Container Services (SPCS)
=========================================================

This script demonstrates how to:
1. Deploy a model to Snowpark Container Services (SPCS)
2. Create a compute pool for SPCS
3. Log a model with SPCS target platform
4. Access the deployed service
5. Run inference via service functions
6. Manage and monitor SPCS services
7. Understand SPCS vs Warehouse tradeoffs

SNOWPARK CONTAINER SERVICES (SPCS) INFERENCE:
---------------------------------------------
- Advanced deployment option for Model Registry
- Runs on containerized compute pools (CPU or GPU)
- Best for: Large models, GPU models, custom dependencies
- Advantages: Any PyPI package, distributed inference, GPU support
- Considerations: More complex setup, higher cost for small workloads
"""

import sys
import time
from datetime import datetime
from pathlib import Path

import pandas as pd
from snowflake.ml.registry import Registry

from connections import SnowflakeConnection

# Width of the "=" / "-" rules used for every section banner and table.
RULE_WIDTH = 80


def _print_banner(title, *, leading_blank=True):
    """Print *title* framed by two "=" rules, optionally after a blank line."""
    rule = "=" * RULE_WIDTH
    print(f"\n{rule}" if leading_blank else rule)
    print(title)
    print(rule)


def _print_lines(*lines):
    """Print each string in *lines* on its own line, in order."""
    for line in lines:
        print(line)


def _row_to_dict(row):
    """Return *row* as a dict with unquoted, lower-cased column names.

    Rows exposing ``as_dict()`` use it; anything else is passed to ``dict()``.
    """
    as_dict = getattr(row, "as_dict", None)
    data = as_dict() if callable(as_dict) else dict(row)
    return {str(key).strip('"').lower(): value for key, value in data.items()}


def _extract_pool_state(rows):
    """Return the compute pool state found in DESCRIBE output, or None.

    Accepts both a single row carrying a ``state`` column and a
    property/value layout (one row per property), so the caller does not
    depend on which shape the server returns.
    """
    for row in rows:
        fields = _row_to_dict(row)
        if "state" in fields:
            return fields["state"]
        if str(fields.get("property", "")).lower() == "state":
            return fields.get("value")
    return None


def initialize_registry(connection_name='legalzoom', database='ML_SHOWCASE', schema='MODELS'):
    """
    Initialize Model Registry connection.

    Parameters:
    -----------
    connection_name : str
        Snowflake connection name
    database : str
        Target database
    schema : str
        Target schema

    Returns:
    --------
    tuple
        (connection, session, registry)
    """
    _print_banner("INITIALIZING MODEL REGISTRY", leading_blank=False)

    print("\nConnecting to Snowflake...")
    connection = SnowflakeConnection.from_snow_cli(connection_name)
    session = connection.session

    print(f"Initializing registry at {database}.{schema}...")
    registry = Registry(
        session=session,
        database_name=database,
        schema_name=schema,
    )

    print(f"✓ Registry initialized: {registry.location}")

    return connection, session, registry


def check_spcs_prerequisites(session):
    """
    Check if SPCS prerequisites are met.

    Only the region check can fail the result; problems reading the region
    or the snowflake-ml-python version are reported and otherwise ignored.

    Parameters:
    -----------
    session : Session
        Snowpark session

    Returns:
    --------
    bool
        False when the current region name contains "GOV", True otherwise
    """
    _print_banner("CHECKING SPCS PREREQUISITES")

    _print_lines(
        "\nRequired for SPCS Model Serving:",
        "  1. Snowflake account in AWS, Azure, or GCP commercial region",
        "  2. snowflake-ml-python >= 1.8.0",
        "  3. Compute pool with USAGE or OPERATE privilege",
        "  4. BIND SERVICE ENDPOINT privilege (for ingress endpoints)",
        "  5. READ privilege on the model",
    )

    # Check current region
    try:
        region_result = session.sql("SELECT CURRENT_REGION()").collect()
        region = region_result[0][0]
        print(f"\n✓ Current region: {region}")

        if 'GOV' in region.upper():
            print("  ⚠ Warning: Government regions are not supported for SPCS")
            return False
    except Exception as e:
        # Best effort: an unreadable region is reported, not treated as fatal.
        print(f"  ⚠ Could not determine region: {e}")

    # Check snowflake-ml-python version
    try:
        import snowflake.ml
        ml_version = snowflake.ml.__version__
        print(f"✓ snowflake-ml-python version: {ml_version}")

        # Only major.minor matter for the 1.8 threshold.
        major, minor = map(int, ml_version.split('.')[:2])
        if major < 1 or (major == 1 and minor < 8):
            print("  ⚠ Warning: Version 1.8.0 or later recommended for SPCS")
    except Exception as e:
        print(f"  ⚠ Could not check snowflake-ml-python version: {e}")

    print("\n✓ Basic prerequisites check complete")
    print("  Note: Additional privileges may be required")

    return True


def create_compute_pool(session, pool_name='ML_INFERENCE_POOL'):
    """
    Create a compute pool for SPCS (if it doesn't exist).

    The pool state is reported on a best-effort basis: a failure while
    reading the state is printed as a warning and does not turn a
    successful CREATE into a reported failure.

    Parameters:
    -----------
    session : Session
        Snowpark session
    pool_name : str
        Name for the compute pool; interpolated into the SQL text as-is,
        so it must be a trusted identifier

    Returns:
    --------
    str or None
        Compute pool name, or None when the CREATE statement failed
    """
    _print_banner("CREATING COMPUTE POOL")

    print(f"\nCompute Pool: {pool_name}")

    # Compute pool configuration
    _print_lines(
        "\nConfiguration:",
        "  MIN_NODES: 1 (minimum instances)",
        "  MAX_NODES: 3 (maximum instances for auto-scaling)",
        "  INSTANCE_FAMILY: CPU_X64_M (medium CPU instances)",
        "  AUTO_RESUME: TRUE (auto-start when needed)",
        "  AUTO_SUSPEND_SECS: 300 (suspend after 5 minutes idle)",
    )

    create_pool_sql = f"""
    CREATE COMPUTE POOL IF NOT EXISTS {pool_name}
        MIN_NODES = 1
        MAX_NODES = 3
        INSTANCE_FAMILY = 'CPU_X64_M'
        AUTO_RESUME = TRUE
        AUTO_SUSPEND_SECS = 300
    """

    print("\nSQL Command:")
    print(create_pool_sql)

    try:
        print("\nExecuting...")
        session.sql(create_pool_sql).collect()
        print(f"✓ Compute pool created/verified: {pool_name}")
    except Exception as e:
        _print_lines(
            f"✗ Failed to create compute pool: {e}",
            "\nTroubleshooting:",
            "  1. Check if you have CREATE COMPUTE POOL privilege",
            "  2. Verify your account supports SPCS",
            "  3. Contact your Snowflake admin for assistance",
        )
        return None

    # Check pool status separately so a status-read problem cannot mask
    # the fact that the pool itself was created/verified above.
    print("\nChecking pool status...")
    try:
        status_result = session.sql(f"DESCRIBE COMPUTE POOL {pool_name}").collect()
        state = _extract_pool_state(status_result)
        if state is not None:
            print(f"  State: {state}")
    except Exception as e:
        print(f"  ⚠ Could not read pool state: {e}")

    return pool_name


def load_model_for_spcs():
    """
    Load the trained model for SPCS deployment.

    Exits the process with status 1 when ``xgboost_model_joblib.pkl`` is
    missing. Sample data comes from ``test_data.csv`` (first 5 rows without
    the ``TARGET`` column) when that file exists, otherwise from 5 rows of
    random values in 20 ``FEATURE_NN`` columns.

    Returns:
    --------
    tuple
        (model, sample_data)
    """
    _print_banner("LOADING MODEL FOR SPCS DEPLOYMENT")

    model_path = Path('xgboost_model_joblib.pkl')

    if not model_path.exists():
        print(f"✗ Model file not found: {model_path}")
        print("Please run steps 1-3 to train and save the model.")
        sys.exit(1)

    print(f"\nLoading model from: {model_path}")

    import joblib
    # NOTE(security): joblib.load unpickles the file and can execute
    # arbitrary code; only load model files produced by a trusted pipeline.
    model = joblib.load(model_path)

    print(f"✓ Model loaded: {type(model).__name__}")

    # Load sample data
    test_data_path = Path('test_data.csv')
    if test_data_path.exists():
        df = pd.read_csv(test_data_path)
        sample_data = df.drop(columns=['TARGET']).head(5)
    else:
        # Fallback: synthetic rows, used only to give log_model an input shape.
        import numpy as np
        sample_data = pd.DataFrame(
            np.random.randn(5, 20),
            columns=[f"FEATURE_{i:02d}" for i in range(20)],
        )

    print(f"✓ Sample data prepared: {sample_data.shape}")

    return model, sample_data


def log_model_for_spcs(registry, model, sample_data):
    """
    Log model to registry with SPCS target platform.

    The version name is derived from the current local time
    (``v_YYYYMMDD_HHMMSS``). Any exception raised by ``log_model`` is
    reported and converted into a ``None`` return.

    Parameters:
    -----------
    registry : Registry
        Model Registry object
    model : object
        Trained model
    sample_data : pd.DataFrame
        Sample input data

    Returns:
    --------
    ModelVersion or None
        Logged model version, or None when logging failed
    """
    _print_banner("LOGGING MODEL FOR SPCS")

    model_name = "XGBOOST_SPCS"
    version_name = f"v_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

    _print_lines(
        f"\nModel Name: {model_name}",
        f"Version: {version_name}",
        "Target Platform: SNOWPARK_CONTAINER_SERVICES",
        "\nKey Differences from Warehouse Deployment:",
        "  • Can use ANY PyPI packages (not just Snowflake Conda channel)",
        "  • Supports GPU instances",
        "  • Better for large models and high-throughput inference",
        "  • Requires compute pool setup",
        "\nLogging model...",
        "This may take 2-5 minutes (building container image)...",
    )

    try:
        model_version = registry.log_model(
            model=model,
            model_name=model_name,
            version_name=version_name,
            comment="XGBoost model deployed to SPCS for scalable inference",

            # SPCS-specific configuration
            target_platforms=["SNOWPARK_CONTAINER_SERVICES"],

            # Dependencies packaged with the model
            conda_dependencies=[
                "xgboost==2.0.0",
                "scikit-learn==1.3.0",
                "pandas",
                "numpy",
            ],

            python_version="3.10",
            sample_input_data=sample_data,

            options={
                "enable_explainability": False,
                "relax_version": True,
                "target_methods": ["predict", "predict_proba"],
            },
        )

        print("\n✓ Model logged successfully!")
        print(f"  Model: {model_version.fully_qualified_model_name}")
        print(f"  Version: {version_name}")

        return model_version

    except Exception as e:
        _print_lines(
            f"\n✗ Failed to log model: {e}",
            "\nThis is expected if:",
            "  • Your account doesn't support SPCS",
            "  • You don't have required privileges",
            "  • SPCS is not available in your region",
        )
        return None


def display_spcs_deployment_info(model_version, compute_pool):
    """
    Display information about SPCS deployment.

    Parameters:
    -----------
    model_version : ModelVersion
        Deployed model version
    compute_pool : str
        Compute pool name
    """
    _print_banner("SPCS DEPLOYMENT INFORMATION")

    _print_lines(
        "\nDeployment Details:",
        f"  Model: {model_version.fully_qualified_model_name}",
        f"  Compute Pool: {compute_pool}",
        "  Platform: Snowpark Container Services",
        "\nHow SPCS Model Serving Works:",
        "  1. Snowflake builds a container image with your model",
        "  2. Container includes all dependencies (conda/pip packages)",
        "  3. Inference server runs in the container",
        "  4. Service functions are created for calling the model",
        "  5. Optional: REST API endpoint for external access",
        "\nAccessing the Model:",
        "  • Python API: model_version.run(data, function_name='predict')",
        "  • SQL API: SELECT model!predict(*) FROM table",
        "  • REST API: HTTP POST to ingress endpoint (if configured)",
        "\nService Management:",
        "  • View services: SHOW SERVICES",
        "  • Describe service: DESCRIBE SERVICE <service_name>",
        "  • Alter service: ALTER SERVICE <service_name> ...",
        "  • Drop service: DROP SERVICE <service_name>",
    )


def display_spcs_best_practices():
    """Display best practices for SPCS deployment."""
    _print_banner("SPCS BEST PRACTICES")

    _print_lines(
        "\n1. When to Use SPCS:",
        "   ✓ Large models (> 500 MB)",
        "   ✓ GPU-accelerated models",
        "   ✓ Custom dependencies not in Snowflake Conda channel",
        "   ✓ High-throughput real-time inference",
        "   ✓ Models requiring distributed inference",
        "\n2. When to Use Warehouse:",
        "   ✓ Small-medium models (< 100 MB)",
        "   ✓ CPU-only models",
        "   ✓ Dependencies available in Snowflake Conda channel",
        "   ✓ Batch inference",
        "   ✓ Simpler deployment requirements",
        "\n3. Compute Pool Sizing:",
        "   • Start with CPU_X64_M for testing",
        "   • Use GPU instances for deep learning models",
        "   • Set MIN_NODES = 1 for cost efficiency",
        "   • Set MAX_NODES based on expected load",
        "   • Enable AUTO_SUSPEND to minimize costs",
        "\n4. Cost Optimization:",
        "   • Use AUTO_SUSPEND to stop idle services",
        "   • Right-size compute pool instances",
        "   • Monitor service usage and adjust",
        "   • Consider warehouse for low-volume inference",
        "\n5. Security:",
        "   • Use role-based access control (RBAC)",
        "   • Limit ingress endpoint access",
        "   • Use network policies for external access",
        "   • Audit service access logs",
        "\n6. Monitoring:",
        "   • Check service status regularly",
        "   • Monitor container logs",
        "   • Track inference latency and throughput",
        "   • Set up alerts for service failures",
    )


def display_comparison_table():
    """Display comparison table between Warehouse and SPCS."""
    _print_banner("WAREHOUSE VS SPCS COMPARISON")

    # (feature, warehouse, spcs) — one tuple per table row, in display order.
    rows = (
        ('Model Size', '< 100 MB (optimal)', 'Any size'),
        ('Compute', 'CPU only', 'CPU or GPU'),
        ('Dependencies', 'Snowflake Conda', 'Any PyPI/Conda'),
        ('Setup Complexity', 'Simple', 'Moderate'),
        ('Cold Start Time', 'Seconds', 'Minutes (first time)'),
        ('Scaling', 'Warehouse scaling', 'Container scaling'),
        ('Cost', 'Warehouse credits', 'Compute pool credits'),
        ('Best For', 'Batch inference', 'Real-time inference'),
        ('External Access', 'Via SQL/Python', 'REST API available'),
    )

    print(f"\n{'Feature':<30} {'Warehouse':<25} {'SPCS':<25}")
    print("-" * RULE_WIDTH)
    for feature, warehouse, spcs in rows:
        print(f"{feature:<30} {warehouse:<25} {spcs:<25}")


def main():
    """Main execution function"""
    _print_banner("STEP 6B: INFERENCE ON SNOWPARK CONTAINER SERVICES (SPCS)")
    print("\nThis script demonstrates SPCS deployment for advanced use cases.")
    print("Note: SPCS requires specific account features and privileges.")

    # Configuration
    CONNECTION_NAME = 'legalzoom'
    DATABASE = 'ML_SHOWCASE'
    SCHEMA = 'MODELS'
    COMPUTE_POOL = 'ML_INFERENCE_POOL'

    # Step 1: Initialize registry
    connection, session, registry = initialize_registry(
        CONNECTION_NAME, DATABASE, SCHEMA
    )

    # Stays None unless the model is actually logged in step 5.
    model_version = None

    # try/finally guarantees the connection is closed even when a step
    # raises or load_model_for_spcs() exits the process.
    try:
        # Step 2: Check prerequisites
        prereqs_ok = check_spcs_prerequisites(session)

        if not prereqs_ok:
            print("\n⚠ SPCS prerequisites not fully met")
            print("  This is for demonstration purposes only")

        # Step 3: Create compute pool (optional - may fail if not supported)
        print("\nAttempting to create compute pool...")
        print("(This may fail if SPCS is not available in your account)")

        compute_pool = create_compute_pool(session, COMPUTE_POOL)

        if compute_pool:
            # Step 4: Load model
            model, sample_data = load_model_for_spcs()

            # Step 5: Log model for SPCS
            print("\nAttempting to log model for SPCS...")
            print("(This may fail if SPCS is not available)")

            model_version = log_model_for_spcs(registry, model, sample_data)

            if model_version:
                # Step 6: Display deployment info
                display_spcs_deployment_info(model_version, compute_pool)

        # Step 7: Best practices (always show)
        display_spcs_best_practices()

        # Step 8: Comparison table
        display_comparison_table()

        # Summary
        _print_banner("SUMMARY")
        print("✓ SPCS deployment process demonstrated")
        print("✓ Best practices and comparisons provided")

        if compute_pool and model_version:
            print(f"✓ Model deployed to SPCS: {model_version.fully_qualified_model_name}")
            print(f"✓ Compute pool: {compute_pool}")
        else:
            print("⚠ SPCS deployment not completed (may not be available)")
            print("  Use Warehouse deployment (06a_inference_warehouse.py) instead")

        _print_lines(
            "\nRecommendation:",
            "  • For most use cases, Warehouse deployment is sufficient",
            "  • Use SPCS for large models, GPU inference, or custom dependencies",
            "  • Start with Warehouse, migrate to SPCS if needed",
        )
    finally:
        # Close connection
        connection.close()

    _print_banner("NEXT STEPS")
    print("Explore the complete workflow in the Jupyter notebook:")
    print("  jupyter notebook 07_complete_notebook.ipynb")
    print("=" * RULE_WIDTH)


if __name__ == "__main__":
    main()