# Configuration

> This module provides configuration classes for a feature store system. It defines a comprehensive configuration structure using Pydantic models to support feature view management with validation capabilities. The configuration includes settings for feature views, data sources, and other related components.

In [None]:
Hi Everyone, 

Network rules specify the external destinations (hostnames, IPs, ports) that your container service is allowed to communicate with. They define the boundaries of where your service can send network traffic. Think of them as a whitelist of destinations. The values from SYSTEM$GET_PRIVATELINK_CONFIG() are what you should focus on if you want to work through your PrivateLink connection. You have to create access points for these to work.

1. First, identify exactly what your service needs:

```sql
-- Get the exact PrivateLink endpoints for your account
SELECT SYSTEM$GET_PRIVATELINK_CONFIG();
```

2. Create a targeted network rule:

```sql
-- Create a network rule specifically for Snowflake's endpoints
CREATE OR REPLACE NETWORK RULE jupyter_snowflake_connectivity
  MODE = EGRESS
  TYPE = PRIVATE_HOST_PORT
  VALUE_LIST = (
    '<your-spcs-auth-privatelink-url-value>',
    '<your-app-service-privatelink-url-value>'
  );
```

This approach only grants access to the specific Snowflake PrivateLink endpoints needed for authentication and accessing your service, rather than including S3 and PyPI, which may not be required for basic functionality.

3. For additional functionality:

If your Jupyter service needs to access specific packages or data sources, you can create separate, purpose-specific network rules:

```sql
-- Only add if your service needs to fetch packages
CREATE OR REPLACE NETWORK RULE jupyter_package_access
  MODE = EGRESS
  TYPE = HOST_PORT
  VALUE_LIST = ('pypi.org:443', 'files.pythonhosted.org:443');

-- Only add if your service needs S3 access
CREATE OR REPLACE NETWORK RULE jupyter_s3_access
  MODE = EGRESS
  TYPE = PRIVATE_HOST_PORT
  VALUE_LIST = ('<your-specific-s3-bucket>.s3.<your-region>.amazonaws.com');
```

4. Create a more targeted EAI:

```sql
-- Include only the network rules your service actually needs
CREATE OR REPLACE EXTERNAL ACCESS INTEGRATION jupyter_eai
  ALLOWED_NETWORK_RULES = (jupyter_snowflake_connectivity)
  ENABLED = true;

-- If you added the optional rules above, you might use:
-- ALLOWED_NETWORK_RULES = (jupyter_snowflake_connectivity, jupyter_package_access, jupyter_s3_access)
```

This approach follows the principle of least privilege by only granting the specific access needed for the service to function, which is a security best practice.

For your POC environment, you might start with just the Snowflake PrivateLink endpoints and then add additional access only if you encounter specific functionality issues.

In [None]:
#| default_exp config

In [None]:
#| export
from __future__ import annotations
from typing import Optional, Dict, List, Union
from pydantic import BaseModel, Field, validator
from datetime import timedelta
import yaml
from pathlib import Path

# Import our custom exceptions
from snowflake_feature_store.exceptions import ConfigurationError


In [None]:
#| export
class RefreshConfig(BaseModel):
    """Configuration for feature refresh settings.

    Attributes:
        frequency: Either a cron expression (five whitespace-separated
            fields) or a duration written as ``"<positive integer> <unit>"``
            where the unit is one of minute(s), hour(s) or day(s),
            case-insensitive. Defaults to ``"1 day"``.
        mode: Refresh mode string. Defaults to ``"FULL"``; this model does
            not validate the value.
    """
    frequency: str = Field("1 day", description="Refresh frequency (e.g., '1 day', '30 minutes')")
    mode: str = Field("FULL", description="Refresh mode (FULL or INCREMENTAL)")

    # NOTE(review): `validator` is the Pydantic V1-style decorator imported by
    # this file; Pydantic V2 reports it as deprecated in favour of
    # `field_validator`.
    @validator('frequency')
    def validate_frequency(cls, v):
        """Validate the refresh frequency format.

        Args:
            v: The candidate frequency string.

        Returns:
            ``v`` unchanged when it is a five-field cron expression or a
            ``"<positive integer> <unit>"`` duration.

        Raises:
            ConfigurationError: If ``v`` matches neither accepted form,
                including durations whose count is zero or negative.
        """
        parts = v.split()

        # Five fields are taken to be a cron expression; the individual
        # fields are not inspected.
        if ' ' in v and len(parts) == 5:
            return v

        valid_units = ['minute', 'minutes', 'hour', 'hours', 'day', 'days']
        if len(parts) == 2:
            amount, unit = parts
            try:
                count = int(amount)
            except ValueError:
                # Non-numeric amounts fall through to the error below.
                count = 0
            # A refresh interval of zero or less is meaningless, so only
            # strictly positive counts are accepted.
            if count > 0 and unit.lower() in valid_units:
                return v

        raise ConfigurationError(
            f"Invalid refresh frequency: {v}. "
            "Use either cron expression or duration (e.g., '1 day', '30 minutes')"
        )


/var/folders/hm/zsqyytm950g1dc_00qtbp2zh0000gn/T/ipykernel_47609/2816683478.py:7: PydanticDeprecatedSince20: Pydantic V1 style `@validator` validators are deprecated. You should migrate to Pydantic V2 style `@field_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  @validator('frequency')


In [None]:

#| export
class FeatureValidationConfig(BaseModel):
    """Validation rule settings for a feature.

    Each check has a boolean switch plus the numeric settings that go with
    it. This model only stores the settings; it does not run any check.
    """
    # Null check: enabled by default, with a 0.1 threshold.
    null_check: bool = Field(default=True, description="Check for null values")
    null_threshold: float = Field(default=0.1, description="Maximum allowed null ratio")

    # Range check: disabled by default; both bounds are optional.
    range_check: bool = Field(default=False, description="Check value ranges")
    min_value: Optional[float] = Field(default=None)
    max_value: Optional[float] = Field(default=None)

    # Uniqueness check: disabled by default, with a 0.9 threshold.
    unique_check: bool = Field(default=False, description="Check for uniqueness")
    unique_threshold: float = Field(default=0.9, description="Minimum unique ratio")


In [None]:

#| export
class FeatureConfig(BaseModel):
    """Settings for a single feature.

    ``name`` and ``description`` are required. ``validation`` defaults to a
    freshly built ``FeatureValidationConfig`` and ``dependencies`` to a new
    empty list, so instances never share these defaults.
    """
    name: str
    description: str
    validation: Optional[FeatureValidationConfig] = Field(
        description="Validation rules for this feature",
        default_factory=FeatureValidationConfig,
    )
    dependencies: List[str] = Field(
        description="List of features this feature depends on",
        default_factory=list,
    )


In [None]:

#| export
class FeatureViewConfig(BaseModel):
    """Enhanced configuration for feature views.

    Attributes:
        name: Feature view name (required).
        domain: Optional domain segment; omitted from ``full_name`` when empty.
        entity: Entity segment, ``"CUSTOMER"`` by default.
        feature_type: Feature type segment, ``"BASE"`` by default.
        major_version: Integer >= 1 (default 1).
        minor_version: Integer >= 0 (default 0).
        refresh: Refresh settings; a default ``RefreshConfig`` when omitted.
        timestamp_col: Optional timestamp column name.
        description: Optional free-text description.
        features: Mapping of feature name to its ``FeatureConfig``.
        tags: Mapping of string tags.
    """
    name: str
    domain: str = ""
    entity: str = "CUSTOMER"
    feature_type: str = "BASE"
    major_version: int = Field(1, ge=1)
    minor_version: int = Field(0, ge=0)
    refresh: RefreshConfig = Field(default_factory=RefreshConfig)
    timestamp_col: Optional[str] = None
    description: Optional[str] = None
    features: Dict[str, FeatureConfig] = Field(
        default_factory=dict,
        description="Configuration for each feature"
    )
    tags: Dict[str, str] = Field(default_factory=dict)

    @property
    def version(self) -> str:
        """Return the version formatted as ``V<major>_<minor>``."""
        return f"V{self.major_version}_{self.minor_version}"

    @property
    def full_name(self) -> str:
        """Return the upper-cased, underscore-joined feature view name.

        The parts are ``FV``, the domain (only when non-empty), the entity
        and the feature type, in that order.
        """
        parts = ["FV"]
        if self.domain:
            parts.append(self.domain)
        parts.extend([self.entity, self.feature_type])
        return "_".join(part.upper() for part in parts)

    @property
    def refresh_frequency(self) -> str:
        """Return the frequency held by the nested ``RefreshConfig``."""
        return self.refresh.frequency

    @classmethod
    def from_yaml(cls, path: Union[str, Path]) -> FeatureViewConfig:
        """Load configuration from a YAML file.

        Args:
            path: Location of the YAML file to read.

        Returns:
            A new instance built from the file's top-level mapping.

        Raises:
            ConfigurationError: If the file cannot be read or parsed, if its
                top level is not a mapping (for example an empty file), or
                if the model rejects the loaded values. The underlying
                exception is attached as ``__cause__``.
        """
        try:
            with open(path, encoding="utf-8") as f:
                data = yaml.safe_load(f)
            # An empty document loads as None and a list/scalar document is
            # not unpackable as keyword arguments; report that explicitly
            # rather than through an opaque "argument after **" TypeError.
            if not isinstance(data, dict):
                raise TypeError(
                    "expected a YAML mapping at the top level, "
                    f"got {type(data).__name__}"
                )
            return cls(**data)
        except Exception as e:
            raise ConfigurationError(f"Error loading config from {path}: {str(e)}") from e

    def to_yaml(self, path: Union[str, Path]) -> None:
        """Save configuration to a YAML file.

        Args:
            path: Location of the YAML file to write (overwritten if present).

        Raises:
            ConfigurationError: If serialising the model or writing the file
                fails. The underlying exception is attached as ``__cause__``.
        """
        try:
            # Prefer Pydantic V2's model_dump() when the installed version
            # has it; .dict() is the V1 spelling and is deprecated under V2.
            dump = getattr(self, "model_dump", None) or self.dict
            # Serialise before opening so a failure here cannot leave a
            # truncated file behind.
            payload = dump()
            with open(path, 'w', encoding="utf-8") as f:
                yaml.dump(payload, f)
        except Exception as e:
            raise ConfigurationError(f"Error saving config to {path}: {str(e)}") from e


In [None]:
#| hide
import nbdev

# Invoke nbdev's export entry point; this cell is marked hidden above.
nbdev.nbdev_export()