# core

> Fill in a module description here

In [12]:
# ! pip install snowflake-ml-python
# ! pip install snowflake-snowpark-python

In [13]:
#| default_exp core

In [14]:
#| hide
from nbdev.showdoc import *

In [15]:

# | export
from __future__ import annotations
from typing import Union, List, Dict, Optional, Protocol, Callable
from dataclasses import dataclass
from fastcore.basics import listify
import snowflake.snowpark.functions as F
from snowflake.snowpark import DataFrame, Session


In [16]:
# | export
@dataclass
class FeatureStoreDefaults:
    "Default settings for feature store operations"
    version_prefix: str = 'V'
    refresh_frequency: str = '1 day'
    feature_view_prefix: str = 'FV'
    creation_mode: str = 'CREATE_IF_NOT_EXIST'
    refresh_mode: str = 'FULL'


In [17]:
# | export
defaults = FeatureStoreDefaults()


In [18]:
# | hide
def test_defaults():
    "Test default values are set correctly"
    assert defaults.version_prefix == 'V'
    assert defaults.refresh_frequency == '1 day'
    assert defaults.feature_view_prefix == 'FV'
    test_defaults()


In [19]:
# | export
def create_version(major:int, minor:int=0) -> str:
    "Create a standardized version string (e.g., 'V1_0' for major=1, minor=0)"
    return f"{defaults.version_prefix}{major}_{minor}"

In [20]:
# | export
def create_feature_view_name(
    domain:str,
    entity:str,
    feature_type:str
) -> str:
    """Create standardized feature view name
    
    Args:
        domain: Business domain (e.g., 'RETAIL', 'FINANCE')
        entity: Main entity name (e.g., 'CUSTOMER', 'PRODUCT')
        feature_type: Type of features (e.g., 'BEHAVIOR', 'PROFILE')
        
    Returns:
        Formatted name like 'FV_RETAIL_CUSTOMER_BEHAVIOR'
    
    Example:
        >>> create_feature_view_name('RETAIL', 'CUSTOMER', 'BEHAVIOR')
        'FV_RETAIL_CUSTOMER_BEHAVIOR'
    """
    parts = [defaults.feature_view_prefix, entity, feature_type]
    if domain: parts.insert(1, domain)
    return "_".join(part.upper() for part in parts)


In [21]:
# | hide
def test_create_version():
    "Test version string creation"
    assert create_version(1) == 'V1_0'
    assert create_version(1, 1) == 'V1_1'
    assert create_version(2, 5) == 'V2_5'

def test_create_feature_view_name():
    "Test feature view name creation"
    assert create_feature_view_name('RETAIL', 'CUSTOMER', 'BEHAVIOR') == 'FV_RETAIL_CUSTOMER_BEHAVIOR'
    assert create_feature_view_name('', 'CUSTOMER', 'PROFILE') == 'FV_CUSTOMER_PROFILE'
    
test_create_version()
test_create_feature_view_name()


In [22]:
# | export
class SQLFormatter:
    "Utilities for SQL query formatting and analysis"
    
    @staticmethod
    def format_sql(query:str, subq_to_cte:bool=False) -> str:
        """Format SQL query with proper indentation
        
        Args:
            query: SQL query string to format
            subq_to_cte: If True, convert subqueries to CTEs
            
        Returns:
            Formatted SQL string
            
        Example:
            >>> sql = "SELECT a, b FROM (SELECT * FROM table)"
            >>> print(SQLFormatter.format_sql(sql, True))
            WITH _q1 AS (
              SELECT *
              FROM table
            )
            SELECT a, b
            FROM _q1
        """
        import sqlglot
        expression = sqlglot.parse_one(query)
        if subq_to_cte:
            query = sqlglot.optimizer.optimizer.eliminate_subqueries(expression).sql()
        return sqlglot.transpile(query, read='snowflake', pretty=True)[0]
    
    @staticmethod
    def extract_table_names(query:str) -> List[str]:
        """Extract all table names from a SQL query
        
        Args:
            query: SQL query to analyze
            
        Returns:
            List of table names found in query
            
        Example:
            >>> sql = "SELECT * FROM table1 JOIN table2"
            >>> SQLFormatter.extract_table_names(sql)
            ['table1', 'table2']
        """
        import sqlglot
        from sqlglot.expressions import Table
        tables = set()
        for expr in sqlglot.parse(query):
            for table in expr.find_all(Table):
                tables.add(table.name)
        return list(tables)


In [23]:
# | hide
def test_sql_formatter():
    "Test SQL formatting functionality"
    # Test basic formatting
    sql = "SELECT a,b FROM table"
    formatted = SQLFormatter.format_sql(sql)
    assert "SELECT" in formatted
    assert "FROM" in formatted
    
    # Test subquery conversion
    sql = "SELECT a FROM (SELECT * FROM table)"
    formatted = SQLFormatter.format_sql(sql, True)
    assert "WITH" in formatted
    
    # Test table extraction
    sql = "SELECT * FROM table1 JOIN table2"
    tables = SQLFormatter.extract_table_names(sql)
    assert 'table1' in tables
    assert 'table2' in tables
    assert len(tables) == 2

test_sql_formatter()


In [24]:
# | eval: false
# Example usage of core functionality
from snowflake_feature_store.core import *

# Create feature view name
fv_name = create_feature_view_name('RETAIL', 'CUSTOMER', 'BEHAVIOR')
print(f"Feature view name: {fv_name}")

# Create version string
version = create_version(1, 2)
print(f"Version: {version}")

# Format SQL
sql = """
SELECT a, b 
FROM (SELECT * FROM table1) 
JOIN table2
"""
formatted = SQLFormatter.format_sql(sql, True)
print("Formatted SQL:")
print(formatted)

tables = SQLFormatter.extract_table_names(sql)
print(f"Tables used: {tables}")


Feature view name: FV_RETAIL_CUSTOMER_BEHAVIOR
Version: V1_2
Formatted SQL:
WITH cte AS (
  SELECT
    *
  FROM table1
)
SELECT
  a,
  b
FROM cte AS cte, table2
Tables used: ['table2', 'table1']


In [25]:
#| hide
import nbdev; nbdev.nbdev_export()