## Notebook Index
1. Feature Store 👈
2. [Feature Reduction](https://app.snowflake.com/sfpscogs/rpegu_aiml/#/notebooks/ML_MODELS.DS.%2202_Feature_Reduction%22)
3. [Model Training](https://app.snowflake.com/sfpscogs/rpegu_aiml/#/notebooks/ML_MODELS.DS.%2203_Model_Training%22)
4. [Model Inference & Scheduling](https://app.snowflake.com/sfpscogs/rpegu_aiml/#/notebooks/ML_MODELS.DS.%2204_Batch_Inferencing%22)


## Notebook Overview
In this notebook, we set up the Snowflake Feature Store by:
* Defining entities and feature views
* Registering them in the feature store
* Creating a feature dataset to be used for model training

The Snowflake Feature Store serves as a centralized repository for storing, managing, and serving machine learning features at scale. It supports consistent feature computation, promotes reuse across models, and simplifies data workflows for both training and inference.

In [None]:
# Import python packages
import json

# SNOWFLAKE
import snowflake.snowpark.functions as F
import snowflake.snowpark.types as T
from snowflake.snowpark.version import VERSION

# Snowflake Feature Store
from snowflake.ml.feature_store import (
    FeatureStore,
    FeatureView,
    Entity,  CreationMode)


# Grab the notebook's active Snowpark session; the cells below use this
# `session` handle to switch role/warehouse and to read the base tables.
from snowflake.snowpark.context import get_active_session
session = get_active_session()


In [None]:
# Databases: where the base tables are read from / where outputs are written.
input_database = 'ML_MODELS'
working_database = 'ML_MODELS'

# Schemas: input tables, working area, and the feature store schema.
input_schema = 'DS'
working_schema = 'DS'
fs_schema = 'FEATURE_STORE'

# Switch role before any table handle is created.
session.use_role('FR_SCIENTIST')

# Fully qualified names (DATABASE.SCHEMA.TABLE) of the base Snowflake tables.
tbl1, tbl2, tbl3, tbl4 = (
    f"{input_database}.{input_schema}.DEMO_TBL_{idx}" for idx in range(1, 5)
)
universe_tbl = f"{input_database}.{input_schema}.DEMO_TARGETS_TBL"

# Snowpark DataFrames over those tables, created in the same order as the names.
tbl1_sdf, tbl2_sdf, tbl3_sdf, tbl4_sdf, universe_sdf = (
    session.table(table_name)
    for table_name in (tbl1, tbl2, tbl3, tbl4, universe_tbl)
)



In [None]:
warehouse = 'DS_W'
snowpark_opt_warehouse = 'SNOWPARK_OPT_WH'
session.use_warehouse(warehouse)

# Create/Reference Snowflake Feature Store for Training (Development) Environment
try:
    # First try to reference the feature store without asking for it to be created.
    fs = FeatureStore(
        session=session,
        database=working_database,
        name=fs_schema,
        default_warehouse=warehouse,
    )
except Exception:
    # `except Exception` (rather than a bare `except`) keeps KeyboardInterrupt /
    # SystemExit from being swallowed and turned into a create attempt.
    # Fallback: create the feature store; this needs privileges to create it
    # if it does not exist yet.
    fs = FeatureStore(
        session=session,
        database=working_database,
        name=fs_schema,
        default_warehouse=warehouse,
        creation_mode=CreationMode.CREATE_IF_NOT_EXIST,
    )

# Primary / join keys shared by every entity registered in this notebook.
join_keys = ["MEMBER_ID", "REF_MMYY"]

In [None]:
def register_feature(fs, entity_nm, fv_version, feature_df, join_keys):
    """
    Register an entity and its feature view in the feature store, reusing existing ones.

    The feature view is always named ``FV_FEATURE_<entity_nm>``. An entity that is
    already listed in the feature store is fetched instead of being registered again,
    and a feature view that can be retrieved for ``fv_version`` is returned as-is.

    Parameters:
        fs (FeatureStore): Feature store client instance
        entity_nm (str): Name of the entity
        fv_version (str): Version of the feature view
        feature_df (DataFrame): DataFrame containing feature data
        join_keys (list): List of join keys for the entity

    Returns:
        FeatureView: The feature view retrieved from, or newly registered in,
        the feature store (never the unregistered draft).
    """
    fv_name = f"FV_FEATURE_{entity_nm}"

    # Aggregate the distinct entity names into one JSON array server-side so a
    # single row comes back, then decode it into a Python list.
    entity_names_json = (
        fs.list_entities()
        .select(F.to_json(F.array_agg("NAME", True)))
        .collect()[0][0]
    )
    existing_entities = json.loads(entity_names_json)

    if entity_nm not in existing_entities:
        entity_instance = Entity(
            name=entity_nm,
            join_keys=join_keys,
            desc=f"Primary Keys for {entity_nm}",
        )
        fs.register_entity(entity_instance)
    else:
        entity_instance = fs.get_entity(entity_nm)

    # Try to get the FeatureView; register it if it can't be retrieved.
    try:
        return fs.get_feature_view(fv_name, fv_version)
    except Exception:
        # NOTE(review): any lookup failure is treated as "not registered yet";
        # narrow this to the library's not-found error once it is confirmed.
        # `except Exception` at least lets KeyboardInterrupt/SystemExit through.
        fv_draft = FeatureView(
            name=fv_name,
            entities=[entity_instance],
            feature_df=feature_df,
        )
        # Return what the feature store hands back, not the local draft, so the
        # caller gets the registered feature view as the docstring promises.
        return fs.register_feature_view(fv_draft, version=fv_version, block=True)

In [None]:
# FeatureView creations
# Each entity gets one feature view built from its base table. register_feature
# derives the feature view name (FV_FEATURE_<entity>) itself, so only the entity
# name, version and source DataFrame are supplied here.
fv_version = "V_1"
for entity_nm, sfd in (
    ("ENT_1", tbl1_sdf),
    ("ENT_2", tbl2_sdf),
    ("ENT_3", tbl3_sdf),
    ("ENT_4", tbl4_sdf),
):
    output = register_feature(fs, entity_nm, fv_version, sfd, join_keys)

# Show the feature views now present in the feature store.
fs.list_feature_views().show()

### DATASET CREATION 

In [None]:
# Retrieve the V_1 feature view of every entity from the feature store.
fv_feature_ent1_instance = fs.get_feature_view("FV_FEATURE_ENT_1", "V_1")
fv_feature_ent2_instance = fs.get_feature_view("FV_FEATURE_ENT_2", "V_1")
fv_feature_ent3_instance = fs.get_feature_view("FV_FEATURE_ENT_3", "V_1")
fv_feature_ent4_instance = fs.get_feature_view("FV_FEATURE_ENT_4", "V_1")

fv_list = [
    fv_feature_ent1_instance,
    fv_feature_ent2_instance,
    fv_feature_ent3_instance,
    fv_feature_ent4_instance,
]

# Build one slice per feature view, keeping only the feature names that no
# earlier view in fv_list already supplied, so each name is requested once.
ds_cols = []
slice_list = []
for fv in fv_list:
    # Use the public `feature_names` property instead of the private
    # `_feature_desc` attribute.
    fv_cols = list(fv.feature_names)
    slice_cols = [col for col in fv_cols if col not in ds_cols]
    slice_list.append(fv.slice(slice_cols))
    ds_cols += fv_cols

# Create the dataset: join the selected features onto the spine (universe)
# table, with TARGET declared as the label column.
dataset = fs.generate_dataset(
    name=f"{working_database}.{working_schema}.DEMO_DATASET",
    spine_df=universe_sdf,
    features=slice_list,
    version="V_1",
    output_type="table",
    spine_label_cols=["TARGET"],
    desc="training dataset for ml poc",
)

In [None]:
# Preview the generated training dataset.
dataset.show()