### Regarding Warnings
* Warning in the notebook is due to the discrepancy in package version between local and server environment
* Readers can ignore them or update the packages to the required version through "pip install [package]==[version]"

In [86]:
from snowflake.snowpark import Session
from snowflake.snowpark.functions import col
import configparser


# Loading Credentials From Config File
snowflake_credentials_file = '../snowflake_creds.config'
config = configparser.ConfigParser()
config.read(snowflake_credentials_file)
connection_parameters = dict(config['default'])
session = Session.builder.configs(connection_parameters).create()

In [87]:
#CREATE FEATURE STORE

from snowflake.ml.feature_store import (FeatureStore, FeatureView, Entity, CreationMode)

fs = FeatureStore(
    session=session,
    database="SNOWPARK_DEFINITIVE_GUIDE",
    name="BIKE_SHARE_FEATURES",
    default_warehouse="COMPUTE_WH",
    creation_mode=CreationMode.CREATE_IF_NOT_EXIST,
)

### Creating & Registering Feature Entity

In [98]:
entity = Entity(name="ENTITY_WEATHER", join_keys=["ID"])
fs.register_entity(entity)
fs.list_entities().show()

-----------------------------------------
|"NAME"          |"JOIN_KEYS"  |"DESC"  |
-----------------------------------------
|ENTITY_WEATHER  |["ID"]       |        |
-----------------------------------------



### COPYING BSD_TRAINING TO FEATURE SCHEMA 

In [88]:

df_temp = session.table("SNOWPARK_DEFINITIVE_GUIDE.MY_SCHEMA.BSD_TRAINING")
df_temp.write.mode("overwrite").save_as_table("BSD_TRAINING")

### Creating & Registering Feature Views

In [92]:
import snowflake.ml.modeling.preprocessing as snowml
from snowflake.ml.modeling.pipeline import Pipeline
from snowflake.snowpark.types import IntegerType

# CREATING ID COLUMN
from snowflake.snowpark.functions import monotonically_increasing_id
df = df.withColumn("ID", monotonically_increasing_id())


df = df.drop("DATETIME","DATE")
CATEGORICAL_COLUMNS = ["SEASON","WEATHER"]
CATEGORICAL_COLUMNS_OHE = ["SEASON_OE","WEATHER_OE"]
MIN_MAX_COLUMNS = ["TEMP"]
import numpy as np
categories = {
    "SEASON": np.array([1,2,3,4]),
    "WEATHER": np.array([1,2,3,4]),
}



In [93]:
preprocessing_pipeline = Pipeline(
    steps=[
            (
                "OE",
                snowml.OrdinalEncoder(
                    input_cols=CATEGORICAL_COLUMNS,
                    output_cols=CATEGORICAL_COLUMNS_OHE,
                    categories=categories
                )
            ),
            (
                "MMS",
                snowml.MinMaxScaler(
                    clip=True,
                    input_cols=MIN_MAX_COLUMNS,
                    output_cols=MIN_MAX_COLUMNS,
                )
            )
    ]
) 
transformed_df = preprocessing_pipeline.fit(df).transform(df)
transformed_df.show()

--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"TEMP"               |"SEASON_OE"  |"WEATHER_OE"  |"SEASON"  |"HOLIDAY"  |"WORKINGDAY"  |"WEATHER"  |"ATEMP"  |"HUMIDITY"  |"WINDSPEED"  |"CASUAL"  |"REGISTERED"  |"COUNT"  |"ID"  |
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|0.22448979591836735  |0.0          |0.0           |1         |0          |0             |1          |14.395   |81          |0.0          |3         |13            |16       |0     |
|0.20408163265306123  |0.0          |0.0           |1         |0          |0             |1          |13.635   |80          |0.0          |8         |32            |40       |1     |
|0.20408163265306123  |0.0          |0.0           |1         |0          |0         

In [94]:
# SELECTING COLUMNS FOR FIRST FEATURE VIEW
feature_df = transformed_df.select(["SEASON_OE", "WEATHER_OE", "TEMP", "ATEMP", "HUMIDITY", "WINDSPEED", "ID"])

feature_df.show()

------------------------------------------------------------------------------------------------
|"SEASON_OE"  |"WEATHER_OE"  |"TEMP"               |"ATEMP"  |"HUMIDITY"  |"WINDSPEED"  |"ID"  |
------------------------------------------------------------------------------------------------
|0.0          |0.0           |0.22448979591836735  |14.395   |81          |0.0          |0     |
|0.0          |0.0           |0.20408163265306123  |13.635   |80          |0.0          |1     |
|0.0          |0.0           |0.20408163265306123  |13.635   |80          |0.0          |2     |
|0.0          |0.0           |0.22448979591836735  |14.395   |75          |0.0          |3     |
|0.0          |0.0           |0.22448979591836735  |14.395   |75          |0.0          |4     |
|0.0          |1.0           |0.22448979591836735  |12.88    |75          |6.0032       |5     |
|0.0          |0.0           |0.20408163265306123  |13.635   |80          |0.0          |6     |
|0.0          |0.0           |

In [99]:
#CREATING FEATURE VIEW

fv = FeatureView(
    name="WEATHER_FEATURES",
    entities=[entity],
    feature_df=feature_df,
    desc="weather features"
)

fv = fs.register_feature_view(
    feature_view=fv,
    version="V1",
    block=True
)

In [101]:
# EXAMINE FEATURE VIEW CONTENT

fs.read_feature_view(fv).show()

------------------------------------------------------------------------------------------------
|"SEASON_OE"  |"WEATHER_OE"  |"TEMP"               |"ATEMP"  |"HUMIDITY"  |"WINDSPEED"  |"ID"  |
------------------------------------------------------------------------------------------------
|0.0          |0.0           |0.22448979591836735  |14.395   |81          |0.0          |0     |
|0.0          |0.0           |0.20408163265306123  |13.635   |80          |0.0          |1     |
|0.0          |0.0           |0.20408163265306123  |13.635   |80          |0.0          |2     |
|0.0          |0.0           |0.22448979591836735  |14.395   |75          |0.0          |3     |
|0.0          |0.0           |0.22448979591836735  |14.395   |75          |0.0          |4     |
|0.0          |1.0           |0.22448979591836735  |12.88    |75          |6.0032       |5     |
|0.0          |0.0           |0.20408163265306123  |13.635   |80          |0.0          |6     |
|0.0          |0.0           |

In [102]:
# LIST ALL FEATURE VIEW
fs.list_feature_views(entity_name="ENTITY_WEATHER").select(["NAME", "ENTITIES", "FEATURE_DESC"]).show()

-------------------------------------------------------------------------
|"NAME"            |"ENTITIES"                    |"FEATURE_DESC"       |
-------------------------------------------------------------------------
|WEATHER_FEATURES  |[                             |{                    |
|                  |  {                           |  "ATEMP": "",       |
|                  |    "desc": "",               |  "HUMIDITY": "",    |
|                  |    "join_keys": [            |  "SEASON_OE": "",   |
|                  |      "ID"                    |  "TEMP": "",        |
|                  |    ],                        |  "WEATHER_OE": "",  |
|                  |    "name": "ENTITY_WEATHER"  |  "WINDSPEED": ""    |
|                  |  }                           |}                    |
|                  |]                             |                     |
-------------------------------------------------------------------------



### Data Generation through Feature views

In [103]:
#GENERATING TRAINING DATA
spine_df = session.table("BSD_TRAINING")
spine_df = spine_df.withColumn("ID", monotonically_increasing_id())
spine_df = spine_df.select("ID", "COUNT")
spine_df.show()


train_data = fs.generate_dataset(
    spine_df=spine_df,
    features=[
        fv.slice([
            "HUMIDITY","SEASON_OE","TEMP","WEATHER_OE","WINDSPEED"
        ])
    ],
    materialized_table=None,
    spine_timestamp_col=None,
    spine_label_cols=["COUNT"],
    save_mode="merge",
    exclude_columns=['ID']
)

train_data.df.show()

------------------
|"ID"  |"COUNT"  |
------------------
|0     |16       |
|1     |40       |
|2     |32       |
|3     |13       |
|4     |1        |
|5     |1        |
|6     |2        |
|7     |3        |
|8     |8        |
|9     |14       |
------------------

-----------------------------------------------------------------------------------------
|"COUNT"  |"HUMIDITY"  |"SEASON_OE"  |"TEMP"               |"WEATHER_OE"  |"WINDSPEED"  |
-----------------------------------------------------------------------------------------
|16       |81          |0.0          |0.22448979591836735  |0.0           |0.0          |
|40       |80          |0.0          |0.20408163265306123  |0.0           |0.0          |
|32       |80          |0.0          |0.20408163265306123  |0.0           |0.0          |
|13       |75          |0.0          |0.22448979591836735  |0.0           |0.0          |
|1        |75          |0.0          |0.22448979591836735  |0.0           |0.0          |
|1        |75

### Model Training & Prediction Using Enriched Data

In [105]:
# TRAIN A MODEL

from snowflake.ml.modeling.model_selection import GridSearchCV
from snowflake.ml.modeling.ensemble import GradientBoostingRegressor

FEATURE_LIST = ["TEMP", "WINDSPEED", "SEASON_OE", "WEATHER_OE"]
LABEL_COLUMNS = ['COUNT']
OUTPUT_COLUMNS = ['PREDICTED_COUNT']

param_grid = {
        "n_estimators":[100, 200, 300, 400, 500],
        "learning_rate":[0.1, 0.2, 0.3, 0.4, 0.5],
}

grid_search = GridSearchCV(
    estimator=GradientBoostingRegressor(),
    param_grid=param_grid,
    n_jobs = -1,
    scoring="neg_root_mean_squared_error",
    input_cols=FEATURE_LIST,
    label_cols=LABEL_COLUMNS,
    output_cols=OUTPUT_COLUMNS
)

train_df = train_data.df.drop(["ID"])

In [106]:
grid_search.fit(train_df)

<snowflake.ml.modeling.model_selection.grid_search_cv.GridSearchCV at 0x160d3f0afa0>

In [107]:
#PREDICT FROM FEATURE STORE GENERATED DATA
test_df = spine_df.limit(3).select("ID")
enriched_df = fs.retrieve_feature_values(
    test_df, train_data.load_features())
enriched_df = enriched_df.drop('ID')
enriched_df.show()

-------------------------------------------------------------------------------
|"HUMIDITY"  |"SEASON_OE"  |"TEMP"               |"WEATHER_OE"  |"WINDSPEED"  |
-------------------------------------------------------------------------------
|81          |0.0          |0.22448979591836735  |0.0           |0.0          |
|80          |0.0          |0.20408163265306123  |0.0           |0.0          |
|80          |0.0          |0.20408163265306123  |0.0           |0.0          |
-------------------------------------------------------------------------------



In [108]:
pred = grid_search.predict(enriched_df.to_pandas())
pred.head()

Unnamed: 0,HUMIDITY,SEASON_OE,TEMP,WEATHER_OE,WINDSPEED,PREDICTED_COUNT
0,81,0.0,0.22449,0.0,0.0,70.897276
1,80,0.0,0.204082,0.0,0.0,62.313673
2,80,0.0,0.204082,0.0,0.0,62.313673


### Feature Store - Support Functions

In [72]:
# FEATURE VIEW  FROM LIBRARY

#SLICE - GET SUBSET OF FEATURES FROM A VIEW
sliced_fv = fv.slice(["TEMP"])
sliced_fv

FeatureViewSlice(feature_view_ref=FeatureView(_name=WEATHER_FEATURES, _entities=[Entity(name=ENTITY_WEATHER, join_keys=['ID'], desc=)], _feature_df=<snowflake.snowpark.dataframe.DataFrame object at 0x00000160CB982AF0>, _timestamp_col=None, _desc=weather features, _query=SELECT "SEASON", "WEATHER", "TEMP", "ATEMP", "HUMIDITY", "WINDSPEED", seq8(0) AS "ID" FROM MODEL_DATA, _version=V1, _status=FeatureViewStatus.STATIC, _feature_desc=OrderedDict([('SEASON', ''), ('WEATHER', ''), ('TEMP', ''), ('ATEMP', ''), ('HUMIDITY', ''), ('WINDSPEED', '')]), _refresh_freq=None, _database=SNOWPARK_DEFINITIVE_GUIDE, _schema=BIKE_SHARE_FEATURES, _warehouse=None, _refresh_mode=None, _refresh_mode_reason=None), names=['TEMP'])

In [73]:
#FEATURE VIEW PHYSICAL NAME
print(fv.physical_name())

#FEATURE VIEW QUALIFIED NAME
print(fv.fully_qualified_name())

WEATHER_FEATURES$V1
SNOWPARK_DEFINITIVE_GUIDE.BIKE_SHARE_FEATURES.WEATHER_FEATURES$V1


In [74]:
#SET DESCRIPTION TO FEATURE

fv = fv.attach_feature_desc({"TEMP": "Current Temperature"})

In [75]:
# FEATURE STORE FROM LIBRARY

#GET FEATURE VIEW
fs.get_feature_view("WEATHER_FEATURES", "V1").to_df(session).show()

--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"NAME"            |"ENTITIES"                    |"TIMESTAMP_COL"  |"DESC"            |"QUERY"                                             |"VERSION"  |"STATUS"                  |"FEATURE_DESC"     |"REFRESH_FREQ"  |"DATABASE"                 |"SCHEMA"             |"WAREHOUSE"  |"REFRESH_MODE"  |"REFRESH_MODE_REASON"  |"PHYSICAL_NAME"      |
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [None]:
# DELETE A FEATURE VIEW

fs.list_feature_views(entity_name="WEATHER").select(["NAME"]).show()
fs.delete_feature_view(fv)

--------------------
|"NAME"            |
--------------------
|RENTAL_FEATURES   |
|WEATHER_FEATURES  |
|FULL_FEATURES     |
--------------------



In [97]:
#LIST ENTITIES

# entity = Entity(name="TEST", join_keys=["ID"])
# fs.register_entity(entity)
fs.list_entities().show()

---------------------------------
|"NAME"  |"JOIN_KEYS"  |"DESC"  |
---------------------------------
|        |             |        |
---------------------------------



In [None]:
# GET ENTITY
fs.get_entity("TEST")

Entity(name=TEST, join_keys=['ID'], desc=)

In [109]:
# #DELETE ENTITY
# fs.delete_entity("TEST")
# fs.list_entities().show()

In [None]:
#load data into feature view from dataset
fs.load_feature_views_from_dataset(train_data)

[FeatureViewSlice(feature_view_ref=FeatureView(_name=FULL_FEATURES, _entities=[Entity(name=WEATHER, join_keys=['ID'], desc=)], _feature_df=<snowflake.snowpark.dataframe.DataFrame object at 0x1381108e0>, _timestamp_col=None, _desc=, _query=SELECT  *  FROM (( SELECT "SEASON" AS "SEASON", "WEATHER" AS "WEATHER", "TEMP" AS "TEMP", "ATEMP" AS "ATEMP", "HUMIDITY" AS "HUMIDITY", "WINDSPEED" AS "WINDSPEED", "QUARTER" AS "QUARTER", "YEAR" AS "YEAR", "ID" AS "ID" FROM (SELECT * FROM SNOWPARK.TUTORIAL.WEATHER_FEATURES$V1)) AS SNOWPARK_LEFT INNER JOIN ( SELECT "HOLIDAY" AS "HOLIDAY", "WORKINGDAY" AS "WORKINGDAY", "CASUAL" AS "CASUAL", "REGISTERED" AS "REGISTERED", "COUNT" AS "COUNT", "HOUR" AS "HOUR", "MONTH" AS "MONTH", "WEEKDAY" AS "WEEKDAY", "ID" AS "ID" FROM (SELECT *  FROM SNOWPARK.TUTORIAL.RENTAL_FEATURES$V1)) AS SNOWPARK_RIGHT USING (ID)), _version=V1, _status=FeatureViewStatus.STATIC, _feature_desc=OrderedDict([('SEASON', None), ('WEATHER', None), ('TEMP', None), ('ATEMP', None), ('HUMIDIT

In [None]:
#CLEAR ALL THE ITEMS IN THE FEATURE STORE
fs.clear()