In [None]:
# Import python packages
import streamlit as st
import pandas as pd
import numpy as np 
import warnings; warnings.simplefilter('ignore') 
from snowflake.snowpark.version import VERSION 
from snowflake.ml.registry import Registry

# We can also use Snowpark for our analyses!
from snowflake.snowpark.context import get_active_session
from sklearn.preprocessing import StandardScaler 
from snowflake.ml.modeling.cluster import KMeans 
# Grab the currently active Snowpark session; every later cell talks to Snowflake through it.
session = get_active_session()


In [None]:
# Report the Snowpark for Python release in use (first three components of VERSION).
snowpark_version = VERSION
version_text = '.'.join(str(snowpark_version[idx]) for idx in range(3))
print(f'Snowpark for Python version : {version_text}')

In [None]:
# Point the session at the database and schema used by this notebook.
# Bare `USE DATABASE ...;` / `USE SCHEMA ...;` lines are SQL, not Python, and make
# a Python cell fail with a SyntaxError, so the context is set through the session.
session.use_database('hol_db')
session.use_schema('public')

# Query the pre-segmentation customer table and print a preview of its rows.
df = session.sql('select * from CUSTOMER_PRESEGMENT')
df.show()

In [None]:
from kmodes.kmodes import KModes

# Number of customer segments for the initial model.
clusters = 3

# Pull the Snowpark DataFrame into pandas; the k-modes model below is fitted on it.
pd_df = df.to_pandas()
# A bare `pd_df.head()` in the middle of a cell is evaluated and thrown away
# (only a cell's final expression is rendered), so print the preview explicitly.
print(pd_df.head())

# Fit k-modes with Huang initialisation; the fixed seed keeps the result reproducible.
kmodes_model = KModes(n_clusters=clusters, init='Huang', random_state=0)
kmodes_model.fit(pd_df)

In [None]:
import matplotlib.pyplot as plt

# Elbow curve to find optimal K: fit one k-modes model per candidate cluster
# count and record its clustering cost.
K = range(1, 5)
cost = []
for num_clusters in K:
    # init="random" is stochastic; a fixed seed makes the curve identical on every run.
    kmode = KModes(n_clusters=num_clusters, init="random", n_init=5, verbose=1, random_state=0)
    # Only cost_ is read afterwards, so fit() is enough; fit_predict() would also
    # compute cluster labels that were never used.
    kmode.fit(pd_df)
    cost.append(kmode.cost_)

plt.plot(K, cost, 'bx-')
plt.xlabel('No. of clusters')
plt.ylabel('Cost')
plt.title('Elbow Method For Optimal k')
plt.show()

In [None]:
# Get sample input data to pass into the registry logging function
snowpark_df = session.create_dataframe(pd_df)
X = snowpark_df.limit(100)

# The registry is created in whatever database/schema the session currently uses.
db = session.get_current_database()
schema = session.get_current_schema()

# Define model name
model_name = "KMODES_MODEL_9_9_2024"

# Create a registry and log the model
native_registry = Registry(session=session, database_name=db, schema_name=schema)

# delete model
#native_registry.delete_model(model_name)

# Let's first log the very first model we trained
model_ver = native_registry.log_model(
    model_name=model_name,
    model=kmodes_model,
    sample_input_data=X, # to provide the feature schema
    conda_dependencies=['kmodes']
)

# Add evaluation metric
# model_ver.set_metric(metric_name="mean_abs_pct_err", value=mape)

# Add a description. The cluster count is read from the fitted model so the text
# cannot drift from the model that was logged (it previously hard-coded
# "2 clusters" although the model is trained with `clusters = 3`).
model_ver.comment = f"This is the KMODES model with {kmodes_model.n_clusters} clusters. "

In [None]:
# Look up the registered model and show the name of its default version.
registered_model = native_registry.get_model(model_name)
registered_model.default.version_name

In [None]:
# Retrieve the model's default version instead of a hard-coded 'v0': log_model()
# is called without a version_name in this notebook, so a version literally named
# 'v0' is not guaranteed to exist and the lookup could fail.
model_ver = native_registry.get_model(model_name).default

# Run the model's "predict" function over the full Snowpark DataFrame and
# preview the first rows of the result.
result_sdf2 = model_ver.run(snowpark_df, function_name="predict")
result_sdf2.limit(100).to_pandas().head()

In [None]:
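# (Disabled) Sketch for persisting per-customer predictions to CUSTOMER_SEGMENTS.
# NOTE(review): before re-enabling, confirm that the session.sql(...) statement is
# actually executed (e.g. by calling .collect() on it), and that `df` is the frame
# you mean to save -- in this notebook `df` is the CUSTOMER_PRESEGMENT query result,
# which has no prediction column, so the saveAsTable line would overwrite the table.
# session.sql(f"CREATE OR REPLACE TABLE CUSTOMER_SEGMENTS AS SELECT a.*, KMODES_MODEL_9_9_2024!predict(a.* )['output_feature_0'] as prediction from CUSTOMER_PRESEGMENT a")
# df.write.saveAsTable("CUSTOMER_SEGMENTS", mode="overwrite", enable_schema_evolution=True)
# df.limit(100).to_pandas().head()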

In [None]:
# response_df = session.sql(f"select * from customer_segments")
# response_df.show(10)