In [26]:
import gzip
import json
import os
import tempfile

import snowflake.snowpark.functions as F
from snowflake.snowpark.session import Session
from snowflake.snowpark import Session
from snowflake.snowpark.types import StructType, StructField, FloatType
from snowflake.snowpark.types import VariantType

In [27]:
# Every connection setting is read from the matching SNOWFLAKE_<KEY> environment
# variable, so no credentials are stored in the notebook itself.
_CONNECTION_KEYS = (
    "account",
    "user",
    "password",
    "schema",
    "database",
    "role",
    "warehouse",
)
connection_parameters = {
    key: os.getenv(f"SNOWFLAKE_{key.upper()}") for key in _CONNECTION_KEYS
}

# Open the Snowpark session that the remaining cells use.
session_builder = Session.builder.configs(connection_parameters)
session = session_builder.create()


In [28]:

# Show which database/schema and warehouse the session is bound to, so the
# reader can confirm the connection context before anything is created.
active_schema = session.get_fully_qualified_current_schema()
active_warehouse = session.get_current_warehouse()
print(
    f"Current Database and schema: {active_schema}",
    f"Current Warehouse: {active_warehouse}",
    sep="\n",
)

Current Database and schema: "MLOPS"."ADVERTISING"
Current Warehouse: "COMPUTE_WH"


In [29]:
# Lazy Snowpark reference to the advertising table in the current schema.
ADVERTISING_TABLE = "ADVERTISING"
ad_df = session.table(ADVERTISING_TABLE)


In [30]:
# Internal stage that receives the serialized model; no file format is set on
# it. CREATE OR REPLACE recreates the stage each time this cell runs.
create_stage_sql = "\nCREATE OR REPLACE STAGE ml_models\n"
session.sql(create_stage_sql).collect()



[Row(status='Stage area ML_MODELS successfully created.')]

In [31]:
# Scale the session's warehouse up to LARGE.
# get_current_warehouse() returns the identifier already wrapped in double
# quotes (the session-context output above prints "COMPUTE_WH"), so it is used
# as-is: stripping the quotes would break case-sensitive or special-character
# warehouse names.
warehouse_to_scale_up = session.get_current_warehouse()
if warehouse_to_scale_up is None:
    raise RuntimeError("No warehouse is selected for the current session.")

session.sql(
    f"ALTER WAREHOUSE {warehouse_to_scale_up} SET WAREHOUSE_SIZE=LARGE;"
).collect()

[Row(status='Statement executed successfully.')]

In [32]:


# DDL for the `train` stored procedure. The Python handler:
#   1. loads the ADVERTISING table into pandas,
#   2. holds out 20% of the rows (random_state=42),
#   3. fits a degree-2 polynomial + standard-scaler + linear-regression pipeline
#      inside a 10-fold GridSearchCV with an empty parameter grid,
#   4. dumps the fitted object with joblib and uploads it to @ml_models,
#   5. returns the R2 score on the train and test splits as a VARIANT.
#
# RUNTIME_VERSION is written as a quoted string, as the CREATE PROCEDURE syntax
# specifies (an unquoted value such as 3.10 would be read as the number 3.1).
# The upload passes auto_compress=False so the staged file keeps the exact name
# model.joblib instead of being gzip-compressed to model.joblib.gz.
create_procedure_sql = """
CREATE OR REPLACE PROCEDURE train()
  RETURNS VARIANT
  LANGUAGE PYTHON
  RUNTIME_VERSION = '3.11'
  PACKAGES = ('snowflake-snowpark-python', 'scikit-learn', 'joblib')
  HANDLER = 'main'
AS $$
import os
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split, GridSearchCV
from joblib import dump

def main(session):
  df = session.table('ADVERTISING').to_pandas()
  X = df[['TV', 'RADIO', 'NEWSPAPER']]
  y = df['SALES']
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state = 42)
  numeric_features = ['TV', 'RADIO', 'NEWSPAPER']
  numeric_transformer = Pipeline(steps=[('poly',PolynomialFeatures(degree = 2)),('scaler', StandardScaler())])
  preprocessor = ColumnTransformer(transformers=[('num', numeric_transformer, numeric_features)])
  pipeline = Pipeline(steps=[('preprocessor', preprocessor),('classifier', LinearRegression(n_jobs=-1))])
  model = GridSearchCV(pipeline, param_grid={}, n_jobs=-1, cv=10)
  model.fit(X_train, y_train)
  model_file = os.path.join('/tmp', 'model.joblib')
  dump(model, model_file)
  session.file.put(model_file, "@ml_models", auto_compress=False, overwrite=True)
  return {"R2 score on Train": model.score(X_train, y_train), "R2 score on Test": model.score(X_test, y_test)}
$$;
"""




In [33]:
# Submit the DDL so the `train` stored procedure is created (or replaced) in
# the session's current schema; the status row is shown as the cell output.
create_procedure_query = session.sql(create_procedure_sql)
create_procedure_query.collect()


[Row(status='Function TRAIN successfully created.')]

In [34]:
# Scale the session's warehouse back down to XSMALL.
# get_current_warehouse() returns the identifier already wrapped in double
# quotes (the session-context output above prints "COMPUTE_WH"), so it is used
# as-is: stripping the quotes would break case-sensitive or special-character
# warehouse names.
warehouse_to_scale_down = session.get_current_warehouse()
if warehouse_to_scale_down is None:
    raise RuntimeError("No warehouse is selected for the current session.")

session.sql(
    f"ALTER WAREHOUSE {warehouse_to_scale_down} SET WAREHOUSE_SIZE=XSMALL;"
).collect()

[Row(status='Statement executed successfully.')]

In [35]:
# Run the training procedure on a LARGE warehouse.
# The warehouse is resized immediately around the call so the training itself
# runs at the larger size, and the `finally` clause scales it back to XSMALL
# even when the procedure raises.
training_warehouse = session.get_current_warehouse()  # already double-quoted
if training_warehouse is None:
    raise RuntimeError("No warehouse is selected for the current session.")

session.sql(
    f"ALTER WAREHOUSE {training_warehouse} SET WAREHOUSE_SIZE=LARGE;"
).collect()
try:
    train_metrics = session.call('train')
finally:
    session.sql(
        f"ALTER WAREHOUSE {training_warehouse} SET WAREHOUSE_SIZE=XSMALL;"
    ).collect()

# Display the R2 scores returned by the procedure as the cell output.
train_metrics

'{\n  "R2 score on Test": 9.533174341074796e-01,\n  "R2 score on Train": 9.288133512730626e-01\n}'

In [37]:
from snowflake.snowpark import Session
from snowflake.ml.registry import Registry
import joblib



In [38]:
# Create a registry instance
registry = Registry(session, database_name='MLOPS', schema_name='ADVERTISING')

# Register the model
model_version = registry.log_model(
    model_name="AdvertisingModel",
    model_version="v1",
    model_path="@ml_models/model.joblib"
)

TypeError: (0000) Registry.log_model() got an unexpected keyword argument 'model_version'