In [1]:
from snowflake.snowpark import Session
from snowflake.snowpark.functions import upper, col, avg
from snowflake.snowpark.types import StringType
from snowflake.snowpark.context import get_active_session
from snowflake.ml.modeling.preprocessing import StandardScaler
from snowflake.ml.modeling.xgboost import XGBClassifier
from snowflake.ml.modeling.pipeline import Pipeline
from snowflake.snowpark.functions import when, col


  import pkg_resources


In [None]:
# Stored procedure 1: clean the raw customer data.

def sproc_clean_customer_data(session: Session) -> str:
    """Rebuild the CUSTOMER_CLEAN table from the raw customer table.

    Rows are filtered with ``dropna()`` (library defaults), the GENDER column
    is upper-cased, and the result overwrites CUSTOMER_CLEAN.

    Args:
        session: Snowpark session used to read the source table and to write
            the cleaned table.

    Returns:
        The literal string "updated" after the table has been written.
    """
    # NOTE(review): "SUSTOMER_DATA_1000" looks like a typo for
    # "CUSTOMER_DATA_1000" -- confirm the real table name before renaming.
    raw_customers = session.table("SUSTOMER_DATA_1000")

    without_missing = raw_customers.dropna()
    cleaned_customers = without_missing.with_column("GENDER", upper(col("GENDER")))

    table_writer = cleaned_customers.write.mode("overwrite")
    table_writer.save_as_table("CUSTOMER_CLEAN")

    return "updated"


In [None]:
# Stored procedure 1: register the cleaning function as a permanent procedure.

# No earlier cell assigns `session`, so fetch the active Snowpark session here;
# otherwise this cell fails with NameError on a fresh kernel.
session = get_active_session()

session.sproc.register(
    func=sproc_clean_customer_data,
    name="SP_CLEAN_CUSTOMER_DATA",
    # The keywords are spelled `packages` / `input_types` (plural); the
    # singular spellings are not parameters of `register`.
    packages=["snowflake-snowpark-python"],
    input_types=[],
    return_type=StringType(),
    is_permanent=True,
    stage_location="@SPROC_STAGE",
    replace=True,
)

In [None]:
# 2) Stored procedure that trains the churn model.
def sproc_train_churn_model(session: Session) -> str:
    """Fit a churn classifier on CUSTOMER_CLEAN and store test predictions.

    GENDER is recoded to a number, the rows are split 80/20 with a fixed
    seed, a StandardScaler + XGBClassifier pipeline is fitted on the larger
    part, and the predictions for the smaller part overwrite
    CUSTOMER_PREDICTION.

    Args:
        session: Snowpark session used for the table read and write.

    Returns:
        The literal string "updated" after the predictions are written.
    """
    customers = session.table("CUSTOMER_CLEAN")

    # Encode GENDER numerically: "M" -> 1, "F" -> 0, anything else -> NULL.
    gender_code = (
        when(col("GENDER") == "M", 1)
        .when(col("GENDER") == "F", 0)
        .otherwise(None)
    )
    encoded_customers = customers.with_column("GENDER", gender_code)

    # Fixed seed so the 80/20 split is requested the same way on every run.
    splits = encoded_customers.random_split([0.8, 0.2], seed=1)
    train_part, test_part = splits

    # The scaler writes its outputs under the same names as its inputs.
    scaler = StandardScaler(
        input_cols=["AGE", "ANNUAL_INCOME"],
        output_cols=["AGE", "ANNUAL_INCOME"],
        passthrough_cols=["GENDER", "CHURN"],
        drop_input_cols=True,
    )
    classifier = XGBClassifier(
        input_cols=["AGE", "ANNUAL_INCOME", "GENDER"],
        label_cols=["CHURN"],
        output_cols=["PREDICTED_CHURN"],
    )

    pipeline_steps = [("scl", scaler), ("clf", classifier)]
    fitted_pipeline = Pipeline(steps=pipeline_steps).fit(train_part)

    scored_rows = fitted_pipeline.predict(test_part)
    prediction_report = scored_rows.select(
        "ID", "GENDER", "CHURN", "PREDICTED_CHURN"
    )

    report_writer = prediction_report.write.mode("overwrite")
    report_writer.save_as_table("CUSTOMER_PREDICTION")

    return "updated"

In [None]:
# 2) Stored procedure that trains the churn model.
# Register it as a permanent procedure.
session.sproc.register(
    func=sproc_train_churn_model,
    name="SP_TRAIN_CHURN_MODEL",
    packages=["snowflake-snowpark-python", "snowflake-ml-python"],
    input_types=[],
    return_type=StringType(),
    is_permanent=True,
    stage_location="@SPROC_STAGE",
    replace=True,
    # `execute_as_owner` is not a parameter of `register`; owner's rights are
    # requested through `execute_as`.
    execute_as="owner",
)

In [None]:
-- Scheduled task: calls the cleaning procedure on a cron schedule
-- (minute 37, hour 18, every day, Asia/Tokyo time zone).
CREATE OR REPLACE TASK task1_clean
    WAREHOUSE = 'TAKUMA_NISHIZUKA_WH'
    SCHEDULE = 'using cron 37 18 * * * Asia/Tokyo'
AS
    CALL SP_CLEAN_CUSTOMER_DATA();

In [None]:
-- Dependent task: calls the training procedure after task1_clean.
CREATE OR REPLACE TASK task2_model
    WAREHOUSE = 'TAKUMA_NISHIZUKA_WH'
    AFTER task1_clean
AS
    CALL SP_TRAIN_CHURN_MODEL();

In [None]:
-- Suspend both tasks, the scheduled one first.
ALTER TASK task1_clean SUSPEND;
ALTER TASK task2_model SUSPEND;


In [None]:
-- Resume the dependent task; the scheduled task is resumed in the next cell.
ALTER TASK task2_model RESUME;

In [None]:
-- Resume the scheduled task so its cron schedule becomes active.
ALTER TASK task1_clean RESUME;

In [None]:
-- Inspect the predictions written by SP_TRAIN_CHURN_MODEL.
SELECT *
FROM CUSTOMER_PREDICTION;