In [None]:
# Train and Register Risk Model to Snowflake Model Registry
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from snowflake.snowpark.context import get_active_session

# Obtain the currently active Snowpark session and report which account it targets
session = get_active_session()
current_account = session.get_current_account()
print(f"Connected to: {current_account}")

In [None]:
# Point the session at the database and schema used by the rest of the notebook
session.use_database("REALTIME_ML_PIPELINE")
session.use_schema("ML_MODELS")

# Read the context back from the session so the printout reflects what is actually active
active_database = session.get_current_database()
active_schema = session.get_current_schema()
print(f"Using: {active_database}.{active_schema}")

In [None]:
# Synthesize a labelled training set for the evidence risk model.
# The global NumPy seed is fixed and the columns are drawn in the order listed,
# so every run produces the same frame.
np.random.seed(42)
n_samples = 500

# NOTE(review): MIB_HAS_HIT is sampled independently of MIB_HIT_COUNT, so the
# two columns can disagree within a row — confirm that is intended.
X_train = pd.DataFrame({
    'MIB_TOTAL_RECORDS': np.random.poisson(lam=2, size=n_samples),
    'MIB_HIT_COUNT': np.random.poisson(lam=0.5, size=n_samples),
    'MIB_HAS_HIT': np.random.binomial(n=1, p=0.3, size=n_samples),
    # Normal draw bounded to the closed interval [18, 45]
    'MIB_AVG_BMI': np.clip(np.random.normal(loc=27, scale=5, size=n_samples), 18, 45),
    'RX_TOTAL_FILLS': np.random.poisson(lam=5, size=n_samples),
    'RX_UNIQUE_DRUGS': np.random.poisson(lam=3, size=n_samples),
    'RX_DRUG_OPIOID': np.random.binomial(n=1, p=0.15, size=n_samples),
    'HAS_MIB_EVIDENCE': np.random.binomial(n=1, p=0.6, size=n_samples),
    'HAS_RX_EVIDENCE': np.random.binomial(n=1, p=0.8, size=n_samples),
    'COMBINED_RISK_SCORE': np.random.uniform(low=0, high=0.7, size=n_samples),
})

# A row is labelled positive (1) as soon as any one of the four rules fires
y_train = (
    X_train['MIB_HIT_COUNT'].gt(0)
    | X_train['RX_DRUG_OPIOID'].eq(1)
    | X_train['MIB_AVG_BMI'].gt(35)
    | X_train['COMBINED_RISK_SCORE'].gt(0.5)
).astype(int)

print(f"Training samples: {len(X_train)}")
print(f"Class distribution: {y_train.value_counts().to_dict()}")

In [None]:
# Fit a gradient-boosted classifier on an 80/20 hold-out split and score it
X_tr, X_val, y_tr, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=42,
)

model = GradientBoostingClassifier(
    n_estimators=100,
    max_depth=4,
    learning_rate=0.1,
    random_state=42,
)
model.fit(X_tr, y_tr)

# Every metric is computed on the held-out rows only
y_pred = model.predict(X_val)
metrics = {
    label: scorer(y_val, y_pred)
    for label, scorer in (
        ('accuracy', accuracy_score),
        ('precision', precision_score),
        ('recall', recall_score),
        ('f1', f1_score),
    )
}

print("Model Performance:")
for metric, value in metrics.items():
    print(f"  {metric}: {value:.4f}")

In [None]:
# Register model to Snowflake Model Registry
from snowflake.ml.registry import Registry

MODEL_NAME = 'EVIDENCE_RISK_MODEL'
VERSION_NAME = 'V2'

registry = Registry(session=session, database_name='REALTIME_ML_PIPELINE', schema_name='ML_MODELS')

# log_model() rejects a (model, version) pair that already exists in the
# registry, so calling it unconditionally makes this cell fail on every run
# after the first. Look the version up first and reuse it when present, which
# keeps the notebook re-runnable from a fresh kernel.
registered_model = next(
    (m for m in registry.models() if m.name.strip('"').upper() == MODEL_NAME),
    None,
)
version_exists = registered_model is not None and any(
    v.version_name.strip('"').upper() == VERSION_NAME
    for v in registered_model.versions()
)

if version_exists:
    # Nothing is uploaded on this path: the freshly trained `model` is NOT
    # written to the registry. Bump VERSION_NAME to publish a retrained model.
    mv = registered_model.version(VERSION_NAME)
    print(
        f"Model already registered: {mv.model_name} version {mv.version_name} "
        "(log_model skipped; bump VERSION_NAME to publish a retrained model)"
    )
else:
    mv = registry.log_model(
        model=model,
        model_name=MODEL_NAME,
        version_name=VERSION_NAME,
        # A single row is passed as the sample input, exactly as before
        sample_input_data=X_train.iloc[:1],
        metrics=metrics,
        comment='GradientBoostingClassifier for evidence-based risk scoring'
    )
    print(f"Model registered: {mv.model_name} version {mv.version_name}")

In [None]:
# List the models in the target schema so the registered model can be checked by eye
models_listing = session.sql("SHOW MODELS IN SCHEMA REALTIME_ML_PIPELINE.ML_MODELS")
models_listing.show()