# E2E Demo

Please ensure you've installed MLflow and the Snowflake Deployment Plugin first.

## First, train a model and package it with MLflow

In [2]:
import pandas as pd
# Download the red-wine quality dataset from the UCI archive (semicolon-delimited CSV).
red_wine = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv", sep=';')
# Build `data` as a renamed copy instead of aliasing `red_wine` and renaming in place:
# the raw frame keeps its original column names and re-running this cell is idempotent.
# Spaces in column names are replaced with underscores.
data = red_wine.rename(columns=lambda x: x.replace(' ', '_'))
data.head()

Unnamed: 0,fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [3]:
from sklearn.model_selection import train_test_split
# Separate the feature columns from the target column.
X = data.drop(columns=["quality"])
y = data["quality"]

# First cut: 60% of the rows go to training; the remainder is held back.
X_train, X_rem, y_train, y_rem = train_test_split(
    X, y, train_size=0.6, random_state=123
)

# Second cut: halve the held-back rows into validation and test sets.
X_val, X_test, y_val, y_test = train_test_split(
    X_rem, y_rem, test_size=0.5, random_state=123
)

In [None]:
import mlflow
import mlflow.pyfunc
import mlflow.sklearn
import numpy as np
import sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
from mlflow.models.signature import infer_signature
from mlflow.utils.environment import _mlflow_conda_env
import cloudpickle
import time

# Create a new experiment for this execution; the timestamp suffix makes the
# experiment name differ between executions.
experiment_name = f"Wine Quality Demo {int(time.time())}"
experiment_id = mlflow.create_experiment(experiment_name)
run_name = 'random_forest_model'
model_name = 'random_forest_model'

with mlflow.start_run(experiment_id=experiment_id, run_name=run_name):
    # Fit a small random forest with a fixed seed.
    n_estimators = 10
    model = RandomForestClassifier(
        n_estimators=n_estimators,
        random_state=np.random.RandomState(123),
    )
    model.fit(X_train, y_train)

    # Evaluate on the test split: multiclass AUC computed from class probabilities.
    predictions_test = model.predict_proba(X_test)
    auc_score = roc_auc_score(
        y_test,
        predictions_test,
        multi_class="ovo",
        labels=model.classes_,
    )

    # Record the hyperparameter and the metric on the active run.
    mlflow.log_param('n_estimators', n_estimators)
    mlflow.log_metric('auc', auc_score)

    # Log the model with an inferred signature and an environment that pins
    # the cloudpickle and scikit-learn versions currently installed.
    signature = infer_signature(X_train, model.predict(X_train))
    conda_env = _mlflow_conda_env(
        additional_conda_deps=None,
        additional_pip_deps=[
            "cloudpickle=={}".format(cloudpickle.__version__),
            "scikit-learn=={}".format(sklearn.__version__),
        ],
        additional_conda_channels=None,
    )
    mlflow.sklearn.log_model(model, model_name, conda_env=conda_env, signature=signature)

In [None]:
# Find runs in this experiment with the expected run name, sorted by AUC
# (highest first), and keep the top row.
last_run = mlflow.search_runs(
    experiment_ids=[experiment_id],
    filter_string=f'tags.mlflow.runName = "{run_name}"',
    order_by=["metrics.auc DESC"],
).iloc[0]
last_run

## Using the Snowflake Deployment Plugin

In [6]:
from snowflake.snowpark import Session
import os
connection_params = {
    # Fill in your Snowflake connection parameters here (see the Snowpark
    # `Session.builder.configs` documentation for the accepted keys).
    # Prefer reading secrets from the environment over hardcoding them.
}
# Open the Snowpark session used by the deployment cells below.
test_session = (
    Session.builder
    .configs(connection_params)
    .create()
)


In [7]:
# Build the model URI from the selected run's ID and the artifact path the model
# was logged under. Reusing `model_name` (rather than repeating the literal)
# keeps this URI in sync with the `log_model` call in the training cell.
model_uri = f"runs:/{last_run['run_id']}/{model_name}"
model_uri

'runs:/7420606e216741d7ad60e7d238478c14/random_forest_model'

In [8]:
from mlflow.deployments import get_deploy_client
from snowflake.ml.mlflow import set_session

# Register the Snowpark session first, then request the 'snowflake'
# deployment client from MLflow.
set_session(test_session)
client = get_deploy_client('snowflake')

In [None]:
# Deploy the logged model through the Snowflake client as 'wine_quality_model'.
client.create_deployment(name='wine_quality_model', model_uri=model_uri)

In [10]:
# List deployments through the deployment client.
client.list_deployments()

[{'name': 'MLFLOW$WINE_QUALITY_MODEL',
  'signature': 'MLFLOW$WINE_QUALITY_MODEL(FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, FLOAT)'}]

In [11]:
# Look up the deployment created above by its name.
client.get_deployment(name='wine_quality_model')

{'name': 'wine_quality_model',
 'signature': 'MLFLOW$WINE_QUALITY_MODEL(FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, FLOAT)'}

In [13]:
# Score the first five rows of the test split with the deployed model.
client.predict(
    deployment_name='wine_quality_model',
    df=X_test.head(5),
)

Unnamed: 0,"MLFLOW$WINE_QUALITY_MODEL(""FIXED_ACIDITY"", ""VOLATILE_ACIDITY"", ""CITRIC_ACID"", ""RESIDUAL_SUGAR"", ""CHLORIDES"", ""FREE_SULFUR_DIOXIDE"", ""TOTAL_SULFUR_DIOXIDE"", ""DENSITY"", ""PH"", ""SULPHATES"", ""ALCOHOL"")"
0,5
1,5
2,6
3,5
4,5
