# Imports and Setup

In [1]:
# Standard Libraries
import os

# External Libraries
import click
import pandas as pd
import altair as alt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
import mlflow.sklearn
from mlflow.models.signature import infer_signature
import altair as alt
import altair_viewer

# Internal Libraries
import mlflow_vismod

# Constants

In [2]:
# Tracking server address and the experiment every run in this notebook logs to
MLFLOW_TRACKING_URI = 'http://localhost:5000'
EXPERIMENT = 'iris'

# Hold-out fraction for the train/test split, and the seed shared by the
# split and the random-forest classifiers
TEST_SIZE = 0.33
RANDOM_STATE = 42

  and should_run_async(code)


# Configure Environment

In [3]:
# Export the tracking URI so anything that reads the environment (including
# subprocesses started from this kernel) targets the same server.
os.environ['MLFLOW_TRACKING_URI'] = MLFLOW_TRACKING_URI

# Also set it explicitly: a tracking URI already set in this kernel session
# takes precedence over the environment variable, so relying on the variable
# alone can silently log to a different server when cells are re-run.
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

# Select the experiment that subsequent runs are logged under.
mlflow.set_experiment(EXPERIMENT)

# Data Preparation

In [4]:
# Load iris with as_frame=True and hold out TEST_SIZE of the rows for evaluation;
# the fixed seed keeps the split identical across notebook runs.
iris = datasets.load_iris(as_frame=True)
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# Sklearn Model

In [5]:
with mlflow.start_run() as run:
    # Train the random forest on the training split
    clf = RandomForestClassifier(max_depth=7, random_state=RANDOM_STATE).fit(X_train, y_train)

    # Record accuracy on the held-out split as a run metric
    test_accuracy = clf.score(X_test, y_test)
    mlflow.log_metric('accuracy', value=test_accuracy)

    # Log the fitted model; the signature is inferred from the training
    # features and the model's predictions on them
    model_signature = infer_signature(X_train, clf.predict(X_train))
    mlflow.sklearn.log_model(
        sk_model=clf,
        artifact_path='model',
        signature=model_signature,
    )

# Viz Model

In [6]:
with mlflow.start_run() as run:
    # Features and label in a single frame. The chart colours points by
    # `target`, so the label column is part of the data the chart consumes.
    train_frame = pd.concat([X_train, y_train], axis=1, sort=False)

    # Define Viz: sepal length vs. sepal width, coloured by class
    viz = alt.Chart(train_frame).mark_circle(size=60).encode(
        x='sepal length (cm)',
        y='sepal width (cm)',
        color='target:N',
    ).interactive()

    # Log Model
    # The signature is inferred from the same frame that is charted and passed
    # as the input example. Inferring it from X_train alone would omit the
    # `target` column and contradict the input example.
    mlflow_vismod.log_model(
        model=viz,
        artifact_path='viz',
        style='vegalite',
        signature=infer_signature(train_frame, None),
        input_example=train_frame,
    )


# Last expression of the cell: render the chart inline
viz

  and should_run_async(code)


# Sklearn + Viz Models

In [7]:
with mlflow.start_run() as run:
    ######################
    # Sklearn Classifier #
    ######################

    # Define and Fit Model
    clf = RandomForestClassifier(max_depth=7, random_state=RANDOM_STATE)
    clf.fit(X_train, y_train)

    # Log Accuracy (measured on the held-out split)
    mlflow.log_metric('accuracy', value=clf.score(X_test, y_test))

    # Log Sklearn Model
    mlflow.sklearn.log_model(
        sk_model=clf,
        artifact_path='model',
        signature=infer_signature(X_train, clf.predict(X_train)),
    )

    #############
    # Viz Model #
    #############

    # Evaluation frame: test features, true label, the model's prediction and
    # whether that prediction matched the label.
    iris_dataset = pd.concat([X_test, y_test], axis=1, sort=False)
    iris_dataset['predicted'] = clf.predict(X_test)
    iris_dataset['predicted_correctly'] = iris_dataset['target'] == iris_dataset['predicted']

    # Define Viz: colour marks by prediction correctness, with every column of
    # the evaluation frame in the tooltip.
    viz = alt.Chart(
        iris_dataset
    ).mark_circle(size=60).encode(
        x='sepal length (cm)',
        y='sepal width (cm)',
        color='predicted_correctly:N',
        tooltip=list(iris_dataset.columns),
    ).interactive()

    # Log Viz Model
    # The chart reads `predicted_correctly` and all tooltip columns, so the
    # signature is inferred from the evaluation frame itself. X_train lacks the
    # label and prediction columns the chart needs.
    mlflow_vismod.log_model(
        model=viz,
        artifact_path='viz',
        style='vegalite',
        signature=infer_signature(iris_dataset, None),
    )


# Last expression of the cell: render the chart inline
viz

# Loading Models

In [8]:
# Show the most recently defined `viz` chart through altair_viewer
altair_viewer.display(viz)

  and should_run_async(code)


In [9]:
# List the columns of the evaluation frame that backs the chart
iris_dataset.columns

Index(['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)',
       'petal width (cm)', 'target', 'predicted', 'predicted_correctly'],
      dtype='object')

In [10]:
# Locate the 'viz' artifact of the run bound to `run` by the most recently
# executed `with mlflow.start_run() as run:` cell.
# Artifact URIs use '/' separators on every platform, so join with '/'
# instead of os.path.join, which would insert a backslash on Windows.
model_uri = f"{run.info.artifact_uri.rstrip('/')}/viz"
loaded = mlflow_vismod.load_model(
    model_uri=model_uri,
    style='vegalite',
)

# Display the loaded visualization with only the rows whose true label is 0
loaded.display(iris_dataset[iris_dataset['target'] == 0])

In [11]:
# Display the loaded visualization with only rows whose sepal width is below 3.5 cm
loaded.display(iris_dataset[iris_dataset['sepal width (cm)'] < 3.5])

  and should_run_async(code)


In [12]:
# Print the loaded model's repr as plain text; use the repr() builtin rather
# than calling the __repr__ dunder directly.
print(repr(loaded))

mlflow_vismod.styles.vegalite.Style(artifact_uri='/Users/jhibba/PycharmProjects/mlflow-vizmod/notebooks/mlruns/1/5c3810eb36ee41a998ec8957e9a50db4/artifacts/viz/viz.pkl')


  and should_run_async(code)


In [13]:
# Inspect the concrete class of the object returned by mlflow_vismod.load_model
type(loaded)

mlflow_vismod.styles.vegalite.Style

In [21]:
# IPython-only introspection (`?` suffix): shows the signature and docstring of
# loaded.predict. NOTE(review): leftover exploration, not valid plain Python.
loaded.predict?

[0;31mSignature:[0m [0mloaded[0m[0;34m.[0m[0mpredict[0m[0;34m([0m[0mcontext[0m[0;34m,[0m [0mmodel_input[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Evaluates a pyfunc-compatible input and produces a pyfunc-compatible output.
For more information about the pyfunc input/output API, see the :ref:`pyfunc-inference-api`.

:param context: A :class:`~PythonModelContext` instance containing artifacts that the model
                can use to perform inference.
:param model_input: A pyfunc-compatible input for the model to evaluate.
[0;31mFile:[0m      ~/miniconda3/envs/mlflow-vizmod/lib/python3.8/site-packages/mlflow/pyfunc/model.py
[0;31mType:[0m      method


In [14]:
# NOTE(review): repeats the earlier `type(loaded)` check; likely a leftover
# from exploration.
type(loaded)

mlflow_vismod.styles.vegalite.Style

In [None]:
# Show the artifact location recorded on the loaded visualization model
loaded.artifact_uri

In [None]:
# Locate the 'model' artifact (the sklearn classifier) of the run bound to `run`.
# Artifact URIs use '/' separators on every platform, so join with '/'
# instead of os.path.join, which would insert a backslash on Windows.
model_uri = f"{run.info.artifact_uri.rstrip('/')}/model"
loaded_sk = mlflow.sklearn.load_model(
    model_uri=model_uri,
)

# Last expression of the cell: show the restored estimator
loaded_sk

In [None]:
# NOTE(review): this inspects `loaded` (the visualization model), not the
# `loaded_sk` estimator loaded in the preceding cell; confirm which was intended.
type(loaded)

In [None]:
# IPython-only introspection (`?` suffix) of loaded.display.
# NOTE(review): leftover exploration, not valid plain Python.
loaded.display?

# Data Test

In [None]:
# Display the loaded visualization with only rows whose true label is greater than 0
loaded.display(iris_dataset[iris_dataset['target'] > 0])

# Serialization

In [None]:
# Standard Libraries
import pickle

# External Libraries
import cloudpickle

In [None]:
# Identifiers for the two serializers exercised in this section
SERIALIZATION_FORMAT_PICKLE = 'pickle'
SERIALIZATION_FORMAT_CLOUDPICKLE = 'cloudpickle'

# Every serialization format listed as supported
SUPPORTED_SERIALIZATION_FORMATS = [
    SERIALIZATION_FORMAT_PICKLE,
    SERIALIZATION_FORMAT_CLOUDPICKLE,
]

In [None]:
# Render the current `viz` chart, the object serialized in the following cells
viz

In [None]:
# Serialize the chart with the standard-library pickler into the working directory
with open('./viz_pickle.pkl', 'wb') as pickle_file:
    pickle.dump(viz, pickle_file)

In [None]:
# Serialize the same chart with cloudpickle into the working directory
with open('./viz_cloud.pkl', 'wb') as cloudpickle_file:
    cloudpickle.dump(viz, cloudpickle_file)