# Requirements

## Install packages

In [None]:
import sys

!{sys.executable} -m pip install sklearn
!{sys.executable} -m pip install mlflow
!{sys.executable} -m pip install numpy
!{sys.executable} -m pip install seaborn
!{sys.executable} -m pip install pandas
!{sys.executable} -m pip install hyperopt

## Imports

In [None]:
import mlflow
import time
import sklearn
import numpy as np
import seaborn as sns
import pandas as pd
import getpass
import random
from hyperopt import hp, fmin, tpe, space_eval
import mlflow.pyfunc

# Experiment tracking with MLflow

MLflow is an open source platform to manage the ML lifecycle, including experimentation, reproducibility and deployment.

![ML flow](img/mlflow.png)
![ML flow tracking](img/introduction-to-mlflow-11-638.jpg)

## Simple example

In [None]:
# Select the "simple_example" experiment (it is created on first use)
mlflow.set_experiment(experiment_name="simple_example")

# Everything logged inside this block is attached to a single MLflow run
with mlflow.start_run():

    # Parameters: two random integers stand in for real hyper-parameters
    for param_name in ("Param1", "Param2"):
        mlflow.log_param(param_name, random.randint(0, 10))

    # ... run your ML code ...

    # Metrics: two random floats stand in for real results
    for metric_name in ("Metric1", "Metric2"):
        mlflow.log_metric(metric_name, random.random())

    # Artifact: draw a violin plot of 1000 standard-normal samples, save it
    # to disk, then attach the saved file to the run
    samples = pd.DataFrame(columns=["x"], data=np.random.randn(1000))
    fig = sns.violinplot(x="x", data=samples).get_figure()
    fig.savefig("fig.png")
    mlflow.log_artifact("fig.png")

    # Tag: record which OS user launched the run
    mlflow.set_tag("user_name", getpass.getuser())

The experiment can now be viewed in the MLflow UI, which is started with the `mlflow ui` command.

## Monitoring

In [None]:
# Select the "monitoring" experiment (it is created on first use)
mlflow.set_experiment(experiment_name="monitoring")

# Log one random value every half second for 60 steps; passing `step`
# gives the metric an explicit x-axis position for each logged value
with mlflow.start_run():
    for step in range(60):
        time.sleep(0.5)
        mlflow.log_metric(key="metric", value=random.random(), step=step)

## Hyperparameter optimization

In [None]:
# Make "hyperopt" the active experiment (created if it does not exist yet),
# so that the runs started afterwards are recorded under it
mlflow.set_experiment(experiment_name="hyperopt")

# Toy loss to minimise
def loss(case, val):
    """Return the dummy loss for one sampled point.

    Args:
        case: Branch label; ``'case 1'`` selects the identity loss, any
            other value selects the quadratic loss.
        val: Numeric value sampled for that branch.

    Returns:
        ``val`` when ``case == 'case 1'``, otherwise ``val**2``.
    """
    if case == 'case 1':
        return val
    return val**2

# Wrap the loss function to add MLflow logging
def run_loss(args):
    """Evaluate ``loss`` for one sampled point inside a nested MLflow run.

    Args:
        args: Two-element sequence unpacked as ``(case, val)``.

    Returns:
        The value returned by ``loss(case=case, val=val)``.

    Side effects:
        Opens a nested MLflow run, tags it ``loss=dummy``, logs ``case`` and
        ``val`` as params and the loss value as the metric ``"metric"``.
    """
    # `nested=True` attaches this run as a child of the currently active run
    with mlflow.start_run(nested=True):
        case, val = args

        mlflow.set_tag("loss", "dummy")

        for param_name, param_value in (("case", case), ("val", val)):
            mlflow.log_param(param_name, param_value)

        loss_value = loss(case=case, val=val)
        mlflow.log_metric("metric", loss_value)

    return loss_value
        
# Search space: hyperopt first picks one of two branches (label 'a'), then
# samples the value belonging to that branch. Each point is a (case, val)
# tuple, which is the shape `run_loss` unpacks.
space = hp.choice(
    'a',
    [
        # Branch 1: 1 + a log-normal sample (label 'c1')
        ('case 1', 1 + hp.lognormal('c1', 0, 1)),
        # Branch 2: uniform sample between -10 and 10 (label 'c2')
        ('case 2', hp.uniform('c2', -10, 10)),
    ],
)
        
# Parent MLflow run: each evaluation made by `fmin` goes through `run_loss`,
# which records it as a nested child run of this one
with mlflow.start_run() as run:
    best = fmin(fn=run_loss, space=space, algo=tpe.suggest, max_evals=100)

    # `best` holds the selected labels; `space_eval` maps them back to the
    # concrete (case, val) point of the search space
    best_case, best_val = space_eval(space, best)
    best_metric = loss(case=best_case, val=best_val)

    # Summarise the best point on the parent run: params first, then metric
    for param_name, param_value in (("case", best_case), ("val", best_val)):
        mlflow.log_param(param_name, param_value)
    mlflow.log_metric("metric", best_metric)

## Model history

The `mlflow_project_model.py` script trains a simple model and logs it as an artifact. Let's run it.

In [None]:
%%bash 
# Run the training script as a plain Python file (not through `mlflow run`)
python mlflow_project_model.py

When running a .py file, MLflow logs the Git commit hash for free. This is convenient for reproducing experiments. Good practice: run from a clean working directory (no uncommitted changes)! <br>
The logged model also contains all the information about dependency versions.

In [None]:
# Paste here the ID of the run that logged the trained model (look it up in
# the MLflow UI). It is left empty on purpose and must be filled in by hand.
run_id = ""

In [None]:
# Load the previously trained model from the run identified by `run_id`
if not run_id:
    # An empty id would build the malformed URI "runs://model" and fail with
    # an opaque MLflow error, so stop early with an actionable message.
    raise ValueError(
        "`run_id` is empty: set it to the ID of the run that logged the model "
        "(see `mlflow ui`)."
    )

# "runs:/<run_id>/model" points at the artifact path "model" of that run
classifier = mlflow.pyfunc.load_model(model_uri="runs:/{}/model".format(run_id))
classifier

Model serialization works with various ML frameworks (sklearn, tensorflow, keras ...)

## Packaging

In [None]:
# Display the contents of `conda.yaml` (presumably the project's conda
# environment definition; the file itself is not part of this notebook)
%pycat conda.yaml

In [None]:
# Display the contents of the `MLproject` file (presumably the MLflow project
# definition; the file itself is not part of this notebook)
%pycat MLproject

Run a local project

In [None]:
%%bash
# Run the MLflow project located in the current directory; `-P` sets the
# project parameter `max_iter` to 10 (assumed to be declared in MLproject)
mlflow run . -P max_iter=10

Run a remote project

In [None]:
%%bash
# Run an MLflow project directly from its Git repository URL
mlflow run https://github.com/KiewanVillatel/technical_presentations.git

In [None]:
%%bash
# Same remote project, pinned with `-v` to a specific version (Git commit)
mlflow run https://github.com/KiewanVillatel/technical_presentations.git -v 3655ae

# TP

`experiment_tracking_tp/my_reproducible_experiment.py` contains simple code to train an MLP on the diabetes dataset. The goal is to instrument it with MLflow to get a fully reproducible experiment.

In [None]:
%%bash
# Run the exercise script directly as a plain Python file
python ./experiment_tracking_tp/my_reproducible_experiment.py

Once you have managed to create an experiment and to log parameters, metrics and any other relevant information, add `conda.yaml` and `MLproject` files so that the experiment can be run with the following command.

In [None]:
%%bash
# Run the exercise directory as an MLflow project; this needs the
# `conda.yaml` and `MLproject` files that the exercise asks you to add
mlflow run ./experiment_tracking_tp/

Some other ideas:
* add hyper-parameter tuning
* modularize your code (see https://github.com/mlflow/mlflow/tree/master/examples/multistep_workflow)

# Going further
* https://mlflow.org/
* https://github.com/mlflow/mlflow/tree/master/examples