# Notebook to verify Mlflow

## Import mlflow library

In [1]:
import os
import warnings

import mlflow
import numpy as np
from sklearn.tree import DecisionTreeClassifier

## Prepare training data

In [2]:
# Four samples with two features each, one sample per row.
X = np.array([
    [1, 1],
    [1, 2],
    [2, 2],
    [2, 3],
])
# Target is the linear combination 1*x0 + 2*x1 + 3 of each row.
y = X @ np.array([1, 2]) + 3

## Setup Mlflow
Set the location of the MLflow server here. Note that because both your notebook and the MLflow server are running on Kubernetes, we simply use the address of the MLflow Kubernetes Service.

We are using our local Minio server as the S3 storage, and therefore the Minio password must be passed in the environment variable named AWS_SECRET_ACCESS_KEY.

### Experiment Name
The experiment name is an important variable: all of your experiment runs are stored under this name in the MLflow server.

In [None]:
# Address of the MLflow tracking server (a Kubernetes Service name and port).
HOST = "http://mlflow:5500"

# Name of the experiment under which the runs from this notebook are recorded.
EXPERIMENT_NAME = "HelloMlFlow"

# Settings for the Minio (S3-compatible) store, exported as environment variables.
os.environ['MLFLOW_S3_ENDPOINT_URL'] = 'http://minio-ml-workshop:9000'
os.environ['AWS_ACCESS_KEY_ID'] = 'minio'
os.environ['AWS_REGION'] = 'us-east-1'
os.environ['AWS_BUCKET_NAME'] = 'mlflow'

# AWS_SECRET_ACCESS_KEY is deliberately NOT assigned here: the password must be
# supplied through the environment instead of being written into the notebook.
# Warn early (without failing) so a missing secret is noticed before training.
if not os.environ.get('AWS_SECRET_ACCESS_KEY'):
    warnings.warn(
        "AWS_SECRET_ACCESS_KEY is not set in the environment; uploads to the "
        "S3 storage may fail unless credentials are provided another way."
    )

# Connect to the local MLflow tracking server
mlflow.set_tracking_uri(HOST)

# Set the experiment name under which all of your experiment runs are labelled
mlflow.set_experiment(EXPERIMENT_NAME)

# Enable autologging for scikit-learn
mlflow.sklearn.autolog()

## Perform training as usual

In [None]:

# Build the demo classifier. NOTE(review): the data cell defines only four
# training rows, fewer than min_samples_split=10, so the tree presumably stays
# a single leaf -- enough to check that a run gets recorded.
model = DecisionTreeClassifier(
    max_depth=5,
    criterion='gini',
    min_samples_leaf=3,
    min_samples_split=10,
)

# Fit inside an explicit MLflow run; `run` is the handle to the active run.
with mlflow.start_run() as run:
    model.fit(X, y)