### Loading data

In [15]:
import sklearn.datasets as datasets
from sklearn.model_selection import train_test_split
import pandas as pd

iris = datasets.load_iris()

x_train, x_test, y_train, y_test = train_test_split(iris.data, iris.target)

df = pd.DataFrame(x_train, columns = iris.feature_names)

df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,6.0,2.2,5.0,1.5
1,5.7,2.9,4.2,1.3
2,5.0,3.4,1.5,0.2
3,6.3,3.4,5.6,2.4
4,6.0,2.2,4.0,1.0


### Connecting to MLFlow / Starting the experiment and a run

In [16]:
import mlflow

mlflow.set_tracking_uri("http://127.0.0.1:5000") # Change this if you modified the MLFlow UI URL

mlflow.set_experiment("Tracking-Exercise")

mlflow.start_run()

<ActiveRun: >

### Outputting data

In [17]:
df.to_csv("data.csv", header=True)

mlflow.log_artifact("data.csv", "input_data")

### Fitting the model

In [18]:
from sklearn.ensemble import RandomForestClassifier

import mlflow.sklearn

num_trees = 100

mlflow.log_param("num_trees", num_trees)

classifier = RandomForestClassifier(n_estimators = num_trees)

model = classifier.fit(x_train, y_train)

mlflow.sklearn.log_model(model, "model")

model

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

### Assessign Accuracy

In [19]:
from sklearn.metrics import accuracy_score

y_prediction = model.predict(x_test)

accuracy = accuracy_score(y_prediction, y_test)

mlflow.log_metric("accuracy", accuracy)

output_df = pd.DataFrame(y_prediction, columns=["prediction"])
output_df.to_csv("output.csv")

mlflow.log_artifact("output.csv", "output_data")

"Model Accuracy: " + str(accuracy)

'Model Accuracy: 1.0'

### Ending the MLFlow Run

In [20]:
# This sets a tag on your MLFlow run
mlflow.set_tag("notebook-type", "exercise")

mlflow.end_run()