### Loading data

In [14]:
import sklearn.datasets as datasets
from sklearn.model_selection import train_test_split
import pandas as pd

# Fixed seed so the train/test partition -- and therefore every artifact and
# metric logged later in this notebook -- is identical on each
# Restart Kernel -> Run All.
SEED = 42

iris = datasets.load_iris()

x_train, x_test, y_train, y_test = train_test_split(
    iris.data, iris.target, random_state=SEED
)

# Wrap the training features in a DataFrame so the columns carry readable names.
df = pd.DataFrame(x_train, columns=iris.feature_names)

df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,7.7,2.6,6.9,2.3
1,5.4,3.9,1.3,0.4
2,7.2,3.6,6.1,2.5
3,6.5,3.0,5.2,2.0
4,6.1,3.0,4.9,1.8


### Connecting to MLFlow / Starting the experiment and a run

In [15]:
import mlflow

mlflow.set_tracking_uri("http://127.0.0.1:5000") # Change this if you modified the MLFlow UI URL

# This connects to an existing experiment or creates a new one
mlflow.set_experiment("Tracking-Example-One")

# Re-executing this cell while a run is still open would make start_run()
# raise ("run ... is already active"), so close any run left active in this
# kernel before opening a fresh one.
if mlflow.active_run() is not None:
    mlflow.end_run()

# This starts a new run in the existing experiment
mlflow.start_run()

<ActiveRun: >

### Outputting data

In [16]:
# Write the training features to a local CSV file, header row included.
input_csv = "data.csv"
df.to_csv(input_csv, header=True)

# Attach that file to the active MLflow run under the "input_data" artifact path.
mlflow.log_artifact(local_path=input_csv, artifact_path="input_data")

### Fitting the model

In [17]:
from sklearn.linear_model import LogisticRegression

import mlflow.sklearn

# NOTE(review): with this few iterations the solver stops before converging
# (see the warning in this cell's output); raise it for a better fit.
max_iterations = 3

# This logs the model parameter to your MLflow run.
# Log the variable rather than a literal so the recorded value always matches
# the one actually passed to the estimator below.
mlflow.log_param("max_iter", max_iterations)

classifier = LogisticRegression(max_iter=max_iterations)

model = classifier.fit(x_train, y_train)

# This saves the Scikit-Learn model as an artifact in your MLflow run
mlflow.sklearn.log_model(model, "classifier")

model

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html.
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=3,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

### Assessing Accuracy

In [18]:
from sklearn.metrics import accuracy_score

y_prediction = model.predict(x_test)

# scikit-learn metrics take (y_true, y_pred): ground truth first, predictions
# second. Accuracy happens to be symmetric, but keeping the documented order
# avoids wrong results if this line is adapted to an asymmetric metric.
accuracy = accuracy_score(y_test, y_prediction)

# This logs a model metric to your MLflow run
mlflow.log_metric("accuracy_score", accuracy)

# Save the test-set predictions to a CSV file with a single "prediction" column.
output_df = pd.DataFrame(y_prediction, columns=["prediction"])
output_df.to_csv("output.csv")

# This logs out your output data to your MLflow run
mlflow.log_artifact("output.csv", "output")

"Model Accuracy: " + str(accuracy)

'Model Accuracy: 0.7631578947368421'

### Ending the MLFlow Run

In [19]:
# Attach the "notebook-type" tag to the current MLflow run.
mlflow.set_tag(key="notebook-type", value="example")

# Close the run that is currently active.
mlflow.end_run()