## Assignment 1

Use the iris dataset to write an MLflow program that classifies the iris 'variety' and satisfies the requirements below.



1. Experiments, Runs, Parameters, Metrics shall be logged and tracked.
2. Input dataset, Train and Test data, Model shall be logged as artifacts.
3. Set multiple tags to the run.
4. Print the evaluation metrics.
5. Print the last active run.

Questions for this assignment
Which library, module, and class can be used for this problem statement?

In [None]:
# Import libraries 
import warnings, os, joblib
import pandas as pd
import numpy as np 
import mlflow
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics, svm


if __name__ == "__main__":
    # Script entry point: train a decision tree and a linear SVM on the iris
    # dataset and track the run in MLflow (parameters, metrics, tags, the
    # input/train/test data and both fitted models as artifacts).
    warnings.filterwarnings("ignore")
    np.random.seed(40)

    dataset_path = "data/iris.csv"
    feature_columns = ["sepal.length", "sepal.width", "petal.length", "petal.width"]
    target_column = "variety"
    split_random_state = 1
    svm_kernel = "linear"

    # Set the experiment
    exp = mlflow.set_experiment(experiment_name="assignment_1")

    # The context manager ends the run even when one of the steps below raises.
    with mlflow.start_run():
        # Load the dataset and turn the categorical variety into numerical codes.
        class_codes = {"Setosa": 1, "Versicolor": 2, "Virginica": 3}
        data = pd.read_csv(dataset_path)
        encoded_variety = data[target_column].map(class_codes)
        if encoded_variety.isna().any():
            # map() yields NaN for any value missing from class_codes; fail
            # loudly instead of training on partially encoded labels.
            raise ValueError(
                f"Column '{target_column}' contains values outside {sorted(class_codes)}"
            )
        actual_data = data.assign(**{target_column: encoded_variety})
        features = actual_data[feature_columns]
        labels = actual_data[target_column]

        # Create the train-test split
        train_data, test_data, train_labels, test_labels = train_test_split(
            features.to_numpy(), labels.to_numpy(), random_state=split_random_state
        )
        print(f"The number of data rows in the dataset is: {len(actual_data)} and after the train-test split we have {len(train_data)} train rows and {len(test_data)} test rows")

        # Log the input dataset, then the train and test splits. Each split is
        # written to a scratch CSV (features plus its labels), logged, and the
        # scratch file is removed again even if logging fails.
        mlflow.log_artifact(dataset_path, "data")
        splits = {
            "train.csv": (train_data, train_labels),
            "test.csv": (test_data, test_labels),
        }
        for file_name, (split_features, split_labels) in splits.items():
            split_df = pd.DataFrame(split_features, columns=feature_columns)
            split_df[target_column] = split_labels
            split_df.to_csv(file_name, index=False)
            try:
                mlflow.log_artifact(file_name, "data")
            finally:
                os.remove(file_name)

        # Create and train the classifiers we want to compare.
        decision_tree_classifier = DecisionTreeClassifier().fit(train_data, train_labels)
        svm_classifier = svm.SVC(kernel=svm_kernel).fit(train_data, train_labels)

        # Predict the response for the test dataset and score each model with
        # its own predictions.
        decision_tree_predictions = decision_tree_classifier.predict(test_data)
        svm_predictions = svm_classifier.predict(test_data)
        decision_tree_accuracy = metrics.accuracy_score(test_labels, decision_tree_predictions)
        svm_accuracy = metrics.accuracy_score(test_labels, svm_predictions)
        print("Decision Tree classifier Accuracy:", decision_tree_accuracy)
        print("Support Vector Machine Accuracy:", svm_accuracy)

        # Track the parameters and the evaluation metrics of the run.
        mlflow.log_params(
            {
                "split_random_state": split_random_state,
                "svm_kernel": svm_kernel,
            }
        )
        mlflow.log_metrics(
            {
                "decision_tree_accuracy": decision_tree_accuracy,
                "svm_accuracy": svm_accuracy,
            }
        )

        # Serialize the two models and log them as artifacts.
        trained_models = {
            "decision_tree_classifier.sav": decision_tree_classifier,
            "svm_classifier.sav": svm_classifier,
        }
        for file_name, model in trained_models.items():
            joblib.dump(model, file_name)
            try:
                mlflow.log_artifact(file_name, "model")
            finally:
                os.remove(file_name)

        # Tag keys must be distinct: a dict literal that repeats a key keeps
        # only the last value, which would leave the run with a single tag.
        tags = {
            "model.decision_tree": "Decision Tree",
            "model.svm": "Support Vector Machine",
            "dataset": "iris",
        }
        mlflow.set_tags(tags=tags)

    print(f"The last active run is: {mlflow.last_active_run()}")

## Assignment 2

Evaluate and validate the Model created in Assignment 1 (classification with iris dataset) with a DummyClassifier baseline model using mlflow.evaluate() method. Solution to Assignment 1 is attached.

Set the accuracy threshold to 0.8 in the `mlflow.evaluate()` call.

Create 4 custom evaluation metrics using the below functions from sklearn.metrics module on predicted and actual target values

- accuracy_score

- precision_score

- recall_score

- f1_score

Note : Metrics should be computed by taking the weighted average across classes

Questions for this assignment
Which argument name would you use when defining the custom metric functions?

_builtin_metrics OR  builtin_metrics

In [None]:
import warnings
import argparse
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.dummy import DummyClassifier
import mlflow
import mlflow.sklearn
from mlflow.models import MetricThreshold
from mlflow.models import make_metric
import logging
import os

# Module-level logging and command-line configuration for the script.
logging.basicConfig(level=logging.WARN)
logger = logging.getLogger(__name__)

# Hyperparameters of the logistic regression, overridable from the command line.
# NOTE: the flag is spelled "--penality" (sic); the spelling is kept so that
# existing invocations keep working.
parser = argparse.ArgumentParser()
parser.add_argument("--penality", type=str, required=False, default="l2")
parser.add_argument("--C", type=float, required=False, default=1.0)
# parse_known_args() tolerates arguments injected by the host process (for
# example the "-f <connection file>" a Jupyter kernel is started with), where
# parse_args() would abort with SystemExit. Unrecognized arguments are ignored.
args, _unknown_args = parser.parse_known_args()

def eval_metrics(actual, pred):
    """Score predictions against the actual target values.

    Args:
        actual: The true target values.
        pred: The predicted target values.

    Returns:
        A tuple ``(accuracy, precision, recall, f1)``; precision, recall and
        F1 are computed with ``average='weighted'``.
    """
    weighted_scores = tuple(
        scorer(actual, pred, average='weighted')
        for scorer in (precision_score, recall_score, f1_score)
    )
    return (accuracy_score(actual, pred), *weighted_scores)

if __name__ == "__main__":
    # Script entry point: train a logistic regression on the iris dataset,
    # track it in MLflow, and validate it against a DummyClassifier baseline
    # with mlflow.evaluate() using an accuracy threshold and custom metrics.
    warnings.filterwarnings("ignore")
    np.random.seed(40)

    # Read the dataset
    data = pd.read_csv('data/iris.csv')

    # Data Preprocessing: encode the target once on the full dataset so the
    # training and test splits are guaranteed to share the same class mapping.
    le = LabelEncoder()
    data['variety'] = le.fit_transform(data['variety'])

    # Split the data into training and test sets.
    train, test = train_test_split(data)

    # Storing the training and testing dataset
    train.to_csv("data/train.csv")
    test.to_csv("data/test.csv")

    # Split into features and a 1-D target
    train_x = train.drop(["variety"], axis=1)
    test_x = test.drop(["variety"], axis=1)
    train_y = train["variety"]
    test_y = test["variety"]

    # Hyperparameters
    penality = args.penality
    C = args.C

    experiment = mlflow.set_experiment(
        experiment_name="Classifier exp"
    )

    print("Name: {}".format(experiment.name))
    print("Experiment_id: {}".format(experiment.experiment_id))
    print("Artifact Location: {}".format(experiment.artifact_location))
    print("Tags: {}".format(experiment.tags))
    print("Lifecycle_stage: {}".format(experiment.lifecycle_stage))
    print("Creation timestamp: {}".format(experiment.creation_time))

    with mlflow.start_run(run_name="Runer6", experiment_id=experiment.experiment_id):
        tags = {
            "engineering": "ML platform",
            "release.candidate": "RC1",
            "release.version": "2.0"
        }
        # Set tags
        mlflow.set_tags(tags)

        # Candidate model
        lr = LogisticRegression(penalty=penality, C=C)
        lr.fit(train_x, train_y)

        predicted_classes = lr.predict(test_x)

        (accuracy, precision, recall, f1) = eval_metrics(test_y, predicted_classes)

        print(f"Logistic Regression model (penality={penality}, C={C}):")
        print("  Accuracy: %s" % accuracy)
        print("  Precision: %s" % precision)
        print("  Recall: %s" % recall)
        print("  F1 Score: %s" % f1)

        # Logging parameters
        params = {
            "penality": penality,
            "C": C
        }
        mlflow.log_params(params)

        # Logging Metrics
        metrics = {
            "accuracy": accuracy,
            "precision": precision,
            "recall": recall,
            "f1_score": f1
        }
        mlflow.log_metrics(metrics)

        # Logging artifacts the data
        mlflow.log_artifacts("data/")

        # Step 1: Create the dummy (baseline) classifier and fit it on the data
        dummy_classifier = DummyClassifier(strategy="uniform").fit(train_x, train_y)

        # Step 2: Score the baseline with its OWN predictions so the printed
        # numbers describe the dummy classifier, not the logistic regression.
        baseline_predictions = dummy_classifier.predict(test_x)
        (
            baseline_accuracy,
            baseline_precision,
            baseline_recall,
            baseline_f1,
        ) = eval_metrics(test_y, baseline_predictions)
        print(" For the Dummy Classifier the accuracy metrics are the following:  ")
        print("  Accuracy: %s" % baseline_accuracy)
        print("  Precision: %s" % baseline_precision)
        print("  Recall: %s" % baseline_recall)
        print("  F1 Score: %s" % baseline_f1)

        thresholds = {
            "accuracy_score": MetricThreshold(
                threshold=0.8,  # accuracy should be >=0.8
                greater_is_better=True,
            ),
        }

        # Define custom metric functions. Each receives the evaluation
        # DataFrame (read here through its "target" and "prediction" columns)
        # and the built-in metrics, which are not needed.
        def weighted_accuracy_fn(eval_df, _builtin_metrics):
            y_true = eval_df["target"]
            y_pred = eval_df["prediction"]
            return accuracy_score(y_true, y_pred)

        def weighted_precision_fn(eval_df, _builtin_metrics):
            y_true = eval_df["target"]
            y_pred = eval_df["prediction"]
            return precision_score(y_true, y_pred, average="weighted")

        def weighted_recall_fn(eval_df, _builtin_metrics):
            y_true = eval_df["target"]
            y_pred = eval_df["prediction"]
            return recall_score(y_true, y_pred, average="weighted")

        def weighted_f1_fn(eval_df, _builtin_metrics):
            y_true = eval_df["target"]
            y_pred = eval_df["prediction"]
            return f1_score(y_true, y_pred, average="weighted")

        # Higher is better for all four scores.
        weighted_accuracy_fn_metric = make_metric(
            eval_fn=weighted_accuracy_fn,
            greater_is_better=True,
            name="weighted accuracy"
        )

        weighted_precision_fn_metric = make_metric(
            eval_fn=weighted_precision_fn,
            greater_is_better=True,
            name="weighted precision"
        )

        weighted_recall_fn_metric = make_metric(
            eval_fn=weighted_recall_fn,
            greater_is_better=True,
            name="weighted recall"
        )

        weighted_f1_fn_metric = make_metric(
            eval_fn=weighted_f1_fn,
            greater_is_better=True,
            name="weighted f1"
        )

        # Log the candidate and the baseline model with a shared signature and
        # keep their URIs for the evaluation below.
        signature = mlflow.models.infer_signature(train_x, lr.predict(train_x))
        candidate_model_uri = mlflow.sklearn.log_model(
            lr, "model", signature=signature
        ).model_uri
        baseline_model_uri = mlflow.sklearn.log_model(
            dummy_classifier, "baseline_model", signature=signature
        ).model_uri

        # Evaluate the candidate on the held-out split (features plus the
        # "variety" target column) and validate it against the baseline.
        # NOTE(review): the custom metrics are passed as `extra_metrics`;
        # older MLflow 2.x releases may expect `custom_metrics` instead -
        # confirm against the installed version.
        mlflow.evaluate(
            candidate_model_uri,
            test,
            targets="variety",
            model_type="classifier",
            extra_metrics=[
                weighted_accuracy_fn_metric,
                weighted_precision_fn_metric,
                weighted_recall_fn_metric,
                weighted_f1_fn_metric,
            ],
            validation_thresholds=thresholds,
            baseline_model=baseline_model_uri,
        )

        run = mlflow.last_active_run()
        print("Active run_id: {}".format(run.info.run_id))
