In [None]:
# Project creation: load the MLRun project if it already exists, otherwise create it.
from pathlib import Path

import mlrun

# The %%writefile cell below targets ./src/trainingFunction.py, and %%writefile
# does not create missing parent directories. Make sure the folder exists so the
# notebook survives "Restart Kernel -> Run All" on a fresh checkout.
Path("src").mkdir(parents=True, exist_ok=True)

project = mlrun.get_or_create_project(
    "traincancerdatasetfromjenkins",
    context="./",
    user_project=False,
)

In [None]:
%%writefile ./src/trainingFunction.py
import pandas as pd

from sklearn import ensemble
from sklearn.model_selection import train_test_split

import mlrun
from mlrun.frameworks.sklearn import apply_mlrun

@mlrun.handler()
def train(
    dataset: pd.DataFrame,
    label_column: str = "label",
    n_estimators: int = 100,
    learning_rate: float = 0.1,
    max_depth: int = 3,
    model_name: str = "cancer_classifier",
    test_size: float = 0.2,
    random_state: int = 42,
):
    """
    Train a gradient-boosting classifier on ``dataset`` and track it with MLRun.

    The data is split into train and test parts, the model is wrapped with
    ``apply_mlrun`` (which receives the test part) and then fitted on the
    train part. Nothing is returned.

    :param dataset:       Full dataset, features and label column together.
    :param label_column:  Name of the column in ``dataset`` holding the target.
    :param n_estimators:  Passed to ``GradientBoostingClassifier(n_estimators=...)``.
    :param learning_rate: Passed to ``GradientBoostingClassifier(learning_rate=...)``.
    :param max_depth:     Passed to ``GradientBoostingClassifier(max_depth=...)``.
    :param model_name:    Name handed to ``apply_mlrun`` for the model.
    :param test_size:     Fraction of rows held out as the test set (default 0.2,
                          the value that used to be hard-coded).
    :param random_state:  Seed used for both the train/test split and the
                          classifier, so repeated runs are reproducible
                          (default 42, the split seed that used to be hard-coded).

    :raises KeyError: If ``label_column`` is not a column of ``dataset``.
    """
    # Separate the features from the target column
    features = dataset.drop(label_column, axis=1)
    target = dataset[label_column]

    # Hold out part of the data for evaluation
    x_train, x_test, y_train, y_test = train_test_split(
        features, target, test_size=test_size, random_state=random_state
    )

    # Seed the classifier as well: without it, repeated runs (and therefore the
    # comparison between hyper-parameter combinations) are not guaranteed to be
    # reproducible.
    model = ensemble.GradientBoostingClassifier(
        n_estimators=n_estimators,
        learning_rate=learning_rate,
        max_depth=max_depth,
        random_state=random_state,
    )

    # -------------------- The only line you need to add for MLOps -------------------------
    # Wraps the model with MLOps (test set is provided for analysis & accuracy measurements).
    # This is done before fit() so the wrapped model is the one being trained.
    apply_mlrun(model=model, model_name=model_name, x_test=x_test, y_test=y_test)
    # --------------------------------------------------------------------------------------

    # Train the model
    model.fit(x_train, y_train)

In [None]:
# Register the training code in the project as an MLRun job function named
# "training", with `train` as its handler.
TrainingFunction = project.set_function(
    "src/trainingFunction.py",
    name="training",
    kind="job",
    image="mlrun/mlrun",
    handler="train",
)

In [None]:
# Create the training dataset: scikit-learn's breast-cancer features plus a "label" column.
import pandas as pd
from sklearn.datasets import load_breast_cancer

breast_cancer = load_breast_cancer()

# Features as a DataFrame, one column per feature name
breast_cancer_dataset = pd.DataFrame(
    breast_cancer.data,
    columns=breast_cancer.feature_names,
)

# Targets as a single-column DataFrame
breast_cancer_labels = pd.DataFrame(breast_cancer.target, columns=["label"])

# Put the label column next to the features
breast_cancer_dataset = pd.concat(
    [breast_cancer_dataset, breast_cancer_labels],
    axis="columns",
)

# Persist the dataset (without the index) so the training function can read it
breast_cancer_dataset.to_csv("cancer-dataset.csv", index=False)

In [None]:
# Run the "training" function locally on the CSV created above
training_function_run = project.run_function(
    "training",
    inputs={"dataset": "cancer-dataset.csv"},
    params={
        "n_estimators": 100,
        "learning_rate": 1e-1,
        "max_depth": 3,
    },
    local=True,
)

In [None]:
# Display the outputs recorded for the training run
training_function_run.outputs

In [None]:
# Export the trained model as a zip file so it can be loaded from a remote system/cluster
model_artifact = training_function_run.artifact("model")
model_artifact.meta.export("model-cancer.zip")

In [None]:
# Hyper-parameter tuning and model comparison: run the "training" function over
# the listed hyper-parameter values and select the run with the highest accuracy.
hp_tuning_run = project.run_function(
    "training",
    inputs={"dataset": "cancer-dataset.csv"},
    hyperparams={
        "n_estimators": [10, 100, 1000],
        "learning_rate": [1e-1, 1e-3],
        "max_depth": [2, 8],
    },
    selector="max.accuracy",
    # Run locally, like the single training run: the dataset input is a relative
    # path on the notebook's filesystem, which a remote job would not be able to read.
    local=True,
)

In [None]:
# Create a model-serving function and attach the model produced by the hyper-parameter run
serving_fn = mlrun.new_function("serving", image="mlrun/mlrun", kind="serving")
serving_fn.add_model(
    "cancer-classifier",
    model_path=hp_tuning_run.outputs["model"],
    class_name="mlrun.frameworks.sklearn.SklearnModelServer",
)

In [None]:
# Create a mock server (a local simulator of the real-time serving function)
server = serving_fn.to_mock_server()

# A single sample of 30 feature values to send for inference
sample_features = [
    1.371e+01, 2.083e+01, 9.020e+01, 5.779e+02, 1.189e-01,
    1.645e-01, 9.366e-02, 5.985e-02, 2.196e-01, 7.451e-02,
    5.835e-01, 1.377e+00, 3.856e+00, 5.096e+01, 8.805e-03,
    3.029e-02, 2.488e-02, 1.448e-02, 1.486e-02, 5.412e-03,
    1.706e+01, 2.814e+01, 1.106e+02, 8.970e+02, 1.654e-01,
    3.682e-01, 2.678e-01, 1.556e-01, 3.196e-01, 1.151e-01,
]
my_data = {"inputs": [sample_features]}

# Send the sample to the model's inference endpoint on the mock server
server.test("/v2/models/cancer-classifier/infer", body=my_data)

In [None]:
# Plot the serving function's graph.
# NOTE(review): rankdir="LR" presumably requests a left-to-right layout (Graphviz convention) — confirm.
serving_fn.plot(rankdir="LR")