In [1]:
import os
from urllib.parse import quote

import mlflow
import yaml

In [2]:
# Parameters handed to the MLflow project run launched further down.
# Unset options are spelled as the string "None" rather than Python's None.
hyperparams = dict(
    # Example value: "restored_model_checkpoint/checkpoint_epoch=06.ckpt"
    resume_checkpoint="None",
    # Example value: "ef539b4138fa4055bf65c58f30249211"
    resume_run_id="None",
    max_epochs=10,
    gpus=0,
    batch_size=32,
    lr=0.0001,
    num_samples=-1,
    val_ratio=0.2,
    test_ratio=0,
    random_seed="None",
    # Alternative dataset location: "data/processed/dummy.json"
    dataset="/FileStore/tables/datasets/dummy.json",
)

In [3]:
# Backend configuration for the project run. Two options:
#   * None         -> run locally (uncomment the line below and drop the dict)
#   * cluster spec -> run on Databricks with the cluster described here
# Cluster spec reference: https://docs.databricks.com/dev-tools/api/latest/clusters.html
# Adapt the provider-specific entries to AWS, Azure or Google Cloud as needed.
# backend_config = None

backend_config = dict(
    num_workers=0,
    spark_version="10.4.x-scala2.12",
    # Keys containing dots cannot be keyword arguments, so this stays a literal.
    spark_conf={
        "spark.databricks.cluster.profile": "singleNode",
        "spark.master": "local[*, 4]",
    },
    azure_attributes=dict(
        first_on_demand=1,
        availability="ON_DEMAND_AZURE",
        spot_bid_max_price=-1,
    ),
    node_type_id="Standard_F4",
    driver_node_type_id="Standard_F4",
    ssh_public_keys=[],
    custom_tags={"ResourceClass": "SingleNode"},
    spark_env_vars={"PYSPARK_PYTHON": "/databricks/python3/bin/python3"},
    enable_elastic_disk="true",  # kept as the string "true", not a bool
    cluster_source="UI",
    init_scripts=[],
    cluster_id="0519-085431-rvfc7gfq",
)


In [4]:
# You must create a config file containing your Databricks and GitHub access tokens
# and the MLflow tracking experiment name. DON'T PUSH THIS FILE TO THE REMOTE REPO!
with open("../mlflow_config.yaml", encoding="utf-8") as f:
    mlflow_config = yaml.safe_load(f)

# Fail early and name every missing entry, instead of stopping halfway through
# the setup below with a bare KeyError (or a TypeError when the file is empty).
required_keys = ("db_host", "db_token", "git_user", "git_token", "git_uri", "exp_name")
if not isinstance(mlflow_config, dict):
    raise TypeError("../mlflow_config.yaml must contain a mapping of settings")
missing_keys = [key for key in required_keys if key not in mlflow_config]
if missing_keys:
    raise KeyError(f"../mlflow_config.yaml is missing required keys: {missing_keys}")

# Set databricks env variables
os.environ["MLFLOW_TRACKING_URI"] = "databricks"
os.environ["DATABRICKS_HOST"] = mlflow_config["db_host"]
os.environ["DATABRICKS_TOKEN"] = mlflow_config["db_token"]

# Set github access uri.
# Credentials are percent-encoded so reserved characters (e.g. "@", ":", "/")
# cannot corrupt the URL; purely alphanumeric/"-"/"_" values are left unchanged.
# NOTE(security): the token is embedded in the URI, and MLflow echoes the URI in
# its "Fetching project from ..." log line. Clear this notebook's outputs before
# committing or sharing it, and rotate any token that has already been exposed.
user = quote(str(mlflow_config["git_user"]), safe="")
token = quote(str(mlflow_config["git_token"]), safe="")
git_uri = mlflow_config["git_uri"]
uri = f"https://{user}:{token}@{git_uri}"
git_branch = "main"

# Local filesystem path (alternative to the git URI above)
# uri = "/Users/alexmfalm/Documents/Git_Repos/basics-mlops-python"

experiment_name = mlflow_config["exp_name"]
experiment = mlflow.set_experiment(experiment_name)
backend = "databricks"  # local

In [5]:
# Learning rates to launch, one MLflow project run per value.
lrs = [0.001]  # , 0.0005, 0.0001, 0.00005, 0.00001, 0.000005, 0.000001]

# Handles returned by mlflow.projects.run. With synchronous=False the call
# returns without waiting for the run to finish, so the handles are kept to
# allow inspecting the launched runs afterwards.
submitted_runs = []

for lr in lrs:
    # Build a fresh parameter dict per run instead of mutating the shared
    # `hyperparams`: the cell stays idempotent on re-run and no two launches
    # share one mutable dict.
    run_params = {**hyperparams, "lr": lr}

    submitted_runs.append(
        mlflow.projects.run(
            uri,
            version=git_branch,
            entry_point="train",
            # env_manager="local",
            parameters=run_params,
            backend=backend,
            backend_config=backend_config,
            experiment_id=experiment.experiment_id,
            synchronous=False,  # Set to False, if you don't want to wait for the model to train
        )
    )

2022/09/28 15:24:21 INFO mlflow.projects.utils: === Fetching project from https://alexandre1-almeida:<REDACTED_GITHUB_TOKEN>@github.com/AxiansML/basics-mlops-python into /var/folders/p6/5l5fzz397vq3lwg4c8p3t7lw0000gn/T/tmpt4x4qqcj ===
2022/09/28 15:24:24 INFO mlflow.projects.databricks: === Creating tarball from /var/folders/p6/5l5fzz397vq3lwg4c8p3t7lw0000gn/T/tmpt4x4qqcj in temp directory /var/folders/p6/5l5fzz397vq3lwg4c8p3t7lw0000gn/T/tmpn_00skyd ===
2022/09/28 15:24:24 INFO mlflow.projects.databricks: === Total file size to compress: 361.9 KB ===
2022/09/28 15:24:24 INFO mlflow.projects.databricks: === Uploading project tarball (size: 206.0 KB) to /dbfs/mlflow-experiments/4457423338043857/projects-code/9acc5c8122863b6c36ac0a75f9961f74b65ac3f630423af7bdbf2273cb3b4eee.tar.gz ===
2022/09/28 15:24:24 INFO mlflow.projects.databricks: === Uploading project to DBFS path /dbfs/mlflow-experiments/4457423338043857/projects-code/9acc5c8122863b6c36ac0a75f9961f74b65ac3f630423af