In [1]:
import os
from urllib.parse import quote

import mlflow
import yaml

In [6]:
# Parameters passed to the project's "train" entry point.
# Unset values are written as the literal string "None"
# (presumably interpreted by the training script — TODO confirm).
hyperparams = dict(
    # Example value: "restored_model_checkpoint/checkpoint_epoch=06.ckpt"
    resume_checkpoint="None",
    # Example value: "ef539b4138fa4055bf65c58f30249211"
    resume_run_id="None",
    max_epochs=10,
    gpus=0,
    accelerator="None",
    batch_size=32,
    lr=0.0001,
    num_samples=-1,
    val_ratio=0.2,
    test_ratio=0,
    random_seed="None",
    dataset="/FileStore/tables/datasets/dummy.json",
)

In [7]:
# Cluster specification passed to MLflow as `backend_config`.
# Two options: set `backend_config = None` to run locally, or describe a
# cluster to run on Databricks. Adapt the spec to your cloud provider
# (AWS, Azure, Google Cloud). Field reference:
# https://docs.databricks.com/dev-tools/api/latest/clusters.html
backend_config = dict(
    num_workers=0,
    spark_version="10.4.x-scala2.12",
    spark_conf={
        "spark.databricks.cluster.profile": "singleNode",
        "spark.master": "local[*, 4]",
    },
    azure_attributes=dict(
        first_on_demand=1,
        availability="ON_DEMAND_AZURE",
        spot_bid_max_price=-1,
    ),
    node_type_id="Standard_F4",
    driver_node_type_id="Standard_F4",
    ssh_public_keys=[],
    custom_tags=dict(ResourceClass="SingleNode"),
    spark_env_vars=dict(PYSPARK_PYTHON="/databricks/python3/bin/python3"),
    # NOTE(review): this is the string "true", not a boolean — confirm the API accepts it.
    enable_elastic_disk="true",
    cluster_source="UI",
    init_scripts=[],
    cluster_id="0519-085431-rvfc7gfq",
)

In [8]:
# You must create a config file containing your Databricks and GitHub access
# tokens and the MLflow tracking experiment name.
# DON'T PUSH THIS FILE TO THE REMOTE REPO!
with open("../mlflow_config.yaml") as f:
    # An empty file parses to None; fall back to {} so the check below reports
    # the missing entries instead of failing with a TypeError.
    mlflow_config = yaml.safe_load(f) or {}

# Fail early and name every missing entry, rather than stopping at the first
# bare dictionary lookup.
required_keys = ("db_host", "db_token", "git_user", "git_token", "git_uri", "exp_name")
missing_keys = [key for key in required_keys if key not in mlflow_config]
if missing_keys:
    raise KeyError(f"../mlflow_config.yaml is missing required keys: {missing_keys}")

# Set databricks env variables
os.environ["MLFLOW_TRACKING_URI"] = "databricks"
os.environ["DATABRICKS_HOST"] = mlflow_config["db_host"]
os.environ["DATABRICKS_TOKEN"] = mlflow_config["db_token"]

# Set github access uri
user = mlflow_config["git_user"]
token = mlflow_config["git_token"]
git_uri = mlflow_config["git_uri"]
# Percent-encode the credentials so reserved characters ("@", ":", "/") in the
# user name or token cannot corrupt the URL.
# WARNING: the token is embedded in `uri`, and MLflow prints the project URI in
# its "Fetching project from ..." log line. Clear the cell outputs before
# sharing or committing this notebook.
uri = f"https://{quote(str(user), safe='')}:{quote(str(token), safe='')}@{git_uri}"
# uri = "/path/to/python-mlops-template"  # Alternative: local filesystem path
git_branch = "main"

experiment_name = mlflow_config["exp_name"]
experiment = mlflow.set_experiment(experiment_name)
backend = "databricks"  # or "local"

In [9]:
# Learning-rate sweep: launch one "train" run per value below.
lrs = [0.001, 0.0005, 0.0001, 0.00005, 0.00001, 0.000005, 0.000001]

# Handles of the launched runs, kept so the asynchronous runs can be
# inspected or waited on afterwards instead of being discarded.
submitted_runs = []

for lr in lrs:
    # Override only the learning rate in a per-run copy, so the base
    # `hyperparams` dict is left unchanged once the sweep is done.
    run_params = {**hyperparams, "lr": lr}

    submitted_run = mlflow.projects.run(
        uri,
        version=git_branch,
        entry_point="train",
        parameters=run_params,
        backend=backend,
        backend_config=backend_config,
        experiment_id=experiment.experiment_id,
        synchronous=False,  # Don't block until training finishes; set to True to wait
    )
    submitted_runs.append(submitted_run)

2022/09/27 15:43:57 INFO mlflow.projects.utils: === Fetching project from https://<git_user>:<REDACTED_TOKEN>@github.com/AxiansML/python-mlops-template into /var/folders/p6/5l5fzz397vq3lwg4c8p3t7lw0000gn/T/tmp9921knys ===
2022/09/27 15:43:59 INFO mlflow.projects.utils: Fetched 'main' branch
2022/09/27 15:44:01 INFO mlflow.projects.databricks: === Creating tarball from /var/folders/p6/5l5fzz397vq3lwg4c8p3t7lw0000gn/T/tmp9921knys in temp directory /var/folders/p6/5l5fzz397vq3lwg4c8p3t7lw0000gn/T/tmpt2rmu6j_ ===
2022/09/27 15:44:01 INFO mlflow.projects.databricks: === Total file size to compress: 356.8 KB ===
2022/09/27 15:44:01 INFO mlflow.projects.databricks: === Uploading project tarball (size: 203.0 KB) to /dbfs/mlflow-experiments/4457423338043857/projects-code/e7b18b5582b17f93e9968e660b20a8d24908220abd0e6391795775c93f8c7978.tar.gz ===
2022/09/27 15:44:01 INFO mlflow.projects.databricks: === Uploading project to DBFS path /dbfs/mlflow-experiments/445742