In [None]:
import os
from autogluon.multimodal import MultiModalPredictor
from autogluon.core.utils.loaders import load_zip

In [None]:
# Location of the pothole dataset archive and the local folder it is unpacked into.
zip_file = "https://automl-mm-bench.s3.amazonaws.com/object_detection/dataset/pothole.zip"
download_dir = "./pothole"

# Unpack the archive (given as a URL) into download_dir.
load_zip.unzip(zip_file, unzip_dir=download_dir)

# The annotation files sit under <download_dir>/pothole/Annotations; the file
# names indicate COCO-format JSON for the train / val / test splits.
data_dir = os.path.join(download_dir, "pothole")
annotation_dir = os.path.join(data_dir, "Annotations")
train_path = os.path.join(annotation_dir, "usersplit_train_cocoformat.json")
val_path = os.path.join(annotation_dir, "usersplit_val_cocoformat.json")
test_path = os.path.join(annotation_dir, "usersplit_test_cocoformat.json")

In [None]:
# Settings a reader is most likely to tune, kept together in one cell.
num_gpus = 1  # train on a single GPU only
checkpoint_name = "yolox_s"  # detection checkpoint handed to the predictor

In [None]:
# Model/environment settings forwarded to the predictor: which detection
# checkpoint to start from and how many GPUs to use.
predictor_hyperparameters = {
    "model.mmdet_image.checkpoint_name": checkpoint_name,
    "env.num_gpus": num_gpus,
}

# Build an object-detection predictor. The training annotation file is passed
# as sample data (presumably so dataset categories can be inferred up front --
# confirm against the AutoGluon documentation).
predictor = MultiModalPredictor(
    hyperparameters=predictor_hyperparameters,
    problem_type="object_detection",
    sample_data_path=train_path,
)

In [None]:
# Optimisation settings for this fine-tuning run.
fit_hyperparameters = {
    # Base learning rate; a two-stage learning-rate setup is used, so the
    # detection head trains at 100x this value.
    "optimization.learning_rate": 1e-4,
    # Lower this for larger models or when GPU memory is limited.
    "env.per_gpu_batch_size": 32,
    # Upper bound on training epochs; early stopping (see patience below)
    # may end training sooner.
    "optimization.max_epochs": 10,
    # Within an epoch that validates, run validation once (at the end of it)...
    "optimization.val_check_interval": 1.0,
    # ...and only validate on every 3rd epoch.
    "optimization.check_val_every_n_epoch": 3,
    # Stop early after 3 consecutive validations that do not improve on the best.
    "optimization.patience": 3,
}

# Train on the training split and tune/early-stop on the validation split.
# Left as the cell's final expression so the notebook still shows its result.
predictor.fit(
    train_path,
    tuning_data=val_path,
    hyperparameters=fit_hyperparameters,
    time_limit=180,  # training time budget, in seconds
)

In [None]:
# Evaluate the fitted predictor on the test annotations using the
# torchmetrics evaluation tool; the bare expression lets the notebook
# display whatever evaluate() returns.
predictor.evaluate(
    test_path,
    eval_tool="torchmetrics",
)