In [1]:
from fastai.vision.all import *
import params # local file import

import wandb

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Location of the zipped dataset archive that is downloaded in the next cell.
URL = "https://storage.googleapis.com/wandb_course/bdd_simple_1k.zip"

In [3]:
# Download and extract the archive, keeping the extracted location as a Path.
# NOTE(review): force_download=True is passed, which presumably re-fetches the
# archive on every run instead of reusing a cached copy — confirm this is intended.
path = Path(untar_data(URL, force_download=True))

In [4]:
# Inspect the contents of the "labels" folder to see how the data is laid out.
(path/"labels").ls()

(#1001) [Path('C:/Users/Allison Ogechukwu/.fastai/data/bdd_simple_1k/labels/.ipynb_checkpoints'),Path('C:/Users/Allison Ogechukwu/.fastai/data/bdd_simple_1k/labels/0027eed2-09c90000_mask.png'),Path('C:/Users/Allison Ogechukwu/.fastai/data/bdd_simple_1k/labels/0027eed2-09c90001_mask.png'),Path('C:/Users/Allison Ogechukwu/.fastai/data/bdd_simple_1k/labels/00aad4a0-ee8135fe_mask.png'),Path('C:/Users/Allison Ogechukwu/.fastai/data/bdd_simple_1k/labels/00d79c0a-23befe54_mask.png'),Path('C:/Users/Allison Ogechukwu/.fastai/data/bdd_simple_1k/labels/00e69ee0-9656df95_mask.png'),Path('C:/Users/Allison Ogechukwu/.fastai/data/bdd_simple_1k/labels/00e9be89-00000130_mask.png'),Path('C:/Users/Allison Ogechukwu/.fastai/data/bdd_simple_1k/labels/00e9be89-00000175_mask.png'),Path('C:/Users/Allison Ogechukwu/.fastai/data/bdd_simple_1k/labels/00e9be89-00001025_mask.png'),Path('C:/Users/Allison Ogechukwu/.fastai/data/bdd_simple_1k/labels/00e9be89-00001030_mask.png')...]

We define three functions: `label_func`, `get_classes_per_image`, and `create_table`.

The `label_func` function takes the path of an image file as input and returns a new file path that points to a file with a matching name located in a different directory. Specifically, it constructs the new path by taking the parent directory of the parent directory of the input file (`fname.parent.parent`) and appending a directory called "labels" and a file name that is the same as the input file's name stem but with "_mask.png" appended. The resulting file path is returned as a path object (not as a string), because it is built with the `/` path-joining operator.

The `get_classes_per_image` function takes two arguments: `mask_data`, which is a NumPy array representing an image mask, and `class_labels`, which is a dictionary mapping class IDs to class names. The function computes a dictionary that maps each class name to a binary value indicating whether or not the corresponding class ID is present in the mask data. Specifically, it first computes a list of unique values in `mask_data`, which are assumed to correspond to class IDs. It then iterates over each class ID in the `class_labels` dictionary and checks whether it is in the list of unique values. If so, it maps the class name to 1 (indicating that the class is present in the mask), otherwise it maps it to 0. The resulting dictionary is returned.

The `create_table` function takes two arguments: `image_files`, which is a list of file paths to image files, and `class_labels`, which is a dictionary mapping class IDs to class names. The function creates a new WandB table object that can be used to visualize and analyze the data in the image files. Specifically, it creates a table with columns for the file name, two additional columns derived from the file name (P1 and P2), a column for the image data (represented as a WandB Image object), a column indicating the name of the dataset (bdd1k), and additional columns for each class name in class_labels, each of which contains a binary value indicating whether or not the corresponding class is present in the image mask. The function iterates over each image file in `image_files`, opens the image file and its corresponding mask file (which is assumed to be located in the "labels" directory), computes the class presence dictionary using the `get_classes_per_image` function, and adds a new row to the table with the appropriate data. The resulting table is returned as a WandB Table object.

In [5]:
def label_func(fname):
    """Return the path of the mask file that belongs to the image `fname`.

    The mask is looked up in a "labels" folder that sits next to the folder
    containing the image, and is named "<image stem>_mask.png".
    """
    dataset_root = fname.parent.parent
    mask_name = f"{fname.stem}_mask.png"
    return dataset_root / "labels" / mask_name

def get_classes_per_image(mask_data, class_labels):
    """Flag, for every class name, whether its id occurs in `mask_data`.

    Returns a dict that maps each value of `class_labels` (the class name)
    to 1 when the matching key (the class id) is among the unique values of
    the mask, and to 0 otherwise.
    """
    ids_in_mask = list(np.unique(mask_data))
    return {
        class_name: int(class_id in ids_in_mask)
        for class_id, class_name in class_labels.items()
    }

def create_table(image_files, class_labels):
    """Create a WandB table with the dataset.

    Args:
        image_files: sized collection of image paths. Each path must expose
            `.stem`, and its mask must be reachable through `label_func`.
        class_labels: dict mapping class id -> class name. Every class name
            becomes one column holding 1/0 for present/absent in the mask.

    Returns:
        A `wandb.Table` with the columns File_name, P1, P2, Images, Dataset
        followed by one column per class name.
    """
    labels = list(class_labels.values())
    table = wandb.Table(columns=["File_name", "P1", "P2", "Images", "Dataset"] + labels)

    # The loop index was never used, so iterate over the files directly.
    for image_file in progress_bar(image_files, total=len(image_files)):
        image = Image.open(image_file)
        # Read the mask into an array and release the file handle right away.
        with Image.open(label_func(image_file)) as mask_image:
            mask_data = np.array(mask_image)
        class_in_image = get_classes_per_image(mask_data, class_labels)

        # P1/P2 are the first two "-"-separated pieces of the stem. A stem
        # without "-" gets an empty P2 instead of raising IndexError.
        stem_parts = image_file.stem.split("-")
        p1 = stem_parts[0]
        p2 = stem_parts[1] if len(stem_parts) > 1 else ""

        table.add_data(
            image_file.stem,
            p1,
            p2,
            wandb.Image(
                image,
                masks={
                    "predictions": {
                        "mask_data": mask_data,
                        "class_labels": class_labels,
                    }
                }
            ),
            "bdd1k",
            *[class_in_image[_lab] for _lab in labels]
        )

    return table

In [6]:
# Start a new W&B run; the project and entity come from the local `params`
# module and the job type is set to "upload".
run = wandb.init(project=params.WANDB_PROJECT, entity=params.ENTITY, job_type="upload")

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin


In [7]:
# Create an artifact of type "raw_data", named by params.RAW_DATA_AT.
artifact = wandb.Artifact(params.RAW_DATA_AT, type="raw_data")

In [8]:
# Add the dataset's LICENSE.txt to the artifact under the same name.
artifact.add_file(path/"LICENSE.txt", name="LICENSE.txt")

<ManifestEntry digest: X+6ZFkDOlnKesJCNt20yRg==>

In [9]:
# Add the "images" and "labels" folders to the artifact.
# NOTE(review): the labels listing earlier in this notebook shows a
# .ipynb_checkpoints entry; presumably add_dir picks it up as well — confirm.
artifact.add_dir(path/"images", name="images")
artifact.add_dir(path/"labels", name="labels")

[34m[1mwandb[0m: Adding directory to artifact (C:\Users\Allison Ogechukwu\.fastai\data\bdd_simple_1k\images)... Done. 20.1s
[34m[1mwandb[0m: Adding directory to artifact (C:\Users\Allison Ogechukwu\.fastai\data\bdd_simple_1k\labels)... Done. 11.9s


In [10]:
# Collect the image files from the "images" folder; recurse=False is passed,
# so presumably subfolders are not searched.
image_files = get_image_files(path/"images", recurse=False)

In [11]:
# Build the W&B table: one row per image file, with one class-presence
# column per entry of params.BDD_CLASSES.
table = create_table(image_files, params.BDD_CLASSES)

In [12]:
# Add the table to the artifact under the name "eda_table".
artifact.add(table, "eda_table")

<ManifestEntry digest: t3YlJItMgKDwoh+KYyXthA==>

In [13]:
# Log the artifact (files, folders and table added above) to the current run.
run.log_artifact(artifact)

<wandb.sdk.wandb_artifacts.Artifact at 0x23537ad7cd0>

In [14]:
# Finish the run.
run.finish()