### Create Labelbox Dataset

This notebook finds RGB images in the GCP storage bucket. For each geographical location where images exist, it creates a Labelbox dataset and populates that dataset with pointers (public URLs) to the images in the GCP bucket.

In [None]:
import os, sys
# Put the parent directory first on the import path.
# NOTE(review): presumably this is what makes `utils` and `project_config`
# (imported in later cells) importable from this notebook's folder — confirm.
sys.path.insert(0, os.path.abspath('..'))

%load_ext autoreload
%autoreload 2

In [None]:
from google.cloud import storage
from labelbox import Client as LabelboxClient
from labelbox import Dataset


from utils import gcp_utils
from utils.labelbox_utils import create_new_dataset, create_data_row_dict

In [None]:
# Load environment variables from .env (for example LABELBOX_API_KEY, which is
# read with os.getenv in a later cell).
# Alternatively, manually set environment variables.

from dotenv import load_dotenv
load_dotenv()

In [None]:
from project_config import GCP_PROJECT_NAME, BUCKET_NAME

# The Labelbox API key comes from the environment; the lookup yields None when
# the variable is not set.
LABELBOX_API_KEY = os.environ.get("LABELBOX_API_KEY")

# Client for Google Cloud Storage, bound to the configured project.
gcp_client = storage.Client(project=GCP_PROJECT_NAME)
# Client for Labelbox, authenticated with the key read above.
labelbox_client = LabelboxClient(api_key=LABELBOX_API_KEY)

In [None]:
# Collect the candidate locations and show them so one can be picked below.
# NOTE(review): presumably list_subfolders returns the names of the
# sub-folders directly under "labels" in the bucket — confirm in gcp_utils.
all_label_locations = gcp_utils.list_subfolders(gcp_client, "labels")
print(all_label_locations)

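Choose the locations for which to push images to Labelbox: uncomment (and, if needed, edit) exactly one of the `label_locations` assignments in the next cell. The cells after it fail if `label_locations` is not defined.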

In [None]:
#label_locations = all_label_locations
#label_locations = ['Sone_Rohtas_84-21_24-91']

In [None]:
# Validate the selection before anything is created in Labelbox: every chosen
# location must appear in all_label_locations. An explicit exception is used
# instead of `assert` so the check still runs under `python -O` and reports
# which entries are wrong.
unknown_locations = set(label_locations) - set(all_label_locations)
if unknown_locations:
    raise ValueError(
        f"Unknown label locations (not in all_label_locations): "
        f"{sorted(unknown_locations)}"
    )

# Bucket handle shared by create_labelbox_dataset_for_location.
bucket = gcp_client.bucket(BUCKET_NAME)

def create_labelbox_dataset_for_location(location: str) -> None:
    """Create a Labelbox dataset for ``location`` and upload its RGB image rows.

    Lists the blobs of the module-level ``bucket`` whose names start with
    ``labels/<location>/rgb`` and builds one data row per blob from the blob's
    public URL and its file name (the last path component), which serves as
    the global key. A new dataset is then obtained from ``create_new_dataset``
    and the rows are uploaded to it. The function blocks until the upload task
    has finished and prints whether it reported errors.

    Args:
        location: Name of the location folder under ``labels/``; it is also
            passed to ``create_new_dataset``.
    """
    rgb_image_blobs = bucket.list_blobs(prefix=f"labels/{location}/rgb")
    data_rows = [
        create_data_row_dict(
            img_url=gcp_utils.get_public_url(blob.name),
            global_key=blob.name.split('/')[-1],
        )
        for blob in rgb_image_blobs
        # Skip "folder" placeholder objects (names ending in "/"): they are
        # not images and would produce a data row with an empty global key.
        if not blob.name.endswith('/')
    ]

    dataset: Dataset = create_new_dataset(labelbox_client, location)
    print(f"Creating {len(data_rows)} data rows in dataset {dataset.name}")

    # The upload runs as a Labelbox task; wait for it before reading errors.
    task = dataset.create_data_rows(data_rows)
    task.wait_till_done()
    if task.errors:
        print(f"Finished with error: {task.errors}")
    else:
        print("Finished without error.")

# Create one Labelbox dataset per selected location, printing a header line
# before each so the per-location output can be told apart.
for location in label_locations:
    print(f"--- Location: {location}")
    create_labelbox_dataset_for_location(location)