# Data rows

* Data rows are the items that are actually being labeled. We currently support the following:
    * Image
    * Text
    * Video
* A data row is a member of a dataset.
* A data row cannot exist without belonging to a dataset.
* Data rows are added to labeling tasks by first attaching them to datasets and then attaching those datasets to projects.

In [1]:
!pip install labelbox

In [2]:
import os
import tempfile
import uuid
from getpass import getpass

from labelbox import DataRow, Client

In [3]:
# If you don't want to give google access to drive you can skip this cell
# and manually set `API_KEY` below.

COLAB = "google.colab" in str(get_ipython())
if COLAB:
    !pip install colab-env -qU
    from colab_env import envvar_handler
    envvar_handler.envload()

API_KEY = os.environ.get("LABELBOX_API_KEY")
if not os.environ.get("LABELBOX_API_KEY"):
    API_KEY = getpass("Please enter your labelbox api key")
    if COLAB:
        envvar_handler.add_env("LABELBOX_API_KEY", API_KEY)

* Update the values in the following cell with your own project details before running this notebook.

In [4]:
# API endpoint. Only update this if you have an on-prem deployment.
ENDPOINT = "https://api.labelbox.com/graphql"

# Project to run against. Requirements:
#   * it has a dataset attached,
#   * its data rows have external ids,
#   * it contains some labels.
# This notebook will modify the project, so pick a dummy one that you don't care about.
PROJECT_ID = "ckk4q1viuc0w20704eh69u28h"

In [5]:
# SDK client used by the later cells to fetch the project, create a dataset
# and upload a local file.
client = Client(
    api_key=API_KEY,
    endpoint=ENDPOINT,
)

In [6]:
# Fetch the project by id, then take the first dataset yielded by its
# `datasets()` iterator. `next` raises StopIteration if nothing is yielded,
# which is why the project must have a dataset attached.
project = client.get_project(PROJECT_ID)
dataset = next(project.datasets())
# This is the same as
# -> dataset = client.get_dataset(dataset_id)

### Read

In [7]:
# `dataset.data_rows()` is consumed with `next`, so only the first data row it
# yields is taken here; `data_rows` keeps the remaining items.
data_rows = dataset.data_rows()
data_row = next(data_rows)

In [8]:
# Inspect what the data row is linked to: its dataset, one of its labels,
# and its external id.
print("Associated dataset", data_row.dataset())
# The project is only required to contain *some* labels, so this particular
# row may have none: fall back to None instead of letting `next` raise
# StopIteration.
print("Associated label(s)", next(data_row.labels(), None))
print("External id", data_row.external_id)

Associated dataset <Dataset {'created_at': datetime.datetime(2021, 3, 28, 23, 35, 34, tzinfo=datetime.timezone.utc), 'description': '', 'name': 'image_mal_dataset', 'uid': 'ckmtsvzps21f80y6205t304se', 'updated_at': datetime.datetime(2021, 3, 28, 23, 35, 34, tzinfo=datetime.timezone.utc)}>
Associated label(s) <Label {'agreement': None, 'benchmark_agreement': None, 'created_at': datetime.datetime(2021, 3, 29, 17, 53, 36, tzinfo=datetime.timezone.utc), 'is_benchmark_reference': False, 'label': '{"objects":[{"featureId":"ckmuw40nm00013g68kfua4i88","schemaId":"ckk4q1vo80nhv0y92b7vgctgt","title":"Frog","value":"frog","color":"#00D4FF","bbox":{"top":815,"left":847,"height":247,"width":512},"instanceURI":"https://api.labelbox.com/masks/feature/ckmuw40nm00013g68kfua4i88"}],"classifications":[]}', 'seconds_to_label': 0, 'uid': 'ckmuw42qy00033g68zdhpove7', 'updated_at': datetime.datetime(2021, 3, 29, 17, 54, 32, tzinfo=datetime.timezone.utc)}>
External id dbb168f6-4f2c-46e0-a22a-abc837fda3f1


In [9]:
# External ids can be a reference to your internal datasets.
# Look the same data row up again through its external id and rebind
# `data_row` to the result, then print it.
data_row = dataset.data_row_for_external_id(data_row.external_id)
print(data_row)

<DataRow {'created_at': datetime.datetime(2021, 3, 28, 23, 35, 35, tzinfo=datetime.timezone.utc), 'external_id': 'dbb168f6-4f2c-46e0-a22a-abc837fda3f1', 'row_data': 'https://upload.wikimedia.org/wikipedia/commons/thumb/0/08/Kitano_Street_Kobe01s5s4110.jpg/2560px-Kitano_Street_Kobe01s5s4110.jpg', 'uid': 'ckmtsvzx421fb0y62bbsmbavz', 'updated_at': datetime.datetime(2021, 3, 29, 17, 54, 56, tzinfo=datetime.timezone.utc)}>


### Create

In [10]:
# Add data rows one at a time, to a freshly created dataset.
# Note: this rebinds `dataset`; `data_row` still refers to the row fetched
# from the project's dataset above.
dataset = client.create_dataset(name="testing-dataset")

# Minimal form: only the asset url is supplied.
dataset.create_data_row(row_data="https://picsum.photos/200/300")

# Using external ids is optional but recommended:
# they are useful for users to maintain references to a data_row.
dataset.create_data_row(
    row_data="https://picsum.photos/200/300",
    external_id=str(uuid.uuid4()),
)

<DataRow ID: ckmuw60q700ur0y8wcu178hbi>

In [11]:
# Bulk create data_rows: one call with a list of dicts keyed by DataRow fields.
# Two rows pointing at the same url are submitted; the call returns a task.
task1 = dataset.create_data_rows([
    {DataRow.row_data: "https://picsum.photos/200/300"} for _ in range(2)
])

In [12]:
# Local paths
# Write a small sample file to upload. The platform's temp directory is used
# instead of a hard-coded '/tmp', which does not exist on every OS.
local_data_path = os.path.join(tempfile.gettempdir(), "test_data_row.txt")
with open(local_data_path, 'w') as file:
    file.write("sample data")

# Pass the local file path as a plain string item (not a dict); the call
# returns a task that is awaited further below.
task2 = dataset.create_data_rows([local_data_path])

In [13]:
# Url entries (dicts) and local file paths (plain strings) can be mixed
# in a single bulk call.
task3 = dataset.create_data_rows([
    {DataRow.row_data: "https://picsum.photos/200/300"},
    local_data_path,
])

In [14]:
# Note that you cannot set external_ids at this time when uploading from local files.
# To do this you have to first upload the file yourself, then create the data
# row from the value returned by the upload together with the external id.
item_url = client.upload_file(local_data_path)
task4 = dataset.create_data_rows([
    {
        DataRow.row_data: item_url,
        DataRow.external_id: str(uuid.uuid4()),
    },
])

In [15]:
# Blocking wait until each of the four bulk-create tasks is complete.
upload_tasks = (task1, task2, task3, task4)
for upload_task in upload_tasks:
    upload_task.wait_till_done()

# Print the status of every task on one line, in creation order.
print(*(upload_task.status for upload_task in upload_tasks))

COMPLETE COMPLETE COMPLETE COMPLETE


### Update

In [16]:
# Update a field of an existing data row; here a new external id is assigned.
# (Original note: "Useful for resigning urls" - presumably `update` can change
# `row_data` the same way to re-sign a url; TODO confirm against the SDK docs.)
new_id = str(uuid.uuid4())
data_row.update(external_id=new_id)
# Print the row's external id next to the requested one for comparison.
print(data_row.external_id, new_id)

18201a8d-34a5-41ea-8ea0-feede9776b4a 18201a8d-34a5-41ea-8ea0-feede9776b4a


In [17]:
# We can also attach metadata to a data row.
# Metadata is visible for all projects with this data_row attached.
# Here a TEXT entry is created; the call is the cell's last expression, so its
# result is displayed as the cell output.
data_row.create_metadata(meta_type="TEXT", meta_value="LABELERS WILL SEE THIS ")
# See more information here:
# https://docs.labelbox.com/data-model/en/index-en#attachments
# Note that meta_value must always be a string (url to a video/image or a text value to display)

<AssetMetadata ID: ckmuw6mme5mpr0y839dzheh7c>

### Delete

In [18]:
# `data_row` is still the row looked up from the project's dataset in the Read
# section (it was not rebound when `dataset` was replaced), so this deletes a
# row belonging to the project - the reason a dummy project is advised.
data_row.delete()
# Will remove from the dataset too

In [19]:
# Bulk delete a list of data_rows (in this case all of them we just uploaded).
# `dataset` here is the "testing-dataset" created in the Create section; its
# `data_rows()` iterator is materialized into a list before being passed on.
# This cell only deletes data rows; it makes no call to delete the dataset.
DataRow.bulk_delete(list(dataset.data_rows()))