# Data rows

* Data rows are the items that are actually being labeled. We currently support the following:
    * Image
    * Text
    * Video
* A datarow is a member of a dataset.
* A datarow cannot exist without belonging to a dataset.
* Datarows are staged to be labeled by attaching the dataset that they are members of to a project
    * See the dataset notebook for more information about datasets

In [None]:
!pip install labelbox

In [None]:
from labelbox import Project, Dataset, DataRow, Client
import uuid
import os

* Fill in the following cell with your own values before running this notebook

In [None]:
# Pick a project that has a dataset attached
PROJECT_ID = "ckk4q1viuc0w20704eh69u28h"
# Read the API key from the LABELBOX_API_KEY environment variable; this lookup
# raises KeyError if the variable is not set. If you are running in Colab (or
# otherwise have no such variable), set the key here instead.
API_KEY = os.environ["LABELBOX_API_KEY"]
# Only update this if you have an on-prem deployment
ENDPOINT = "https://api.labelbox.com/graphql"

In [None]:
# Build the API client from the key and endpoint configured in this notebook.
client = Client(api_key=API_KEY, endpoint=ENDPOINT)

In [None]:
# Fetch the project by id, then take the first dataset attached to it.
# NOTE(review): next() raises StopIteration if the project has no dataset
# attached, so PROJECT_ID must point at a project with at least one.
project = client.get_project(PROJECT_ID)
dataset = next(project.datasets())
# This is the same as
# -> dataset = client.get_dataset(dataset_id)

### Read

In [None]:
# dataset.data_rows() gives an iterator over the dataset's data rows;
# take the first one to use as the example row in the cells below.
datarows = dataset.data_rows()
datarow = next(datarows)

In [None]:
# Inspect the data row: its parent dataset, the labels attached to it, and
# its external id.
print("Associated dataset", datarow.dataset())
# Collect the labels with list() rather than next(): a data row that has not
# been labeled yet yields nothing, and next() on an exhausted iterator raises
# StopIteration. list() also shows every label, matching the "label(s)" text.
print("Associated label(s)", list(datarow.labels()))
print("External id", datarow.external_id)

In [None]:
# External ids can be a reference to your internal datasets.
# Here the same data row is fetched again from the dataset using its
# external id, then printed.
# NOTE(review): presumably this requires the row to have an external id set —
# confirm behavior when datarow.external_id is None.
datarow = dataset.data_row_for_external_id(datarow.external_id)
print(datarow)

### Create

In [None]:
# Create a new dataset and add data rows to it one at a time.
# NOTE: this rebinds `dataset`, so the following cells operate on the newly
# created dataset rather than the one taken from the project.
dataset = client.create_dataset(name="testing-dataset")
dataset.create_data_row(row_data="https://picsum.photos/200/300")

# Supplying an external id is optional but recommended: it lets you keep a
# reference from the data row back to your own records.
dataset.create_data_row(
    row_data="https://picsum.photos/200/300",
    external_id=str(uuid.uuid4()),
)


In [None]:
# Bulk create data rows from a list of dicts keyed by DataRow fields.
# The returned task is waited on in a later cell.
task1 = dataset.create_data_rows(
    [
        {DataRow.row_data: "https://picsum.photos/200/300"},
        {DataRow.row_data: "https://picsum.photos/200/300"},
    ]
)

In [None]:
# Local paths: write a small sample text file to disk, then pass its path
# (a plain string instead of a dict) to create_data_rows.
# NOTE(review): '/tmp' is a POSIX location; adjust the path on other systems.
local_data_path = '/tmp/test_data_row.txt'
with open(local_data_path, 'w') as file:
    file.write("sample data")
    
task2 = dataset.create_data_rows([local_data_path])

In [None]:
# A single bulk call may combine url entries (dicts) with local file paths.
task3 = dataset.create_data_rows(
    [
        {DataRow.row_data: "https://picsum.photos/200/300"},
        local_data_path,
    ]
)

In [None]:
# Note that you cannot set external_ids at this time when uploading directly
# from local file paths. To do this, first upload the file yourself, then
# create the data row from the uploaded item together with the external id.
item_url = client.upload_file(local_data_path)
task4 = dataset.create_data_rows(
    [{DataRow.row_data: item_url, DataRow.external_id: str(uuid.uuid4())}]
)

In [None]:
# Block until every bulk-creation task has finished.
task1.wait_till_done()
task2.wait_till_done()
task3.wait_till_done()
task4.wait_till_done()

# Show the final status of each task on one line, in creation order.
print(*(task.status for task in (task1, task2, task3, task4)))


### Update

In [None]:
# Update a field on an existing data row — here, assign a new external id.
# NOTE(review): the original note said this is "useful for resigning urls";
# presumably update() can change row_data the same way — confirm.
new_id = str(uuid.uuid4())
datarow.update(external_id = new_id)
# Print the row's external id next to the new value so they can be compared.
print(datarow.external_id, new_id)


In [None]:
# We can also attach metadata to a data row (see the metadata tutorial for more).
# Metadata is visible for all projects with this datarow attached.
# Note that meta_value must always be a string (url to a video/image or a text
# value to display).
datarow.create_metadata(meta_type = "TEXT", meta_value = "LABELERS WILL SEE THIS ")
# See more information here:
# https://docs.labelbox.com/en/import-data/attachments

### Delete

In [None]:
# Delete this single data row.
# NOTE(review): `datarow` was fetched from the dataset attached to PROJECT_ID
# in the Read section, not from the "testing-dataset" created later — confirm
# that deleting it is intended before running this cell.
datarow.delete()
# Will remove from the dataset too

In [None]:
# Bulk delete a list of datarows. `dataset` here is whatever the name is
# currently bound to (the "testing-dataset" created in the Create section when
# the notebook is run top to bottom), so this removes every row uploaded to it.
DataRow.bulk_delete(list(dataset.data_rows()))