# Text Data Export
* Export labels from text annotation projects

In [None]:
!pip install labelbox
!pip install requests

In [2]:
from labelbox import Client
import requests
from collections import Counter
import os

In [3]:
# Configuration for this example.
# Pick a project that has entity tools in the ontology and has completed labels
PROJECT_ID = "ckme5v7aykpoj0709ufi5h6i2"
# The API key is read from the LABELBOX_API_KEY environment variable; this line
# raises KeyError if the variable is not set. In environments where it is not
# already defined (e.g. Colab), set it before running this cell.
API_KEY = os.environ["LABELBOX_API_KEY"]
# GraphQL endpoint handed to the client. Only update this if you have an on-prem deployment
ENDPOINT = "https://api.labelbox.com/graphql"

In [4]:
# Create the API client from the configured key and endpoint, then look up
# the project whose labels will be exported.
client = Client(api_key=API_KEY, endpoint=ENDPOINT)
project = client.get_project(PROJECT_ID)

### Export the labels

In [5]:
# Request an export of the project's labels; the result is used below as the
# download URL of a JSON file.
# NOTE(review): presumably this call can return no URL if the export is not
# ready in time — confirm against the SDK documentation.
export_url = project.export_labels()

In [6]:
# Show where the export file can be downloaded from.
# NOTE: judging by its query parameters (Signature, Expires) this looks like a
# signed, time-limited link — consider clearing this output before sharing.
print(export_url)

https://storage.googleapis.com/labelbox-exports/ckk4q1vgapsau07324awnsjq2/ckme5v7aykpoj0709ufi5h6i2/export-2021-03-22T11%3A31%3A05.907Z.json?GoogleAccessId=api-prod%40labelbox-193903.iam.gserviceaccount.com&Expires=1617622268&Signature=VmqCl%2FTy60h8FO9q3E6TMmHpS5zgL5ZSD4YY%2BqBPBm2WCexOYnWsbCJ%2BHpqv%2Fy3y%2B9hMdSQiHVPbsScclza1UJC1xKCAdmNlzTnqZAaEkxoCSwKxNCtnKjRoMkYymlhjdrjxadxXeCmfnMGrGA3fr01KYweUdzUYX%2BzWoedno5Uq7aJNOB9HPjTJrltyJnmXbdQNdoKHr11xhzbqwdLFFZ8sW%2B5I2ZRiK2sC5LRoxazIlBu7om4clES4CzEwSSbggNb0A1ZtVg4MVp22XFzS7Ijdes%2FyjHbjm0HfXVzv4e6F5ag3eQ5oq3agUDJZsHw9m9PSbDwnDCAjUT4lRH7mMw%3D%3D&response-content-disposition=attachment


In [7]:
# Download the export file and parse it as JSON.
# - timeout: fail instead of hanging indefinitely if the server stops responding.
# - raise_for_status: surface HTTP errors (e.g. an expired download link)
#   directly instead of as a confusing JSON decoding failure on the error body.
export_response = requests.get(export_url, timeout=60)
export_response.raise_for_status()
exports = export_response.json()

In [12]:
# Show the first annotated object (entity) of the first exported label
exports[0]["Label"]["objects"][0]

{'featureId': 'ckme60w4306hv0y8d7g7k64ky',
 'schemaId': 'ckme5v8wt01n10ybafw48f72g',
 'title': 'org',
 'value': 'org',
 'color': '#ff0000',
 'version': 1,
 'format': 'text.location',
 'data': {'location': {'start': 32670, 'end': 32690}},
 'instanceURI': 'https://api.labelbox.com/masks/feature/ckme60w4306hv0y8d7g7k64ky?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJja2s0cTF2Z3djMHZwMDcwNHhoeDdtNHZrIiwib3JnYW5pemF0aW9uSWQiOiJja2s0cTF2Z2Fwc2F1MDczMjRhd25zanEyIiwiaWF0IjoxNjE2NDEyNjY1LCJleHAiOjE2MTkwMDQ2NjV9.BjsyyZebUwFqfv993ePUXl0DNAoNlXKwLYzgH1s7JUw'}

### Using the data
* This dataset contains only one data row, so the example is simple.
* We are just going to look at the labeled entities (people and organizations).

In [11]:
# Text that the entity locations are sliced from below (the "Labeled Data"
# field of the first exported row).
# NOTE(review): assumes this field holds the raw document text rather than a
# URL pointing to it — confirm for your data rows.
text = exports[0]["Labeled Data"]

In [12]:
# Gather the text span of every annotated entity, grouped by entity type.
people, orgs = [], []
# Route each entity title we care about to the list that collects its spans;
# entities with any other title are ignored.
spans_by_title = {"person": people, "org": orgs}
for entity in exports[0]["Label"]["objects"]:
    location = entity["data"]["location"]
    bucket = spans_by_title.get(entity["title"])
    if bucket is not None:
        # Slice the document text using the entity's start/end offsets.
        bucket.append(text[location["start"]:location["end"]])

In [16]:
# Tally how many times each distinct "person" span was collected
Counter(people)

Counter({'Robin Wensley': 1,
         'Jones': 1,
         'Frank Cass': 1,
         'Robert': 1,
         'Armstrong': 1,
         'Kotler': 1,
         "Adam Smith's": 1,
         'Philip Kotler': 1})

In [18]:
# Tally how many times each distinct "org" span was collected
Counter(orgs)

Counter({'Wikiquote\n Marketing': 1,
         'Wiktionary\n Quotations': 1,
         'Handbook of Marketing': 1,
         'Barton A.': 1,
         '2014\nWeitz': 1,
         'The Rise and Fall of Mass Marketing, Routledge': 1,
         'Geoffrey G.': 1,
         'Richard S.': 1,
         'Tedlow': 1,
         'Vol 25': 1,
         'Periodization in Marketing History," Journal of Macromarketing': 1,
         'Dix and Farlow, L.': 1,
         'D.G. Brian': 1,
         'Kathleen M.; Jones': 1,
         'Rassuli': 1,
         'Stanley C.': 1,
         'Hollander': 1,
         'The Emergence of Modern Marketing': 1,
         'Roy and Godley, Andrew (eds)': 1,
         'Harvard Business School Press. ISBN 978-0-87584-585-2.\nChurch': 1,
         'Christensen, Clayton M': 1,
         'Grid': 1,
         'The History of Marketing Thought': 1,
         'PLCIn': 1,
         'PLC': 2,
         'SBU': 5,
         'SBUs': 1,
         'SBU)': 1,
         'The Marketing Plan': 1,
         'YouTube': 