diff --git a/examples/prediction_upload/text_predictions.ipynb b/examples/prediction_upload/text_predictions.ipynb index 6db5b16d4..64ba4d45c 100644 --- a/examples/prediction_upload/text_predictions.ipynb +++ b/examples/prediction_upload/text_predictions.ipynb @@ -88,45 +88,23 @@ "!pip install -q 'labelbox[data]'" ], "metadata": { - "id": "cm8xMaLbGb7v", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "a524ede8-eb09-4e90-eb8c-1de6ceeb7598" + "id": "cm8xMaLbGb7v" }, - "execution_count": 1, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m185.5/185.5 KB\u001b[0m \u001b[31m9.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m82.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Building wheel for pygeotile (setup.py) ... \u001b[?25l\u001b[?25hdone\n" - ] - } - ] + "execution_count": 165, + "outputs": [] }, { "cell_type": "code", "source": [ - "from labelbox.schema.ontology import OntologyBuilder, Tool, Classification, Option\n", - "from labelbox import Client, MALPredictionImport, LabelImport\n", - "from labelbox.data.serialization import NDJsonConverter\n", - "from labelbox.schema.media_type import MediaType\n", - "from labelbox.data.annotation_types import (\n", - " Label, TextData, Checklist, Radio, ObjectAnnotation, TextEntity,\n", - " ClassificationAnnotation, ClassificationAnswer, LabelList, Text, ImageData\n", - ")\n", - "import uuid\n", - "import numpy as np\n", - "from labelbox.schema.queue_mode import QueueMode" + "import labelbox as lb\n", + "import labelbox.data.annotation_types as lb_types\n", + "import labelbox.data.serialization as lb_serializers\n", + "import uuid" ], "metadata": { "id": "NIq-6M9kHKSs" }, - "execution_count": 2, + "execution_count": 166, "outputs": [] }, { @@ -142,13 +120,13 @@ { "cell_type": "code", "source": [ - "API_KEY = None\n", - "client = Client(API_KEY)" + "API_KEY=None\n", + "client = lb.Client(API_KEY)" ], "metadata": { "id": "z7ZLKLYLHP__" }, - "execution_count": null, + "execution_count": 167, "outputs": [] }, { @@ -166,8 +144,8 @@ "########## Entities ##########\n", "\n", "# Python annotation\n", - "named_entity = TextEntity(start=10, end=20)\n", - "entities_prediction = ObjectAnnotation(value=named_entity, name = \"named_entity\", confidence=0.5)\n", + "named_entity = lb_types.TextEntity(start=10, end=20)\n", + "entities_prediction = lb_types.ObjectAnnotation(value=named_entity, name = \"named_entity\", confidence=0.5)\n", "\n", "\n", "# NDJSON\n", @@ -183,7 +161,7 @@ "metadata": { "id": "FJhCAqeR-cUJ" }, - "execution_count": 4, + "execution_count": 168, "outputs": [] }, { @@ -192,25 +170,25 @@ "########## Classification - Radio (single choice ) ##########\n", "\n", "# Python annotation \n", - "radio_prediction = ClassificationAnnotation(\n", + "radio_prediction = lb_types.ClassificationAnnotation(\n", " name=\"radio_question\",\n", - " value=Radio(answer = \n", - " ClassificationAnswer(name = \"first_radio_answer\", confidence=0.5)\n", + " value=lb_types.Radio(answer =\n", + " lb_types.ClassificationAnswer(name = \"first_radio_answer\", confidence=0.5)\n", " )\n", ")\n", "\n", "\n", "# NDJSON\n", "radio_prediction_ndjson = {\n", - " 'name': 'radio_question',\n", - " 'confidence': 0.5,\n", - " 'answer': {'name': 'first_radio_answer', 'confidence': 0.5}\n", + " \"name\": \"radio_question\",\n", + " \"confidence\": 0.5,\n", + " \"answer\": {\"name\": \"first_radio_answer\", \"confidence\": 0.5}\n", "} " ], "metadata": { "id": "SOOR0zCBKef_" }, - "execution_count": 5, + "execution_count": 169, "outputs": [] }, { @@ -218,14 +196,18 @@ "source": [ "#### Nested Classifications only supported with NDJSON tools ######\n", "\n", + "\n", + "# Python annotation\n", + "\n", "nested_radio_prediction_ndjson = {\n", - " 'name': 'radio_question_sub',\n", - " 'answer': {\n", - " 'name': 'first_radio_answer',\n", + " \"name\": \"nested_radio_question\",\n", + " \"confidence\": 0.5,\n", + " \"answer\": {\n", + " \"name\": \"first_radio_answer\",\n", " \"confidence\": 0.5,\n", - " 'classifications': [{\n", - " 'name':'sub_radio_question',\n", - " 'answer': { 'name' : 'first_sub_radio_answer', 'confidence': 0.5 }\n", + " \"classifications\": [{\n", + " \"name\":\"sub_radio_question\",\n", + " \"answer\": { \"name\" : \"first_sub_radio_answer\", \"confidence\": 0.5 }\n", " }]\n", " }\n", "}\n", @@ -248,7 +230,7 @@ "metadata": { "id": "cXKGv0BeNXQi" }, - "execution_count": 6, + "execution_count": 170, "outputs": [] }, { @@ -257,19 +239,19 @@ "########## Checklist ##########\n", "\n", "# Python annotation\n", - "checklist_prediction = ClassificationAnnotation(\n", + "checklist_prediction = lb_types.ClassificationAnnotation(\n", " name=\"checklist_question\",\n", - " value=Checklist(\n", + " value=lb_types.Checklist(\n", " answer = [\n", - " ClassificationAnswer(\n", + " lb_types.ClassificationAnswer(\n", " name = \"first_checklist_answer\",\n", " confidence=0.5\n", " ),\n", - " ClassificationAnswer(\n", + " lb_types.ClassificationAnswer(\n", " name = \"second_checklist_answer\", \n", " confidence=0.5\n", " ),\n", - " ClassificationAnswer(\n", + " lb_types.ClassificationAnswer(\n", " name = \"third_checklist_answer\", \n", " confidence=0.5\n", " )\n", @@ -279,19 +261,17 @@ "\n", "# NDJSON\n", "checklist_prediction_ndjson = {\n", - " 'name': 'checklist_question',\n", - " 'confidence': 0.5,\n", - " 'answer': [\n", - " {'name': 'first_checklist_answer', 'confidence': 0.5}\n", + " \"name\": \"checklist_question\",\n", + " \"confidence\": 0.5,\n", + " \"answer\": [\n", + " {\"name\": \"first_checklist_answer\", \"confidence\": 0.5}\n", " ]\n", - "}\n", - "\n", - "\n" + "}" ], "metadata": { "id": "vdwPTRkP6JIx" }, - "execution_count": 7, + "execution_count": 171, "outputs": [] }, { @@ -300,21 +280,21 @@ "########## Classification Free-Form text ##########\n", "\n", "# Python annotation\n", - "text_prediction = ClassificationAnnotation(\n", + "text_prediction = lb_types.ClassificationAnnotation(\n", " name = \"free_text\", \n", - " value = Text(answer=\"sample text\")\n", + " value = lb_types.Text(answer=\"sample text\")\n", ")\n", "\n", "# NDJSON\n", "text_prediction_ndjson = {\n", - " 'name': 'free_text',\n", - " 'answer': 'sample text'\n", + " \"name\": \"free_text\",\n", + " \"answer\": \"sample text\"\n", "}" ], "metadata": { "id": "ie2Zni_d6MI8" }, - "execution_count": 8, + "execution_count": 172, "outputs": [] }, { @@ -343,24 +323,24 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "125b5130-5dd5-4e7f-8c53-f6da76359752" + "outputId": "8c427e26-591a-4a90-edba-9d28dd686f3e" }, - "execution_count": 9, + "execution_count": 173, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\n" ] } @@ -381,53 +361,53 @@ "source": [ "## Setup the ontology and link the tools created above.\n", "\n", - "ontology_builder = OntologyBuilder(\n", + "ontology_builder = lb.OntologyBuilder(\n", " classifications=[ # List of Classification objects\n", - " Classification( \n", - " class_type=Classification.Type.RADIO, \n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", " instructions=\"radio_question\", \n", - " options=[Option(value=\"first_radio_answer\")]\n", + " options=[lb.Option(value=\"first_radio_answer\")]\n", " ),\n", - " Classification( \n", - " class_type=Classification.Type.RADIO, \n", - " instructions=\"radio_question_sub\", \n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " instructions=\"nested_radio_question\", \n", " options=[\n", - " Option(value=\"first_radio_answer\",\n", + " lb.Option(value=\"first_radio_answer\",\n", " options=[\n", - " Classification(\n", - " class_type=Classification.Type.RADIO,\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", " instructions=\"sub_radio_question\",\n", " options=[\n", - " Option(value=\"first_sub_radio_answer\")\n", + " lb.Option(value=\"first_sub_radio_answer\")\n", " ]\n", " ),\n", " ]\n", " )\n", " ],\n", " ),\n", - " Classification( \n", - " class_type=Classification.Type.CHECKLIST, \n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", " instructions=\"checklist_question\", \n", " options=[\n", - " Option(value=\"first_checklist_answer\"),\n", - " Option(value=\"second_checklist_answer\"), \n", - " Option(value=\"third_checklist_answer\") \n", + " lb.Option(value=\"first_checklist_answer\"),\n", + " lb.Option(value=\"second_checklist_answer\"),\n", + " lb.Option(value=\"third_checklist_answer\")\n", " ]\n", " ), \n", - " Classification( \n", - " class_type=Classification.Type.TEXT,\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.TEXT,\n", " instructions=\"free_text\"\n", " ),\n", - " Classification(\n", - " class_type=Classification.Type.CHECKLIST, \n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", " instructions=\"nested_checklist_question\",\n", " options=[\n", - " Option(\"first_checklist_answer\",\n", + " lb.Option(\"first_checklist_answer\",\n", " options=[\n", - " Classification(\n", - " class_type=Classification.Type.CHECKLIST, \n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", " instructions=\"sub_checklist_question\", \n", - " options=[Option(\"first_sub_checklist_answer\")]\n", + " options=[lb.Option(\"first_sub_checklist_answer\")]\n", " )\n", " ]\n", " )\n", @@ -435,17 +415,17 @@ " )\n", " ],\n", " tools=[ # List of Tool objects\n", - " Tool(tool=Tool.Type.NER, \n", + " lb.Tool(tool=lb.Tool.Type.NER,\n", " name=\"named_entity\")\n", " ]\n", ")\n", "\n", - "ontology = client.create_ontology(\"Ontology Text Predictions\", ontology_builder.asdict() , media_type=MediaType.Text)\n" + "ontology = client.create_ontology(\"Ontology Text Predictions\", ontology_builder.asdict() , media_type=lb.MediaType.Text)\n" ], "metadata": { "id": "Kt4XWWqgIiWk" }, - "execution_count": 10, + "execution_count": 174, "outputs": [] }, { @@ -469,7 +449,7 @@ "metadata": { "id": "8n-AvzdiOR6d" }, - "execution_count": 11, + "execution_count": 175, "outputs": [] }, { @@ -491,9 +471,9 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "d2a3efb9-f848-4041-e930-0c8ce8dcdc01" + "outputId": "b99b53c1-9296-460f-bed1-28a4f05079d8" }, - "execution_count": 20, + "execution_count": 176, "outputs": [ { "output_type": "execute_result", @@ -503,7 +483,7 @@ ] }, "metadata": {}, - "execution_count": 20 + "execution_count": 176 } ] }, @@ -526,13 +506,13 @@ "cell_type": "code", "source": [ "# Create a Label for predictions\n", - "label_prediction = Label(\n", - " data=TextData(uid=data_row.uid),\n", + "label_prediction = lb_types.Label(\n", + " data=lb_types.TextData(uid=data_row.uid),\n", " annotations = [\n", " entities_prediction, \n", " radio_prediction, \n", " checklist_prediction,\n", - " text_prediction\n", + " text_prediction,\n", " ]\n", ")\n", "\n", @@ -540,12 +520,12 @@ "label_list_prediction = [label_prediction]\n", "\n", "# Convert the prediction label from a Labelbox class object to the underlying NDJSON format required for upload - uploads can be directly built in this syntax as well\n", - "ndjson_prediction = list(NDJsonConverter.serialize(label_list_prediction))" + "ndjson_prediction = list(lb_serializers.NDJsonConverter.serialize(label_list_prediction))" ], "metadata": { "id": "zv2OLTXKSGWv" }, - "execution_count": 21, + "execution_count": 177, "outputs": [] }, { @@ -560,14 +540,13 @@ { "cell_type": "code", "source": [ - "\n", "ndjson_prediction_method2 = []\n", "for annot in [\n", " entities_prediction_ndjson, \n", " radio_prediction_ndjson, \n", - " nested_radio_prediction_ndjson,\n", " checklist_prediction_ndjson,\n", " text_prediction_ndjson, \n", + " nested_radio_prediction_ndjson,\n", " nested_checklist_prediction_ndjson\n", " ]:\n", " annot.update({\n", @@ -579,7 +558,7 @@ "metadata": { "id": "F-Y7sSyAV3tn" }, - "execution_count": 22, + "execution_count": 178, "outputs": [] }, { @@ -608,9 +587,9 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "3829ae1e-3cd1-4619-9a25-73d7c2c1952c" + "outputId": "a62c87c5-8d66-47bc-dc82-8899bedf2d04" }, - "execution_count": 23, + "execution_count": 179, "outputs": [ { "output_type": "stream", @@ -645,19 +624,30 @@ "cell_type": "code", "source": [ "# Create a Labelbox project\n", - "project = client.create_project(name=\"Text Prediction Import\", \n", - " queue_mode=QueueMode.Batch,\n", + "project = client.create_project(name=\"Text Prediction Import\",\n", " # Quality Settings setup \n", " auto_audit_percentage=1,\n", " auto_audit_number_of_labels=1,\n", - " media_type=MediaType.Text)\n", + " media_type=lb.MediaType.Text)\n", "project.setup_editor(ontology)" ], "metadata": { - "id": "jEtoDiDrPFvI" + "id": "jEtoDiDrPFvI", + "outputId": "51a8e2e5-27c9-4051-8176-7a37018d1852", + "colab": { + "base_uri": "https://localhost:8080/" + } }, - "execution_count": 14, - "outputs": [] + "execution_count": 180, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "WARNING:labelbox.client:Default createProject behavior will soon be adjusted to prefer batch projects. Pass in `queue_mode` parameter explicitly to opt-out for the time being.\n" + ] + } + ] }, { "cell_type": "markdown", @@ -682,19 +672,22 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "cf36bc23-3f88-4820-a682-8cd3d152d426" + "outputId": "6708c469-4544-4a00-e866-8fd446944554", + "pycharm": { + "is_executing": true + } }, - "execution_count": 15, + "execution_count": 181, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ - "" + "" ] }, "metadata": {}, - "execution_count": 15 + "execution_count": 181 } ] }, @@ -723,16 +716,6 @@ " \"answer\": {\"name\": \"first_radio_answer\"}\n", "} \n", "\n", - "radio_annotation_ndjson_with_subclass = {\n", - " \"name\": \"radio_question_sub\",\n", - " \"answer\": {\n", - " \"name\": \"first_radio_answer\",\n", - " \"classifications\": [{\n", - " \"name\":\"sub_radio_question\",\n", - " \"answer\": { \"name\" : \"first_sub_radio_answer\"}\n", - " }]\n", - " }\n", - "}\n", "\n", "checklist_annotation_ndjson = {\n", " \"name\": \"checklist_question\",\n", @@ -748,7 +731,18 @@ " \"answer\": \"sample text\",\n", "}\n", "\n", - "nested_checklist_prediction_ndjson = {\n", + "nested_radio_annotation_ndjson = {\n", + " \"name\": \"nested_radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_radio_answer\",\n", + " \"classifications\": [{\n", + " \"name\":\"sub_radio_question\",\n", + " \"answer\": { \"name\" : \"first_sub_radio_answer\"}\n", + " }]\n", + " }\n", + "}\n", + "\n", + "nested_checklist_annotation_ndjson = {\n", " \"name\": \"nested_checklist_question\",\n", " \"answer\": [{\n", " \"name\": \"first_checklist_answer\", \n", @@ -764,7 +758,7 @@ "metadata": { "id": "A8_HVvu9Uvfl" }, - "execution_count": 16, + "execution_count": 182, "outputs": [] }, { @@ -784,10 +778,10 @@ "for annot in [\n", " entities_ndjson, \n", " radio_annotation_ndjson, \n", - " radio_annotation_ndjson_with_subclass,\n", " checklist_annotation_ndjson,\n", " text_annotation_ndjson,\n", - " nested_checklist_prediction_ndjson \n", + " nested_checklist_annotation_ndjson ,\n", + " nested_radio_annotation_ndjson \n", " ]:\n", " annot.update({\n", " 'uuid': str(uuid.uuid4()),\n", @@ -798,7 +792,7 @@ "metadata": { "id": "9gD_alThQA3G" }, - "execution_count": 17, + "execution_count": 183, "outputs": [] }, { @@ -813,7 +807,7 @@ { "cell_type": "code", "source": [ - "upload_job_annotation = LabelImport.create_from_objects(\n", + "upload_job_annotation = lb.LabelImport.create_from_objects(\n", " client = client,\n", " project_id = project.uid,\n", " name=\"text_label_import_job\"+ str(uuid.uuid4()),\n", @@ -828,9 +822,9 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "df994b08-d6c9-4699-bf3c-bd28ab6372e5" + "outputId": "e78e544b-e6a3-42db-8a71-b0af8c3113c3" }, - "execution_count": 18, + "execution_count": 184, "outputs": [ { "output_type": "stream", @@ -862,9 +856,9 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "3440e9fc-2c51-48d6-f17b-095406abe49f" + "outputId": "748982b6-7593-4232-c2cc-d30d8203a2dd" }, - "execution_count": 19, + "execution_count": 185, "outputs": [ { "output_type": "execute_result", @@ -874,7 +868,7 @@ ] }, "metadata": {}, - "execution_count": 19 + "execution_count": 185 } ] }, @@ -890,14 +884,13 @@ { "cell_type": "code", "source": [ - "\n", "# project.delete()\n", "# dataset.delete()" ], "metadata": { "id": "aAhkyvJlWK1p" }, - "execution_count": 53, + "execution_count": 186, "outputs": [] } ]