diff --git a/examples/basics/Export_V1_to_V2_migration_support.ipynb b/examples/basics/Export_V1_to_V2_migration_support.ipynb index 70e9a7051..e1b7e0ad4 100644 --- a/examples/basics/Export_V1_to_V2_migration_support.ipynb +++ b/examples/basics/Export_V1_to_V2_migration_support.ipynb @@ -49,9 +49,9 @@ { "metadata": {}, "source": [ - "### Key changes included in export V2 methods (``export_v2()`` and ``export()``):\n", + "### Key changes included in export V2 methods ( ``export()`` and ``export_v2()``):\n", "1. Added flexibility to only export the data that is needed. The new methods include parameters and filters to give you more granular control over your exports.\n", - "2. Added functionality to stream your data export using ``export()`` (available on SDK >=3.56)\n", + "2. Added functionality to stream an **unlimited** number of data rows using ``export()`` (available on SDK >=3.56). Upgrading to `export()` is recommended as it is a more scalable solution.\n", "\n", "For complete details on how to use export V2 methods please see the [Export V2 methods](https://docs.labelbox.com/reference/label-export#export-v2-methods) documentation." 
], @@ -99,7 +99,7 @@ "source": [ "import labelbox as lb\n", "import pprint\n", - "pp = pprint.PrettyPrinter(width=30, compact=True)" + "pp = pprint.PrettyPrinter(width=30, compact=True)\n" ], "cell_type": "code", "outputs": [], @@ -117,7 +117,8 @@ "metadata": {}, "source": [ "API_KEY = \"\"\n", - "client = lb.Client(api_key=API_KEY)" + "client = lb.Client(api_key=API_KEY)\n", + "client.enable_experimental = True ## This is required if using the export() streamable method" ], "cell_type": "code", "outputs": [], @@ -246,9 +247,10 @@ "source": [ "##### Export V2 \n", "\n", - "For complete details on the supported filters and parameters, including how they are used and what information is included, please see the [Export overview](https://docs.labelbox.com/reference/label-export#optional-parameters-and-filters) documentation.\n", + "For complete details on the supported filters and parameters, including how they are used and what information is included, please see the [Export overview](https://docs.labelbox.com/reference/label-export#optional-parameters-and-filters) documentation.\n", + "\n", + "1. ```project.export()``` : Starting from SDK version 3.56, a streamable method is available, this method allows you to stream unlimited number of data rows. However, if you are using an earlier version, you can still utilize the ```export_v2()``` function with identical parameters. It's important to note that the output task type differs, and streaming data methods are not included in `export_v2()`.\n", "\n", - "1. 
```project.export_v2()```\n", " - Parameters: \n", " - ```\"label_details\": True```\n", " - ```\"attachments\": True```\n", @@ -257,9 +259,10 @@ " - ```\"label_details\": True```\n", " - ```\"performance_details\": True```\n", " - Output: \n", - " - ```Task``` or ```ExportTask``` if using the streamable method\n", - " - `Task.result` --> Union[List[Dict[str, Any]], Dict[str, Any]]:\n", - " - `Task.errors` --> Optional[Dict[str, Any]]" + " - ```ExportTask```\n", + " - `ExportTask.has_result()` return type: bool \n", + " - `ExportTask.has_errors()` return type: bool\n", + " - `ExportTask.get_stream()` return type: Stream[JsonConverterOutput]" ], "cell_type": "markdown" }, @@ -278,20 +281,37 @@ "# You also have the option to include additional filtering to narrow down the list of labels\n", "filters = {}\n", "\n", + "export_task = project.export(params=export_params, filters=filters)\n", + "export_task.wait_till_done()" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "# Provide results with JSON converter\n", + "# Returns streamed JSON output strings from export task results/errors, one by one\n", "\n", - "# A task is returned, this provides additional information about the status of your task, such as\n", - "# any errors encountered\n", - "task = project.export_v2(params=export_params, filters=filters)\n", + "# Callback used for JSON Converter\n", + "def json_stream_handler(output: lb.JsonConverterOutput):\n", + " print(output.json_str)\n", "\n", - "task.wait_till_done()\n", - "if task.errors:\n", - " print(task.errors)\n", + "if export_task.has_errors():\n", + " export_task.get_stream(\n", + " converter=lb.JsonConverter(),\n", + " stream_type=lb.StreamType.ERRORS\n", + " ).start(stream_handler=lambda error: print(error))\n", "\n", - "export_json = task.result\n", - "# Fetch the first data row\n", - "pp.pprint(export_json[0])\n", + "if export_task.has_result():\n", + " export_json = 
export_task.get_stream(\n", + " converter=lb.JsonConverter(),\n", + " stream_type=lb.StreamType.RESULT\n", + " ).start(stream_handler=json_stream_handler)\n", "\n", - "\n" + "print(\"file size: \", export_task.get_total_file_size(stream_type=lb.StreamType.RESULT))\n", + "print(\"line count: \", export_task.get_total_lines(stream_type=lb.StreamType.RESULT))\n" ], "cell_type": "code", "outputs": [], @@ -351,15 +371,18 @@ "##### Export V2\n", "For complete details on the supported filters and parameters, including how they are used and what information is included, please see the [Export overview](https://docs.labelbox.com/reference/label-export#optional-parameters-and-filters) documentation.\n", "\n", - "1. ```project.export_v2()```:\n", + "1. ```project.export()``` : Starting from SDK version 3.56, a streamable method is available, this method allows you to stream unlimited number of data rows. However, if you are using an earlier version, you can still utilize the ```export_v2()``` function with identical parameters. 
It's important to note that the output task type differs, and streaming data methods are not included in `export_v2()`.\n", + "\n", " - Parameters (Minimum required parameters): \n", " - ```\"data_row_details\": True```\n", " - ```\"project_details\": True```\n", " - Required filters: \n", " - ``` \"workflow_status\": \"ToLabel\"```\n", - " - Output: ```Task``` or ```ExportTask``` if using the streamable method\n", - " - `Task.result` --> Union[List[Dict[str, Any]], Dict[str, Any]]:\n", - " - `Task.errors` --> Optional[Dict[str, Any]]" + " - Output: \n", + " - ```ExportTask```\n", + " - `ExportTask.has_result()` return type: bool \n", + " - `ExportTask.has_errors()` return type: bool\n", + " - `ExportTask.get_stream()` return type: Stream[JsonConverterOutput]" ], "cell_type": "markdown" }, @@ -376,17 +399,43 @@ " \"workflow_status\": \"ToLabel\" ## Using this filter will only export queued data rows\n", "}\n", "\n", - "# A task is returned, this provides additional information about the status of your task, such as\n", - "# any errors encountered\n", - "task = project.export_v2(params=export_params, filters=filters)\n", + "# An ExportTask is returned, this provides additional information about the status of your task, such as\n", + "# any errors encountered and includes additional methods to stream your data\n", "\n", - "task.wait_till_done()\n", - "if task.errors:\n", - " print(task.errors)\n", "\n", - "export_json = task.result\n", - "# Fetch the first data row\n", - "pp.pprint(export_json[0])\n" + "export_task = project.export(params=export_params, filters=filters)\n", + "export_task.wait_till_done()\n" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "# Provide results with JSON converter\n", + "# Returns streamed JSON output strings from export task results/errors, one by one\n", + "\n", + "# Callback used for JSON Converter\n", + "def json_stream_handler(output: lb.JsonConverterOutput):\n", + " 
print(output.json_str)\n", + "\n", + "\n", + "if export_task.has_errors():\n", + " export_task.get_stream(\n", + "\n", + " converter=lb.JsonConverter(),\n", + " stream_type=lb.StreamType.ERRORS\n", + " ).start(stream_handler=lambda error: print(error))\n", + "\n", + "if export_task.has_result():\n", + " export_json = export_task.get_stream(\n", + " converter=lb.JsonConverter(),\n", + " stream_type=lb.StreamType.RESULT\n", + " ).start(stream_handler=json_stream_handler)\n", + "\n", + "print(\"file size: \", export_task.get_total_file_size(stream_type=lb.StreamType.RESULT))\n", + "print(\"line count: \", export_task.get_total_lines(stream_type=lb.StreamType.RESULT))" ], "cell_type": "code", "outputs": [], @@ -461,12 +510,15 @@ "#### Export V2\n", "For complete details on the supported filters and parameters, including how they are used and what information is included, please see the [Export overview](https://docs.labelbox.com/reference/label-export#optional-parameters-and-filters) documentation.\n", "\n", - "1. ```project.export_v2()```:\n", + "1. ```project.export()``` : Starting from SDK version 3.56, a streamable method is available, this method allows you to stream unlimited number of data rows. However, if you are using an earlier version, you can still utilize the ```export_v2()``` function with identical parameters. 
It's important to note that the output task type differs, and streaming data methods are not included in `export_v2()`.\n", + "\n", " - Parameters (minimum required parameters): \n", " - ``\"data_row_details\": True``\n", - " - Output: ```Task``` or ```ExportTask``` if using streamable exports\n", - " - `Task.result` --> Union[List[Dict[str, Any]], Dict[str, Any]]:\n", - " - `Task.errors` --> Optional[Dict[str, Any]]" + " - Output: \n", + " - ```ExportTask```\n", + " - `ExportTask.has_result()` return type: bool \n", + " - `ExportTask.has_errors()` return type: bool\n", + " - `ExportTask.get_stream()` return type: Stream[JsonConverterOutput]" ], "cell_type": "markdown" }, @@ -482,16 +534,39 @@ "\n", "# A task is returned, this provides additional information about the status of your task, such as\n", "# any errors encountered\n", - "task = dataset.export_v2(params=export_params, filters=filters)\n", + "export_task = dataset.export(params=export_params, filters=filters)\n", + "export_task.wait_till_done()" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "# Provide results with JSON converter\n", + "# Returns streamed JSON output strings from export task results/errors, one by one\n", + "\n", + "# Callback used for JSON Converter\n", + "def json_stream_handler(output: lb.JsonConverterOutput):\n", + " print(output.json_str)\n", "\n", - "task.wait_till_done()\n", - "if task.errors:\n", - " print(task.errors)\n", "\n", - "# Output is a list of data row dictionaries\n", - "export_json = task.result\n", - "# Fetch the first data row\n", - "pp.pprint(export_json[0])" + "if export_task.has_errors():\n", + " export_task.get_stream(\n", + "\n", + " converter=lb.JsonConverter(),\n", + " stream_type=lb.StreamType.ERRORS\n", + " ).start(stream_handler=lambda error: print(error))\n", + "\n", + "if export_task.has_result():\n", + " export_json = export_task.get_stream(\n", + " converter=lb.JsonConverter(),\n", + " 
stream_type=lb.StreamType.RESULT\n", + " ).start(stream_handler=json_stream_handler)\n", + "\n", + "print(\"file size: \", export_task.get_total_file_size(stream_type=lb.StreamType.RESULT))\n", + "print(\"line count: \", export_task.get_total_lines(stream_type=lb.StreamType.RESULT))" ], "cell_type": "code", "outputs": [], @@ -555,15 +630,16 @@ "#### Export V2\n", "For complete details on the supported filters and parameters, including how they are used and what information is included, please see the [Export overview](https://docs.labelbox.com/reference/label-export#optional-parameters-and-filters) documentation.\n", "\n", - "1. ```project.export_v2()```:\n", + "1. ```project.export()``` : Starting from SDK version 3.56, a streamable method is available, this method allows you to stream unlimited number of data rows. However, if you are using an earlier version, you can still utilize the ```export_v2()``` function with identical parameters. It's important to note that the output task type differs, and streaming data methods are not included in `export_v2()`.\n", + "\n", " - Required parameters: \n", " - ```\"data_row_details\": True```,\n", " - ```\"batch_ids\": [] ```\n", - " - Required filters: \n", - " - ``` \"workflow_status\": \"ToLabel\"```\n", - " - Output: ```Task``` or ```ExportTask``` if using streamable exports \n", - " - `Task.result` --> Union[List[Dict[str, Any]], Dict[str, Any]]:\n", - " - `Task.errors` --> Optional[Dict[str, Any]]" + " - Output: \n", + " - ```ExportTask```\n", + " - `ExportTask.has_result()` return type: bool \n", + " - `ExportTask.has_errors()` return type: bool\n", + " - `ExportTask.get_stream()` return type: Stream[JsonConverterOutput]" ], "cell_type": "markdown" }, @@ -593,16 +669,39 @@ "\n", "# A task is returned, this provides additional information about the status of your task, such as\n", "# any errors encountered\n", - "task = project.export_v2(params=export_params, filters=filters)\n", + "export_task = 
project.export(params=export_params, filters=filters)\n", + "export_task.wait_till_done()" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "# Provide results with JSON converter\n", + "# Returns streamed JSON output strings from export task results/errors, one by one\n", + "\n", + "# Callback used for JSON Converter\n", + "def json_stream_handler(output: lb.JsonConverterOutput):\n", + " print(output.json_str)\n", + "\n", "\n", - "task.wait_till_done()\n", - "if task.errors:\n", - " print(task.errors)\n", + "if export_task.has_errors():\n", + " export_task.get_stream(\n", "\n", - "export_json = task.result\n", + " converter=lb.JsonConverter(),\n", + " stream_type=lb.StreamType.ERRORS\n", + " ).start(stream_handler=lambda error: print(error))\n", "\n", - "# Fetch the first data row\n", - "pp.pprint(export_json[0])\n" + "if export_task.has_result():\n", + " export_json = export_task.get_stream(\n", + " converter=lb.JsonConverter(),\n", + " stream_type=lb.StreamType.RESULT\n", + " ).start(stream_handler=json_stream_handler)\n", + "\n", + "print(\"file size: \", export_task.get_total_file_size(stream_type=lb.StreamType.RESULT))\n", + "print(\"line count: \", export_task.get_total_lines(stream_type=lb.StreamType.RESULT))" ], "cell_type": "code", "outputs": [], @@ -670,14 +769,19 @@ "#### Export V2\n", "For complete details on the supported filters and parameters, including how they are used and what information is included, please see the [Export overview](https://docs.labelbox.com/reference/label-export#optional-parameters-and-filters) documentation.\n", "\n", - "1. ```project.export_v2()```:\n", + "1. ```model_run.export()```: Starting from SDK version 3.56, a streamable method is available, this method allows you to stream unlimited number of data rows. However, if you are using an earlier version, you can still utilize the ```export_v2()``` function with identical parameters. 
It's important to note that the output task type differs, and streaming data methods are not included in `export_v2()`.\n", + "\n", " - Required parameters: \n", " - ```\"data_row_details\": True```\n", + " - ```\"project_details\": True```\n", + " - ```\"label_details\": True```\n", " - Required filters: \n", " - N/A -> Filters not supported\n", - " - Output: ```Task``` and ```ExportTask``` if using the streamable method\n", - " - `Task.result` --> Union[List[Dict[str, Any]], Dict[str, Any]]:\n", - " - `Task.errors` --> Optional[Dict[str, Any]]" + " - Output: \n", + " - ```ExportTask```\n", + " - `ExportTask.has_result()` return type: bool \n", + " - `ExportTask.has_errors()` return type: bool\n", + " - `ExportTask.get_stream()` return type: Stream[JsonConverterOutput]" ], "cell_type": "markdown" }, @@ -702,15 +806,37 @@ " \"performance_details\": True\n", "}\n", "\n", - "task = model_run.export_v2(params=export_params)\n", + "export_task = model_run.export(params=export_params)\n", + "export_task.wait_till_done()\n" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "# Provide results with JSON converter\n", + "# Returns streamed JSON output strings from export task results/errors, one by one\n", "\n", - "task.wait_till_done()\n", - "if task.errors:\n", - " print(task.errors)\n", + "# Callback used for JSON Converter\n", + "def json_stream_handler(output: lb.JsonConverterOutput):\n", + " print(output.json_str)\n", "\n", - "export_json = task.result\n", - "# Fetch a single data row\n", - "pp.pprint(export_json[0])\n" + "if export_task.has_errors():\n", + " export_task.get_stream(\n", + " converter=lb.JsonConverter(),\n", + " stream_type=lb.StreamType.ERRORS\n", + " ).start(stream_handler=lambda error: print(error))\n", + "\n", + "if export_task.has_result():\n", + " export_json = export_task.get_stream(\n", + " converter=lb.JsonConverter(),\n", + " stream_type=lb.StreamType.RESULT\n", + " 
).start(stream_handler=json_stream_handler)\n", + "\n", + "print(\"file size: \", export_task.get_total_file_size(stream_type=lb.StreamType.RESULT))\n", + "print(\"line count: \", export_task.get_total_lines(stream_type=lb.StreamType.RESULT))" ], "cell_type": "code", "outputs": [], @@ -753,6 +879,29 @@ ], "cell_type": "markdown" }, + { + "metadata": {}, + "source": [ + "##### Export V2\n", + "\n", + "For complete details on the supported filters and parameters, including how they are used and what information is included, please see the [Export overview](https://docs.labelbox.com/reference/label-export#optional-parameters-and-filters) documentation.\n", + "\n", + "1. ```project.export()```: Starting from SDK version 3.56, a streamable method is available, this method allows you to stream unlimited number of data rows. However, if you are using an earlier version, you can still utilize the ```export_v2()``` function with identical parameters. It's important to note that the output task type differs, and streaming data methods are not included in `export_v2()`.\n", + "\n", + " - Required parameters: \n", + " - ```\"attachments\": True```\n", + " - ```\"data_row_details\": True```\n", + " - ```\"project_details\": True```\n", + " - ```\"label_details\": True```\n", + " - ```\"performance_details\": True```\n", + " - Output: \n", + " - ```ExportTask```\n", + " - `ExportTask.has_result()` return type: bool \n", + " - `ExportTask.has_errors()` return type: bool\n", + " - `ExportTask.get_stream()` return type: Stream[JsonConverterOutput]" + ], + "cell_type": "markdown" + }, { "metadata": {}, "source": [ @@ -779,14 +928,8 @@ "\n", "# A task is returned, this provides additional information about the status of your task, such as\n", "# any errors encountered\n", - "task = project.export_v2(params=export_params, filters=filters)\n", - "\n", - "task.wait_till_done()\n", - "if task.errors:\n", - " print(task.errors)\n", - "\n", - "# Output is a list of data row 
dictionaries\n", - "export_json = task.result\n" + "export_task = project.export(params=export_params, filters=filters)\n", + "export_task.wait_till_done()" ], "cell_type": "code", "outputs": [], @@ -802,23 +945,28 @@ { "metadata": {}, "source": [ + "import json\n", + "import pprint as pp # Assuming pp is imported from pprint module\n", + "\n", "frames_objects_class_list = []\n", "global_class_list = []\n", "\n", - "for project_data in export_json:\n", - " for label_data in project_data[\"projects\"][VIDEO_PROJECT_ID][\"labels\"]:\n", - " frames_data = label_data[\"annotations\"][\"frames\"]\n", + "stream = export_task.get_stream()\n", + "for output in stream:\n", + " output_json = json.loads(output.json_str)\n", + " for dr in output_json[\"projects\"][VIDEO_PROJECT_ID][\"labels\"]:\n", + " frames_data = dr[\"annotations\"][\"frames\"]\n", " for k, v in frames_data.items():\n", - " frames_objects_class_list.append({k:v})\n", - " global_class_list.extend(label_data[\"annotations\"][\"classifications\"])\n", - "\n", + " frames_objects_class_list.append({k: v})\n", + " global_class_list.extend(dr[\"annotations\"][\"classifications\"])\n", "\n", + " print(\"------- Frame specific classifications and objects -------\")\n", + " pp.pprint(frames_objects_class_list)\n", "\n", - "print(\"------- Frame specific classifications and objects -------\")\n", - "pp.pprint(frames_objects_class_list)\n", + " print(\"------ Global classifications -------\")\n", + " pp.pprint(global_class_list)\n", "\n", - "print(\"------ Global classifications -------\")\n", - "pp.pprint(global_class_list)" + "\n" ], "cell_type": "code", "outputs": [], @@ -835,8 +983,11 @@ "metadata": {}, "source": [ "keyframe_map = []\n", - "for project in export_json:\n", - " labels = project[\"projects\"][VIDEO_PROJECT_ID][\"labels\"]\n", + "\n", + "stream = export_task.get_stream()\n", + "for output in stream:\n", + " output_json = json.loads(output.json_str)\n", + " labels = 
output_json[\"projects\"][VIDEO_PROJECT_ID][\"labels\"]\n", " for label in labels:\n", " annotations = label[\"annotations\"][\"key_frame_feature_map\"]\n", " for key, value in annotations.items():\n", @@ -860,8 +1011,10 @@ "metadata": {}, "source": [ "segments_map = []\n", - "for project in export_json:\n", - " labels = project[\"projects\"][VIDEO_PROJECT_ID][\"labels\"]\n", + "stream = export_task.get_stream()\n", + "for output in stream:\n", + " output_json = json.loads(output.json_str)\n", + " labels = output_json[\"projects\"][VIDEO_PROJECT_ID][\"labels\"]\n", " for label in labels:\n", " annotations = label[\"annotations\"][\"segments\"]\n", " for key, value in annotations.items():\n",