diff --git a/Noie.ipynb b/Noie.ipynb
new file mode 100644
index 0000000..7bb4aa4
--- /dev/null
+++ b/Noie.ipynb
@@ -0,0 +1,3859 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "provenance": [],
+ "include_colab_link": true
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ },
+ "widgets": {
+ "application/vnd.jupyter.widget-state+json": {
+ "95e4153cff5345bfa05bbd03004fbce9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HBoxModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_943c6f48b3d644e4b3cd9080267d356d",
+ "IPY_MODEL_370944fc082048e5ab90be44e9bf1f57",
+ "IPY_MODEL_befcc82bdc8f42b289933521c6112806"
+ ],
+ "layout": "IPY_MODEL_03fe4d1cdf60409ba40489ce3ec8a872"
+ }
+ },
+ "943c6f48b3d644e4b3cd9080267d356d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_96efaef86f5d4e87b2bdc5bf59fbd165",
+ "placeholder": "",
+ "style": "IPY_MODEL_5700d7adc9e04a159939285e15b9a642",
+ "value": "Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.4.0.json: "
+ }
+ },
+ "370944fc082048e5ab90be44e9bf1f57": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "FloatProgressModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_294bc7b70b67495987a037755b3888dd",
+ "max": 25998,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_18009cba28464d7c9009ebaba423a2c3",
+ "value": 25998
+ }
+ },
+ "befcc82bdc8f42b289933521c6112806": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_bbb6764656ae42718112c4746cdc1de4",
+ "placeholder": "",
+ "style": "IPY_MODEL_ff71d073a9ff47eea4864a272602d179",
+ "value": " 154k/? [00:00<00:00, 2.82MB/s]"
+ }
+ },
+ "03fe4d1cdf60409ba40489ce3ec8a872": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "96efaef86f5d4e87b2bdc5bf59fbd165": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5700d7adc9e04a159939285e15b9a642": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "294bc7b70b67495987a037755b3888dd": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "18009cba28464d7c9009ebaba423a2c3": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "ProgressStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "bbb6764656ae42718112c4746cdc1de4": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "ff71d073a9ff47eea4864a272602d179": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "dedcfbe82336450fb37857b0fc6b9e45": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HBoxModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_c59d097c733e4891b54056652df044ea",
+ "IPY_MODEL_5efe0e03dc334db9b89e6689ff75fd84",
+ "IPY_MODEL_c4b9f3a6be7c45818ca8e0ac3eac93d5"
+ ],
+ "layout": "IPY_MODEL_2a4db2d2318a474b9df8a70095430ac4"
+ }
+ },
+ "c59d097c733e4891b54056652df044ea": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_68e8b5f38ae9458b9eb1d28fd64a209d",
+ "placeholder": "",
+ "style": "IPY_MODEL_3eed62082b974b3eaef1ec0c51d2a313",
+ "value": "Downloading https://huggingface.co/stanfordnlp/stanza-pt/resolve/v1.4.0/models/tokenize/bosque.pt: 100%"
+ }
+ },
+ "5efe0e03dc334db9b89e6689ff75fd84": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "FloatProgressModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_3893e03934e842b49760e800a67de7f8",
+ "max": 635807,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_49c2fb43e8c242d0b06b44e5fd3f879e",
+ "value": 635807
+ }
+ },
+ "c4b9f3a6be7c45818ca8e0ac3eac93d5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e7aee431537a4ec285cbbd4ca3da0c54",
+ "placeholder": "",
+ "style": "IPY_MODEL_5d491f77b3e9475590fb81cbcf908fd9",
+ "value": " 636k/636k [00:00<00:00, 2.23MB/s]"
+ }
+ },
+ "2a4db2d2318a474b9df8a70095430ac4": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "68e8b5f38ae9458b9eb1d28fd64a209d": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "3eed62082b974b3eaef1ec0c51d2a313": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "3893e03934e842b49760e800a67de7f8": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "49c2fb43e8c242d0b06b44e5fd3f879e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "ProgressStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "e7aee431537a4ec285cbbd4ca3da0c54": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5d491f77b3e9475590fb81cbcf908fd9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "5ca98c547e5249dbbd158db62b6029ca": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HBoxModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_5a3335da879b46d6a9878b480417408f",
+ "IPY_MODEL_9904ebb33cd0447395d0fbef9c505ee3",
+ "IPY_MODEL_8fa4388aea2549dab19b5c904c60b5d5"
+ ],
+ "layout": "IPY_MODEL_ef16234f1e76491e92c92a9b56a6edbf"
+ }
+ },
+ "5a3335da879b46d6a9878b480417408f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b0f9d27ec6cd41c9934592ce909aaafb",
+ "placeholder": "",
+ "style": "IPY_MODEL_7d3e77d6fb2f40d8991c73c322d1cc54",
+ "value": "Downloading https://huggingface.co/stanfordnlp/stanza-pt/resolve/v1.4.0/models/mwt/bosque.pt: 100%"
+ }
+ },
+ "9904ebb33cd0447395d0fbef9c505ee3": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "FloatProgressModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_7a60e1d28bf948fe89d1a2c088309e80",
+ "max": 601808,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_a0891fac15b5416383853c81bfd1b459",
+ "value": 601808
+ }
+ },
+ "8fa4388aea2549dab19b5c904c60b5d5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_cf80ad14241d4ce1b4a39bf5c6c542fe",
+ "placeholder": "",
+ "style": "IPY_MODEL_b51c392dc81c4dacbecc73c48f32647a",
+ "value": " 602k/602k [00:00<00:00, 1.56MB/s]"
+ }
+ },
+ "ef16234f1e76491e92c92a9b56a6edbf": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b0f9d27ec6cd41c9934592ce909aaafb": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "7d3e77d6fb2f40d8991c73c322d1cc54": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "7a60e1d28bf948fe89d1a2c088309e80": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "a0891fac15b5416383853c81bfd1b459": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "ProgressStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "cf80ad14241d4ce1b4a39bf5c6c542fe": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b51c392dc81c4dacbecc73c48f32647a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "60735140e14c4bc1b83e603470ab5995": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HBoxModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_57c21d060e454d868e19f9629f540942",
+ "IPY_MODEL_935698d26bfb4696b76047ff452db4c9",
+ "IPY_MODEL_25d4f6dd59104eb9adbe4b32c72d6ee2"
+ ],
+ "layout": "IPY_MODEL_3267681711eb446c8c17b4130de939bb"
+ }
+ },
+ "57c21d060e454d868e19f9629f540942": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_d5eca874e97742f6a171b9650b56df4e",
+ "placeholder": "",
+ "style": "IPY_MODEL_452f0f7e829544d9a38205028020e519",
+ "value": "Downloading https://huggingface.co/stanfordnlp/stanza-pt/resolve/v1.4.0/models/pos/bosque.pt: 100%"
+ }
+ },
+ "935698d26bfb4696b76047ff452db4c9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "FloatProgressModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_9dfd979563c54f9b9a5d69a5b878f915",
+ "max": 18022686,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_638118770ef94048851f595e50b88bb9",
+ "value": 18022686
+ }
+ },
+ "25d4f6dd59104eb9adbe4b32c72d6ee2": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e345f0b9666944989e4ff468bfddb0fc",
+ "placeholder": "",
+ "style": "IPY_MODEL_5cc6c79bfe25477abdde76e2f61ea6d1",
+ "value": " 18.0M/18.0M [00:00<00:00, 28.7MB/s]"
+ }
+ },
+ "3267681711eb446c8c17b4130de939bb": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d5eca874e97742f6a171b9650b56df4e": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "452f0f7e829544d9a38205028020e519": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "9dfd979563c54f9b9a5d69a5b878f915": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "638118770ef94048851f595e50b88bb9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "ProgressStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "e345f0b9666944989e4ff468bfddb0fc": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5cc6c79bfe25477abdde76e2f61ea6d1": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "1b697eebfd9b4e03a5fcb4fef129d8a5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HBoxModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_de17663d8b8547fe9383414dccf13430",
+ "IPY_MODEL_4af63bf8a3524d418f166d7d32334c4b",
+ "IPY_MODEL_a40a937840aa44739a135c04d2041ad3"
+ ],
+ "layout": "IPY_MODEL_4b27192eadfe4edf9eef8286282b91c4"
+ }
+ },
+ "de17663d8b8547fe9383414dccf13430": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_ececc0b7018a4381956abbd655847558",
+ "placeholder": "",
+ "style": "IPY_MODEL_2739d00e5ae04493b91d2e4494d7f256",
+ "value": "Downloading https://huggingface.co/stanfordnlp/stanza-pt/resolve/v1.4.0/models/lemma/bosque.pt: 100%"
+ }
+ },
+ "4af63bf8a3524d418f166d7d32334c4b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "FloatProgressModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e6a212657525455aaee924a1a3ed78bd",
+ "max": 3856889,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_e9be765bc39f4d188ebda3985116ea58",
+ "value": 3856889
+ }
+ },
+ "a40a937840aa44739a135c04d2041ad3": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_1b8328e196724e4e9cfd1009282bf46d",
+ "placeholder": "",
+ "style": "IPY_MODEL_5993b083610249b18a3def94ee72b31c",
+ "value": " 3.86M/3.86M [00:00<00:00, 7.45MB/s]"
+ }
+ },
+ "4b27192eadfe4edf9eef8286282b91c4": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "ececc0b7018a4381956abbd655847558": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2739d00e5ae04493b91d2e4494d7f256": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "e6a212657525455aaee924a1a3ed78bd": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e9be765bc39f4d188ebda3985116ea58": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "ProgressStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "1b8328e196724e4e9cfd1009282bf46d": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5993b083610249b18a3def94ee72b31c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "6e2473ca8601454493b0ff58174f7d34": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HBoxModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_632b24d3d0ff40169fc774ba8da8e15c",
+ "IPY_MODEL_df9a8527b9c24249ab340cdbdba35400",
+ "IPY_MODEL_44531601ee7e42bea38bf2c03220746d"
+ ],
+ "layout": "IPY_MODEL_028e72efb22b45d4804eaaaf6dcefc7e"
+ }
+ },
+ "632b24d3d0ff40169fc774ba8da8e15c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_cf11eb172bd64ac9a437c71cb6d8a23f",
+ "placeholder": "",
+ "style": "IPY_MODEL_7fcff08c102b472f8a0cbc52e02243c3",
+ "value": "Downloading https://huggingface.co/stanfordnlp/stanza-pt/resolve/v1.4.0/models/depparse/bosque.pt: 100%"
+ }
+ },
+ "df9a8527b9c24249ab340cdbdba35400": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "FloatProgressModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_10ab218a99604c379828569d897ce978",
+ "max": 102331699,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_4961d0c247114355b65ec90b25532918",
+ "value": 102331699
+ }
+ },
+ "44531601ee7e42bea38bf2c03220746d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_d83d29b0d1a04156ab35fa0dfa3e76b5",
+ "placeholder": "",
+ "style": "IPY_MODEL_ac98ff108271427b85e475e386dae38e",
+ "value": " 102M/102M [00:02<00:00, 55.3MB/s]"
+ }
+ },
+ "028e72efb22b45d4804eaaaf6dcefc7e": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "cf11eb172bd64ac9a437c71cb6d8a23f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "7fcff08c102b472f8a0cbc52e02243c3": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "10ab218a99604c379828569d897ce978": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4961d0c247114355b65ec90b25532918": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "ProgressStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "d83d29b0d1a04156ab35fa0dfa3e76b5": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "ac98ff108271427b85e475e386dae38e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "538e612fee564d90962309a00ae34045": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HBoxModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_6da7345b165d46cc99a304dbf799c2f3",
+ "IPY_MODEL_c4cc7c8152c244be81b2f1595c304732",
+ "IPY_MODEL_a7b798ba191647829e9c9832cc5a30fd"
+ ],
+ "layout": "IPY_MODEL_1fee3d8b44624aa887901ab50e4fcf48"
+ }
+ },
+ "6da7345b165d46cc99a304dbf799c2f3": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_d843dee6a5d04233944e61b1896429ee",
+ "placeholder": "",
+ "style": "IPY_MODEL_8c6614d22e774c3ea79255aa189b52e9",
+ "value": "Downloading https://huggingface.co/stanfordnlp/stanza-pt/resolve/v1.4.0/models/pretrain/bosque.pt: 100%"
+ }
+ },
+ "c4cc7c8152c244be81b2f1595c304732": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "FloatProgressModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_9cc604dc84854e359534d17bdcd98f04",
+ "max": 106904293,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_f4058990c56b4c7b94b37c77961be5c4",
+ "value": 106904293
+ }
+ },
+ "a7b798ba191647829e9c9832cc5a30fd": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_2c07f32c04e04aa4904474b015937645",
+ "placeholder": "",
+ "style": "IPY_MODEL_03505170957e49139861f61b48c998a1",
+ "value": " 107M/107M [00:02<00:00, 55.0MB/s]"
+ }
+ },
+ "1fee3d8b44624aa887901ab50e4fcf48": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d843dee6a5d04233944e61b1896429ee": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "8c6614d22e774c3ea79255aa189b52e9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "9cc604dc84854e359534d17bdcd98f04": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "f4058990c56b4c7b94b37c77961be5c4": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "ProgressStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "2c07f32c04e04aa4904474b015937645": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "03505170957e49139861f61b48c998a1": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "6666e1b5677e4952a90f4a016cc9a78f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HBoxModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_aa21fc27ba864e3db8ac368712761aa3",
+ "IPY_MODEL_5697cec92ad94cf5a9b6790647f95251",
+ "IPY_MODEL_cfd61cc8ca2c4462878dd13e14a2f814"
+ ],
+ "layout": "IPY_MODEL_4212904855b0484ebb305692efb61ade"
+ }
+ },
+ "aa21fc27ba864e3db8ac368712761aa3": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_c9b7d9b16a6c4e4f808c180866897897",
+ "placeholder": "",
+ "style": "IPY_MODEL_f9146b3bf2a24084899bf9b795c5de94",
+ "value": "Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.4.0.json: "
+ }
+ },
+ "5697cec92ad94cf5a9b6790647f95251": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "FloatProgressModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_9fea05c6aedd41e693c641b86e37b94b",
+ "max": 25998,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_8654f5ffefd5464e8808a1749947048f",
+ "value": 25998
+ }
+ },
+ "cfd61cc8ca2c4462878dd13e14a2f814": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b2893d5dd7df4516822f66fdab2f3aa5",
+ "placeholder": "",
+ "style": "IPY_MODEL_ab6b79a1f5ce44a49e1177aa6fcc5d03",
+ "value": " 154k/? [00:00<00:00, 2.05MB/s]"
+ }
+ },
+ "4212904855b0484ebb305692efb61ade": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "c9b7d9b16a6c4e4f808c180866897897": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "f9146b3bf2a24084899bf9b795c5de94": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "9fea05c6aedd41e693c641b86e37b94b": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "8654f5ffefd5464e8808a1749947048f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "ProgressStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "b2893d5dd7df4516822f66fdab2f3aa5": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "ab6b79a1f5ce44a49e1177aa6fcc5d03": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ }
+ }
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "view-in-github",
+ "colab_type": "text"
+ },
+ "source": [
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "Lo77yoXNo1GJ"
+ },
+ "source": [
+ "\n",
+ "# RESEARCH GROUP\n",
+ "## Noie\n",
+ "### An Open Information Extraction System for Portuguese texts, based on a Dependency Parser and Handcrafted Rules, inspired by ClausIE\n",
+ "\n",
+ "https://formas.ufba.br/\n",
+ "\n",
+ "How to cite us:\n",
+ "\n",
+ "?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "5KzP1HLAv2iE",
+ "outputId": "3012b944-33e2-4d7d-e3be-70e0fa96def1"
+ },
+ "source": [
+ "!pip install lemminflect"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
+ "Collecting lemminflect\n",
+ " Downloading lemminflect-0.2.2-py3-none-any.whl (769 kB)\n",
+ "\u001b[K |████████████████████████████████| 769 kB 8.4 MB/s \n",
+ "\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from lemminflect) (1.21.6)\n",
+ "Installing collected packages: lemminflect\n",
+ "Successfully installed lemminflect-0.2.2\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "E4ds08alwScu",
+ "outputId": "4e72b911-64ac-4d76-b81a-7732104a6b7f"
+ },
+ "source": [
+ "!pip install stanza"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
+ "Collecting stanza\n",
+ " Downloading stanza-1.4.0-py3-none-any.whl (574 kB)\n",
+ "\u001b[K |████████████████████████████████| 574 kB 8.2 MB/s \n",
+ "\u001b[?25hRequirement already satisfied: protobuf in /usr/local/lib/python3.7/dist-packages (from stanza) (3.17.3)\n",
+ "Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from stanza) (2.23.0)\n",
+ "Collecting transformers\n",
+ " Downloading transformers-4.19.2-py3-none-any.whl (4.2 MB)\n",
+ "\u001b[K |████████████████████████████████| 4.2 MB 45.4 MB/s \n",
+ "\u001b[?25hCollecting emoji\n",
+ " Downloading emoji-1.7.0.tar.gz (175 kB)\n",
+ "\u001b[K |████████████████████████████████| 175 kB 50.1 MB/s \n",
+ "\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from stanza) (1.21.6)\n",
+ "Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from stanza) (1.15.0)\n",
+ "Requirement already satisfied: torch>=1.3.0 in /usr/local/lib/python3.7/dist-packages (from stanza) (1.11.0+cu113)\n",
+ "Requirement already satisfied: tqdm in /usr/local/lib/python3.7/dist-packages (from stanza) (4.64.0)\n",
+ "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from torch>=1.3.0->stanza) (4.2.0)\n",
+ "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->stanza) (3.0.4)\n",
+ "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->stanza) (2.10)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->stanza) (2022.5.18.1)\n",
+ "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->stanza) (1.24.3)\n",
+ "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.7/dist-packages (from transformers->stanza) (21.3)\n",
+ "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers->stanza) (3.7.0)\n",
+ "Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from transformers->stanza) (4.11.4)\n",
+ "Collecting tokenizers!=0.11.3,<0.13,>=0.11.1\n",
+ " Downloading tokenizers-0.12.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)\n",
+ "\u001b[K |████████████████████████████████| 6.6 MB 45.1 MB/s \n",
+ "\u001b[?25hCollecting huggingface-hub<1.0,>=0.1.0\n",
+ " Downloading huggingface_hub-0.7.0-py3-none-any.whl (86 kB)\n",
+ "\u001b[K |████████████████████████████████| 86 kB 4.7 MB/s \n",
+ "\u001b[?25hRequirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers->stanza) (2019.12.20)\n",
+ "Collecting pyyaml>=5.1\n",
+ " Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)\n",
+ "\u001b[K |████████████████████████████████| 596 kB 54.7 MB/s \n",
+ "\u001b[?25hRequirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=20.0->transformers->stanza) (3.0.9)\n",
+ "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->transformers->stanza) (3.8.0)\n",
+ "Building wheels for collected packages: emoji\n",
+ " Building wheel for emoji (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ " Created wheel for emoji: filename=emoji-1.7.0-py3-none-any.whl size=171046 sha256=06a49a2eec7ed186f80f65cbbb9f22e5914d4c14a9a5495a74ea120f740176e6\n",
+ " Stored in directory: /root/.cache/pip/wheels/8a/4e/b6/57b01db010d17ef6ea9b40300af725ef3e210cb1acfb7ac8b6\n",
+ "Successfully built emoji\n",
+ "Installing collected packages: pyyaml, tokenizers, huggingface-hub, transformers, emoji, stanza\n",
+ " Attempting uninstall: pyyaml\n",
+ " Found existing installation: PyYAML 3.13\n",
+ " Uninstalling PyYAML-3.13:\n",
+ " Successfully uninstalled PyYAML-3.13\n",
+ "Successfully installed emoji-1.7.0 huggingface-hub-0.7.0 pyyaml-6.0 stanza-1.4.0 tokenizers-0.12.1 transformers-4.19.2\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 1000
+ },
+ "id": "CE8dWc2Kwlnw",
+ "outputId": "d8d92429-be6c-456c-afda-c971a42c8c6e"
+ },
+ "source": [
+ "!pip install spacy_stanza"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
+ "Collecting spacy_stanza\n",
+ " Downloading spacy_stanza-1.0.2-py3-none-any.whl (9.7 kB)\n",
+ "Requirement already satisfied: stanza<1.5.0,>=1.2.0 in /usr/local/lib/python3.7/dist-packages (from spacy_stanza) (1.4.0)\n",
+ "Collecting spacy<4.0.0,>=3.0.0\n",
+ " Downloading spacy-3.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.2 MB)\n",
+ "\u001b[K |████████████████████████████████| 6.2 MB 7.8 MB/s \n",
+ "\u001b[?25hRequirement already satisfied: jinja2 in /usr/local/lib/python3.7/dist-packages (from spacy<4.0.0,>=3.0.0->spacy_stanza) (2.11.3)\n",
+ "Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/local/lib/python3.7/dist-packages (from spacy<4.0.0,>=3.0.0->spacy_stanza) (2.23.0)\n",
+ "Requirement already satisfied: wasabi<1.1.0,>=0.9.1 in /usr/local/lib/python3.7/dist-packages (from spacy<4.0.0,>=3.0.0->spacy_stanza) (0.9.1)\n",
+ "Collecting srsly<3.0.0,>=2.4.3\n",
+ " Downloading srsly-2.4.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (457 kB)\n",
+ "\u001b[K |████████████████████████████████| 457 kB 40.2 MB/s \n",
+ "\u001b[?25hRequirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.7/dist-packages (from spacy<4.0.0,>=3.0.0->spacy_stanza) (1.0.7)\n",
+ "Collecting typing-extensions<4.0.0.0,>=3.7.4\n",
+ " Downloading typing_extensions-3.10.0.2-py3-none-any.whl (26 kB)\n",
+ "Collecting langcodes<4.0.0,>=3.2.0\n",
+ " Downloading langcodes-3.3.0-py3-none-any.whl (181 kB)\n",
+ "\u001b[K |████████████████████████████████| 181 kB 39.7 MB/s \n",
+ "\u001b[?25hCollecting spacy-loggers<2.0.0,>=1.0.0\n",
+ " Downloading spacy_loggers-1.0.2-py3-none-any.whl (7.2 kB)\n",
+ "Requirement already satisfied: setuptools in /usr/local/lib/python3.7/dist-packages (from spacy<4.0.0,>=3.0.0->spacy_stanza) (57.4.0)\n",
+ "Collecting typer<0.5.0,>=0.3.0\n",
+ " Downloading typer-0.4.1-py3-none-any.whl (27 kB)\n",
+ "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy<4.0.0,>=3.0.0->spacy_stanza) (2.0.6)\n",
+ "Requirement already satisfied: numpy>=1.15.0 in /usr/local/lib/python3.7/dist-packages (from spacy<4.0.0,>=3.0.0->spacy_stanza) (1.21.6)\n",
+ "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.7/dist-packages (from spacy<4.0.0,>=3.0.0->spacy_stanza) (21.3)\n",
+ "Requirement already satisfied: blis<0.8.0,>=0.4.0 in /usr/local/lib/python3.7/dist-packages (from spacy<4.0.0,>=3.0.0->spacy_stanza) (0.4.1)\n",
+ "Collecting spacy-legacy<3.1.0,>=3.0.9\n",
+ " Downloading spacy_legacy-3.0.9-py2.py3-none-any.whl (20 kB)\n",
+ "Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy<4.0.0,>=3.0.0->spacy_stanza) (3.0.6)\n",
+ "Collecting pydantic!=1.8,!=1.8.1,<1.9.0,>=1.7.4\n",
+ " Downloading pydantic-1.8.2-cp37-cp37m-manylinux2014_x86_64.whl (10.1 MB)\n",
+ "\u001b[K |████████████████████████████████| 10.1 MB 41.0 MB/s \n",
+ "\u001b[?25hRequirement already satisfied: tqdm<5.0.0,>=4.38.0 in /usr/local/lib/python3.7/dist-packages (from spacy<4.0.0,>=3.0.0->spacy_stanza) (4.64.0)\n",
+ "Collecting pathy>=0.3.5\n",
+ " Downloading pathy-0.6.1-py3-none-any.whl (42 kB)\n",
+ "\u001b[K |████████████████████████████████| 42 kB 1.0 MB/s \n",
+ "\u001b[?25hCollecting thinc<8.1.0,>=8.0.14\n",
+ " Downloading thinc-8.0.17-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (660 kB)\n",
+ "\u001b[K |████████████████████████████████| 660 kB 55.7 MB/s \n",
+ "\u001b[?25hCollecting catalogue<2.1.0,>=2.0.6\n",
+ " Downloading catalogue-2.0.7-py3-none-any.whl (17 kB)\n",
+ "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from catalogue<2.1.0,>=2.0.6->spacy<4.0.0,>=3.0.0->spacy_stanza) (3.8.0)\n",
+ "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=20.0->spacy<4.0.0,>=3.0.0->spacy_stanza) (3.0.9)\n",
+ "Collecting smart-open<6.0.0,>=5.0.0\n",
+ " Downloading smart_open-5.2.1-py3-none-any.whl (58 kB)\n",
+ "\u001b[K |████████████████████████████████| 58 kB 6.3 MB/s \n",
+ "\u001b[?25hRequirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy<4.0.0,>=3.0.0->spacy_stanza) (1.24.3)\n",
+ "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy<4.0.0,>=3.0.0->spacy_stanza) (3.0.4)\n",
+ "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy<4.0.0,>=3.0.0->spacy_stanza) (2.10)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy<4.0.0,>=3.0.0->spacy_stanza) (2022.5.18.1)\n",
+ "Requirement already satisfied: transformers in /usr/local/lib/python3.7/dist-packages (from stanza<1.5.0,>=1.2.0->spacy_stanza) (4.19.2)\n",
+ "Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from stanza<1.5.0,>=1.2.0->spacy_stanza) (1.15.0)\n",
+ "Requirement already satisfied: torch>=1.3.0 in /usr/local/lib/python3.7/dist-packages (from stanza<1.5.0,>=1.2.0->spacy_stanza) (1.11.0+cu113)\n",
+ "Requirement already satisfied: emoji in /usr/local/lib/python3.7/dist-packages (from stanza<1.5.0,>=1.2.0->spacy_stanza) (1.7.0)\n",
+ "Requirement already satisfied: protobuf in /usr/local/lib/python3.7/dist-packages (from stanza<1.5.0,>=1.2.0->spacy_stanza) (3.17.3)\n",
+ "Requirement already satisfied: click<9.0.0,>=7.1.1 in /usr/local/lib/python3.7/dist-packages (from typer<0.5.0,>=0.3.0->spacy<4.0.0,>=3.0.0->spacy_stanza) (7.1.2)\n",
+ "Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.7/dist-packages (from jinja2->spacy<4.0.0,>=3.0.0->spacy_stanza) (2.0.1)\n",
+ "Requirement already satisfied: huggingface-hub<1.0,>=0.1.0 in /usr/local/lib/python3.7/dist-packages (from transformers->stanza<1.5.0,>=1.2.0->spacy_stanza) (0.7.0)\n",
+ "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.7/dist-packages (from transformers->stanza<1.5.0,>=1.2.0->spacy_stanza) (6.0)\n",
+ "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers->stanza<1.5.0,>=1.2.0->spacy_stanza) (3.7.0)\n",
+ "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers->stanza<1.5.0,>=1.2.0->spacy_stanza) (2019.12.20)\n",
+ "Requirement already satisfied: tokenizers!=0.11.3,<0.13,>=0.11.1 in /usr/local/lib/python3.7/dist-packages (from transformers->stanza<1.5.0,>=1.2.0->spacy_stanza) (0.12.1)\n",
+ "Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from transformers->stanza<1.5.0,>=1.2.0->spacy_stanza) (4.11.4)\n",
+ "Installing collected packages: typing-extensions, catalogue, typer, srsly, smart-open, pydantic, thinc, spacy-loggers, spacy-legacy, pathy, langcodes, spacy, spacy-stanza\n",
+ " Attempting uninstall: typing-extensions\n",
+ " Found existing installation: typing-extensions 4.2.0\n",
+ " Uninstalling typing-extensions-4.2.0:\n",
+ " Successfully uninstalled typing-extensions-4.2.0\n",
+ " Attempting uninstall: catalogue\n",
+ " Found existing installation: catalogue 1.0.0\n",
+ " Uninstalling catalogue-1.0.0:\n",
+ " Successfully uninstalled catalogue-1.0.0\n",
+ " Attempting uninstall: srsly\n",
+ " Found existing installation: srsly 1.0.5\n",
+ " Uninstalling srsly-1.0.5:\n",
+ " Successfully uninstalled srsly-1.0.5\n",
+ " Attempting uninstall: smart-open\n",
+ " Found existing installation: smart-open 6.0.0\n",
+ " Uninstalling smart-open-6.0.0:\n",
+ " Successfully uninstalled smart-open-6.0.0\n",
+ " Attempting uninstall: thinc\n",
+ " Found existing installation: thinc 7.4.0\n",
+ " Uninstalling thinc-7.4.0:\n",
+ " Successfully uninstalled thinc-7.4.0\n",
+ " Attempting uninstall: spacy\n",
+ " Found existing installation: spacy 2.2.4\n",
+ " Uninstalling spacy-2.2.4:\n",
+ " Successfully uninstalled spacy-2.2.4\n",
+ "Successfully installed catalogue-2.0.7 langcodes-3.3.0 pathy-0.6.1 pydantic-1.8.2 smart-open-5.2.1 spacy-3.3.0 spacy-legacy-3.0.9 spacy-loggers-1.0.2 spacy-stanza-1.0.2 srsly-2.4.3 thinc-8.0.17 typer-0.4.1 typing-extensions-3.10.0.2\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "application/vnd.colab-display-data+json": {
+ "pip_warning": {
+ "packages": [
+ "typing_extensions"
+ ]
+ }
+ }
+ },
+ "metadata": {}
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 912,
+ "referenced_widgets": [
+ "95e4153cff5345bfa05bbd03004fbce9",
+ "943c6f48b3d644e4b3cd9080267d356d",
+ "370944fc082048e5ab90be44e9bf1f57",
+ "befcc82bdc8f42b289933521c6112806",
+ "03fe4d1cdf60409ba40489ce3ec8a872",
+ "96efaef86f5d4e87b2bdc5bf59fbd165",
+ "5700d7adc9e04a159939285e15b9a642",
+ "294bc7b70b67495987a037755b3888dd",
+ "18009cba28464d7c9009ebaba423a2c3",
+ "bbb6764656ae42718112c4746cdc1de4",
+ "ff71d073a9ff47eea4864a272602d179",
+ "dedcfbe82336450fb37857b0fc6b9e45",
+ "c59d097c733e4891b54056652df044ea",
+ "5efe0e03dc334db9b89e6689ff75fd84",
+ "c4b9f3a6be7c45818ca8e0ac3eac93d5",
+ "2a4db2d2318a474b9df8a70095430ac4",
+ "68e8b5f38ae9458b9eb1d28fd64a209d",
+ "3eed62082b974b3eaef1ec0c51d2a313",
+ "3893e03934e842b49760e800a67de7f8",
+ "49c2fb43e8c242d0b06b44e5fd3f879e",
+ "e7aee431537a4ec285cbbd4ca3da0c54",
+ "5d491f77b3e9475590fb81cbcf908fd9",
+ "5ca98c547e5249dbbd158db62b6029ca",
+ "5a3335da879b46d6a9878b480417408f",
+ "9904ebb33cd0447395d0fbef9c505ee3",
+ "8fa4388aea2549dab19b5c904c60b5d5",
+ "ef16234f1e76491e92c92a9b56a6edbf",
+ "b0f9d27ec6cd41c9934592ce909aaafb",
+ "7d3e77d6fb2f40d8991c73c322d1cc54",
+ "7a60e1d28bf948fe89d1a2c088309e80",
+ "a0891fac15b5416383853c81bfd1b459",
+ "cf80ad14241d4ce1b4a39bf5c6c542fe",
+ "b51c392dc81c4dacbecc73c48f32647a",
+ "60735140e14c4bc1b83e603470ab5995",
+ "57c21d060e454d868e19f9629f540942",
+ "935698d26bfb4696b76047ff452db4c9",
+ "25d4f6dd59104eb9adbe4b32c72d6ee2",
+ "3267681711eb446c8c17b4130de939bb",
+ "d5eca874e97742f6a171b9650b56df4e",
+ "452f0f7e829544d9a38205028020e519",
+ "9dfd979563c54f9b9a5d69a5b878f915",
+ "638118770ef94048851f595e50b88bb9",
+ "e345f0b9666944989e4ff468bfddb0fc",
+ "5cc6c79bfe25477abdde76e2f61ea6d1",
+ "1b697eebfd9b4e03a5fcb4fef129d8a5",
+ "de17663d8b8547fe9383414dccf13430",
+ "4af63bf8a3524d418f166d7d32334c4b",
+ "a40a937840aa44739a135c04d2041ad3",
+ "4b27192eadfe4edf9eef8286282b91c4",
+ "ececc0b7018a4381956abbd655847558",
+ "2739d00e5ae04493b91d2e4494d7f256",
+ "e6a212657525455aaee924a1a3ed78bd",
+ "e9be765bc39f4d188ebda3985116ea58",
+ "1b8328e196724e4e9cfd1009282bf46d",
+ "5993b083610249b18a3def94ee72b31c",
+ "6e2473ca8601454493b0ff58174f7d34",
+ "632b24d3d0ff40169fc774ba8da8e15c",
+ "df9a8527b9c24249ab340cdbdba35400",
+ "44531601ee7e42bea38bf2c03220746d",
+ "028e72efb22b45d4804eaaaf6dcefc7e",
+ "cf11eb172bd64ac9a437c71cb6d8a23f",
+ "7fcff08c102b472f8a0cbc52e02243c3",
+ "10ab218a99604c379828569d897ce978",
+ "4961d0c247114355b65ec90b25532918",
+ "d83d29b0d1a04156ab35fa0dfa3e76b5",
+ "ac98ff108271427b85e475e386dae38e",
+ "538e612fee564d90962309a00ae34045",
+ "6da7345b165d46cc99a304dbf799c2f3",
+ "c4cc7c8152c244be81b2f1595c304732",
+ "a7b798ba191647829e9c9832cc5a30fd",
+ "1fee3d8b44624aa887901ab50e4fcf48",
+ "d843dee6a5d04233944e61b1896429ee",
+ "8c6614d22e774c3ea79255aa189b52e9",
+ "9cc604dc84854e359534d17bdcd98f04",
+ "f4058990c56b4c7b94b37c77961be5c4",
+ "2c07f32c04e04aa4904474b015937645",
+ "03505170957e49139861f61b48c998a1"
+ ]
+ },
+ "id": "RUmc9cVHxDUv",
+ "outputId": "47740352-84b2-48e1-acd6-3e7b8d8d8b61"
+ },
+ "source": [
+ "import stanza\n",
+ "stanza.download('pt', package='bosque')"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.4.0.json: 0%| …"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "95e4153cff5345bfa05bbd03004fbce9"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "2022-06-05 17:37:43 INFO: Downloading these customized packages for language: pt (Portuguese)...\n",
+ "=======================\n",
+ "| Processor | Package |\n",
+ "-----------------------\n",
+ "| tokenize | bosque |\n",
+ "| mwt | bosque |\n",
+ "| pos | bosque |\n",
+ "| lemma | bosque |\n",
+ "| depparse | bosque |\n",
+ "| pretrain | bosque |\n",
+ "=======================\n",
+ "\n",
+ "INFO:stanza:Downloading these customized packages for language: pt (Portuguese)...\n",
+ "=======================\n",
+ "| Processor | Package |\n",
+ "-----------------------\n",
+ "| tokenize | bosque |\n",
+ "| mwt | bosque |\n",
+ "| pos | bosque |\n",
+ "| lemma | bosque |\n",
+ "| depparse | bosque |\n",
+ "| pretrain | bosque |\n",
+ "=======================\n",
+ "\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "Downloading https://huggingface.co/stanfordnlp/stanza-pt/resolve/v1.4.0/models/tokenize/bosque.pt: 0%| …"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "dedcfbe82336450fb37857b0fc6b9e45"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "Downloading https://huggingface.co/stanfordnlp/stanza-pt/resolve/v1.4.0/models/mwt/bosque.pt: 0%| |…"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "5ca98c547e5249dbbd158db62b6029ca"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "Downloading https://huggingface.co/stanfordnlp/stanza-pt/resolve/v1.4.0/models/pos/bosque.pt: 0%| |…"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "60735140e14c4bc1b83e603470ab5995"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "Downloading https://huggingface.co/stanfordnlp/stanza-pt/resolve/v1.4.0/models/lemma/bosque.pt: 0%| …"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "1b697eebfd9b4e03a5fcb4fef129d8a5"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "Downloading https://huggingface.co/stanfordnlp/stanza-pt/resolve/v1.4.0/models/depparse/bosque.pt: 0%| …"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "6e2473ca8601454493b0ff58174f7d34"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "Downloading https://huggingface.co/stanfordnlp/stanza-pt/resolve/v1.4.0/models/pretrain/bosque.pt: 0%| …"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "538e612fee564d90962309a00ae34045"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "2022-06-05 17:37:55 INFO: Finished downloading models and saved to /root/stanza_resources.\n",
+ "INFO:stanza:Finished downloading models and saved to /root/stanza_resources.\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "yE0dnNNdld9u"
+ },
+ "source": [
+ "#!/usr/bin/env python3\n",
+ "# -*- coding: utf-8 -*-\n",
+ "\"\"\"\n",
+ "Created on Wed Nov 6 18:07:24 2019\n",
+ "\n",
+ "@author: Emmanouil Theofanis Chourdakis\n",
+ "\n",
+ "ClausIE as a spaCy library\n",
+ "\n",
+ "History\n",
+ "\n",
+ "Rafael Glauber - rafaelglauber@gmail.com\n",
+ "- 2021/11/19 \n",
+ "- Source code compatible with spacy 3 (stanza models) and handcrafted rules for Portuguese Language.\n",
+ "\n",
+ "\"\"\"\n",
+ "\n",
+ "from numpy import right_shift\n",
+ "import spacy\n",
+ "import lemminflect\n",
+ "import logging\n",
+ "import typing\n",
+ "import stanza\n",
+ "import spacy_stanza\n",
+ "\n",
+ "from spacy.language import Language\n",
+ "from spacy.tokens import Span, Doc\n",
+ "from spacy.matcher import Matcher\n",
+ "from lemminflect import getInflection\n",
+ "\n",
+ "logging.basicConfig(level=logging.INFO)\n",
+ "\n",
+ "Doc.set_extension(\"clauses\", default=[], force=True)\n",
+ "Span.set_extension(\"clauses\", default=[], force=True)\n",
+ "\n",
+ "SUBJECT_DEPREL = ['nsubj', 'nsubj:pass']\n",
+ "IOBJECT_DEPREL = ['iobj']\n",
+ "DOBJECT_DEPREL = ['obj']\n",
+ "COMPLEMENT_DEPREL = ['ccomp', 'xcomp', 'amod', 'nmod', 'nummod']\n",
+ "ADVERBIAL_DEPREL = ['advcl', 'advmod', 'obl', 'obl:agent']\n",
+ "COPULAR_DEPREL = ['cop']\n",
+ "APPOSITIVE_DEPREL = ['appos']\n",
+ "CONJ_DEPREL = ['conj']\n",
+ "\n",
+ "RELATIVE_PRONOUN = ['qual', 'cujo', 'quanto', 'que', 'quem', 'onde']\n",
+ "\n",
+ "PRONOUN_POS = 'PRON'\n",
+ "VERB_POS = 'VERB'\n",
+ "\n",
+ "# synthetic copula inserted between a subject and its complement when a clause has no verb (e.g. appositions)\n",
+ "TOBE_VERB = 'é'\n",
+ "\n",
+ "class Clause:\n",
+ " \n",
+ " def __init__(\n",
+ " self,\n",
+ " subject: typing.Optional[Span] = None,\n",
+ " verb: typing.Optional[Span] = None,\n",
+ " indirect_object: typing.Optional[Span] = None,\n",
+ " direct_object: typing.Optional[Span] = None,\n",
+ " complement: typing.Optional[Span] = None,\n",
+ " adverbials: typing.List[Span] = None,\n",
+ " ):\n",
+ " \"\"\"\n",
+ " Parameters\n",
+ " ----------\n",
+ " subject : Span\n",
+ " Subject.\n",
+ " verb : Span\n",
+ " Verb.\n",
+ " indirect_object : Span, optional\n",
+ " Indirect object. The default is None.\n",
+ " direct_object : Span, optional\n",
+ " Direct object. The default is None.\n",
+ " complement : Span, optional\n",
+ " Complement. The default is None.\n",
+ " adverbials : list, optional\n",
+ " List of adverbials. The default is None, which is treated as an empty list.\n",
+ "\n",
+ " Returns\n",
+ " -------\n",
+ " None.\n",
+ "\n",
+ " \"\"\"\n",
+ " if adverbials is None:\n",
+ " adverbials = []\n",
+ "\n",
+ " self.subject = subject\n",
+ " self.verb = verb\n",
+ " self.indirect_object = indirect_object\n",
+ " self.direct_object = direct_object\n",
+ " self.complement = complement\n",
+ " self.adverbials = adverbials\n",
+ "\n",
+ " self.doc = self.subject.doc\n",
+ "\n",
+ " self.type = self._get_clause_type()\n",
+ "\n",
+ " def _get_clause_type(self):\n",
+ " has_verb = self.verb is not None\n",
+ " has_complement = self.complement is not None\n",
+ " has_adverbial = len(self.adverbials) > 0\n",
+ " has_direct_object = self.direct_object is not None\n",
+ " has_indirect_object = self.indirect_object is not None\n",
+ " has_object = has_direct_object or has_indirect_object\n",
+ " \n",
+ " clause_type = \"undefined\"\n",
+ "\n",
+ " if not has_verb:\n",
+ " clause_type = \"SVC\"\n",
+ " return clause_type\n",
+ "\n",
+ " if has_object:\n",
+ " if has_direct_object and has_indirect_object:\n",
+ " clause_type = \"SVOO\"\n",
+ " elif has_complement:\n",
+ " clause_type = \"SVOC\"\n",
+ " elif not has_adverbial or not has_direct_object:\n",
+ " clause_type = \"SVO\"\n",
+ " elif has_adverbial:\n",
+ " clause_type = \"SVOA\"\n",
+ " else:\n",
+ " clause_type = \"SVO\"\n",
+ " else:\n",
+ " if has_complement:\n",
+ " clause_type = \"SVC\"\n",
+ " elif not has_adverbial:\n",
+ " clause_type = \"SV\"\n",
+ " elif has_adverbial:\n",
+ " clause_type = \"SVA\"\n",
+ " else:\n",
+ " clause_type = \"SV\"\n",
+ "\n",
+ " return clause_type\n",
+ "\n",
+ " def __repr__(self):\n",
+ " return \"<{}, {}, {}, {}, {}, {}, {}>\".format(\n",
+ " self.type,\n",
+ " self.subject,\n",
+ " self.verb,\n",
+ " self.indirect_object,\n",
+ " self.direct_object,\n",
+ " self.complement,\n",
+ " self.adverbials,\n",
+ " )\n",
+ "\n",
+ " def to_propositions(\n",
+ " self, as_text: bool = False, inflect: str or None = \"VBD\", capitalize: bool = False\n",
+ " ):\n",
+ "\n",
+ " if inflect and not as_text:\n",
+ " logging.warning(\"`inflect' argument is ignored when `as_text==False'. To suppress this warning call `to_propositions' with the argument `inflect=None'\")\n",
+ " if capitalize and not as_text:\n",
+ " logging.warning(\"`capitalize' argument is ignored when `as_text==False'. To suppress this warning call `to_propositions' with the argument `capitalize=False\")\n",
+ "\n",
+ " propositions = []\n",
+ "\n",
+ " subjects = extract_ccs_from_token_at_root(self.subject)\n",
+ " direct_objects = extract_ccs_from_token_at_root(self.direct_object)\n",
+ " indirect_objects = extract_ccs_from_token_at_root(self.indirect_object)\n",
+ " complements = extract_ccs_from_token_at_root(self.complement)\n",
+ " verbs = [self.verb] if self.verb else []\n",
+ " \n",
+ " # synthetic verb\n",
+ " tobe_verb = nlp(TOBE_VERB)\n",
+ "\n",
+ " for subj in subjects:\n",
+ " if complements and not verbs:\n",
+ " for c in complements:\n",
+ " propositions.append((subj, tobe_verb, c))\n",
+ " propositions.append((subj, tobe_verb) + tuple(complements))\n",
+ "\n",
+ " for verb in verbs:\n",
+ " prop = [subj, verb]\n",
+ " if self.type in [\"SV\", \"SVA\"]:\n",
+ " if self.adverbials:\n",
+ " for a in self.adverbials:\n",
+ " propositions.append(tuple(prop + [a]))\n",
+ " propositions.append(tuple(prop + self.adverbials))\n",
+ " else:\n",
+ " propositions.append(tuple(prop))\n",
+ "\n",
+ " elif self.type == \"SVOO\":\n",
+ " for iobj in indirect_objects:\n",
+ " for dobj in direct_objects:\n",
+ " propositions.append((subj, verb, iobj, dobj))\n",
+ " elif self.type == \"SVO\":\n",
+ " for obj in direct_objects + indirect_objects:\n",
+ " propositions.append((subj, verb, obj))\n",
+ " for a in self.adverbials:\n",
+ " propositions.append((subj, verb, obj, a))\n",
+ " elif self.type == \"SVOA\":\n",
+ " for obj in direct_objects:\n",
+ " if self.adverbials:\n",
+ " for a in self.adverbials:\n",
+ " propositions.append(tuple(prop + [obj, a]))\n",
+ " propositions.append(tuple(prop + [obj] + self.adverbials))\n",
+ "\n",
+ " elif self.type == \"SVOC\":\n",
+ " for obj in indirect_objects + direct_objects:\n",
+ " if complements:\n",
+ " for c in complements:\n",
+ " propositions.append(tuple(prop + [obj, c]))\n",
+ " propositions.append(tuple(prop + [obj] + complements))\n",
+ " elif self.type == \"SVC\":\n",
+ " if complements:\n",
+ " for c in complements:\n",
+ " propositions.append(tuple(prop + [c]))\n",
+ " propositions.append(tuple(prop + complements))\n",
+ "\n",
+ " # Remove doubles\n",
+ " propositions = list(set(propositions))\n",
+ "\n",
+ " if as_text:\n",
+ " return _convert_clauses_to_text(\n",
+ " propositions, inflect=inflect, capitalize=capitalize\n",
+ " )\n",
+ "\n",
+ " return propositions\n",
+ "\n",
+ "def inflect_token(token, inflect):\n",
+ " if (\n",
+ " inflect\n",
+ " and token.pos_ == \"VERB\"\n",
+ " and \"AUX\" not in [tt.pos_ for tt in token.lefts]\n",
+ " # t is not preceded by an auxiliary verb (e.g. `the birds were ailing`)\n",
+ " ): # t `dreamed of becoming a dancer`\n",
+ " return str(token._.inflect(inflect))\n",
+ " else:\n",
+ " return str(token)\n",
+ "\n",
+ "\n",
+ "def _convert_clauses_to_text(propositions, inflect, capitalize):\n",
+ " proposition_texts = []\n",
+ " for proposition in propositions:\n",
+ " span_texts = []\n",
+ " for span in proposition:\n",
+ "\n",
+ " token_texts = []\n",
+ " for token in span:\n",
+ " token_texts.append(inflect_token(token, inflect))\n",
+ "\n",
+ " span_texts.append(\" \".join(token_texts))\n",
+ " proposition_texts.append(\" \".join(span_texts))\n",
+ "\n",
+ " if capitalize: # Capitalize and add a full stop.\n",
+ " proposition_texts = [text.capitalize() + \".\" for text in proposition_texts]\n",
+ "\n",
+ " return proposition_texts\n",
+ "\n",
+ "\n",
+ "def _get_verb_matches(span):\n",
+ " # 1. Find verb phrases in the span\n",
+ " # (see mdmjsh answer here: https://stackoverflow.com/questions/47856247/extract-verb-phrases-using-spacy)\n",
+ " verb_matcher = Matcher(span.vocab)\n",
+ " pattern = [\n",
+ " #[{\"POS\": \"AUX\"}], \n",
+ " #[{\"POS\": \"VERB\"}], \n",
+ " #[{\"POS\": \"VERB\", \"OP\": \"+\"}], \n",
+ " [{\"POS\": \"AUX\", \"OP\": \"+\"}, \n",
+ " {\"POS\": \"VERB\", \"OP\": \"*\"}, \n",
+ " {\"POS\": \"ADV\", \"OP\": \"*\"}, \n",
+ " {\"POS\": \"ADJ\", \"OP\": \"*\"}, \n",
+ " {\"POS\": \"DET\", \"OP\": \"*\"}, \n",
+ " {\"POS\": \"NOUN\", \"OP\": \"*\"}], \n",
+ " [{\"POS\": \"VERB\", \"OP\": \"+\"}, \n",
+ " {\"POS\": \"ADV\", \"OP\": \"*\"}, \n",
+ " {\"POS\": \"ADJ\", \"OP\": \"*\"}, \n",
+ " {\"POS\": \"DET\", \"OP\": \"*\"}, \n",
+ " {\"POS\": \"NOUN\", \"OP\": \"*\"}] \n",
+ " #[{\"POS\": \"AUX\"}, {\"POS\": \"NOUN\"}],\n",
+ " #[{\"POS\": \"AUX\"}, {\"POS\": \"DET\"}, {\"POS\": \"NOUN\"}]\n",
+ " ]\n",
+ " verb_matcher.add(\"Verb phrase\", pattern)\n",
+ " return verb_matcher(span)\n",
+ "\n",
+ "\n",
+ "def _get_verb_chunks(span):\n",
+ " matches = _get_verb_matches(span)\n",
+ "\n",
+ " # Filter matches (e.g. do not have both \"has won\" and \"won\" in verbs)\n",
+ " verb_chunks = []\n",
+ " for match in [span[start:end] for _, start, end in matches]:\n",
+ " if match.root not in [vp.root for vp in verb_chunks]:\n",
+ " verb_chunks.append(match)\n",
+ " return verb_chunks\n",
+ "\n",
+ "\n",
+ "def _get_subject(verb): \n",
+ " # get verb root token\n",
+ " root = verb.root\n",
+ " \n",
+ " # if it is a copular verb, look for the subject among the children of its head.\n",
+ " if root.dep_ in COPULAR_DEPREL:\n",
+ " children = root.head.children\n",
+ " else:\n",
+ " children = verb.root.children \n",
+ " \n",
+ " # default subject in SV format\n",
+ " for c in children:\n",
+ " if c.dep_ in SUBJECT_DEPREL:\n",
+ " subject = extract_span_from_entity(c)\n",
+ " # if the subject is a relative pronoun, return the span to its left instead\n",
+ " if (subject.root.pos_ == PRONOUN_POS) and (subject.root.lemma_ in RELATIVE_PRONOUN):\n",
+ " return extract_span_from_entity(subject.doc[:subject.root.i])\n",
+ " else: \n",
+ " return subject\n",
+ "\n",
+ " while root.dep_ in CONJ_DEPREL:\n",
+ " for c in root.children:\n",
+ " if c.dep_ in SUBJECT_DEPREL:\n",
+ " subject = extract_span_from_entity(c)\n",
+ " return subject\n",
+ "\n",
+ " if c.dep_ in ['acl', 'acl:relcl', 'advcl']:\n",
+ " subject = find_verb_subject(c)\n",
+ " return extract_span_from_entity(subject) if subject else None\n",
+ " \n",
+ " if root == verb.root.head: \n",
+ " if root.pos_ == VERB_POS:\n",
+ " root = root.head\n",
+ " else: \n",
+ " break\n",
+ " else:\n",
+ " root = verb.root.head\n",
+ "\n",
+ " for c in root.children:\n",
+ " if c.dep_ in SUBJECT_DEPREL:\n",
+ " subject = extract_span_from_entity(c)\n",
+ " return subject\n",
+ " return None\n",
+ "\n",
+ "def _find_matching_child(root, allowed_types):\n",
+ " for c in root.children:\n",
+ " if c.dep_ in allowed_types:\n",
+ " return extract_span_from_entity(c)\n",
+ " \n",
+ " for c in root.children:\n",
+ " if (c.dep_ in CONJ_DEPREL) and (c.pos_ == root.pos_):\n",
+ " return _find_matching_child(c, allowed_types=allowed_types)\n",
+ "\n",
+ " return None\n",
+ "\n",
+ "def _find_matching_parent(root, allowed_types):\n",
+ " sub_tree = _find_matching_child(root.head, allowed_types=allowed_types)\n",
+ "\n",
+ " if sub_tree == None or root.head.i > sub_tree.end:\n",
+ " return None\n",
+ " else: \n",
+ " return Span(root.doc, root.head.i, sub_tree.end)\n",
+ "\n",
+ "def extract_clauses(span):\n",
+ " clauses = []\n",
+ " verb_chunks = _get_verb_chunks(span)\n",
+ " for verb in verb_chunks:\n",
+ "\n",
+ " subject = _get_subject(verb)\n",
+ " if not subject:\n",
+ " continue\n",
+ "\n",
+ " complement = None\n",
+ "\n",
+ " # Check if there are phrases of the form, \"AE, a scientist of ...\"\n",
+ " # If so, add a new verbless clause made of the subject and a complement\n",
+ " # (rendered with the synthetic verb TOBE_VERB in to_propositions).\n",
+ " for c in subject.root.children:\n",
+ " if c.dep_ in APPOSITIVE_DEPREL: \n",
+ " appos = extract_span_from_entity(c)\n",
+ " complement = extract_span_from_entity_no_appos(subject.root)\n",
+ " # Change subject to appos for informative order in relationship\n",
+ " if (subject.root.pos_ == 'NOUN') and (appos.root.pos_ == 'PROPN'):\n",
+ " subject = appos \n",
+ " clause = Clause(subject=subject, complement=complement)\n",
+ " clauses.append(clause)\n",
+ "\n",
+ " indirect_object = _find_matching_child(verb.root, IOBJECT_DEPREL)\n",
+ " direct_object = _find_matching_child(verb.root, DOBJECT_DEPREL)\n",
+ " \n",
+ " # complement or \"predicate of the subject\" \n",
+ " if (verb.root.dep_ in COPULAR_DEPREL):\n",
+ " right = verb.doc[verb.root.i:].root\n",
+ " # if the right token is not a verb, look for the complement from its parent\n",
+ " if (right.pos_ != 'VERB'):\n",
+ " complement = _find_matching_parent(right, COMPLEMENT_DEPREL)\n",
+ " else: \n",
+ " complement = _find_matching_child(verb.root, COMPLEMENT_DEPREL)\n",
+ " \n",
+ " adverbials = [\n",
+ " extract_span_from_entity(c)\n",
+ " for c in verb.root.children\n",
+ " if c.dep_ in ADVERBIAL_DEPREL\n",
+ " ]\n",
+ " \n",
+ " clause = Clause(\n",
+ " subject=subject,\n",
+ " verb=verb,\n",
+ " indirect_object=indirect_object,\n",
+ " direct_object=direct_object,\n",
+ " complement=complement,\n",
+ " adverbials=adverbials,\n",
+ " )\n",
+ " clauses.append(clause)\n",
+ " return clauses\n",
+ "\n",
+ "@Language.component('openie')\n",
+ "def do_extract_clauses(doc):\n",
+ " for sent in doc.sents:\n",
+ " clauses = extract_clauses(sent)\n",
+ " sent._.clauses = clauses\n",
+ " doc._.clauses += clauses\n",
+ " return doc\n",
+ "\n",
+ "def extract_span_from_entity(token):\n",
+ " ent_subtree = sorted([c for c in token.subtree if c.pos_ != 'PUNCT'], key=lambda x: x.i)\n",
+ " return Span(token.doc, start=ent_subtree[0].i, end=ent_subtree[-1].i + 1)\n",
+ "\n",
+ "def extract_span_from_entity_no_appos(token):\n",
+ " ent_subtree = sorted(\n",
+ " [token] + [c for c in token.children if c.dep_ not in APPOSITIVE_DEPREL],\n",
+ " key=lambda x: x.i,\n",
+ " )\n",
+ " return Span(token.doc, start=ent_subtree[0].i, end=ent_subtree[-1].i + 1)\n",
+ "\n",
+ "def extract_ccs_from_token_at_root(span):\n",
+ " if span is None:\n",
+ " return []\n",
+ " else:\n",
+ " return [span]\n",
+ " #return extract_ccs_from_token(span.root)\n",
+ "\n",
+ "def find_verb_subject(v):\n",
+ " \"\"\"\n",
+ " Returns the subject of the verb. If it does not exist and the root is a head,\n",
+ " find the subject of that verb instead.\n",
+ " \"\"\"\n",
+ " if v.dep_ in SUBJECT_DEPREL:\n",
+ " return v\n",
+ " # guard against infinite recursion on root token\n",
+ " elif v.dep_ in [\"advcl\", \"acl\", \"acl:relcl\"] and v.head.dep_ != \"root\":\n",
+ " return find_verb_subject(v.head)\n",
+ "\n",
+ " for c in v.children:\n",
+ " if c.dep_ in SUBJECT_DEPREL:\n",
+ " return c\n",
+ " elif c.dep_ in [\"advcl\", \"acl\", \"acl:relcl\"] and v.head.dep_ != \"root\":\n",
+ " return find_verb_subject(v.head)\n"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "TRQFVkfilnX1"
+ },
+ "source": [
+ "# Run!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 1000,
+ "referenced_widgets": [
+ "6666e1b5677e4952a90f4a016cc9a78f",
+ "aa21fc27ba864e3db8ac368712761aa3",
+ "5697cec92ad94cf5a9b6790647f95251",
+ "cfd61cc8ca2c4462878dd13e14a2f814",
+ "4212904855b0484ebb305692efb61ade",
+ "c9b7d9b16a6c4e4f808c180866897897",
+ "f9146b3bf2a24084899bf9b795c5de94",
+ "9fea05c6aedd41e693c641b86e37b94b",
+ "8654f5ffefd5464e8808a1749947048f",
+ "b2893d5dd7df4516822f66fdab2f3aa5",
+ "ab6b79a1f5ce44a49e1177aa6fcc5d03"
+ ]
+ },
+ "id": "TwAQ2rG4vdJz",
+ "outputId": "03561d3f-5a4e-4410-d280-01fc7767f674"
+ },
+ "source": [
+ "if __name__ == \"__main__\":\n",
+ " import spacy\n",
+ "\n",
+ " nlp = spacy_stanza.load_pipeline(\"pt\")\n",
+ " nlp.add_pipe(\"openie\")\n",
+ "\n",
+ " text = [\n",
+ " \"Pinoquio disse que o heroi Super-man nasceu na extinta Kripton.\",\n",
+ " \"Em 21 de maio de 2013, os proprietários da NFL em suas reuniões de primavera em Boston votaram e premiaram o jogo no Levi's Stadium.\",\n",
+ " \"EA morreu em Princeton em 1995.\",\n",
+ " \"O diretor do filme, Mohsen Makhmalbaf, decide realizar uma chamada aberta para escalar os atores de seu próximo filme através de um anúncio de jornal.\",\n",
+ " \"No imenso desacerto que foi a defesa do Penafiel, o capitão Vasco foi o homem que ainda segurou as pontas.\",\n",
+ " \"Daniela Barreiro Claro é professora da UFBA e ensina Banco de Dados.\",\n",
+ " \"Os alunos querem aprender Matemática.\",\n",
+ " \"A intervenção de Pequim é, possivelmente, a de maior alcance, desde a entrega de Hong Kong pelo Reino Unido em 1997.\",\n",
+ " \"O dono da fazenda viajou para Salvador ontem.\",\n",
+ " \"Eu compro, empresto e vendo ouro.\",\n",
+ " \"Eu gosto de banana, pera e maça.\"\n",
+ " ]\n",
+ "\n",
+ " with open('out.txt', 'w') as output:\n",
+ " \n",
+ " for s in text:\n",
+ " doc = nlp(s)\n",
+ " #explacy.print_parse_info(nlp, s)\n",
+ " output.write(s + '\\n')\n",
+ " for prop in doc._.clauses:\n",
+ " output.write('\\t' + str(prop.to_propositions(inflect=None)) + '\\n')\n",
+ "\n",
+ " output.close() \n"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.4.0.json: 0%| …"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "6666e1b5677e4952a90f4a016cc9a78f"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "2022-06-05 17:40:30 INFO: Loading these models for language: pt (Portuguese):\n",
+ "==========================\n",
+ "| Processor | Package |\n",
+ "--------------------------\n",
+ "| tokenize | bosque |\n",
+ "| mwt | bosque |\n",
+ "| pos | bosque |\n",
+ "| lemma | bosque |\n",
+ "| depparse | bosque |\n",
+ "| constituency | cintil |\n",
+ "==========================\n",
+ "\n",
+ "INFO:stanza:Loading these models for language: pt (Portuguese):\n",
+ "==========================\n",
+ "| Processor | Package |\n",
+ "--------------------------\n",
+ "| tokenize | bosque |\n",
+ "| mwt | bosque |\n",
+ "| pos | bosque |\n",
+ "| lemma | bosque |\n",
+ "| depparse | bosque |\n",
+ "| constituency | cintil |\n",
+ "==========================\n",
+ "\n",
+ "2022-06-05 17:40:30 INFO: Use device: cpu\n",
+ "INFO:stanza:Use device: cpu\n",
+ "2022-06-05 17:40:30 INFO: Loading: tokenize\n",
+ "INFO:stanza:Loading: tokenize\n",
+ "2022-06-05 17:40:30 INFO: Loading: mwt\n",
+ "INFO:stanza:Loading: mwt\n",
+ "2022-06-05 17:40:30 INFO: Loading: pos\n",
+ "INFO:stanza:Loading: pos\n",
+ "2022-06-05 17:40:31 INFO: Loading: lemma\n",
+ "INFO:stanza:Loading: lemma\n",
+ "2022-06-05 17:40:31 INFO: Loading: depparse\n",
+ "INFO:stanza:Loading: depparse\n",
+ "2022-06-05 17:40:31 INFO: Loading: constituency\n",
+ "INFO:stanza:Loading: constituency\n",
+ "2022-06-05 17:40:32 INFO: Done loading processors!\n",
+ "INFO:stanza:Done loading processors!\n",
+ "/usr/local/lib/python3.7/dist-packages/spacy/language.py:1005: UserWarning: Due to multiword token expansion or an alignment issue, the original text has been replaced by space-separated expanded tokens.\n",
+ " doc = self._ensure_doc(text)\n",
+ "/usr/local/lib/python3.7/dist-packages/spacy/language.py:1005: UserWarning: Can't set named entities because of multi-word token expansion or because the character offsets don't map to valid tokens produced by the Stanza tokenizer:\n",
+ "Words: ['Pinoquio', 'disse', 'que', 'o', 'heroi', 'Super-man', 'nasceu', 'em', 'a', 'extinta', 'Kripton', '.']\n",
+ "Entities: []\n",
+ " doc = self._ensure_doc(text)\n",
+ "/usr/local/lib/python3.7/dist-packages/spacy/language.py:1005: UserWarning: Can't set named entities because of multi-word token expansion or because the character offsets don't map to valid tokens produced by the Stanza tokenizer:\n",
+ "Words: ['Em', '21', 'de', 'maio', 'de', '2013', ',', 'os', 'proprietários', 'de', 'a', 'NFL', 'em', 'suas', 'reuniões', 'de', 'primavera', 'em', 'Boston', 'votaram', 'e', 'premiaram', 'o', 'jogo', 'em', 'o', \"Levi's\", 'Stadium', '.']\n",
+ "Entities: []\n",
+ " doc = self._ensure_doc(text)\n",
+ "/usr/local/lib/python3.7/dist-packages/spacy/language.py:1005: UserWarning: Can't set named entities because of multi-word token expansion or because the character offsets don't map to valid tokens produced by the Stanza tokenizer:\n",
+ "Words: ['O', 'diretor', 'de', 'o', 'filme', ',', 'Mohsen', 'Makhmalbaf', ',', 'decide', 'realizar', 'uma', 'chamada', 'aberta', 'para', 'escalar', 'os', 'atores', 'de', 'seu', 'próximo', 'filme', 'através', 'de', 'um', 'anúncio', 'de', 'jornal', '.']\n",
+ "Entities: []\n",
+ " doc = self._ensure_doc(text)\n",
+ "/usr/local/lib/python3.7/dist-packages/spacy/language.py:1005: UserWarning: Can't set named entities because of multi-word token expansion or because the character offsets don't map to valid tokens produced by the Stanza tokenizer:\n",
+ "Words: ['Em', 'o', 'imenso', 'desacerto', 'que', 'foi', 'a', 'defesa', 'de', 'o', 'Penafiel', ',', 'o', 'capitão', 'Vasco', 'foi', 'o', 'homem', 'que', 'ainda', 'segurou', 'as', 'pontas', '.']\n",
+ "Entities: []\n",
+ " doc = self._ensure_doc(text)\n",
+ "/usr/local/lib/python3.7/dist-packages/spacy/language.py:1005: UserWarning: Can't set named entities because of multi-word token expansion or because the character offsets don't map to valid tokens produced by the Stanza tokenizer:\n",
+ "Words: ['Daniela', 'Barreiro', 'Claro', 'é', 'professora', 'de', 'a', 'UFBA', 'e', 'ensina', 'Banco', 'de', 'Dados', '.']\n",
+ "Entities: []\n",
+ " doc = self._ensure_doc(text)\n",
+ "/usr/local/lib/python3.7/dist-packages/spacy/language.py:1005: UserWarning: Can't set named entities because of multi-word token expansion or because the character offsets don't map to valid tokens produced by the Stanza tokenizer:\n",
+ "Words: ['A', 'intervenção', 'de', 'Pequim', 'é', ',', 'possivelmente', ',', 'a', 'de', 'maior', 'alcance', ',', 'desde', 'a', 'entrega', 'de', 'Hong', 'Kong', 'por', 'o', 'Reino', 'Unido', 'em', '1997', '.']\n",
+ "Entities: []\n",
+ " doc = self._ensure_doc(text)\n",
+ "/usr/local/lib/python3.7/dist-packages/spacy/language.py:1005: UserWarning: Can't set named entities because of multi-word token expansion or because the character offsets don't map to valid tokens produced by the Stanza tokenizer:\n",
+ "Words: ['O', 'dono', 'de', 'a', 'fazenda', 'viajou', 'para', 'Salvador', 'ontem', '.']\n",
+ "Entities: []\n",
+ " doc = self._ensure_doc(text)\n"
+ ]
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/src/noie.py b/src/noie.py
deleted file mode 100644
index e8d37e6..0000000
--- a/src/noie.py
+++ /dev/null
@@ -1,504 +0,0 @@
-# -*- coding: utf-8 -*-
-"""Noie.ipynb
-
-Automatically generated by Colaboratory.
-
-Original file is located at
- https://colab.research.google.com/drive/16WEb3jBSJ71EaBQl6JaIlhIMb3GbYlW0
-
-
-# RESEARCH GROUP
-## Noie
-### An Open Information Extraction System based on Dependency Parser and Handcrafted Rules for Portuguese texts inspired by ClausIE
-
-https://formas.ufba.br/
-
-How to cite us:
-
-?
-"""
-
-!pip install lemminflect
-
-!pip install stanza
-
-!pip install spacy_stanza
-
-stanza.download('pt', package='bosque')
-
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Wed Nov 6 18:07:24 2019
-
-@author: Emmanouil Theofanis Chourdakis
-
-Clausie as a spacy library
-
-History
-
-Rafael Glauber - rafaelglauber@gmail.com
-- 2021/11/19
-- Source code compatible with spacy 3 (stanza models) and handcrafted rules for Portuguese Language.
-
-"""
-
-from numpy import right_shift
-import spacy
-import lemminflect
-import logging
-import typing
-import stanza
-import spacy_stanza
-
-from spacy.language import Language
-from spacy.tokens import Span, Doc
-from spacy.matcher import Matcher
-from lemminflect import getInflection
-
-logging.basicConfig(level=logging.INFO)
-
-Doc.set_extension("clauses", default=[], force=True)
-Span.set_extension("clauses", default=[], force=True)
-
-SUBJECT_DEPREL = ['nsubj', 'nsubj:pass']
-IOBJECT_DEPREL = ['iobj']
-DOBJECT_DEPREL = ['obj']
-COMPLEMENT_DEPREL = ['ccomp', 'xcomp', 'amod', 'nmod', 'nummod']
-ADVERBIAL_DEPREL = ['advcl', 'advmod', 'obl', 'obl:agent']
-COPULAR_DEPREL = ['cop']
-APPOSITIVE_DEPREL = ['appos']
-CONJ_DEPREL = ['conj']
-
-RELATIVE_PRONOUN = ['qual', 'cujo', 'quanto', 'que', 'quem', 'onde']
-
-PRONOUN_POS = 'PRON'
-VERB_POS = 'VERB'
-
-# aux verb to appos modifier (synthetic relationship)
-TOBE_VERB = 'é'
-
-class Clause:
-
- def __init__(
- self,
- subject: typing.Optional[Span] = None,
- verb: typing.Optional[Span] = None,
- indirect_object: typing.Optional[Span] = None,
- direct_object: typing.Optional[Span] = None,
- complement: typing.Optional[Span] = None,
- adverbials: typing.List[Span] = None,
- ):
- """
- Parameters
- ----------
- subject : Span
- Subject.
- verb : Span
- Verb.
- indirect_object : Span, optional
- Indirect object, The default is None.
- direct_object : Span, optional
- Direct object. The default is None.
- complement : Span, optional
- Complement. The default is None.
- adverbials : list, optional
- List of adverbials. The default is [].
-
- Returns
- -------
- None.
-
- """
- if adverbials is None:
- adverbials = []
-
- self.subject = subject
- self.verb = verb
- self.indirect_object = indirect_object
- self.direct_object = direct_object
- self.complement = complement
- self.adverbials = adverbials
-
- self.doc = self.subject.doc
-
- self.type = self._get_clause_type()
-
- def _get_clause_type(self):
- has_verb = self.verb is not None
- has_complement = self.complement is not None
- has_adverbial = len(self.adverbials) > 0
- has_direct_object = self.direct_object is not None
- has_indirect_object = self.indirect_object is not None
- has_object = has_direct_object or has_indirect_object
-
- clause_type = "undefined"
-
- if not has_verb:
- clause_type = "SVC"
- return clause_type
-
- if has_object:
- if has_direct_object and has_indirect_object:
- clause_type = "SVOO"
- elif has_complement:
- clause_type = "SVOC"
- elif not has_adverbial or not has_direct_object:
- clause_type = "SVO"
- elif has_adverbial:
- clause_type = "SVOA"
- else:
- clause_type = "SVO"
- else:
- if has_complement:
- clause_type = "SVC"
- elif not has_adverbial:
- clause_type = "SV"
- elif has_adverbial:
- clause_type = "SVA"
- else:
- clause_type = "SV"
-
- return clause_type
-
- def __repr__(self):
- return "<{}, {}, {}, {}, {}, {}, {}>".format(
- self.type,
- self.subject,
- self.verb,
- self.indirect_object,
- self.direct_object,
- self.complement,
- self.adverbials,
- )
-
- def to_propositions(
- self, as_text: bool = False, inflect: str or None = "VBD", capitalize: bool = False
- ):
-
- if inflect and not as_text:
- logging.warning("`inflect' argument is ignored when `as_text==False'. To suppress this warning call `to_propositions' with the argument `inflect=None'")
- if capitalize and not as_text:
- logging.warning("`capitalize' argument is ignored when `as_text==False'. To suppress this warning call `to_propositions' with the argument `capitalize=False")
-
- propositions = []
-
- subjects = extract_ccs_from_token_at_root(self.subject)
- direct_objects = extract_ccs_from_token_at_root(self.direct_object)
- indirect_objects = extract_ccs_from_token_at_root(self.indirect_object)
- complements = extract_ccs_from_token_at_root(self.complement)
- verbs = [self.verb] if self.verb else []
-
- # synthetic verb
- tobe_verb = nlp(TOBE_VERB)
-
- for subj in subjects:
- if complements and not verbs:
- for c in complements:
- propositions.append((subj, tobe_verb, c))
- propositions.append((subj, tobe_verb) + tuple(complements))
-
- for verb in verbs:
- prop = [subj, verb]
- if self.type in ["SV", "SVA"]:
- if self.adverbials:
- for a in self.adverbials:
- propositions.append(tuple(prop + [a]))
- propositions.append(tuple(prop + self.adverbials))
- else:
- propositions.append(tuple(prop))
-
- elif self.type == "SVOO":
- for iobj in indirect_objects:
- for dobj in direct_objects:
- propositions.append((subj, verb, iobj, dobj))
- elif self.type == "SVO":
- for obj in direct_objects + indirect_objects:
- propositions.append((subj, verb, obj))
- for a in self.adverbials:
- propositions.append((subj, verb, obj, a))
- elif self.type == "SVOA":
- for obj in direct_objects:
- if self.adverbials:
- for a in self.adverbials:
- propositions.append(tuple(prop + [obj, a]))
- propositions.append(tuple(prop + [obj] + self.adverbials))
-
- elif self.type == "SVOC":
- for obj in indirect_objects + direct_objects:
- if complements:
- for c in complements:
- propositions.append(tuple(prop + [obj, c]))
- propositions.append(tuple(prop + [obj] + complements))
- elif self.type == "SVC":
- if complements:
- for c in complements:
- propositions.append(tuple(prop + [c]))
- propositions.append(tuple(prop + complements))
-
- # Remove doubles
- propositions = list(set(propositions))
-
- if as_text:
- return _convert_clauses_to_text(
- propositions, inflect=inflect, capitalize=capitalize
- )
-
- return propositions
-
-def inflect_token(token, inflect):
- if (
- inflect
- and token.pos_ == "VERB"
- and "AUX" not in [tt.pos_ for tt in token.lefts]
- # t is not preceded by an auxiliary verb (e.g. `the birds were ailing`)
- ): # t `dreamed of becoming a dancer`
- return str(token._.inflect(inflect))
- else:
- return str(token)
-
-
-def _convert_clauses_to_text(propositions, inflect, capitalize):
- proposition_texts = []
- for proposition in propositions:
- span_texts = []
- for span in proposition:
-
- token_texts = []
- for token in span:
- token_texts.append(inflect_token(token, inflect))
-
- span_texts.append(" ".join(token_texts))
- proposition_texts.append(" ".join(span_texts))
-
- if capitalize: # Capitalize and add a full stop.
- proposition_texts = [text.capitalize() + "." for text in proposition_texts]
-
- return proposition_texts
-
-
-def _get_verb_matches(span):
- # 1. Find verb phrases in the span
- # (see mdmjsh answer here: https://stackoverflow.com/questions/47856247/extract-verb-phrases-using-spacy)
- verb_matcher = Matcher(span.vocab)
- pattern = [
- #[{"POS": "AUX"}],
- #[{"POS": "VERB"}],
- #[{"POS": "VERB", "OP": "+"}],
- [{"POS": "AUX", "OP": "+"},
- {"POS": "VERB", "OP": "*"},
- {"POS": "ADV", "OP": "*"},
- {"POS": "ADJ", "OP": "*"},
- {"POS": "DET", "OP": "*"},
- {"POS": "NOUN", "OP": "*"}],
- [{"POS": "VERB", "OP": "+"},
- {"POS": "ADV", "OP": "*"},
- {"POS": "ADJ", "OP": "*"},
- {"POS": "DET", "OP": "*"},
- {"POS": "NOUN", "OP": "*"}]
- #[{"POS": "AUX"}, {"POS": "NOUN"}],
- #[{"POS": "AUX"}, {"POS": "DET"}, {"POS": "NOUN"}]
- ]
- verb_matcher.add("Verb phrase", pattern)
- return verb_matcher(span)
-
-
-def _get_verb_chunks(span):
- matches = _get_verb_matches(span)
-
- # Filter matches (e.g. do not have both "has won" and "won" in verbs)
- verb_chunks = []
- for match in [span[start:end] for _, start, end in matches]:
- if match.root not in [vp.root for vp in verb_chunks]:
- verb_chunks.append(match)
- return verb_chunks
-
-
def _get_subject(verb):
    """Find the subject span that belongs to a verb phrase.

    Search order:
      1. Children of the verb root (or of the verb root's head when the verb
         root carries a copular dependency label). If the subject found is a
         relative pronoun, the span built from everything to its left in the
         document is returned instead.
      2. While the current token carries a CONJ_DEPREL label, its children
         are scanned for a subject or for a clausal modifier
         (acl / acl:relcl / advcl) whose subject ``find_verb_subject`` can
         resolve; otherwise the search moves up the tree.
      3. Children of the token the climb stopped on.

    Args:
        verb: Span-like verb phrase exposing ``.root``.

    Returns:
        The subject span, or ``None`` when no subject is found.
    """
    root = verb.root

    # For a copular verb the search starts from the head's children
    # (the verb itself is attached below the token that owns the subject).
    if root.dep_ in COPULAR_DEPREL:
        candidates = root.head.children
    else:
        candidates = root.children

    # Default case: subject directly attached (SV order).
    for child in candidates:
        if child.dep_ in SUBJECT_DEPREL:
            subject = extract_span_from_entity(child)
            # Relative pronoun: the real subject is what precedes it.
            if (subject.root.pos_ == PRONOUN_POS) and (subject.root.lemma_ in RELATIVE_PRONOUN):
                return extract_span_from_entity(subject.doc[:subject.root.i])
            return subject

    # The climb only ever moves between verb.root, its head and the head's
    # head. Without a visited guard it can bounce between the last two
    # forever when both carry a CONJ_DEPREL label and neither yields a
    # subject; revisiting a token can never produce a new result, so stop.
    visited = set()
    while root.dep_ in CONJ_DEPREL and root.i not in visited:
        visited.add(root.i)

        for child in root.children:
            if child.dep_ in SUBJECT_DEPREL:
                return extract_span_from_entity(child)

            if child.dep_ in ['acl', 'acl:relcl', 'advcl']:
                subject = find_verb_subject(child)
                return extract_span_from_entity(subject) if subject else None

        if root == verb.root.head:
            if root.pos_ == VERB_POS:
                root = root.head
            else:
                break
        else:
            root = verb.root.head

    # Last resort: a subject attached to the token the climb ended on.
    for child in root.children:
        if child.dep_ in SUBJECT_DEPREL:
            return extract_span_from_entity(child)
    return None
-
def _find_matching_child(root, allowed_types):
    """Return the span of the first child of ``root`` with an allowed label.

    When no direct child matches, the first child that is a conjunct
    (CONJ_DEPREL) with the same part of speech as ``root`` is searched
    recursively instead; only that first conjunct is tried.

    Returns:
        The span built by ``extract_span_from_entity``, or ``None``.
    """
    direct_hit = next(
        (child for child in root.children if child.dep_ in allowed_types),
        None,
    )
    if direct_hit is not None:
        return extract_span_from_entity(direct_hit)

    sibling_conjunct = next(
        (
            child
            for child in root.children
            if (child.dep_ in CONJ_DEPREL) and (child.pos_ == root.pos_)
        ),
        None,
    )
    if sibling_conjunct is None:
        return None
    return _find_matching_child(sibling_conjunct, allowed_types=allowed_types)
-
def _find_matching_parent(root, allowed_types):
    """Build a span from ``root``'s head up to a matching child of that head.

    The head of ``root`` is searched with ``_find_matching_child``; when a
    match is found, the returned span runs from the head's index to the end
    of the matched span.

    Returns:
        The span, or ``None`` when the head has no matching child or the
        head lies after the end of the matched span.
    """
    head = root.head
    sub_tree = _find_matching_child(head, allowed_types=allowed_types)

    # _find_matching_child returns None when nothing matches; dereferencing
    # ``.end`` on it used to raise AttributeError.
    if sub_tree is None:
        return None

    if head.i > sub_tree.end:
        return None
    return Span(root.doc, head.i, sub_tree.end)
-
def extract_clauses(span):
    """Extract ``Clause`` objects from ``span``.

    For every verb chunk that has a subject this produces:
      * one verbless subject/complement clause per appositive child of the
        subject root (e.g. "AE, a scientist of ..."), and
      * one full clause with verb, objects, complement and adverbials.

    Verb chunks without a subject are skipped.

    Returns:
        A list of ``Clause`` instances in the order they were built.
    """
    clauses = []
    for verb in _get_verb_chunks(span):
        subject = _get_subject(verb)
        if not subject:
            continue

        # Appositions on the subject yield an extra subject/complement clause.
        for child in subject.root.children:
            if child.dep_ in APPOSITIVE_DEPREL:
                appos = extract_span_from_entity(child)
                appos_complement = extract_span_from_entity_no_appos(subject.root)
                # Prefer the proper noun as subject for a more informative
                # relation; this replacement also carries over to the main
                # clause built below.
                if (subject.root.pos_ == 'NOUN') and (appos.root.pos_ == 'PROPN'):
                    subject = appos
                clauses.append(Clause(subject=subject, complement=appos_complement))

        indirect_object = _find_matching_child(verb.root, IOBJECT_DEPREL)
        direct_object = _find_matching_child(verb.root, DOBJECT_DEPREL)

        # Complement, i.e. "predicate of the subject". Reset for every verb:
        # the copular branch assigns nothing when the token to the right is
        # itself a verb, which previously left the name unbound or holding a
        # value from the appositive loop or an earlier verb.
        complement = None
        if verb.root.dep_ in COPULAR_DEPREL:
            right = verb.doc[verb.root.i:].root
            # Right-hand token is not a verb: look the complement up via its parent.
            if right.pos_ != 'VERB':
                complement = _find_matching_parent(right, COMPLEMENT_DEPREL)
        else:
            complement = _find_matching_child(verb.root, COMPLEMENT_DEPREL)

        adverbials = [
            extract_span_from_entity(child)
            for child in verb.root.children
            if child.dep_ in ADVERBIAL_DEPREL
        ]

        clauses.append(
            Clause(
                subject=subject,
                verb=verb,
                indirect_object=indirect_object,
                direct_object=direct_object,
                complement=complement,
                adverbials=adverbials,
            )
        )
    return clauses
-
@Language.component('openie')
def do_extract_clauses(doc):
    """Pipeline component registered under the name 'openie'.

    Runs ``extract_clauses`` on every sentence of ``doc``, stores the result
    on the sentence's ``_.clauses`` attribute and appends it to the
    document-level ``_.clauses``. Assumes the ``clauses`` extension has been
    registered elsewhere for both sentences and documents.

    Returns:
        The same ``doc``, as spaCy components are expected to.
    """
    for sentence in doc.sents:
        found = extract_clauses(sentence)
        sentence._.clauses = found
        doc._.clauses += found
    return doc
-
def extract_span_from_entity(token):
    """Return the span covering ``token``'s subtree, ignoring punctuation.

    The span runs from the lowest to the highest document index among the
    subtree tokens whose ``pos_`` is not 'PUNCT'. Raises ``IndexError`` when
    the subtree contains nothing but punctuation.
    """
    positions = sorted(
        node.i for node in token.subtree if node.pos_ != 'PUNCT'
    )
    first, last = positions[0], positions[-1]
    return Span(token.doc, start=first, end=last + 1)
-
def extract_span_from_entity_no_appos(token):
    """Return the span over ``token`` and its non-appositive direct children.

    Only the token itself and direct children whose label is not in
    APPOSITIVE_DEPREL determine the span boundaries; the span is the
    contiguous range between the leftmost and rightmost of them.
    """
    positions = [token.i]
    positions.extend(
        child.i for child in token.children if child.dep_ not in APPOSITIVE_DEPREL
    )
    return Span(token.doc, start=min(positions), end=max(positions) + 1)
-
def extract_ccs_from_token_at_root(span):
    """Wrap ``span`` in a one-element list; ``None`` becomes an empty list.

    Conjunction expansion is currently disabled, so the span is returned
    as-is rather than being split into its coordinated parts.
    """
    return [] if span is None else [span]
-
def find_verb_subject(v):
    """Return the subject token associated with ``v``, or ``None``.

    ``v`` itself is returned when it carries a subject label. Otherwise, if
    ``v`` (or one of its children, checked in order) is a clausal modifier
    (advcl / acl / acl:relcl), the search continues from ``v``'s head;
    a child carrying a subject label is returned directly.

    The upward step is skipped when the head is labelled "root" or when the
    token is its own head, so the recursion always terminates.
    """
    if v.dep_ in SUBJECT_DEPREL:
        return v

    clausal_deps = ("advcl", "acl", "acl:relcl")
    head = v.head
    # Guard against infinite recursion on the root token. Checking the label
    # alone is not enough: a self-headed token with any other label (e.g.
    # spaCy's native "ROOT") would recurse on itself forever.
    may_climb = head.dep_ != "root" and head.i != v.i

    if v.dep_ in clausal_deps and may_climb:
        return find_verb_subject(head)

    for child in v.children:
        if child.dep_ in SUBJECT_DEPREL:
            return child
        if child.dep_ in clausal_deps and may_climb:
            return find_verb_subject(head)

    return None
-
"""# Run!"""

if __name__ == "__main__":
    import spacy

    # Portuguese Stanza pipeline with the clause extractor appended.
    nlp = spacy_stanza.load_pipeline("pt")
    nlp.add_pipe("openie")

    text = [
        "Pinoquio disse que o heroi Super-man nasceu na extinta Kripton.",
        "Em 21 de maio de 2013, os proprietários da NFL em suas reuniões de primavera em Boston votaram e premiaram o jogo no Levi's Stadium.",
        "EA morreu em Princeton em 1995.",
        "O diretor do filme, Mohsen Makhmalbaf, decide realizar uma chamada aberta para escalar os atores de seu próximo filme através de um anúncio de jornal.",
        "No imenso desacerto que foi a defesa do Penafiel, o capitão Vasco foi o homem que ainda segurou as pontas.",
        "Daniela Barreiro Claro é professora da UFBA e ensina Banco de Dados.",
        "Os alunos querem aprender Matemática.",
        "A intervenção de Pequim é, possivelmente, a de maior alcance, desde a entrega de Hong Kong pelo Reino Unido em 1997.",
        "O dono da fazenda viajou para Salvador ontem.",
        "Eu compro, empresto e vendo ouro.",
        "Eu gosto de banana, pera e maça.",
    ]

    # The sentences contain non-ASCII characters, so the encoding is fixed
    # explicitly instead of depending on the platform default. The ``with``
    # block closes the file; no explicit close() is needed.
    with open('out.txt', 'w', encoding='utf-8') as output:
        for s in text:
            doc = nlp(s)
            # Debug aid: explacy.print_parse_info(nlp, s)
            output.write(s + '\n')
            for prop in doc._.clauses:
                output.write('\t' + str(prop.to_propositions(inflect=None)) + '\n')
\ No newline at end of file