In [None]:
# Install the local aymurai checkout in editable mode (-e) together with the
# tensorflow_hub and tensorflow_text packages.
# NOTE(review): versions are not pinned and the install runs through sudo —
# confirm this matches the environment the kernel actually uses.
!sudo pip install -e /workspace/src/aymurai tensorflow_hub tensorflow_text

In [None]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules before
# each cell runs, so edits to the editable aymurai install are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import os

# Environment for TensorFlow; set here, before TensorFlow is imported further
# down, so the values are already in place when the library loads.
#   TF_CPP_MIN_LOG_LEVEL="3"  -> quiets TensorFlow's native (C++) logging
#   CUDA_VISIBLE_DEVICES="-1" -> hides CUDA GPUs so everything runs on CPU
os.environ.update(
    {
        "TF_CPP_MIN_LOG_LEVEL": "3",
        "CUDA_VISIBLE_DEVICES": "-1",
    }
)

In [None]:
import flair
import logging


# Raise flair's logger threshold to ERROR so lower-severity records do not
# clutter the cell outputs.
flair.logger.setLevel(logging.ERROR)

In [None]:
from aymurai.utils.display.render import DocRender
from aymurai.datasets.ar_juz_pcyf_10.annotations import (
    ArgentinaJuzgadoPCyF10LabelStudioAnnotations,
)

# Renderer used in the last cell to display a processed document.
render = DocRender()

# Read the Label Studio annotation dataset from disk and expose its `.data`.
# NOTE(review): absolute path into a restricted data folder — confirm it is
# mounted wherever this notebook is executed.
annotations_path = "/resources/data/restricted/annotations/20221130-bis/"
docs = ArgentinaJuzgadoPCyF10LabelStudioAnnotations(annotations_path).data

# Small slice (first 10 items) for quick experiments in the cells below.
sample = docs[:10]

# Report how many items were loaded.
print(len(docs))


In [None]:
from tensorflow.python.client import device_lib

def get_available_devices():
    """Return the names of the local CPU and GPU devices TensorFlow reports.

    Returns:
        list: the ``name`` of every entry from
        ``device_lib.list_local_devices()`` whose ``device_type`` is
        ``"GPU"`` or ``"CPU"``, in the order TensorFlow lists them.
    """
    wanted_types = ("GPU", "CPU")
    device_names = []
    for device in device_lib.list_local_devices():
        # Skip any other device kinds TensorFlow may enumerate.
        if device.device_type in wanted_types:
            device_names.append(device.name)
    return device_names

# Display the devices TensorFlow can see; with CUDA_VISIBLE_DEVICES set to "-1"
# earlier in the notebook, presumably only CPU entries should appear.
get_available_devices()

## Pipeline

In [None]:
from aymurai.pipeline import AymurAIPipeline
from aymurai.models.flair.core import FlairModel
from aymurai.models.flair.utils import FlairTextNormalize
from aymurai.transforms.entity_subcategories.regex import RegexSubcategorizer
from aymurai.transforms.entity_subcategories.usem import USEMSubcategorizer

# Resources for each USEM subcategorizer step, in the order the steps run:
# (entity category, subcategories file URL, response-embeddings file URL).
usem_resources = [
    (
        "CONDUCTA",
        "https://drive.google.com/uc?id=1Vj5BxyeHzDnR1T8jYjLuteM3YwzE7fTW&confirm=true",
        "https://drive.google.com/uc?id=1zvBHGf1MeFyyG_I0TukJl1eaM-7TsbPF&confirm=true",
    ),
    (
        "CONDUCTA_DESCRIPCION",
        "https://drive.google.com/uc?id=1A1I9xwzvynwxSv1I0SDHhN216Z3Yvoqj&confirm=true",
        "https://drive.google.com/uc?id=1c3nYVDIq23kYqgMIIKGtDbIz6zDORpYK&confirm=true",
    ),
    (
        "DETALLE",
        "https://drive.google.com/uc?id=1o1Z4fhGTtNzUIL2m3WOfDr_f0KXHu_Ms&confirm=true",
        "https://drive.google.com/uc?id=1OumPgnnM9ffjHjObnb5NL96e3hnlt7Ik&confirm=true",
    ),
    (
        "OBJETO_DE_LA_RESOLUCION",
        "https://drive.google.com/uc?id=1ksmfX_AJaE-OFEEGzj2N2mZgg5HZWB_4&confirm=true",
        "https://drive.google.com/uc?id=18wOgqzNDsqF13nrvX2XscE0JS_xrgqBU&confirm=true",
    ),
]

# One (class, kwargs) step per category; every step is pinned to the CPU device.
usem_steps = [
    (
        USEMSubcategorizer,
        {
            "category": category,
            "subcategories_path": subcategories_url,
            "response_embeddings_path": embeddings_url,
            "device": "/cpu:0",
        },
    )
    for category, subcategories_url, embeddings_url in usem_resources
]

# Pipeline description handed to AymurAIPipeline: each stage is a list of
# (class, kwargs) pairs.
config = {
    # Text normalization applied before the model runs.
    "preprocess": [
        (FlairTextNormalize, {}),
    ],
    # Flair model loaded from the given basepath, running on CPU.
    "models": [
        (
            FlairModel,
            {
                "basepath": "aymurai/flair-ner-spanish-judicial",
                "split_doc": True,
                "device": "cpu",
            },
        )
    ],
    # Subcategorizers: the regex-based one first, then one USEM step per category.
    "postprocess": [
        (RegexSubcategorizer, {}),
        *usem_steps,
    ],
    "use_cache": False,
}


In [None]:
# Build the pipeline from the `config` dict defined in the previous cell.
pipeline = AymurAIPipeline(config)


In [None]:

# Run the preprocess stage on the first sample document only, then feed its
# output to the model stage; `results` holds the predictions used below.
preprocessed = pipeline.preprocess(sample[:1])
results = pipeline.predict(preprocessed)


In [None]:

# Apply the pipeline's postprocess stage (the subcategorizers listed under
# "postprocess" in the config) to the predictions.
postprocessed = pipeline.postprocess(results)

In [None]:
# Position of the postprocessed document to display; -1 selects the last item.
idx = -1

# Render the selected document with the DocRender instance created earlier.
render(postprocessed[idx])