diff --git a/pyproject.toml b/pyproject.toml index 8f147a9..b0a336a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,7 @@ classifiers = [ dependencies = [ "pydantic >= 2.7.4", "stam == 0.8.2", + "collection >= 0.1.6", ] diff --git a/src/openpecha/alignment/parsers/plaintext.py b/src/openpecha/alignment/parsers/plaintext.py index 92a1dfd..30c1671 100644 --- a/src/openpecha/alignment/parsers/plaintext.py +++ b/src/openpecha/alignment/parsers/plaintext.py @@ -1,10 +1,10 @@ from pathlib import Path -from typing import Dict +from typing import List -from openpecha.ids import get_initial_pecha_id, get_uuid from openpecha.pecha import Pecha from openpecha.pecha.annotation import Annotation from openpecha.pecha.layer import Layer, LayerEnum +from openpecha.pecha.metadata import InitialCreationType, InitialPechaMetadata class PlainTextLineAlignedParser: @@ -19,55 +19,48 @@ def from_files(cls, source_path: Path, target_path: Path, metadata: dict): target_text = target_path.read_text(encoding="utf-8") return cls(source_text, target_text, metadata) - def create_pecha_layer(self, base_text: str, annotation: LayerEnum): + def create_pecha_layer(self, segments: List[str], annotation_type: LayerEnum): """ """ - layer_annotations: Dict[str, Annotation] = {} + layer = Layer(annotation_type=annotation_type) char_count = 0 - for segment in base_text.split("\n"): - layer_annotations[get_uuid()] = Annotation( - id_=get_uuid(), - segment=segment, + for segment in segments: + annotation = Annotation( start=char_count, end=char_count + len(segment), ) + layer.set_annotation(annotation) char_count += len(segment) - return Layer(annotation_label=annotation, annotations=layer_annotations) + return layer def parse(self): - source_pecha_id, target_pecha_id = ( - get_initial_pecha_id(), - get_initial_pecha_id(), + source_pecha_metadata, target_pecha_metadata = ( + InitialPechaMetadata( + initial_creation_type=InitialCreationType.input, + source_metadata=self.metadata["source"], + ), + InitialPechaMetadata( + initial_creation_type=InitialCreationType.input, + source_metadata=self.metadata["target"], + ), ) + source_pecha = Pecha(metadata=source_pecha_metadata) + target_pecha = Pecha(metadata=target_pecha_metadata) - source_base_fname, target_base_fname = get_uuid(), get_uuid() - source_base_files = {source_base_fname: self.source_text} - target_base_files = {target_base_fname: self.target_text} + source_base_name = source_pecha.set_base_file(self.source_text) + target_base_name = target_pecha.set_base_file(self.target_text) - source_annotation = LayerEnum(self.metadata["source"]["annotation_label"]) - target_annotation = LayerEnum(self.metadata["target"]["annotation_label"]) - - source_layers = { - source_base_fname: { - source_annotation: self.create_pecha_layer( - self.source_text, source_annotation - ) - } - } - target_layers = { - target_base_fname: { - target_annotation: self.create_pecha_layer( - self.target_text, target_annotation - ), - } - } - - source_pecha = Pecha( # noqa - source_pecha_id, source_base_files, source_layers, self.metadata["source"] + source_pecha.set_layer( + source_base_name, + LayerEnum.segment, + self.create_pecha_layer(self.source_text.split("\n"), LayerEnum.segment), ) - target_pecha = Pecha( # noqa - target_pecha_id, target_base_files, target_layers, self.metadata["target"] + target_pecha.set_layer( + target_base_name, + LayerEnum.segment, + self.create_pecha_layer(self.target_text.split("\n"), LayerEnum.segment), ) + return source_pecha, target_pecha # TODO: diff --git a/src/openpecha/ids.py b/src/openpecha/ids.py index b27d246..9560ae5 100644 --- a/src/openpecha/ids.py +++ b/src/openpecha/ids.py @@ -6,6 +6,10 @@ def get_uuid(): return uuid4().hex +def get_fourchar_uuid(): + return get_uuid()[:4] + + def get_id(prefix, length): return prefix + "".join(random.choices(uuid4().hex, k=length)).upper() diff --git a/src/openpecha/pecha/__init__.py b/src/openpecha/pecha/__init__.py index 781d99e..27afe0c 100644 --- a/src/openpecha/pecha/__init__.py +++ b/src/openpecha/pecha/__init__.py @@ -1,30 +1,25 @@ import json +from collections import defaultdict from pathlib import Path -from shutil import rmtree -from typing import Dict +from typing import Dict, Optional, Tuple -from stam import AnnotationStore, Offset, Selector - -from openpecha.config import ( - PECHA_ANNOTATION_STORE_ID, - PECHA_DATASET_ID, - PECHAS_PATH, - _mkdir, -) +from openpecha.config import PECHAS_PATH, _mkdir from openpecha.ids import get_uuid -from openpecha.pecha.annotation import Annotation from openpecha.pecha.layer import Layer, LayerEnum +from openpecha.pecha.metadata import PechaMetadata, to_json_serializable class Pecha: def __init__( self, - pecha_id: str, - bases: Dict[str, str], - layers: Dict[str, Dict[LayerEnum, Layer]], - metadata: Dict[str, str], + pecha_id: str = None, + bases: Dict[str, str] = defaultdict(), + layers: Dict[str, Dict[Tuple[LayerEnum, str], Layer]] = defaultdict( + lambda: defaultdict() + ), + metadata: PechaMetadata = None, ) -> None: - self.pecha_id = pecha_id + self.pecha_id = metadata.id_ if metadata else pecha_id self.bases = bases self.layers = layers self.metadata = metadata @@ -37,28 +32,49 @@ def from_path(cls, path: str): def from_id(cls, pecha_id: str): pass - def write(self, export_path: Path = PECHAS_PATH): + def set_base_file(self, base_text: str) -> str: + base_file_name = get_uuid() + self.bases[base_file_name] = base_text + return base_file_name + + def set_layer( + self, base_name: str, annotation_type: LayerEnum, layer: Layer + ) -> str: + + """layer key is a tuple of layer label and layer id""" + """ A particular volume can have multiple layers with same label but different id""" + layer_subtype_id = get_uuid()[:4] + self.layers[base_name][(annotation_type, layer_subtype_id)] = layer + return layer_subtype_id - pecha_dir = _mkdir(export_path / self.pecha_id) + def write(self, output_path: Path = PECHAS_PATH): + if not self.pecha_id: + raise ValueError("pecha_id must be set before writing.") + + pecha_dir = _mkdir(output_path / self.pecha_id) self.base_path = _mkdir(pecha_dir / f"{self.pecha_id}.opf") """ write metadata """ self.metadata_fn = self.base_path / "metadata.json" self.metadata_fn.write_text( - json.dumps(self.metadata, indent=4, ensure_ascii=False), encoding="utf-8" + json.dumps( + to_json_serializable(self.metadata), indent=4, ensure_ascii=False + ), + encoding="utf-8", ) """ write base file""" - base_dir = _mkdir(self.base_path / "base") - for base_fname, base_text in self.bases.items(): - base_fn = base_dir / f"{base_fname}.txt" - base_fn.write_text(base_text, encoding="utf-8") - - layer_dir = _mkdir(self.base_path / "layers") - """ write annotation layers""" - for layer_fname, layer_data in self.layers.items(): - for _, layer in layer_data.items(): - _mkdir(layer_dir / layer_fname) - layer.write( - base_file_path=base_dir / layer_fname, - export_path=layer_dir / layer_fname, - ) + if self.bases: + base_dir = _mkdir(self.base_path / "base") + for base_name, base_text in self.bases.items(): + base_fn = base_dir / f"{base_name}.txt" + base_fn.write_text(base_text, encoding="utf-8") + if self.layers: + layer_dir = _mkdir(self.base_path / "layers") + """ write annotation layers""" + for layer_name, layer_data in self.layers.items(): + for _, layer in layer_data.items(): + _mkdir(layer_dir / layer_name) + layer.write( + base_file_path=base_dir / f"{layer_name}.txt", + output_path=output_path, + ) diff --git a/src/openpecha/pecha/annotation.py b/src/openpecha/pecha/annotation.py index c7f37c8..577cddd 100644 --- a/src/openpecha/pecha/annotation.py +++ b/src/openpecha/pecha/annotation.py @@ -1,8 +1,10 @@ from pydantic import BaseModel, Field, ValidationInfo, field_validator +from openpecha.ids import get_uuid + class Annotation(BaseModel): - segment: str + id_: str = Field(default_factory=get_uuid) start: int = Field(ge=0) end: int = Field(ge=0) metadata: dict = Field(default_factory=dict) diff --git a/src/openpecha/pecha/layer.py b/src/openpecha/pecha/layer.py index 671896c..9183298 100644 --- a/src/openpecha/pecha/layer.py +++ b/src/openpecha/pecha/layer.py @@ -1,61 +1,75 @@ import json +from collections import defaultdict from enum import Enum from pathlib import Path -from typing import Dict +from typing import Dict, Optional -from stam import AnnotationStore, Offset, Selector +from pydantic import BaseModel, ConfigDict, Field +from stam import AnnotationDataSet, AnnotationStore, Offset, Selector from openpecha.config import PECHA_ANNOTATION_STORE_ID, PECHA_DATASET_ID -from openpecha.ids import get_uuid +from openpecha.ids import get_fourchar_uuid, get_uuid from openpecha.pecha.annotation import Annotation class LayerEnum(Enum): segment = "Segment" - commentaries = "Commentaries" + commentaries = "Comment" -def get_annotation_category(): - # TODO - # Return annotation category based on the annotation label - return "Structure Type" +class LayerGroupEnum(Enum): + structure_type = "Structure Type" -class Layer: - def __init__(self, annotation_label: LayerEnum, annotations: Dict[str, Annotation]): - self.annotation_label = annotation_label - self.annotations = annotations +def get_annotation_category(layer_type: LayerEnum) -> LayerGroupEnum: + """return the annotation category for the layer label""" + if layer_type == LayerEnum.segment: + return LayerGroupEnum.structure_type + return LayerGroupEnum.structure_type - def covert_to_relative_path(self, json_string: str, export_path: Path): + +class Layer(BaseModel): + id_: str = Field(default_factory=get_fourchar_uuid) + annotation_type: LayerEnum + annotations: Dict[str, Annotation] = defaultdict() + + annotation_store: Optional[AnnotationStore] = None + dataset: Optional[AnnotationDataSet] = None + + model_config = ConfigDict(arbitrary_types_allowed=True) + + def set_annotation(self, annotation: Annotation): + self.annotations[annotation.id_] = annotation + + def covert_to_relative_path(self, json_string: str, output_path: Path): """convert the absolute path to relative path for base file path in json string""" json_object = json.loads(json_string) for resource in json_object["resources"]: original_path = Path(resource["@include"]) - resource["@include"] = str(original_path.relative_to(export_path)) + resource["@include"] = str(original_path.relative_to(output_path)) return json_object - def write(self, base_file_path: Path, export_path: Path): + def write(self, base_file_path: Path, output_path: Path): + base_file_path = base_file_path """write annotations in stam data model""" self.annotation_store = AnnotationStore(id=PECHA_ANNOTATION_STORE_ID) - self.resource = self.annotation_store.add_resource( + resource = self.annotation_store.add_resource( id=base_file_path.name, filename=base_file_path.as_posix() ) self.dataset = self.annotation_store.add_dataset(id=PECHA_DATASET_ID) - - annotation_category = get_annotation_category() + annotation_category = get_annotation_category(self.annotation_type).value self.dataset.add_key(annotation_category) - unique_annotation_data_id = get_uuid() for annotation_id, annotation in self.annotations.items(): target = Selector.textselector( - self.resource, + resource, Offset.simple(annotation.start, annotation.end), ) data = [ { "id": unique_annotation_data_id, "key": annotation_category, - "value": self.annotation_label.value, + "value": self.annotation_type.value, "set": self.dataset.id(), } ] @@ -66,11 +80,12 @@ def write(self, base_file_path: Path, export_path: Path): ) """ save annotations in json""" json_string = self.annotation_store.to_json_string() - json_object = self.covert_to_relative_path(json_string, export_path) + json_object = self.covert_to_relative_path(json_string, output_path) """ add four uuid digits to the layer file name for uniqueness""" - layer_fname = f"{self.annotation_label.value}-{get_uuid()[:4]}.json" + layer_dir = base_file_path.parent.parent / "layers" / base_file_path.stem + layer_file_path = layer_dir / f"{self.annotation_type.value}-{self.id_}.json" with open( - export_path / layer_fname, + layer_file_path, "w", ) as f: f.write(json.dumps(json_object, indent=4, ensure_ascii=False)) diff --git a/src/openpecha/pecha/metadata.py b/src/openpecha/pecha/metadata.py new file mode 100644 index 0000000..a7fa7f0 --- /dev/null +++ b/src/openpecha/pecha/metadata.py @@ -0,0 +1,82 @@ +import json +from collections import defaultdict +from datetime import datetime +from enum import Enum +from typing import Dict, List, Optional + +from pydantic import BaseModel, Field, field_validator, model_validator + +from openpecha.ids import get_initial_pecha_id + + +class InitialCreationType(Enum): + ocr = "ocr" + ebook = "ebook" + input = "input" + tmx = "tmx" + + +class PechaMetadata(BaseModel): + id_: str = Field(default=None, alias="id_") + title: List[str] = Field(default=None, alias="title") + author: List[str] = Field(default=None, alias="author") + source: str = Field(default=None, alias="source") + language: str = Field(default=None, alias="language") + initial_creation_type: InitialCreationType = Field( + None, alias="initial_creation_type" + ) + created_at: datetime = Field(default=None, alias="created_at") + source_metadata: Optional[Dict] = Field( + default={} + ) # place to dump any metadata from the source + + @field_validator("created_at", mode="before") + def set_imported_date(cls, v): + return v or datetime.now() + + class Config: + json_encoders = { + InitialCreationType: lambda v: v.value, + defaultdict: lambda d: dict(d), + } + + +def to_json_serializable(pecha_metadata: Optional[PechaMetadata]) -> str: + if pecha_metadata is None: + return json.dumps({}, indent=4, ensure_ascii=False) + + # Convert the model to a dictionary + dict_data = pecha_metadata.model_dump() + # Convert the defaultdict to a regular dictionary + dict_data["source_metadata"] = dict(dict_data["source_metadata"]) + # Convert the initial_creation_type enum to its value + if dict_data["initial_creation_type"] is not None: + dict_data["initial_creation_type"] = dict_data["initial_creation_type"].value + return json.dumps(dict_data, indent=4, ensure_ascii=False) + + +class InitialPechaMetadata(PechaMetadata): + @model_validator(mode="before") + @classmethod + def set_id(cls, values): + if "id_" not in values or values["id_"] is None: + values["id_"] = get_initial_pecha_id() + return values + + +class OpenPechaMetadata(PechaMetadata): + @model_validator(mode="before") + @classmethod + def set_id(cls, values): + if "id_" not in values or values["id_"] is None: + values["id_"] = get_initial_pecha_id() + return values + + +class DiplomaticPechaMetadata(PechaMetadata): + @model_validator(mode="before") + @classmethod + def set_id(cls, values): + if "id_" not in values or values["id_"] is None: + values["id_"] = get_initial_pecha_id() + return values diff --git a/tests/alignment/parsers/plaintext/test_plaintext.py b/tests/alignment/parsers/plaintext/test_plaintext.py index 60c1aea..91e6a09 100644 --- a/tests/alignment/parsers/plaintext/test_plaintext.py +++ b/tests/alignment/parsers/plaintext/test_plaintext.py @@ -11,17 +11,15 @@ def get_data_dir(): def get_metadata(): return { "source": { - "annotation_category": "Structure Type", - "annotation_label": "Segment", + "annotation_type": "Segment", }, "target": { - "annotation_category": "Structure Type", - "annotation_label": "Comment", + "annotation_type": "Comment", }, } -def test_plaintext_parse(): +def test_PlainTextLineAlignedParser_parse(): DATA_DIR = get_data_dir() source_path = DATA_DIR / "segments.txt" target_path = DATA_DIR / "comments.txt" @@ -30,26 +28,7 @@ def test_plaintext_parse(): plaintext = PlainTextLineAlignedParser.from_files( source_path, target_path, metadata ) - plaintext.parse() - - assert ( - len(plaintext.source_segments) == 5 - ), "plaintext parser is not parsing source_segments correctly" - assert ( - len(plaintext.target_segments) == 5 - ), "plaintext parser is not parsing target_segments correctly" - - -def test_plaintext_save(): - DATA_DIR = get_data_dir() - source_path = DATA_DIR / "segments.txt" - target_path = DATA_DIR / "comments.txt" - - metadata = get_metadata() - plaintext = PlainTextLineAlignedParser.from_files( - source_path, target_path, metadata - ) - source_pecha, target_pecha = plaintext.save() + source_pecha, target_pecha = plaintext.parse() assert isinstance( source_pecha, Pecha diff --git a/tests/pecha/test_pecha.py b/tests/pecha/test_pecha.py deleted file mode 100644 index 47f8e18..0000000 --- a/tests/pecha/test_pecha.py +++ /dev/null @@ -1,77 +0,0 @@ -from pathlib import Path -from shutil import rmtree - -from openpecha.pecha import Pecha -from openpecha.pecha.annotation import Annotation - - -def get_data_dir(): - export_path = Path(__file__).parent / "data" - export_path.mkdir(parents=True, exist_ok=True) - return export_path - - -def get_segments(): - return { - "f2b056668a0c4ad3a085bdcd8e2d7adb": "རྒྱ་གར་སྐད་དུ། བོ་དྷི་སཏྭ་ཙརྱ་ཨ་བ་ཏཱ་ར།", - "b696df2dbe314e8a87881a2bc391d0d5": "བོད་སྐད་དུ། བྱང་ཆུབ་སེམས་དཔའི་སྤྱོད་པ་ལ་འཇུག་པའི་ལེགས་པར་སྦྱར་བ།", - } - - -def get_metadata(): - return { - "annotation_category": "Structure Type", - "annotation_label": "Segment", - } - - -def get_expected_annotations(): - expected_annotations = [ - Annotation( - id_="f2b056668a0c4ad3a085bdcd8e2d7adb", - segment="རྒྱ་གར་སྐད་དུ། བོ་དྷི་སཏྭ་ཙརྱ་ཨ་བ་ཏཱ་ར།", - start=0, - end=39, - metadata={}, - ), - Annotation( - id_="b696df2dbe314e8a87881a2bc391d0d5", - segment="བོད་སྐད་དུ། བྱང་ཆུབ་སེམས་དཔའི་སྤྱོད་པ་ལ་འཇུག་པའི་ལེགས་པར་སྦྱར་བ།", - start=39, - end=103, - metadata={}, - ), - ] - return expected_annotations - - -def test_pecha_set_annotations(): - pecha_id = "IE7D6875F" - segments = get_segments() - metadata = get_metadata() - pecha = Pecha(pecha_id=pecha_id, segments=segments, metadata=metadata) - assert isinstance( - pecha, Pecha - ), "Not able to create Pecha object with id, segments and metadata" - - annotations = list(pecha.set_annotations()) - assert ( - annotations == get_expected_annotations() - ), "Pecha not able to set annotations for the segments" - - -def test_pecha_write_annotations(): - pecha_id = "IE7D6875F" - segments = get_segments() - metadata = get_metadata() - pecha = Pecha(pecha_id=pecha_id, segments=segments, metadata=metadata) - export_path = get_data_dir() - pecha.write_annotations(export_path=export_path) - assert pecha.base_fn.exists(), "Pecha not able to write base file" - assert pecha.metadata_fn.exists(), "Pecha not able to write metadata file" - assert pecha.annotation_fn.rglob( - "*.json" - ), "Pecha not able to write annotation file" - - """ clean up """ - rmtree(Path(export_path / pecha_id)) diff --git a/tests/pecha/write/expected_output/IE7D6875F/IE7D6875F.opf/base/f2b056668a0c4ad3a085bdcd8e2d7adb.txt b/tests/pecha/write/expected_output/IE7D6875F/IE7D6875F.opf/base/f2b056668a0c4ad3a085bdcd8e2d7adb.txt new file mode 100644 index 0000000..0b166fc --- /dev/null +++ b/tests/pecha/write/expected_output/IE7D6875F/IE7D6875F.opf/base/f2b056668a0c4ad3a085bdcd8e2d7adb.txt @@ -0,0 +1 @@ +རྒྱ་གར་སྐད་དུ། བོ་དྷི་སཏྭ་ཙརྱ་ཨ་བ་ཏཱ་ར།བོད་སྐད་དུ། བྱང་ཆུབ་སེམས་དཔའི་སྤྱོད་པ་ལ་འཇུག་པའི་ལེགས་པར་སྦྱར་བ། \ No newline at end of file diff --git a/tests/pecha/write/expected_output/IE7D6875F/IE7D6875F.opf/layers/f2b056668a0c4ad3a085bdcd8e2d7adb/Segment-bf13.json b/tests/pecha/write/expected_output/IE7D6875F/IE7D6875F.opf/layers/f2b056668a0c4ad3a085bdcd8e2d7adb/Segment-bf13.json new file mode 100644 index 0000000..92bcaec --- /dev/null +++ b/tests/pecha/write/expected_output/IE7D6875F/IE7D6875F.opf/layers/f2b056668a0c4ad3a085bdcd8e2d7adb/Segment-bf13.json @@ -0,0 +1,88 @@ +{ + "@type": "AnnotationStore", + "@id": "PechaAnnotationStore", + "resources": [ + { + "@type": "TextResource", + "@id": "f2b056668a0c4ad3a085bdcd8e2d7adb.txt", + "@include": "IE7D6875F/IE7D6875F.opf/base/f2b056668a0c4ad3a085bdcd8e2d7adb.txt" + } + ], + "annotationsets": [ + { + "@type": "AnnotationDataSet", + "@id": "PechaDataSet", + "keys": [ + { + "@type": "DataKey", + "@id": "Structure Type" + } + ], + "data": [ + { + "@type": "AnnotationData", + "@id": "0c2c4165fb58464eabf9db0d6a3a1080", + "key": "Structure Type", + "value": { + "@type": "String", + "value": "Segment" + } + } + ] + } + ], + "annotations": [ + { + "@type": "Annotation", + "@id": "f2b056668a0c4ad3a085bdcd8e2d7adb", + "target": { + "@type": "TextSelector", + "resource": "f2b056668a0c4ad3a085bdcd8e2d7adb.txt", + "offset": { + "@type": "Offset", + "begin": { + "@type": "BeginAlignedCursor", + "value": 0 + }, + "end": { + "@type": "BeginAlignedCursor", + "value": 39 + } + } + }, + "data": [ + { + "@type": "AnnotationData", + "@id": "0c2c4165fb58464eabf9db0d6a3a1080", + "set": "PechaDataSet" + } + ] + }, + { + "@type": "Annotation", + "@id": "b696df2dbe314e8a87881a2bc391d0d5", + "target": { + "@type": "TextSelector", + "resource": "f2b056668a0c4ad3a085bdcd8e2d7adb.txt", + "offset": { + "@type": "Offset", + "begin": { + "@type": "BeginAlignedCursor", + "value": 39 + }, + "end": { + "@type": "BeginAlignedCursor", + "value": 103 + } + } + }, + "data": [ + { + "@type": "AnnotationData", + "@id": "0c2c4165fb58464eabf9db0d6a3a1080", + "set": "PechaDataSet" + } + ] + } + ] +} \ No newline at end of file diff --git a/tests/pecha/write/expected_output/IE7D6875F/IE7D6875F.opf/metadata.json b/tests/pecha/write/expected_output/IE7D6875F/IE7D6875F.opf/metadata.json new file mode 100644 index 0000000..cb740ab --- /dev/null +++ b/tests/pecha/write/expected_output/IE7D6875F/IE7D6875F.opf/metadata.json @@ -0,0 +1,3 @@ +{ + "annotation_label": "Segment" +} \ No newline at end of file diff --git a/tests/pecha/write/test_pecha.py b/tests/pecha/write/test_pecha.py new file mode 100644 index 0000000..c905b80 --- /dev/null +++ b/tests/pecha/write/test_pecha.py @@ -0,0 +1,88 @@ +from pathlib import Path +from shutil import rmtree +from unittest import mock + +from openpecha.pecha import Pecha +from openpecha.pecha.annotation import Annotation +from openpecha.pecha.layer import Layer, LayerEnum +from openpecha.pecha.metadata import InitialCreationType, InitialPechaMetadata + + +def get_data_dir(): + output_path = Path(__file__).parent / "output" + output_path.mkdir(parents=True, exist_ok=True) + return output_path + + +def get_metadata(): + return { + "annotation_type": "Segment", + } + + +def get_base(): + return { + "f2b056668a0c4ad3a085bdcd8e2d7adb": "རྒྱ་གར་སྐད་དུ། བོ་དྷི་སཏྭ་ཙརྱ་ཨ་བ་ཏཱ་ར།བོད་སྐད་དུ། བྱང་ཆུབ་སེམས་དཔའི་སྤྱོད་པ་ལ་འཇུག་པའི་ལེགས་པར་སྦྱར་བ།" # noqa + } + + +def get_layer(): + return { + "f2b056668a0c4ad3a085bdcd8e2d7adb": { + (LayerEnum.segment, "bf13"): Layer( + id_="bf13", + annotation_type=LayerEnum("Segment"), + annotations=get_annotations(), + ) + } + } + + +def get_annotations(): + return { + "f2b056668a0c4ad3a085bdcd8e2d7adb": Annotation( + start=0, + end=39, + metadata={}, + ), + "b696df2dbe314e8a87881a2bc391d0d5": Annotation( + start=39, + end=103, + metadata={}, + ), + } + + +def test_pecha_write(): + with mock.patch( + "openpecha.pecha.metadata.get_initial_pecha_id" + ) as mock_get_initial_pecha_id: + mock_get_initial_pecha_id.return_value = "IE7D6875F" + base = get_base() + layer = get_layer() + output_path = get_data_dir() + expected_output_path = Path(__file__).parent / "expected_output" + + metadata = InitialPechaMetadata(initial_creation_type=InitialCreationType.input) + pecha = Pecha(metadata=metadata) + pecha.bases = base + pecha.layers = layer + + pecha.write(output_path=output_path) + + output_file_names = [file.name for file in list(output_path.rglob("*"))] + expected_file_names = [ + file.name for file in list(expected_output_path.rglob("*")) + ] + + """ sort the list """ + output_file_names.sort() + expected_file_names.sort() + + assert output_file_names == expected_file_names + + """ clean up """ + rmtree(output_path) + + +test_pecha_write()