From 517b74143612854e964821effc564f818bc9dc01 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Mon, 1 Jul 2024 15:08:03 +0530
Subject: [PATCH 01/34] modify/set file path type as Path

---
 src/openpecha/alignment/parsers/plaintext.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/openpecha/alignment/parsers/plaintext.py b/src/openpecha/alignment/parsers/plaintext.py
index a97d60a..610f0dd 100644
--- a/src/openpecha/alignment/parsers/plaintext.py
+++ b/src/openpecha/alignment/parsers/plaintext.py
@@ -1,15 +1,18 @@
+from pathlib import Path
+
+
 class PlainText:
     def __init__(self, source_text: str, target_text: str):
         self.source_text = source_text
-        self.traget_text = target_text
+        self.target_text = target_text
 
     @classmethod
-    def from_files(cls, source_path: str, target_path: str):
-        source_text = open(source_path).read()
-        target_text = open(target_path).read()
+    def from_files(cls, source_path: Path, target_path: Path):
+        source_text = source_path.read_text(encoding="utf-8")
+        target_text = target_path.read_text(encoding="utf-8")
         return cls(source_text, target_text)
 
-    def parse(self, metadata: dict):
+    def parse(self, metadata: dict = None):
         # source_segments = self.source_text.split("\n")
         # target_segments = self.target_text.split("\n")
 

From 3836ad4ed8869c2302f8d351a7853e100d11c6ad Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Mon, 1 Jul 2024 15:16:46 +0530
Subject: [PATCH 02/34] set project_name as "openpecha"

---
 pyproject.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index a4cf13f..8a3e3c5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -3,12 +3,12 @@ requires = ["setuptools>=61.0"]
 build-backend = "setuptools.build_meta"
 
 [project]
-name = "project_name"
+name = "openpecha"
 version = "0.0.1"
 authors = [
   { name="OpenPecha", email="dev@openpecha.org" },
 ]
-description = "A small example package"
+description = "OpenPecha toolkit version 2"
 readme = "README.md"
 requires-python = ">=3.8"
 classifiers = [

From 54df02a3e1b34aab570717e0d919d156097216f1 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Mon, 1 Jul 2024 15:58:08 +0530
Subject: [PATCH 03/34] create/class Pecha

---
 src/openpecha/alignment/parsers/plaintext.py  | 20 ++++++++++++++++---
 src/openpecha/alignment/pecha/pecha.py        |  2 --
 .../{alignment => }/pecha/__init__.py         |  0
 src/openpecha/pecha/pecha.py                  | 16 +++++++++++++++
 4 files changed, 33 insertions(+), 5 deletions(-)
 delete mode 100644 src/openpecha/alignment/pecha/pecha.py
 rename src/openpecha/{alignment => }/pecha/__init__.py (100%)
 create mode 100644 src/openpecha/pecha/pecha.py

diff --git a/src/openpecha/alignment/parsers/plaintext.py b/src/openpecha/alignment/parsers/plaintext.py
index 610f0dd..83173ad 100644
--- a/src/openpecha/alignment/parsers/plaintext.py
+++ b/src/openpecha/alignment/parsers/plaintext.py
@@ -1,5 +1,8 @@
 from pathlib import Path
 
+from openpecha.ids import get_initial_pecha_id, get_uuid
+from openpecha.pecha.pecha import Pecha
+
 
 class PlainText:
     def __init__(self, source_text: str, target_text: str):
@@ -13,11 +16,22 @@ def from_files(cls, source_path: Path, target_path: Path):
         return cls(source_text, target_text)
 
     def parse(self, metadata: dict = None):
-        # source_segments = self.source_text.split("\n")
-        # target_segments = self.target_text.split("\n")
+        source_text_lines = self.source_text.split("\n")
+        target_text_lines = self.target_text.split("\n")
+
+        """ prepare the data for pecha creation"""
+        source_pecha_id, target_pecha_id = (
+            get_initial_pecha_id(),
+            get_initial_pecha_id(),
+        )
+        source_segments = {get_uuid(): segment for segment in source_text_lines}
+        target_segments = {get_uuid(): segment for segment in target_text_lines}
+
+        source_pecha = Pecha(source_pecha_id, source_segments)  # noqa
+        target_pecha = Pecha(target_pecha_id, target_segments)  # noqa
 
         # TODO:
-        # 1. Create pecha with segment layers for source and target text
+
         # 2. create a segment pairs [((source_pecha_id,source_segment_id), (target_pecha_id, target_segment_id)), ...]
         # 3. Create AlignmentMetadata
 
diff --git a/src/openpecha/alignment/pecha/pecha.py b/src/openpecha/alignment/pecha/pecha.py
deleted file mode 100644
index cbffb86..0000000
--- a/src/openpecha/alignment/pecha/pecha.py
+++ /dev/null
@@ -1,2 +0,0 @@
-class Pecha:
-    pass
diff --git a/src/openpecha/alignment/pecha/__init__.py b/src/openpecha/pecha/__init__.py
similarity index 100%
rename from src/openpecha/alignment/pecha/__init__.py
rename to src/openpecha/pecha/__init__.py
diff --git a/src/openpecha/pecha/pecha.py b/src/openpecha/pecha/pecha.py
new file mode 100644
index 0000000..9d69554
--- /dev/null
+++ b/src/openpecha/pecha/pecha.py
@@ -0,0 +1,16 @@
+from typing import Dict
+
+
+class Pecha:
+    def __init__(self, pecha_id: str, segments: Dict[str, str]) -> None:
+        self.pecha_id = pecha_id
+        self.segments = segments
+        self.base_text = "".join(segments.values())
+
+    @classmethod
+    def from_path(cls, path: str):
+        pass
+
+    @classmethod
+    def from_id(cls, pecha_id: str):
+        pass

From 1adcc16c5e8a8778bcf6ab5bde2de4afe0e2e7c2 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Mon, 1 Jul 2024 16:05:15 +0530
Subject: [PATCH 04/34] modify/move alignment and pecha code into their __init__.py

---
 src/openpecha/alignment/__init__.py          | 36 ++++++++++++++++++++
 src/openpecha/alignment/alignment.py         | 36 --------------------
 src/openpecha/alignment/parsers/plaintext.py |  2 +-
 src/openpecha/pecha/__init__.py              | 16 +++++++++
 src/openpecha/pecha/pecha.py                 | 16 ---------
 5 files changed, 53 insertions(+), 53 deletions(-)
 delete mode 100644 src/openpecha/alignment/alignment.py
 delete mode 100644 src/openpecha/pecha/pecha.py

diff --git a/src/openpecha/alignment/__init__.py b/src/openpecha/alignment/__init__.py
index e69de29..5f769d7 100644
--- a/src/openpecha/alignment/__init__.py
+++ b/src/openpecha/alignment/__init__.py
@@ -0,0 +1,36 @@
+from typing import List, Tuple
+
+
+class AlignmentMetadata:
+    pass
+
+
+class Alignment:
+    def __init__(
+        self,
+        metadata: AlignmentMetadata,
+        parser_segment_pairs=None,
+        alignment_segment_pairs=None,
+    ):
+        self.metadata = metadata
+        self.parser_segment_pairs = parser_segment_pairs
+        self.alignment_segment_pairs = alignment_segment_pairs
+
+    @classmethod
+    def from_path(cls, path: str):
+        pass
+
+    @classmethod
+    def from_id(cls, alignment_id: str):
+        pass
+
+    @classmethod
+    def from_segment_pairs(
+        cls,
+        segment_pairs: List[Tuple[Tuple[str, str], Tuple[str, str]]],
+        metadata: AlignmentMetadata,
+    ):
+        return cls(metadata=metadata, parser_segment_pairs=segment_pairs)
+
+    def save(self, path: str):
+        pass
diff --git a/src/openpecha/alignment/alignment.py b/src/openpecha/alignment/alignment.py
deleted file mode 100644
index 5f769d7..0000000
--- a/src/openpecha/alignment/alignment.py
+++ /dev/null
@@ -1,36 +0,0 @@
-from typing import List, Tuple
-
-
-class AlignmentMetadata:
-    pass
-
-
-class Alignment:
-    def __init__(
-        self,
-        metadata: AlignmentMetadata,
-        parser_segment_pairs=None,
-        alignment_segment_pairs=None,
-    ):
-        self.metadata = metadata
-        self.parser_segment_pairs = parser_segment_pairs
-        self.alignment_segment_pairs = alignment_segment_pairs
-
-    @classmethod
-    def from_path(cls, path: str):
-        pass
-
-    @classmethod
-    def from_id(cls, alignment_id: str):
-        pass
-
-    @classmethod
-    def from_segment_pairs(
-        cls,
-        segment_pairs: List[Tuple[Tuple[str, str], Tuple[str, str]]],
-        metadata: AlignmentMetadata,
-    ):
-        return cls(metadata=metadata, parser_segment_pairs=segment_pairs)
-
-    def save(self, path: str):
-        pass
diff --git a/src/openpecha/alignment/parsers/plaintext.py b/src/openpecha/alignment/parsers/plaintext.py
index 83173ad..ada0e63 100644
--- a/src/openpecha/alignment/parsers/plaintext.py
+++ b/src/openpecha/alignment/parsers/plaintext.py
@@ -1,7 +1,7 @@
 from pathlib import Path
 
 from openpecha.ids import get_initial_pecha_id, get_uuid
-from openpecha.pecha.pecha import Pecha
+from openpecha.pecha import Pecha
 
 
 class PlainText:
diff --git a/src/openpecha/pecha/__init__.py b/src/openpecha/pecha/__init__.py
index e69de29..9d69554 100644
--- a/src/openpecha/pecha/__init__.py
+++ b/src/openpecha/pecha/__init__.py
@@ -0,0 +1,16 @@
+from typing import Dict
+
+
+class Pecha:
+    def __init__(self, pecha_id: str, segments: Dict[str, str]) -> None:
+        self.pecha_id = pecha_id
+        self.segments = segments
+        self.base_text = "".join(segments.values())
+
+    @classmethod
+    def from_path(cls, path: str):
+        pass
+
+    @classmethod
+    def from_id(cls, pecha_id: str):
+        pass
diff --git a/src/openpecha/pecha/pecha.py b/src/openpecha/pecha/pecha.py
deleted file mode 100644
index 9d69554..0000000
--- a/src/openpecha/pecha/pecha.py
+++ /dev/null
@@ -1,16 +0,0 @@
-from typing import Dict
-
-
-class Pecha:
-    def __init__(self, pecha_id: str, segments: Dict[str, str]) -> None:
-        self.pecha_id = pecha_id
-        self.segments = segments
-        self.base_text = "".join(segments.values())
-
-    @classmethod
-    def from_path(cls, path: str):
-        pass
-
-    @classmethod
-    def from_id(cls, pecha_id: str):
-        pass

From fce29b2ba93df059c1bd9f473949695c0b6882c2 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Mon, 1 Jul 2024 16:34:58 +0530
Subject: [PATCH 05/34] create/class Annotation

---
 pyproject.toml                    |  5 +++++
 src/openpecha/pecha/__init__.py   | 15 +++++++++++++++
 src/openpecha/pecha/annotation.py | 16 ++++++++++++++++
 3 files changed, 36 insertions(+)
 create mode 100644 src/openpecha/pecha/annotation.py

diff --git a/pyproject.toml b/pyproject.toml
index 8a3e3c5..91d18ef 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,6 +17,11 @@ classifiers = [
     "Operating System :: OS Independent",
 ]
 
+dependencies = [
+  "pydantic >= 2.7.4",
+
+]
+
 [project.optional-dependencies]
 dev = [
     "pytest",
diff --git a/src/openpecha/pecha/__init__.py b/src/openpecha/pecha/__init__.py
index 9d69554..00c6480 100644
--- a/src/openpecha/pecha/__init__.py
+++ b/src/openpecha/pecha/__init__.py
@@ -1,11 +1,14 @@
 from typing import Dict
 
+from openpecha.pecha.annotation import Annotation
+
 
 class Pecha:
     def __init__(self, pecha_id: str, segments: Dict[str, str]) -> None:
         self.pecha_id = pecha_id
         self.segments = segments
         self.base_text = "".join(segments.values())
+        self.annotations = self.build_annotations()
 
     @classmethod
     def from_path(cls, path: str):
@@ -14,3 +17,15 @@ def from_path(cls, path: str):
     @classmethod
     def from_id(cls, pecha_id: str):
         pass
+
+    def build_annotations(self):
+        char_count = 0
+        for segment_id, segment in self.segments.items():
+            annotation = Annotation(
+                annotation_id=segment_id,
+                segment=segment,
+                start=char_count,
+                end=char_count + len(segment),
+            )
+            char_count += len(segment)
+            yield annotation
diff --git a/src/openpecha/pecha/annotation.py b/src/openpecha/pecha/annotation.py
new file mode 100644
index 0000000..138adf7
--- /dev/null
+++ b/src/openpecha/pecha/annotation.py
@@ -0,0 +1,16 @@
+from pydantic import BaseModel, Field, ValidationInfo, field_validator
+
+
+class Annotation(BaseModel):
+    annotation_id: str
+    segment: str
+    start: int = Field(ge=0)
+    end: int = Field(ge=0)
+    metadata: dict = Field(default_factory=dict)
+
+    @field_validator("end")
+    @classmethod
+    def end_must_not_be_less_than_start(cls, v: int, values: ValidationInfo) -> int:
+        if "start" in values.data and v < values.data["start"]:
+            raise ValueError("Span end must not be less than start")
+        return v

From ac2922727e34c842c2dedd9e8485a73a5f96a891 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Tue, 2 Jul 2024 09:23:17 +0530
Subject: [PATCH 06/34] rename function/to set_annotations

---
 src/openpecha/pecha/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/openpecha/pecha/__init__.py b/src/openpecha/pecha/__init__.py
index 00c6480..0781347 100644
--- a/src/openpecha/pecha/__init__.py
+++ b/src/openpecha/pecha/__init__.py
@@ -8,7 +8,7 @@ def __init__(self, pecha_id: str, segments: Dict[str, str]) -> None:
         self.pecha_id = pecha_id
         self.segments = segments
         self.base_text = "".join(segments.values())
-        self.annotations = self.build_annotations()
+        self.annotations = self.set_annotations()
 
     @classmethod
     def from_path(cls, path: str):
@@ -18,7 +18,7 @@ def from_path(cls, path: str):
     def from_id(cls, pecha_id: str):
         pass
 
-    def build_annotations(self):
+    def set_annotations(self):
         char_count = 0
         for segment_id, segment in self.segments.items():
             annotation = Annotation(

From 816934853c910ae9feef17b518d9124527d393f8 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Tue, 2 Jul 2024 10:31:22 +0530
Subject: [PATCH 07/34] Pecha function write annotations

---
 pyproject.toml                               |  1 +
 src/openpecha/alignment/parsers/plaintext.py | 17 +++++----
 src/openpecha/pecha/__init__.py              | 38 ++++++++++++++++++--
 src/openpecha/pecha/annotation.py            |  2 +-
 4 files changed, 49 insertions(+), 9 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 91d18ef..8f147a9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -19,6 +19,7 @@ classifiers = [
 
 dependencies = [
   "pydantic >= 2.7.4",
+  "stam == 0.8.2",
 
 ]
 
diff --git a/src/openpecha/alignment/parsers/plaintext.py b/src/openpecha/alignment/parsers/plaintext.py
index ada0e63..5243da2 100644
--- a/src/openpecha/alignment/parsers/plaintext.py
+++ b/src/openpecha/alignment/parsers/plaintext.py
@@ -5,17 +5,18 @@
 
 
 class PlainText:
-    def __init__(self, source_text: str, target_text: str):
+    def __init__(self, source_text: str, target_text: str, metadata: dict):
         self.source_text = source_text
         self.target_text = target_text
+        self.metadata = metadata
 
     @classmethod
-    def from_files(cls, source_path: Path, target_path: Path):
+    def from_files(cls, source_path: Path, target_path: Path, metadata: dict):
         source_text = source_path.read_text(encoding="utf-8")
         target_text = target_path.read_text(encoding="utf-8")
-        return cls(source_text, target_text)
+        return cls(source_text, target_text, metadata)
 
-    def parse(self, metadata: dict = None):
+    def parse(self):
         source_text_lines = self.source_text.split("\n")
         target_text_lines = self.target_text.split("\n")
 
@@ -27,8 +28,12 @@ def parse(self, metadata: dict = None):
         source_segments = {get_uuid(): segment for segment in source_text_lines}
         target_segments = {get_uuid(): segment for segment in target_text_lines}
 
-        source_pecha = Pecha(source_pecha_id, source_segments)  # noqa
-        target_pecha = Pecha(target_pecha_id, target_segments)  # noqa
+        source_pecha = Pecha(  # noqa
+            source_pecha_id, source_segments, self.metadata["source"]
+        )
+        target_pecha = Pecha(  # noqa
+            target_pecha_id, target_segments, self.metadata["target"]
+        )
 
         # TODO:
 
diff --git a/src/openpecha/pecha/__init__.py b/src/openpecha/pecha/__init__.py
index 0781347..bfd82b5 100644
--- a/src/openpecha/pecha/__init__.py
+++ b/src/openpecha/pecha/__init__.py
@@ -1,14 +1,21 @@
 from typing import Dict
 
+from stam import AnnotationStore, Offset, Selector
+
+from openpecha.ids import get_uuid
 from openpecha.pecha.annotation import Annotation
 
 
 class Pecha:
-    def __init__(self, pecha_id: str, segments: Dict[str, str]) -> None:
+    def __init__(
+        self, pecha_id: str, segments: Dict[str, str], metadata: Dict[str, str]
+    ) -> None:
         self.pecha_id = pecha_id
         self.segments = segments
+        self.metadata = metadata
         self.base_text = "".join(segments.values())
         self.annotations = self.set_annotations()
+        self.write_annotations()
 
     @classmethod
     def from_path(cls, path: str):
@@ -22,10 +29,37 @@ def set_annotations(self):
         char_count = 0
         for segment_id, segment in self.segments.items():
             annotation = Annotation(
-                annotation_id=segment_id,
+                id_=segment_id,
                 segment=segment,
                 start=char_count,
                 end=char_count + len(segment),
             )
             char_count += len(segment)
             yield annotation
+
+    def write_annotations(self):
+        """write annotations in stam data model"""
+        self.annotation_store = AnnotationStore(id="PechaAnnotationStore")
+        self.resource = self.annotation_store.add_resource(
+            id=self.pecha_id, filename="random file path"
+        )  # in case of having layers, resource_id will be pecha_id_layer_id
+        self.dataset = self.annotation_store.add_dataset(id="PechaDataSet")
+        self.dataset.add_key(self.metadata["annotation_category"])
+        for annotation in self.annotations:
+            target = Selector.textselector(
+                self.resource,
+                Offset.simple(annotation.start, annotation.end),
+            )
+            data = [
+                {
+                    "id": annotation.id_,
+                    "key": self.metadata["annotation_category"],
+                    "value": self.metadata["annotation_label"],
+                    "set": self.dataset.id(),
+                }
+            ]
+            self.annotation_store.add_annotation(
+                id=annotation.id_,
+                target=target,
+                data=data,
+            )
diff --git a/src/openpecha/pecha/annotation.py b/src/openpecha/pecha/annotation.py
index 138adf7..99c2132 100644
--- a/src/openpecha/pecha/annotation.py
+++ b/src/openpecha/pecha/annotation.py
@@ -2,7 +2,7 @@
 
 
 class Annotation(BaseModel):
-    annotation_id: str
+    id_: str
     segment: str
     start: int = Field(ge=0)
     end: int = Field(ge=0)

From 5ea7922695752b76cad2760512e84c30e7665410 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Tue, 2 Jul 2024 11:16:30 +0530
Subject: [PATCH 08/34] Pecha function create pecha folder

---
 src/openpecha/config.py         | 12 ++++++++++++
 src/openpecha/pecha/__init__.py | 33 +++++++++++++++++++++++++++++----
 2 files changed, 41 insertions(+), 4 deletions(-)
 create mode 100644 src/openpecha/config.py

diff --git a/src/openpecha/config.py b/src/openpecha/config.py
new file mode 100644
index 0000000..a55dc16
--- /dev/null
+++ b/src/openpecha/config.py
@@ -0,0 +1,12 @@
+from pathlib import Path
+
+
+def _mkdir(path):
+    if path.is_dir():
+        return path
+    path.mkdir(exist_ok=True, parents=True)
+    return path
+
+
+BASE_PATH = _mkdir(Path.home() / ".pechadata")
+PECHAS_PATH = _mkdir(BASE_PATH / "pechas")
diff --git a/src/openpecha/pecha/__init__.py b/src/openpecha/pecha/__init__.py
index bfd82b5..48c60ae 100644
--- a/src/openpecha/pecha/__init__.py
+++ b/src/openpecha/pecha/__init__.py
@@ -2,7 +2,7 @@
 
 from stam import AnnotationStore, Offset, Selector
 
-from openpecha.ids import get_uuid
+from openpecha.config import PECHAS_PATH
 from openpecha.pecha.annotation import Annotation
 
 
@@ -37,12 +37,30 @@ def set_annotations(self):
             char_count += len(segment)
             yield annotation
 
+    def create_pecha_folder(self):
+        pecha_dir = PECHAS_PATH.joinpath(self.pecha_id)
+        opf_dir = pecha_dir.joinpath(f"{self.pecha_id}.opf")
+        base_dir = opf_dir.joinpath("base")
+        layers_dir = opf_dir.joinpath("layers")
+        layer_id_dir = layers_dir.joinpath(self.pecha_id)
+
+        pecha_dir.mkdir(exist_ok=True)
+        opf_dir.mkdir(exist_ok=True)
+        base_dir.mkdir(exist_ok=True)
+        base_dir.joinpath(f"{self.pecha_id}.txt").write_text(self.base_text)
+        layers_dir.mkdir(exist_ok=True)
+        layer_id_dir.mkdir(exist_ok=True)
+
+        self.annotation_fn = layer_id_dir
+        self.base_fn = base_dir.joinpath(f"{self.pecha_id}.txt")
+
     def write_annotations(self):
+        self.create_pecha_folder()
         """write annotations in stam data model"""
         self.annotation_store = AnnotationStore(id="PechaAnnotationStore")
         self.resource = self.annotation_store.add_resource(
-            id=self.pecha_id, filename="random file path"
-        )  # in case of having layers, resource_id will be pecha_id_layer_id
+            id=self.pecha_id, filename=self.base_fn.as_posix()
+        )
         self.dataset = self.annotation_store.add_dataset(id="PechaDataSet")
         self.dataset.add_key(self.metadata["annotation_category"])
         for annotation in self.annotations:
@@ -58,8 +76,15 @@ def write_annotations(self):
                     "set": self.dataset.id(),
                 }
             ]
-            self.annotation_store.add_annotation(
+            self.annotation_store.annotate(
                 id=annotation.id_,
                 target=target,
                 data=data,
             )
+        """ save annotations in stam data model"""
+        self.annotation_store.set_filename(
+            self.annotation_fn.joinpath(
+                f"{self.metadata['annotation_label']}.json"
+            ).as_posix()
+        )
+        self.annotation_store.save()

From cac48fcd24a921bff4c24dee5332517a79a53912 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Tue, 2 Jul 2024 11:38:11 +0530
Subject: [PATCH 09/34] fix/set annotation data id as unique

---
 src/openpecha/pecha/__init__.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/openpecha/pecha/__init__.py b/src/openpecha/pecha/__init__.py
index 48c60ae..e3454b6 100644
--- a/src/openpecha/pecha/__init__.py
+++ b/src/openpecha/pecha/__init__.py
@@ -3,6 +3,7 @@
 from stam import AnnotationStore, Offset, Selector
 
 from openpecha.config import PECHAS_PATH
+from openpecha.ids import get_uuid
 from openpecha.pecha.annotation import Annotation
 
 
@@ -63,6 +64,8 @@ def write_annotations(self):
         )
         self.dataset = self.annotation_store.add_dataset(id="PechaDataSet")
         self.dataset.add_key(self.metadata["annotation_category"])
+
+        unique_annotation_data_id = get_uuid()
         for annotation in self.annotations:
             target = Selector.textselector(
                 self.resource,
@@ -70,7 +73,7 @@ def write_annotations(self):
             )
             data = [
                 {
-                    "id": annotation.id_,
+                    "id": unique_annotation_data_id,
                     "key": self.metadata["annotation_category"],
                     "value": self.metadata["annotation_label"],
                     "set": self.dataset.id(),

From 0cd444ad64283dfa46387b8065ba6f229796c168 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Tue, 2 Jul 2024 11:43:22 +0530
Subject: [PATCH 10/34] write metadata as json

---
 src/openpecha/pecha/__init__.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/openpecha/pecha/__init__.py b/src/openpecha/pecha/__init__.py
index e3454b6..062eb1d 100644
--- a/src/openpecha/pecha/__init__.py
+++ b/src/openpecha/pecha/__init__.py
@@ -1,3 +1,4 @@
+import json
 from typing import Dict
 
 from stam import AnnotationStore, Offset, Selector
@@ -41,12 +42,17 @@ def set_annotations(self):
     def create_pecha_folder(self):
         pecha_dir = PECHAS_PATH.joinpath(self.pecha_id)
         opf_dir = pecha_dir.joinpath(f"{self.pecha_id}.opf")
+        metadata_dir = opf_dir.joinpath("metadata.json")
         base_dir = opf_dir.joinpath("base")
         layers_dir = opf_dir.joinpath("layers")
         layer_id_dir = layers_dir.joinpath(self.pecha_id)
 
         pecha_dir.mkdir(exist_ok=True)
         opf_dir.mkdir(exist_ok=True)
+        metadata_dir.write_text(
+            json.dumps(self.metadata, indent=4, ensure_ascii=False), encoding="utf-8"
+        )
+
         base_dir.mkdir(exist_ok=True)
         base_dir.joinpath(f"{self.pecha_id}.txt").write_text(self.base_text)
         layers_dir.mkdir(exist_ok=True)

From 4157fef62bf9b497397f6f425627457416eaa23a Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Tue, 2 Jul 2024 11:58:02 +0530
Subject: [PATCH 11/34] include base path to func write annotations

---
 src/openpecha/alignment/parsers/plaintext.py |  2 ++
 src/openpecha/pecha/__init__.py              | 16 +++++++++-------
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/src/openpecha/alignment/parsers/plaintext.py b/src/openpecha/alignment/parsers/plaintext.py
index 5243da2..ff214b6 100644
--- a/src/openpecha/alignment/parsers/plaintext.py
+++ b/src/openpecha/alignment/parsers/plaintext.py
@@ -34,6 +34,8 @@ def parse(self):
         target_pecha = Pecha(  # noqa
             target_pecha_id, target_segments, self.metadata["target"]
         )
+        source_pecha.write_annotations()
+        target_pecha.write_annotations()
 
         # TODO:
 
diff --git a/src/openpecha/pecha/__init__.py b/src/openpecha/pecha/__init__.py
index 062eb1d..5b22a85 100644
--- a/src/openpecha/pecha/__init__.py
+++ b/src/openpecha/pecha/__init__.py
@@ -1,4 +1,5 @@
 import json
+from pathlib import Path
 from typing import Dict
 
 from stam import AnnotationStore, Offset, Selector
@@ -15,9 +16,6 @@ def __init__(
         self.pecha_id = pecha_id
         self.segments = segments
         self.metadata = metadata
-        self.base_text = "".join(segments.values())
-        self.annotations = self.set_annotations()
-        self.write_annotations()
 
     @classmethod
     def from_path(cls, path: str):
@@ -28,6 +26,7 @@ def from_id(cls, pecha_id: str):
         pass
 
     def set_annotations(self):
+        """set annotations for the segments"""
         char_count = 0
         for segment_id, segment in self.segments.items():
             annotation = Annotation(
@@ -39,8 +38,8 @@ def set_annotations(self):
             char_count += len(segment)
             yield annotation
 
-    def create_pecha_folder(self):
-        pecha_dir = PECHAS_PATH.joinpath(self.pecha_id)
+    def create_pecha_folder(self, base_path: Path):
+        pecha_dir = base_path.joinpath(self.pecha_id)
         opf_dir = pecha_dir.joinpath(f"{self.pecha_id}.opf")
         metadata_dir = opf_dir.joinpath("metadata.json")
         base_dir = opf_dir.joinpath("base")
@@ -61,8 +60,11 @@ def create_pecha_folder(self):
         self.annotation_fn = layer_id_dir
         self.base_fn = base_dir.joinpath(f"{self.pecha_id}.txt")
 
-    def write_annotations(self):
-        self.create_pecha_folder()
+    def write_annotations(self, base_path: Path = PECHAS_PATH):
+        self.base_text = "".join(self.segments.values())
+        self.annotations = self.set_annotations()
+
+        self.create_pecha_folder(base_path)
         """write annotations in stam data model"""
         self.annotation_store = AnnotationStore(id="PechaAnnotationStore")
         self.resource = self.annotation_store.add_resource(

From 6420cd2cf89ecbdf88871047ed02f3352f75c54d Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Tue, 2 Jul 2024 12:09:17 +0530
Subject: [PATCH 12/34] include base path to PlainText parse

---
 src/openpecha/alignment/parsers/plaintext.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/openpecha/alignment/parsers/plaintext.py b/src/openpecha/alignment/parsers/plaintext.py
index ff214b6..639839e 100644
--- a/src/openpecha/alignment/parsers/plaintext.py
+++ b/src/openpecha/alignment/parsers/plaintext.py
@@ -1,5 +1,6 @@
 from pathlib import Path
 
+from openpecha.config import PECHAS_PATH
 from openpecha.ids import get_initial_pecha_id, get_uuid
 from openpecha.pecha import Pecha
 
@@ -16,7 +17,7 @@ def from_files(cls, source_path: Path, target_path: Path, metadata: dict):
         target_text = target_path.read_text(encoding="utf-8")
         return cls(source_text, target_text, metadata)
 
-    def parse(self):
+    def parse(self, base_path: Path = PECHAS_PATH):
         source_text_lines = self.source_text.split("\n")
         target_text_lines = self.target_text.split("\n")
 
@@ -34,8 +35,8 @@ def parse(self):
         target_pecha = Pecha(  # noqa
             target_pecha_id, target_segments, self.metadata["target"]
         )
-        source_pecha.write_annotations()
-        target_pecha.write_annotations()
+        source_pecha.write_annotations(base_path)
+        target_pecha.write_annotations(base_path)
 
         # TODO:
 

From b6b4c7a675679b316f26429544a1bfbaf79036d4 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Tue, 2 Jul 2024 12:20:06 +0530
Subject: [PATCH 13/34] refactor code

---
 src/openpecha/alignment/parsers/plaintext.py | 22 ++++++++++++--------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/src/openpecha/alignment/parsers/plaintext.py b/src/openpecha/alignment/parsers/plaintext.py
index 639839e..a632e47 100644
--- a/src/openpecha/alignment/parsers/plaintext.py
+++ b/src/openpecha/alignment/parsers/plaintext.py
@@ -17,23 +17,27 @@ def from_files(cls, source_path: Path, target_path: Path, metadata: dict):
         target_text = target_path.read_text(encoding="utf-8")
         return cls(source_text, target_text, metadata)
 
-    def parse(self, base_path: Path = PECHAS_PATH):
+    def parse(self):
         source_text_lines = self.source_text.split("\n")
         target_text_lines = self.target_text.split("\n")
 
-        """ prepare the data for pecha creation"""
+        self.source_segments = {get_uuid(): segment for segment in source_text_lines}
+        self.target_segments = {get_uuid(): segment for segment in target_text_lines}
+
+    def save(self, base_path: Path = PECHAS_PATH):
+        if not self.source_segments or not self.target_segments:
+            self.parse()
+
+        """ save the source and target pecha"""
         source_pecha_id, target_pecha_id = (
             get_initial_pecha_id(),
             get_initial_pecha_id(),
         )
-        source_segments = {get_uuid(): segment for segment in source_text_lines}
-        target_segments = {get_uuid(): segment for segment in target_text_lines}
-
-        source_pecha = Pecha(  # noqa
-            source_pecha_id, source_segments, self.metadata["source"]
+        source_pecha = Pecha(
+            source_pecha_id, self.source_segments, self.metadata["source"]
         )
-        target_pecha = Pecha(  # noqa
-            target_pecha_id, target_segments, self.metadata["target"]
+        target_pecha = Pecha(
+            target_pecha_id, self.target_segments, self.metadata["target"]
         )
         source_pecha.write_annotations(base_path)
         target_pecha.write_annotations(base_path)

From 053717c0ef2628dfe082648b6e408cfe2455807d Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Tue, 2 Jul 2024 12:46:54 +0530
Subject: [PATCH 14/34] fix/check if attribute exists

---
 src/openpecha/alignment/parsers/plaintext.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/openpecha/alignment/parsers/plaintext.py b/src/openpecha/alignment/parsers/plaintext.py
index a632e47..2b62fac 100644
--- a/src/openpecha/alignment/parsers/plaintext.py
+++ b/src/openpecha/alignment/parsers/plaintext.py
@@ -25,7 +25,7 @@ def parse(self):
         self.target_segments = {get_uuid(): segment for segment in target_text_lines}
 
     def save(self, base_path: Path = PECHAS_PATH):
-        if not self.source_segments or not self.target_segments:
+        if not hasattr(self, "source_segments") or not hasattr(self, "target_segments"):
             self.parse()
 
         """ save the source and target pecha"""
@@ -39,8 +39,7 @@ def save(self, base_path: Path = PECHAS_PATH):
         target_pecha = Pecha(
             target_pecha_id, self.target_segments, self.metadata["target"]
         )
-        source_pecha.write_annotations(base_path)
-        target_pecha.write_annotations(base_path)
+        return source_pecha, target_pecha
 
         # TODO:
 

From c3251b21994efdaa6f41d48d15a355b2a44a423c Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Tue, 2 Jul 2024 12:53:44 +0530
Subject: [PATCH 15/34] test for plaintext

---
 .pre-commit-config.yaml                       |  1 -
 src/openpecha/config.py                       |  2 -
 tests/{__init__.py  => __init__.py}           |  0
 .../parsers/plaintext/data/comments.txt       |  5 ++
 .../parsers/plaintext/data/segments.txt       |  5 ++
 .../parsers/plaintext/test_plaintext.py       | 56 +++++++++++++++++++
 6 files changed, 66 insertions(+), 3 deletions(-)
 rename tests/{__init__.py  => __init__.py} (100%)
 create mode 100755 tests/alignment/parsers/plaintext/data/comments.txt
 create mode 100755 tests/alignment/parsers/plaintext/data/segments.txt
 create mode 100644 tests/alignment/parsers/plaintext/test_plaintext.py

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 55e4b03..a91a928 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -5,7 +5,6 @@ repos:
     rev: v4.3.0
     hooks:
       - id: trailing-whitespace
-      - id: end-of-file-fixer
       - id: check-yaml
 
   - repo: https://github.com/asottile/pyupgrade
diff --git a/src/openpecha/config.py b/src/openpecha/config.py
index a55dc16..3123568 100644
--- a/src/openpecha/config.py
+++ b/src/openpecha/config.py
@@ -2,8 +2,6 @@
 
 
 def _mkdir(path):
-    if path.is_dir():
-        return path
     path.mkdir(exist_ok=True, parents=True)
     return path
 
diff --git a/tests/__init__.py  b/tests/__init__.py
similarity index 100%
rename from tests/__init__.py 
rename to tests/__init__.py
diff --git a/tests/alignment/parsers/plaintext/data/comments.txt b/tests/alignment/parsers/plaintext/data/comments.txt
new file mode 100755
index 0000000..bac8759
--- /dev/null
+++ b/tests/alignment/parsers/plaintext/data/comments.txt
@@ -0,0 +1,5 @@
+﻿{D3874}༄༅༅། །རྒྱ་གར་སྐད་དུ། བོ་དྷི་སཏྭ་ཙཱརྱ་ཨ་བ་ཏཱ་ར་སང་ཀཱ་ར།
+བོད་སྐད་དུ། བྱང་ཆུབ་སེམས་དཔའི་སྤྱོད་པ་ལ་འཇུག་པའི་ལེགས་པར་སྦྱར་བ།
+བཅོམ་ལྡན་འདས་གསུང་གི་མངའ་བདག་འཇམ་དཔལ་གཞོན་ནུར་གྱུར་པ་ལ་ཕྱག་འཚལ་ལོ། །ངོ་བོ་ཉིད་ནི་བྱང་ཆུབ་སེམས་པའི། །རྒྱ་མཚོ་དེ་ལ་ཕྱག་འཚལ་ཏེ། །བདག་འདྲའི་སྤྱོད་པའི་ཡན་ལག་ལ། །འཇུག་ཕྱིར་ལེགས་སྦྱར་བཤད་ཙམ་བྱ། །དམ་པ་རྣམས་ཀྱིས་ནི་ཐོག་མ་དང་བར་དང་ཐ་མར་དགེ་བ་མངོན་པར་འཕེལ་བར་བྱ་བ་ཡིན་པས། བདེ་གཤེགས་ཞེས་བྱ་བ་ལ་སོགས་པ་སྨོས་པ་ཡིན་ཏེ། འདིར་ཕྱག་འཚལ་བ་ནི་དང་པོར་དགེ་བའོ། །ཆོས་བསྟན་པ་ནི་བར་དུ་དགེ་བའོ། །དགེ་བའི་རྩ་བ་ཡོངས་སུ་བསྔོ་བ་ནི་དགེ་བའི་རྩ་བ་མངོན་པར་འཕེལ་བ་ཡིན་པས་ཐ་མར་དགེ་བ་ཡིན་ནོ། །དེ་ལ་བདེ་བར་གཤེགས་པ་ནི་རྟོགས་པར་བྱ་བའི་ལྷག་མ་མི་མངའ་བས་ན་ཡོངས་སུ་རྫོགས་པར་ཐུགས་སུ་ཆུད་པའི་ཕྱིར་བདེ་བར་གཤེགས་པའོ། །ཆོས་ཀྱི་སྐུ་མངའ་བ་ནི་ལུང་དང་རྟོགས་པའི་བདག་ཉིད་ཅན་གྱི་དམ་པའི་ཆོས་ཀྱི་ཚོགས་ནི་ཆོས་ཀྱི་སྐུ་སྟེ་དེ་དང་བཅས་པའོ། །སྲས་བཅས་ནི་ཉིད་ལས་འཁྲུངས་པའི་སྲས་ཏེ། བྱང་ཆུབ་སེམས་དཔའ་དང་བཅས་པའོ། །ལ་ལ་ལས་ནི་བདེ་གཤེགས་དམ་པའི་ཆོས་དང་དགེ་འདུན་བཅས་ཞེས་ཟེར་རོ། །བཙུན་པ་ནི་ཉན་ཐོས་ཆེན་པོ་བརྒྱད་ལ་སོགས་པ་ལ་བྱ་སྟེ། དེ་དག་མ་ལུས་པ་ཀུན་ལ་ཕྱག་འཚལ་བའོ། །དཀོན་མཆོག་གསུམ་པོ་གཙོ་བོར་གྱུར་པས་སོ་སོར་སྨོས་པ་ཡིན་ལ། དེ་དག་ཀྱང་ཕྱག་བྱ་བར་འོས་པ་ཡིན་པས་གུས་པས་ཕྱག་འཚལ་ཏེ། ཞེས་བྱ་བ་སྨོས་ཏེ། འདིར་ཡོན་ཏན་དམ་པའི་བསྟོད་པ་རྒྱ་ཆེ་བ་དང་། མཆོད་པ་ཁྱད་པར་དུ་འཕགས་པའི་དམིགས་པ་ཡིད་ལ་བྱེད་པ་ལས་བྱུང་བའི་མོས་པའི་བསམ་པ་ཤིན་ཏུ་ཕུལ་དུ་བྱུང་བའི་དགའ་བ་རྒྱ་ཆེ་བའི་མཆོད་པ་དང་བཅས་པས་ལུས་ཞིང་ཐམས་ཅད་ཀྱི་རྡུལ་སྙེད་ཀྱིས་བཏུད་ཅིང་ཕྱག་འཚལ་ལོ། །དེ་ལྟར་ཕྱག་བཙལ་ནས་ཅི་ཞིག་བྱེད་ཅེ་ན། བདེ་གཤེགས་སྲས་ཀྱི་ཞེས་བྱ་བ་ལ་སོགས་པ་སྨོས་ཏེ། བདེ་བར་གཤེགས་པའི་བདག་ཉིད་ནི་ཆོས་ཀྱི་སྐུ་སྟེ། དེའི་དབང་དུ་བྱས་པ་ལས་སྐྱེས་པ་ས་ཆེན་པོ་ཐོབ་པ་དང་། རྒྱུ་ལ་གནས་པ་རྣམས་སོ། །དེ་རྣམས་ཀྱི་སྡོམ་པ་ནི་མི་དགེ་བ་སྤོང་བ་དང་། དགེ་བ་ལ་འཇུག་པ་དང་། སེམས་ཅན་གྱི་དོན་བྱ་བའོ། །དེ་ཡང་བཅོམ་ལྡན་འདས་ཀྱིས་ཤིན་ཏུ་ཟབ་ཅིང་རྒྱ་ཆེ་བའི་བདག་ཉིད་ཅན་དུ་གསུངས་ལ། དེར་བྱང་ཆུབ་ཏུ་སེམས་བསྐྱེད་པའི་ཕན་ཡོན་ལ་སོགས་པའི་དོན་རྣམ་པ་བཅུ་པོ་གང་ཡིན་པ་དེས་འཇུག་པའི་བདེ་བར་གཤེགས་པའི་སྲས་ཀྱི་སྡོམ་པ་ལ་འཇུག་པ་བསྟན་པར་བྱའོ། །དེ་ཡང་ལུང་བཞིན་ཞེས་བྱ་བ་སྟེ། ལུང་གི་དོན་དང་མི་འགལ་བར་རོ། །ལུང་ལས་ནི་བཅོམ་ལྡན་འདས་ཀྱིས་རྒྱ་ཆེར་གསུངས་སོ་ཞེ་ན། མདོར་བསྡུས་ནས་ནི་ཞེས་བྱ་བ་སྨོས་སོ། །དེ་ལྟ་ཡིན་དུ་ཆུག་ན། ཅི་འདིར་སྔོན་ཆད་མ་བྱུང་བ་གཞན་འགའ་ཞིག་སྨས་སམ།
+
+ལུང་ཇི་ལྟ་བ་ཡིན་ཞེ་ན། སྔོན་ཆད་ཅེས་བྱ་བ་ལ་སོགས་པ་སྨོས་སོ། །སྡེབ་སྦྱོར་མཁས་པས་སྔོན་མ་ཡིན་ནམ་ཞེ་ན། སྡེབ་སྦྱོར་ཞེས་བྱ་བ་ལ་སོགས་པ་སྨོས་སོ། །གང་གི་ཕྱིར་འདིར་སྡེབ་སྦྱོར་ལ་མཁས་པ་མེད་པ་ཉིད་ཀྱི་ཕྱིར་གཞན་གྱི་དོན་དུ་བདག་གིས་འདི་གཞུང་དུ་ཉེ་བར་སྦྱར་བ་མ་བྱས་སོ་ཞེས་བྱ་བར་དགོངས་སོ། །དེ་ལྟར་གལ་ཏེ་གཞན་གྱི་དོན་དུ་མ་བྱས་ན་ཅིའི་ཕྱིར་བྱེད་ཅེ་ན། དེའི་ཕྱིར་རང་གི་ཞེས་བྱ་བ་ལ་སོགས་པ་སྨོས་ཏེ། ཡིད་ལ་འདིར་བྱང་ཆུབ་ཀྱི་སེམས་ཏེ། དེ་བསྒོམ་པའི་ཕྱིར་ཞེས་བྱ་བ་ནི་བསླབ་པའི་ཕྱིར་རོ། །བཅོམ་ལྡན་འདས་ཀྱིས་ལུང་ལས་ཚིག་གི་དོན་རྒྱ་ཆེར་གསུངས་པ་དེ་ལས་མདོར་བསྡུས་ཏེ་རང་གི་ཡིད་ལ་བསྒོམ་པར་བྱ་བའི་ཕྱིར་བདག་གིས་འདི་བྱས་སོ་ཞེས་པའོ། །
\ No newline at end of file
diff --git a/tests/alignment/parsers/plaintext/data/segments.txt b/tests/alignment/parsers/plaintext/data/segments.txt
new file mode 100755
index 0000000..1c756a1
--- /dev/null
+++ b/tests/alignment/parsers/plaintext/data/segments.txt
@@ -0,0 +1,5 @@
+﻿རྒྱ་གར་སྐད་དུ། བོ་དྷི་སཏྭ་ཙརྱ་ཨ་བ་ཏཱ་ར།
+བོད་སྐད་དུ། བྱང་ཆུབ་སེམས་དཔའི་སྤྱོད་པ་ལ་འཇུག་པ།
+སངས་རྒྱས་དང་བྱང་ཆུབ་སེམས་དཔའ་ཐམས་ཅད་ལ་ཕྱག་འཚལ་ལོ། །
+བདེ་གཤེགས་ཆོས་ཀྱི་སྐུ་མངའ་སྲས་བཅས་དང་། །ཕྱག་འོས་ཀུན་ལའང་གུས་པར་ཕྱག་འཚལ་ཏེ། །བདེ་གཤེགས་སྲས་ཀྱི་སྡོམ་ལ་འཇུག་པ་ནི། །ལུང་བཞིན་མདོར་བསྡུས་ནས་ནི་བརྗོད་པར་བྱ། །
+སྔོན་ཆད་མ་བྱུང་བ་ཡང་འདིར་བརྗོད་མེད། །སྡེབ་སྦྱོར་མཁས་པའང་བདག་ལ་ཡོད་མིན་ཏེ། །དེ་ཕྱིར་གཞན་དོན་བསམ་པ་བདག་ལ་མེད། །རང་གི་ཡིད་ལ་བསྒོམ་ཕྱིར་ངས་འདི་བརྩམས། །
\ No newline at end of file
diff --git a/tests/alignment/parsers/plaintext/test_plaintext.py b/tests/alignment/parsers/plaintext/test_plaintext.py
new file mode 100644
index 0000000..ca44bce
--- /dev/null
+++ b/tests/alignment/parsers/plaintext/test_plaintext.py
@@ -0,0 +1,56 @@
+from pathlib import Path
+
+from openpecha.alignment.parsers.plaintext import PlainText
+from openpecha.pecha import Pecha
+
+
+def test_plaintext_parse():
+    DATA_DIR = Path(__file__).parent / "data"
+    source_path = DATA_DIR / "segments.txt"
+    target_path = DATA_DIR / "comments.txt"
+
+    metadata = {
+        "source": {
+            "annotation_category": "Structure Type",
+            "annotation_label": "Segment",
+        },
+        "target": {
+            "annotation_category": "Structure Type",
+            "annotation_label": "Comment",
+        },
+    }
+    plaintext = PlainText.from_files(source_path, target_path, metadata)
+    plaintext.parse()
+
+    assert (
+        len(plaintext.source_segments) == 5
+    ), "plaintext parser is not parsing source_segments correctly"
+    assert (
+        len(plaintext.target_segments) == 5
+    ), "plaintext parser is not parsing target_segments correctly"
+
+
+def test_plaintext_save():
+    DATA_DIR = Path(__file__).parent / "data"
+    source_path = DATA_DIR / "segments.txt"
+    target_path = DATA_DIR / "comments.txt"
+
+    metadata = {
+        "source": {
+            "annotation_category": "Structure Type",
+            "annotation_label": "Segment",
+        },
+        "target": {
+            "annotation_category": "Structure Type",
+            "annotation_label": "Comment",
+        },
+    }
+    plaintext = PlainText.from_files(source_path, target_path, metadata)
+    source_pecha, target_pecha = plaintext.save()
+
+    assert isinstance(
+        source_pecha, Pecha
+    ), "plaintext parser is not saving source_pecha as an instance of Pecha"
+    assert isinstance(
+        target_pecha, Pecha
+    ), "plaintext parser is not saving target_pecha as an instance of Pecha"

From 6f773afd9945913959cb6e7365c7ea8f53a1df35 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Tue, 2 Jul 2024 12:57:52 +0530
Subject: [PATCH 16/34] refactor test plaintext

---
 .../parsers/plaintext/test_plaintext.py       | 35 +++++++++----------
 1 file changed, 17 insertions(+), 18 deletions(-)

diff --git a/tests/alignment/parsers/plaintext/test_plaintext.py b/tests/alignment/parsers/plaintext/test_plaintext.py
index ca44bce..488cd31 100644
--- a/tests/alignment/parsers/plaintext/test_plaintext.py
+++ b/tests/alignment/parsers/plaintext/test_plaintext.py
@@ -4,12 +4,12 @@
 from openpecha.pecha import Pecha
 
 
-def test_plaintext_parse():
-    DATA_DIR = Path(__file__).parent / "data"
-    source_path = DATA_DIR / "segments.txt"
-    target_path = DATA_DIR / "comments.txt"
+def get_data_dir():
+    return Path(__file__).parent / "data"
 
-    metadata = {
+
+def get_metadata():
+    return {
         "source": {
             "annotation_category": "Structure Type",
             "annotation_label": "Segment",
@@ -19,6 +19,14 @@ def test_plaintext_parse():
             "annotation_label": "Comment",
         },
     }
+
+
+def test_plaintext_parse():
+    DATA_DIR = get_data_dir()
+    source_path = DATA_DIR / "segments.txt"
+    target_path = DATA_DIR / "comments.txt"
+
+    metadata = get_metadata()
     plaintext = PlainText.from_files(source_path, target_path, metadata)
     plaintext.parse()
 
@@ -31,26 +39,17 @@ def test_plaintext_parse():
 
 
 def test_plaintext_save():
-    DATA_DIR = Path(__file__).parent / "data"
+    DATA_DIR = get_data_dir()
     source_path = DATA_DIR / "segments.txt"
     target_path = DATA_DIR / "comments.txt"
 
-    metadata = {
-        "source": {
-            "annotation_category": "Structure Type",
-            "annotation_label": "Segment",
-        },
-        "target": {
-            "annotation_category": "Structure Type",
-            "annotation_label": "Comment",
-        },
-    }
+    metadata = get_metadata()
     plaintext = PlainText.from_files(source_path, target_path, metadata)
     source_pecha, target_pecha = plaintext.save()
 
     assert isinstance(
         source_pecha, Pecha
-    ), "plaintext parser is not saving source_pecha as an instance of Pecha"
+    ), f"source_pecha is not an instance of Pecha, but {type(source_pecha)}"
     assert isinstance(
         target_pecha, Pecha
-    ), "plaintext parser is not saving target_pecha as an instance of Pecha"
+    ), f"target_pecha is not an instance of Pecha, but {type(target_pecha)}"

From 216b4fb580e82b5050dc149bd0ffdb57f52cf91a Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Tue, 2 Jul 2024 13:21:45 +0530
Subject: [PATCH 17/34] fix/check if annotations exist before setting

---
 src/openpecha/pecha/__init__.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/openpecha/pecha/__init__.py b/src/openpecha/pecha/__init__.py
index 5b22a85..71f2d3d 100644
--- a/src/openpecha/pecha/__init__.py
+++ b/src/openpecha/pecha/__init__.py
@@ -61,8 +61,10 @@ def create_pecha_folder(self, base_path: Path):
         self.base_fn = base_dir.joinpath(f"{self.pecha_id}.txt")
 
     def write_annotations(self, base_path: Path = PECHAS_PATH):
+        if not hasattr(self, "annotations"):
+            self.annotations = self.set_annotations()
+
         self.base_text = "".join(self.segments.values())
-        self.annotations = self.set_annotations()
 
         self.create_pecha_folder(base_path)
         """write annotations in stam data model"""

From 761ac7884b5308ac4704816e2f327ce17a82dd22 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Tue, 2 Jul 2024 13:22:52 +0530
Subject: [PATCH 18/34] test for Pecha

---
 tests/pecha/test_pecha.py | 51 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)
 create mode 100644 tests/pecha/test_pecha.py

diff --git a/tests/pecha/test_pecha.py b/tests/pecha/test_pecha.py
new file mode 100644
index 0000000..81137f6
--- /dev/null
+++ b/tests/pecha/test_pecha.py
@@ -0,0 +1,51 @@
+from openpecha.pecha import Pecha
+from openpecha.pecha.annotation import Annotation
+
+
+def get_segments():
+    return {
+        "f2b056668a0c4ad3a085bdcd8e2d7adb": "རྒྱ་གར་སྐད་དུ། བོ་དྷི་སཏྭ་ཙརྱ་ཨ་བ་ཏཱ་ར།",
+        "b696df2dbe314e8a87881a2bc391d0d5": "བོད་སྐད་དུ། བྱང་ཆུབ་སེམས་དཔའི་སྤྱོད་པ་ལ་འཇུག་པའི་ལེགས་པར་སྦྱར་བ།",
+    }
+
+
+def get_metadata():
+    return {
+        "annotation_category": "Structure Type",
+        "annotation_label": "Segment",
+    }
+
+
+def get_expected_annotations():
+    expected_annotations = [
+        Annotation(
+            id_="f2b056668a0c4ad3a085bdcd8e2d7adb",
+            segment="རྒྱ་གར་སྐད་དུ། བོ་དྷི་སཏྭ་ཙརྱ་ཨ་བ་ཏཱ་ར།",
+            start=0,
+            end=39,
+            metadata={},
+        ),
+        Annotation(
+            id_="b696df2dbe314e8a87881a2bc391d0d5",
+            segment="བོད་སྐད་དུ། བྱང་ཆུབ་སེམས་དཔའི་སྤྱོད་པ་ལ་འཇུག་པའི་ལེགས་པར་སྦྱར་བ།",
+            start=39,
+            end=103,
+            metadata={},
+        ),
+    ]
+    return expected_annotations
+
+
+def test_pecha_set_annotations():
+    pecha_id = "IE7D6875F"
+    segments = get_segments()
+    metadata = get_metadata()
+    pecha = Pecha(pecha_id=pecha_id, segments=segments, metadata=metadata)
+    assert isinstance(
+        pecha, Pecha
+    ), "Not able to create Pecha object with id, segments and metadata"
+
+    annotations = list(pecha.set_annotations())
+    assert (
+        annotations == get_expected_annotations()
+    ), "Pecha not able to set annotations for the segments"

From 047b3ac9e1bf9f2ac0c44fbde8b5cffbb48c6eaf Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Tue, 2 Jul 2024 13:26:43 +0530
Subject: [PATCH 19/34] test for ids

---
 tests/test_ids.py | 78 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 78 insertions(+)
 create mode 100644 tests/test_ids.py

diff --git a/tests/test_ids.py b/tests/test_ids.py
new file mode 100644
index 0000000..39f6913
--- /dev/null
+++ b/tests/test_ids.py
@@ -0,0 +1,78 @@
+import re
+
+from openpecha.ids import (
+    get_alignment_id,
+    get_base_id,
+    get_collection_id,
+    get_diplomatic_id,
+    get_id,
+    get_initial_pecha_id,
+    get_open_pecha_id,
+    get_uuid,
+    get_work_id,
+)
+
+
+def test_get_uuid():
+    uuid = get_uuid()
+    assert re.match(
+        r"^[0-9a-fA-F]{32}$", uuid
+    ), f"UUID {uuid} is not in the correct format"
+
+
+def test_get_id():
+    prefix = "T"
+    length = 4
+    generated_id = get_id(prefix, length)
+    assert re.match(
+        r"^T[0-9A-F]{4}$", generated_id
+    ), f"ID {generated_id} is not in the correct format"
+
+
+def test_get_base_id():
+    base_id = get_base_id()
+    assert re.match(
+        r"^[0-9A-F]{4}$", base_id
+    ), f"Base ID {base_id} is not in the correct format"
+
+
+def test_get_initial_pecha_id():
+    initial_pecha_id = get_initial_pecha_id()
+    assert re.match(
+        r"^I[0-9A-F]{8}$", initial_pecha_id
+    ), f"Initial Pecha ID {initial_pecha_id} is not in the correct format"
+
+
+def test_get_open_pecha_id():
+    open_pecha_id = get_open_pecha_id()
+    assert re.match(
+        r"^O[0-9A-F]{8}$", open_pecha_id
+    ), f"Open Pecha ID {open_pecha_id} is not in the correct format"
+
+
+def test_get_diplomatic_id():
+    diplomatic_id = get_diplomatic_id()
+    assert re.match(
+        r"^D[0-9A-F]{8}$", diplomatic_id
+    ), f"Diplomatic ID {diplomatic_id} is not in the correct format"
+
+
+def test_get_work_id():
+    work_id = get_work_id()
+    assert re.match(
+        r"^W[0-9A-F]{8}$", work_id
+    ), f"Work ID {work_id} is not in the correct format"
+
+
+def test_get_alignment_id():
+    alignment_id = get_alignment_id()
+    assert re.match(
+        r"^A[0-9A-F]{8}$", alignment_id
+    ), f"Alignment ID {alignment_id} is not in the correct format"
+
+
+def test_get_collection_id():
+    collection_id = get_collection_id()
+    assert re.match(
+        r"^C[0-9A-F]{8}$", collection_id
+    ), f"Collection ID {collection_id} is not in the correct format"

From 4bd398246b01a90a9fa54d032e9bc8dbbb454eca Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Tue, 2 Jul 2024 16:28:12 +0530
Subject: [PATCH 20/34] modify/convert relative path in json string

---
 src/openpecha/pecha/__init__.py | 27 +++++++++++++++++++--------
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/src/openpecha/pecha/__init__.py b/src/openpecha/pecha/__init__.py
index 71f2d3d..64cd4ff 100644
--- a/src/openpecha/pecha/__init__.py
+++ b/src/openpecha/pecha/__init__.py
@@ -41,14 +41,14 @@ def set_annotations(self):
     def create_pecha_folder(self, base_path: Path):
         pecha_dir = base_path.joinpath(self.pecha_id)
         opf_dir = pecha_dir.joinpath(f"{self.pecha_id}.opf")
-        metadata_dir = opf_dir.joinpath("metadata.json")
+        metadata_fn = opf_dir.joinpath("metadata.json")
         base_dir = opf_dir.joinpath("base")
         layers_dir = opf_dir.joinpath("layers")
         layer_id_dir = layers_dir.joinpath(self.pecha_id)
 
         pecha_dir.mkdir(exist_ok=True)
         opf_dir.mkdir(exist_ok=True)
-        metadata_dir.write_text(
+        metadata_fn.write_text(
             json.dumps(self.metadata, indent=4, ensure_ascii=False), encoding="utf-8"
         )
 
@@ -59,6 +59,16 @@ def create_pecha_folder(self, base_path: Path):
 
         self.annotation_fn = layer_id_dir
         self.base_fn = base_dir.joinpath(f"{self.pecha_id}.txt")
+        self.opf_fn = base_path
+        self.metadata_fn = metadata_fn
+
+    def covert_to_relative_path(self, json_string: str):
+        """convert the absolute path to relative path for base file path in json string"""
+        json_object = json.loads(json_string)
+        for resource in json_object["resources"]:
+            original_path = Path(resource["@include"])
+            resource["@include"] = str(original_path.relative_to(self.opf_fn))
+        return json_object
 
     def write_annotations(self, base_path: Path = PECHAS_PATH):
         if not hasattr(self, "annotations"):
@@ -95,9 +105,10 @@ def write_annotations(self, base_path: Path = PECHAS_PATH):
                 data=data,
             )
         """ save annotations in stam data model"""
-        self.annotation_store.set_filename(
-            self.annotation_fn.joinpath(
-                f"{self.metadata['annotation_label']}.json"
-            ).as_posix()
-        )
-        self.annotation_store.save()
+        json_string = self.annotation_store.to_json_string()
+        json_object = self.covert_to_relative_path(json_string)
+        with open(
+            self.annotation_fn.joinpath(f"{self.metadata['annotation_label']}.json"),
+            "w",
+        ) as f:
+            f.write(json.dumps(json_object, indent=4, ensure_ascii=False))

From b46fdd033ab27dee86247e14a345220d75910e8e Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Tue, 2 Jul 2024 16:28:57 +0530
Subject: [PATCH 21/34] test case for Pecha write annotations

---
 tests/pecha/test_pecha.py | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/tests/pecha/test_pecha.py b/tests/pecha/test_pecha.py
index 81137f6..4f53a43 100644
--- a/tests/pecha/test_pecha.py
+++ b/tests/pecha/test_pecha.py
@@ -1,7 +1,16 @@
+from pathlib import Path
+from shutil import rmtree
+
 from openpecha.pecha import Pecha
 from openpecha.pecha.annotation import Annotation
 
 
+def get_data_dir():
+    base_path = Path(__file__).parent / "data"
+    base_path.mkdir(parents=True, exist_ok=True)
+    return base_path
+
+
 def get_segments():
     return {
         "f2b056668a0c4ad3a085bdcd8e2d7adb": "རྒྱ་གར་སྐད་དུ། བོ་དྷི་སཏྭ་ཙརྱ་ཨ་བ་ཏཱ་ར།",
@@ -49,3 +58,20 @@ def test_pecha_set_annotations():
     assert (
         annotations == get_expected_annotations()
     ), "Pecha not able to set annotations for the segments"
+
+
+def test_pecha_write_annotations():
+    pecha_id = "IE7D6875F"
+    segments = get_segments()
+    metadata = get_metadata()
+    pecha = Pecha(pecha_id=pecha_id, segments=segments, metadata=metadata)
+    base_path = get_data_dir()
+    pecha.write_annotations(base_path=base_path)
+    assert pecha.base_fn.exists(), "Pecha not able to write base file"
+    assert pecha.metadata_fn.exists(), "Pecha not able to write metadata file"
+    assert pecha.annotation_fn.rglob(
+        "*.json"
+    ), "Pecha not able to write annotation file"
+
+    """ clean up """
+    rmtree(Path(base_path / pecha_id))

From 8872295647b3df1267656705aca6da971cc0b538 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Tue, 2 Jul 2024 17:02:46 +0530
Subject: [PATCH 22/34] refactor/Pecha create pecha folder

---
 .gitignore                      |  1 +
 src/openpecha/config.py         |  2 ++
 src/openpecha/pecha/__init__.py | 41 +++++++++++++++------------------
 tests/pecha/test_pecha.py       | 15 +++++++-----
 4 files changed, 30 insertions(+), 29 deletions(-)

diff --git a/.gitignore b/.gitignore
index b6e4761..cdf689f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+pechas/
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
diff --git a/src/openpecha/config.py b/src/openpecha/config.py
index 3123568..a55dc16 100644
--- a/src/openpecha/config.py
+++ b/src/openpecha/config.py
@@ -2,6 +2,8 @@
 
 
 def _mkdir(path):
+    if path.is_dir():
+        return path
     path.mkdir(exist_ok=True, parents=True)
     return path
 
diff --git a/src/openpecha/pecha/__init__.py b/src/openpecha/pecha/__init__.py
index 64cd4ff..bcccd65 100644
--- a/src/openpecha/pecha/__init__.py
+++ b/src/openpecha/pecha/__init__.py
@@ -4,7 +4,7 @@
 
 from stam import AnnotationStore, Offset, Selector
 
-from openpecha.config import PECHAS_PATH
+from openpecha.config import PECHAS_PATH, _mkdir
 from openpecha.ids import get_uuid
 from openpecha.pecha.annotation import Annotation
 
@@ -38,45 +38,40 @@ def set_annotations(self):
             char_count += len(segment)
             yield annotation
 
-    def create_pecha_folder(self, base_path: Path):
-        pecha_dir = base_path.joinpath(self.pecha_id)
-        opf_dir = pecha_dir.joinpath(f"{self.pecha_id}.opf")
-        metadata_fn = opf_dir.joinpath("metadata.json")
-        base_dir = opf_dir.joinpath("base")
-        layers_dir = opf_dir.joinpath("layers")
-        layer_id_dir = layers_dir.joinpath(self.pecha_id)
-
-        pecha_dir.mkdir(exist_ok=True)
-        opf_dir.mkdir(exist_ok=True)
-        metadata_fn.write_text(
+    def create_pecha_folder(self, export_path: Path):
+        self.export_path = export_path
+
+        pecha_dir = _mkdir(export_path.joinpath(self.pecha_id))
+        opf_dir = _mkdir(pecha_dir.joinpath(f"{self.pecha_id}.opf"))
+        self.metadata_fn = opf_dir.joinpath("metadata.json")
+        base_dir = _mkdir(opf_dir.joinpath("base"))
+        layers_dir = _mkdir(opf_dir.joinpath("layers"))
+        layer_id_dir = _mkdir(layers_dir.joinpath(self.pecha_id))
+
+        """ write metadata and base file"""
+        self.metadata_fn.write_text(
             json.dumps(self.metadata, indent=4, ensure_ascii=False), encoding="utf-8"
         )
-
-        base_dir.mkdir(exist_ok=True)
-        base_dir.joinpath(f"{self.pecha_id}.txt").write_text(self.base_text)
-        layers_dir.mkdir(exist_ok=True)
-        layer_id_dir.mkdir(exist_ok=True)
+        self.base_fn = Path(base_dir / f"{self.pecha_id}.txt")
+        self.base_fn.write_text(self.base_text)
 
         self.annotation_fn = layer_id_dir
-        self.base_fn = base_dir.joinpath(f"{self.pecha_id}.txt")
-        self.opf_fn = base_path
-        self.metadata_fn = metadata_fn
 
     def covert_to_relative_path(self, json_string: str):
         """convert the absolute path to relative path for base file path in json string"""
         json_object = json.loads(json_string)
         for resource in json_object["resources"]:
             original_path = Path(resource["@include"])
-            resource["@include"] = str(original_path.relative_to(self.opf_fn))
+            resource["@include"] = str(original_path.relative_to(self.export_path))
         return json_object
 
-    def write_annotations(self, base_path: Path = PECHAS_PATH):
+    def write_annotations(self, export_path: Path = PECHAS_PATH):
         if not hasattr(self, "annotations"):
             self.annotations = self.set_annotations()
 
         self.base_text = "".join(self.segments.values())
 
-        self.create_pecha_folder(base_path)
+        self.create_pecha_folder(export_path)
         """write annotations in stam data model"""
         self.annotation_store = AnnotationStore(id="PechaAnnotationStore")
         self.resource = self.annotation_store.add_resource(
diff --git a/tests/pecha/test_pecha.py b/tests/pecha/test_pecha.py
index 4f53a43..fd09a39 100644
--- a/tests/pecha/test_pecha.py
+++ b/tests/pecha/test_pecha.py
@@ -6,9 +6,9 @@
 
 
 def get_data_dir():
-    base_path = Path(__file__).parent / "data"
-    base_path.mkdir(parents=True, exist_ok=True)
-    return base_path
+    export_path = Path(__file__).parent / "data"
+    export_path.mkdir(parents=True, exist_ok=True)
+    return export_path
 
 
 def get_segments():
@@ -65,8 +65,8 @@ def test_pecha_write_annotations():
     segments = get_segments()
     metadata = get_metadata()
     pecha = Pecha(pecha_id=pecha_id, segments=segments, metadata=metadata)
-    base_path = get_data_dir()
-    pecha.write_annotations(base_path=base_path)
+    export_path = get_data_dir()
+    pecha.write_annotations(export_path=export_path)
     assert pecha.base_fn.exists(), "Pecha not able to write base file"
     assert pecha.metadata_fn.exists(), "Pecha not able to write metadata file"
     assert pecha.annotation_fn.rglob(
@@ -74,4 +74,7 @@ def test_pecha_write_annotations():
     ), "Pecha not able to write annotation file"
 
     """ clean up """
-    rmtree(Path(base_path / pecha_id))
+    rmtree(Path(export_path / pecha_id))
+
+
+test_pecha_write_annotations()

From 1c31178ef5635e038609a42dba9565aef27527a2 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Tue, 2 Jul 2024 17:03:55 +0530
Subject: [PATCH 23/34] refactor

---
 src/openpecha/pecha/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/openpecha/pecha/__init__.py b/src/openpecha/pecha/__init__.py
index bcccd65..4e25c8d 100644
--- a/src/openpecha/pecha/__init__.py
+++ b/src/openpecha/pecha/__init__.py
@@ -43,12 +43,12 @@ def create_pecha_folder(self, export_path: Path):
 
         pecha_dir = _mkdir(export_path.joinpath(self.pecha_id))
         opf_dir = _mkdir(pecha_dir.joinpath(f"{self.pecha_id}.opf"))
-        self.metadata_fn = opf_dir.joinpath("metadata.json")
         base_dir = _mkdir(opf_dir.joinpath("base"))
         layers_dir = _mkdir(opf_dir.joinpath("layers"))
         layer_id_dir = _mkdir(layers_dir.joinpath(self.pecha_id))
 
         """ write metadata and base file"""
+        self.metadata_fn = opf_dir.joinpath("metadata.json")
         self.metadata_fn.write_text(
             json.dumps(self.metadata, indent=4, ensure_ascii=False), encoding="utf-8"
         )

From edf86cae29e186b6e3187ec1133960a54935d48b Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Tue, 2 Jul 2024 17:08:19 +0530
Subject: [PATCH 24/34] delete/unnecessary line

---
 tests/pecha/test_pecha.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/tests/pecha/test_pecha.py b/tests/pecha/test_pecha.py
index fd09a39..47f8e18 100644
--- a/tests/pecha/test_pecha.py
+++ b/tests/pecha/test_pecha.py
@@ -75,6 +75,3 @@ def test_pecha_write_annotations():
 
     """ clean up """
     rmtree(Path(export_path / pecha_id))
-
-
-test_pecha_write_annotations()

From ba87b8f3fad5a871f9c002f43ce1fa679c778a0c Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Wed, 3 Jul 2024 10:23:00 +0530
Subject: [PATCH 25/34] set ANNOTATION_STORE_ID and ANNOTATION_DATASET_ID in
 config

---
 src/openpecha/config.py         |  3 +++
 src/openpecha/pecha/__init__.py | 11 ++++++++---
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/openpecha/config.py b/src/openpecha/config.py
index a55dc16..d436983 100644
--- a/src/openpecha/config.py
+++ b/src/openpecha/config.py
@@ -10,3 +10,6 @@ def _mkdir(path):
 
 BASE_PATH = _mkdir(Path.home() / ".pechadata")
 PECHAS_PATH = _mkdir(BASE_PATH / "pechas")
+
+PECHA_ANNOTATION_STORE_ID = "PechaAnnotationStore"
+PECHA_DATASET_ID = "PechaDataSet"
diff --git a/src/openpecha/pecha/__init__.py b/src/openpecha/pecha/__init__.py
index 4e25c8d..436059f 100644
--- a/src/openpecha/pecha/__init__.py
+++ b/src/openpecha/pecha/__init__.py
@@ -4,7 +4,12 @@
 
 from stam import AnnotationStore, Offset, Selector
 
-from openpecha.config import PECHAS_PATH, _mkdir
+from openpecha.config import (
+    PECHA_ANNOTATION_STORE_ID,
+    PECHA_DATASET_ID,
+    PECHAS_PATH,
+    _mkdir,
+)
 from openpecha.ids import get_uuid
 from openpecha.pecha.annotation import Annotation
 
@@ -73,11 +78,11 @@ def write_annotations(self, export_path: Path = PECHAS_PATH):
 
         self.create_pecha_folder(export_path)
         """write annotations in stam data model"""
-        self.annotation_store = AnnotationStore(id="PechaAnnotationStore")
+        self.annotation_store = AnnotationStore(id=PECHA_ANNOTATION_STORE_ID)
         self.resource = self.annotation_store.add_resource(
             id=self.pecha_id, filename=self.base_fn.as_posix()
         )
-        self.dataset = self.annotation_store.add_dataset(id="PechaDataSet")
+        self.dataset = self.annotation_store.add_dataset(id=PECHA_DATASET_ID)
         self.dataset.add_key(self.metadata["annotation_category"])
 
         unique_annotation_data_id = get_uuid()

From a886053f77b66d9e171727a725fbb541d0fe2a55 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Wed, 3 Jul 2024 11:06:46 +0530
Subject: [PATCH 26/34] set base file name as uuid

---
 src/openpecha/config.py         |  5 +++--
 src/openpecha/pecha/__init__.py | 15 +++++++++------
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/src/openpecha/config.py b/src/openpecha/config.py
index d436983..e0fa952 100644
--- a/src/openpecha/config.py
+++ b/src/openpecha/config.py
@@ -1,9 +1,10 @@
 from pathlib import Path
+from shutil import rmtree
 
 
 def _mkdir(path):
-    if path.is_dir():
-        return path
+    if path.exists():
+        rmtree(path)
     path.mkdir(exist_ok=True, parents=True)
     return path
 
diff --git a/src/openpecha/pecha/__init__.py b/src/openpecha/pecha/__init__.py
index 436059f..25a0cb4 100644
--- a/src/openpecha/pecha/__init__.py
+++ b/src/openpecha/pecha/__init__.py
@@ -1,5 +1,6 @@
 import json
 from pathlib import Path
+from shutil import rmtree
 from typing import Dict
 
 from stam import AnnotationStore, Offset, Selector
@@ -48,18 +49,20 @@ def create_pecha_folder(self, export_path: Path):
 
         pecha_dir = _mkdir(export_path.joinpath(self.pecha_id))
         opf_dir = _mkdir(pecha_dir.joinpath(f"{self.pecha_id}.opf"))
-        base_dir = _mkdir(opf_dir.joinpath("base"))
-        layers_dir = _mkdir(opf_dir.joinpath("layers"))
-        layer_id_dir = _mkdir(layers_dir.joinpath(self.pecha_id))
-
         """ write metadata and base file"""
         self.metadata_fn = opf_dir.joinpath("metadata.json")
         self.metadata_fn.write_text(
             json.dumps(self.metadata, indent=4, ensure_ascii=False), encoding="utf-8"
         )
-        self.base_fn = Path(base_dir / f"{self.pecha_id}.txt")
+
+        base_file_name = get_uuid()
+        base_dir = _mkdir(opf_dir.joinpath("base"))
+        self.base_fn = Path(base_dir / f"{base_file_name}.txt")
         self.base_fn.write_text(self.base_text)
 
+        layers_dir = _mkdir(opf_dir.joinpath("layers"))
+        layer_id_dir = _mkdir(layers_dir.joinpath(base_file_name))
+
         self.annotation_fn = layer_id_dir
 
     def covert_to_relative_path(self, json_string: str):
@@ -80,7 +83,7 @@ def write_annotations(self, export_path: Path = PECHAS_PATH):
         """write annotations in stam data model"""
         self.annotation_store = AnnotationStore(id=PECHA_ANNOTATION_STORE_ID)
         self.resource = self.annotation_store.add_resource(
-            id=self.pecha_id, filename=self.base_fn.as_posix()
+            id=self.base_fn.name, filename=self.base_fn.as_posix()
         )
         self.dataset = self.annotation_store.add_dataset(id=PECHA_DATASET_ID)
         self.dataset.add_key(self.metadata["annotation_category"])

From a939d5df7607e965191abc6bcac3a92e81c7c2b8 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Wed, 3 Jul 2024 14:31:55 +0530
Subject: [PATCH 27/34] rename PlainText to PlainTextLineAlignedParser

---
 src/openpecha/alignment/parsers/plaintext.py        |  2 +-
 tests/alignment/parsers/plaintext/test_plaintext.py | 10 +++++++---
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/openpecha/alignment/parsers/plaintext.py b/src/openpecha/alignment/parsers/plaintext.py
index 2b62fac..8c9e3dd 100644
--- a/src/openpecha/alignment/parsers/plaintext.py
+++ b/src/openpecha/alignment/parsers/plaintext.py
@@ -5,7 +5,7 @@
 from openpecha.pecha import Pecha
 
 
-class PlainText:
+class PlainTextLineAlignedParser:
     def __init__(self, source_text: str, target_text: str, metadata: dict):
         self.source_text = source_text
         self.target_text = target_text
diff --git a/tests/alignment/parsers/plaintext/test_plaintext.py b/tests/alignment/parsers/plaintext/test_plaintext.py
index 488cd31..60c1aea 100644
--- a/tests/alignment/parsers/plaintext/test_plaintext.py
+++ b/tests/alignment/parsers/plaintext/test_plaintext.py
@@ -1,6 +1,6 @@
 from pathlib import Path
 
-from openpecha.alignment.parsers.plaintext import PlainText
+from openpecha.alignment.parsers.plaintext import PlainTextLineAlignedParser
 from openpecha.pecha import Pecha
 
 
@@ -27,7 +27,9 @@ def test_plaintext_parse():
     target_path = DATA_DIR / "comments.txt"
 
     metadata = get_metadata()
-    plaintext = PlainText.from_files(source_path, target_path, metadata)
+    plaintext = PlainTextLineAlignedParser.from_files(
+        source_path, target_path, metadata
+    )
     plaintext.parse()
 
     assert (
@@ -44,7 +46,9 @@ def test_plaintext_save():
     target_path = DATA_DIR / "comments.txt"
 
     metadata = get_metadata()
-    plaintext = PlainText.from_files(source_path, target_path, metadata)
+    plaintext = PlainTextLineAlignedParser.from_files(
+        source_path, target_path, metadata
+    )
     source_pecha, target_pecha = plaintext.save()
 
     assert isinstance(

From 7741ab35dbe025955e87c7425831096a26512563 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Wed, 3 Jul 2024 14:54:31 +0530
Subject: [PATCH 28/34] Layer abstraction class

---
 src/openpecha/pecha/layer.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)
 create mode 100644 src/openpecha/pecha/layer.py

diff --git a/src/openpecha/pecha/layer.py b/src/openpecha/pecha/layer.py
new file mode 100644
index 0000000..1298d6a
--- /dev/null
+++ b/src/openpecha/pecha/layer.py
@@ -0,0 +1,16 @@
+from enum import Enum
+
+from pydantic import BaseModel, Field
+
+from openpecha.ids import get_uuid
+
+
+class LayerEnum(Enum):
+    segment = "Segment"
+    commentaries = "Commentaries"
+
+
+class Layer(BaseModel):
+    id: str = Field(default_factory=get_uuid)
+    annotation_type: LayerEnum
+    annotations: dict = Field(default_factory=dict)

From 6fd99d19a2dd3b3b209f8fea1e4dd8219ea762c4 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Wed, 3 Jul 2024 15:01:22 +0530
Subject: [PATCH 29/34] join class Annotation in Layer field

---
 src/openpecha/pecha/annotation.py | 4 +++-
 src/openpecha/pecha/layer.py      | 6 ++++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/openpecha/pecha/annotation.py b/src/openpecha/pecha/annotation.py
index 99c2132..be58627 100644
--- a/src/openpecha/pecha/annotation.py
+++ b/src/openpecha/pecha/annotation.py
@@ -1,8 +1,10 @@
 from pydantic import BaseModel, Field, ValidationInfo, field_validator
 
+from openpecha.ids import get_uuid
+
 
 class Annotation(BaseModel):
-    id_: str
+    id_: str = Field(default_factory=get_uuid)
     segment: str
     start: int = Field(ge=0)
     end: int = Field(ge=0)
diff --git a/src/openpecha/pecha/layer.py b/src/openpecha/pecha/layer.py
index 1298d6a..f3fef40 100644
--- a/src/openpecha/pecha/layer.py
+++ b/src/openpecha/pecha/layer.py
@@ -1,8 +1,10 @@
 from enum import Enum
+from typing import Dict
 
 from pydantic import BaseModel, Field
 
 from openpecha.ids import get_uuid
+from openpecha.pecha.annotation import Annotation
 
 
 class LayerEnum(Enum):
@@ -11,6 +13,6 @@ class LayerEnum(Enum):
 
 
 class Layer(BaseModel):
-    id: str = Field(default_factory=get_uuid)
+    id_: str = Field(default_factory=get_uuid)
     annotation_type: LayerEnum
-    annotations: dict = Field(default_factory=dict)
+    annotations: Dict[str, Annotation] = Field(default_factory=dict)

From 2ea5a283e038222dffa607dba875ad6d94d96f27 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Wed, 3 Jul 2024 15:55:35 +0530
Subject: [PATCH 30/34] modify/Pecha fields

---
 src/openpecha/pecha/__init__.py | 95 +++------------------------------
 1 file changed, 8 insertions(+), 87 deletions(-)

diff --git a/src/openpecha/pecha/__init__.py b/src/openpecha/pecha/__init__.py
index 25a0cb4..2e761ef 100644
--- a/src/openpecha/pecha/__init__.py
+++ b/src/openpecha/pecha/__init__.py
@@ -13,14 +13,20 @@
 )
 from openpecha.ids import get_uuid
 from openpecha.pecha.annotation import Annotation
+from openpecha.pecha.layer import Layer, LayerEnum
 
 
 class Pecha:
     def __init__(
-        self, pecha_id: str, segments: Dict[str, str], metadata: Dict[str, str]
+        self,
+        pecha_id: str,
+        bases: Dict[str, str] = None,
+        layers: Dict[str, Dict[LayerEnum, Layer]] = None,
+        metadata: Dict[str, str] = None,
     ) -> None:
         self.pecha_id = pecha_id
-        self.segments = segments
+        self.bases = bases
+        self.layers = layers
         self.metadata = metadata
 
     @classmethod
@@ -30,88 +36,3 @@ def from_path(cls, path: str):
     @classmethod
     def from_id(cls, pecha_id: str):
         pass
-
-    def set_annotations(self):
-        """set annotations for the segments"""
-        char_count = 0
-        for segment_id, segment in self.segments.items():
-            annotation = Annotation(
-                id_=segment_id,
-                segment=segment,
-                start=char_count,
-                end=char_count + len(segment),
-            )
-            char_count += len(segment)
-            yield annotation
-
-    def create_pecha_folder(self, export_path: Path):
-        self.export_path = export_path
-
-        pecha_dir = _mkdir(export_path.joinpath(self.pecha_id))
-        opf_dir = _mkdir(pecha_dir.joinpath(f"{self.pecha_id}.opf"))
-        """ write metadata and base file"""
-        self.metadata_fn = opf_dir.joinpath("metadata.json")
-        self.metadata_fn.write_text(
-            json.dumps(self.metadata, indent=4, ensure_ascii=False), encoding="utf-8"
-        )
-
-        base_file_name = get_uuid()
-        base_dir = _mkdir(opf_dir.joinpath("base"))
-        self.base_fn = Path(base_dir / f"{base_file_name}.txt")
-        self.base_fn.write_text(self.base_text)
-
-        layers_dir = _mkdir(opf_dir.joinpath("layers"))
-        layer_id_dir = _mkdir(layers_dir.joinpath(base_file_name))
-
-        self.annotation_fn = layer_id_dir
-
-    def covert_to_relative_path(self, json_string: str):
-        """convert the absolute path to relative path for base file path in json string"""
-        json_object = json.loads(json_string)
-        for resource in json_object["resources"]:
-            original_path = Path(resource["@include"])
-            resource["@include"] = str(original_path.relative_to(self.export_path))
-        return json_object
-
-    def write_annotations(self, export_path: Path = PECHAS_PATH):
-        if not hasattr(self, "annotations"):
-            self.annotations = self.set_annotations()
-
-        self.base_text = "".join(self.segments.values())
-
-        self.create_pecha_folder(export_path)
-        """write annotations in stam data model"""
-        self.annotation_store = AnnotationStore(id=PECHA_ANNOTATION_STORE_ID)
-        self.resource = self.annotation_store.add_resource(
-            id=self.base_fn.name, filename=self.base_fn.as_posix()
-        )
-        self.dataset = self.annotation_store.add_dataset(id=PECHA_DATASET_ID)
-        self.dataset.add_key(self.metadata["annotation_category"])
-
-        unique_annotation_data_id = get_uuid()
-        for annotation in self.annotations:
-            target = Selector.textselector(
-                self.resource,
-                Offset.simple(annotation.start, annotation.end),
-            )
-            data = [
-                {
-                    "id": unique_annotation_data_id,
-                    "key": self.metadata["annotation_category"],
-                    "value": self.metadata["annotation_label"],
-                    "set": self.dataset.id(),
-                }
-            ]
-            self.annotation_store.annotate(
-                id=annotation.id_,
-                target=target,
-                data=data,
-            )
-        """ save annotations in stam data model"""
-        json_string = self.annotation_store.to_json_string()
-        json_object = self.covert_to_relative_path(json_string)
-        with open(
-            self.annotation_fn.joinpath(f"{self.metadata['annotation_label']}.json"),
-            "w",
-        ) as f:
-            f.write(json.dumps(json_object, indent=4, ensure_ascii=False))

From 3b5f6a2a62cf8cbfc21488869c3dcd1274189bcf Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Wed, 3 Jul 2024 16:11:27 +0530
Subject: [PATCH 31/34] modify/PlainTextLineAlignedParser parser func

---
 src/openpecha/alignment/parsers/plaintext.py | 58 +++++++++++++++-----
 src/openpecha/pecha/annotation.py            |  3 -
 src/openpecha/pecha/layer.py                 | 11 ++--
 3 files changed, 47 insertions(+), 25 deletions(-)

diff --git a/src/openpecha/alignment/parsers/plaintext.py b/src/openpecha/alignment/parsers/plaintext.py
index 8c9e3dd..d1e075a 100644
--- a/src/openpecha/alignment/parsers/plaintext.py
+++ b/src/openpecha/alignment/parsers/plaintext.py
@@ -1,8 +1,10 @@
 from pathlib import Path
+from typing import Dict
 
-from openpecha.config import PECHAS_PATH
 from openpecha.ids import get_initial_pecha_id, get_uuid
 from openpecha.pecha import Pecha
+from openpecha.pecha.annotation import Annotation
+from openpecha.pecha.layer import Layer, LayerEnum
 
 
 class PlainTextLineAlignedParser:
@@ -17,27 +19,53 @@ def from_files(cls, source_path: Path, target_path: Path, metadata: dict):
         target_text = target_path.read_text(encoding="utf-8")
         return cls(source_text, target_text, metadata)
 
-    def parse(self):
-        source_text_lines = self.source_text.split("\n")
-        target_text_lines = self.target_text.split("\n")
-
-        self.source_segments = {get_uuid(): segment for segment in source_text_lines}
-        self.target_segments = {get_uuid(): segment for segment in target_text_lines}
+    def create_pecha_layer(self, base_text: str, annotation: LayerEnum):
+        """Build a Layer with one offset Annotation per line of base_text."""
+        layer_annotations: Dict[str, Annotation] = {}
+        char_count = 0
+        for segment in base_text.split("\n"):
+            layer_annotations[get_uuid()] = Annotation(
+                id_=get_uuid(),
+                segment=segment,
+                start=char_count,
+                end=char_count + len(segment),
+            )
+            char_count += len(segment)
 
-    def save(self, base_path: Path = PECHAS_PATH):
-        if not hasattr(self, "source_segments") or not hasattr(self, "target_segments"):
-            self.parse()
+        return Layer(annotation_label=annotation, annotations=layer_annotations)
 
-        """ save the source and target pecha"""
+    def parse(self):
         source_pecha_id, target_pecha_id = (
             get_initial_pecha_id(),
             get_initial_pecha_id(),
         )
-        source_pecha = Pecha(
-            source_pecha_id, self.source_segments, self.metadata["source"]
+
+        source_base_files = {get_uuid(): self.source_text}
+        target_base_files = {get_uuid(): self.target_text}
+
+        source_annotation = LayerEnum(self.metadata["source"]["annotation_label"])
+        target_annotation = LayerEnum(self.metadata["target"]["annotation_label"])
+
+        source_layers = {
+            get_uuid(): {
+                source_annotation: self.create_pecha_layer(
+                    self.source_text, source_annotation
+                )
+            }
+        }
+        target_layers = {
+            get_uuid(): {
+                target_annotation: self.create_pecha_layer(
+                    self.target_text, target_annotation
+                ),
+            }
+        }
+
+        source_pecha = Pecha(  # noqa
+            source_pecha_id, source_base_files, source_layers, self.metadata["source"]
         )
-        target_pecha = Pecha(
-            target_pecha_id, self.target_segments, self.metadata["target"]
+        target_pecha = Pecha(  # noqa
+            target_pecha_id, target_base_files, target_layers, self.metadata["target"]
         )
         return source_pecha, target_pecha
 
diff --git a/src/openpecha/pecha/annotation.py b/src/openpecha/pecha/annotation.py
index be58627..c7f37c8 100644
--- a/src/openpecha/pecha/annotation.py
+++ b/src/openpecha/pecha/annotation.py
@@ -1,10 +1,7 @@
 from pydantic import BaseModel, Field, ValidationInfo, field_validator
 
-from openpecha.ids import get_uuid
-
 
 class Annotation(BaseModel):
-    id_: str = Field(default_factory=get_uuid)
     segment: str
     start: int = Field(ge=0)
     end: int = Field(ge=0)
diff --git a/src/openpecha/pecha/layer.py b/src/openpecha/pecha/layer.py
index f3fef40..9dc48a8 100644
--- a/src/openpecha/pecha/layer.py
+++ b/src/openpecha/pecha/layer.py
@@ -1,9 +1,6 @@
 from enum import Enum
 from typing import Dict
 
-from pydantic import BaseModel, Field
-
-from openpecha.ids import get_uuid
 from openpecha.pecha.annotation import Annotation
 
 
@@ -12,7 +9,7 @@ class LayerEnum(Enum):
     commentaries = "Commentaries"
 
 
-class Layer(BaseModel):
-    id_: str = Field(default_factory=get_uuid)
-    annotation_type: LayerEnum
-    annotations: Dict[str, Annotation] = Field(default_factory=dict)
+class Layer:
+    def __init__(self, annotation_label: LayerEnum, annotations: Dict[str, Annotation]):
+        self.annotation_label = annotation_label
+        self.annotations = annotations

From e9ccaef5b51da438cf1753681235cb69aa5ba43c Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Thu, 4 Jul 2024 09:18:31 +0530
Subject: [PATCH 32/34] fix/set basefile name and layer file name same

---
 src/openpecha/alignment/parsers/plaintext.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/openpecha/alignment/parsers/plaintext.py b/src/openpecha/alignment/parsers/plaintext.py
index d1e075a..92a1dfd 100644
--- a/src/openpecha/alignment/parsers/plaintext.py
+++ b/src/openpecha/alignment/parsers/plaintext.py
@@ -40,21 +40,22 @@ def parse(self):
             get_initial_pecha_id(),
         )
 
-        source_base_files = {get_uuid(): self.source_text}
-        target_base_files = {get_uuid(): self.target_text}
+        source_base_fname, target_base_fname = get_uuid(), get_uuid()
+        source_base_files = {source_base_fname: self.source_text}
+        target_base_files = {target_base_fname: self.target_text}
 
         source_annotation = LayerEnum(self.metadata["source"]["annotation_label"])
         target_annotation = LayerEnum(self.metadata["target"]["annotation_label"])
 
         source_layers = {
-            get_uuid(): {
+            source_base_fname: {
                 source_annotation: self.create_pecha_layer(
                     self.source_text, source_annotation
                 )
             }
         }
         target_layers = {
-            get_uuid(): {
+            target_base_fname: {
                 target_annotation: self.create_pecha_layer(
                     self.target_text, target_annotation
                 ),

From c6b4fa0254f3f39449d16733cc7504be62a689b9 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Thu, 4 Jul 2024 09:23:06 +0530
Subject: [PATCH 33/34] Layer write layer

---
 src/openpecha/pecha/layer.py | 59 ++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

diff --git a/src/openpecha/pecha/layer.py b/src/openpecha/pecha/layer.py
index 9dc48a8..038336b 100644
--- a/src/openpecha/pecha/layer.py
+++ b/src/openpecha/pecha/layer.py
@@ -1,6 +1,12 @@
+import json
 from enum import Enum
+from pathlib import Path
 from typing import Dict
 
+from stam import AnnotationStore, Offset, Selector
+
+from openpecha.config import PECHA_ANNOTATION_STORE_ID, PECHA_DATASET_ID
+from openpecha.ids import get_uuid
 from openpecha.pecha.annotation import Annotation
 
 
@@ -9,7 +15,60 @@ class LayerEnum(Enum):
     commentaries = "Commentaries"
 
 
+def get_annotation_category():
+    # TODO
+    # Return annotation category based on the annotation label
+    return "Structure Type"
+
+
 class Layer:
     def __init__(self, annotation_label: LayerEnum, annotations: Dict[str, Annotation]):
         self.annotation_label = annotation_label
         self.annotations = annotations
+
+    def covert_to_relative_path(self, json_string: str, export_path: Path):
+        """convert the absolute path to relative path for base file path in json string"""
+        json_object = json.loads(json_string)
+        for resource in json_object["resources"]:
+            original_path = Path(resource["@include"])
+            resource["@include"] = str(original_path.relative_to(export_path))
+        return json_object
+
+    def write_layer(self, base_file_path: Path, export_path: Path):
+        """write annotations in stam data model"""
+        self.annotation_store = AnnotationStore(id=PECHA_ANNOTATION_STORE_ID)
+        self.resource = self.annotation_store.add_resource(
+            id=base_file_path.name, filename=base_file_path.as_posix()
+        )
+        self.dataset = self.annotation_store.add_dataset(id=PECHA_DATASET_ID)
+
+        annotation_category = get_annotation_category()
+        self.dataset.add_key(annotation_category)
+
+        unique_annotation_data_id = get_uuid()
+        for annotation_id, annotation in self.annotations.items():
+            target = Selector.textselector(
+                self.resource,
+                Offset.simple(annotation.start, annotation.end),
+            )
+            data = [
+                {
+                    "id": unique_annotation_data_id,
+                    "key": annotation_category,
+                    "value": self.annotation_label.value,
+                    "set": self.dataset.id(),
+                }
+            ]
+            self.annotation_store.annotate(
+                id=annotation_id,
+                target=target,
+                data=data,
+            )
+        """ save annotations in json"""
+        json_string = self.annotation_store.to_json_string()
+        json_object = self.covert_to_relative_path(json_string, export_path)
+        with open(
+            export_path / f"{self.annotation_label.value}.json",
+            "w",
+        ) as f:
+            f.write(json.dumps(json_object, indent=4, ensure_ascii=False))

From 12a379fa977894cca418db5ff42bf37daea7c71d Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Thu, 4 Jul 2024 09:52:34 +0530
Subject: [PATCH 34/34] Pecha write function

---
 src/openpecha/pecha/__init__.py | 32 +++++++++++++++++++++++++++++---
 src/openpecha/pecha/layer.py    |  6 ++++--
 2 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/src/openpecha/pecha/__init__.py b/src/openpecha/pecha/__init__.py
index 2e761ef..781d99e 100644
--- a/src/openpecha/pecha/__init__.py
+++ b/src/openpecha/pecha/__init__.py
@@ -20,9 +20,9 @@ class Pecha:
     def __init__(
         self,
         pecha_id: str,
-        bases: Dict[str, str] = None,
-        layers: Dict[str, Dict[LayerEnum, Layer]] = None,
-        metadata: Dict[str, str] = None,
+        bases: Dict[str, str],
+        layers: Dict[str, Dict[LayerEnum, Layer]],
+        metadata: Dict[str, str],
     ) -> None:
         self.pecha_id = pecha_id
         self.bases = bases
@@ -36,3 +36,29 @@ def from_path(cls, path: str):
     @classmethod
     def from_id(cls, pecha_id: str):
         pass
+
+    def write(self, export_path: Path = PECHAS_PATH):
+
+        pecha_dir = _mkdir(export_path / self.pecha_id)
+        self.base_path = _mkdir(pecha_dir / f"{self.pecha_id}.opf")
+        """ write metadata """
+        self.metadata_fn = self.base_path / "metadata.json"
+        self.metadata_fn.write_text(
+            json.dumps(self.metadata, indent=4, ensure_ascii=False), encoding="utf-8"
+        )
+
+        """ write base file"""
+        base_dir = _mkdir(self.base_path / "base")
+        for base_fname, base_text in self.bases.items():
+            base_fn = base_dir / f"{base_fname}.txt"
+            base_fn.write_text(base_text, encoding="utf-8")
+
+        layer_dir = _mkdir(self.base_path / "layers")
+        """ write annotation layers"""
+        for layer_fname, layer_data in self.layers.items():
+            for _, layer in layer_data.items():
+                _mkdir(layer_dir / layer_fname)
+                layer.write(
+                    base_file_path=base_dir / layer_fname,
+                    export_path=layer_dir / layer_fname,
+                )
diff --git a/src/openpecha/pecha/layer.py b/src/openpecha/pecha/layer.py
index 038336b..671896c 100644
--- a/src/openpecha/pecha/layer.py
+++ b/src/openpecha/pecha/layer.py
@@ -34,7 +34,7 @@ def covert_to_relative_path(self, json_string: str, export_path: Path):
             resource["@include"] = str(original_path.relative_to(export_path))
         return json_object
 
-    def write_layer(self, base_file_path: Path, export_path: Path):
+    def write(self, base_file_path: Path, export_path: Path):
         """write annotations in stam data model"""
         self.annotation_store = AnnotationStore(id=PECHA_ANNOTATION_STORE_ID)
         self.resource = self.annotation_store.add_resource(
@@ -67,8 +67,10 @@ def write_layer(self, base_file_path: Path, export_path: Path):
         """ save annotations in json"""
         json_string = self.annotation_store.to_json_string()
         json_object = self.covert_to_relative_path(json_string, export_path)
+        """ append the first four characters of a new uuid to the layer file name to avoid name clashes"""
+        layer_fname = f"{self.annotation_label.value}-{get_uuid()[:4]}.json"
         with open(
-            export_path / f"{self.annotation_label.value}.json",
+            export_path / layer_fname,
             "w",
         ) as f:
             f.write(json.dumps(json_object, indent=4, ensure_ascii=False))