From 12878b84a3b1db2cd8272197f6421aa2cbbf33dd Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Thu, 4 Jul 2024 10:12:30 +0530
Subject: [PATCH 01/49] fix/layer name in Layer write function

---
 src/openpecha/pecha/__init__.py | 4 ++--
 src/openpecha/pecha/layer.py    | 7 +++++--
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/openpecha/pecha/__init__.py b/src/openpecha/pecha/__init__.py
index 781d99e..41a7b6f 100644
--- a/src/openpecha/pecha/__init__.py
+++ b/src/openpecha/pecha/__init__.py
@@ -59,6 +59,6 @@ def write(self, export_path: Path = PECHAS_PATH):
             for _, layer in layer_data.items():
                 _mkdir(layer_dir / layer_fname)
                 layer.write(
-                    base_file_path=base_dir / layer_fname,
-                    export_path=layer_dir / layer_fname,
+                    base_file_path=base_dir / f"{layer_fname}.txt",
+                    export_path=export_path,
                 )
diff --git a/src/openpecha/pecha/layer.py b/src/openpecha/pecha/layer.py
index 671896c..86441ad 100644
--- a/src/openpecha/pecha/layer.py
+++ b/src/openpecha/pecha/layer.py
@@ -68,9 +68,12 @@ def write(self, base_file_path: Path, export_path: Path):
         json_string = self.annotation_store.to_json_string()
         json_object = self.covert_to_relative_path(json_string, export_path)
         """ add four uuid digits to the layer file name for uniqueness"""
-        layer_fname = f"{self.annotation_label.value}-{get_uuid()[:4]}.json"
+        layer_dir = base_file_path.parent.parent / "layers" / base_file_path.stem
+        layer_file_path = (
+            layer_dir / f"{self.annotation_label.value}-{get_uuid()[:4]}.json"
+        )
         with open(
-            export_path / layer_fname,
+            layer_file_path,
             "w",
         ) as f:
             f.write(json.dumps(json_object, indent=4, ensure_ascii=False))

From 2dcf475c893c5b5ea2345ddd8f2b1bb28a14d816 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Thu, 4 Jul 2024 10:19:29 +0530
Subject: [PATCH 02/49] modify LayerEnum value

---
 src/openpecha/pecha/layer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/openpecha/pecha/layer.py b/src/openpecha/pecha/layer.py
index 86441ad..b9f08c7 100644
--- a/src/openpecha/pecha/layer.py
+++ b/src/openpecha/pecha/layer.py
@@ -12,7 +12,7 @@
 
 class LayerEnum(Enum):
     segment = "Segment"
-    commentaries = "Commentaries"
+    commentaries = "Comment"
 
 
 def get_annotation_category():

From 2558f1904e30c0fa390ffb39398194344cd1217a Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Thu, 4 Jul 2024 10:20:36 +0530
Subject: [PATCH 03/49] modify/metadata definition in test_plaintext

---
 .../parsers/plaintext/test_plaintext.py       | 25 ++-----------------
 1 file changed, 2 insertions(+), 23 deletions(-)

diff --git a/tests/alignment/parsers/plaintext/test_plaintext.py b/tests/alignment/parsers/plaintext/test_plaintext.py
index 60c1aea..78084fb 100644
--- a/tests/alignment/parsers/plaintext/test_plaintext.py
+++ b/tests/alignment/parsers/plaintext/test_plaintext.py
@@ -11,17 +11,15 @@ def get_data_dir():
 def get_metadata():
     return {
         "source": {
-            "annotation_category": "Structure Type",
             "annotation_label": "Segment",
         },
         "target": {
-            "annotation_category": "Structure Type",
             "annotation_label": "Comment",
         },
     }
 
 
-def test_plaintext_parse():
+def test_PlainTextLineAlignedParser_parse():
     DATA_DIR = get_data_dir()
     source_path = DATA_DIR / "segments.txt"
     target_path = DATA_DIR / "comments.txt"
@@ -30,26 +28,7 @@ def test_plaintext_parse():
     plaintext = PlainTextLineAlignedParser.from_files(
         source_path, target_path, metadata
     )
-    plaintext.parse()
-
-    assert (
-        len(plaintext.source_segments) == 5
-    ), "plaintext parser is not parsing source_segments correctly"
-    assert (
-        len(plaintext.target_segments) == 5
-    ), "plaintext parser is not parsing target_segments correctly"
-
-
-def test_plaintext_save():
-    DATA_DIR = get_data_dir()
-    source_path = DATA_DIR / "segments.txt"
-    target_path = DATA_DIR / "comments.txt"
-
-    metadata = get_metadata()
-    plaintext = PlainTextLineAlignedParser.from_files(
-        source_path, target_path, metadata
-    )
-    source_pecha, target_pecha = plaintext.save()
+    source_pecha, target_pecha = plaintext.parse()
 
     assert isinstance(
         source_pecha, Pecha

From d2107160c895c4c2d9fd3188126a578f52cfc2f1 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Thu, 4 Jul 2024 10:51:03 +0530
Subject: [PATCH 04/49] test for pecha write function

---
 .../base/f2b056668a0c4ad3a085bdcd8e2d7adb.txt |  1 +
 .../Segment-bf13.json                         | 88 +++++++++++++++++++
 .../IE7D6875F/IE7D6875F.opf/metadata.json     |  3 +
 tests/pecha/test_pecha.py                     | 69 +++++++--------
 4 files changed, 122 insertions(+), 39 deletions(-)
 create mode 100644 tests/pecha/expected_output/IE7D6875F/IE7D6875F.opf/base/f2b056668a0c4ad3a085bdcd8e2d7adb.txt
 create mode 100644 tests/pecha/expected_output/IE7D6875F/IE7D6875F.opf/layers/f2b056668a0c4ad3a085bdcd8e2d7adb/Segment-bf13.json
 create mode 100644 tests/pecha/expected_output/IE7D6875F/IE7D6875F.opf/metadata.json

diff --git a/tests/pecha/expected_output/IE7D6875F/IE7D6875F.opf/base/f2b056668a0c4ad3a085bdcd8e2d7adb.txt b/tests/pecha/expected_output/IE7D6875F/IE7D6875F.opf/base/f2b056668a0c4ad3a085bdcd8e2d7adb.txt
new file mode 100644
index 0000000..0b166fc
--- /dev/null
+++ b/tests/pecha/expected_output/IE7D6875F/IE7D6875F.opf/base/f2b056668a0c4ad3a085bdcd8e2d7adb.txt
@@ -0,0 +1 @@
+རྒྱ་གར་སྐད་དུ། བོ་དྷི་སཏྭ་ཙརྱ་ཨ་བ་ཏཱ་ར།བོད་སྐད་དུ། བྱང་ཆུབ་སེམས་དཔའི་སྤྱོད་པ་ལ་འཇུག་པའི་ལེགས་པར་སྦྱར་བ།
\ No newline at end of file
diff --git a/tests/pecha/expected_output/IE7D6875F/IE7D6875F.opf/layers/f2b056668a0c4ad3a085bdcd8e2d7adb/Segment-bf13.json b/tests/pecha/expected_output/IE7D6875F/IE7D6875F.opf/layers/f2b056668a0c4ad3a085bdcd8e2d7adb/Segment-bf13.json
new file mode 100644
index 0000000..92bcaec
--- /dev/null
+++ b/tests/pecha/expected_output/IE7D6875F/IE7D6875F.opf/layers/f2b056668a0c4ad3a085bdcd8e2d7adb/Segment-bf13.json
@@ -0,0 +1,88 @@
+{
+    "@type": "AnnotationStore",
+    "@id": "PechaAnnotationStore",
+    "resources": [
+        {
+            "@type": "TextResource",
+            "@id": "f2b056668a0c4ad3a085bdcd8e2d7adb.txt",
+            "@include": "IE7D6875F/IE7D6875F.opf/base/f2b056668a0c4ad3a085bdcd8e2d7adb.txt"
+        }
+    ],
+    "annotationsets": [
+        {
+            "@type": "AnnotationDataSet",
+            "@id": "PechaDataSet",
+            "keys": [
+                {
+                    "@type": "DataKey",
+                    "@id": "Structure Type"
+                }
+            ],
+            "data": [
+                {
+                    "@type": "AnnotationData",
+                    "@id": "0c2c4165fb58464eabf9db0d6a3a1080",
+                    "key": "Structure Type",
+                    "value": {
+                        "@type": "String",
+                        "value": "Segment"
+                    }
+                }
+            ]
+        }
+    ],
+    "annotations": [
+        {
+            "@type": "Annotation",
+            "@id": "f2b056668a0c4ad3a085bdcd8e2d7adb",
+            "target": {
+                "@type": "TextSelector",
+                "resource": "f2b056668a0c4ad3a085bdcd8e2d7adb.txt",
+                "offset": {
+                    "@type": "Offset",
+                    "begin": {
+                        "@type": "BeginAlignedCursor",
+                        "value": 0
+                    },
+                    "end": {
+                        "@type": "BeginAlignedCursor",
+                        "value": 39
+                    }
+                }
+            },
+            "data": [
+                {
+                    "@type": "AnnotationData",
+                    "@id": "0c2c4165fb58464eabf9db0d6a3a1080",
+                    "set": "PechaDataSet"
+                }
+            ]
+        },
+        {
+            "@type": "Annotation",
+            "@id": "b696df2dbe314e8a87881a2bc391d0d5",
+            "target": {
+                "@type": "TextSelector",
+                "resource": "f2b056668a0c4ad3a085bdcd8e2d7adb.txt",
+                "offset": {
+                    "@type": "Offset",
+                    "begin": {
+                        "@type": "BeginAlignedCursor",
+                        "value": 39
+                    },
+                    "end": {
+                        "@type": "BeginAlignedCursor",
+                        "value": 103
+                    }
+                }
+            },
+            "data": [
+                {
+                    "@type": "AnnotationData",
+                    "@id": "0c2c4165fb58464eabf9db0d6a3a1080",
+                    "set": "PechaDataSet"
+                }
+            ]
+        }
+    ]
+}
\ No newline at end of file
diff --git a/tests/pecha/expected_output/IE7D6875F/IE7D6875F.opf/metadata.json b/tests/pecha/expected_output/IE7D6875F/IE7D6875F.opf/metadata.json
new file mode 100644
index 0000000..cb740ab
--- /dev/null
+++ b/tests/pecha/expected_output/IE7D6875F/IE7D6875F.opf/metadata.json
@@ -0,0 +1,3 @@
+{
+    "annotation_label": "Segment"
+}
\ No newline at end of file
diff --git a/tests/pecha/test_pecha.py b/tests/pecha/test_pecha.py
index 47f8e18..52ba75d 100644
--- a/tests/pecha/test_pecha.py
+++ b/tests/pecha/test_pecha.py
@@ -3,75 +3,66 @@
 
 from openpecha.pecha import Pecha
 from openpecha.pecha.annotation import Annotation
+from openpecha.pecha.layer import Layer, LayerEnum
 
 
 def get_data_dir():
-    export_path = Path(__file__).parent / "data"
+    export_path = Path(__file__).parent / "output"
     export_path.mkdir(parents=True, exist_ok=True)
     return export_path
 
 
-def get_segments():
+def get_metadata():
     return {
-        "f2b056668a0c4ad3a085bdcd8e2d7adb": "རྒྱ་གར་སྐད་དུ། བོ་དྷི་སཏྭ་ཙརྱ་ཨ་བ་ཏཱ་ར།",
-        "b696df2dbe314e8a87881a2bc391d0d5": "བོད་སྐད་དུ། བྱང་ཆུབ་སེམས་དཔའི་སྤྱོད་པ་ལ་འཇུག་པའི་ལེགས་པར་སྦྱར་བ།",
+        "annotation_label": "Segment",
     }
 
 
-def get_metadata():
+def get_base():
     return {
-        "annotation_category": "Structure Type",
-        "annotation_label": "Segment",
+        "f2b056668a0c4ad3a085bdcd8e2d7adb": "རྒྱ་གར་སྐད་དུ། བོ་དྷི་སཏྭ་ཙརྱ་ཨ་བ་ཏཱ་ར།བོད་སྐད་དུ། བྱང་ཆུབ་སེམས་དཔའི་སྤྱོད་པ་ལ་འཇུག་པའི་ལེགས་པར་སྦྱར་བ།"  # noqa
     }
 
 
-def get_expected_annotations():
-    expected_annotations = [
-        Annotation(
-            id_="f2b056668a0c4ad3a085bdcd8e2d7adb",
+def get_layer():
+    return {
+        "f2b056668a0c4ad3a085bdcd8e2d7adb": {
+            LayerEnum("Segment"): Layer(LayerEnum("Segment"), get_annotations())
+        }
+    }
+
+
+def get_annotations():
+    return {
+        "f2b056668a0c4ad3a085bdcd8e2d7adb": Annotation(
             segment="རྒྱ་གར་སྐད་དུ། བོ་དྷི་སཏྭ་ཙརྱ་ཨ་བ་ཏཱ་ར།",
             start=0,
             end=39,
             metadata={},
         ),
-        Annotation(
-            id_="b696df2dbe314e8a87881a2bc391d0d5",
+        "b696df2dbe314e8a87881a2bc391d0d5": Annotation(
             segment="བོད་སྐད་དུ། བྱང་ཆུབ་སེམས་དཔའི་སྤྱོད་པ་ལ་འཇུག་པའི་ལེགས་པར་སྦྱར་བ།",
             start=39,
             end=103,
             metadata={},
         ),
-    ]
-    return expected_annotations
+    }
 
 
-def test_pecha_set_annotations():
+def test_pecha_write():
     pecha_id = "IE7D6875F"
-    segments = get_segments()
-    metadata = get_metadata()
-    pecha = Pecha(pecha_id=pecha_id, segments=segments, metadata=metadata)
-    assert isinstance(
-        pecha, Pecha
-    ), "Not able to create Pecha object with id, segments and metadata"
+    base = get_base()
+    layer = get_layer()
+    export_path = get_data_dir()
+    expected_output_path = Path(__file__).parent / "expected_output"
 
-    annotations = list(pecha.set_annotations())
-    assert (
-        annotations == get_expected_annotations()
-    ), "Pecha not able to set annotations for the segments"
+    pecha = Pecha(pecha_id=pecha_id, bases=base, layers=layer, metadata=get_metadata())
+    pecha.write(export_path=export_path)
 
+    output_file_names = [file.name for file in export_path.rglob("*")].sort()
+    expected_file_names = [file.name for file in expected_output_path.rglob("*")].sort()
 
-def test_pecha_write_annotations():
-    pecha_id = "IE7D6875F"
-    segments = get_segments()
-    metadata = get_metadata()
-    pecha = Pecha(pecha_id=pecha_id, segments=segments, metadata=metadata)
-    export_path = get_data_dir()
-    pecha.write_annotations(export_path=export_path)
-    assert pecha.base_fn.exists(), "Pecha not able to write base file"
-    assert pecha.metadata_fn.exists(), "Pecha not able to write metadata file"
-    assert pecha.annotation_fn.rglob(
-        "*.json"
-    ), "Pecha not able to write annotation file"
+    assert output_file_names == expected_file_names
 
     """ clean up """
-    rmtree(Path(export_path / pecha_id))
+    rmtree(export_path)

From 7437fb07c45b0137fd38ad405348f9fce9d27970 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Thu, 4 Jul 2024 11:20:23 +0530
Subject: [PATCH 05/49] raise ValueError if annotation segment doesn't match
 the base text

---
 src/openpecha/pecha/layer.py | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/src/openpecha/pecha/layer.py b/src/openpecha/pecha/layer.py
index b9f08c7..60a797e 100644
--- a/src/openpecha/pecha/layer.py
+++ b/src/openpecha/pecha/layer.py
@@ -34,19 +34,19 @@ def covert_to_relative_path(self, json_string: str, export_path: Path):
             resource["@include"] = str(original_path.relative_to(export_path))
         return json_object
 
-    def write(self, base_file_path: Path, export_path: Path):
-        """write annotations in stam data model"""
-        self.annotation_store = AnnotationStore(id=PECHA_ANNOTATION_STORE_ID)
-        self.resource = self.annotation_store.add_resource(
-            id=base_file_path.name, filename=base_file_path.as_posix()
-        )
-        self.dataset = self.annotation_store.add_dataset(id=PECHA_DATASET_ID)
-
+    def set_annotations(self):
         annotation_category = get_annotation_category()
         self.dataset.add_key(annotation_category)
-
         unique_annotation_data_id = get_uuid()
+        base_text = self.base_file_path.read_text(encoding="utf-8")
         for annotation_id, annotation in self.annotations.items():
+            if (
+                annotation.segment
+                != base_text[annotation.start : annotation.end]  # noqa
+            ):
+                raise ValueError(
+                    f"Annotation segment does not match the base text at {annotation_id}"
+                )
             target = Selector.textselector(
                 self.resource,
                 Offset.simple(annotation.start, annotation.end),
@@ -64,6 +64,16 @@ def write(self, base_file_path: Path, export_path: Path):
                 target=target,
                 data=data,
             )
+
+    def write(self, base_file_path: Path, export_path: Path):
+        self.base_file_path = base_file_path
+        """write annotations in stam data model"""
+        self.annotation_store = AnnotationStore(id=PECHA_ANNOTATION_STORE_ID)
+        self.resource = self.annotation_store.add_resource(
+            id=base_file_path.name, filename=base_file_path.as_posix()
+        )
+        self.dataset = self.annotation_store.add_dataset(id=PECHA_DATASET_ID)
+        self.set_annotations()
         """ save annotations in json"""
         json_string = self.annotation_store.to_json_string()
         json_object = self.covert_to_relative_path(json_string, export_path)

From 57991fe902b3628e717f622bafb044e92f3218a1 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Thu, 4 Jul 2024 11:38:57 +0530
Subject: [PATCH 06/49] Layer function set annotation

---
 src/openpecha/alignment/parsers/plaintext.py |  8 +++----
 src/openpecha/pecha/layer.py                 | 24 +++++++++++---------
 2 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/src/openpecha/alignment/parsers/plaintext.py b/src/openpecha/alignment/parsers/plaintext.py
index 92a1dfd..c151000 100644
--- a/src/openpecha/alignment/parsers/plaintext.py
+++ b/src/openpecha/alignment/parsers/plaintext.py
@@ -1,5 +1,4 @@
 from pathlib import Path
-from typing import Dict
 
 from openpecha.ids import get_initial_pecha_id, get_uuid
 from openpecha.pecha import Pecha
@@ -21,18 +20,19 @@ def from_files(cls, source_path: Path, target_path: Path, metadata: dict):
 
     def create_pecha_layer(self, base_text: str, annotation: LayerEnum):
         """ """
-        layer_annotations: Dict[str, Annotation] = {}
+        layer = Layer(annotation_label=annotation, annotations={})
         char_count = 0
         for segment in base_text.split("\n"):
-            layer_annotations[get_uuid()] = Annotation(
+            annotation = Annotation(
                 id_=get_uuid(),
                 segment=segment,
                 start=char_count,
                 end=char_count + len(segment),
             )
+            layer.set_annotation(annotation)
             char_count += len(segment)
 
-        return Layer(annotation_label=annotation, annotations=layer_annotations)
+        return layer
 
     def parse(self):
         source_pecha_id, target_pecha_id = (
diff --git a/src/openpecha/pecha/layer.py b/src/openpecha/pecha/layer.py
index 60a797e..e888f94 100644
--- a/src/openpecha/pecha/layer.py
+++ b/src/openpecha/pecha/layer.py
@@ -26,6 +26,11 @@ def __init__(self, annotation_label: LayerEnum, annotations: Dict[str, Annotatio
         self.annotation_label = annotation_label
         self.annotations = annotations
 
+    def set_annotation(self, annotation: Annotation, annotation_id=None):
+        if not annotation_id:
+            annotation_id = get_uuid()
+        self.annotations[annotation_id] = annotation
+
     def covert_to_relative_path(self, json_string: str, export_path: Path):
         """convert the absolute path to relative path for base file path in json string"""
         json_object = json.loads(json_string)
@@ -34,7 +39,14 @@ def covert_to_relative_path(self, json_string: str, export_path: Path):
             resource["@include"] = str(original_path.relative_to(export_path))
         return json_object
 
-    def set_annotations(self):
+    def write(self, base_file_path: Path, export_path: Path):
+        self.base_file_path = base_file_path
+        """write annotations in stam data model"""
+        self.annotation_store = AnnotationStore(id=PECHA_ANNOTATION_STORE_ID)
+        self.resource = self.annotation_store.add_resource(
+            id=base_file_path.name, filename=base_file_path.as_posix()
+        )
+        self.dataset = self.annotation_store.add_dataset(id=PECHA_DATASET_ID)
         annotation_category = get_annotation_category()
         self.dataset.add_key(annotation_category)
         unique_annotation_data_id = get_uuid()
@@ -64,16 +76,6 @@ def set_annotations(self):
                 target=target,
                 data=data,
             )
-
-    def write(self, base_file_path: Path, export_path: Path):
-        self.base_file_path = base_file_path
-        """write annotations in stam data model"""
-        self.annotation_store = AnnotationStore(id=PECHA_ANNOTATION_STORE_ID)
-        self.resource = self.annotation_store.add_resource(
-            id=base_file_path.name, filename=base_file_path.as_posix()
-        )
-        self.dataset = self.annotation_store.add_dataset(id=PECHA_DATASET_ID)
-        self.set_annotations()
         """ save annotations in json"""
         json_string = self.annotation_store.to_json_string()
         json_object = self.covert_to_relative_path(json_string, export_path)

From 6879c9bd9a420e7f16141d7daa3db081bf4ae84b Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Thu, 4 Jul 2024 11:53:21 +0530
Subject: [PATCH 07/49] Pecha set base file, layer and metadata file

---
 src/openpecha/alignment/parsers/plaintext.py | 37 ++++++--------
 src/openpecha/pecha/__init__.py              | 54 ++++++++++++++------
 2 files changed, 53 insertions(+), 38 deletions(-)

diff --git a/src/openpecha/alignment/parsers/plaintext.py b/src/openpecha/alignment/parsers/plaintext.py
index c151000..274e4bf 100644
--- a/src/openpecha/alignment/parsers/plaintext.py
+++ b/src/openpecha/alignment/parsers/plaintext.py
@@ -39,35 +39,30 @@ def parse(self):
             get_initial_pecha_id(),
             get_initial_pecha_id(),
         )
+        source_pecha = Pecha(source_pecha_id)
+        target_pecha = Pecha(target_pecha_id)
 
         source_base_fname, target_base_fname = get_uuid(), get_uuid()
-        source_base_files = {source_base_fname: self.source_text}
-        target_base_files = {target_base_fname: self.target_text}
+        source_pecha.set_base_file(source_base_fname, self.source_text)
+        target_pecha.set_base_file(target_base_fname, self.target_text)
 
         source_annotation = LayerEnum(self.metadata["source"]["annotation_label"])
         target_annotation = LayerEnum(self.metadata["target"]["annotation_label"])
 
-        source_layers = {
-            source_base_fname: {
-                source_annotation: self.create_pecha_layer(
-                    self.source_text, source_annotation
-                )
-            }
-        }
-        target_layers = {
-            target_base_fname: {
-                target_annotation: self.create_pecha_layer(
-                    self.target_text, target_annotation
-                ),
-            }
-        }
-
-        source_pecha = Pecha(  # noqa
-            source_pecha_id, source_base_files, source_layers, self.metadata["source"]
+        source_pecha.set_layer(
+            source_base_fname,
+            source_annotation,
+            self.create_pecha_layer(self.source_text, source_annotation),
         )
-        target_pecha = Pecha(  # noqa
-            target_pecha_id, target_base_files, target_layers, self.metadata["target"]
+        target_pecha.set_layer(
+            target_base_fname,
+            target_annotation,
+            self.create_pecha_layer(self.target_text, target_annotation),
         )
+
+        source_pecha.set_metadata(self.metadata["source"])
+        target_pecha.set_metadata(self.metadata["target"])
+
         return source_pecha, target_pecha
 
         # TODO:
diff --git a/src/openpecha/pecha/__init__.py b/src/openpecha/pecha/__init__.py
index 41a7b6f..bca961a 100644
--- a/src/openpecha/pecha/__init__.py
+++ b/src/openpecha/pecha/__init__.py
@@ -20,9 +20,9 @@ class Pecha:
     def __init__(
         self,
         pecha_id: str,
-        bases: Dict[str, str],
-        layers: Dict[str, Dict[LayerEnum, Layer]],
-        metadata: Dict[str, str],
+        bases: Dict[str, str] = None,
+        layers: Dict[str, Dict[LayerEnum, Layer]] = None,
+        metadata: Dict[str, str] = None,
     ) -> None:
         self.pecha_id = pecha_id
         self.bases = bases
@@ -37,6 +37,25 @@ def from_path(cls, path: str):
     def from_id(cls, pecha_id: str):
         pass
 
+    def set_base_file(self, base_file_name: str, base_text: str):
+        if not self.bases:
+            self.bases = {}
+        self.bases[base_file_name] = base_text
+
+    def set_layer(self, layer_dir: str, layer: LayerEnum, layer_data: Layer):
+        """Note layer dir should be same as its corresponding base file name"""
+        if not self.layers:
+            self.layers = {}
+        if layer_dir not in self.layers:
+            self.layers[layer_dir] = {}
+        self.layers[layer_dir][layer] = layer_data
+
+    def set_metadata(self, metadata: Dict[str, str]):
+        if not self.metadata:
+            self.metadata = {}
+        for key, value in metadata.items():
+            self.metadata[key] = value
+
     def write(self, export_path: Path = PECHAS_PATH):
 
         pecha_dir = _mkdir(export_path / self.pecha_id)
@@ -48,17 +67,18 @@ def write(self, export_path: Path = PECHAS_PATH):
         )
 
         """ write base file"""
-        base_dir = _mkdir(self.base_path / "base")
-        for base_fname, base_text in self.bases.items():
-            base_fn = base_dir / f"{base_fname}.txt"
-            base_fn.write_text(base_text, encoding="utf-8")
-
-        layer_dir = _mkdir(self.base_path / "layers")
-        """ write annotation layers"""
-        for layer_fname, layer_data in self.layers.items():
-            for _, layer in layer_data.items():
-                _mkdir(layer_dir / layer_fname)
-                layer.write(
-                    base_file_path=base_dir / f"{layer_fname}.txt",
-                    export_path=export_path,
-                )
+        if self.bases:
+            base_dir = _mkdir(self.base_path / "base")
+            for base_fname, base_text in self.bases.items():
+                base_fn = base_dir / f"{base_fname}.txt"
+                base_fn.write_text(base_text, encoding="utf-8")
+        if self.layers:
+            layer_dir = _mkdir(self.base_path / "layers")
+            """ write annotation layers"""
+            for layer_fname, layer_data in self.layers.items():
+                for _, layer in layer_data.items():
+                    _mkdir(layer_dir / layer_fname)
+                    layer.write(
+                        base_file_path=base_dir / f"{layer_fname}.txt",
+                        export_path=export_path,
+                    )

From 1eb9be4f4e4c021b79d9aad9e46a7ce2e897f07d Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Thu, 4 Jul 2024 11:54:26 +0530
Subject: [PATCH 08/49] refactor

---
 src/openpecha/pecha/__init__.py | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/src/openpecha/pecha/__init__.py b/src/openpecha/pecha/__init__.py
index bca961a..d3be453 100644
--- a/src/openpecha/pecha/__init__.py
+++ b/src/openpecha/pecha/__init__.py
@@ -1,18 +1,8 @@
 import json
 from pathlib import Path
-from shutil import rmtree
 from typing import Dict
 
-from stam import AnnotationStore, Offset, Selector
-
-from openpecha.config import (
-    PECHA_ANNOTATION_STORE_ID,
-    PECHA_DATASET_ID,
-    PECHAS_PATH,
-    _mkdir,
-)
-from openpecha.ids import get_uuid
-from openpecha.pecha.annotation import Annotation
+from openpecha.config import PECHAS_PATH, _mkdir
 from openpecha.pecha.layer import Layer, LayerEnum
 
 

From 47290948bd8fbcc10136f03c259cff5c8fd6dfee Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Thu, 4 Jul 2024 12:03:46 +0530
Subject: [PATCH 09/49] refactor code

---
 src/openpecha/alignment/parsers/plaintext.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/openpecha/alignment/parsers/plaintext.py b/src/openpecha/alignment/parsers/plaintext.py
index 274e4bf..95a38bd 100644
--- a/src/openpecha/alignment/parsers/plaintext.py
+++ b/src/openpecha/alignment/parsers/plaintext.py
@@ -1,4 +1,5 @@
 from pathlib import Path
+from typing import List
 
 from openpecha.ids import get_initial_pecha_id, get_uuid
 from openpecha.pecha import Pecha
@@ -18,13 +19,12 @@ def from_files(cls, source_path: Path, target_path: Path, metadata: dict):
         target_text = target_path.read_text(encoding="utf-8")
         return cls(source_text, target_text, metadata)
 
-    def create_pecha_layer(self, base_text: str, annotation: LayerEnum):
+    def create_pecha_layer(self, segments: List[str], annotation_label: LayerEnum):
         """ """
-        layer = Layer(annotation_label=annotation, annotations={})
+        layer = Layer(annotation_label=annotation_label, annotations={})
         char_count = 0
-        for segment in base_text.split("\n"):
+        for segment in segments:
             annotation = Annotation(
-                id_=get_uuid(),
                 segment=segment,
                 start=char_count,
                 end=char_count + len(segment),
@@ -52,12 +52,12 @@ def parse(self):
         source_pecha.set_layer(
             source_base_fname,
             source_annotation,
-            self.create_pecha_layer(self.source_text, source_annotation),
+            self.create_pecha_layer(self.source_text.split("\n"), source_annotation),
         )
         target_pecha.set_layer(
             target_base_fname,
             target_annotation,
-            self.create_pecha_layer(self.target_text, target_annotation),
+            self.create_pecha_layer(self.target_text.split("\n"), target_annotation),
         )
 
         source_pecha.set_metadata(self.metadata["source"])

From 40b9781bc3a64b91cb35203c44d2e5c22040c079 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Thu, 4 Jul 2024 12:16:26 +0530
Subject: [PATCH 10/49] modify/set Tuple of LayerEnum and str as Layer key

---
 src/openpecha/alignment/parsers/plaintext.py |  4 ++--
 src/openpecha/pecha/__init__.py              | 16 ++++++++++++----
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/src/openpecha/alignment/parsers/plaintext.py b/src/openpecha/alignment/parsers/plaintext.py
index 95a38bd..1ea8ded 100644
--- a/src/openpecha/alignment/parsers/plaintext.py
+++ b/src/openpecha/alignment/parsers/plaintext.py
@@ -51,12 +51,12 @@ def parse(self):
 
         source_pecha.set_layer(
             source_base_fname,
-            source_annotation,
+            (source_annotation, None),
             self.create_pecha_layer(self.source_text.split("\n"), source_annotation),
         )
         target_pecha.set_layer(
             target_base_fname,
-            target_annotation,
+            (target_annotation, None),
             self.create_pecha_layer(self.target_text.split("\n"), target_annotation),
         )
 
diff --git a/src/openpecha/pecha/__init__.py b/src/openpecha/pecha/__init__.py
index d3be453..77b2e18 100644
--- a/src/openpecha/pecha/__init__.py
+++ b/src/openpecha/pecha/__init__.py
@@ -1,8 +1,9 @@
 import json
 from pathlib import Path
-from typing import Dict
+from typing import Dict, Optional, Tuple
 
 from openpecha.config import PECHAS_PATH, _mkdir
+from openpecha.ids import get_uuid
 from openpecha.pecha.layer import Layer, LayerEnum
 
 
@@ -11,7 +12,7 @@ def __init__(
         self,
         pecha_id: str,
         bases: Dict[str, str] = None,
-        layers: Dict[str, Dict[LayerEnum, Layer]] = None,
+        layers: Dict[str, Dict[Tuple[LayerEnum, str], Layer]] = None,
         metadata: Dict[str, str] = None,
     ) -> None:
         self.pecha_id = pecha_id
@@ -32,13 +33,20 @@ def set_base_file(self, base_file_name: str, base_text: str):
             self.bases = {}
         self.bases[base_file_name] = base_text
 
-    def set_layer(self, layer_dir: str, layer: LayerEnum, layer_data: Layer):
+    def set_layer(
+        self, layer_dir: str, layer_key: Tuple[LayerEnum, Optional[str]], layer: Layer
+    ):
         """Note layer dir should be same as its corresponding base file name"""
         if not self.layers:
             self.layers = {}
         if layer_dir not in self.layers:
             self.layers[layer_dir] = {}
-        self.layers[layer_dir][layer] = layer_data
+
+        """ layer key is a tuple of layer label and layer id"""
+        """ A particular volume can have multiple layers with same label but different id"""
+        layer_label, layer_id = layer_key
+        layer_id = layer_id if layer_id else get_uuid()
+        self.layers[layer_dir][(layer_label, layer_id)] = layer
 
     def set_metadata(self, metadata: Dict[str, str]):
         if not self.metadata:

From ba8c3c94f164a6ec2b4ad665f65c5a8e7c235f2f Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Thu, 4 Jul 2024 12:21:23 +0530
Subject: [PATCH 11/49] get annotation category

---
 src/openpecha/pecha/layer.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/openpecha/pecha/layer.py b/src/openpecha/pecha/layer.py
index e888f94..60012cc 100644
--- a/src/openpecha/pecha/layer.py
+++ b/src/openpecha/pecha/layer.py
@@ -15,10 +15,13 @@ class LayerEnum(Enum):
     commentaries = "Comment"
 
 
-def get_annotation_category():
-    # TODO
-    # Return annotation category based on the annotation label
-    return "Structure Type"
+class LayerGroupEnum(Enum):
+    structure_type = "Structure Type"
+
+
+def get_annotation_category(layer_label: LayerEnum) -> LayerGroupEnum:
+    """return the annotation category for the layer label"""
+    return LayerGroupEnum.structure_type
 
 
 class Layer:
@@ -47,7 +50,7 @@ def write(self, base_file_path: Path, export_path: Path):
             id=base_file_path.name, filename=base_file_path.as_posix()
         )
         self.dataset = self.annotation_store.add_dataset(id=PECHA_DATASET_ID)
-        annotation_category = get_annotation_category()
+        annotation_category = get_annotation_category(self.annotation_label).value
         self.dataset.add_key(annotation_category)
         unique_annotation_data_id = get_uuid()
         base_text = self.base_file_path.read_text(encoding="utf-8")

From f22e47e14fb6acfe18f037a3598350a39e11259f Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Thu, 4 Jul 2024 12:31:48 +0530
Subject: [PATCH 12/49] refactor test folder structure

---
 .../IE7D6875F.opf/base/f2b056668a0c4ad3a085bdcd8e2d7adb.txt       | 0
 .../layers/f2b056668a0c4ad3a085bdcd8e2d7adb/Segment-bf13.json     | 0
 .../expected_output/IE7D6875F/IE7D6875F.opf/metadata.json         | 0
 tests/pecha/{ => write}/test_pecha.py                             | 0
 4 files changed, 0 insertions(+), 0 deletions(-)
 rename tests/pecha/{ => write}/expected_output/IE7D6875F/IE7D6875F.opf/base/f2b056668a0c4ad3a085bdcd8e2d7adb.txt (100%)
 rename tests/pecha/{ => write}/expected_output/IE7D6875F/IE7D6875F.opf/layers/f2b056668a0c4ad3a085bdcd8e2d7adb/Segment-bf13.json (100%)
 rename tests/pecha/{ => write}/expected_output/IE7D6875F/IE7D6875F.opf/metadata.json (100%)
 rename tests/pecha/{ => write}/test_pecha.py (100%)

diff --git a/tests/pecha/expected_output/IE7D6875F/IE7D6875F.opf/base/f2b056668a0c4ad3a085bdcd8e2d7adb.txt b/tests/pecha/write/expected_output/IE7D6875F/IE7D6875F.opf/base/f2b056668a0c4ad3a085bdcd8e2d7adb.txt
similarity index 100%
rename from tests/pecha/expected_output/IE7D6875F/IE7D6875F.opf/base/f2b056668a0c4ad3a085bdcd8e2d7adb.txt
rename to tests/pecha/write/expected_output/IE7D6875F/IE7D6875F.opf/base/f2b056668a0c4ad3a085bdcd8e2d7adb.txt
diff --git a/tests/pecha/expected_output/IE7D6875F/IE7D6875F.opf/layers/f2b056668a0c4ad3a085bdcd8e2d7adb/Segment-bf13.json b/tests/pecha/write/expected_output/IE7D6875F/IE7D6875F.opf/layers/f2b056668a0c4ad3a085bdcd8e2d7adb/Segment-bf13.json
similarity index 100%
rename from tests/pecha/expected_output/IE7D6875F/IE7D6875F.opf/layers/f2b056668a0c4ad3a085bdcd8e2d7adb/Segment-bf13.json
rename to tests/pecha/write/expected_output/IE7D6875F/IE7D6875F.opf/layers/f2b056668a0c4ad3a085bdcd8e2d7adb/Segment-bf13.json
diff --git a/tests/pecha/expected_output/IE7D6875F/IE7D6875F.opf/metadata.json b/tests/pecha/write/expected_output/IE7D6875F/IE7D6875F.opf/metadata.json
similarity index 100%
rename from tests/pecha/expected_output/IE7D6875F/IE7D6875F.opf/metadata.json
rename to tests/pecha/write/expected_output/IE7D6875F/IE7D6875F.opf/metadata.json
diff --git a/tests/pecha/test_pecha.py b/tests/pecha/write/test_pecha.py
similarity index 100%
rename from tests/pecha/test_pecha.py
rename to tests/pecha/write/test_pecha.py

From 11f79bbd7ad18b5b7f5ac240f424c3f74008c25b Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Thu, 4 Jul 2024 16:12:15 +0530
Subject: [PATCH 13/49] Layer class method from_path

---
 src/openpecha/pecha/layer.py | 42 ++++++++++++++++++++++++++++++------
 1 file changed, 36 insertions(+), 6 deletions(-)

diff --git a/src/openpecha/pecha/layer.py b/src/openpecha/pecha/layer.py
index 60012cc..241b5b2 100644
--- a/src/openpecha/pecha/layer.py
+++ b/src/openpecha/pecha/layer.py
@@ -24,22 +24,53 @@ def get_annotation_category(layer_label: LayerEnum) -> LayerGroupEnum:
     return LayerGroupEnum.structure_type
 
 
+def convert_relative_to_absolute_path(json_data, absolute_base_path: Path):
+    """call after loading the stam from json"""
+    for resource in json_data["resources"]:
+        original_path = Path(resource["@include"])
+        resource["@include"] = str(absolute_base_path / original_path)
+    return json_data
+
+
 class Layer:
     def __init__(self, annotation_label: LayerEnum, annotations: Dict[str, Annotation]):
         self.annotation_label = annotation_label
         self.annotations = annotations
 
+    @classmethod
+    def from_path(cls, layer_file_path: Path):
+        """get annotation label"""
+        annotation_label = LayerEnum(layer_file_path.stem.split("-")[0])
+        """ load annotations from json"""
+        with open(layer_file_path) as f:
+            json_data = json.load(f)
+        absolute_base_path = layer_file_path.parents[4]
+        json_data = convert_relative_to_absolute_path(json_data, absolute_base_path)
+        annotation_store = AnnotationStore(string=json.dumps(json_data))
+
+        layer_annotations: Dict[str, Annotation] = {}
+        for annotation in annotation_store.annotations():
+            annotation_id, segment = annotation.id(), str(annotation)
+            start = annotation.offset().begin().value()
+            end = annotation.offset().end().value()
+            layer_annotations[annotation_id] = Annotation(
+                segment=segment, start=start, end=end
+            )
+
+        return cls(annotation_label, layer_annotations)
+
     def set_annotation(self, annotation: Annotation, annotation_id=None):
         if not annotation_id:
             annotation_id = get_uuid()
         self.annotations[annotation_id] = annotation
 
-    def covert_to_relative_path(self, json_string: str, export_path: Path):
-        """convert the absolute path to relative path for base file path in json string"""
+    def convert_absolute_to_relative_path(self, absolute_base_path: Path):
+        """call before saving the stam in json"""
+        json_string = self.annotation_store.to_json()
         json_object = json.loads(json_string)
         for resource in json_object["resources"]:
             original_path = Path(resource["@include"])
-            resource["@include"] = str(original_path.relative_to(export_path))
+            resource["@include"] = str(original_path.relative_to(absolute_base_path))
         return json_object
 
     def write(self, base_file_path: Path, export_path: Path):
@@ -80,8 +111,7 @@ def write(self, base_file_path: Path, export_path: Path):
                 data=data,
             )
         """ save annotations in json"""
-        json_string = self.annotation_store.to_json_string()
-        json_object = self.covert_to_relative_path(json_string, export_path)
+        pecha_json = self.convert_absolute_to_relative_path(export_path)
         """ add four uuid digits to the layer file name for uniqueness"""
         layer_dir = base_file_path.parent.parent / "layers" / base_file_path.stem
         layer_file_path = (
@@ -91,4 +121,4 @@ def write(self, base_file_path: Path, export_path: Path):
             layer_file_path,
             "w",
         ) as f:
-            f.write(json.dumps(json_object, indent=4, ensure_ascii=False))
+            f.write(json.dumps(pecha_json, indent=4, ensure_ascii=False))

From fdb37abf47c530c8a442a3b6ff9da64ab91befe5 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Thu, 4 Jul 2024 16:32:54 +0530
Subject: [PATCH 14/49] Pecha classmethod from_path

---
 src/openpecha/pecha/__init__.py | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/src/openpecha/pecha/__init__.py b/src/openpecha/pecha/__init__.py
index 77b2e18..f0322a4 100644
--- a/src/openpecha/pecha/__init__.py
+++ b/src/openpecha/pecha/__init__.py
@@ -21,8 +21,25 @@ def __init__(
         self.metadata = metadata
 
     @classmethod
-    def from_path(cls, path: str):
-        pass
+    def from_path(cls, base_path: Path):
+        pecha_id = base_path.stem
+        pecha = Pecha(pecha_id=pecha_id)
+
+        with open(base_path / "metadata.json", encoding="utf-8") as f:
+            metadata = json.load(f)
+            pecha.set_metadata(metadata)
+
+        for base_file in (base_path / "base").rglob("*.txt"):
+            base_text = base_file.read_text(encoding="utf-8")
+            pecha.set_base_file(base_file.stem, base_text)
+
+        for layer_dir in (base_path / "layers").iterdir():
+            for layer_file in layer_dir.glob("*.json"):
+                layer = Layer.from_path(layer_file)
+                layer_key = (layer.annotation_label, layer_file.stem)
+                pecha.set_layer(layer_dir.stem, layer_key, layer)
+
+        return pecha
 
     @classmethod
     def from_id(cls, pecha_id: str):

From a28eb5cd77f7b54598cb40721d60ae2805d84df4 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Fri, 5 Jul 2024 11:51:32 +0530
Subject: [PATCH 15/49] fix/stam function to_json_string

---
 src/openpecha/pecha/layer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/openpecha/pecha/layer.py b/src/openpecha/pecha/layer.py
index 241b5b2..e914acc 100644
--- a/src/openpecha/pecha/layer.py
+++ b/src/openpecha/pecha/layer.py
@@ -66,7 +66,7 @@ def set_annotation(self, annotation: Annotation, annotation_id=None):
 
     def convert_absolute_to_relative_path(self, absolute_base_path: Path):
         """call before saving the stam in json"""
-        json_string = self.annotation_store.to_json()
+        json_string = self.annotation_store.to_json_string()
         json_object = json.loads(json_string)
         for resource in json_object["resources"]:
             original_path = Path(resource["@include"])

From ee9cc07fd1bb099624063d313f923be985ad648f Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Fri, 5 Jul 2024 11:57:53 +0530
Subject: [PATCH 16/49] modify/annotation_label -> annotation_type

---
 src/openpecha/alignment/parsers/plaintext.py        |  8 ++++----
 src/openpecha/pecha/layer.py                        | 10 +++++-----
 tests/alignment/parsers/plaintext/test_plaintext.py |  4 ++--
 tests/pecha/write/test_pecha.py                     |  2 +-
 4 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/openpecha/alignment/parsers/plaintext.py b/src/openpecha/alignment/parsers/plaintext.py
index 1ea8ded..e151127 100644
--- a/src/openpecha/alignment/parsers/plaintext.py
+++ b/src/openpecha/alignment/parsers/plaintext.py
@@ -19,9 +19,9 @@ def from_files(cls, source_path: Path, target_path: Path, metadata: dict):
         target_text = target_path.read_text(encoding="utf-8")
         return cls(source_text, target_text, metadata)
 
-    def create_pecha_layer(self, segments: List[str], annotation_label: LayerEnum):
+    def create_pecha_layer(self, segments: List[str], annotation_type: LayerEnum):
         """ """
-        layer = Layer(annotation_label=annotation_label, annotations={})
+        layer = Layer(annotation_type=annotation_type, annotations={})
         char_count = 0
         for segment in segments:
             annotation = Annotation(
@@ -46,8 +46,8 @@ def parse(self):
         source_pecha.set_base_file(source_base_fname, self.source_text)
         target_pecha.set_base_file(target_base_fname, self.target_text)
 
-        source_annotation = LayerEnum(self.metadata["source"]["annotation_label"])
-        target_annotation = LayerEnum(self.metadata["target"]["annotation_label"])
+        source_annotation = LayerEnum(self.metadata["source"]["annotation_type"])
+        target_annotation = LayerEnum(self.metadata["target"]["annotation_type"])
 
         source_pecha.set_layer(
             source_base_fname,
diff --git a/src/openpecha/pecha/layer.py b/src/openpecha/pecha/layer.py
index 60012cc..62e6b97 100644
--- a/src/openpecha/pecha/layer.py
+++ b/src/openpecha/pecha/layer.py
@@ -25,8 +25,8 @@ def get_annotation_category(layer_label: LayerEnum) -> LayerGroupEnum:
 
 
 class Layer:
-    def __init__(self, annotation_label: LayerEnum, annotations: Dict[str, Annotation]):
-        self.annotation_label = annotation_label
+    def __init__(self, annotation_type: LayerEnum, annotations: Dict[str, Annotation]):
+        self.annotation_type = annotation_type
         self.annotations = annotations
 
     def set_annotation(self, annotation: Annotation, annotation_id=None):
@@ -50,7 +50,7 @@ def write(self, base_file_path: Path, export_path: Path):
             id=base_file_path.name, filename=base_file_path.as_posix()
         )
         self.dataset = self.annotation_store.add_dataset(id=PECHA_DATASET_ID)
-        annotation_category = get_annotation_category(self.annotation_label).value
+        annotation_category = get_annotation_category(self.annotation_type).value
         self.dataset.add_key(annotation_category)
         unique_annotation_data_id = get_uuid()
         base_text = self.base_file_path.read_text(encoding="utf-8")
@@ -70,7 +70,7 @@ def write(self, base_file_path: Path, export_path: Path):
                 {
                     "id": unique_annotation_data_id,
                     "key": annotation_category,
-                    "value": self.annotation_label.value,
+                    "value": self.annotation_type.value,
                     "set": self.dataset.id(),
                 }
             ]
@@ -85,7 +85,7 @@ def write(self, base_file_path: Path, export_path: Path):
         """ add four uuid digits to the layer file name for uniqueness"""
         layer_dir = base_file_path.parent.parent / "layers" / base_file_path.stem
         layer_file_path = (
-            layer_dir / f"{self.annotation_label.value}-{get_uuid()[:4]}.json"
+            layer_dir / f"{self.annotation_type.value}-{get_uuid()[:4]}.json"
         )
         with open(
             layer_file_path,
diff --git a/tests/alignment/parsers/plaintext/test_plaintext.py b/tests/alignment/parsers/plaintext/test_plaintext.py
index 78084fb..91e6a09 100644
--- a/tests/alignment/parsers/plaintext/test_plaintext.py
+++ b/tests/alignment/parsers/plaintext/test_plaintext.py
@@ -11,10 +11,10 @@ def get_data_dir():
 def get_metadata():
     return {
         "source": {
-            "annotation_label": "Segment",
+            "annotation_type": "Segment",
         },
         "target": {
-            "annotation_label": "Comment",
+            "annotation_type": "Comment",
         },
     }
 
diff --git a/tests/pecha/write/test_pecha.py b/tests/pecha/write/test_pecha.py
index 52ba75d..d38caf5 100644
--- a/tests/pecha/write/test_pecha.py
+++ b/tests/pecha/write/test_pecha.py
@@ -14,7 +14,7 @@ def get_data_dir():
 
 def get_metadata():
     return {
-        "annotation_label": "Segment",
+        "annotation_type": "Segment",
     }
 
 

From 3a155affa5e44b0aa01222e7172d023495544eea Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Fri, 5 Jul 2024 12:05:31 +0530
Subject: [PATCH 17/49] fix/passing empty dict

---
 pyproject.toml                               | 1 +
 src/openpecha/alignment/parsers/plaintext.py | 2 +-
 src/openpecha/pecha/layer.py                 | 7 ++++++-
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 8f147a9..b0a336a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,6 +20,7 @@ classifiers = [
 dependencies = [
   "pydantic >= 2.7.4",
   "stam == 0.8.2",
+  "collection >= 0.1.6",
 
 ]
 
diff --git a/src/openpecha/alignment/parsers/plaintext.py b/src/openpecha/alignment/parsers/plaintext.py
index e151127..dd23a46 100644
--- a/src/openpecha/alignment/parsers/plaintext.py
+++ b/src/openpecha/alignment/parsers/plaintext.py
@@ -21,7 +21,7 @@ def from_files(cls, source_path: Path, target_path: Path, metadata: dict):
 
     def create_pecha_layer(self, segments: List[str], annotation_type: LayerEnum):
         """ """
-        layer = Layer(annotation_type=annotation_type, annotations={})
+        layer = Layer(annotation_type=annotation_type)
         char_count = 0
         for segment in segments:
             annotation = Annotation(
diff --git a/src/openpecha/pecha/layer.py b/src/openpecha/pecha/layer.py
index 62e6b97..b490720 100644
--- a/src/openpecha/pecha/layer.py
+++ b/src/openpecha/pecha/layer.py
@@ -1,4 +1,5 @@
 import json
+from collections import defaultdict
 from enum import Enum
 from pathlib import Path
 from typing import Dict
@@ -25,7 +26,11 @@ def get_annotation_category(layer_label: LayerEnum) -> LayerGroupEnum:
 
 
 class Layer:
-    def __init__(self, annotation_type: LayerEnum, annotations: Dict[str, Annotation]):
+    def __init__(
+        self,
+        annotation_type: LayerEnum,
+        annotations: Dict[str, Annotation] = defaultdict(),
+    ):
         self.annotation_type = annotation_type
         self.annotations = annotations
 

From 1b07070644e86ce69b8cedfb0fd871afc35a6a7f Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Fri, 5 Jul 2024 12:27:12 +0530
Subject: [PATCH 18/49] delete segment attribute from Annotation class

---
 src/openpecha/alignment/parsers/plaintext.py | 1 -
 src/openpecha/pecha/annotation.py            | 1 -
 src/openpecha/pecha/layer.py                 | 8 --------
 tests/pecha/write/test_pecha.py              | 5 +++--
 4 files changed, 3 insertions(+), 12 deletions(-)

diff --git a/src/openpecha/alignment/parsers/plaintext.py b/src/openpecha/alignment/parsers/plaintext.py
index dd23a46..a968cfc 100644
--- a/src/openpecha/alignment/parsers/plaintext.py
+++ b/src/openpecha/alignment/parsers/plaintext.py
@@ -25,7 +25,6 @@ def create_pecha_layer(self, segments: List[str], annotation_type: LayerEnum):
         char_count = 0
         for segment in segments:
             annotation = Annotation(
-                segment=segment,
                 start=char_count,
                 end=char_count + len(segment),
             )
diff --git a/src/openpecha/pecha/annotation.py b/src/openpecha/pecha/annotation.py
index c7f37c8..f700849 100644
--- a/src/openpecha/pecha/annotation.py
+++ b/src/openpecha/pecha/annotation.py
@@ -2,7 +2,6 @@
 
 
 class Annotation(BaseModel):
-    segment: str
     start: int = Field(ge=0)
     end: int = Field(ge=0)
     metadata: dict = Field(default_factory=dict)
diff --git a/src/openpecha/pecha/layer.py b/src/openpecha/pecha/layer.py
index b490720..cea03a0 100644
--- a/src/openpecha/pecha/layer.py
+++ b/src/openpecha/pecha/layer.py
@@ -58,15 +58,7 @@ def write(self, base_file_path: Path, export_path: Path):
         annotation_category = get_annotation_category(self.annotation_type).value
         self.dataset.add_key(annotation_category)
         unique_annotation_data_id = get_uuid()
-        base_text = self.base_file_path.read_text(encoding="utf-8")
         for annotation_id, annotation in self.annotations.items():
-            if (
-                annotation.segment
-                != base_text[annotation.start : annotation.end]  # noqa
-            ):
-                raise ValueError(
-                    f"Annotation segment does not match the base text at {annotation_id}"
-                )
             target = Selector.textselector(
                 self.resource,
                 Offset.simple(annotation.start, annotation.end),
diff --git a/tests/pecha/write/test_pecha.py b/tests/pecha/write/test_pecha.py
index d38caf5..687c690 100644
--- a/tests/pecha/write/test_pecha.py
+++ b/tests/pecha/write/test_pecha.py
@@ -35,13 +35,11 @@ def get_layer():
 def get_annotations():
     return {
         "f2b056668a0c4ad3a085bdcd8e2d7adb": Annotation(
-            segment="རྒྱ་གར་སྐད་དུ། བོ་དྷི་སཏྭ་ཙརྱ་ཨ་བ་ཏཱ་ར།",
             start=0,
             end=39,
             metadata={},
         ),
         "b696df2dbe314e8a87881a2bc391d0d5": Annotation(
-            segment="བོད་སྐད་དུ། བྱང་ཆུབ་སེམས་དཔའི་སྤྱོད་པ་ལ་འཇུག་པའི་ལེགས་པར་སྦྱར་བ།",
             start=39,
             end=103,
             metadata={},
@@ -66,3 +64,6 @@ def test_pecha_write():
 
     """ clean up """
     rmtree(export_path)
+
+
+test_pecha_write()

From 8f525ce3b57ee0d228e68e225af37e452a502f4f Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Fri, 5 Jul 2024 12:33:09 +0530
Subject: [PATCH 19/49] modify/set basefile name in set_base_file function

---
 src/openpecha/alignment/parsers/plaintext.py | 11 +++++------
 src/openpecha/pecha/__init__.py              |  9 +++++----
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/openpecha/alignment/parsers/plaintext.py b/src/openpecha/alignment/parsers/plaintext.py
index a968cfc..b4b5cce 100644
--- a/src/openpecha/alignment/parsers/plaintext.py
+++ b/src/openpecha/alignment/parsers/plaintext.py
@@ -1,7 +1,7 @@
 from pathlib import Path
 from typing import List
 
-from openpecha.ids import get_initial_pecha_id, get_uuid
+from openpecha.ids import get_initial_pecha_id
 from openpecha.pecha import Pecha
 from openpecha.pecha.annotation import Annotation
 from openpecha.pecha.layer import Layer, LayerEnum
@@ -41,20 +41,19 @@ def parse(self):
         source_pecha = Pecha(source_pecha_id)
         target_pecha = Pecha(target_pecha_id)
 
-        source_base_fname, target_base_fname = get_uuid(), get_uuid()
-        source_pecha.set_base_file(source_base_fname, self.source_text)
-        target_pecha.set_base_file(target_base_fname, self.target_text)
+        source_base_name = source_pecha.set_base_file(self.source_text)
+        target_base_name = target_pecha.set_base_file(self.target_text)
 
         source_annotation = LayerEnum(self.metadata["source"]["annotation_type"])
         target_annotation = LayerEnum(self.metadata["target"]["annotation_type"])
 
         source_pecha.set_layer(
-            source_base_fname,
+            source_base_name,
             (source_annotation, None),
             self.create_pecha_layer(self.source_text.split("\n"), source_annotation),
         )
         target_pecha.set_layer(
-            target_base_fname,
+            target_base_name,
             (target_annotation, None),
             self.create_pecha_layer(self.target_text.split("\n"), target_annotation),
         )
diff --git a/src/openpecha/pecha/__init__.py b/src/openpecha/pecha/__init__.py
index 77b2e18..d72cc13 100644
--- a/src/openpecha/pecha/__init__.py
+++ b/src/openpecha/pecha/__init__.py
@@ -1,4 +1,5 @@
 import json
+from collections import defaultdict
 from pathlib import Path
 from typing import Dict, Optional, Tuple
 
@@ -11,7 +12,7 @@ class Pecha:
     def __init__(
         self,
         pecha_id: str,
-        bases: Dict[str, str] = None,
+        bases: Dict[str, str] = defaultdict(),
         layers: Dict[str, Dict[Tuple[LayerEnum, str], Layer]] = None,
         metadata: Dict[str, str] = None,
     ) -> None:
@@ -28,10 +29,10 @@ def from_path(cls, path: str):
     def from_id(cls, pecha_id: str):
         pass
 
-    def set_base_file(self, base_file_name: str, base_text: str):
-        if not self.bases:
-            self.bases = {}
+    def set_base_file(self, base_text: str) -> str:
+        base_file_name = get_uuid()
         self.bases[base_file_name] = base_text
+        return base_file_name
 
     def set_layer(
         self, layer_dir: str, layer_key: Tuple[LayerEnum, Optional[str]], layer: Layer

From 3218572740f38b92ec5d89ed4ebc87debf5989f0 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Fri, 5 Jul 2024 12:35:08 +0530
Subject: [PATCH 20/49] modify layer_label -> layer_type

---
 src/openpecha/pecha/layer.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/openpecha/pecha/layer.py b/src/openpecha/pecha/layer.py
index cea03a0..9a0676b 100644
--- a/src/openpecha/pecha/layer.py
+++ b/src/openpecha/pecha/layer.py
@@ -20,8 +20,10 @@ class LayerGroupEnum(Enum):
     structure_type = "Structure Type"
 
 
-def get_annotation_category(layer_label: LayerEnum) -> LayerGroupEnum:
+def get_annotation_category(layer_type: LayerEnum) -> LayerGroupEnum:
     """return the annotation category for the layer label"""
+    if layer_type == LayerEnum.segment:
+        return LayerGroupEnum.structure_type
     return LayerGroupEnum.structure_type
 
 

From 4ca26d009121a35625069465181cf9ae73630bbc Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Fri, 5 Jul 2024 15:24:15 +0530
Subject: [PATCH 21/49] modify/export_path -> output_path

---
 src/openpecha/pecha/__init__.py |  6 +++---
 src/openpecha/pecha/layer.py    |  8 ++++----
 tests/pecha/write/test_pecha.py | 17 +++++++----------
 3 files changed, 14 insertions(+), 17 deletions(-)

diff --git a/src/openpecha/pecha/__init__.py b/src/openpecha/pecha/__init__.py
index d72cc13..52ef168 100644
--- a/src/openpecha/pecha/__init__.py
+++ b/src/openpecha/pecha/__init__.py
@@ -55,9 +55,9 @@ def set_metadata(self, metadata: Dict[str, str]):
         for key, value in metadata.items():
             self.metadata[key] = value
 
-    def write(self, export_path: Path = PECHAS_PATH):
+    def write(self, output_path: Path = PECHAS_PATH):
 
-        pecha_dir = _mkdir(export_path / self.pecha_id)
+        pecha_dir = _mkdir(output_path / self.pecha_id)
         self.base_path = _mkdir(pecha_dir / f"{self.pecha_id}.opf")
         """ write metadata """
         self.metadata_fn = self.base_path / "metadata.json"
@@ -79,5 +79,5 @@ def write(self, export_path: Path = PECHAS_PATH):
                     _mkdir(layer_dir / layer_fname)
                     layer.write(
                         base_file_path=base_dir / f"{layer_fname}.txt",
-                        export_path=export_path,
+                        output_path=output_path,
                     )
diff --git a/src/openpecha/pecha/layer.py b/src/openpecha/pecha/layer.py
index 9a0676b..464c984 100644
--- a/src/openpecha/pecha/layer.py
+++ b/src/openpecha/pecha/layer.py
@@ -41,15 +41,15 @@ def set_annotation(self, annotation: Annotation, annotation_id=None):
             annotation_id = get_uuid()
         self.annotations[annotation_id] = annotation
 
-    def covert_to_relative_path(self, json_string: str, export_path: Path):
+    def covert_to_relative_path(self, json_string: str, output_path: Path):
         """convert the absolute path to relative path for base file path in json string"""
         json_object = json.loads(json_string)
         for resource in json_object["resources"]:
             original_path = Path(resource["@include"])
-            resource["@include"] = str(original_path.relative_to(export_path))
+            resource["@include"] = str(original_path.relative_to(output_path))
         return json_object
 
-    def write(self, base_file_path: Path, export_path: Path):
+    def write(self, base_file_path: Path, output_path: Path):
         self.base_file_path = base_file_path
         """write annotations in stam data model"""
         self.annotation_store = AnnotationStore(id=PECHA_ANNOTATION_STORE_ID)
@@ -80,7 +80,7 @@ def write(self, base_file_path: Path, export_path: Path):
             )
         """ save annotations in json"""
         json_string = self.annotation_store.to_json_string()
-        json_object = self.covert_to_relative_path(json_string, export_path)
+        json_object = self.covert_to_relative_path(json_string, output_path)
         """ add four uuid digits to the layer file name for uniqueness"""
         layer_dir = base_file_path.parent.parent / "layers" / base_file_path.stem
         layer_file_path = (
diff --git a/tests/pecha/write/test_pecha.py b/tests/pecha/write/test_pecha.py
index 687c690..2b4be16 100644
--- a/tests/pecha/write/test_pecha.py
+++ b/tests/pecha/write/test_pecha.py
@@ -7,9 +7,9 @@
 
 
 def get_data_dir():
-    export_path = Path(__file__).parent / "output"
-    export_path.mkdir(parents=True, exist_ok=True)
-    return export_path
+    output_path = Path(__file__).parent / "output"
+    output_path.mkdir(parents=True, exist_ok=True)
+    return output_path
 
 
 def get_metadata():
@@ -51,19 +51,16 @@ def test_pecha_write():
     pecha_id = "IE7D6875F"
     base = get_base()
     layer = get_layer()
-    export_path = get_data_dir()
+    output_path = get_data_dir()
     expected_output_path = Path(__file__).parent / "expected_output"
 
     pecha = Pecha(pecha_id=pecha_id, bases=base, layers=layer, metadata=get_metadata())
-    pecha.write(export_path=export_path)
+    pecha.write(output_path=output_path)
 
-    output_file_names = [file.name for file in export_path.rglob("*")].sort()
+    output_file_names = [file.name for file in output_path.rglob("*")].sort()
     expected_file_names = [file.name for file in expected_output_path.rglob("*")].sort()
 
     assert output_file_names == expected_file_names
 
     """ clean up """
-    rmtree(export_path)
-
-
-test_pecha_write()
+    rmtree(output_path)

From 5ca3fcb2e7f646a0c1de91236fcaa9936fa74133 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Fri, 5 Jul 2024 15:26:53 +0530
Subject: [PATCH 22/49] modify/base_fname -> base_name

---
 src/openpecha/pecha/__init__.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/openpecha/pecha/__init__.py b/src/openpecha/pecha/__init__.py
index 52ef168..5f4b881 100644
--- a/src/openpecha/pecha/__init__.py
+++ b/src/openpecha/pecha/__init__.py
@@ -68,16 +68,16 @@ def write(self, output_path: Path = PECHAS_PATH):
         """ write base file"""
         if self.bases:
             base_dir = _mkdir(self.base_path / "base")
-            for base_fname, base_text in self.bases.items():
-                base_fn = base_dir / f"{base_fname}.txt"
+            for base_name, base_text in self.bases.items():
+                base_fn = base_dir / f"{base_name}.txt"
                 base_fn.write_text(base_text, encoding="utf-8")
         if self.layers:
             layer_dir = _mkdir(self.base_path / "layers")
             """ write annotation layers"""
-            for layer_fname, layer_data in self.layers.items():
+            for layer_name, layer_data in self.layers.items():
                 for _, layer in layer_data.items():
-                    _mkdir(layer_dir / layer_fname)
+                    _mkdir(layer_dir / layer_name)
                     layer.write(
-                        base_file_path=base_dir / f"{layer_fname}.txt",
+                        base_file_path=base_dir / f"{layer_name}.txt",
                         output_path=output_path,
                     )

From 7c9663f2f6dc19782f4250802e75a393c13c11fc Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Fri, 5 Jul 2024 15:42:13 +0530
Subject: [PATCH 23/49] add id_ attribute to Annotation class

---
 src/openpecha/pecha/annotation.py | 3 +++
 src/openpecha/pecha/layer.py      | 6 ++----
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/openpecha/pecha/annotation.py b/src/openpecha/pecha/annotation.py
index f700849..577cddd 100644
--- a/src/openpecha/pecha/annotation.py
+++ b/src/openpecha/pecha/annotation.py
@@ -1,7 +1,10 @@
 from pydantic import BaseModel, Field, ValidationInfo, field_validator
 
+from openpecha.ids import get_uuid
+
 
 class Annotation(BaseModel):
+    id_: str = Field(default_factory=get_uuid)
     start: int = Field(ge=0)
     end: int = Field(ge=0)
     metadata: dict = Field(default_factory=dict)
diff --git a/src/openpecha/pecha/layer.py b/src/openpecha/pecha/layer.py
index 464c984..f254234 100644
--- a/src/openpecha/pecha/layer.py
+++ b/src/openpecha/pecha/layer.py
@@ -36,10 +36,8 @@ def __init__(
         self.annotation_type = annotation_type
         self.annotations = annotations
 
-    def set_annotation(self, annotation: Annotation, annotation_id=None):
-        if not annotation_id:
-            annotation_id = get_uuid()
-        self.annotations[annotation_id] = annotation
+    def set_annotation(self, annotation: Annotation):
+        self.annotations[annotation.id_] = annotation
 
     def covert_to_relative_path(self, json_string: str, output_path: Path):
         """convert the absolute path to relative path for base file path in json string"""

From a606ea05840ef4b803a34d948ac6469e551634f5 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Fri, 5 Jul 2024 16:21:33 +0530
Subject: [PATCH 24/49] add id_ attribute to Layer class

---
 src/openpecha/alignment/parsers/plaintext.py | 11 +++----
 src/openpecha/ids.py                         |  4 +++
 src/openpecha/pecha/__init__.py              | 21 ++++++------
 src/openpecha/pecha/layer.py                 | 34 ++++++++++----------
 tests/pecha/write/test_pecha.py              | 17 ++++++++--
 5 files changed, 48 insertions(+), 39 deletions(-)

diff --git a/src/openpecha/alignment/parsers/plaintext.py b/src/openpecha/alignment/parsers/plaintext.py
index b4b5cce..80240f2 100644
--- a/src/openpecha/alignment/parsers/plaintext.py
+++ b/src/openpecha/alignment/parsers/plaintext.py
@@ -44,18 +44,15 @@ def parse(self):
         source_base_name = source_pecha.set_base_file(self.source_text)
         target_base_name = target_pecha.set_base_file(self.target_text)
 
-        source_annotation = LayerEnum(self.metadata["source"]["annotation_type"])
-        target_annotation = LayerEnum(self.metadata["target"]["annotation_type"])
-
         source_pecha.set_layer(
             source_base_name,
-            (source_annotation, None),
-            self.create_pecha_layer(self.source_text.split("\n"), source_annotation),
+            LayerEnum.segment,
+            self.create_pecha_layer(self.source_text.split("\n"), LayerEnum.segment),
         )
         target_pecha.set_layer(
             target_base_name,
-            (target_annotation, None),
-            self.create_pecha_layer(self.target_text.split("\n"), target_annotation),
+            LayerEnum.segment,
+            self.create_pecha_layer(self.target_text.split("\n"), LayerEnum.segment),
         )
 
         source_pecha.set_metadata(self.metadata["source"])
diff --git a/src/openpecha/ids.py b/src/openpecha/ids.py
index b27d246..9560ae5 100644
--- a/src/openpecha/ids.py
+++ b/src/openpecha/ids.py
@@ -6,6 +6,10 @@ def get_uuid():
     return uuid4().hex
 
 
+def get_fourchar_uuid():
+    return get_uuid()[:4]
+
+
 def get_id(prefix, length):
     return prefix + "".join(random.choices(uuid4().hex, k=length)).upper()
 
diff --git a/src/openpecha/pecha/__init__.py b/src/openpecha/pecha/__init__.py
index 5f4b881..9a1c523 100644
--- a/src/openpecha/pecha/__init__.py
+++ b/src/openpecha/pecha/__init__.py
@@ -13,7 +13,9 @@ def __init__(
         self,
         pecha_id: str,
         bases: Dict[str, str] = defaultdict(),
-        layers: Dict[str, Dict[Tuple[LayerEnum, str], Layer]] = None,
+        layers: Dict[str, Dict[Tuple[LayerEnum, str], Layer]] = defaultdict(
+            lambda: defaultdict()
+        ),
         metadata: Dict[str, str] = None,
     ) -> None:
         self.pecha_id = pecha_id
@@ -35,19 +37,14 @@ def set_base_file(self, base_text: str) -> str:
         return base_file_name
 
     def set_layer(
-        self, layer_dir: str, layer_key: Tuple[LayerEnum, Optional[str]], layer: Layer
-    ):
-        """Note layer dir should be same as its corresponding base file name"""
-        if not self.layers:
-            self.layers = {}
-        if layer_dir not in self.layers:
-            self.layers[layer_dir] = {}
+        self, layer_dir: str, annotation_type: LayerEnum, layer: Layer
+    ) -> str:
 
-        """ layer key is a tuple of layer label and layer id"""
+        """layer key is a tuple of layer label and layer id"""
         """ A particular volume can have multiple layers with same label but different id"""
-        layer_label, layer_id = layer_key
-        layer_id = layer_id if layer_id else get_uuid()
-        self.layers[layer_dir][(layer_label, layer_id)] = layer
+        layer_id = get_uuid()[:4]
+        self.layers[layer_dir][(annotation_type, layer_id)] = layer
+        return layer_id
 
     def set_metadata(self, metadata: Dict[str, str]):
         if not self.metadata:
diff --git a/src/openpecha/pecha/layer.py b/src/openpecha/pecha/layer.py
index f254234..9183298 100644
--- a/src/openpecha/pecha/layer.py
+++ b/src/openpecha/pecha/layer.py
@@ -2,12 +2,13 @@
 from collections import defaultdict
 from enum import Enum
 from pathlib import Path
-from typing import Dict
+from typing import Dict, Optional
 
-from stam import AnnotationStore, Offset, Selector
+from pydantic import BaseModel, ConfigDict, Field
+from stam import AnnotationDataSet, AnnotationStore, Offset, Selector
 
 from openpecha.config import PECHA_ANNOTATION_STORE_ID, PECHA_DATASET_ID
-from openpecha.ids import get_uuid
+from openpecha.ids import get_fourchar_uuid, get_uuid
 from openpecha.pecha.annotation import Annotation
 
 
@@ -27,14 +28,15 @@ def get_annotation_category(layer_type: LayerEnum) -> LayerGroupEnum:
     return LayerGroupEnum.structure_type
 
 
-class Layer:
-    def __init__(
-        self,
-        annotation_type: LayerEnum,
-        annotations: Dict[str, Annotation] = defaultdict(),
-    ):
-        self.annotation_type = annotation_type
-        self.annotations = annotations
+class Layer(BaseModel):
+    id_: str = Field(default_factory=get_fourchar_uuid)
+    annotation_type: LayerEnum
+    annotations: Dict[str, Annotation] = defaultdict()
+
+    annotation_store: Optional[AnnotationStore] = None
+    dataset: Optional[AnnotationDataSet] = None
+
+    model_config = ConfigDict(arbitrary_types_allowed=True)
 
     def set_annotation(self, annotation: Annotation):
         self.annotations[annotation.id_] = annotation
@@ -48,10 +50,10 @@ def covert_to_relative_path(self, json_string: str, output_path: Path):
         return json_object
 
     def write(self, base_file_path: Path, output_path: Path):
-        self.base_file_path = base_file_path
+        base_file_path = base_file_path
         """write annotations in stam data model"""
         self.annotation_store = AnnotationStore(id=PECHA_ANNOTATION_STORE_ID)
-        self.resource = self.annotation_store.add_resource(
+        resource = self.annotation_store.add_resource(
             id=base_file_path.name, filename=base_file_path.as_posix()
         )
         self.dataset = self.annotation_store.add_dataset(id=PECHA_DATASET_ID)
@@ -60,7 +62,7 @@ def write(self, base_file_path: Path, output_path: Path):
         unique_annotation_data_id = get_uuid()
         for annotation_id, annotation in self.annotations.items():
             target = Selector.textselector(
-                self.resource,
+                resource,
                 Offset.simple(annotation.start, annotation.end),
             )
             data = [
@@ -81,9 +83,7 @@ def write(self, base_file_path: Path, output_path: Path):
         json_object = self.covert_to_relative_path(json_string, output_path)
         """ add four uuid digits to the layer file name for uniqueness"""
         layer_dir = base_file_path.parent.parent / "layers" / base_file_path.stem
-        layer_file_path = (
-            layer_dir / f"{self.annotation_type.value}-{get_uuid()[:4]}.json"
-        )
+        layer_file_path = layer_dir / f"{self.annotation_type.value}-{self.id_}.json"
         with open(
             layer_file_path,
             "w",
diff --git a/tests/pecha/write/test_pecha.py b/tests/pecha/write/test_pecha.py
index 2b4be16..ae65eaf 100644
--- a/tests/pecha/write/test_pecha.py
+++ b/tests/pecha/write/test_pecha.py
@@ -27,7 +27,11 @@ def get_base():
 def get_layer():
     return {
         "f2b056668a0c4ad3a085bdcd8e2d7adb": {
-            LayerEnum("Segment"): Layer(LayerEnum("Segment"), get_annotations())
+            (LayerEnum.segment, "bf13"): Layer(
+                id_="bf13",
+                annotation_type=LayerEnum("Segment"),
+                annotations=get_annotations(),
+            )
         }
     }
 
@@ -57,10 +61,17 @@ def test_pecha_write():
     pecha = Pecha(pecha_id=pecha_id, bases=base, layers=layer, metadata=get_metadata())
     pecha.write(output_path=output_path)
 
-    output_file_names = [file.name for file in output_path.rglob("*")].sort()
-    expected_file_names = [file.name for file in expected_output_path.rglob("*")].sort()
+    output_file_names = [file.name for file in list(output_path.rglob("*"))]
+    expected_file_names = [file.name for file in list(expected_output_path.rglob("*"))]
+
+    """ sort the list """
+    output_file_names.sort()
+    expected_file_names.sort()
 
     assert output_file_names == expected_file_names
 
     """ clean up """
     rmtree(output_path)
+
+
+test_pecha_write()

From 869d207f8b62883176187c956d1431bfd45a78e6 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Fri, 5 Jul 2024 16:23:33 +0530
Subject: [PATCH 25/49] modify/ layer_dir -> base_name

---
 src/openpecha/pecha/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/openpecha/pecha/__init__.py b/src/openpecha/pecha/__init__.py
index 9a1c523..9975839 100644
--- a/src/openpecha/pecha/__init__.py
+++ b/src/openpecha/pecha/__init__.py
@@ -37,13 +37,13 @@ def set_base_file(self, base_text: str) -> str:
         return base_file_name
 
     def set_layer(
-        self, layer_dir: str, annotation_type: LayerEnum, layer: Layer
+        self, base_name: str, annotation_type: LayerEnum, layer: Layer
     ) -> str:
 
         """layer key is a tuple of layer label and layer id"""
         """ A particular volume can have multiple layers with same label but different id"""
         layer_id = get_uuid()[:4]
-        self.layers[layer_dir][(annotation_type, layer_id)] = layer
+        self.layers[base_name][(annotation_type, layer_id)] = layer
         return layer_id
 
     def set_metadata(self, metadata: Dict[str, str]):

From 3b6f1b8086ccf01ee272efc994d07e30406e9d5c Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Fri, 5 Jul 2024 16:26:33 +0530
Subject: [PATCH 26/49] modify layer_id -> layer_subtype_id

---
 src/openpecha/pecha/__init__.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/openpecha/pecha/__init__.py b/src/openpecha/pecha/__init__.py
index 9975839..25f2f2a 100644
--- a/src/openpecha/pecha/__init__.py
+++ b/src/openpecha/pecha/__init__.py
@@ -42,9 +42,9 @@ def set_layer(
 
         """layer key is a tuple of layer label and layer id"""
         """ A particular volume can have multiple layers with same label but different id"""
-        layer_id = get_uuid()[:4]
-        self.layers[base_name][(annotation_type, layer_id)] = layer
-        return layer_id
+        layer_subtype_id = get_uuid()[:4]
+        self.layers[base_name][(annotation_type, layer_subtype_id)] = layer
+        return layer_subtype_id
 
     def set_metadata(self, metadata: Dict[str, str]):
         if not self.metadata:

From 0f034d9528524111e8c8cce0518e5cd34038fb35 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Mon, 8 Jul 2024 07:37:30 +0530
Subject: [PATCH 27/49] create PechaMetadata

---
 src/openpecha/pecha/__init__.py |  9 ++----
 src/openpecha/pecha/metadata.py | 52 +++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+), 7 deletions(-)
 create mode 100644 src/openpecha/pecha/metadata.py

diff --git a/src/openpecha/pecha/__init__.py b/src/openpecha/pecha/__init__.py
index 25f2f2a..a531e3a 100644
--- a/src/openpecha/pecha/__init__.py
+++ b/src/openpecha/pecha/__init__.py
@@ -6,6 +6,7 @@
 from openpecha.config import PECHAS_PATH, _mkdir
 from openpecha.ids import get_uuid
 from openpecha.pecha.layer import Layer, LayerEnum
+from openpecha.pecha.metadata import PechaMetadata
 
 
 class Pecha:
@@ -16,7 +17,7 @@ def __init__(
         layers: Dict[str, Dict[Tuple[LayerEnum, str], Layer]] = defaultdict(
             lambda: defaultdict()
         ),
-        metadata: Dict[str, str] = None,
+        metadata: PechaMetadata = None,
     ) -> None:
         self.pecha_id = pecha_id
         self.bases = bases
@@ -46,12 +47,6 @@ def set_layer(
         self.layers[base_name][(annotation_type, layer_subtype_id)] = layer
         return layer_subtype_id
 
-    def set_metadata(self, metadata: Dict[str, str]):
-        if not self.metadata:
-            self.metadata = {}
-        for key, value in metadata.items():
-            self.metadata[key] = value
-
     def write(self, output_path: Path = PECHAS_PATH):
 
         pecha_dir = _mkdir(output_path / self.pecha_id)
diff --git a/src/openpecha/pecha/metadata.py b/src/openpecha/pecha/metadata.py
new file mode 100644
index 0000000..d373cc3
--- /dev/null
+++ b/src/openpecha/pecha/metadata.py
@@ -0,0 +1,52 @@
+from collections import defaultdict
+from datetime import datetime
+from enum import Enum
+from typing import Dict, List, Optional
+
+from pydantic import BaseModel, Field, field_validator
+
+from openpecha.ids import get_diplomatic_id, get_initial_pecha_id, get_open_pecha_id
+
+
+class InitialCreationType(Enum):
+    ocr = "ocr"
+    ebook = "ebook"
+    input = "input"
+    tmx = "tmx"
+
+
+class PechaMetadata(BaseModel):
+    id_: str = Field(default=None, alias="id_")
+    title: List[str] = Field(default=None, alias="title")
+    author: List[str] = Field(default=None, alias="author")
+    source: str = Field(default=None, alias="source")
+    language: str = Field(default=None, alias="language")
+    initial_creation_type: InitialCreationType = Field(
+        None, alias="initial_creation_type"
+    )
+    created_at: datetime = Field(default=datetime.now, alias="created_at")
+    source_metadata: Optional[Dict] = Field(
+        default=defaultdict
+    )  # place to dump any metadata from the source
+
+    @field_validator("created_at", pre=True, always=True)
+    def set_imported_date(cls, v):
+        return v or datetime.now()
+
+
+class InitialPechaMetadata(PechaMetadata):
+    @field_validator("id_", pre=True, always=True)
+    def set_id(cls, v):
+        return v or get_initial_pecha_id()
+
+
+class OpenPechaMetadata(PechaMetadata):
+    @field_validator("id_", pre=True, always=True)
+    def set_id(cls, v):
+        return v or get_open_pecha_id()
+
+
+class DiplomaticPechaMetadata(PechaMetadata):
+    @field_validator("id_", pre=True, always=True)
+    def set_id(cls, v):
+        return v or get_diplomatic_id()

From 3d188ecedd592c83dfee1814ae5910ef1602a384 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Mon, 8 Jul 2024 07:50:23 +0530
Subject: [PATCH 28/49] instantiate Pecha with metadata

---
 src/openpecha/alignment/parsers/plaintext.py | 15 ++++++---------
 src/openpecha/pecha/__init__.py              |  4 ++--
 2 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/src/openpecha/alignment/parsers/plaintext.py b/src/openpecha/alignment/parsers/plaintext.py
index 80240f2..aefb594 100644
--- a/src/openpecha/alignment/parsers/plaintext.py
+++ b/src/openpecha/alignment/parsers/plaintext.py
@@ -1,10 +1,10 @@
 from pathlib import Path
 from typing import List
 
-from openpecha.ids import get_initial_pecha_id
 from openpecha.pecha import Pecha
 from openpecha.pecha.annotation import Annotation
 from openpecha.pecha.layer import Layer, LayerEnum
+from openpecha.pecha.metadata import InitialCreationType, InitialPechaMetadata
 
 
 class PlainTextLineAlignedParser:
@@ -34,12 +34,12 @@ def create_pecha_layer(self, segments: List[str], annotation_type: LayerEnum):
         return layer
 
     def parse(self):
-        source_pecha_id, target_pecha_id = (
-            get_initial_pecha_id(),
-            get_initial_pecha_id(),
+        source_pecha_metadata, target_pecha_metadata = (
+            InitialPechaMetadata(initial_creation_type=InitialCreationType.input),
+            InitialPechaMetadata(initial_creation_type=InitialCreationType.input),
         )
-        source_pecha = Pecha(source_pecha_id)
-        target_pecha = Pecha(target_pecha_id)
+        source_pecha = Pecha(metadata=source_pecha_metadata)
+        target_pecha = Pecha(metadata=target_pecha_metadata)
 
         source_base_name = source_pecha.set_base_file(self.source_text)
         target_base_name = target_pecha.set_base_file(self.target_text)
@@ -55,9 +55,6 @@ def parse(self):
             self.create_pecha_layer(self.target_text.split("\n"), LayerEnum.segment),
         )
 
-        source_pecha.set_metadata(self.metadata["source"])
-        target_pecha.set_metadata(self.metadata["target"])
-
         return source_pecha, target_pecha
 
         # TODO:
diff --git a/src/openpecha/pecha/__init__.py b/src/openpecha/pecha/__init__.py
index a531e3a..d9a1c91 100644
--- a/src/openpecha/pecha/__init__.py
+++ b/src/openpecha/pecha/__init__.py
@@ -12,14 +12,14 @@
 class Pecha:
     def __init__(
         self,
-        pecha_id: str,
+        pecha_id: str = None,
         bases: Dict[str, str] = defaultdict(),
         layers: Dict[str, Dict[Tuple[LayerEnum, str], Layer]] = defaultdict(
             lambda: defaultdict()
         ),
         metadata: PechaMetadata = None,
     ) -> None:
-        self.pecha_id = pecha_id
+        self.pecha_id = metadata.pecha_id if metadata else pecha_id
         self.bases = bases
         self.layers = layers
         self.metadata = metadata

From 15fee96c5b56c882745e75a7817296719b9e2e14 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Mon, 8 Jul 2024 07:55:23 +0530
Subject: [PATCH 29/49] fix field_validator attribute

---
 src/openpecha/pecha/metadata.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/openpecha/pecha/metadata.py b/src/openpecha/pecha/metadata.py
index d373cc3..25ffaac 100644
--- a/src/openpecha/pecha/metadata.py
+++ b/src/openpecha/pecha/metadata.py
@@ -29,24 +29,24 @@ class PechaMetadata(BaseModel):
         default=defaultdict
     )  # place to dump any metadata from the source
 
-    @field_validator("created_at", pre=True, always=True)
+    @field_validator("created_at", mode="before")
     def set_imported_date(cls, v):
         return v or datetime.now()
 
 
 class InitialPechaMetadata(PechaMetadata):
-    @field_validator("id_", pre=True, always=True)
+    @field_validator("id_", mode="before")
     def set_id(cls, v):
         return v or get_initial_pecha_id()
 
 
 class OpenPechaMetadata(PechaMetadata):
-    @field_validator("id_", pre=True, always=True)
+    @field_validator("id_", mode="before")
     def set_id(cls, v):
         return v or get_open_pecha_id()
 
 
 class DiplomaticPechaMetadata(PechaMetadata):
-    @field_validator("id_", pre=True, always=True)
+    @field_validator("id_", mode="before")
     def set_id(cls, v):
         return v or get_diplomatic_id()

From 753c721c3d126242b837672c7f7af132e095fabf Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Mon, 8 Jul 2024 08:28:26 +0530
Subject: [PATCH 30/49] fix pydantic validator

---
 src/openpecha/pecha/metadata.py | 31 ++++++++++++++++++++-----------
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/src/openpecha/pecha/metadata.py b/src/openpecha/pecha/metadata.py
index 25ffaac..41ab20c 100644
--- a/src/openpecha/pecha/metadata.py
+++ b/src/openpecha/pecha/metadata.py
@@ -3,9 +3,9 @@
 from enum import Enum
 from typing import Dict, List, Optional
 
-from pydantic import BaseModel, Field, field_validator
+from pydantic import BaseModel, Field, field_validator, model_validator
 
-from openpecha.ids import get_diplomatic_id, get_initial_pecha_id, get_open_pecha_id
+from openpecha.ids import get_initial_pecha_id
 
 
 class InitialCreationType(Enum):
@@ -35,18 +35,27 @@ def set_imported_date(cls, v):
 
 
 class InitialPechaMetadata(PechaMetadata):
-    @field_validator("id_", mode="before")
-    def set_id(cls, v):
-        return v or get_initial_pecha_id()
+    @model_validator(mode="before")
+    @classmethod
+    def set_id(cls, values):
+        if "id_" not in values or values["id_"] is None:
+            values["id_"] = get_initial_pecha_id()
+        return values
 
 
 class OpenPechaMetadata(PechaMetadata):
-    @field_validator("id_", mode="before")
-    def set_id(cls, v):
-        return v or get_open_pecha_id()
+    @model_validator(mode="before")
+    @classmethod
+    def set_id(cls, values):
+        if "id_" not in values or values["id_"] is None:
+            values["id_"] = get_initial_pecha_id()
+        return values
 
 
 class DiplomaticPechaMetadata(PechaMetadata):
-    @field_validator("id_", mode="before")
-    def set_id(cls, v):
-        return v or get_diplomatic_id()
+    @model_validator(mode="before")
+    @classmethod
+    def set_id(cls, values):
+        if "id_" not in values or values["id_"] is None:
+            values["id_"] = get_initial_pecha_id()
+        return values

From 20cfefbf338772d95734a433e36a449c627e442f Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Mon, 8 Jul 2024 08:29:10 +0530
Subject: [PATCH 31/49] pass metadata to PechaMetadata

---
 src/openpecha/alignment/parsers/plaintext.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/openpecha/alignment/parsers/plaintext.py b/src/openpecha/alignment/parsers/plaintext.py
index aefb594..30c1671 100644
--- a/src/openpecha/alignment/parsers/plaintext.py
+++ b/src/openpecha/alignment/parsers/plaintext.py
@@ -35,8 +35,14 @@ def create_pecha_layer(self, segments: List[str], annotation_type: LayerEnum):
 
     def parse(self):
         source_pecha_metadata, target_pecha_metadata = (
-            InitialPechaMetadata(initial_creation_type=InitialCreationType.input),
-            InitialPechaMetadata(initial_creation_type=InitialCreationType.input),
+            InitialPechaMetadata(
+                initial_creation_type=InitialCreationType.input,
+                source_metadata=self.metadata["source"],
+            ),
+            InitialPechaMetadata(
+                initial_creation_type=InitialCreationType.input,
+                source_metadata=self.metadata["target"],
+            ),
         )
         source_pecha = Pecha(metadata=source_pecha_metadata)
         target_pecha = Pecha(metadata=target_pecha_metadata)

From d7bdb302c0d1b4b6ccf0f76ebeafcb87a477f861 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Mon, 8 Jul 2024 08:31:55 +0530
Subject: [PATCH 32/49] set pecha_id if not in metadata

---
 src/openpecha/pecha/__init__.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/openpecha/pecha/__init__.py b/src/openpecha/pecha/__init__.py
index d9a1c91..d8441ff 100644
--- a/src/openpecha/pecha/__init__.py
+++ b/src/openpecha/pecha/__init__.py
@@ -19,7 +19,7 @@ def __init__(
         ),
         metadata: PechaMetadata = None,
     ) -> None:
-        self.pecha_id = metadata.pecha_id if metadata else pecha_id
+        self.pecha_id = metadata.id_ if metadata else pecha_id
         self.bases = bases
         self.layers = layers
         self.metadata = metadata
@@ -48,6 +48,8 @@ def set_layer(
         return layer_subtype_id
 
     def write(self, output_path: Path = PECHAS_PATH):
+        if not self.pecha_id:
+            raise ValueError("pecha_id must be set before writing.")
 
         pecha_dir = _mkdir(output_path / self.pecha_id)
         self.base_path = _mkdir(pecha_dir / f"{self.pecha_id}.opf")

From 2d14de794d89fdae4f013273b538bda567bce279 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Mon, 8 Jul 2024 09:11:54 +0530
Subject: [PATCH 33/49] make PechaMetadata json serializable

---
 src/openpecha/pecha/__init__.py |  7 +++++--
 src/openpecha/pecha/metadata.py | 25 +++++++++++++++++++++++--
 2 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/src/openpecha/pecha/__init__.py b/src/openpecha/pecha/__init__.py
index d8441ff..27afe0c 100644
--- a/src/openpecha/pecha/__init__.py
+++ b/src/openpecha/pecha/__init__.py
@@ -6,7 +6,7 @@
 from openpecha.config import PECHAS_PATH, _mkdir
 from openpecha.ids import get_uuid
 from openpecha.pecha.layer import Layer, LayerEnum
-from openpecha.pecha.metadata import PechaMetadata
+from openpecha.pecha.metadata import PechaMetadata, to_json_serializable
 
 
 class Pecha:
@@ -56,7 +56,10 @@ def write(self, output_path: Path = PECHAS_PATH):
         """ write metadata """
         self.metadata_fn = self.base_path / "metadata.json"
         self.metadata_fn.write_text(
-            json.dumps(self.metadata, indent=4, ensure_ascii=False), encoding="utf-8"
+            json.dumps(
+                to_json_serializable(self.metadata), indent=4, ensure_ascii=False
+            ),
+            encoding="utf-8",
         )
 
         """ write base file"""
diff --git a/src/openpecha/pecha/metadata.py b/src/openpecha/pecha/metadata.py
index 41ab20c..a7fa7f0 100644
--- a/src/openpecha/pecha/metadata.py
+++ b/src/openpecha/pecha/metadata.py
@@ -1,3 +1,4 @@
+import json
 from collections import defaultdict
 from datetime import datetime
 from enum import Enum
@@ -24,15 +25,35 @@ class PechaMetadata(BaseModel):
     initial_creation_type: InitialCreationType = Field(
         None, alias="initial_creation_type"
     )
-    created_at: datetime = Field(default=datetime.now, alias="created_at")
+    created_at: datetime = Field(default=None, alias="created_at")
     source_metadata: Optional[Dict] = Field(
-        default=defaultdict
+        default={}
     )  # place to dump any metadata from the source
 
     @field_validator("created_at", mode="before")
     def set_imported_date(cls, v):
         return v or datetime.now()
 
+    class Config:
+        json_encoders = {
+            InitialCreationType: lambda v: v.value,
+            defaultdict: lambda d: dict(d),
+        }
+
+
+def to_json_serializable(pecha_metadata: Optional[PechaMetadata]) -> str:
+    if pecha_metadata is None:
+        return json.dumps({}, indent=4, ensure_ascii=False)
+
+    # Convert the model to a dictionary
+    dict_data = pecha_metadata.model_dump()
+    # Convert the defaultdict to a regular dictionary
+    dict_data["source_metadata"] = dict(dict_data["source_metadata"])
+    # Convert the initial_creation_type enum to its value
+    if dict_data["initial_creation_type"] is not None:
+        dict_data["initial_creation_type"] = dict_data["initial_creation_type"].value
+    return json.dumps(dict_data, indent=4, ensure_ascii=False)
+
 
 class InitialPechaMetadata(PechaMetadata):
     @model_validator(mode="before")

From 9f7a01a762135a644914bf6f021c7519387f1034 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Mon, 8 Jul 2024 09:13:18 +0530
Subject: [PATCH 34/49] modify test_pecha instantiate pecha with metadata

---
 tests/pecha/write/test_pecha.py | 51 ++++++++++++++++++++-------------
 1 file changed, 31 insertions(+), 20 deletions(-)

diff --git a/tests/pecha/write/test_pecha.py b/tests/pecha/write/test_pecha.py
index ae65eaf..c905b80 100644
--- a/tests/pecha/write/test_pecha.py
+++ b/tests/pecha/write/test_pecha.py
@@ -1,9 +1,11 @@
 from pathlib import Path
 from shutil import rmtree
+from unittest import mock
 
 from openpecha.pecha import Pecha
 from openpecha.pecha.annotation import Annotation
 from openpecha.pecha.layer import Layer, LayerEnum
+from openpecha.pecha.metadata import InitialCreationType, InitialPechaMetadata
 
 
 def get_data_dir():
@@ -52,26 +54,35 @@ def get_annotations():
 
 
 def test_pecha_write():
-    pecha_id = "IE7D6875F"
-    base = get_base()
-    layer = get_layer()
-    output_path = get_data_dir()
-    expected_output_path = Path(__file__).parent / "expected_output"
-
-    pecha = Pecha(pecha_id=pecha_id, bases=base, layers=layer, metadata=get_metadata())
-    pecha.write(output_path=output_path)
-
-    output_file_names = [file.name for file in list(output_path.rglob("*"))]
-    expected_file_names = [file.name for file in list(expected_output_path.rglob("*"))]
-
-    """ sort the list """
-    output_file_names.sort()
-    expected_file_names.sort()
-
-    assert output_file_names == expected_file_names
-
-    """ clean up """
-    rmtree(output_path)
+    with mock.patch(
+        "openpecha.pecha.metadata.get_initial_pecha_id"
+    ) as mock_get_initial_pecha_id:
+        mock_get_initial_pecha_id.return_value = "IE7D6875F"
+        base = get_base()
+        layer = get_layer()
+        output_path = get_data_dir()
+        expected_output_path = Path(__file__).parent / "expected_output"
+
+        metadata = InitialPechaMetadata(initial_creation_type=InitialCreationType.input)
+        pecha = Pecha(metadata=metadata)
+        pecha.bases = base
+        pecha.layers = layer
+
+        pecha.write(output_path=output_path)
+
+        output_file_names = [file.name for file in list(output_path.rglob("*"))]
+        expected_file_names = [
+            file.name for file in list(expected_output_path.rglob("*"))
+        ]
+
+        """ sort the list """
+        output_file_names.sort()
+        expected_file_names.sort()
+
+        assert output_file_names == expected_file_names
+
+        """ clean up """
+        rmtree(output_path)
 
 
 test_pecha_write()

From 7c253d2e73b48748c97c904638e3b800bbfaf332 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Mon, 8 Jul 2024 10:12:55 +0530
Subject: [PATCH 35/49] set base and layer from class method from_path Pecha

---
 src/openpecha/pecha/__init__.py | 29 +++++++++++++++------------
 src/openpecha/pecha/layer.py    | 35 +++++++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+), 13 deletions(-)

diff --git a/src/openpecha/pecha/__init__.py b/src/openpecha/pecha/__init__.py
index 99a3137..1a21d40 100644
--- a/src/openpecha/pecha/__init__.py
+++ b/src/openpecha/pecha/__init__.py
@@ -13,7 +13,7 @@ class Pecha:
     def __init__(
         self,
         pecha_id: str = None,
-        bases: Dict[str, str] = defaultdict(),
+        bases: Dict[str, str] = defaultdict(str),
         layers: Dict[str, Dict[Tuple[LayerEnum, str], Layer]] = defaultdict(
             lambda: defaultdict()
         ),
@@ -33,15 +33,14 @@ def from_path(cls, base_path: Path):
         #     metadata = json.load(f)
         # pecha.set_metadata(metadata)
 
-        # for base_file in (base_path / "base").rglob("*.txt"):
-        #     base_text = base_file.read_text(encoding="utf-8")
-        # pecha.set_base_file(base_file.stem, base_text)
+        for base_file in (base_path / "base").rglob("*"):
+            base_text = base_file.read_text(encoding="utf-8")
+            pecha.set_base_file(base_text, base_file.stem)
 
-        # for layer_dir in (base_path / "layers").iterdir():
-        #     for layer_file in layer_dir.glob("*.json"):
-        #         layer = Layer.from_path(layer_file)
-        #         layer_key = (layer.annotation_label, layer_file.stem)
-        # pecha.set_layer(layer_dir.stem, layer_key, layer)
+        for layer_dir in (base_path / "layers").iterdir():
+            for layer_file in layer_dir.glob("*.json"):
+                layer = Layer.from_path(layer_file)
+                pecha.set_layer(layer_dir.stem, layer.annotation_type, layer, layer.id_)
 
         return pecha
 
@@ -49,18 +48,22 @@ def from_path(cls, base_path: Path):
     def from_id(cls, pecha_id: str):
         pass
 
-    def set_base_file(self, base_text: str) -> str:
-        base_file_name = get_uuid()
+    def set_base_file(self, base_text: str, base_file_name: str = None) -> str:
+        base_file_name = base_file_name if base_file_name else get_uuid()[:4]
         self.bases[base_file_name] = base_text
         return base_file_name
 
     def set_layer(
-        self, base_name: str, annotation_type: LayerEnum, layer: Layer
+        self,
+        base_name: str,
+        annotation_type: LayerEnum,
+        layer: Layer,
+        layer_subtype_id: str = None,
     ) -> str:
 
         """layer key is a tuple of layer label and layer id"""
         """ A particular volume can have multiple layers with same label but different id"""
-        layer_subtype_id = get_uuid()[:4]
+        layer_subtype_id = get_uuid()[:4] if not layer_subtype_id else layer_subtype_id
         self.layers[base_name][(annotation_type, layer_subtype_id)] = layer
         return layer_subtype_id
 
diff --git a/src/openpecha/pecha/layer.py b/src/openpecha/pecha/layer.py
index 9183298..dc3d060 100644
--- a/src/openpecha/pecha/layer.py
+++ b/src/openpecha/pecha/layer.py
@@ -28,6 +28,14 @@ def get_annotation_category(layer_type: LayerEnum) -> LayerGroupEnum:
     return LayerGroupEnum.structure_type
 
 
+def convert_relative_to_absolute_path(json_data, absolute_base_path: Path):
+    """call after loading the stam from json"""
+    for resource in json_data["resources"]:
+        original_path = Path(resource["@include"])
+        resource["@include"] = str(absolute_base_path / original_path)
+    return json_data
+
+
 class Layer(BaseModel):
     id_: str = Field(default_factory=get_fourchar_uuid)
     annotation_type: LayerEnum
@@ -38,6 +46,33 @@ class Layer(BaseModel):
 
     model_config = ConfigDict(arbitrary_types_allowed=True)
 
+    @classmethod
+    def from_path(cls, layer_file_path: Path):
+        """get annotation label"""
+        annotation_label = LayerEnum(layer_file_path.stem.split("-")[0])
+        layer_id = layer_file_path.stem.split("-")[1]
+        """ load annotations from json"""
+        with open(layer_file_path) as f:
+            json_data = json.load(f)
+        absolute_base_path = layer_file_path.parents[4]
+        json_data = convert_relative_to_absolute_path(json_data, absolute_base_path)
+        annotation_store = AnnotationStore(string=json.dumps(json_data))
+
+        layer_annotations: Dict[str, Annotation] = {}
+        for annotation in annotation_store.annotations():
+            annotation_id, segment = annotation.id(), str(annotation)
+            start = annotation.offset().begin().value()
+            end = annotation.offset().end().value()
+            layer_annotations[annotation_id] = Annotation(
+                segment=segment, start=start, end=end
+            )
+
+        return Layer(
+            id_=layer_id,
+            annotation_type=annotation_label,
+            annotations=layer_annotations,
+        )
+
     def set_annotation(self, annotation: Annotation):
         self.annotations[annotation.id_] = annotation
 

From ba5e4d0b6042ea752712ce463352869da429ab8f Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Mon, 8 Jul 2024 10:14:42 +0530
Subject: [PATCH 36/49] refactor code

---
 src/openpecha/pecha/layer.py | 35 ++++++++++++++++++-----------------
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/src/openpecha/pecha/layer.py b/src/openpecha/pecha/layer.py
index dc3d060..ac36c90 100644
--- a/src/openpecha/pecha/layer.py
+++ b/src/openpecha/pecha/layer.py
@@ -28,14 +28,6 @@ def get_annotation_category(layer_type: LayerEnum) -> LayerGroupEnum:
     return LayerGroupEnum.structure_type
 
 
-def convert_relative_to_absolute_path(json_data, absolute_base_path: Path):
-    """call after loading the stam from json"""
-    for resource in json_data["resources"]:
-        original_path = Path(resource["@include"])
-        resource["@include"] = str(absolute_base_path / original_path)
-    return json_data
-
-
 class Layer(BaseModel):
     id_: str = Field(default_factory=get_fourchar_uuid)
     annotation_type: LayerEnum
@@ -76,14 +68,6 @@ def from_path(cls, layer_file_path: Path):
     def set_annotation(self, annotation: Annotation):
         self.annotations[annotation.id_] = annotation
 
-    def covert_to_relative_path(self, json_string: str, output_path: Path):
-        """convert the absolute path to relative path for base file path in json string"""
-        json_object = json.loads(json_string)
-        for resource in json_object["resources"]:
-            original_path = Path(resource["@include"])
-            resource["@include"] = str(original_path.relative_to(output_path))
-        return json_object
-
     def write(self, base_file_path: Path, output_path: Path):
         base_file_path = base_file_path
         """write annotations in stam data model"""
@@ -115,7 +99,7 @@ def write(self, base_file_path: Path, output_path: Path):
             )
         """ save annotations in json"""
         json_string = self.annotation_store.to_json_string()
-        json_object = self.covert_to_relative_path(json_string, output_path)
+        json_object = convert_to_relative_path(json_string, output_path)
         """ add four uuid digits to the layer file name for uniqueness"""
         layer_dir = base_file_path.parent.parent / "layers" / base_file_path.stem
         layer_file_path = layer_dir / f"{self.annotation_type.value}-{self.id_}.json"
@@ -124,3 +108,20 @@ def write(self, base_file_path: Path, output_path: Path):
             "w",
         ) as f:
             f.write(json.dumps(json_object, indent=4, ensure_ascii=False))
+
+
+def convert_relative_to_absolute_path(json_data, absolute_base_path: Path):
+    """call after loading the stam from json"""
+    for resource in json_data["resources"]:
+        original_path = Path(resource["@include"])
+        resource["@include"] = str(absolute_base_path / original_path)
+    return json_data
+
+
+def convert_to_relative_path(json_string: str, output_path: Path):
+    """convert the absolute path to relative path for base file path in json string"""
+    json_object = json.loads(json_string)
+    for resource in json_object["resources"]:
+        original_path = Path(resource["@include"])
+        resource["@include"] = str(original_path.relative_to(output_path))
+    return json_object

From 029e6add8383a5c1e6de983dff3ecfd936311545 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Mon, 8 Jul 2024 10:28:32 +0530
Subject: [PATCH 37/49] create test_pecha_read

---
 .../base/f2b056668a0c4ad3a085bdcd8e2d7adb.txt |  1 +
 .../Segment-bf13.json                         | 88 +++++++++++++++++++
 .../IE7D6875F/IE7D6875F.opf/metadata.json     |  3 +
 tests/pecha/read/test_pecha_read.py           | 24 +++++
 4 files changed, 116 insertions(+)
 create mode 100644 tests/pecha/read/data/IE7D6875F/IE7D6875F.opf/base/f2b056668a0c4ad3a085bdcd8e2d7adb.txt
 create mode 100644 tests/pecha/read/data/IE7D6875F/IE7D6875F.opf/layers/f2b056668a0c4ad3a085bdcd8e2d7adb/Segment-bf13.json
 create mode 100644 tests/pecha/read/data/IE7D6875F/IE7D6875F.opf/metadata.json
 create mode 100644 tests/pecha/read/test_pecha_read.py

diff --git a/tests/pecha/read/data/IE7D6875F/IE7D6875F.opf/base/f2b056668a0c4ad3a085bdcd8e2d7adb.txt b/tests/pecha/read/data/IE7D6875F/IE7D6875F.opf/base/f2b056668a0c4ad3a085bdcd8e2d7adb.txt
new file mode 100644
index 0000000..0b166fc
--- /dev/null
+++ b/tests/pecha/read/data/IE7D6875F/IE7D6875F.opf/base/f2b056668a0c4ad3a085bdcd8e2d7adb.txt
@@ -0,0 +1 @@
+རྒྱ་གར་སྐད་དུ། བོ་དྷི་སཏྭ་ཙརྱ་ཨ་བ་ཏཱ་ར།བོད་སྐད་དུ། བྱང་ཆུབ་སེམས་དཔའི་སྤྱོད་པ་ལ་འཇུག་པའི་ལེགས་པར་སྦྱར་བ།
\ No newline at end of file
diff --git a/tests/pecha/read/data/IE7D6875F/IE7D6875F.opf/layers/f2b056668a0c4ad3a085bdcd8e2d7adb/Segment-bf13.json b/tests/pecha/read/data/IE7D6875F/IE7D6875F.opf/layers/f2b056668a0c4ad3a085bdcd8e2d7adb/Segment-bf13.json
new file mode 100644
index 0000000..92bcaec
--- /dev/null
+++ b/tests/pecha/read/data/IE7D6875F/IE7D6875F.opf/layers/f2b056668a0c4ad3a085bdcd8e2d7adb/Segment-bf13.json
@@ -0,0 +1,88 @@
+{
+    "@type": "AnnotationStore",
+    "@id": "PechaAnnotationStore",
+    "resources": [
+        {
+            "@type": "TextResource",
+            "@id": "f2b056668a0c4ad3a085bdcd8e2d7adb.txt",
+            "@include": "IE7D6875F/IE7D6875F.opf/base/f2b056668a0c4ad3a085bdcd8e2d7adb.txt"
+        }
+    ],
+    "annotationsets": [
+        {
+            "@type": "AnnotationDataSet",
+            "@id": "PechaDataSet",
+            "keys": [
+                {
+                    "@type": "DataKey",
+                    "@id": "Structure Type"
+                }
+            ],
+            "data": [
+                {
+                    "@type": "AnnotationData",
+                    "@id": "0c2c4165fb58464eabf9db0d6a3a1080",
+                    "key": "Structure Type",
+                    "value": {
+                        "@type": "String",
+                        "value": "Segment"
+                    }
+                }
+            ]
+        }
+    ],
+    "annotations": [
+        {
+            "@type": "Annotation",
+            "@id": "f2b056668a0c4ad3a085bdcd8e2d7adb",
+            "target": {
+                "@type": "TextSelector",
+                "resource": "f2b056668a0c4ad3a085bdcd8e2d7adb.txt",
+                "offset": {
+                    "@type": "Offset",
+                    "begin": {
+                        "@type": "BeginAlignedCursor",
+                        "value": 0
+                    },
+                    "end": {
+                        "@type": "BeginAlignedCursor",
+                        "value": 39
+                    }
+                }
+            },
+            "data": [
+                {
+                    "@type": "AnnotationData",
+                    "@id": "0c2c4165fb58464eabf9db0d6a3a1080",
+                    "set": "PechaDataSet"
+                }
+            ]
+        },
+        {
+            "@type": "Annotation",
+            "@id": "b696df2dbe314e8a87881a2bc391d0d5",
+            "target": {
+                "@type": "TextSelector",
+                "resource": "f2b056668a0c4ad3a085bdcd8e2d7adb.txt",
+                "offset": {
+                    "@type": "Offset",
+                    "begin": {
+                        "@type": "BeginAlignedCursor",
+                        "value": 39
+                    },
+                    "end": {
+                        "@type": "BeginAlignedCursor",
+                        "value": 103
+                    }
+                }
+            },
+            "data": [
+                {
+                    "@type": "AnnotationData",
+                    "@id": "0c2c4165fb58464eabf9db0d6a3a1080",
+                    "set": "PechaDataSet"
+                }
+            ]
+        }
+    ]
+}
\ No newline at end of file
diff --git a/tests/pecha/read/data/IE7D6875F/IE7D6875F.opf/metadata.json b/tests/pecha/read/data/IE7D6875F/IE7D6875F.opf/metadata.json
new file mode 100644
index 0000000..cb740ab
--- /dev/null
+++ b/tests/pecha/read/data/IE7D6875F/IE7D6875F.opf/metadata.json
@@ -0,0 +1,3 @@
+{
+    "annotation_label": "Segment"
+}
\ No newline at end of file
diff --git a/tests/pecha/read/test_pecha_read.py b/tests/pecha/read/test_pecha_read.py
new file mode 100644
index 0000000..c2f3ff2
--- /dev/null
+++ b/tests/pecha/read/test_pecha_read.py
@@ -0,0 +1,24 @@
+from pathlib import Path
+
+from openpecha.pecha import Pecha
+from openpecha.pecha.layer import Layer, LayerEnum
+
+
+def test_pecha_read():
+    DATA = Path(__file__).parent / "data"
+    pecha = Pecha.from_path(DATA / "IE7D6875F" / "IE7D6875F.opf")
+    assert pecha.pecha_id == "IE7D6875F"
+    assert "f2b056668a0c4ad3a085bdcd8e2d7adb" in pecha.bases
+    assert (
+        pecha.bases["f2b056668a0c4ad3a085bdcd8e2d7adb"]
+        == "རྒྱ་གར་སྐད་དུ། བོ་དྷི་སཏྭ་ཙརྱ་ཨ་བ་ཏཱ་ར།བོད་སྐད་དུ། བྱང་ཆུབ་སེམས་དཔའི་སྤྱོད་པ་ལ་འཇུག་པའི་ལེགས་པར་སྦྱར་བ།"
+    )
+
+    for layer_key, layer in pecha.layers["f2b056668a0c4ad3a085bdcd8e2d7adb"].items():
+        annotation_type, layer_id = layer_key
+        assert annotation_type == LayerEnum.segment
+        assert isinstance(layer_id, str)
+        assert isinstance(layer, Layer)
+
+
+test_pecha_read()

From 72ef523dfe35337fb7dbf7472d776cd6628a92b6 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Mon, 8 Jul 2024 10:32:38 +0530
Subject: [PATCH 38/49] fix/set annotation id in layer classmethod from path

---
 src/openpecha/pecha/layer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/openpecha/pecha/layer.py b/src/openpecha/pecha/layer.py
index ac36c90..4a6ad9f 100644
--- a/src/openpecha/pecha/layer.py
+++ b/src/openpecha/pecha/layer.py
@@ -56,7 +56,7 @@ def from_path(cls, layer_file_path: Path):
             start = annotation.offset().begin().value()
             end = annotation.offset().end().value()
             layer_annotations[annotation_id] = Annotation(
-                segment=segment, start=start, end=end
+                id_=annotation_id, segment=segment, start=start, end=end
             )
 
         return Layer(

From 2f289666ca739456ec34d25c0de578e4c94dbaa7 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Mon, 8 Jul 2024 12:04:05 +0530
Subject: [PATCH 39/49] Layer get_annotations

---
 src/openpecha/pecha/layer.py        | 21 +++++++++++++++++++++
 tests/pecha/read/test_pecha_read.py | 20 ++++++++++++++++++++
 2 files changed, 41 insertions(+)

diff --git a/src/openpecha/pecha/layer.py b/src/openpecha/pecha/layer.py
index 4a6ad9f..efc144f 100644
--- a/src/openpecha/pecha/layer.py
+++ b/src/openpecha/pecha/layer.py
@@ -5,6 +5,7 @@
 from typing import Dict, Optional
 
 from pydantic import BaseModel, ConfigDict, Field
+from stam import Annotation as StamAnnotation
 from stam import AnnotationDataSet, AnnotationStore, Offset, Selector
 
 from openpecha.config import PECHA_ANNOTATION_STORE_ID, PECHA_DATASET_ID
@@ -63,8 +64,28 @@ def from_path(cls, layer_file_path: Path):
             id_=layer_id,
             annotation_type=annotation_label,
             annotations=layer_annotations,
+            annotation_store=annotation_store,
         )
 
+    def get_annotations(self):
+        if not self.annotation_store:
+            return None
+        for ann in self.annotation_store:
+            yield self.parse_annotation(ann)
+
+    def get_annotation(self, ann_id: str):
+        if not self.annotation_store:
+            return None
+        ann = self.annotation_store.annotation(id=ann_id)
+        return self.parse_annotation(ann)
+
+    def parse_annotation(self, ann: StamAnnotation):
+        ann_id = ann.id()
+        ann_segment = str(ann)
+        start = ann.offset().begin().value()
+        end = ann.offset().end().value()
+        return {"id": ann_id, "segment": ann_segment, "start": start, "end": end}
+
     def set_annotation(self, annotation: Annotation):
         self.annotations[annotation.id_] = annotation
 
diff --git a/tests/pecha/read/test_pecha_read.py b/tests/pecha/read/test_pecha_read.py
index c2f3ff2..97b5e03 100644
--- a/tests/pecha/read/test_pecha_read.py
+++ b/tests/pecha/read/test_pecha_read.py
@@ -20,5 +20,25 @@ def test_pecha_read():
         assert isinstance(layer_id, str)
         assert isinstance(layer, Layer)
 
+    first_layer = pecha.layers["f2b056668a0c4ad3a085bdcd8e2d7adb"][
+        (LayerEnum.segment, "bf13")
+    ]
+
+    annotations = list(first_layer.get_annotations())
+    assert annotations == [
+        {
+            "id": "f2b056668a0c4ad3a085bdcd8e2d7adb",
+            "segment": "རྒྱ་གར་སྐད་དུ། བོ་དྷི་སཏྭ་ཙརྱ་ཨ་བ་ཏཱ་ར།",
+            "start": 0,
+            "end": 39,
+        },
+        {
+            "id": "b696df2dbe314e8a87881a2bc391d0d5",
+            "segment": "བོད་སྐད་དུ། བྱང་ཆུབ་སེམས་དཔའི་སྤྱོད་པ་ལ་འཇུག་པའི་ལེགས་པར་སྦྱར་བ།",
+            "start": 39,
+            "end": 103,
+        },
+    ]
+
 
 test_pecha_read()

From 783927457a81424f38ebc67b9325a0a2ef6c9526 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Mon, 8 Jul 2024 14:47:38 +0530
Subject: [PATCH 40/49] add/ annotation metadata in parse_annotation

---
 src/openpecha/pecha/layer.py        | 14 +++++++++++++-
 tests/pecha/read/test_pecha_read.py |  4 ++++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/src/openpecha/pecha/layer.py b/src/openpecha/pecha/layer.py
index efc144f..1a4aea6 100644
--- a/src/openpecha/pecha/layer.py
+++ b/src/openpecha/pecha/layer.py
@@ -84,7 +84,19 @@ def parse_annotation(self, ann: StamAnnotation):
         ann_segment = str(ann)
         start = ann.offset().begin().value()
         end = ann.offset().end().value()
-        return {"id": ann_id, "segment": ann_segment, "start": start, "end": end}
+
+        parsed_ann = {"id": ann_id, "segment": ann_segment, "start": start, "end": end}
+
+        for ann_data in ann:
+            key, value = ann_data.key().id(), str(ann_data.value())
+            if key in LayerGroupEnum._value2member_map_:
+                parsed_ann["annotation_category"] = key
+                parsed_ann["annotation_type"] = value
+            else:
+                # keep one payload map; re-creating it per item dropped earlier keys
+                parsed_ann.setdefault("payloads", defaultdict(str))[key] = value
+
+        return parsed_ann
 
     def set_annotation(self, annotation: Annotation):
         self.annotations[annotation.id_] = annotation
diff --git a/tests/pecha/read/test_pecha_read.py b/tests/pecha/read/test_pecha_read.py
index 97b5e03..fe80e9c 100644
--- a/tests/pecha/read/test_pecha_read.py
+++ b/tests/pecha/read/test_pecha_read.py
@@ -31,12 +31,16 @@ def test_pecha_read():
             "segment": "རྒྱ་གར་སྐད་དུ། བོ་དྷི་སཏྭ་ཙརྱ་ཨ་བ་ཏཱ་ར།",
             "start": 0,
             "end": 39,
+            "annotation_category": "Structure Type",
+            "annotation_type": "Segment",
         },
         {
             "id": "b696df2dbe314e8a87881a2bc391d0d5",
             "segment": "བོད་སྐད་དུ། བྱང་ཆུབ་སེམས་དཔའི་སྤྱོད་པ་ལ་འཇུག་པའི་ལེགས་པར་སྦྱར་བ།",
             "start": 39,
             "end": 103,
+            "annotation_category": "Structure Type",
+            "annotation_type": "Segment",
         },
     ]
 

From fba97e27a636da71f18f0e3a9d452d393aeb2cbb Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Mon, 8 Jul 2024 15:02:33 +0530
Subject: [PATCH 41/49] write ann metadata to stam if exist

---
 src/openpecha/pecha/layer.py | 31 ++++++++++++++++++++++++++++---
 1 file changed, 28 insertions(+), 3 deletions(-)

diff --git a/src/openpecha/pecha/layer.py b/src/openpecha/pecha/layer.py
index 1a4aea6..b1ed743 100644
--- a/src/openpecha/pecha/layer.py
+++ b/src/openpecha/pecha/layer.py
@@ -2,7 +2,7 @@
 from collections import defaultdict
 from enum import Enum
 from pathlib import Path
-from typing import Dict, Optional
+from typing import Dict, Optional, Tuple
 
 from pydantic import BaseModel, ConfigDict, Field
 from stam import Annotation as StamAnnotation
@@ -111,20 +111,45 @@ def write(self, base_file_path: Path, output_path: Path):
         self.dataset = self.annotation_store.add_dataset(id=PECHA_DATASET_ID)
         annotation_category = get_annotation_category(self.annotation_type).value
         self.dataset.add_key(annotation_category)
-        unique_annotation_data_id = get_uuid()
+
+        unique_ann_data_id = get_uuid()
+        ann_data_ids: Dict[Tuple[str, str], str] = {}
+
         for annotation_id, annotation in self.annotations.items():
             target = Selector.textselector(
                 resource,
                 Offset.simple(annotation.start, annotation.end),
             )
+
             data = [
                 {
-                    "id": unique_annotation_data_id,
+                    "id": unique_ann_data_id,
                     "key": annotation_category,
                     "value": self.annotation_type.value,
                     "set": self.dataset.id(),
                 }
             ]
+            """
+                add metadata to the annotation if exists
+                if the metadata is already added, get the id from the dictionary,
+                else create a new id and add to the dictionary
+            """
+            if annotation.metadata:
+                for key, value in annotation.metadata.items():
+                    if (key, value) in ann_data_ids:
+                        ann_data_id = ann_data_ids[(key, value)]
+                    else:
+                        ann_data_id = get_uuid()
+                        ann_data_ids[(key, value)] = ann_data_id
+                    data.append(
+                        {
+                            "id": ann_data_id,
+                            "key": key,
+                            "value": value,
+                            "set": self.dataset.id(),
+                        }
+                    )
+
             self.annotation_store.annotate(
                 id=annotation_id,
                 target=target,

From 2ca71a5c3bccbb0f07e8a4748b9335595c2b76a7 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Mon, 8 Jul 2024 16:20:06 +0530
Subject: [PATCH 42/49] read pecha metadata from_path

---
 src/openpecha/pecha/__init__.py               | 43 ++++++++++++++++---
 src/openpecha/pecha/metadata.py               | 15 ++++---
 .../IE7D6875F/IE7D6875F.opf/metadata.json     |  4 +-
 3 files changed, 48 insertions(+), 14 deletions(-)

diff --git a/src/openpecha/pecha/__init__.py b/src/openpecha/pecha/__init__.py
index 1a21d40..09f0deb 100644
--- a/src/openpecha/pecha/__init__.py
+++ b/src/openpecha/pecha/__init__.py
@@ -6,7 +6,11 @@
 from openpecha.config import PECHAS_PATH, _mkdir
 from openpecha.ids import get_uuid
 from openpecha.pecha.layer import Layer, LayerEnum
-from openpecha.pecha.metadata import PechaMetadata, to_json_serializable
+from openpecha.pecha.metadata import (
+    InitialCreationType,
+    PechaMetadata,
+    to_json_serializable,
+)
 
 
 class Pecha:
@@ -26,12 +30,14 @@ def __init__(
 
     @classmethod
     def from_path(cls, base_path: Path):
-        pecha_id = base_path.stem
-        pecha = Pecha(pecha_id=pecha_id)
 
-        # with open(base_path / "metadata.json", encoding="utf-8") as f:
-        #     metadata = json.load(f)
-        # pecha.set_metadata(metadata)
+        with open(base_path / "metadata.json", encoding="utf-8") as f:
+            metadata = json.load(f)
+            metadata = json.loads(metadata)
+
+        preprocessed_meta = preprocess_metadata(metadata)
+        pecha_metadata = PechaMetadata(**preprocessed_meta)
+        pecha = Pecha(metadata=pecha_metadata)
 
         for base_file in (base_path / "base").rglob("*"):
             base_text = base_file.read_text(encoding="utf-8")
@@ -98,3 +104,28 @@ def write(self, output_path: Path = PECHAS_PATH):
                         base_file_path=base_dir / f"{layer_name}.txt",
                         output_path=output_path,
                     )
+
+
+def preprocess_metadata(metadata: Dict) -> Dict:
+    # Replace null/missing values with defaults before building PechaMetadata
+    processed_metadata = {
+        "id_": metadata.get("id_", ""),
+        "title": metadata.get("title", []) if metadata.get("title") is not None else [],
+        "author": metadata.get("author", [])
+        if metadata.get("author") is not None
+        else [],
+        "source": metadata.get("source", "")
+        if metadata.get("source") is not None
+        else "",
+        "language": metadata.get("language", "")
+        if metadata.get("language") is not None
+        else "",
+        "initial_creation_type": InitialCreationType(metadata["initial_creation_type"])
+        if metadata.get("initial_creation_type") is not None
+        else None,
+        "created_at": metadata.get("created_at"),
+        "source_metadata": metadata.get("source_metadata", {})
+        if metadata.get("source_metadata") is not None
+        else {},
+    }
+    return processed_metadata
diff --git a/src/openpecha/pecha/metadata.py b/src/openpecha/pecha/metadata.py
index a7fa7f0..a7d6840 100644
--- a/src/openpecha/pecha/metadata.py
+++ b/src/openpecha/pecha/metadata.py
@@ -18,16 +18,16 @@ class InitialCreationType(Enum):
 
 class PechaMetadata(BaseModel):
     id_: str = Field(default=None, alias="id_")
-    title: List[str] = Field(default=None, alias="title")
-    author: List[str] = Field(default=None, alias="author")
+    title: List[str] = Field(default_factory=list, alias="title")
+    author: List[str] = Field(default_factory=list, alias="author")
     source: str = Field(default=None, alias="source")
     language: str = Field(default=None, alias="language")
     initial_creation_type: InitialCreationType = Field(
         None, alias="initial_creation_type"
     )
-    created_at: datetime = Field(default=None, alias="created_at")
+    created_at: Optional[datetime] = Field(default=None, alias="created_at")
     source_metadata: Optional[Dict] = Field(
-        default={}
+        default_factory=dict
     )  # place to dump any metadata from the source
 
     @field_validator("created_at", mode="before")
@@ -48,10 +48,15 @@ def to_json_serializable(pecha_metadata: Optional[PechaMetadata]) -> str:
     # Convert the model to a dictionary
     dict_data = pecha_metadata.model_dump()
     # Convert the defaultdict to a regular dictionary
-    dict_data["source_metadata"] = dict(dict_data["source_metadata"])
     # Convert the initial_creation_type enum to its value
     if dict_data["initial_creation_type"] is not None:
         dict_data["initial_creation_type"] = dict_data["initial_creation_type"].value
+    for k, v in dict_data.items():
+        if v is list:
+            dict_data[k] = []
+            continue
+        if v is dict:
+            dict_data[k] = {}
     return json.dumps(dict_data, indent=4, ensure_ascii=False)
 
 
diff --git a/tests/pecha/read/data/IE7D6875F/IE7D6875F.opf/metadata.json b/tests/pecha/read/data/IE7D6875F/IE7D6875F.opf/metadata.json
index cb740ab..38be7bc 100644
--- a/tests/pecha/read/data/IE7D6875F/IE7D6875F.opf/metadata.json
+++ b/tests/pecha/read/data/IE7D6875F/IE7D6875F.opf/metadata.json
@@ -1,3 +1 @@
-{
-    "annotation_label": "Segment"
-}
\ No newline at end of file
+"{\n    \"id_\": \"IE7D6875F\",\n    \"title\": null,\n    \"author\": null,\n    \"source\": null,\n    \"language\": null,\n    \"initial_creation_type\": \"input\",\n    \"created_at\": null,\n    \"source_metadata\": {}\n}"
\ No newline at end of file

From 3a2e5af84fd2a7215312752d4d7395e9bccec644 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Wed, 10 Jul 2024 08:28:08 +0530
Subject: [PATCH 43/49] modify path assignment

---
 src/openpecha/config.py             |  2 ++
 src/openpecha/pecha/__init__.py     | 11 +++++++----
 tests/pecha/read/test_pecha_read.py |  2 +-
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/openpecha/config.py b/src/openpecha/config.py
index e0fa952..8d34098 100644
--- a/src/openpecha/config.py
+++ b/src/openpecha/config.py
@@ -9,6 +9,8 @@ def _mkdir(path):
     return path
 
 
+ORG_NAME = "PechaData"
+
 BASE_PATH = _mkdir(Path.home() / ".pechadata")
 PECHAS_PATH = _mkdir(BASE_PATH / "pechas")
 
diff --git a/src/openpecha/pecha/__init__.py b/src/openpecha/pecha/__init__.py
index 09f0deb..4662a93 100644
--- a/src/openpecha/pecha/__init__.py
+++ b/src/openpecha/pecha/__init__.py
@@ -29,8 +29,9 @@ def __init__(
         self.metadata = metadata
 
     @classmethod
-    def from_path(cls, base_path: Path):
-
+    def from_path(cls, pecha_path: Path):
+        pecha_id = pecha_path.stem
+        base_path = pecha_path / f"{pecha_id}.opf"
         with open(base_path / "metadata.json", encoding="utf-8") as f:
             metadata = json.load(f)
             metadata = json.loads(metadata)
@@ -38,6 +39,7 @@ def from_path(cls, base_path: Path):
         preprocessed_meta = preprocess_metadata(metadata)
         pecha_metadata = PechaMetadata(**preprocessed_meta)
         pecha = Pecha(metadata=pecha_metadata)
+        pecha.pecha_path = pecha_path
 
         for base_file in (base_path / "base").rglob("*"):
             base_text = base_file.read_text(encoding="utf-8")
@@ -77,8 +79,9 @@ def write(self, output_path: Path = PECHAS_PATH):
         if not self.pecha_id:
             raise ValueError("pecha_id must be set before writing.")
 
-        pecha_dir = _mkdir(output_path / self.pecha_id)
-        self.base_path = _mkdir(pecha_dir / f"{self.pecha_id}.opf")
+        self.pecha_path = _mkdir(output_path / self.pecha_id)
+
+        self.base_path = _mkdir(self.pecha_path / f"{self.pecha_id}.opf")
         """ write metadata """
         self.metadata_fn = self.base_path / "metadata.json"
         self.metadata_fn.write_text(
diff --git a/tests/pecha/read/test_pecha_read.py b/tests/pecha/read/test_pecha_read.py
index fe80e9c..fbd4f9a 100644
--- a/tests/pecha/read/test_pecha_read.py
+++ b/tests/pecha/read/test_pecha_read.py
@@ -6,7 +6,7 @@
 
 def test_pecha_read():
     DATA = Path(__file__).parent / "data"
-    pecha = Pecha.from_path(DATA / "IE7D6875F" / "IE7D6875F.opf")
+    pecha = Pecha.from_path(DATA / "IE7D6875F")
     assert pecha.pecha_id == "IE7D6875F"
     assert "f2b056668a0c4ad3a085bdcd8e2d7adb" in pecha.bases
     assert (

From cfc24147ddcc644aa696b72629a4696ae8fe3005 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Wed, 10 Jul 2024 08:41:54 +0530
Subject: [PATCH 44/49] upload files to github repo

---
 pyproject.toml                |  2 +-
 src/openpecha/github_utils.py | 37 +++++++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+), 1 deletion(-)
 create mode 100644 src/openpecha/github_utils.py

diff --git a/pyproject.toml b/pyproject.toml
index b0a336a..405713f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -21,7 +21,7 @@ dependencies = [
   "pydantic >= 2.7.4",
   "stam == 0.8.2",
   "collection >= 0.1.6",
-
+  "PyGithub >= 2.3.0",
 ]
 
 [project.optional-dependencies]
diff --git a/src/openpecha/github_utils.py b/src/openpecha/github_utils.py
new file mode 100644
index 0000000..4237cda
--- /dev/null
+++ b/src/openpecha/github_utils.py
@@ -0,0 +1,37 @@
+import os
+from pathlib import Path
+
+from github import Github, GithubException
+
+from openpecha.config import ORG_NAME
+
+GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
+if not GITHUB_TOKEN:
+    raise Exception("GITHUB_TOKEN is not set in the environment.")
+
+
+def create_github_repo(repo_name: str):
+    try:
+        g = Github(GITHUB_TOKEN)
+        org = g.get_organization(ORG_NAME)
+        org.create_repo(repo_name)
+
+    except GithubException as e:
+        raise GithubException(f"Error creating repo: {e}")
+
+
+def upload_files_to_github_repo(repo_name: str, folder_path: Path):
+    try:
+        g = Github(GITHUB_TOKEN)
+        org = g.get_organization(ORG_NAME)
+        repo = org.get_repo(repo_name)
+
+        for file in folder_path.rglob("*"):
+            if file.is_dir():
+                continue
+            file_path = file.relative_to(folder_path)
+            with open(file, encoding="utf-8") as f:  # same encoding Pecha.from_path reads with
+                content = f.read()
+                repo.create_file(str(file_path), f"committing {file.name}", content)
+    except GithubException as e:
+        raise GithubException(f"Error uploading files to github: {e}")

From a03050caeb6836b799dd050730c0647fd1c56a32 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Wed, 10 Jul 2024 08:47:23 +0530
Subject: [PATCH 45/49] delete unnecessary lines

---
 src/openpecha/pecha/metadata.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/openpecha/pecha/metadata.py b/src/openpecha/pecha/metadata.py
index a7d6840..8220539 100644
--- a/src/openpecha/pecha/metadata.py
+++ b/src/openpecha/pecha/metadata.py
@@ -45,10 +45,7 @@ def to_json_serializable(pecha_metadata: Optional[PechaMetadata]) -> str:
     if pecha_metadata is None:
         return json.dumps({}, indent=4, ensure_ascii=False)
 
-    # Convert the model to a dictionary
     dict_data = pecha_metadata.model_dump()
-    # Convert the defaultdict to a regular dictionary
-    # Convert the initial_creation_type enum to its value
     if dict_data["initial_creation_type"] is not None:
         dict_data["initial_creation_type"] = dict_data["initial_creation_type"].value
     for k, v in dict_data.items():

From 32e10e60646ec38f1b97dc93b166522968ec63a0 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Wed, 10 Jul 2024 09:03:16 +0530
Subject: [PATCH 46/49] Pecha classmethod from_id

---
 src/openpecha/github_utils.py   | 22 ++++++++++++++++++++++
 src/openpecha/pecha/__init__.py |  4 +++-
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/src/openpecha/github_utils.py b/src/openpecha/github_utils.py
index 4237cda..7a15d28 100644
--- a/src/openpecha/github_utils.py
+++ b/src/openpecha/github_utils.py
@@ -1,5 +1,7 @@
 import os
+import subprocess
 from pathlib import Path
+from shutil import rmtree
 
 from github import Github, GithubException
 
@@ -35,3 +37,23 @@ def upload_files_to_github_repo(repo_name: str, folder_path: Path):
                 repo.create_file(str(file_path), f"committing {file.name}", content)
     except GithubException as e:
         raise GithubException(f"Error uploading files to github: {e}")
+
+
+def clone_github_repo(repo_name: str, destination_folder: Path):
+    repo_path = destination_folder / repo_name
+    if repo_path.exists():
+        rmtree(repo_path)
+    else:
+        try:
+            repo_url = f"https://github.com/{ORG_NAME}/{repo_name}.git"
+            env = {"GIT_ASKPASS": "echo", "GIT_PASSWORD": GITHUB_TOKEN}
+            subprocess.run(
+                ["git", "clone", repo_url, str(repo_path)],
+                check=True,
+                capture_output=True,
+                env={k: str(v) for k, v in env.items()},
+            )
+            return repo_path
+        except subprocess.CalledProcessError as e:
+            print(f"Error cloning {repo_name} repository: {e}")
+            return None
diff --git a/src/openpecha/pecha/__init__.py b/src/openpecha/pecha/__init__.py
index 4662a93..7bfff2c 100644
--- a/src/openpecha/pecha/__init__.py
+++ b/src/openpecha/pecha/__init__.py
@@ -4,6 +4,7 @@
 from typing import Dict, Optional, Tuple
 
 from openpecha.config import PECHAS_PATH, _mkdir
+from openpecha.github_utils import clone_github_repo
 from openpecha.ids import get_uuid
 from openpecha.pecha.layer import Layer, LayerEnum
 from openpecha.pecha.metadata import (
@@ -54,7 +55,8 @@ def from_path(cls, pecha_path: Path):
 
     @classmethod
     def from_id(cls, pecha_id: str):
-        pass
+        repo_path = clone_github_repo(pecha_id, PECHAS_PATH)
+        return cls.from_path(repo_path)
 
     def set_base_file(self, base_text: str, base_file_name: str = None) -> str:
         base_file_name = base_file_name if base_file_name else get_uuid()[:4]

From f13cc29764a9eeca38efff3de9b5ea1e5a66d9b6 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Wed, 10 Jul 2024 09:15:59 +0530
Subject: [PATCH 47/49] set up a dummy GITHUB_TOKEN

---
 .github/workflows/CI.yml | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index bdd06bd..bad05e6 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -16,20 +16,25 @@ jobs:
 
     steps:
     - uses: actions/checkout@v3
-    
+
     - name: Set up Python 3.8
       uses: actions/setup-python@v3
       with:
         python-version: "3.8"
-        
+
     - name: Install dependencies
       run: |
         pip install -U pip
         pip install .
         pip install .[dev]
-        
+
+    - name: Set up GITHUB_TOKEN
+      env:
+        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      run: echo "GITHUB_TOKEN is set up"
+
     - name: Test with pytest
       run: PYTHONPATH=src pytest
-    
+
     - name: Test Coverage
       run: PYTHONPATH=src pytest --cov project_name

From 961cef2f11ad77c728ec368be6d16b0464bb2cf5 Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Wed, 10 Jul 2024 09:50:08 +0530
Subject: [PATCH 48/49] update CI: set empty GITHUB_TOKEN for test steps, fix coverage target

---
 .github/workflows/CI.yml | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index bad05e6..830631e 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -28,13 +28,12 @@ jobs:
         pip install .
         pip install .[dev]
 
-    - name: Set up GITHUB_TOKEN
-      env:
-        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-      run: echo "GITHUB_TOKEN is set up"
-
     - name: Test with pytest
       run: PYTHONPATH=src pytest
+      env:
+        GITHUB_TOKEN: ""
 
     - name: Test Coverage
-      run: PYTHONPATH=src pytest --cov project_name
+      run: PYTHONPATH=src pytest --cov openpecha
+      env:
+        GITHUB_TOKEN: ""

From 154514a3d618fc13e102e8c38e7d1133ce2c44df Mon Sep 17 00:00:00 2001
From: Tenzin <tenzintsunduebhattu@gmail.com>
Date: Wed, 10 Jul 2024 09:58:29 +0530
Subject: [PATCH 49/49] update CI: use a single-space GITHUB_TOKEN placeholder in test steps

---
 .github/workflows/CI.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 830631e..288c688 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -31,9 +31,9 @@ jobs:
     - name: Test with pytest
       run: PYTHONPATH=src pytest
       env:
-        GITHUB_TOKEN: ""
+        GITHUB_TOKEN: " "
 
     - name: Test Coverage
       run: PYTHONPATH=src pytest --cov openpecha
       env:
-        GITHUB_TOKEN: ""
+        GITHUB_TOKEN: " "