### Importing the necessary packages

In [54]:
# handling the file system
import os
# handling metadata in the zip file
import zipfile as zf
import xml.etree.ElementTree as ET
# documenting the annotation
import pandas as pd

### Initialize the global variables

In [14]:
# Root folder of the dataset, given relative to the notebook's working directory
path_dataset = "./../../../dataset_used/"
# Sub-folders of the dataset root: the zip archives and the annotation output
path_zip_files, path_annotation = (
    os.path.join(path_dataset, sub_dir) for sub_dir in ("zipped_files", "annotations")
)
# Class labels; also used as the names of the per-class annotation folders
labels = ['glaucoma', 'non_glaucoma']

### Create the directories to store the annotation files

In [16]:
# Make sure every label has its own folder below the annotation directory.
for label in labels:
    labelled_dir = os.path.join(path_annotation, label)
    if os.path.exists(labelled_dir):
        # Nothing to create; just report that the folder is already there.
        print(f"{labelled_dir} directory already exists")
        continue
    os.makedirs(labelled_dir)
# Drop the loop helper so it does not linger in the notebook namespace.
del labelled_dir

./../../../dataset_used/annotations\glaucoma directory already exists
./../../../dataset_used/annotations\non_glaucoma directory already exists


### Handle the zipped files

In [22]:
# Collect the archives in sorted order: os.listdir() returns entries in
# arbitrary order, and sorting keeps the extraction order identical on every run.
zip_files = sorted(file for file in os.listdir(path_zip_files) if file.endswith(".zip"))

for zip_file in zip_files:
    # The first label that prefixes the file name decides the destination folder
    # (same first-match rule as an if/elif chain, but for any number of labels).
    matched_label = next((label for label in labels if zip_file.startswith(label)), None)
    if matched_label is None:
        # Not one of the known classes: do not open the archive, but say so
        # instead of ignoring it silently.
        print(f"{zip_file} skipped: file name does not start with any of {labels}")
        continue
    with zf.ZipFile(os.path.join(path_zip_files, zip_file), 'r') as zip_ref:
        zip_ref.extractall(os.path.join(path_annotation, matched_label))

### Handle the annotation files

In [57]:
def get_annotation(xml_file:str):
    """Parse an annotation XML file into per-image records and a summary table.

    Every direct child of the XML root whose tag is ``image`` is read. Its
    ``name``, ``width`` and ``height`` attributes become the metadata, and the
    ``label`` and ``points`` attributes of each of its child elements become
    one annotation entry. All values are kept as the raw attribute strings.

    Args:
        xml_file (str): the path to the xml file

    Returns:
        list, pd.DataFrame: a list with one dictionary per image, shaped as
        ``{"metadata": {...}, "annotation": [{"label": ..., "points": ...}, ...]}``,
        and a dataframe with the columns ``img_name``, ``disc annotated`` and
        ``cup annotated`` holding one row per image.

    Raises:
        KeyError: if an ``image`` element lacks ``name``, ``width`` or
            ``height``, or if one of its children lacks ``label`` or ``points``.
    """
    # Parse the xml file
    root = ET.parse(xml_file).getroot()

    annotasi = []
    rows = []

    # Only direct children of the root tagged "image" carry annotations
    for image in root.findall("image"):
        metadata = {
            "img_name": image.attrib["name"],
            "img_width": image.attrib["width"],
            "img_height": image.attrib["height"],
        }
        annot_content = [
            {"label": shape.attrib["label"], "points": shape.attrib["points"]}
            for shape in image
        ]
        annotasi.append({"metadata": metadata,
                         "annotation": annot_content})

        # Derive the flags from this image's own annotation count. Reading a
        # loop counter after the loop would fail on an image without children
        # (or silently reuse the previous image's value).
        # NOTE(review): the flags are count-based, not label-based; presumably
        # the first annotation is the disc and the second the cup — confirm.
        n_shapes = len(annot_content)
        rows.append([metadata["img_name"], n_shapes >= 1, n_shapes == 2])

    # Build the dataframe once instead of appending to it row by row
    documentation = pd.DataFrame(rows, columns=['img_name', 'disc annotated', 'cup annotated'])

    return annotasi, documentation

In [58]:
# NOTE(review): in the cells visible here, `documentation` exists only as a
# local variable inside get_annotation(); this display relies on some other
# cell having bound the name at notebook level (presumably from the second
# return value of get_annotation(), or from a saved file) — confirm that such
# a cell exists so the notebook survives Restart & Run All.
documentation

Unnamed: 0,img_name,disc annotated,cup annotated
0,fff_1_120793_l_2.jpg,True,True
1,fff_1_120793_l_4.jpg,True,True
2,fff_1_120793_r_1.jpg,True,False
3,fff_1_120793_r_2.jpg,True,False
4,fff_1_120793_r_3.jpg,True,False
5,fff_1_120793_r_4.jpg,True,False
6,fff_1_120793_r_5.jpg,True,False
