In [2]:
# %load_ext nb_black

import yaml
import pandas as pd
import pathlib
import uuid
import json
import time
import datetime
from tqdm import tqdm
import humanfriendly
import exifread
import sys
import os
import PIL
from PIL import Image

cwfid_path = pathlib.Path(os.path.realpath('../cwfid_to_json/'))

input_metadata_file = cwfid_path / "labels.csv"
output_file = cwfid_path / "cwfid_imageinfo.json"
image_base = cwfid_path / "cwfid_images"
dir_name = "cwfid_images"





In [39]:

CATEGORY_MAP = {
    # TODO: we need a spec for species unspecified, and we need a spec for species specified
    'crop': {'species': 'daugus carota', 'role': 'crop', 'id': 0},
    'weed': {'species': 'UNSPECIFIED', 'role': 'weed', 'id': 1}
}

def create_annotations(ann_blob, image_id, starting_idx):
    annotations = []
    for i, obj in enumerate(ann_blob['annotation'], starting_idx):
        category = CATEGORY_MAP[obj['type']]
        # a COCO polygon is just a sequence [[x1, y1, x2, y2, ...]]
        if not isinstance(obj["points"]["x"], list):
            print(f"Found invalid polygon for annotation of {ann_blob['filename']} with points {obj['points']}")
            continue
        polygon = zip(obj["points"]["x"], obj["points"]["y"])
        polygon = sum(polygon, ())  # flatten an iterable of tuples
        polygon = [list(map(int, polygon))]
        # TODO: parse this polygon with pycocotools.frPyObjects(polygon, im_height, im_width)
        # then:
        # * check that mask matches the stored images in cwfid, and that there is not an off-by-one error (due to indexing base)
        # * get area and bbox for annotation object
        annotations.append({
            "id": i,
            "image_id": image_id,
            "category_id": category["id"],
            "segmentation": polygon,
            "iscrowd": 0,
            # something in pycocotools to do this
            # "area": calculate_area(polygon),
            # "bbox": ...
        })
    return annotations

In [40]:
def get_image_dimensions(path):
    if not os.path.isfile(path):
        return None
    # Retrieve image width and height
    image = PIL.Image.open(fullPath)
    width, height = image.size
    return {'width': width, 'height': height}

In [41]:
missing_files = []
categories = [
    CATEGORY_MAP['crop'],
    CATEGORY_MAP['weed']
]
agdata = [{"id": 0,
           # TODO
          }]
info = {}  # TODO
annotations = []
images = []
progress = tqdm((cwfid_path / "annotations").glob("*_annotation.yaml"))
for ann_path in progress:
    progress.set_description(ann_path.name)
    image_id = int(ann_path.name[:3])
    ann_blob = yaml.safe_load(ann_path.open())
    
    image = {
        "id": image_id,
        "file_name": os.path.join(dir_name, ann_blob["filename"]),
        "license": 0,  # TODO
        "agdata_id": 0,
    }
    dims = get_image_dimensions(image_base / ann_blob["filename"])
    if dims is None:
        missing_files.append(image_base / ann_blob["filename"])
    else:
        image.update(dims)
    images.append(image)
    
    annotations.extend(create_annotations(ann_blob, image_id, starting_idx=len(annotations)))

041_annotation.yaml: : 13it [00:00, 61.45it/s]

Found invalid polygon for annotation of 018_image.png with points {'x': 685.0, 'y': 200.0}
Found invalid polygon for annotation of 009_image.png with points {'x': 457.0, 'y': 650.0}


020_annotation.yaml: : 60it [00:00, 63.77it/s]


In [None]:
collections = [
    # TODO: DCMI conformance
    {"author": "Haug, Sebastian and Ostermann, Jörn",
     "title": "A Crop/Weed Field Image Dataset for the Evaluation of Computer Vision Based Precision Agriculture Tasks",
     "year": 2015,
     "identifier": "doi:10.1007/978-3-319-16220-1_8",
     "rights": "All data is subject to copyright and may only be used for non-commercial research. In case of use please cite our publication.",
     "id": 0,
    }
]
# TODO: construct from train_test_split.yaml
collection_memberships = [
    {"image_id": 0,  # XXX: do we record membership per image? per annotation? per both????
     "subset": "test",
     "collection_id": 0}
]

In [43]:
with output_file.open('w') as fout:
    json.dump({"info": info,
               "agdata": agdata,
               "images": images,
               "annotations": annotations,
               "categories": categories,
               "collections": collections,
               "collection_memberships": collection_memberships},
              fout)

In [46]:
!pbcopy < $output_file