# Avoid the border effect while evaluating

- Although the labelling process kept only wheat heads with more than 30% visible, labelling the heads near the image border was still an ambiguous task.
- To resolve this ambiguity when evaluating localization algorithms, two specific preprocessing steps have been applied to the original labels. We recommend filtering your JSON with the same procedure as ours.

## 1-Localization

- To avoid the ambiguity of detecting wheat heads on the border, all boxes on the border are removed.
- A square of 921x921px is centered on the image. All boxes that don't have a complete overlap with this square are removed.

In [1]:
import json
from pathlib import Path
import numpy as np 
from tqdm.notebook import tqdm


In [2]:
def clean_boxes(boxes, min_area=10):
    """Return the indices of the boxes whose area is strictly above ``min_area``.

    Args:
        boxes: 2-D array-like with one box per row. Columns 2 and 3 hold the
            two side lengths of the box; their product is used as the area.
        min_area: Area threshold. Boxes whose area is less than or equal to
            this value are discarded.

    Returns:
        A 1-D integer array of row indices into ``boxes``. The result is
        always 1-D (possibly empty), so ``boxes[result]`` stays 2-D.
    """
    boxes = np.asarray(boxes)
    if boxes.size == 0:
        return np.empty(0, dtype=np.intp)

    area = boxes[:, 2] * boxes[:, 3]

    # flatnonzero keeps the result 1-D even for a single match; squeezing
    # np.argwhere collapsed that case into a 0-d index, which turned
    # boxes[pick] into a 1-D row and broke every later boxes[:, k] access.
    return np.flatnonzero(area > min_area)


def filter_border(boxes, patch_size, sensitivity):
    """Return the indices of the boxes lying completely inside the central square.

    The square has a side of ``patch_size * (1 - sensitivity)`` and its
    top-left corner is ``int(patch_size * sensitivity / 2 - 1)`` on each axis.

    Args:
        boxes: Array-like with one ``[x, y, w, h]`` box per row, where
            ``(x, y)`` is the top-left corner and ``(x + w, y + h)`` the
            bottom-right corner.
        patch_size: ``(width, height)`` of the image, in pixels.
        sensitivity: Fraction of the image size that is treated as border
            (split between the two sides of each axis).

    Returns:
        A 1-D integer array (possibly empty) with the row indices of the
        boxes to keep, i.e. those not crossing or lying outside the square.
    """
    boxes = np.asarray(boxes, dtype=float)
    if boxes.size == 0:
        return np.empty(0, dtype=np.intp)
    # Accept a single [x, y, w, h] row as well as a 2-D array of rows.
    boxes = np.atleast_2d(boxes)

    size = np.asarray(patch_size, dtype=float)
    x_min, y_min = ((size * sensitivity) / 2 - 1).astype(int)
    x_max, y_max = np.array([x_min, y_min]) + size * (1 - sensitivity)

    left = boxes[:, 0]
    top = boxes[:, 1]
    right = left + boxes[:, 2]
    bottom = top + boxes[:, 3]

    # Direct containment test. The previous "overlap ratio != 1" test padded
    # the intersection by one pixel per side, so the ratio was above 1 for
    # inside boxes and 0 for outside ones: almost every box was kept.
    inside = (
        (left >= x_min)
        & (top >= y_min)
        & (right <= x_max)
        & (bottom <= y_max)
    )

    return np.flatnonzero(inside)

def clean_json(
    data,
    sensitivity=0.1
    ):
    """Drop the tiny boxes and the border boxes from a label dictionary.

    For every image, the annotations are first filtered with ``clean_boxes``
    and then with ``filter_border``; annotations that fail either filter are
    removed. Annotations whose ``image_id`` matches no image are dropped.

    Args:
        data: Dictionary with an ``"images"`` list (entries providing ``"id"``,
            ``"width"`` and ``"height"``) and an ``"annotations"`` list
            (entries providing ``"image_id"`` and ``"bbox"``).
        sensitivity: Border fraction forwarded to ``filter_border``.

    Returns:
        The same ``data`` object, with ``data["annotations"]`` replaced by
        the filtered list (the input is modified in place).
    """
    # Group the annotations once instead of rescanning the full list for
    # every image; the per-image order of the annotations is preserved.
    anns_by_image = {}
    for ann in data["annotations"]:
        anns_by_image.setdefault(ann["image_id"], []).append(ann)

    new_ann = []

    for img_ann in tqdm(data["images"]):
        image_anns = anns_by_image.get(img_ann["id"], [])

        # Images with a single annotation are filtered too; they used to be
        # passed through untouched, border box included.
        if image_anns:
            # Use this image's own size rather than the first image's.
            patch_size = (img_ann["width"], img_ann["height"])
            boxes = np.array([ann["bbox"] for ann in image_anns])

            # atleast_1d guards against 0-d index results, so that
            # boxes[pick_1] always stays a 2-D array of boxes.
            pick_1 = np.atleast_1d(clean_boxes(boxes))
            pick_2 = np.atleast_1d(
                filter_border(boxes[pick_1], patch_size, sensitivity)
            )
            image_anns = [image_anns[idx] for idx in pick_1[pick_2]]

        new_ann += image_anns

    data["annotations"] = new_ann

    return data

In [3]:
# Filter every "corrected*.json" label file and write the result, under the
# same file name, into the "submit" sub-folder of the reference folder.
reference_data = Path("/home/etdavid/Projects/1_research/3_wheat_counting/3-GWHD/biased-result/")
submit_dir = reference_data / "submit"
# Make sure the output folder exists before writing into it.
submit_dir.mkdir(parents=True, exist_ok=True)

for jsonp in reference_data.glob("corrected*.json"):

    # read_text() opens and closes the file; json.load(jsonp.open()) left
    # the file handle open.
    data = json.loads(jsonp.read_text())
    data = clean_json(data)

    (submit_dir / jsonp.name).write_text(json.dumps(data))
    

TypeError: list indices must be integers or slices, not str

## 2-Counting

- To avoid border effects, the count is generated thanks to the following procedure:
    - A square of 921x921px is centered on the image
    - All boxes strictly outside the square are removed
    - Boxes completely within the square count as one
    - Boxes that are cropped are weighted by the proportion of their pixels that remain inside the square. For instance, if a box with an area of 150 pixels is cropped to a box with an area of 75 pixels, it will be counted as 75 / 150 = 0.5 in the total count

In [92]:
import pandas as pd
def weighted_count(boxes, patch_size, sensitivity):
    """Count the boxes, weighting each by the fraction of it inside the central square.

    The square has a side of ``patch_size * (1 - sensitivity)`` and its
    top-left corner is ``int(patch_size * sensitivity / 2 - 1)`` on each axis.
    A box completely inside the square weighs 1, a box with no overlap
    weighs 0, and a cropped box weighs ``cropped area / full area``
    (e.g. 75 / 150 = 0.5).

    Args:
        boxes: Array-like with one ``[x, y, w, h]`` box per row, where
            ``(x, y)`` is the top-left corner and ``(x + w, y + h)`` the
            bottom-right corner.
        patch_size: ``(width, height)`` of the image, in pixels.
        sensitivity: Fraction of the image size that is treated as border
            (split between the two sides of each axis).

    Returns:
        The sum of the per-box weights (0.0 when there are no boxes).
    """
    boxes = np.asarray(boxes, dtype=float)
    if boxes.size == 0:
        return 0.0
    # Accept a single [x, y, w, h] row as well as a 2-D array of rows.
    boxes = np.atleast_2d(boxes)

    size = np.asarray(patch_size, dtype=float)
    x_min, y_min = ((size * sensitivity) / 2 - 1).astype(int)
    x_max, y_max = np.array([x_min, y_min]) + size * (1 - sensitivity)

    left = boxes[:, 0]
    top = boxes[:, 1]
    width = boxes[:, 2]
    height = boxes[:, 3]
    areas = width * height

    # Sides of the intersection with the square. No "+ 1" here: the full
    # area is width * height, so padding the intersection by one pixel
    # over-weighted cropped boxes and gave weight to boxes only touching
    # the square.
    inter_w = np.maximum(0, np.minimum(left + width, x_max) - np.maximum(left, x_min))
    inter_h = np.maximum(0, np.minimum(top + height, y_max) - np.maximum(top, y_min))

    # Degenerate (zero-area) boxes weigh 0 instead of producing NaN/inf.
    weights = np.zeros_like(areas)
    valid = areas > 0
    weights[valid] = (inter_w[valid] * inter_h[valid]) / areas[valid]

    # A box never counts for more than one.
    return np.sum(np.minimum(weights, 1))

def generate_countcsv(
    sessions_path,
    out_name,
    sensitivity=0.1
    ):
    """Write a CSV with the border-weighted head count of every image.

    Each session JSON file is loaded, and for every image the boxes are
    filtered with ``clean_boxes`` and then summed with ``weighted_count``.

    Args:
        sessions_path: Iterable of ``pathlib.Path`` objects, one per session
            JSON file. Each file holds an ``"images"`` list (entries
            providing ``"id"``, ``"file_name"``, ``"width"``, ``"height"``)
            and an ``"annotations"`` list (entries providing ``"image_id"``
            and ``"bbox"``).
        out_name: Destination of the CSV file.
        sensitivity: Border fraction forwarded to ``weighted_count``.

    The CSV has the columns ``session`` (file name without its suffix),
    ``image_name``, ``count`` (weighted count) and ``control_count`` (raw
    number of boxes of the image).
    """
    count_csv = []

    for sessp in sessions_path:
        # Close the file as soon as it has been parsed.
        with sessp.open() as session_file:
            data = json.load(session_file)
        session_name = sessp.stem

        # Group the boxes once instead of rescanning every annotation for
        # every image.
        boxes_by_image = {}
        for ann in data["annotations"]:
            boxes_by_image.setdefault(ann["image_id"], []).append(ann["bbox"])

        for img_ann in tqdm(data["images"]):
            img_name = img_ann["file_name"]
            boxes = np.array(boxes_by_image.get(img_ann["id"], []))

            # Images with a single box are counted too; they used to be
            # reported with a count of 0.
            if len(boxes) > 0:
                # Use this image's own size rather than the first image's.
                patch_size = (img_ann["width"], img_ann["height"])

                # atleast_1d guards against a 0-d index result, so that
                # boxes[pick_1] always stays a 2-D array of boxes.
                pick_1 = np.atleast_1d(clean_boxes(boxes))

                count = weighted_count(boxes[pick_1], patch_size, sensitivity)
            else:
                count = 0

            count_csv.append([session_name, img_name, count, len(boxes)])

    count_df = pd.DataFrame(count_csv, columns=["session", "image_name", "count", "control_count"])

    count_df.to_csv(out_name)

In [100]:
# Build the reference count CSV from every session JSON of the test folder.
reference_data = Path("complete_test_json")
# glob() yields files in a filesystem-dependent order; sorting keeps the row
# order of the generated CSV reproducible from one run or machine to the next.
sessions_path = sorted(reference_data.glob("*.json"))

generate_countcsv(sessions_path,"/home/etdavid/Projects/2_modules_dev/global-wheat-codalab/bundle/reference_data/count.csv")

    

HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=994.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


