In [32]:
import cv2
from confusion_matrix import *
import glob
from tqdm import tqdm
import os

def read_model_output(file_path, img_id, height, width):
    """Parse a YOLO-style prediction label file into a list of detection dicts.

    Every non-blank line must hold at least six whitespace-separated fields:
    ``class x_center y_center width height confidence``. The four box values
    are multiplied by the image size, i.e. they are treated as normalized
    coordinates.

    Args:
        file_path: Path of the prediction ``.txt`` file to read.
        img_id: Value stored under ``"image_id"`` in every returned dict.
        height: Image height in pixels; scales the y coordinates.
        width: Image width in pixels; scales the x coordinates.

    Returns:
        A list with one dict per detection, holding ``"image_id"``,
        ``"category_id"``, ``"score"`` and ``"bbox"``. ``"bbox"`` is
        ``[x_min, y_min, x_max, y_max]`` in pixels (corner format, not
        ``[x, y, w, h]``); the minima are floored at 0 and the maxima are
        capped at ``width`` / ``height``.

    Raises:
        IndexError: If a non-blank line has fewer than six fields.
        ValueError: If a field cannot be converted to a number.
    """
    bounding_boxes = []

    with open(file_path, 'r') as file:
        for line in file:
            components = line.split()

            # Blank / whitespace-only lines (e.g. a trailing newline at the end
            # of the file) carry no detection; skip them instead of crashing.
            if not components:
                continue

            obj_class = int(components[0])
            x_center = float(components[1])
            y_center = float(components[2])
            width_ratio = float(components[3])
            height_ratio = float(components[4])
            confidence = float(components[5])

            # Convert centre/size ratios to pixel corner coordinates.
            x_min = max(0, (x_center - 0.5 * width_ratio) * width)
            y_min = max(0, (y_center - 0.5 * height_ratio) * height)
            x_max = min(width, (x_center + 0.5 * width_ratio) * width)
            y_max = min(height, (y_center + 0.5 * height_ratio) * height)

            bounding_boxes.append({
                "image_id": img_id,
                "category_id": obj_class,
                "score": confidence,
                "bbox": [float(x_min), float(y_min), float(x_max), float(y_max)],
            })

    return bounding_boxes

def read_gt(file_path, img_id, height, width):
    """Parse a ground-truth polygon label file into bounding-box annotations.

    Every non-blank line must look like ``class x1 y1 x2 y2 ...``: a class id
    followed by alternating x / y polygon coordinates. The coordinates are
    multiplied by the image size, i.e. they are treated as normalized values.
    The enclosing axis-aligned box of each polygon is returned.

    Args:
        file_path: Path of the label ``.txt`` file to read.
        img_id: Value stored under ``"image_id"`` in every returned dict.
        height: Image height in pixels; scales the y coordinates.
        width: Image width in pixels; scales the x coordinates.

    Returns:
        A list with one dict per annotation, holding ``"id"``, ``"image_id"``,
        ``"category_id"``, ``"score"`` (always 1), ``"bbox"``, ``"iscrowd"``
        (always 0) and ``"area"`` (box area truncated to ``int``). ``"bbox"``
        is ``[x_min, y_min, x_max, y_max]`` in pixels (corner format, not
        ``[x, y, w, h]``). ``"id"`` counts from 1 within this file only, so it
        is not unique across files.

    Raises:
        ValueError: If a field is not numeric, or a non-blank line has no
            coordinates after the class id.
    """
    bounding_boxes = []

    with open(file_path, 'r') as file:
        for line in file:
            values = line.split()

            # Blank / whitespace-only lines (e.g. a trailing newline at the end
            # of the file) carry no annotation; skip them instead of crashing.
            if not values:
                continue

            obj_class = int(values[0])
            segmentation = list(map(float, values[1:]))

            x_values = segmentation[::2]   # even positions are x coordinates
            y_values = segmentation[1::2]  # odd positions are y coordinates

            # Scale the normalized polygon extents up to pixel coordinates.
            x_min_px = min(x_values) * width
            x_max_px = max(x_values) * width
            y_min_px = min(y_values) * height
            y_max_px = max(y_values) * height

            bounding_boxes.append({
                # Running count of parsed annotations, so ids stay contiguous
                # even when blank lines were skipped.
                "id": len(bounding_boxes) + 1,
                "image_id": img_id,
                "category_id": obj_class,
                "score": 1,
                "bbox": [float(x_min_px), float(y_min_px),
                         float(x_max_px), float(y_max_px)],
                "iscrowd": 0,
                "area": int((x_max_px - x_min_px) * (y_max_px - y_min_px)),
            })

    return bounding_boxes

In [33]:
import json
# Input locations: ground-truth label files, the images they belong to, and the
# label files written by the model.
gt_label = './dataset_whole/training_valid_dataset/hardtail_cv_february_2023-13/train/labels/'
images = './dataset_whole/training_valid_dataset/hardtail_cv_february_2023-13/train/images/'
predicted_labels = './yolov7/seg/runs/predict-seg/exp2/labels/'

annotations = []
image_info = []
predictions = []

# Only .jpg files are images; anything else in the directory (hidden files,
# caches, ...) would otherwise crash the loop. os.listdir returns entries in
# arbitrary order, so sort to make the output order and ids reproducible.
image_files = sorted(name for name in os.listdir(images) if name.lower().endswith('.jpg'))

for file_name in tqdm(image_files):
    # Strip only the extension; str.replace('.jpg', ...) would also rewrite a
    # '.jpg' that appears in the middle of the file name.
    img_id = os.path.splitext(file_name)[0]
    gt = os.path.join(gt_label, img_id + '.txt')
    pred = os.path.join(predicted_labels, img_id + '.txt')
    img = os.path.join(images, file_name)

    image = cv2.imread(img)
    if image is None:
        # cv2.imread signals an unreadable file by returning None.
        raise ValueError(f"Could not read image: {img}")
    height, width = image.shape[:2]

    gt_box = read_gt(gt, img_id, height, width)
    # NOTE(review): a missing prediction file is treated as "no detections",
    # assuming the predictor writes no label file for such images - confirm.
    pred_box = read_model_output(pred, img_id, height, width) if os.path.isfile(pred) else []

    image_info.append({
            "id": img_id,
            "file_name": file_name,
            "width": width,
            "height": height
        })

    # read_gt numbers annotations from 1 within each file; renumber them so
    # every annotation id is unique across the whole dataset.
    for box in gt_box:
        box["id"] = len(annotations) + 1
        annotations.append(box)
    predictions.extend(pred_box)

# Create the COCO-style JSON object.
# NOTE(review): "bbox" values are [x_min, y_min, x_max, y_max]; the COCO format
# itself specifies [x, y, width, height] - confirm what the consumer expects.
coco_data = {
    "info": {
        "description": "My COCO dataset"
    },
    "images": image_info,
    "annotations": annotations,  # Add the list of annotations to the JSON object
    "categories": [{"id": 0, "name": "apparel"},{"id": 1, "name": "hand"},{"id": 2, "name": "product"}]  # Add a list of categories for the objects in the dataset
}

# Save the COCO JSON object to a file
with open("annotations.json", "w") as f:
    json.dump(coco_data, f)

# Save the flat list of predictions to a file
with open("predictions.json", "w") as f:
    json.dump(predictions, f)

100%|██████████████████████████████████████████████████████████████████████| 376/376 [00:09<00:00, 40.79it/s]


In [34]:
# Preview only the first few annotations; displaying the whole list floods the
# notebook output and bloats the saved file.
annotations[:5]

[{'id': 1,
  'image_id': '2023-02-16_1676576193_ht_video_mp4_out0044_png.rf.503e21942db4a17a4e3293a0823bb71c',
  'category_id': 2,
  'score': 1,
  'bbox': [972.1089337528882,
   498.8363174094097,
   1138.7480001714216,
   858.644935119638],
  'iscrowd': 0,
  'area': 59958},
 {'id': 2,
  'image_id': '2023-02-16_1676576193_ht_video_mp4_out0044_png.rf.503e21942db4a17a4e3293a0823bb71c',
  'category_id': 2,
  'score': 1,
  'bbox': [814.8017461844954,
   388.0808138575795,
   1040.5420565332893,
   555.7777786254883],
  'iscrowd': 0,
  'area': 37855},
 {'id': 3,
  'image_id': '2023-02-16_1676576193_ht_video_mp4_out0044_png.rf.503e21942db4a17a4e3293a0823bb71c',
  'category_id': 2,
  'score': 1,
  'bbox': [841.3333129882812,
   543.1094646689867,
   1005.8811312606791,
   706.57804320084],
  'iscrowd': 0,
  'area': 26898},
 {'id': 4,
  'image_id': '2023-02-16_1676576193_ht_video_mp4_out0044_png.rf.503e21942db4a17a4e3293a0823bb71c',
  'category_id': 2,
  'score': 1,
  'bbox': [624.166758893512

In [35]:
# Preview only the first few predictions; displaying the whole list floods the
# notebook output and bloats the saved file.
predictions[:5]


[{'image_id': '2023-02-16_1676576193_ht_video_mp4_out0044_png.rf.503e21942db4a17a4e3293a0823bb71c',
  'category_id': 2,
  'score': 0.869164,
  'bbox': [726.0, 557.000442, 834.0, 615.000438]},
 {'image_id': '2023-02-16_1676576193_ht_video_mp4_out0044_png.rf.503e21942db4a17a4e3293a0823bb71c',
  'category_id': 2,
  'score': 0.924029,
  'bbox': [243.00096000000002, 451.99992599999996, 543.00096, 517.999914]},
 {'image_id': '2023-02-16_1676576193_ht_video_mp4_out0044_png.rf.503e21942db4a17a4e3293a0823bb71c',
  'category_id': 2,
  'score': 0.932769,
  'bbox': [897.0001920000001, 695.0003039999999, 970.000128, 770.000256]},
 {'image_id': '2023-02-16_1676576193_ht_video_mp4_out0044_png.rf.503e21942db4a17a4e3293a0823bb71c',
  'category_id': 2,
  'score': 0.944059,
  'bbox': [693.99936, 462.99978000000004, 849.9993600000001, 605.99934]},
 {'image_id': '2023-02-16_1676576193_ht_video_mp4_out0044_png.rf.503e21942db4a17a4e3293a0823bb71c',
  'category_id': 2,
  'score': 0.957661,
  'bbox': [623.9997