Combine dataset and resize

Step 1: Resize Images and Create Initial JSON
Resize all the images and rename them according to the new IDs.

Create a JSON file for each sub-dataset (without annotations).

In [None]:
import os
import json
from PIL import Image

def resize_image(image_path, size):
    """Load an image from disk and return a resized copy with its original size.

    Args:
        image_path: Path of the image file to open.
        size: Target ``(width, height)`` handed to ``Image.resize``.

    Returns:
        A ``(resized_image, orig_size)`` tuple where ``orig_size`` is the
        ``(width, height)`` of the image before resizing.
    """
    # The context manager releases the file handle right away; resize()
    # returns a new, fully loaded image, so the result remains usable after
    # the source is closed. Without it, a handle stays open per image until
    # garbage collection, which adds up over a whole dataset.
    with Image.open(image_path) as image:
        orig_size = image.size
        resized = image.resize(size, Image.LANCZOS)
    return resized, orig_size

def create_initial_json(subset, dataset_names):
    """Resize the images of one split and write an annotation-free COCO file.

    For every dataset in ``dataset_names`` the function reads
    ``dataset_coco/<dataset>/<subset>/_annotations.coco.json``, resizes each
    listed image to 128x128, saves it as ``<new_id>.jpg`` under
    ``dataset_coco/combined_dataset/<subset>`` and records an image entry that
    remembers the source dataset and the id the image had there. The result is
    written to ``<subset>_initial.json`` in the destination folder.

    Args:
        subset: Split name used as a folder name (e.g. ``"train"``).
        dataset_names: Names of the sub-dataset folders to merge, in order.
    """
    target_size = (128, 128)
    combined = {
        "images": [],
        "categories": [{"id": 0, "name": "0"}, {"id": 1, "name": "1"}],
        "info": {"description": f"Combined {subset} dataset"},
        "licenses": [{"id": 1, "name": "Default"}]
    }

    dest_folder = f"dataset_coco/combined_dataset/{subset}"
    # Created up front so the JSON can still be written when dataset_names
    # is empty.
    os.makedirs(dest_folder, exist_ok=True)

    # New ids increase monotonically, so "<id>.jpg" can never collide with a
    # name produced earlier in this run; no de-duplication loop is needed.
    next_img_id = 0

    for dataset_name in dataset_names:
        source_folder = f"dataset_coco/{dataset_name}/{subset}"
        annotation_file = os.path.join(source_folder, "_annotations.coco.json")

        with open(annotation_file, 'r') as f:
            data = json.load(f)

        for img in data["images"]:
            image_path = os.path.join(source_folder, img["file_name"])
            if not os.path.exists(image_path):
                # Do not record an entry for a file that was never written.
                print(f"Warning: skipping missing image {image_path}")
                continue

            new_image_name = f"{next_img_id}.jpg"
            resized_image, _ = resize_image(image_path, target_size)
            # JPEG cannot store alpha or palette data; convert those modes so
            # save() does not raise on PNG-style inputs.
            if resized_image.mode not in ("RGB", "L", "CMYK"):
                resized_image = resized_image.convert("RGB")
            resized_image.save(os.path.join(dest_folder, new_image_name))

            combined["images"].append({
                "id": next_img_id,
                "file_name": new_image_name,
                "width": target_size[0],
                "height": target_size[1],
                "license": 1,
                "source_dataset": dataset_name,
                "id_in_dataset": img["id"]
            })
            next_img_id += 1

    json_file_path = os.path.join(dest_folder, f"{subset}_initial.json")
    with open(json_file_path, 'w') as json_file:
        json.dump(combined, json_file, indent=4)

# Build the image-only JSON for every split of the combined dataset
for _subset in ("train", "valid", "test"):
    create_initial_json(_subset, ["Dataset1", "Dataset2", "Dataset3", "Dataset4"])


Step 2: Copy and Adjust Annotations
Read the initial JSON file.

Copy annotations from the original datasets and adjust them.

In [None]:
import os
import json

def _clip_span(start, length, limit):
    """Clip the 1-D interval [start, start + length] to [0, limit]."""
    if start < 0:
        # Drop the part that lies left of / above the image.
        length += start
        start = 0
    if start + length > limit:
        length = limit - start
    return min(start, limit), max(length, 0)


def adjust_annotations(annotations, x_scale, y_scale, size, img_id, ann_id_offset):
    """Rescale the annotations of one image and re-key them for the merged set.

    The input dicts are not modified; adjusted copies are returned.

    Args:
        annotations: Annotation dicts of a single source image. Each needs
            ``bbox`` as ``[x, y, width, height]`` and ``category_id``;
            ``segmentation`` (list of flat ``[x0, y0, x1, y1, ...]`` polygons)
            is optional.
        x_scale: Horizontal scale factor applied to x coordinates and widths.
        y_scale: Vertical scale factor applied to y coordinates and heights.
        size: ``(width, height)`` of the resized image; boxes are clipped to it.
        img_id: Value written to ``image_id`` of every returned annotation.
        ann_id_offset: ``id`` given to the first returned annotation; each
            following annotation gets the next integer.

    Returns:
        ``(updated_annotations, next_ann_id)`` where ``next_ann_id`` is the
        first id not used by this batch.
    """
    max_w, max_h = size
    updated_annotations = []
    for ann in annotations:
        new_ann = dict(ann)  # nested values that change are rebuilt below

        x, y, w, h = ann["bbox"]
        # Clip instead of shifting: a box hanging over an edge keeps its
        # position and loses only the part outside the image.
        new_x, new_w = _clip_span(x * x_scale, w * x_scale, max_w)
        new_y, new_h = _clip_span(y * y_scale, h * y_scale, max_h)
        new_ann["bbox"] = [new_x, new_y, new_w, new_h]
        new_ann["area"] = new_w * new_h

        if "segmentation" in ann:
            # Even positions are x coordinates, odd positions are y.
            new_ann["segmentation"] = [
                [
                    value * (x_scale if pos % 2 == 0 else y_scale)
                    for pos, value in enumerate(seg)
                ]
                for seg in ann["segmentation"]
            ]

        # Everything that is not class 1 collapses into class 0.
        new_ann["category_id"] = 1 if ann["category_id"] == 1 else 0
        new_ann["image_id"] = img_id
        new_ann["id"] = ann_id_offset
        ann_id_offset += 1

        updated_annotations.append(new_ann)
    return updated_annotations, ann_id_offset

def copy_and_adjust_annotations(subset, dataset_names):
    """Attach rescaled annotations to the initial JSON of one split.

    Reads ``<subset>_initial.json`` from the combined dataset folder, looks up
    each image's original annotations in its source dataset, rescales them via
    ``adjust_annotations`` and writes ``<subset>_annotations.coco.json``.

    Args:
        subset: Split name used as a folder name (e.g. ``"train"``).
        dataset_names: Not read by this function; the source dataset of each
            image comes from the ``source_dataset`` field of the initial JSON.
            Kept so existing callers keep working.
    """
    combined_folder = f"dataset_coco/combined_dataset/{subset}"
    initial_json_path = os.path.join(combined_folder, f"{subset}_initial.json")

    with open(initial_json_path, 'r') as f:
        combined = json.load(f)

    # dataset name -> (source images by id, source annotations by image id).
    # Each source file is parsed and indexed once instead of once per image.
    source_cache = {}

    def load_source(dataset_name):
        if dataset_name not in source_cache:
            source_folder = f"dataset_coco/{dataset_name}/{subset}"
            annotation_file = os.path.join(source_folder, "_annotations.coco.json")
            with open(annotation_file, 'r') as src_file:
                data = json.load(src_file)

            images_by_id = {}
            for src_img in data["images"]:
                # Keep the first entry per id, matching a first-match search.
                images_by_id.setdefault(src_img["id"], src_img)

            anns_by_image = {}
            for ann in data["annotations"]:
                anns_by_image.setdefault(ann["image_id"], []).append(ann)

            source_cache[dataset_name] = (images_by_id, anns_by_image)
        return source_cache[dataset_name]

    annotations = []
    ann_id_offset = 0

    for img in combined["images"]:
        img_id_in_dataset = img["id_in_dataset"]
        images_by_id, anns_by_image = load_source(img["source_dataset"])

        orig_img = images_by_id.get(img_id_in_dataset)
        if not orig_img:
            continue

        x_scale = img["width"] / orig_img["width"]
        y_scale = img["height"] / orig_img["height"]

        # Pass copies (with their own bbox list) so the cached source
        # annotations are never altered by the adjustment step.
        image_annotations = [
            dict(ann, bbox=list(ann["bbox"]))
            for ann in anns_by_image.get(img_id_in_dataset, [])
        ]
        updated_batch, ann_id_offset = adjust_annotations(
            image_annotations,
            x_scale,
            y_scale,
            (img["width"], img["height"]),
            img["id"],
            ann_id_offset,
        )
        annotations.extend(updated_batch)

    combined["annotations"] = annotations
    combined_json_path = os.path.join(combined_folder, f"{subset}_annotations.coco.json")

    with open(combined_json_path, 'w') as json_file:
        json.dump(combined, json_file, indent=4)

# Add the rescaled annotations to every split of the combined dataset
for _subset in ("train", "valid", "test"):
    copy_and_adjust_annotations(_subset, ["Dataset1", "Dataset2", "Dataset3", "Dataset4"])


Confirming that the resize works

In [None]:
import os
import json
import random
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt

def draw_bounding_boxes(image_path, annotations):
    """Open an image and outline every annotation's bounding box in red.

    Args:
        image_path: Path of the image file to open.
        annotations: Dicts whose ``'bbox'`` entry unpacks to
            ``x, y, width, height``.

    Returns:
        The opened image with the rectangles drawn onto it.
    """
    canvas = Image.open(image_path)
    pen = ImageDraw.Draw(canvas)

    for annotation in annotations:
        left, top, box_w, box_h = annotation['bbox']
        # Rectangle corners: top-left, then bottom-right.
        corners = [left, top, left + box_w, top + box_h]
        pen.rectangle(corners, outline='red', width=2)

    return canvas

def select_random_images_and_display(dataset_folder, combined_annotations_file, num_images=5):
    """Show randomly chosen images with their bounding boxes drawn on top.

    Args:
        dataset_folder: Folder that contains the image files named in the
            annotation file.
        combined_annotations_file: Path of a JSON file with ``'images'`` and
            ``'annotations'`` lists.
        num_images: Upper bound on how many images to display; fewer are shown
            when the dataset is smaller than this.
    """
    with open(combined_annotations_file, 'r') as f:
        data = json.load(f)

    images = data['images']
    annotations = data['annotations']

    # random.sample raises ValueError when asked for more items than exist,
    # so cap the request at the dataset size.
    sample_size = min(num_images, len(images))
    selected_images = random.sample(images, sample_size)

    for img in selected_images:
        image_id = img['id']
        image_path = os.path.join(dataset_folder, img['file_name'])

        img_annotations = [ann for ann in annotations if ann['image_id'] == image_id]
        image_with_boxes = draw_bounding_boxes(image_path, img_annotations)

        # Display the image
        plt.figure()
        plt.imshow(image_with_boxes)
        plt.axis('off')
        plt.show()

# Example usage: preview a few validation images of the combined dataset
dataset_folder = "dataset_coco/combined_dataset/valid"
combined_annotations_file = os.path.join(dataset_folder, "valid_annotations.coco.json")

select_random_images_and_display(
    dataset_folder=dataset_folder,
    combined_annotations_file=combined_annotations_file,
)


Resize dataset (old approach)

In [None]:
import os
import json
import cv2
import matplotlib.pyplot as plt

def resize_image_and_boxes(image, boxes, output_size):
    """Resize an image and divide its box coordinates by the same factors.

    Args:
        image: Array whose ``shape[:2]`` is ``(height, width)``.
        boxes: Sequences of coordinates; values at even positions are divided
            by the width factor, values at odd positions by the height factor.
        output_size: Target ``(width, height)`` passed to ``cv2.resize``.

    Returns:
        ``(resized_image, resized_boxes, scale_factor_w, scale_factor_h)``
        where each factor is the original extent divided by the target extent.
    """
    src_h, src_w = image.shape[:2]
    scale_factor_w = src_w / output_size[0]
    scale_factor_h = src_h / output_size[1]
    resized_image = cv2.resize(image, output_size)

    resized_boxes = []
    for box in boxes:
        scaled_box = []
        for position, coord in enumerate(box):
            divisor = scale_factor_h if position % 2 else scale_factor_w
            scaled_box.append(coord / divisor)
        resized_boxes.append(scaled_box)

    return resized_image, resized_boxes, scale_factor_w, scale_factor_h

def resize_coco_annotations(input_json_path, output_json_path, output_size):
    """Rewrite a COCO annotation file for images resized to ``output_size``.

    Every image entry gets the new width and height, and the ``bbox``,
    ``segmentation`` and ``area`` of its annotations are scaled to match.

    Args:
        input_json_path: Path of the COCO JSON file to read.
        output_json_path: Path the adjusted JSON is written to.
        output_size: Target ``(width, height)`` of the resized images.
    """
    with open(input_json_path, 'r') as f:
        data = json.load(f)

    # Group annotations once so each image does not rescan the whole list.
    annotations_by_image = {}
    for annotation in data.get('annotations', []):
        annotations_by_image.setdefault(annotation['image_id'], []).append(annotation)

    for image_info in data['images']:
        h, w = image_info['height'], image_info['width']
        scale_factor_w = w / output_size[0]
        scale_factor_h = h / output_size[1]
        image_info['height'] = output_size[1]
        image_info['width'] = output_size[0]

        for annotation in annotations_by_image.get(image_info['id'], []):
            bbox = annotation['bbox']
            annotation['bbox'] = [
                bbox[0] / scale_factor_w,
                bbox[1] / scale_factor_h,
                bbox[2] / scale_factor_w,
                bbox[3] / scale_factor_h,
            ]
            if 'area' in annotation:
                # Area scales with both axes; leaving it untouched would keep
                # a value measured in original-resolution pixels.
                annotation['area'] = annotation['area'] / (scale_factor_w * scale_factor_h)
            if 'segmentation' in annotation:
                # Even positions are x coordinates, odd positions are y.
                annotation['segmentation'] = [
                    [
                        point / scale_factor_w if i % 2 == 0 else point / scale_factor_h
                        for i, point in enumerate(segment)
                    ]
                    for segment in annotation['segmentation']
                ]

    with open(output_json_path, 'w') as f:
        json.dump(data, f, indent=4)

def draw_bounding_boxes(image, boxes, color=(0, 255, 0)):
    """Draw ``x, y, w, h`` boxes onto ``image`` in place and return it.

    Args:
        image: Image passed to ``cv2.rectangle``; it is drawn on directly.
        boxes: Sequences of four numbers ``x, y, w, h``; each value is
            truncated to ``int`` before drawing.
        color: Colour tuple passed to ``cv2.rectangle``.

    Returns:
        The same ``image`` object that was passed in.
    """
    for box in boxes:
        left, top, box_w, box_h = (int(value) for value in box)
        top_left = (left, top)
        bottom_right = (left + box_w, top + box_h)
        cv2.rectangle(image, top_left, bottom_right, color, 2)
    return image

def display_images_with_boxes(images_with_boxes):
    """Show each (resized, original) image pair side by side.

    Args:
        images_with_boxes: Iterable of ``(resized_image, original_image)``
            pairs; both are converted with ``cv2.COLOR_BGR2RGB`` before
            being shown.
    """
    panel_titles = ("Resized Image with Boxes", "Original Image with Rescaled Boxes")

    for resized_image, original_image in images_with_boxes:
        plt.figure(figsize=(10, 5))

        panels = zip(panel_titles, (resized_image, original_image))
        for position, (title, bgr_image) in enumerate(panels, start=1):
            plt.subplot(1, 2, position)
            plt.title(title)
            plt.imshow(cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB))

        plt.show()

def process_dataset(input_dir, output_dir, input_json_path, output_json_path, output_size, sample_count=5):
    """Resize a COCO dataset's images, rewrite its annotations and preview samples.

    Every readable image listed in ``input_json_path`` is resized to
    ``output_size`` and written to ``output_dir`` under the same file name.
    The annotation file is then rescaled with ``resize_coco_annotations`` and
    the first ``sample_count`` successfully processed images are displayed
    with their boxes.

    Args:
        input_dir: Folder containing the source images.
        output_dir: Folder the resized images are written to (created if
            missing).
        input_json_path: Path of the source COCO JSON file.
        output_json_path: Path the rescaled COCO JSON file is written to.
        output_size: Target ``(width, height)``.
        sample_count: Number of images to preview for visual verification.
    """
    os.makedirs(output_dir, exist_ok=True)

    with open(input_json_path, 'r') as f:
        data = json.load(f)

    # Index boxes by image once instead of rescanning every annotation for
    # every image.
    boxes_by_image = {}
    for ann in data['annotations']:
        boxes_by_image.setdefault(ann['image_id'], []).append(ann['bbox'])

    sample_images = []
    for image_info in data['images']:
        image_path = os.path.join(input_dir, image_info['file_name'])
        image = cv2.imread(image_path)
        if image is None:
            print(f"Warning: could not read {image_path}; skipping")
            continue

        boxes = boxes_by_image.get(image_info['id'], [])
        resized_image, resized_boxes, scale_factor_w, scale_factor_h = resize_image_and_boxes(image, boxes, output_size)
        cv2.imwrite(os.path.join(output_dir, image_info['file_name']), resized_image)

        # Count collected previews rather than the loop index so unreadable
        # images at the start do not reduce the number of samples shown.
        if len(sample_images) < sample_count:
            # Draw bounding boxes on resized image
            resized_image_with_boxes = draw_bounding_boxes(resized_image.copy(), resized_boxes)

            # Rescale boxes back to original size and draw on original image
            original_boxes = [
                [
                    coord * scale_factor_w if pos % 2 == 0 else coord * scale_factor_h
                    for pos, coord in enumerate(box)
                ]
                for box in resized_boxes
            ]
            original_image_with_boxes = draw_bounding_boxes(image.copy(), original_boxes, color=(255, 0, 0))

            sample_images.append((resized_image_with_boxes, original_image_with_boxes))

    # The annotation file is rescaled from the untouched input JSON, so the
    # boxes computed above are only needed for the previews.
    resize_coco_annotations(input_json_path, output_json_path, output_size)

    display_images_with_boxes(sample_images)

# Source and destination locations
input_dir = 'dataset_coco/train'
output_dir = 'dataset_128x128/train'
input_json_path = os.path.join(input_dir, '_annotations.coco.json')
output_json_path = os.path.join(output_dir, '_annotations.coco.json')

# Settings
output_size = (128, 128)  # Desired output resolution
sample_count = 5  # Number of sample images to display for verification

# Run the resize over the whole split
process_dataset(
    input_dir=input_dir,
    output_dir=output_dir,
    input_json_path=input_json_path,
    output_json_path=output_json_path,
    output_size=output_size,
    sample_count=sample_count,
)


Validate model with NCNN

In [None]:
import os
import json
import random
import cv2
import numpy as np
from PIL import Image
import ncnn

class NanoDet:
    """Object detector that runs a NanoDet model through the ncnn runtime.

    Args:
        param_path: Path of the ncnn ``.param`` file.
        bin_path: Path of the ncnn ``.bin`` weights file.
        num_threads: Value assigned to ``net.opt.num_threads``.
        target_size: Length the longer image side is resized to before padding.
        prob_threshold: Minimum sigmoid score for a grid cell to yield a box.
        nms_threshold: IoU above which a lower-scored box is suppressed.
    """

    def __init__(self, param_path, bin_path, num_threads=2, target_size=128, prob_threshold=0.6, nms_threshold=0.65):
        self.num_threads = num_threads
        self.target_size = target_size
        self.prob_threshold = prob_threshold
        self.nms_threshold = nms_threshold

        self.net = ncnn.Net()
        # Options are configured before the model is loaded so that they are
        # in effect while ncnn builds the network.
        self.net.opt.num_threads = num_threads
        self.net.opt.use_packing_layout = True
        self.net.opt.use_fp16_packed = True
        self.net.opt.use_fp16_storage = True
        self.net.opt.use_fp16_arithmetic = True
        self.net.opt.use_int8_inference = True
        self.net.opt.use_vulkan_compute = False
        self.net.load_param(param_path)
        self.net.load_model(bin_path)

    def detect(self, bgr):
        """Run detection on one BGR image.

        Args:
            bgr: Image array whose ``shape`` starts with ``(height, width)``.

        Returns:
            List of ``[x0, y0, x1, y1, score, label]`` entries in the
            coordinates of the input image, ordered by descending score.
        """
        width = bgr.shape[1]
        height = bgr.shape[0]

        # Resize so the longer side equals target_size, keeping aspect ratio.
        w, h, scale = width, height, 1.0
        if w > h:
            scale = self.target_size / w
            w, h = self.target_size, int(h * scale)
        else:
            scale = self.target_size / h
            h, w = self.target_size, int(w * scale)

        in_resized = cv2.resize(bgr, (w, h))
        # pad to multiple of 32
        wpad = (w + 31) // 32 * 32 - w
        hpad = (h + 31) // 32 * 32 - h
        in_padded = cv2.copyMakeBorder(in_resized, hpad // 2, hpad - hpad // 2, wpad // 2, wpad - wpad // 2, cv2.BORDER_CONSTANT, value=(0, 0, 0))

        # from_pixels takes 8-bit pixel data, so normalisation is applied to
        # the resulting Mat rather than to the array beforehand. The values
        # are the original per-channel constants listed in RGB order, because
        # the Mat holds RGB after the BGR2RGB conversion.
        mean_vals = [123.675, 116.28, 103.53]
        norm_vals = [0.017125, 0.017507, 0.017429]
        mat_in = ncnn.Mat.from_pixels(in_padded, ncnn.Mat.PixelType.PIXEL_BGR2RGB, in_padded.shape[1], in_padded.shape[0])
        mat_in.substract_mean_normalize(mean_vals, norm_vals)

        ex = self.net.create_extractor()
        ex.input("in0", mat_in)

        proposals = []
        for stride, layer_id in zip([8, 16, 32], ["150", "160", "170"]):
            # NOTE(review): some ncnn Python builds return (ret, mat) from
            # extract(name) instead of filling an output Mat - confirm
            # against the installed version.
            out = ncnn.Mat()
            ex.extract(layer_id, out)
            self.generate_proposals(out, stride, mat_in, proposals)

        proposals.sort(key=lambda x: -x[4])  # Sort by score

        picked = []
        # Use the threshold configured on the instance, not the method default.
        self.nms_sorted_bboxes(proposals, picked, nms_threshold=self.nms_threshold)

        objects = []
        for i in picked:
            obj = proposals[i]
            # Undo the padding offset and the resize to get back to the
            # coordinates of the input image.
            x0, y0 = (obj[0] - wpad / 2) / scale, (obj[1] - hpad / 2) / scale
            x1, y1 = (obj[2] - wpad / 2) / scale, (obj[3] - hpad / 2) / scale
            obj = [x0, y0, x1, y1, obj[4], obj[5]]
            objects.append(obj)

        return objects

    def generate_proposals(self, pred, stride, in_pad, proposals):
        """Decode one output blob into boxes and append them to ``proposals``.

        Args:
            pred: Output blob exposing ``w``, ``h``, ``c`` and
                ``channel(k).row(i)[j]``. The first ``num_class`` channels are
                read as class scores, the rest as box distribution bins.
            stride: Pixel stride of this output level.
            in_pad: Not read by this method.
            proposals: List extended in place with
                ``[x0, y0, x1, y1, score, label]`` entries.
        """
        num_grid_x = pred.w
        num_grid_y = pred.h
        num_class = 2
        reg_max_1 = (pred.c - num_class) // 4

        for i in range(num_grid_y):
            for j in range(num_grid_x):
                # find label with max score
                # NOTE(review): the search starts at class 1, so class 0 is
                # never reported - presumably intentional; confirm.
                label = -1
                score = -float('inf')
                for k in range(1, num_class):
                    s = pred.channel(k).row(i)[j]
                    if s > score:
                        label = k
                        score = s

                score = 1.0 / (1.0 + np.exp(-score))

                if score >= self.prob_threshold:
                    bbox_pred = np.zeros((4, reg_max_1))
                    for k in range(reg_max_1 * 4):
                        bbox_pred[k // reg_max_1, k % reg_max_1] = pred.channel(num_class + k).row(i)[j]

                    bbox_pred = np.apply_along_axis(self.soft_max, axis=1, arr=bbox_pred)

                    # Expected bin index of each side's distribution, scaled
                    # by the stride, gives the distance to that side.
                    pred_ltrb = []
                    for k in range(4):
                        dis = np.dot(np.arange(reg_max_1), bbox_pred[k])
                        pred_ltrb.append(dis * stride)

                    pb_cx = j * stride
                    pb_cy = i * stride
                    x0 = pb_cx - pred_ltrb[0]
                    y0 = pb_cy - pred_ltrb[1]
                    x1 = pb_cx + pred_ltrb[2]
                    y1 = pb_cy + pred_ltrb[3]

                    proposals.append([x0, y0, x1, y1, score, label])

    def soft_max(self, x):
        """Return the softmax of a 1-D array, shifted by its max for stability."""
        exp_x = np.exp(x - np.max(x))
        return exp_x / exp_x.sum(axis=0)

    def nms_sorted_bboxes(self, faceobjects, picked, nms_threshold=0.65, agnostic=False):
        """Greedy non-maximum suppression over boxes already sorted by score.

        Args:
            faceobjects: ``[x0, y0, x1, y1, score, label]`` entries, best first.
            picked: List that is cleared and filled with the kept indices.
            nms_threshold: IoU above which a box is dropped in favour of an
                already kept one.
            agnostic: When false, boxes only suppress boxes of the same label.
        """
        picked.clear()

        n = len(faceobjects)
        areas = [(x[2] - x[0]) * (x[3] - x[1]) for x in faceobjects]

        for i in range(n):
            keep = True
            for j in picked:
                if not agnostic and faceobjects[i][5] != faceobjects[j][5]:
                    continue

                inter_x1 = max(faceobjects[i][0], faceobjects[j][0])
                inter_y1 = max(faceobjects[i][1], faceobjects[j][1])
                inter_x2 = min(faceobjects[i][2], faceobjects[j][2])
                inter_y2 = min(faceobjects[i][3], faceobjects[j][3])

                inter_w = max(0, inter_x2 - inter_x1)
                inter_h = max(0, inter_y2 - inter_y1)
                inter_area = inter_w * inter_h
                union_area = areas[i] + areas[j] - inter_area

                # Two zero-area boxes have an empty union; treat as no overlap
                # instead of dividing by zero.
                iou = inter_area / union_area if union_area > 0 else 0.0
                if iou > nms_threshold:
                    keep = False
                    break

            if keep:
                picked.append(i)

def evaluate_model(dataset_folder, annotations_file, model_param, model_bin):
    """Run the detector on every annotated image and print per-image metrics.

    Args:
        dataset_folder: Folder containing the image files.
        annotations_file: Path of a JSON file with ``'images'`` and
            ``'annotations'`` lists; each ``bbox`` is read as
            ``[x, y, width, height]``.
        model_param: Path of the ncnn ``.param`` file.
        model_bin: Path of the ncnn ``.bin`` file.
    """
    net = NanoDet(model_param, model_bin)

    with open(annotations_file, 'r') as f:
        annotations = json.load(f)

    images = annotations['images']

    # The detector returns corner boxes [x0, y0, x1, y1, ...] and the IoU in
    # calculate_metrics compares corners, so ground truth is converted from
    # [x, y, w, h] to corners here. Grouping also avoids rescanning all
    # annotations for every image.
    gt_by_image = {}
    for ann in annotations['annotations']:
        x, y, box_w, box_h = ann['bbox'][:4]
        gt_by_image.setdefault(ann['image_id'], []).append([x, y, x + box_w, y + box_h])

    for img_info in images:
        img_id = img_info['id']
        img_file = os.path.join(dataset_folder, img_info['file_name'])
        image = cv2.imread(img_file)
        if image is None:
            # cv2.imread returns None for unreadable files; skip rather than
            # fail inside the detector.
            print(f'Image ID: {img_id}, could not read {img_file}; skipped')
            continue

        gt_bboxes = gt_by_image.get(img_id, [])
        pred_boxes = net.detect(image)

        # Calculate metrics like precision, recall, mAP, etc.
        precision, recall, mAP = calculate_metrics(pred_boxes, gt_bboxes)
        print(f'Image ID: {img_id}, Precision: {precision}, Recall: {recall}, mAP: {mAP}')

def calculate_metrics(pred_boxes, gt_bboxes, iou_threshold=0.5):
    """Compute precision, recall and a simplified AP for one image.

    Each prediction is matched to at most one ground-truth box and each
    ground-truth box to at most one prediction, so true positives can never
    exceed the number of predictions.

    Args:
        pred_boxes: Predicted boxes whose first four values are
            ``x0, y0, x1, y1``; they are matched in the order given.
        gt_bboxes: Ground-truth boxes whose first four values are
            ``x0, y0, x1, y1``.
        iou_threshold: Minimum IoU for a prediction to count as a match.

    Returns:
        ``(precision, recall, ap)`` where ``ap`` is ``precision * recall``.
    """
    def iou(box1, box2):
        inter_x1 = max(box1[0], box2[0])
        inter_y1 = max(box1[1], box2[1])
        inter_x2 = min(box1[2], box2[2])
        inter_y2 = min(box1[3], box2[3])

        inter_w = max(0, inter_x2 - inter_x1)
        inter_h = max(0, inter_y2 - inter_y1)
        inter_area = inter_w * inter_h

        box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
        box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
        union_area = box1_area + box2_area - inter_area

        # Zero-area boxes give an empty union; report no overlap.
        return inter_area / union_area if union_area > 0 else 0.0

    unmatched_gt = list(range(len(gt_bboxes)))
    tp = 0
    for pred in pred_boxes:
        # Pick the best still-unmatched ground truth for this prediction.
        best_idx, best_iou = None, 0.0
        for idx in unmatched_gt:
            overlap = iou(gt_bboxes[idx], pred)
            if overlap >= iou_threshold and (best_idx is None or overlap > best_iou):
                best_idx, best_iou = idx, overlap
        if best_idx is not None:
            unmatched_gt.remove(best_idx)
            tp += 1

    fp = len(pred_boxes) - tp
    fn = len(unmatched_gt)
    precision = tp / (tp + fp) if tp + fp > 0 else 0
    recall = tp / (tp + fn) if tp + fn > 0 else 0
    ap = precision * recall  # Simplified AP calculation, adjust as needed

    return precision, recall, ap


# Example usage: score the int8 NanoDet model on the 128x128 validation split
evaluate_model(
    dataset_folder='dataset_128x128/valid',
    annotations_file='dataset_128x128/valid/_annotations.coco.json',
    model_param='nanodet_128x128_simplified-int8.param',
    model_bin='nanodet_128x128_simplified-int8.bin',
)
