In [None]:
# Mount Google Drive at /content/drive; the archives unpacked in the next cell
# are read from under this mount point. force_remount=True is passed explicitly.
from google.colab import drive
drive.mount("/content/drive", force_remount=True)

Mounted at /content/drive


In [None]:
# Unpack the COCO 2017 annotation, train and val archives from Drive into /content (-qq: quiet).
!unzip -qq /content/drive/MyDrive/datasets/coco/annotations_trainval2017.zip -d /content
!unzip -qq /content/drive/MyDrive/datasets/coco/train2017.zip -d /content
!unzip -qq /content/drive/MyDrive/datasets/coco/val2017.zip -d /content


# Functii auxiliare

In [None]:
import os
import numpy as np
import random
import cv2
from pycocotools.coco import COCO

def mask_to_segmentation(mask):
    """Flatten the external contours of ``mask`` into one coordinate list.

    Args:
        mask: single-channel image passed straight to ``cv2.findContours``.

    Returns:
        A flat list ``[x0, y0, x1, y1, ...]`` of Python ints. Points of all
        external contours are concatenated into the same list, in the order
        OpenCV reports them.
    """
    outlines, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    flat_points = []
    for outline in outlines:
        # Drop the singleton middle axis so each row is one (x, y) pair.
        xy_pairs = np.squeeze(outline, axis=1)
        flat_points.extend(int(value) for xy in xy_pairs for value in (xy[0], xy[1]))

    return flat_points

def rotate_image(image, angle):
    """Rotate ``image`` by ``angle`` degrees around its centre, keeping the canvas size.

    The output has the same width and height as the input, so content that
    rotates outside the original frame is cut off.
    """
    rows, cols = image.shape[:2]
    pivot = (cols / 2, rows / 2)
    # Scale factor 1: pure rotation, no zoom.
    transform = cv2.getRotationMatrix2D(pivot, angle, 1)
    return cv2.warpAffine(image, transform, (cols, rows))


def _largest_contour_bbox(mask):
    """Return ``cv2.boundingRect`` of the largest external contour, or (0, 0, 0, 0) if none."""
    outlines, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if len(outlines) > 0:
        return cv2.boundingRect(max(outlines, key=cv2.contourArea))
    return (0, 0, 0, 0)


def resize_instance_and_mask(instance, mask, scale, dest_shape):
    """Randomly augment a cropped instance and its mask so it fits a destination image.

    Steps, in order: optional rotation (probability 0.5, angle 0-360), resize by
    ``scale``, optional horizontal flip (probability 0.7), doubling of instances
    smaller than 70x70 pixels, random crop to ``dest_shape`` if still too large,
    and a final 2/3 shrink when the instance bbox covers more than 60% of the
    destination area.

    Args:
        instance: cropped instance image, or None.
        mask: mask with the same height/width as ``instance``.
        scale: resize factor applied to both.
        dest_shape: shape of the destination image; only ``[0]`` (height) and
            ``[1]`` (width) are read.

    Returns:
        ``(instance, mask, bbox)`` where ``bbox`` is ``(x, y, w, h)`` of the
        largest mask contour. When ``instance`` is None the inputs are returned
        untouched together with ``[0, 0, 0, 0]``.
    """
    if instance is None:
        return instance, mask, [0, 0, 0, 0]

    src_h, src_w = instance.shape[:2]
    # Never let the target collapse to zero pixels.
    new_h = max(1, int(scale * src_h))
    new_w = max(1, int(scale * src_w))

    if random.random() < 0.5:
        angle = random.randint(0, 360)
        instance = rotate_image(instance, angle)
        mask = rotate_image(mask, angle)

    target_size = (new_w, new_h)
    resized_instance = cv2.resize(instance, target_size)
    resized_mask = cv2.resize(mask, target_size)

    if random.random() < 0.7:
        # flipCode 1 = horizontal mirror.
        resized_instance = cv2.flip(resized_instance, 1)
        resized_mask = cv2.flip(resized_mask, 1)

    # Instances below 70x70 pixels are doubled in size.
    if new_h * new_w < 4900:
        new_h, new_w = new_h * 2, new_w * 2
        resized_instance = cv2.resize(resized_instance, (new_w, new_h))
        resized_mask = cv2.resize(resized_mask, (new_w, new_h))

    dest_h, dest_w = dest_shape[0], dest_shape[1]
    if new_h > dest_h or new_w > dest_w:
        # Take a random window no larger than the destination image.
        top = random.randint(0, max(0, new_h - dest_h))
        left = random.randint(0, max(0, new_w - dest_w))
        resized_instance = resized_instance[top:top + dest_h, left:left + dest_w]
        resized_mask = resized_mask[top:top + dest_h, left:left + dest_w]

    bbox = _largest_contour_bbox(resized_mask)

    coverage = (bbox[2] * bbox[3]) / (dest_h * dest_w)
    if coverage > 0.6:
        # The instance would dominate the destination image: shrink it to 2/3.
        new_h = int(resized_instance.shape[0] * (2/3))
        new_w = int(resized_instance.shape[1] * (2/3))
        resized_instance = cv2.resize(resized_instance, (new_w, new_h))
        resized_mask = cv2.resize(resized_mask, (new_w, new_h))
        bbox = _largest_contour_bbox(resized_mask)

    return resized_instance, resized_mask, bbox

def generate_unique_ids(coco_dataset, num_ids=1000):
    """Return the ids in ``1..num_ids`` that are not used by any annotation.

    Args:
        coco_dataset: object exposing ``getAnnIds()`` (e.g. a pycocotools COCO).
        num_ids: upper bound (inclusive) of the candidate id range.

    Returns:
        A set of unused integer ids.
    """
    taken = coco_dataset.getAnnIds()
    return set(range(1, num_ids + 1)).difference(taken)

def get_anchor_point(original_dim, new_dim):
    """Pick a random placement for a smaller rectangle inside a larger one.

    Both arguments are indexed as ``[0]`` and ``[1]``; the caller in this file
    passes ``(width, height)`` pairs.

    Returns:
        A pair of offsets for one of five placements chosen uniformly:
        top-left, top-right, bottom-left, bottom-right or centre.
    """
    corner = random.choice(['tl', 'tr', 'bl', 'br', 'center'])  # Top-left, Top-right, etc.

    slack_0 = original_dim[0] - new_dim[0]
    slack_1 = original_dim[1] - new_dim[1]

    placements = {
        'tl': (0, 0),
        'tr': (slack_0, 0),
        'bl': (0, slack_1),
        'br': (slack_0, slack_1),
        'center': (slack_0 // 2, slack_1 // 2),
    }
    return placements.get(corner)

def adjust_annotations(coco, annotations, scale, new_w, new_h, original_w, original_h, ann_ids, anchor_x=0, anchor_y=0):
    """Rescale and translate COCO annotations to follow a resized image.

    Every coordinate is mapped as ``value * scale + anchor``; widths and heights
    are multiplied by ``scale`` and ``area`` by ``scale ** 2``.

    Args:
        coco, new_w, new_h, original_w, original_h: accepted for interface
            compatibility; not used by the computation.
        annotations: iterable of annotation dicts (shallow-copied, not mutated).
        scale: uniform resize factor applied to the image.
        ann_ids: list of fresh annotation ids; one id is popped (from the end)
            for every annotation that has a ``'segmentation'`` key.
        anchor_x, anchor_y: offset of the resized image inside the canvas.

    Returns:
        ``(new_annotations, ann_ids)``, where ``ann_ids`` is the same list
        object with the consumed ids removed.

    Raises:
        IndexError: if ``ann_ids`` runs out of ids.
    """
    new_annotations = []

    for ann in annotations:
        new_ann = ann.copy()

        # Adjust the bounding box.
        if 'bbox' in ann:
            x, y, w, h = ann['bbox']
            new_ann['bbox'] = [(x * scale) + anchor_x, (y * scale) + anchor_y, w * scale, h * scale]

        # Area scales with the square of the linear factor.
        if 'area' in ann:
            new_ann['area'] = ann['area'] * scale * scale

        # Adjust segmentations (polygons).
        if 'segmentation' in ann:
            segmentation = ann['segmentation']
            if isinstance(segmentation, list):
                new_segmentations = []
                for seg in segmentation:
                    if isinstance(seg, list):
                        new_seg = []
                        # Stop at len - 1 so a dangling odd coordinate is ignored
                        # instead of raising IndexError.
                        for i in range(0, len(seg) - 1, 2):
                            new_seg.extend([(seg[i] * scale) + anchor_x, (seg[i + 1] * scale) + anchor_y])
                    else:
                        new_seg = seg
                    new_segmentations.append(new_seg)
                new_ann['segmentation'] = new_segmentations
            # Otherwise (e.g. an RLE dict on crowd annotations) keep the
            # segmentation object as copied. Iterating a dict would replace it
            # with its key names. NOTE: such a mask is NOT rescaled here.

            new_ann['id'] = ann_ids.pop()

        new_annotations.append(new_ann)

    return new_annotations, ann_ids

def resize_image_and_annotations(coco, image, scale, annotations=None, ann_ids=None):
    """Resize ``image`` by ``scale`` on a canvas of the original size and remap annotations.

    For ``scale < 1`` the shrunken image is pasted at a random anchor (see
    ``get_anchor_point``) on a grey (127) 3-channel canvas. Otherwise a random
    window of the original size is cropped from the enlarged image.

    Args:
        coco: forwarded unchanged to ``adjust_annotations``.
        image: image array; the ``scale < 1`` branch assumes 3 channels.
        scale: resize factor.
        annotations: annotation dicts to remap (defaults to an empty list).
        ann_ids: pool of fresh annotation ids consumed by
            ``adjust_annotations`` (defaults to an empty list).

    Returns:
        ``(final_img, adjusted_annotations, remaining_ids)``.
    """
    # None sentinels instead of mutable default lists shared between calls.
    annotations = [] if annotations is None else annotations
    ann_ids = [] if ann_ids is None else ann_ids

    original_h, original_w = image.shape[:2]
    # Clamp to one pixel so cv2.resize never receives an empty target size.
    new_h = max(1, int(scale * original_h))
    new_w = max(1, int(scale * original_w))

    resized_img = cv2.resize(image, (new_w, new_h))

    anchor_x, anchor_y = 0, 0

    if scale < 1:
        final_img = np.full((original_h, original_w, 3), 127, dtype=np.uint8)
        anchor_x, anchor_y = get_anchor_point((original_w, original_h), (new_w, new_h))
        final_img[anchor_y:anchor_y+new_h, anchor_x:anchor_x+new_w] = resized_img
    else:
        y_offset = random.randint(0, new_h - original_h) if new_h > original_h else 0
        x_offset = random.randint(0, new_w - original_w) if new_w > original_w else 0
        final_img = resized_img[y_offset:y_offset+original_h, x_offset:x_offset+original_w]
        # NOTE(review): the crop offsets are not propagated to the annotations
        # (anchors stay 0). This matches the existing behaviour; the only caller
        # in this file uses scale <= 1.

    adjusted_annotations, remaining_ids = adjust_annotations(
        coco, annotations, scale, new_w, new_h, original_w, original_h, ann_ids, anchor_x, anchor_y)

    return final_img, adjusted_annotations, remaining_ids


def add_offset_to_annotations(segmentation, bbox, offset):
    """Translate a flat polygon and its bbox by ``offset = (dx, dy)``.

    ``segmentation`` is shifted IN PLACE (even indices by dx, odd by dy) and the
    same list object is returned. The bbox is returned as a new
    ``(x, y, w, h)`` tuple with only x and y shifted.
    """
    dx, dy = offset
    left, top, _, _ = bbox
    shifted_bbox = (left + dx, top + dy, bbox[2], bbox[3])

    for even in range(0, len(segmentation), 2):
        segmentation[even] += dx
        segmentation[even + 1] += dy

    return shifted_bbox, segmentation

def check_overlap(bbox1, bbox2):
    """Return True when two ``(x1, y1, x2, y2)`` corner boxes overlap with positive area.

    Boxes that only touch along an edge do not count as overlapping.
    """
    left_a, top_a, right_a, bottom_a = bbox1
    left_b, top_b, right_b, bottom_b = bbox2
    overlaps_horizontally = left_a < right_b and left_b < right_a
    overlaps_vertically = top_a < bottom_b and top_b < bottom_a
    return overlaps_horizontally and overlaps_vertically

def do_bboxes_intersect(bbox1, bbox2):
    """Return True when two ``(x, y, w, h)`` boxes overlap with positive area.

    Boxes that merely share an edge are treated as non-intersecting.
    """
    is_left_of = bbox1[0] + bbox1[2] <= bbox2[0]    # bbox1 lies to the left of bbox2
    is_right_of = bbox1[0] >= bbox2[0] + bbox2[2]   # bbox1 lies to the right of bbox2
    is_above = bbox1[1] + bbox1[3] <= bbox2[1]      # bbox1 lies above bbox2
    is_below = bbox1[1] >= bbox2[1] + bbox2[3]      # bbox1 lies below bbox2
    separated = is_left_of or is_right_of or is_above or is_below
    return not separated

# Impartirea adnotarilor in subseturi

In [None]:
from pycocotools.coco import COCO
import os
import math
import json

# Split the full train2017 annotation file into `num_subsets` smaller COCO JSON
# files (subset_0.json ... subset_19.json) so each can be processed separately.
annotation_file = '/content/annotations/instances_train2017.json'


output_directory = '/content/sample_data/split_annon'
os.makedirs(output_directory, exist_ok=True)

coco = COCO(annotation_file)

image_ids = list(coco.imgs.keys())

num_subsets = 20
# Ceil so that the last subset absorbs the remainder instead of dropping images.
images_per_subset = math.ceil(len(image_ids) / num_subsets)


for subset_idx in range(num_subsets):
    # Contiguous slice of image ids for this subset; the last one may be shorter.
    start_idx = subset_idx * images_per_subset
    end_idx = min(start_idx + images_per_subset, len(image_ids))
    subset_image_ids = image_ids[start_idx:end_idx]

    # All annotations (every category) belonging to the subset's images.
    subset_annotations = coco.loadAnns(coco.getAnnIds(imgIds=subset_image_ids))
    subset_images = coco.loadImgs(subset_image_ids)

    # info / licenses / categories are copied unchanged into every subset file.
    subset_data = {
        'info': coco.dataset['info'],
        'licenses': coco.dataset['licenses'],
        'categories': coco.dataset['categories'],
        'images': subset_images,
        'annotations': subset_annotations
    }

    output_filename = os.path.join(output_directory, f'subset_{subset_idx}.json')
    # Remove any file left over from a previous run before writing.
    if os.path.exists(output_filename):
        os.remove(output_filename)

    with open(output_filename, 'w') as f:
        json.dump(subset_data, f)


loading annotations into memory...
Done (t=14.96s)
creating index...
index created!


# Functia de procesare a unei imagini

In [None]:
def process_image_func(args):
    """Worker: rescale one destination image and paste category instances into it.

    Args:
        args: tuple ``(img_id, coco, cat_ids, instances_to_copy, added_instances,
            images, ann_ids, counter, proccessing_type)`` where
            ``images`` is the image directory, ``ann_ids`` a list of fresh
            annotation ids reserved for this image, ``counter`` a shared
            ``Value``-like progress counter and ``proccessing_type`` selects
            which destination images receive pasted instances
            (1: only images containing ``cat_ids[0]``, 2: only images without
            it, 3: any image).

    Returns:
        ``(added_instances, annotations, image, file_name, old_ann_ids)``.
        ``image`` is None (and both lists empty) when the destination image
        cannot be read. ``old_ann_ids`` are the ids in
        ``coco.dataset['annotations']`` that the returned annotations replace.
    """
    img_id, coco, cat_ids, instances_to_copy, added_instances, images, ann_ids, counter, proccessing_type = args

    # NOTE: this read-modify-write on a shared value is not atomic, so the
    # printed progress is approximate when several workers run concurrently.
    counter.value += 1
    if counter.value % 250 == 0:
        print(counter.value)

    cat_img_ids = coco.getImgIds(catIds=cat_ids)
    dest_img_info = coco.loadImgs([img_id])[0]
    dest_file_name = dest_img_info['file_name']
    dest_img_ann = coco.loadAnns(coco.getAnnIds(imgIds=dest_img_info['id']))

    # Ids of the old annotations of this image; the caller drops them from the dataset.
    old_ann_ids = [ann['id'] for ann in coco.dataset['annotations'] if ann['image_id'] == dest_img_info['id']]

    dest_img = cv2.imread(os.path.join(images, dest_file_name))
    if dest_img is None:
        print("Somthing is None")
        return 0, [], None, dest_file_name, []

    scale_dest = random.uniform(0.5, 1)
    final_dest_img, new_ann, remaining_ann_ids = resize_image_and_annotations(
        coco, dest_img, scale_dest, dest_img_ann, ann_ids)

    # Only presence of the category matters, not the number of instances.
    has_category = any(ann['category_id'] == cat_ids[0] for ann in new_ann)

    # 1 - images with the category, 2 - images without it, 3 - any image.
    if proccessing_type == 1 and not has_category:
        return 0, new_ann, final_dest_img, dest_file_name, old_ann_ids
    if proccessing_type == 2 and has_category:
        return 0, new_ann, final_dest_img, dest_file_name, old_ann_ids

    def _finish():
        return added_instances, new_ann, final_dest_img, dest_file_name, old_ann_ids

    # Nothing to paste, or the destination has no annotations at all.
    if added_instances >= instances_to_copy or not coco.getAnnIds(imgIds=img_id):
        return _finish()

    # Choose a source image different from the destination. Filtering first
    # avoids spinning forever when the destination is the only candidate.
    candidates = [candidate for candidate in cat_img_ids if candidate != img_id]
    if not candidates:
        return _finish()
    source_img_id = random.choice(candidates)

    source_anns = coco.loadAnns(coco.getAnnIds(imgIds=source_img_id, catIds=cat_ids))
    # The source image is the same for every instance: read it once.
    source_img = cv2.imread(os.path.join(images, coco.loadImgs([source_img_id])[0]['file_name']))
    if source_img is None or not source_anns:
        return _finish()

    num_instances_to_copy_from_this_image = random.randint(1, len(source_anns))

    for _ in range(num_instances_to_copy_from_this_image):
        if added_instances >= instances_to_copy:
            break

        ann = random.choice(source_anns)
        source_anns.remove(ann)

        mask = coco.annToMask(ann)
        x, y, w, h = [int(i) for i in ann['bbox']]
        mask = (mask > 0).astype(np.uint8) * 255
        person = source_img[y:y+h, x:x+w]
        person_mask = mask[y:y+h, x:x+w]
        # Sub-pixel boxes truncate to an empty crop, which cv2.resize rejects.
        if person.size == 0 or person_mask.size == 0:
            continue

        scale_instance = random.uniform(0.7, 1.5)
        resized_person, resized_mask, resized_bbox = resize_instance_and_mask(
            person, person_mask, scale_instance, final_dest_img.shape)

        # The outline does not depend on the placement: compute it once.
        base_segmentation = mask_to_segmentation(resized_mask)
        if len(base_segmentation) < 6:
            # The mask vanished (e.g. rotated out of frame): no valid polygon.
            continue

        all_bboxes = [ann_element['bbox'] for ann_element in new_ann]
        max_y = final_dest_img.shape[0] - resized_mask.shape[0]
        max_x = final_dest_img.shape[1] - resized_mask.shape[1]

        # One initial placement plus up to 10 retries to avoid existing boxes;
        # the last placement is kept even if it still overlaps.
        attempts = 10
        while True:
            y1 = random.randint(0, max_y)
            x1 = random.randint(0, max_x)
            # add_offset_to_annotations shifts its list in place, so pass a copy.
            added_bbox, added_seg = add_offset_to_annotations(list(base_segmentation), resized_bbox, (x1, y1))
            if attempts == 0 or not any(do_bboxes_intersect(added_bbox, bbox) for bbox in all_bboxes):
                break
            attempts -= 1

        # Alpha-blend the instance into the destination using the mask as weight.
        alpha = resized_mask / 255.0
        roi = final_dest_img[y1:y1+resized_mask.shape[0], x1:x1+resized_mask.shape[1]]
        for c in range(0, 3):
            roi[:, :, c] = roi[:, :, c] * (1 - alpha) + resized_person[:, :, c] * alpha

        added_instances += 1
        new_ann.append({
            'image_id': dest_img_info['id'],
            'category_id': cat_ids[0],
            'bbox': added_bbox,
            'segmentation': [added_seg],
            # Mask pixel count; COCO instance annotations carry an 'area' field.
            'area': int(np.count_nonzero(resized_mask > 127)),
            'iscrowd': 0,
            'id': remaining_ann_ids.pop()
        })

    return _finish()


# Functia care porneste procesarea

In [None]:
import os
import shutil
import cv2
import numpy as np
import random
import json as jsonn
from pycocotools.coco import COCO
from multiprocessing import Pool
from multiprocessing import Manager


def create_distribution(number_of_instances, coco, category_id, type):
    """Spread ``number_of_instances`` as evenly as possible over the target images.

    Args:
        number_of_instances: total number of instances to distribute.
        coco: object exposing ``getImgIds()`` and ``getImgIds(catIds=[...])``.
        category_id: category used to select target images.
        type: 1 = only images containing the category, 2 = only images without
            it, 3 = every image. Any other value selects no image.

    Returns:
        A list aligned with ``coco.getImgIds()`` giving the number of instances
        to add to each image. Non-target images get 0; the first ``remainder``
        target images get one extra instance. All zeros when there is no target
        image (previously a ZeroDivisionError).
    """
    all_img_ids = coco.getImgIds()
    distrib = [0] * len(all_img_ids)

    if type == 1:
        target_ids = list(coco.getImgIds(catIds=[category_id]))
    elif type == 2:
        with_category = set(coco.getImgIds(catIds=[category_id]))
        # Keep dataset order so the remainder assignment is deterministic.
        target_ids = [img_id for img_id in all_img_ids if img_id not in with_category]
    elif type == 3:
        target_ids = list(all_img_ids)
    else:
        target_ids = []

    if not target_ids:
        return distrib

    # One dict lookup per image instead of a linear list.index() scan.
    position_of = {img_id: idx for idx, img_id in enumerate(all_img_ids)}
    value, rest = divmod(number_of_instances, len(target_ids))

    for rank, img_id in enumerate(target_ids):
        distrib[position_of[img_id]] = value + (1 if rank < rest else 0)

    return distrib


def create_ann_ids_lists(coco, distrib):
    """Reserve a private list of unused annotation ids for every image.

    Ids are handed out in ascending order starting from 1, skipping ids already
    present in ``coco``. They are generated lazily, so memory use is proportional
    to the number of ids actually needed rather than to a fixed 100M universe.

    Args:
        coco: object exposing ``getAnnIds()``.
        distrib: per-image instance counts (see ``create_distribution``).

    Returns:
        A list with one entry per element of ``distrib``; entry ``k`` holds
        ``distrib[k] + 120`` ids. No id appears twice or collides with an
        existing annotation id. The extra 120 presumably leaves room for
        re-numbering the image's existing annotations — TODO confirm.
    """
    existing_ids = set(coco.getAnnIds())
    next_candidate = 1
    ret_list = []
    for count in distrib:
        add_list = []
        while len(add_list) < count + 120:
            if next_candidate not in existing_ids:
                add_list.append(next_candidate)
            next_candidate += 1
        ret_list.append(add_list)
    return ret_list


def simple_copy_paste_parallel(cat, percent, images, json, output_folder, proc_type):
    """Run copy-paste augmentation for one category over a whole annotation file.

    Args:
        cat: category id whose instances are copied.
        percent: number of instances to add, as a percentage of the existing
            instance count of ``cat``.
        images: directory holding the source images.
        json: path to the COCO annotation file. (The name shadows the ``json``
            module, which is why this cell imports it as ``jsonn``.)
        output_folder: directory that receives the augmented images and
            ``annotations.json``; it is deleted and recreated.
        proc_type: destination-image selection mode forwarded to
            ``create_distribution`` and ``process_image_func``.

    Returns:
        ``(initial_instances, added_instances, initial_instances + added_instances)``.
    """
    coco = COCO(json)
    cat_ids = [cat]
    cat_img_ids = coco.getImgIds(catIds=cat_ids)

    # Only the dataset list is filtered here; the COCO index is not rebuilt.
    coco.dataset['annotations'] = [x for x in coco.dataset['annotations'] if x['category_id'] in cat_ids]
    initial_instances = sum(len(coco.getAnnIds(imgIds=img_id, catIds=cat_ids)) for img_id in cat_img_ids)
    instances_to_copy = int(initial_instances * (percent / 100))
    distribution = create_distribution(instances_to_copy, coco, cat, proc_type)

    list_of_ann_ids = create_ann_ids_lists(coco, distribution)

    if os.path.exists(output_folder):
        shutil.rmtree(output_folder)
    os.makedirs(output_folder)

    all_img_ids = coco.getImgIds()

    total_images = len(all_img_ids)
    added_instances = 0
    produced_annotations = []
    replaced_ann_ids = set()

    # The context manager shuts the Manager server process down on exit, so
    # repeated calls do not leak one process each.
    with Manager() as manager:
        processed_images_counter = manager.Value('i', 0)

        tasks = [
            (img_id, coco, cat_ids, distribution[idx], 0, images, list_of_ann_ids[idx],
             processed_images_counter, proc_type)
            for idx, img_id in enumerate(all_img_ids)
        ]

        with Pool(os.cpu_count()) as p:
            # imap yields results in task order as they complete, so each image
            # can be written and released instead of holding all of them at once.
            for index, result in enumerate(p.imap(process_image_func, tasks, chunksize=16)):
                if (index + 1) % 200 == 0:
                    print(f"Processed image {index + 1} of {total_images}")

                added_for_this_image, new_ann, final_dest_img, filename, old_ann_ids = result
                added_instances += added_for_this_image

                if final_dest_img is not None:
                    cv2.imwrite(os.path.join(output_folder, filename), final_dest_img)

                produced_annotations.extend(new_ann)
                replaced_ann_ids.update(old_ann_ids)

    # One pass: drop replaced annotations and keep only the target category.
    coco.dataset['annotations'] = [
        ann for ann in coco.dataset['annotations'] + produced_annotations
        if ann['id'] not in replaced_ann_ids and ann['category_id'] == cat
    ]

    output_json = os.path.join(output_folder, 'annotations.json')
    with open(output_json, 'w') as f:
        jsonn.dump(coco.dataset, f)

    return initial_instances, added_instances, initial_instances + added_instances


In [None]:
# Remove outputs of earlier runs (per-subset image folders, merged folders, annotation files).
# NOTE(review): the last pattern `annotations*` also matches the unpacked /content/annotations directory — confirm that is intended.
!rm -r sample_data/images*
!rm -r person_train_copy*
!rm -r annotations*

rm: cannot remove 'sample_data/images*': No such file or directory


In [None]:
# Run the copy-paste augmentation on annotation subsets 2..19 for processing
# type 2, writing each subset to /content/sample_data/images{i}_{type}.
for proccessing_type in [2]:
  for i in range(2, 20):
    print(f"{'=' * 10} {i} {proccessing_type} {'=' * 10}")
    out = f"/content/sample_data/images{i}_{proccessing_type}"
    ann = f"/content/sample_data/split_annon/subset_{i}.json"
    img = "/content/train2017"
    # Category 1, percent=320; the returned instance counts are discarded.
    _, _, _ = simple_copy_paste_parallel(1, 320, img, ann, out, proccessing_type)

loading annotations into memory...
Done (t=0.50s)
creating index...
index created!
250
500
750
1000
12501250

1500
1750
2000
2250
2500
2750
3000
3250
35003500

3750
4000
4250
4500
4750
5000
5250
5500
Processed image 200 of 5915
Processed image 400 of 5915
Processed image 600 of 5915
Processed image 800 of 5915
Processed image 1000 of 5915
Processed image 1200 of 5915
Processed image 1400 of 5915
Processed image 1600 of 5915
Processed image 1800 of 5915
Processed image 2000 of 5915
Processed image 2200 of 5915
Processed image 2400 of 5915
Processed image 2600 of 5915
Processed image 2800 of 5915
Processed image 3000 of 5915
Processed image 3200 of 5915
Processed image 3400 of 5915
Processed image 3600 of 5915
Processed image 3800 of 5915
Processed image 4000 of 5915
Processed image 4200 of 5915
Processed image 4400 of 5915
Processed image 4600 of 5915
Processed image 4800 of 5915
Processed image 5000 of 5915
Processed image 5200 of 5915
Processed image 5400 of 5915
Processed image 5600 

# Afisare BBOX si SEGMENTATION

In [None]:
import json
import os
import random
import cv2
import matplotlib.pyplot as pl
import numpy as np

def load_coco_annotations(annotation_path):
    """Read the JSON file at ``annotation_path`` and return the parsed content."""
    with open(annotation_path, 'r') as handle:
        return json.load(handle)

def select_random_images(coco_data, num_images=20):
    """Return ``num_images`` randomly chosen image records, in dataset order.

    Raises:
        ValueError: from ``random.sample`` when ``num_images`` exceeds the
            number of images.
    """
    every_id = [entry['id'] for entry in coco_data['images']]
    picked_ids = random.sample(every_id, num_images)

    chosen = []
    for entry in coco_data['images']:
        if entry['id'] in picked_ids:
            chosen.append(entry)
    return chosen

def select_images_from_directory(coco_data, directory_path):
    """Return the image records whose ``file_name`` exists in ``directory_path``."""
    present = os.listdir(directory_path)
    return [record for record in coco_data['images'] if record['file_name'] in present]

def get_image_with_bboxes(image_path, annotations, category_names):
    """Load an image and draw every annotation's bbox (green) and polygons (red) on it.

    Args:
        image_path: path handed to ``cv2.imread``.
        annotations: annotation dicts; ``bbox`` is read as ``(x, y, w, h)``.
        category_names: mapping from category id to display name.

    Returns:
        The image returned by ``cv2.imread`` with the overlays drawn on it.
    """
    green, red = (0, 255, 0), (0, 0, 255)
    canvas = cv2.imread(image_path)

    for ann in annotations:
        if 'bbox' in ann:
            box = ann['bbox']
            category_id = ann['category_id']
            # Some annotations carry the literal name instead of the numeric id.
            if category_id == 'person':
                category_id = 1
            label = category_names.get(category_id, "Unknown")

            left, top, box_w, box_h = [int(coord) for coord in box]
            cv2.rectangle(canvas, (left, top), (left + box_w, top + box_h), green, 2)
            cv2.putText(canvas, label, (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, green, 2)

        polygons = ann.get('segmentation')
        if isinstance(polygons, list):
            for polygon in polygons:
                # A polygon needs at least three (x, y) points to be drawn.
                if not (isinstance(polygon, list) and len(polygon) >= 6):
                    continue
                points = np.array(polygon).reshape((-1, 2)).astype(np.int32)
                cv2.drawContours(canvas, [points], 0, red, 2)
    return canvas

def main(image_dir, annotation_path):
    """Display up to 40 random images from ``image_dir`` with their annotations drawn.

    Args:
        image_dir: directory containing the images to display.
        annotation_path: COCO annotation JSON describing those images.
    """
    # This cell imports matplotlib.pyplot under the alias `pl`, so `plt` was
    # undefined here; import it locally under the name the body uses.
    import matplotlib.pyplot as plt

    coco_data = load_coco_annotations(annotation_path)
    category_names = {category['id']: category['name'] for category in coco_data['categories']}
    selected_images = select_images_from_directory(coco_data, image_dir)

    random.shuffle(selected_images)
    selected_images = selected_images[:40]

    for image_info in selected_images:
        image_path = os.path.join(image_dir, image_info['file_name'])
        annotations = [ann for ann in coco_data['annotations'] if ann['image_id'] == image_info['id']]
        image_with_bboxes = get_image_with_bboxes(image_path, annotations, category_names)

        # OpenCV images are BGR; matplotlib expects RGB.
        plt.imshow(cv2.cvtColor(image_with_bboxes, cv2.COLOR_BGR2RGB))
        plt.title("Image with Bounding Boxes and Segmentation Contours")
        plt.show()

# Visualise the images in the given directory using the given annotation file.
if __name__ == "__main__":
    image_directory = "/content/person_val_copy_paste1"
    annotations_file = "/content/annotations_1.json"
    main(image_directory, annotations_file)


# Functia care uneste cele 20 de directoare de poze in unul singur

In [None]:
import os
import shutil

def merge_image_directories(destination_dir):
    """Copy the .jpg files of the per-subset output folders into one directory.

    For processing type 2, every existing folder
    ``/content/sample_data/images{0..19}_2`` is scanned and its ``.jpg`` files
    are copied into ``f"{destination_dir}2"`` (created if missing). Files with
    the same name overwrite each other.
    """
    for kind in range(2, 3):
        target = f'{destination_dir}{kind}'
        if not os.path.exists(target):
            os.makedirs(target)

        for subset in range(20):
            origin = f"/content/sample_data/images{subset}_{kind}"
            if not os.path.exists(origin):
                continue
            for entry in os.listdir(origin):
                if not entry.endswith(".jpg") or entry == "annotations.jpg":
                    continue
                shutil.copy2(os.path.join(origin, entry), os.path.join(target, entry))

        print("Toate imaginile au fost copiate cu succes în", target)
# Merge the per-subset image folders; the processing type is appended to this prefix.
merge_image_directories("/content/person_train_copy_paste")

Toate imaginile au fost copiate cu succes în /content/person_train_copy_paste2


In [None]:
# Delete the merged image directories matching person_train_copy* (relative to the current directory).
rm -r person_train_copy*

# Functia care uneste toate fisierele de adnotari in unul singur

In [None]:
from pycocotools.coco import COCO
import json

def merge_coco_annotations(folders):
    """Concatenate the ``annotations.json`` files found in ``folders``.

    Annotation ids are renumbered 1, 2, 3, ... across all files (the loaded
    annotation dicts are modified in place). Image records are appended
    unchanged, and the categories are taken from the first file that has any.
    Folders without an ``annotations.json`` are reported and skipped.

    Returns:
        A dict with the keys ``images``, ``annotations`` and ``categories``.
    """
    combined = {
        "images": [],
        "annotations": [],
        "categories": []
    }
    next_id = 1

    for folder in folders:
        annotation_path = f"{folder}/annotations.json"
        if not os.path.exists(annotation_path):
            print(f"Fișierul {annotation_path} nu există. Se trece la următorul.")
            continue

        subset = COCO(annotation_path)
        combined["images"].extend(subset.dataset['images'])

        for record in subset.dataset['annotations']:
            record['id'] = next_id
            next_id += 1
            combined["annotations"].append(record)

        if not combined["categories"]:
            combined["categories"] = subset.dataset['categories']

    return combined

# One list of 20 subset folders per processing type (currently only type 2).
folders = [[f"/content/sample_data/images{i}_{j}" for i in range(20)] for j in range(2, 3)] #TODO change  3 to 1

# `start = 2` keeps the output file suffix aligned with the processing type above.
for i, folders_type in enumerate(folders, start = 2):
  merged_data = merge_coco_annotations(folders_type)
  with open(f"/content/annotations_{i}.json", 'w') as f:
      json.dump(merged_data, f)

  print("Adnotările au fost unite și salvate cu succes!", i)


loading annotations into memory...
Done (t=0.78s)
creating index...
index created!
loading annotations into memory...
Done (t=0.72s)
creating index...
index created!
loading annotations into memory...
Done (t=2.61s)
creating index...
index created!
loading annotations into memory...
Done (t=0.71s)
creating index...
index created!
loading annotations into memory...
Done (t=0.71s)
creating index...
index created!
loading annotations into memory...
Done (t=0.67s)
creating index...
index created!
loading annotations into memory...
Done (t=0.69s)
creating index...
index created!
loading annotations into memory...
Done (t=0.68s)
creating index...
index created!
loading annotations into memory...
Done (t=0.72s)
creating index...
index created!
loading annotations into memory...
Done (t=0.71s)
creating index...
index created!
loading annotations into memory...
Done (t=0.68s)
creating index...
index created!
loading annotations into memory...
Done (t=0.70s)
creating index...
index created!
load

In [None]:
# Load the merged annotation file and print how many annotations it contains.
c = COCO('annotations_2.json')
print(len(c.dataset['annotations']))

loading annotations into memory...
Done (t=13.24s)
creating index...
index created!
398050


# ====================== Optional ======================
# ====================== Optional ======================


In [None]:
# Give every image in annotations_3.json a new id (above the current maximum)
# and a matching zero-padded file name, copy the image under that name, and
# save the updated annotations.
coco = COCO("annotations_3.json")

max_id = max([img['id'] for img in coco.dataset['images']])

for img in coco.dataset['images']:
    max_id += 1
    old_name = img['file_name']
    # 12-digit zero-padded name derived from the new id.
    img['file_name'] = str(max_id).zfill(12) + '.jpg'

    # Look the annotations up by the OLD id before overwriting it; the index
    # built at load time is keyed by the original ids.
    old_id = img['id']
    ann_ids = coco.getAnnIds([old_id])
    anns = coco.loadAnns(ann_ids)
    img['id'] = max_id

    for ann in anns:
        ann['image_id'] = max_id

    # NOTE(review): the destination directory is not created in this cell — it presumably has to exist already; confirm.
    old_image_path = os.path.join('/content/person_train_copy_paste3', old_name)
    new_image_path = os.path.join('/content/person_train_copy_paste3_new', img['file_name'])
    shutil.copy2(old_image_path, new_image_path)

output_json_path = 'annotations_3_updated.json'
with open(output_json_path, 'w') as json_file:
    json.dump(coco.dataset, json_file, indent=4)

loading annotations into memory...
Done (t=13.44s)
creating index...
index created!


In [None]:
# Reload the re-identified annotation file and print its annotation count.
c = COCO("/content/annotations_3_updated.json")
print(len(c.dataset['annotations']))

loading annotations into memory...
Done (t=16.68s)
creating index...
index created!
327481


In [None]:
import json
import shutil
from pathlib import Path

def merge_coco_datasets(coco1_annotations, coco2_annotations, output_annotations,
                        coco1_images_folder, coco2_images_folder, output_images_folder):
    """Concatenate two COCO annotation files and write the result.

    Images and annotations of the second file are appended to those of the
    first; categories come from the first file only. The largest image id of
    each input and the merged image count are printed.

    The three ``*_images_folder`` arguments are accepted but not used: this
    function does not copy any image files.
    """
    loaded = []
    for path in (coco1_annotations, coco2_annotations):
        with open(path, 'r') as f:
            loaded.append(json.load(f))
    first, second = loaded

    merged_images = first['images'] + second['images']
    print(max(x['id'] for x in first['images']))
    print(max(x['id'] for x in second['images']))
    print(len(merged_images))

    merged_data = {
        'images': merged_images,
        'annotations': first['annotations'] + second['annotations'],
        'categories': first['categories']
    }

    with open(output_annotations, 'w') as f:
        json.dump(merged_data, f)

# Merge the original train2017 annotations with the re-identified copy-paste annotations.
merge_coco_datasets(
    '/content/annotations/instances_train2017.json',
    '/content/annotations_3_updated.json',
    '/content/merged_annotations.json',
    '/content/train2017',
    '/content/person_train_copy_paste3_new',
    '/content/merged_images'
)

581929
700216
236574


In [None]:
# Sanity-check the merged annotation file: number of images and largest image id.
with open('/content/merged_annotations.json', 'r') as f:
  coco1_data = json.load(f)
print(len(coco1_data['images']))
# Read the id from the data just loaded rather than from a `coco` object left
# over from an earlier (optional) cell, which may be stale or undefined.
print(max(x['id'] for x in coco1_data['images']))

236574
700216


In [None]:
# Number of annotations with category_id 1 in the merged file loaded in the previous cell.
print(len([x for x in coco1_data['annotations'] if x['category_id'] == 1]))

589946


In [None]:
# Archive the merged images directory together with the merged annotation file.
!zip -r  /content/merged_train_with_30_cp.zip /content/merged_images /content/merged_annotations.json

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  adding: content/merged_images/000000589423.jpg (deflated 1%)
  adding: content/merged_images/000000657509.jpg (deflated 4%)
  adding: content/merged_images/000000387365.jpg (deflated 0%)
  adding: content/merged_images/000000184210.jpg (deflated 0%)
  adding: content/merged_images/000000662452.jpg (deflated 4%)
  adding: content/merged_images/000000599713.jpg (deflated 5%)
  adding: content/merged_images/000000693324.jpg (deflated 4%)
  adding: content/merged_images/000000610345.jpg (deflated 1%)
  adding: content/merged_images/000000674296.jpg (deflated 2%)
  adding: content/merged_images/000000660745.jpg (deflated 3%)
  adding: content/merged_images/000000466530.jpg (deflated 0%)
  adding: content/merged_images/000000037122.jpg (deflated 0%)
  adding: content/merged_images/000000520232.jpg (deflated 0%)
  adding: content/merged_images/000000397974.jpg (deflated 0%)
  adding: content/merged_images/000000312679.jpg (def

In [None]:
# Move the archive onto the mounted Drive.
!mv /content/merged_train_with_30_cp.zip /content/drive/MyDrive/A_new_datasets

# ====================== Optional ======================
# ====================== Optional ======================


# Functia care face ZIP fisierul de adnotari si fisierul cu poze

In [None]:
# Pack the merged image directory and its annotation file into one zip archive.
import shutil

source_directory = '/content/person_train_copy_paste'
# NOTE(review): annotation_file is assigned here but not read in this cell.
annotation_file = '/content/annotations.json'

import zipfile

zip_filename = "/content/2_BUN_80%copy_paste_train_doar_cu_persoane_BUN.zip"

with zipfile.ZipFile(zip_filename, 'w') as zipf:
    # Only processing type 2 is packed.
    for i in range(2, 3):
      for foldername, subfolders, filenames in os.walk(f'{source_directory}{i}'):
          for filename in filenames:
              file_path = os.path.join(foldername, filename)

              # Store paths relative to /content/ inside the archive.
              arcname = os.path.relpath(file_path, '/content/')

              zipf.write(file_path, arcname)

      # The annotation file is stored at the archive root under its base name.
      zipf.write(f"/content/annotations_{i}.json", os.path.basename(f"/content/annotations_{i}.json"))

# Functia care copiaza zipul pe drive

In [None]:
# Copy the archive created above to Drive.
import shutil

source_zip = zip_filename
# zip_filename[9:] drops the first 9 characters (the "/content/" prefix), so the
# bare file name is appended directly after "ULTIMUL_SET".
destination_path = f'/content/drive/MyDrive/A_new_datasets/ULTIMUL_SET{zip_filename[9:]}'
shutil.copy2(source_zip, destination_path)


In [None]:
import json

def count_person_instances(file_path):
    """Count the annotations of the ``person`` category in a COCO JSON file.

    Args:
        file_path: path to the annotation file.

    Returns:
        Number of annotations whose ``category_id`` equals the id of the first
        category named ``"person"``.

    Raises:
        ValueError: if the file has no ``categories`` key or no ``person``
            category.
    """
    with open(file_path, 'r') as f:
        data = json.load(f)

    if "categories" not in data:
        raise ValueError("Fișierul JSON nu conține cheia 'categories'.")

    person_id = next(
        (category["id"] for category in data["categories"] if category["name"] == "person"),
        None,
    )
    if person_id is None:
        raise ValueError("Categoria 'person' nu a fost găsită în fișierul JSON.")

    return sum(1 for annotation in data["annotations"] if annotation["category_id"] == person_id)

# Person-instance counts, in order: annotations_1, original train2017, annotations_2, annotations_3.
print(count_person_instances("/content/annotations_1.json"),
  count_person_instances("/content/annotations/instances_train2017.json"),
  count_person_instances("/content/annotations_2.json"),
  count_person_instances("/content/annotations_3.json"))


340473 262465 323681 340473


# Restart runtime

In [None]:
# Restart the runtime by sending signal 9 (SIGKILL) to the current kernel process.
import os
os.kill(os.getpid(), 9)