Project name: AMAGroup test

Description: This notebook produces one annotation text file for each image in the dataset, in the label format that YOLOv7 accepts.

Author: Ali Saghafi

Date: 31/10/2023

In [1]:
import os
import numpy as np
import pandas as pd
import json

In [2]:
# Directory used as the base for every path in this notebook.
# When __file__ is undefined (as is typical inside a notebook kernel) the
# lookup raises NameError and the current working directory is used instead.
try:
    __location__ = os.path.dirname(__file__)
except NameError:
    __location__ = os.getcwd()
else:
    # Anchor a possibly relative dirname to the working directory, then
    # resolve it to a canonical absolute path.
    __location__ = os.path.realpath(os.path.join(os.getcwd(), __location__))

In [4]:
# Load the instances file from the "material" folder under __location__.
# (The file name and the keys read from it later match the COCO "instances"
# layout -- assumed, confirm against the actual dataset.)
#
# os.path.join is used instead of hard-coded "\\" separators so the paths
# resolve on Linux/macOS as well; on Windows the resulting strings are the
# same as with manual concatenation.
file_path = os.path.join(__location__, "material")

# JSON is UTF-8 by specification; stating it explicitly avoids depending on
# the platform's default text encoding.
with open(os.path.join(file_path, "instances_val2017.json"), 'r', encoding="utf-8") as file:
    json_data = json.load(file)

In [5]:
# Pull out the two top-level sections of the loaded JSON that the remaining
# cells work with: per-image records and per-object annotation records.
image_list, annotation_list = json_data['images'], json_data['annotations']

In [6]:
# Lookup table in the form {image_id: image_file_name}.
# Image records that lack either an 'id' or a 'file_name' are skipped.
id_nam_dict = {
    image['id']: image['file_name']
    for image in image_list
    if 'file_name' in image and 'id' in image
}

In [7]:
# annot_dict amalgamates, per image, the areas and the corresponding bounding
# boxes of all of its annotations:
#     {image_id: {'areas': [area, ...], 'bboxes': [bbox, ...]}}
# 'areas' and 'bboxes' are parallel lists in annotation_list order. Images
# without any annotation get no entry; annotations whose image_id is not in
# id_nam_dict are ignored.
#
# The annotations are grouped in a single pass (one dict lookup each) instead
# of rescanning the whole annotation list once per image, which scaled with
# images x annotations.
annots_by_image = {}
for annotation in annotation_list:
    image_id = annotation['image_id']
    if image_id not in id_nam_dict:
        continue
    grouped = annots_by_image.setdefault(image_id, {'areas': [], 'bboxes': []})
    grouped['areas'].append(annotation['area'])
    grouped['bboxes'].append(annotation['bbox'])

# Emit the entries in id_nam_dict order so that iterating annot_dict visits
# the images in the same order as the image list.
annot_dict = {
    image_id: annots_by_image[image_id]
    for image_id in id_nam_dict
    if image_id in annots_by_image
}

In [8]:
# For every annotated image, keep only the bounding boxes of its smallest-area
# and its largest-area annotation:
#     {image_id: {0: {'bbox': smallest_bbox}, 1: {'bbox': largest_bbox}}}
result_dict = {}

for image_id, grouped in annot_dict.items():
    areas = grouped['areas']
    boxes = grouped['bboxes']

    # Annotation positions ordered by ascending area. The sort is stable, so
    # among equal areas the first position is the earliest annotation and the
    # last position is the latest one.
    positions_by_area = sorted(range(len(areas)), key=areas.__getitem__)
    smallest_pos, largest_pos = positions_by_area[0], positions_by_area[-1]

    # Class 0 -> smallest annotation, class 1 -> largest annotation.
    # An image with a single annotation gets the same box under both classes.
    result_dict[image_id] = {
        0: {'bbox': boxes[smallest_pos]},
        1: {'bbox': boxes[largest_pos]},
    }

In [9]:
# Write one label file (<image name>.txt) per annotated image.
#
# Each line has the YOLO label layout
#     <class> <x_center> <y_center> <width> <height>
# with whitespace-separated fields and the box expressed as fractions of the
# image size (0..1). The boxes stored in result_dict are assumed to be
# [x_min, y_min, width, height] in pixels (COCO layout -- confirm against the
# dataset), so they are converted before being written.

# os.path.join keeps the path portable; the trailing "" keeps the trailing
# separator so annot_path can still be concatenated with a file name.
annot_path = os.path.join(__location__, "Dataset", "annotations", "")
os.makedirs(annot_path, exist_ok=True)  # create the output folder if it is missing

# Pixel size of every image, needed to normalise its boxes.
image_sizes = {
    image['id']: (image['width'], image['height'])
    for image in image_list
    if 'id' in image and 'width' in image and 'height' in image
}

for image_id, name in id_nam_dict.items():
    if image_id not in result_dict:
        continue  # image without annotations: no label file is written

    size = image_sizes.get(image_id)
    if not size or not all(size):
        raise ValueError(
            f"image {image_id} ({name}) has no usable width/height; "
            "its boxes cannot be normalised"
        )
    img_w, img_h = size

    label_stem = os.path.splitext(name)[0]
    with open(f"{annot_path}{label_stem}.txt", "w") as file:
        for class_id, entry in result_dict[image_id].items():
            x_min, y_min, box_w, box_h = entry['bbox']

            # Clip the box to the image so every normalised value stays in
            # [0, 1]; label loaders commonly reject out-of-range coordinates.
            x0 = min(max(x_min, 0), img_w)
            y0 = min(max(y_min, 0), img_h)
            x1 = min(max(x_min + box_w, 0), img_w)
            y1 = min(max(y_min + box_h, 0), img_h)

            yolo_box = (
                (x0 + x1) / 2 / img_w,   # x_center
                (y0 + y1) / 2 / img_h,   # y_center
                (x1 - x0) / img_w,       # width
                (y1 - y0) / img_h,       # height
            )
            bbox_str = ' '.join(f"{value:.6f}" for value in yolo_box)
            file.write(f"{class_id} {bbox_str}\n")

