In [2]:
import pandas as pd
import json
import os
from PIL import Image

w4_labels = ['CAR', 'CAR_CIPV', 'VAN_NLV', 'TRUCK', 'VAN', 'CAR_NLV', 
             'BUS', 'TRUCK_NLV', 'BUS_CIPV', 'TRUCK_CIPV', 'BUS_NLV', 
             'VAN_CIPV', 'TRAILER', 'TOW_TRUCK', 'RV', 'CONSTRUCTION_VEHICLE', 'SPECIAL_4W', 'TRACTOR', 
             'PICK_UP_CAR', 'OPEN_CAR_DOOR', 'DUMMY_CAR', 'MISK', 'IGNORE_VEHICLE', 'MINI_TRUCK', 'OPEN_CAR_DOOR', 
             'CEMENT_MIXER_TRUCK']

ped_labels = ['PERSON', 'PERSON_MCP', 'CHILD', 'CHILD_MCP', 'IGNORE_PEDESTRIAN', 'DUMMY_PERSON', "PEDESTRIAN", 'PEDESTRIAN_MCP'] 

w2_labels = ['KICK_SCOOTER', 'BIKE', 'TRICYCLE', 'BIKE_NLV', 'MOTOR_NLV', 
             'BIKE_CIPV', 'MOTOR_CIPV', 'MOTOR_WITHOUT_RIDER', 'KICK_SCOOTER_CIPV', 'KICK_SCOOTER_NLV', 'MOTOR', 'IGNORE_BICYCLE']

rider_labels = ['RIDER_NLV', 'RIDER', 'RIDER_CIPV']

def get_label_id(label):
    if label in w4_labels:
        return 0
    elif label in w2_labels:
        return 1
    elif label in ped_labels:
        return 2
    elif label in rider_labels:
        return 3
    else:
        return -1  # Undefined category

def df_to_coco(df, images_path, output_file):
    coco_format = {
        "images": [],
        "annotations": [],
        "categories": [
            {"supercategory": None, "id": 0, "name": "2w"},
            {"supercategory": None, "id": 1, "name": "4w"},
            {"supercategory": None, "id": 2, "name": "ped"},
            {"supercategory": None, "id": 3, "name": "rider"}
        ]
    }

    image_set = {}
    annotation_id = 1

    # First, iterate through all unique images to assign IDs
    unique_images = df['name'].unique()
    for idx, image_name in enumerate(unique_images):
        image_path = os.path.join(images_path, image_name)
        if os.path.exists(image_path):
            with Image.open(image_path) as img:
                width, height = img.size

            image_info = {
                "file_name": os.path.join(images_path, image_name),
                "id": idx + 1,  # Start IDs from 1
                "width": width,
                "height": height
            }
            image_set[image_name] = image_info["id"]
            coco_format["images"].append(image_info)
        else:
            print(f"Image file {image_path} does not exist.")

    # Then, iterate through the rows to create annotations
    for _, row in df.iterrows():
        image_id = image_set.get(row["name"])
        if image_id is None:
            continue  # Skip if image_id is not found

        category_id = get_label_id(row["label"])
        if category_id == -1:
            continue  # Skip if category_id is undefined

        # Convert (x_center, y_center, width, height) to (top-left-x, top-left-y, width, height)
        x_center, y_center, width, height = row["x_center"], row["y_center"], row["width"], row["height"]
        top_left_x = x_center - (width / 2)
        top_left_y = y_center - (height / 2)

        annotation_info = {
            "id": annotation_id,
            "image_id": image_id,
            "category_id": category_id,
            "bbox": [top_left_x, top_left_y, width, height],
            "iscrowd": 0,
            "area": width * height,
            #"ignore": row["ignore_state"],
            #"attributes": {
            #    "is_occluded": row["is_occluded"],
            #    "is_truncated": row["is_truncated"],
            #    "d3_separation": row["d3_separation"] if pd.notna(row["d3_separation"]) else 0.0,
            #    "r_label": row["r_label"] if pd.notna(row["r_label"]) else "NaN",
            #    "is_rider_on_2_wheels": row["is_rider_on_2_wheels"],
            #    "occlusion_state_b2b": row["occlusion_state_b2b"],
            #    "is_on_road_bondery": row["is_on_road_bondery"],
            #    "l_label": row["l_label"] if pd.notna(row["l_label"]) else "NaN",
            #    "importance_state": row["importance_state"]
            #}
        }

        coco_format["annotations"].append(annotation_info)
        annotation_id += 1
    
    with open(output_file, 'w') as f:
        json.dump(coco_format, f, indent=4)





In [2]:
df = pd.read_csv("/home/b2b/arthur/data/921_hard_images/gt.tsv", sep = '\t')

In [3]:
df_to_coco(df, "/home/b2b/arthur/data/921_hard_images/images", "/home/b2b/arthur/data/921_hard_images/coco_labels.json")

In [7]:
import pandas as pd
import json
import os
from PIL import Image

# Label categories
w4_labels = ['CAR', 'CAR_CIPV', 'VAN_NLV', 'TRUCK', 'VAN', 'CAR_NLV', 
             'BUS', 'TRUCK_NLV', 'BUS_CIPV', 'TRUCK_CIPV', 'BUS_NLV', 
             'VAN_CIPV', 'TRAILER', 'TOW_TRUCK', 'RV', 'CONSTRUCTION_VEHICLE', 'SPECIAL_4W', 'TRACTOR', 
             'PICK_UP_CAR', 'OPEN_CAR_DOOR', 'DUMMY_CAR', 'MISK', 'MISC', 'IGNORE_VEHICLE', 'MINI_TRUCK', 'OPEN_CAR_DOOR', 
             'CEMENT_MIXER_TRUCK']

ped_labels = ['PERSON', 'PERSON_MCP', 'CHILD', 'CHILD_MCP', 'IGNORE_PEDESTRIAN', 'DUMMY_PERSON', "PEDESTRIAN", 'PEDESTRIAN_MCP'] 

w2_labels = ['KICK_SCOOTER', 'BIKE', 'TRICYCLE', 'BIKE_NLV', 'MOTOR_NLV', 
             'BIKE_CIPV', 'MOTOR_CIPV', 'MOTOR_WITHOUT_RIDER', 'KICK_SCOOTER_CIPV', 'KICK_SCOOTER_NLV', 'MOTOR', 'IGNORE_BICYCLE']

rider_labels = ['RIDER_NLV', 'RIDER', 'RIDER_CIPV']

ignore_labels = ["IGNORE_VEHICLE", "IGNORE_PEDESTRIAN", "IGNORE_BICYCLE", "IGNORE_RIDER"]

# Function to get label ID
def get_label_id(label):
    label = label.upper()
    if label in w4_labels:
        return 1
    elif label in w2_labels:
        return 0
    elif label in ped_labels:
        return 2
    elif label in rider_labels:
        return 3
    else:
        return -1  # Undefined category

# Function to convert DataFrame to COCO format JSON
def df_to_coco(df, images_path, coco_format, annotation_id, stats):
    image_set = {}
    
    # Iterate through all unique images to assign IDs
    unique_images = df['name'].unique()
    for idx, image_name in enumerate(unique_images):
        image_path = os.path.join(images_path, image_name)
        if os.path.exists(image_path):
            with Image.open(image_path) as img:
                width, height = img.size

            image_info = {
                "file_name": os.path.join(images_path, image_name),
                "id": len(coco_format["images"]) + 1,  # Start IDs from 1
                "width": width,
                "height": height
            }
            image_set[image_name] = image_info["id"]
            coco_format["images"].append(image_info)
        else:
            print(f"Image file {image_path} does not exist.")

    # Iterate through the rows to create annotations
    for _, row in df.iterrows():
        image_id = image_set.get(row["name"])
        if image_id is None:
            continue  # Skip if image_id is not found

        category_id = get_label_id(row["label"])
        if category_id == -1:
            continue  # Skip if category_id is undefined

        # Increment the corresponding category count
        stats[category_id] += 1

        # Convert (x_center, y_center, width, height) to (top-left-x, top-left-y, width, height)
        x_center, y_center, width, height = row["x_center"], row["y_center"], row["width"], row["height"]
        top_left_x = x_center - (width / 2)
        top_left_y = y_center - (height / 2)

        
        loss_weight = 3.0
        if row['label'].upper() in ignore_labels:
            loss_weight = 1.0


        annotation_info = {
            "id": annotation_id,
            "image_id": image_id,
            "category_id": category_id,
            "bbox": [top_left_x, top_left_y, width, height],
            "iscrowd": 0,
            "area": width * height,
            "loss_weight" : loss_weight
        }

        coco_format["annotations"].append(annotation_info)
        annotation_id += 1
    
    return coco_format, annotation_id, stats

# Main function to process all subdirectories
def process_all_folders(root_folder, output_file):
    coco_format = {
        "images": [],
        "annotations": [],
        "categories": [
            {"supercategory": None, "id": 0, "name": "2w"},
            {"supercategory": None, "id": 1, "name": "4w"},
            {"supercategory": None, "id": 2, "name": "ped"},
            {"supercategory": None, "id": 3, "name": "rider"}
        ]
    }
    annotation_id = 1
    stats = {0: 0, 1: 0, 2: 0, 3: 0}  # Initialize statistics counter

    # Iterate through all subdirectories in the root folder
    for subdir in os.listdir(root_folder):
        subdir_path = os.path.join(root_folder, subdir)
        if os.path.isdir(subdir_path):
            # Read the TSV file
            tsv_file = os.path.join(subdir_path, f"{subdir}_CorticaFormatGT.tsv")
            if os.path.exists(tsv_file):
                df = pd.read_csv(tsv_file, sep='\t')
                images_path = os.path.join(subdir_path, f"{subdir}_images")
                coco_format, annotation_id, stats = df_to_coco(df, images_path, coco_format, annotation_id, stats)
            else:
                print(f"TSV file {tsv_file} does not exist.")

    with open(output_file, 'w') as f:
        json.dump(coco_format, f, indent=4)

    # Print out the statistics
    print(f"Number of 2-wheelers (2w): {stats[0]}")
    print(f"Number of 4-wheelers (4w): {stats[1]}")
    print(f"Number of pedestrians (ped): {stats[2]}")
    print(f"Number of riders: {stats[3]}")

# Example usage
root_folder = "/mnt/data/OD_Data/8mp_data_kernel_1.7mp/"
output_file = "./all_data_coco_loss_weight_v2.json"
process_all_folders(root_folder, output_file)


Image file /mnt/data/OD_Data/8mp_data_kernel_1.7mp/israel_urban_with_buses/israel_urban_with_buses_images/1690370164999596.png does not exist.
Image file /mnt/data/OD_Data/8mp_data_kernel_1.7mp/israel_urban_with_buses/israel_urban_with_buses_images/1690370158033282.png does not exist.
Image file /mnt/data/OD_Data/8mp_data_kernel_1.7mp/israel_urban_with_buses/israel_urban_with_buses_images/1690370166833088.png does not exist.
Image file /mnt/data/OD_Data/8mp_data_kernel_1.7mp/israel_urban_with_buses/israel_urban_with_buses_images/1689859406133704.png does not exist.
Image file /mnt/data/OD_Data/8mp_data_kernel_1.7mp/israel_urban_with_buses/israel_urban_with_buses_images/1689859415700384.png does not exist.
Image file /mnt/data/OD_Data/8mp_data_kernel_1.7mp/israel_urban_with_buses/israel_urban_with_buses_images/1689859444066749.png does not exist.
Image file /mnt/data/OD_Data/8mp_data_kernel_1.7mp/israel_urban_with_buses/israel_urban_with_buses_images/1689859418899945.png does not exist.

In [4]:
df = pd.read_csv("/mnt/data/OD_Data/8mp_data_kernel_1.7mp/paris_night_urban_for_train_tagged_14_05_23/paris_night_urban_for_train_tagged_14_05_23_CorticaFormatGT.tsv", sep = '\t')

In [5]:
df.columns, df.name

(Index(['name', 'x_center', 'y_center', 'width', 'height', 'label',
        'is_occluded', 'is_truncated', 'l_label', 'd3_separation', 'r_label',
        'front_wheel', 'back_wheel', 'is_rider_on_2_wheels', 'is_not_in_lane'],
       dtype='object'),
 0        1675530774577055.png
 1        1675530774577055.png
 2        1675530774577055.png
 3        1675530774577055.png
 4        1675530774577055.png
                  ...         
 13278    1675532134734928.png
 13279    1675532134734928.png
 13280    1675532134734928.png
 13281    1675532134734928.png
 13282    1675532134734928.png
 Name: name, Length: 13283, dtype: object)

In [26]:
df_to_coco(df, "/mnt/data/OD_Data/8mp_data_kernel_1.7mp/paris_night_urban_for_train_tagged_14_05_23/paris_night_urban_for_train_tagged_14_05_23_images",\
           coco_format, 0)

Image file /mnt/data/OD_Data/8mp_data_kernel_1.7mp/paris_night_urban_for_train_tagged_14_05_23/paris_night_urban_for_train_tagged_14_05_23_images/1675530803310684.png does not exist.


({'images': [{'file_name': '/mnt/data/OD_Data/8mp_data_kernel_1.7mp/paris_night_urban_for_train_tagged_14_05_23/paris_night_urban_for_train_tagged_14_05_23_images/1675530774577055.png',
    'id': 1,
    'width': 3840,
    'height': 1920},
   {'file_name': '/mnt/data/OD_Data/8mp_data_kernel_1.7mp/paris_night_urban_for_train_tagged_14_05_23/paris_night_urban_for_train_tagged_14_05_23_images/1675530773710380.png',
    'id': 2,
    'width': 3840,
    'height': 1920},
   {'file_name': '/mnt/data/OD_Data/8mp_data_kernel_1.7mp/paris_night_urban_for_train_tagged_14_05_23/paris_night_urban_for_train_tagged_14_05_23_images/1675530772710369.png',
    'id': 3,
    'width': 3840,
    'height': 1920},
   {'file_name': '/mnt/data/OD_Data/8mp_data_kernel_1.7mp/paris_night_urban_for_train_tagged_14_05_23/paris_night_urban_for_train_tagged_14_05_23_images/1675530771877027.png',
    'id': 4,
    'width': 3840,
    'height': 1920},
   {'file_name': '/mnt/data/OD_Data/8mp_data_kernel_1.7mp/paris_night_urba

In [3]:
coco_format = {
        "images": [],
        "annotations": [],
        "categories": [
            {"supercategory": None, "id": 0, "name": "2w"},
            {"supercategory": None, "id": 1, "name": "4w"},
            {"supercategory": None, "id": 2, "name": "ped"},
            {"supercategory": None, "id": 3, "name": "rider"}
        ]
    }

In [4]:
def get_all_labels(root_folder):
    all_labels = set()
    
    # Iterate through all subdirectories in the root folder
    for subdir in os.listdir(root_folder):
        subdir_path = os.path.join(root_folder, subdir)
        if os.path.isdir(subdir_path):
            # Read the TSV file
            tsv_file = os.path.join(subdir_path, f"{subdir}_CorticaFormatGT.tsv")
            if os.path.exists(tsv_file):
                df = pd.read_csv(tsv_file, sep='\t')
                unique_labels = df['label'].unique()
                all_labels.update(label.upper() for label in unique_labels)
            else:
                print(f"TSV file {tsv_file} does not exist.")
    
    return all_labels

In [5]:
#root_folder = '/mnt/data/OD_Data/8mp_data_kernel_1.7mp/'
root_folder = '/mnt/data/OD_Data/od_full_training_set_for_data_effort'

labels = get_all_labels(root_folder)

In [6]:
for label in labels:
    if label not in w4_labels + w2_labels + ped_labels + rider_labels:
        if not "SIG" in label:
            print (label)

2WHEELS_PR2
TRAFFIC_LIGHT_4
IGNORE_CONSTRUCTION_CONE
CONSTRUCTION_CONE
CAR_PR3
TRAFFIC_LIGHT_3_V
RIDER_AND_BIKE
TRAFFIC_LIGHT_UNKNOWN_NUMBER
ISLAND_TRAFFIC_LIGHT
ARROW_RIGHT
PERSON_PR2
TIRE
BACK_TRAFFIC_LIGHT
PERSON_PR1
REFLECTOR_POST
TRAFFIC_LIGHT_4_V
TRAFFIC_LIGHT
TUNNEL
2WHEELS_PR1
BLANK
ARROW_LEFT
SUN_RAY
2WHEELS_PR3
CAR_PR1
SUPPLEMENTARY_GENERAL
TRAIN
PERSON_PR3
TRAFFIC_LIGHT_3
TRAFFIC_LIGHT_2_V
TRAFFIC_LIGHT_5
CAR_PR2
DISTANCE
IGNORE_TRAFFIC_LIGHT
TRAFFIC_LIGHT_2
GENERAL_TRAFFIC_LIGHT
TRAFFIC_LIGHT_1
MISC


In [26]:
from glob import glob
from PIL import Image
from tqdm import tqdm

from glob import glob
from PIL import Image
from tqdm import tqdm
from multiprocessing import Pool, cpu_count

def check_image_integrity(image_path):
    try:
        with Image.open(image_path) as img:
            img.verify()  # Verify that the file is not broken or truncated
            return None  # Return None if the image is good
    except (IOError, SyntaxError) as e:
        return image_path  # Return the path if the image is bad

def process_images(images):
    with Pool(cpu_count()) as pool:
        bad_images = list(tqdm(pool.imap_unordered(check_image_integrity, images), total=len(images)))
        bad_images = [img for img in bad_images if img is not None]  # Filter out None values
    return bad_images
    
images = glob("/mnt/data/OD_Data/8mp_data_kernel_1.7mp/*/*/*.png")

bad_images = process_images(images)


100%|██████████| 107043/107043 [10:53<00:00, 163.78it/s]


In [27]:
len(bad_images)

1

In [22]:
bad_image = '/mnt/data/OD_Data/8mp_data_kernel_1.7mp/paris_night_urban_for_train_tagged_14_05_23/paris_night_urban_for_train_tagged_14_05_23_images/1675531355516374.png'

good_image = images[100]

check_image_integrity(bad_image)

Truncated File Read


'/mnt/data/OD_Data/8mp_data_kernel_1.7mp/paris_night_urban_for_train_tagged_14_05_23/paris_night_urban_for_train_tagged_14_05_23_images/1675531355516374.png'

In [23]:
check_image_integrity(good_image)

In [28]:
bad_images

['/mnt/data/OD_Data/8mp_data_kernel_1.7mp/paris_night_urban_for_train_tagged_14_05_23/paris_night_urban_for_train_tagged_14_05_23_images/1675531355516374.png']

In [30]:
import os
os.system(f"mv {bad_image} ./")

0