In [1]:
import os
import random
import shutil
from PIL import Image
import time

In [2]:
# Team members who share the labelling work; each name is also used as a folder name.
group_members = [
    "Khanh Toan Nguyen",
    "Ayush Bhandari",
    "Keshini Munasinghe",
    "Max Harrison",
    "Ratanakvisal Heng",
]

In [3]:
# Class labels; each one names a sub-folder of images inside the input directory.
labels = [
    "damaged-sign3",
    "not_rubbish",
    "rubbish",
]

In [4]:
# Source root: the images for each label are read from <input_dir>/<label>.
input_dir = "data"
# Destination root: converted images are written to <output_dir>/<member>/<label>.
output_dir = "data_labelling"

In [5]:
# Build the <output_dir>/<member>/<label> folder tree, one branch per member and label.
# exist_ok=True makes this cell safe to re-run.
for member in group_members:
    for label in labels:
        label_dir = os.path.join(output_dir, member, label)
        os.makedirs(label_dir, exist_ok=True)

In [None]:
def chunking_and_converting_images(group_members,input_dir,output_dir,label):
    """Split one label's images across the group and save them as renamed PNGs.

    Image files (.jpg / .jpeg / .png, case-insensitive) found directly in
    ``<input_dir>/<label>`` are sorted, shuffled with a fixed seed and cut into
    consecutive chunks, one per member. Every member receives
    ``num_images // len(group_members)`` images; the last member also receives
    the remainder. Each image is converted to RGB and written to
    ``<output_dir>/<member>/<label>/<label>-NNN.png``. ``NNN`` is a counter
    shared by all members that only advances after a successful save, so the
    output names stay gap-free even when some images are skipped.

    Args:
        group_members: Sequence of member names (used as folder names).
        input_dir: Root folder that contains one sub-folder per label.
        output_dir: Root folder for the per-member output tree.
        label: Name of the label sub-folder to process.

    Returns:
        A tuple ``(error_image, error_count)``: the source paths of the images
        that could not be converted, and how many there were.

    Raises:
        ValueError: If ``group_members`` is empty.
        OSError: If ``<input_dir>/<label>`` cannot be listed or an output
            folder cannot be created.
    """
    if not group_members:
        raise ValueError("group_members must contain at least one member")

    start_time = time.time()
    error_image = []

    label_path = os.path.join(input_dir,label)
    # os.listdir() returns entries in arbitrary order, so sort before shuffling;
    # otherwise the fixed seed does not guarantee the same split on every machine.
    images = sorted(
        f for f in os.listdir(label_path)
        if f.lower().endswith(('.jpg', '.jpeg', '.png'))
    )

    print(f"Process chunking for label: {label}\n")

    # Private generator with a fixed seed (42): reproducible shuffle without
    # reseeding the global `random` state used by the rest of the notebook.
    random.Random(42).shuffle(images)

    num_images = len(images)
    per_person = num_images // len(group_members)               # How many images each team member has to label

    print(f"Number of images: {num_images}")
    print(f"Each member has to label: {per_person} images\n")

    img_counter = 1
    status_width = 0                                            # Length of the in-place progress line on screen (0 = none)

    for i, member in enumerate(group_members):
        if status_width:
            print()                                             # Finish the "\r" progress line so the header does not overwrite it
            status_width = 0
        print(f"Member {i+1}: {member}")
        start = i * per_person
        # The last member also takes the images left over by the integer division.
        end = (i + 1) * per_person if i < len(group_members)-1 else num_images
        print(f"    Start idx: {start}, End idx: {end}")
        member_imgs = images[start:end]                         # Chunk of images assigned to this member

        # Create the target folder here so the function does not depend on
        # another cell having been run first.
        member_dir = os.path.join(output_dir, member, label)
        os.makedirs(member_dir, exist_ok=True)

        for img_name in member_imgs:
            img_path = os.path.join(label_path, img_name)
            new_name = f"{label}-{img_counter:03d}.png"         # Rename the image
            save_path = os.path.join(member_dir, new_name)
            try:
                with Image.open(img_path) as im:
                    im.convert("RGB").save(save_path, "PNG")    # Convert to PNG
            except Exception as e:
                # Best effort: report the unreadable image, remember it and carry on.
                if status_width:
                    print()
                    status_width = 0
                print(f"❌ Skipping for Error processing {img_path}: {e}")
                error_image.append(img_path)                    # Trace which image failed
                continue

            img_counter += 1

            elapsed_min, elapsed_sec = divmod(int(time.time() - start_time), 60)
            status = (
                f"Processing img: {img_path} - Convert to new name: {save_path} "
                f"| Elapsed: {elapsed_min}m {elapsed_sec}s"
            )
            # Pad to the previous width so a shorter line fully erases the longer one before it.
            print(status.ljust(status_width), end="\r")
            status_width = len(status)

    if status_width:
        print()                                                 # Leave the final progress line intact
    return error_image, len(error_image)

In [None]:
# Split and convert the images of the first label ("damaged-sign3").
err, count = chunking_and_converting_images(
    group_members=group_members,
    input_dir=input_dir,
    output_dir=output_dir,
    label=labels[0],
)


Process chunking for label: damaged-sign3

Number of images: 527
Each member has to label: 105 images

Member 1: Khanh Toan Nguyen
    Start idx: 0, End idx: 105
Member 2: Ayush Bhandariaged-sign3\91343852333188940018699593236846337439772868550-25.jpg - Convert to new name: data_labelling\Khanh Toan Nguyen\damaged-sign3\damaged-sign3-105.png | Elapsed: 0m 5s
    Start idx: 105, End idx: 210
Member 3: Keshini Munasinghe-sign3\91343852333188941499275880222161360434283617287-25.jpg - Convert to new name: data_labelling\Ayush Bhandari\damaged-sign3\damaged-sign3-210.png | Elapsed: 0m 11s
    Start idx: 210, End idx: 315
Member 4: Max Harrisonamaged-sign3\91343852333188064379143965272942111149418746806-0.jpg - Convert to new name: data_labelling\Keshini Munasinghe\damaged-sign3\damaged-sign3-315.png | Elapsed: 0m 16ss
    Start idx: 315, End idx: 420
Member 5: Ratanakvisal Hengd-sign3\91343852333198685152013489949978895503452884065-0.jpg - Convert to new name: data_labelling\Max Harrison\da

In [8]:
# Display the failed image paths and the failure count returned by the most recent chunking call.
err,count

([], 0)

> No errors for `damaged-sign3`.

In [6]:
# Split and convert the images of the second label ("not_rubbish").
err, count = chunking_and_converting_images(
    group_members=group_members,
    input_dir=input_dir,
    output_dir=output_dir,
    label=labels[1],
)

Process chunking for label: not_rubbish

Number of images: 6554
Each member has to label: 1310 images

Member 1: Khanh Toan Nguyen
    Start idx: 0, End idx: 1310
Member 2: Ayush Bhandari_rubbish\2022-04-26T09.46.44.frame41.jpg - Convert to new name: data_labelling\Khanh Toan Nguyen\not_rubbish\not_rubbish-1310.png | Elapsed: 2m 19s.png | Elapsed: 2m 19s
    Start idx: 1310, End idx: 2620
Member 3: Keshini Munasinghebish\91343852333182717176372434584814937403551994388-2.jpg - Convert to new name: data_labelling\Ayush Bhandari\not_rubbish\not_rubbish-2620.png | Elapsed: 4m 44s
    Start idx: 2620, End idx: 3930
Member 4: Max Harrisonot_rubbish\2022-04-25T10.42.21.frame31.jpg - Convert to new name: data_labelling\Keshini Munasinghe\not_rubbish\not_rubbish-3930.png | Elapsed: 7m 4s7.png | Elapsed: 7m 3ss
    Start idx: 3930, End idx: 5240
Member 5: Ratanakvisal Hengbbish\vlad-03199.png - Convert to new name: data_labelling\Max Harrison\not_rubbish\not_rubbish-5240.png | Elapsed: 9m 15s\no

In [8]:
# Display the failed image paths and the failure count returned by the most recent chunking call.
err,count

([], 0)

> No errors for `not_rubbish`.

In [6]:
# Split and convert the images of the third label ("rubbish").
err, count = chunking_and_converting_images(
    group_members=group_members,
    input_dir=input_dir,
    output_dir=output_dir,
    label=labels[2],
)

Process chunking for label: rubbish

Number of images: 2701
Each member has to label: 540 images

Member 1: Khanh Toan Nguyen
    Start idx: 0, End idx: 540
❌ Skipping for Error processing data\rubbish\1c269143-b0b4-457d-b85e-e7e6e61d0817.jpeg: cannot identify image file 'data\\rubbish\\1c269143-b0b4-457d-b85e-e7e6e61d0817.jpeg' 0m 29s
Member 2: Ayush Bhandaribish\C334.30-11-2021-1638233360722.jpg - Convert to new name: data_labelling\Khanh Toan Nguyen\rubbish\rubbish-539.png | Elapsed: 1m 44sng | Elapsed: 1m 34s
    Start idx: 540, End idx: 1080
Member 3: Keshini Munasinghe\C191.19-6-2020-1592516609936.jpg - Convert to new name: data_labelling\Ayush Bhandari\rubbish\rubbish-1079.png | Elapsed: 3m 25ssng | Elapsed: 3m 23s
    Start idx: 1080, End idx: 1620
Member 4: Max Harrisonubbish\C234.21-9-2021-1632173520200.jpg - Convert to new name: data_labelling\Keshini Munasinghe\rubbish\rubbish-1619.png | Elapsed: 5m 4s.png | Elapsed: 5m 2ss
    Start idx: 1620, End idx: 2160
Member 5: Ratan

In [7]:
# Display the failed image paths and the failure count returned by the most recent chunking call.
err,count

(['data\\rubbish\\1c269143-b0b4-457d-b85e-e7e6e61d0817.jpeg'], 1)

> 1 error for `rubbish`: `data\rubbish\1c269143-b0b4-457d-b85e-e7e6e61d0817.jpeg` could not be identified as an image file and was skipped.