## 🧩 Integrating BBoxes and Images into a Unified Dataset

Now that all bounding boxes are extracted and image integrity is verified, it's time to consolidate them into a unified dataset.

---

### 📁 Folder Structure

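We will create the following directory layout:

```
combined_bboxes/        # every cropped line image, renumbered consecutively
├── line1.jpg
├── line2.jpg
└── ...
combined_labels.txt     # one "lineN: <label>" entry per copied image
```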



In [None]:
import os
import shutil

# Define paths
main_directory = "extracted_bounding_boxes"
output_directory = "combined_bboxes"
combined_labels_file = "combined_labels.txt"


def combine_bounding_boxes(main_directory, output_directory, combined_labels_file):
    """Merge per-image bounding-box crops and labels into one flat dataset.

    Every subfolder ``<name>`` of ``main_directory`` is expected to hold a
    label file ``bounding_box_texts_<name>.txt`` whose lines look like
    ``Line <k>: <label>``, next to the matching crops ``line<k>.jpg``.
    Each crop is copied to ``output_directory`` as ``line<n>.jpg``, where
    ``n`` counts up from 1 across all subfolders, and a ``line<n>: <label>``
    entry is written to ``combined_labels_file``.

    Args:
        main_directory: Folder containing one subfolder per source image.
        output_directory: Destination folder for the renumbered crops;
            created if it does not exist.
        combined_labels_file: Path of the text file receiving all labels.

    Returns:
        The list of label lines written to ``combined_labels_file``
        (each one ends with a newline).

    Raises:
        FileNotFoundError: If ``main_directory`` does not exist.
    """
    os.makedirs(output_directory, exist_ok=True)

    combined_labels = []
    current_line_number = 1

    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # global numbering reproducible between runs and machines.
    for subfolder in sorted(os.listdir(main_directory)):
        subfolder_path = os.path.join(main_directory, subfolder)
        if not os.path.isdir(subfolder_path):
            continue

        # The label file name is derived from the subfolder name.
        label_file_path = os.path.join(
            subfolder_path, f"bounding_box_texts_{subfolder}.txt"
        )
        if not os.path.exists(label_file_path):
            continue

        with open(label_file_path, "r") as label_file:
            lines = label_file.readlines()

        for line in lines:
            entry = line.strip()
            if not entry:
                # Blank lines carry no label.
                continue

            # Split on the FIRST ": " only, so labels that themselves
            # contain ": " stay intact instead of raising ValueError.
            original_line_number, separator, label = entry.partition(": ")
            if not separator:
                print(f"Skipping malformed label line in {label_file_path}: {entry!r}")
                continue

            # Clean up the line number (e.g., remove "Line ").
            original_line_number = original_line_number.replace("Line ", "").strip()

            original_image_name = f"line{original_line_number}.jpg"
            original_image_path = os.path.join(subfolder_path, original_image_name)

            # Only emit a label when its image can actually be copied;
            # otherwise the labels file would reference a missing image.
            if not os.path.exists(original_image_path):
                print(f"Skipping {original_image_name} in {subfolder}: image not found")
                continue

            new_image_path = os.path.join(
                output_directory, f"line{current_line_number}.jpg"
            )
            shutil.copyfile(original_image_path, new_image_path)

            combined_labels.append(f"line{current_line_number}: {label}\n")
            print(current_line_number)
            print(original_image_name)
            print(subfolder)
            current_line_number += 1

    # Write the combined labels to a single text file.
    with open(combined_labels_file, "w") as combined_file:
        combined_file.writelines(combined_labels)

    return combined_labels


# A notebook cell executes with __name__ == "__main__", so this still runs
# there, while importing the code elsewhere no longer touches the disk.
if __name__ == "__main__":
    combined_labels = combine_bounding_boxes(
        main_directory, output_directory, combined_labels_file
    )
    # Keep the next free index available as a global, as before.
    current_line_number = len(combined_labels) + 1

    print(f"Processing complete. All bounding box images are in {output_directory}")
    print(f"Combined labels are saved in {combined_labels_file}")


1
line1.jpg
IMG_1368
2
line2.jpg
IMG_1368
3
line3.jpg
IMG_1368
4
line4.jpg
IMG_1368
5
line5.jpg
IMG_1368
6
line6.jpg
IMG_1368
7
line7.jpg
IMG_1368
8
line8.jpg
IMG_1368
9
line9.jpg
IMG_1368
10
line10.jpg
IMG_1368
11
line11.jpg
IMG_1368
12
line12.jpg
IMG_1368
13
line13.jpg
IMG_1368
14
line14.jpg
IMG_1368
15
line15.jpg
IMG_1368
16
line16.jpg
IMG_1368
17
line17.jpg
IMG_1368
18
line18.jpg
IMG_1368
19
line19.jpg
IMG_1368
20
line20.jpg
IMG_1368
21
line21.jpg
IMG_1368
22
line22.jpg
IMG_1368
23
line23.jpg
IMG_1368
24
line24.jpg
IMG_1368
25
line25.jpg
IMG_1368
26
line26.jpg
IMG_1368
27
line27.jpg
IMG_1368
28
line28.jpg
IMG_1368
29
line1.jpg
IMG_1370
30
line2.jpg
IMG_1370
31
line3.jpg
IMG_1370
32
line4.jpg
IMG_1370
33
line5.jpg
IMG_1370
34
line6.jpg
IMG_1370
35
line7.jpg
IMG_1370
36
line8.jpg
IMG_1370
37
line9.jpg
IMG_1370
38
line10.jpg
IMG_1370
39
line11.jpg
IMG_1370
40
line12.jpg
IMG_1370
41
line13.jpg
IMG_1370
42
line14.jpg
IMG_1370
43
line15.jpg
IMG_1370
44
line16.jpg
IMG_1370
45
line17.jpg
I