In [None]:
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from pathlib import Path
import os

In [None]:
from source.visual_genome_to_yolo import read_yaml_to_class_map
from source.visual_genome_to_yolo import read_yolo_metadata
from source.visual_genome_to_yolo import save_class_map_to_yaml
from source.visual_genome_meta_data import plot_image_with_multiple_bboxes
from source.visual_genome_data import get_file_by_id

In [None]:
import shutil

In [None]:
def plot_image_with_yolo_grid(image_path, grid_divisions=10, figsize=(12, 10), label_size=12,
                              zoom_region=None):
    """
    Plot an image with a YOLO-compatible grid overlay and labeled axes.

    Grid lines and tick labels are expressed in normalized YOLO coordinates,
    so box corners can be read directly off the figure. The figure is shown
    with plt.show(); nothing is returned.

    Args:
        image_path: Path to image file (.jpg, .tif, etc.)
        grid_divisions: Number of grid divisions per axis (default 10 for 0.1
                        increments on the full image)
        figsize: Figure size as (width, height) tuple (default (12, 10))
        label_size: Font size for tick labels, axis labels and title (default 12)
        zoom_region: Tuple (x_min, y_min, x_max, y_max) in YOLO coordinates (0.0-1.0)
                     to zoom into a specific region. None for full image (default)
    """
    import matplotlib.pyplot as plt
    import numpy as np
    from PIL import Image
    print(f"Current image: {image_path}")

    # Pillow keeps the underlying file open until the image is closed, so use
    # a context manager. The image is handed to imshow while still open.
    with Image.open(image_path) as img:
        width, height = img.size
        fig, ax = plt.subplots(figsize=figsize)
        ax.imshow(img)

    # Determine zoom bounds
    if zoom_region is None:
        x_min, y_min, x_max, y_max = 0.0, 0.0, 1.0, 1.0
    else:
        x_min, y_min, x_max, y_max = zoom_region

    # Grid positions (normalized) spanning the visible region
    grid_positions_x = np.linspace(x_min, x_max, grid_divisions + 1)
    grid_positions_y = np.linspace(y_min, y_max, grid_divisions + 1)

    # Vertical grid lines (x-coordinates), converted to pixel positions
    for pos in grid_positions_x:
        ax.axvline(x=pos * width, color='red', alpha=0.7, linewidth=1)

    # Horizontal grid lines (y-coordinates), converted to pixel positions
    for pos in grid_positions_y:
        ax.axhline(y=pos * height, color='red', alpha=0.7, linewidth=1)

    # Label precision follows the actual spacing between grid lines rather
    # than the division count alone; otherwise a zoomed view prints several
    # identical labels. For the full image this yields the same precision as
    # the previous thresholds (<=10 -> 1, <=100 -> 2, <=1000 -> 3 decimals).
    positive_spans = [span for span in (x_max - x_min, y_max - y_min) if span > 0]
    if grid_divisions >= 1 and positive_spans:
        step = min(positive_spans) / grid_divisions
        # The small epsilon keeps exact powers of ten (0.1, 0.01, ...) from
        # being rounded up by floating-point noise.
        decimal_places = int(min(6, max(1, np.ceil(-np.log10(step) - 1e-9))))
    else:
        decimal_places = 1

    x_labels = [f"{pos:.{decimal_places}f}" for pos in grid_positions_x]
    y_labels = [f"{pos:.{decimal_places}f}" for pos in grid_positions_y]

    # Ticks sit on the grid lines (pixel positions) but show normalized values
    ax.set_xticks([pos * width for pos in grid_positions_x])
    ax.set_xticklabels(x_labels, fontsize=label_size)
    ax.set_yticks([pos * height for pos in grid_positions_y])
    ax.set_yticklabels(y_labels, fontsize=label_size)

    # Set zoom limits if specified
    if zoom_region is not None:
        ax.set_xlim(x_min * width, x_max * width)
        ax.set_ylim(y_max * height, y_min * height)  # Note: y-axis is flipped in images

    ax.set_xlabel('YOLO X-coordinate (normalized)', fontsize=label_size)
    ax.set_ylabel('YOLO Y-coordinate (normalized)', fontsize=label_size)

    zoom_text = (
        f" (zoomed: {x_min:.2f}-{x_max:.2f}, {y_min:.2f}-{y_max:.2f})"
        if zoom_region is not None else ""
    )
    ax.set_title(f'YOLO Grid Overlay - {image_path}{zoom_text}', fontsize=label_size)

    plt.tight_layout()
    plt.show()

In [None]:
def parse_zoom_input(user_input):
    """
    Parse a zoom region typed by the user.

    Args:
        user_input: Text of the form "x_min, y_min, x_max, y_max" in
                    normalized YOLO coordinates (0.0-1.0).

    Returns:
        None if the input is empty or whitespace only.
        A tuple (x_min, y_min, x_max, y_max) of floats for a valid region.
        False if the input is invalid; an explanatory message is printed.
    """
    text = user_input.strip()
    if not text:
        return None

    try:
        coords = tuple(float(part) for part in text.split(','))
    except ValueError:
        print("Invalid input. Please enter numbers separated by commas.")
        return False

    if len(coords) != 4:
        print("Invalid format. Please enter 4 values: x_min, y_min, x_max, y_max")
        return False

    x_min, y_min, x_max, y_max = coords
    # Comparisons with NaN are always False, so this check also rejects
    # "nan" (and "inf"), which float() accepts but axis limits do not.
    if not (0.0 <= x_min < x_max <= 1.0 and 0.0 <= y_min < y_max <= 1.0):
        print("Invalid region. Values must satisfy 0 <= x_min < x_max <= 1 and 0 <= y_min < y_max <= 1.")
        return False

    return coords

In [None]:
def grid_coords_to_yolo(top_left, bottom_right, image_path):
    """
    Convert grid coordinates to YOLO format bounding box.

    Args:
        top_left: Tuple (x, y) of top-left corner in grid coordinates (0.0-1.0)
        bottom_right: Tuple (x, y) of bottom-right corner in grid coordinates (0.0-1.0)
        image_path: Path to the image the box belongs to. The file is opened
                    only to confirm that it exists and is a readable image.

    Returns:
        Tuple (x_center, y_center, width, height) in YOLO format (all 0.0-1.0)

    Raises:
        ValueError: If top_left is not strictly above and left of
                    bottom_right, or any coordinate is outside 0.0-1.0.
        Whatever PIL's Image.open raises if the image cannot be opened.
    """
    from PIL import Image

    # Sanity-check the image, then release the file handle straight away;
    # normalized coordinates do not need the pixel dimensions.
    with Image.open(image_path):
        pass

    x1, y1 = top_left      # Top-left corner
    x2, y2 = bottom_right  # Bottom-right corner

    # Validate coordinates are in correct order
    if x1 >= x2 or y1 >= y2:
        raise ValueError(f"Invalid coordinates: top_left {top_left} should be above and left of bottom_right {bottom_right}")

    # Validate coordinates are in valid range
    if not (0 <= x1 <= 1 and 0 <= y1 <= 1 and 0 <= x2 <= 1 and 0 <= y2 <= 1):
        raise ValueError("All coordinates must be between 0.0 and 1.0")

    # YOLO stores a box as its center point plus its extent
    x_center = (x1 + x2) / 2
    y_center = (y1 + y2) / 2
    width = x2 - x1
    height = y2 - y1

    print(f"Grid coordinates: top_left=({x1:.3f}, {y1:.3f}), bottom_right=({x2:.3f}, {y2:.3f})")
    print(f"YOLO format: center=({x_center:.6f}, {y_center:.6f}), size=({width:.6f}, {height:.6f})")

    return x_center, y_center, width, height

# Usage examples:
# yolo_coords = grid_coords_to_yolo((0.2, 0.1), (0.8, 0.7), 'image.jpg')
# x_center, y_center, width, height = grid_coords_to_yolo((0.3, 0.2), (0.9, 0.6), 'image.jpg')

In [None]:
def create_yolo_metadata_file(output_path, bounding_boxes):
    """
    Write a YOLO label file containing one line per bounding box.

    Each line has the form "class_id x_center y_center width height" with the
    four box values formatted to six decimals. An existing file at
    output_path is overwritten.

    Args:
        output_path: Path where to save the .txt file
        bounding_boxes: List of tuples, each containing
                        (class_id, x_center, y_center, width, height)
    """
    with open(output_path, 'w') as label_file:
        label_file.writelines(
            f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n"
            for class_id, x_center, y_center, width, height in bounding_boxes
        )

    print(f"YOLO metadata saved to: {output_path}")
    print(f"Added {len(bounding_boxes)} bounding boxes")



In [None]:
def clear_yolo_metadata_by_id(data_path, identifier, id_end=True):
    """
    Empty the YOLO label file that belongs to an identifier.

    The file itself is kept; only its content (all bounding boxes) is
    removed. Candidate files come from get_file_by_id and are narrowed by
    id_end. If several files qualify, only the first one is cleared.

    Args:
        data_path: Path to directory containing .txt files
        identifier: Integer identifier to search for
        id_end: If True (default), select file with ID at end only
                (name ends with "_<identifier>.txt").
                If False, select file with ID surrounded by underscores.

    Returns:
        True if a file was cleared, False if no matching file was found.
    """
    candidates = get_file_by_id(data_path, identifier, '.txt')
    if not candidates:
        print(f"No .txt file found with identifier {identifier}")
        return False

    # Narrow the candidates to the requested naming pattern
    if id_end:
        wanted_suffix = f'_{identifier}.txt'
        selected = [name for name in candidates if name.endswith(wanted_suffix)]
    else:
        wanted_marker = f'_{identifier}_'
        selected = [name for name in candidates if wanted_marker in name]

    if not selected:
        pattern_type = "at end" if id_end else "with underscores"
        print(f"No .txt file found with identifier {identifier} {pattern_type}")
        return False

    target_name = selected[0]
    # Opening in write mode truncates the file to zero length
    with open(os.path.join(data_path, target_name), 'w'):
        pass

    print(f"Cleared metadata file: {target_name}")
    return True

In [None]:
def delete_files_by_id(data_path, identifier, file_extension, id_end=True):
    """
    Delete the files in a directory that belong to an identifier.

    Candidate files come from get_file_by_id and are narrowed by id_end.
    Files that cannot be removed are reported and skipped.

    Args:
        data_path: Path to directory containing files
        identifier: Integer identifier to search for
        file_extension: File extension (e.g., '.txt', '.jpg')
        id_end: If True (default), delete only files with ID at end
                (name ends with "_<identifier><file_extension>").
                If False, delete files with ID surrounded by underscores.

    Returns:
        Number of files that were deleted.
    """
    candidates = get_file_by_id(data_path, identifier, file_extension)

    # Pick the predicate for the requested naming pattern
    if id_end:
        wanted_suffix = f'_{identifier}{file_extension}'

        def is_selected(name):
            return name.endswith(wanted_suffix)
    else:
        wanted_marker = f'_{identifier}_'

        def is_selected(name):
            return wanted_marker in name

    deleted_count = 0
    for filename in filter(is_selected, candidates):
        try:
            os.remove(os.path.join(data_path, filename))
            print(f"Deleted: {filename}")
            deleted_count += 1
        except OSError as e:
            # Best effort: report the failure and continue with the next file
            print(f"Error deleting {filename}: {e}")

    print(f"Deleted {deleted_count} files")
    return deleted_count

# Usage:
# delete_files_by_id('/path/to/files', 2324505, '.txt')  # Delete files ending with _2324505.txt

In [None]:

# All locations are derived from the directory the notebook is started in:
# the annotation root is a sibling directory of the current working directory.
project_path = Path.cwd()
root_path = project_path.joinpath('..', 'test_yolo_annotation').resolve()

data_path = root_path.joinpath('visual_genome_data')  # images
yolo_path = root_path.joinpath('visual_genome_yolo')  # YOLO label files


In [None]:
# Show the resolved locations, one per line, so a wrong working directory is spotted early
print(root_path, data_path, yolo_path, sep='\n')

In [None]:

# Remove hidden files and folders created by macOS / Jupyter so that they do
# not show up in the directory listings used below.
for folder in (data_path, yolo_path):
    ds_store_path = os.path.join(folder, '.DS_Store')
    if os.path.exists(ds_store_path):
        os.remove(ds_store_path)
        print(f"Removed .DS_Store from {folder}")

for folder in (yolo_path, data_path):
    ipynb_checkpoints_path = os.path.join(folder, '.ipynb_checkpoints')
    if os.path.exists(ipynb_checkpoints_path):
        shutil.rmtree(ipynb_checkpoints_path)
        print(f"Removed .ipynb_checkpoints directory from {folder}")

In [None]:
# os.listdir returns entries in arbitrary order; sort them so that the
# slices taken from this listing later are the same on every run.
meta_data_files = sorted(os.listdir(yolo_path))
meta_data_files

In [None]:
files_annotated = []
identifiers = []
for file in meta_data_files:
    # The identifier is the part between the last underscore and the first
    # following dot, e.g. 'visual_genome_2321125.txt' -> '2321125'.
    identifier = file.split('_')[-1].split('.')[0]
    print(identifier)
    try:
        result = int(identifier)
    except ValueError:
        # Entries without a numeric id (e.g. class_map.yaml) are skipped
        print("Cannot convert to int")
        continue
    print(f"Conversion successful: {result}")
    identifiers.append(result)
print(identifiers)

for identifier in identifiers:
    matches = get_file_by_id(data_path, identifier, '.jpg')
    if not matches:
        # A label file without its image would otherwise raise IndexError
        print(f"No .jpg image found for identifier {identifier}; skipping")
        continue
    # Prefer the image whose name ends with '_<id>.jpg' (the counterpart of
    # the label file); fall back to the first match as before.
    exact_suffix = f'_{identifier}.jpg'
    filename = next((name for name in matches if name.endswith(exact_suffix)), matches[0])
    files_annotated.append(filename)
print(files_annotated)

In [None]:
# Class names in class-id order: the position in this list is the YOLO class id
class_map = {name: class_id for class_id, name in enumerate(['man', 'girl statue'])}

In [None]:

# Store the class map next to the label files
yaml_path = os.path.join(yolo_path, 'class_map.yaml')
file_path = f"{yolo_path}/"

save_class_map_to_yaml(class_map, yaml_path)

In [None]:
# Read the class map back from the YAML file in the label directory
yaml_file_name = 'class_map.yaml'
yaml_path = yolo_path.joinpath(yaml_file_name)
class_map = read_yaml_to_class_map(str(yaml_path))


In [None]:
# Display the class map as returned by read_yaml_to_class_map
class_map

In [None]:
# Visual check: draw the stored boxes on the first three annotated images
for file in files_annotated[:3]:
    # Text between the last underscore and the following dot is the image id
    img_id = file.rpartition('_')[2].partition('.')[0]
    print(img_id)

    label_file = f'visual_genome_{img_id}.txt'
    label_path = os.path.join(yolo_path, label_file)
    print(label_path)

    labels, bboxes = read_yolo_metadata(label_path, class_map)
    class_names = list(labels)

    image_path = os.path.join(data_path, file)
    print(image_path)

    plot_image_with_multiple_bboxes(image_path, bboxes, class_names)

In [None]:
# os.listdir returns entries in arbitrary order; sort them so that the
# index-based selections made from this listing are the same on every run.
files_to_annotate = sorted(os.listdir(data_path))
files_to_annotate

In [None]:
# Show the grid for a single image. To inspect a specific file instead of the
# first one in the listing, assign its name here, e.g.
#   file_name = 'visual_genome_2399497.jpg'
#   file_name = 'visual_genome_2405738.jpg'
file_name = files_to_annotate[0]
image_path = data_path / file_name

plot_image_with_yolo_grid(
    image_path,
    grid_divisions=15,
    figsize=(18, 14),
    label_size=15,
)

In [None]:
# Display the listing of the image directory that the viewer below selects from
files_to_annotate

In [None]:
from IPython.display import clear_output

In [None]:
def _grid_divisions_for_zoom(zoom_region, default=15):
    """Return the number of grid divisions to draw for a zoom region.

    The smaller the zoomed area (in normalized units), the finer the grid.
    Without a zoom region the default is used.
    """
    if zoom_region is None:
        return default
    x_min, y_min, x_max, y_max = zoom_region
    zoom_area = (x_max - x_min) * (y_max - y_min)
    if zoom_area < 0.1:
        return 50
    if zoom_area < 0.25:
        return 30
    if zoom_area < 0.5:
        return 20
    return 10


files_to_annotate_selection = files_to_annotate[0:7]

img_idx = 0
exit_viewer = False

# Outer loop: one image at a time. Inner loop: redraw the same image until
# the user moves on (Enter) or quits ('q').
while img_idx < len(files_to_annotate_selection) and not exit_viewer:
    img_file = files_to_annotate_selection[img_idx]
    image_path = os.path.join(data_path, img_file)
    zoom_region = None

    while True:
        # Previously the auto-adjusted value was computed but never passed to
        # the plot call, so every view used 15 divisions.
        grid_divisions = _grid_divisions_for_zoom(zoom_region)
        # plot_image_with_yolo_grid already calls plt.show()
        plot_image_with_yolo_grid(image_path, grid_divisions=grid_divisions, figsize=(20, 16),
                                  label_size=22, zoom_region=zoom_region)

        user_input = input("Press Enter for next image, enter zoom region (x_min,y_min,x_max,y_max), or 'q' to quit: ")
        plt.close()
        clear_output(wait=True)

        command = user_input.strip().lower()
        if command in ['q', 'quit']:
            exit_viewer = True
            break
        if not command:
            break  # next image

        parsed_zoom = parse_zoom_input(user_input)
        if not parsed_zoom:
            # Invalid input (a message was printed): show the same view again
            continue
        zoom_region = parsed_zoom

    img_idx += 1

In [None]:
# Images that contain none of the classes in the class map
image_files_no_object = [
    'visual_genome_2405738.jpg',
    'visual_genome_2321173.jpg',
]

In [None]:
# Manual annotations read off the grid plots. Per image: a list of objects,
# each with a class id and parallel lists of top-left / bottom-right corners
# (normalized coordinates); the i-th entries of both lists form one box.
files_for_annotation = [
    {
        'image_file': 'visual_genome_2.jpg',
        'objects': [
            {
                'object_class': 0,
                'top_left_values': [(0.4, 0.5)],
                'bottom_right_values': [(0.65, 0.99)],
            },
        ],
    },
    {
        'image_file': 'visual_genome_1.jpg',
        'objects': [
            {
                'object_class': 0,
                'top_left_values': [(0.3, 0.4), (0.46, 0.41)],
                'bottom_right_values': [(0.4, 0.9), (0.58, 0.87)],
            },
        ],
    },
    {
        'image_file': 'visual_genome_2321125.jpg',
        'objects': [
            {
                'object_class': 1,
                'top_left_values': [(0.7, 0.2)],
                'bottom_right_values': [(0.93, 0.93)],
            },
        ],
    },
]

In [None]:
# Display the manual annotation input defined above
files_for_annotation

In [None]:
# Convert the manual corner annotations into YOLO tuples, grouped per image
# together with the label file they will be written to.
meta_data_for_annotation = []
file_names_for_annotation = []

for file in files_for_annotation:
    file_name = file['image_file']
    print(file_name)
    file_names_for_annotation.append(file_name)
    image_path = os.path.join(data_path, file_name)
    # Parse the id from the file name, not from the full path: directory
    # names may contain underscores or dots as well.
    img_id = file_name.split('_')[-1].split('.')[0]
    print(img_id)
    label_file = 'visual_genome_' + img_id + '.txt'
    label_path = os.path.join(yolo_path, label_file)
    print(label_path)
    file_meta_data = {'file_name': file_name,
                      'output_path': label_path,
                      'objects': []}

    for img_object in file['objects']:
        object_class = img_object['object_class']
        print(object_class)

        top_left_values = img_object['top_left_values']
        print(top_left_values)

        bottom_right_values = img_object['bottom_right_values']
        print(bottom_right_values)

        # zip() would silently drop the surplus corners of the longer list
        if len(top_left_values) != len(bottom_right_values):
            raise ValueError(
                f"{file_name}: {len(top_left_values)} top-left corners but "
                f"{len(bottom_right_values)} bottom-right corners for class {object_class}"
            )

        # One YOLO tuple per (top_left, bottom_right) pair
        for top_left, bottom_right in zip(top_left_values, bottom_right_values):
            x_center, y_center, width, height = grid_coords_to_yolo(top_left, bottom_right, image_path)
            yolo_meta_data = (object_class, x_center, y_center, width, height)
            file_meta_data['objects'].append(yolo_meta_data)

    meta_data_for_annotation.append(file_meta_data)
        
        
        

In [None]:
# Display the per-image metadata (file name, label path, YOLO tuples) built above
meta_data_for_annotation

In [None]:
# Every image handled in this session: those with boxes first, then those without objects
files_newly_annotated = [*file_names_for_annotation, *image_files_no_object]

In [None]:
# Write one YOLO label file per annotated image
for file_meta_data in meta_data_for_annotation:
    print(file_meta_data)
    label_path, yolo_meta_data = file_meta_data['output_path'], file_meta_data['objects']
    print(type(yolo_meta_data))
    create_yolo_metadata_file(label_path, yolo_meta_data)
    

In [None]:
# Display the manual annotation input again for reference
files_for_annotation

In [None]:
# Display the images listed as containing no object
image_files_no_object

In [None]:
# Display the names of the images that received bounding boxes
file_names_for_annotation

In [None]:
# Display the combined list: images with boxes followed by images without objects
files_newly_annotated

In [None]:
# Write an empty label file for every image listed as containing no object.
# NOTE: an existing label file with the same name is truncated.
for img_name in image_files_no_object:
    print(img_name)
    # Swap the extension with splitext: str.replace('.jpg', '.txt') would
    # leave names with another extension (.png, .tif, .JPG) unchanged and
    # create an empty file with an image extension in the label directory.
    txt_name = os.path.splitext(img_name)[0] + '.txt'
    print(txt_name)
    output_path = os.path.join(yolo_path, txt_name)
    # Opening in write mode creates the file (or empties it); nothing is written
    with open(output_path, 'w'):
        pass


In [None]:
# Display the images whose label files are plotted in the next cell
files_newly_annotated

In [None]:


# Visual check of the label files written in this session; images without
# objects have an empty label file.
for file in files_newly_annotated:
    # Text between the last underscore and the following dot is the image id
    img_id = file.rpartition('_')[2].partition('.')[0]
    print(img_id)

    label_file = f'visual_genome_{img_id}.txt'
    label_path = os.path.join(yolo_path, label_file)
    print(label_path)

    labels, bboxes = read_yolo_metadata(label_path, class_map)
    class_names = list(labels)

    image_path = data_path / file
    print(image_path)

    plot_image_with_multiple_bboxes(image_path, bboxes, class_names)