In [5]:
import cv2
import os
import numpy as np
import pandas as pd
import ast  # To safely evaluate string representation of lists
import matplotlib.pyplot as plt
from shapely.geometry import Polygon

# Project locations. Every path below is derived from this single root folder.
base_path = "/Volumes/Agora_Sim/Python/CAA_Proj_1"

# Inputs: the image folder and the two CSV files.
image_dir = os.path.join(base_path, "images")
train_csv_path, test_csv_path = (
    os.path.join(base_path, csv_name) for csv_name in ("Train.csv", "Test.csv")
)

# Outputs: one folder per CSV, created up front (no error if it already exists).
output_train_dir, output_test_dir = (
    os.path.join(base_path, out_name) for out_name in ("annotated_train", "annotated_test")
)
os.makedirs(output_train_dir, exist_ok=True)
os.makedirs(output_test_dir, exist_ok=True)

# Function to load an image
def load_image(image_path):
    """Read an image file and return it in RGB channel order.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The image returned by ``cv2.imread`` after BGR -> RGB conversion, or
        ``None`` when the path does not exist or the file cannot be decoded.
        A message is printed in both failure cases.
    """
    if not os.path.exists(image_path):
        print(f"Image not found: {image_path}")
        return None

    img = cv2.imread(image_path)
    # cv2.imread reports an unreadable/corrupt file by returning None instead
    # of raising; passing that on to cvtColor would raise and abort the caller.
    if img is None:
        print(f"Failed to read image: {image_path}")
        return None

    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB

# Function to convert polygon string to a proper list of tuples
def parse_polygon(polygon_str):
    """Parse the text form of a polygon into an ``(N, 2)`` int32 array.

    Args:
        polygon_str: Python-literal text for a sequence of ``(x, y)`` pairs,
            e.g. ``"[(10, 20), (30, 40), (50, 60)]"``.

    Returns:
        A ``numpy.ndarray`` of dtype int32 and shape ``(N, 2)`` (``(0, 2)``
        for an empty sequence), or ``None`` -- after printing a message --
        when the text cannot be parsed or is not a sequence of coordinate
        pairs.
    """
    try:
        # literal_eval only evaluates Python literals, never arbitrary code.
        coords = np.array(ast.literal_eval(polygon_str), np.int32)

        # Normalise "no points" so callers always see a 2-column array.
        if coords.size == 0:
            return coords.reshape(0, 2)

        # Anything that is not a list of (x, y) pairs (a scalar, a flat list,
        # triples, nested polygons) would be silently mis-reshaped or crash
        # later when drawn, so reject it here.
        if coords.ndim != 2 or coords.shape[1] != 2:
            raise ValueError(f"expected (N, 2) coordinates, got shape {coords.shape}")

        return coords
    except (SyntaxError, ValueError, TypeError):
        # TypeError covers literals numpy cannot cast to int32 (dicts, None, ...).
        print(f"Failed to parse polygon: {polygon_str}")
        return None

# Function to draw polygons and labels
def draw_annotations(image, annotations):
    """Overlay one polygon outline and its text label on ``image``.

    Args:
        image: Image array to draw on; it is modified in place.
        annotations: ``(polygon_coords, nbr_pan, nbr_boil, placement)``, or
            ``None`` / an empty sequence when there is nothing to draw.

    Returns:
        The same ``image`` object. It is returned untouched when there are no
        annotations or when the polygon has fewer than 3 points.
    """
    has_annotations = annotations is not None and len(annotations) != 0
    if not has_annotations:
        print("No valid annotations for this image.")
        return image

    outline, pan_count, boil_count, placement_tag = annotations

    # Fewer than three vertices cannot form a polygon.
    if len(outline) < 3:
        print("Skipping: Polygon has fewer than 3 points")
        return image

    # Outline: the points are reshaped to (-1, 1, 2) before being drawn.
    contour = outline.reshape((-1, 1, 2))
    cv2.polylines(image, [contour], isClosed=True, color=(255, 0, 0), thickness=2)

    # Label: anchored at the polygon centroid, truncated to integer pixels.
    centroid_xy = Polygon(outline).centroid.coords[0]
    anchor_x, anchor_y = (int(value) for value in centroid_xy)

    text = f"P: {pan_count}, B: {boil_count}, {placement_tag}"
    font_scale, text_color, text_thickness = 0.5, (255, 255, 255), 1
    cv2.putText(image, text, (anchor_x, anchor_y), cv2.FONT_HERSHEY_SIMPLEX,
                font_scale, text_color, text_thickness, cv2.LINE_AA)

    return image

# Function to process dataset using CSV data
def process_dataset(csv_path, output_dir, display=False):
    """Annotate every image referenced by a CSV file and save the results.

    The CSV is read with pandas and must provide the columns ``ID``,
    ``polygon``, ``pan_nbr``, ``boil_nbr`` and ``placement``. The image for a
    row is ``<ID>.jpg`` inside the module-level ``image_dir``. All rows that
    share an ``ID`` are drawn onto the same image, which is then written once
    to ``output_dir`` under the same file name.

    Args:
        csv_path: Path of the CSV file describing the annotations.
        output_dir: Folder that receives the annotated images.
        display: When true, also show each annotated image with matplotlib.

    Returns:
        None. Images with no parseable polygon, or whose file is missing or
        unreadable, are skipped.
    """
    df = pd.read_csv(csv_path)

    # The output file name depends only on ID, so rows sharing an ID must be
    # drawn together; handling them one by one would reload a clean image for
    # every row and leave only the last polygon in the saved file.
    # sort=False keeps images in their first-appearance order from the CSV.
    # Note: groupby drops rows whose ID is missing (pandas default).
    for image_id, rows in df.groupby('ID', sort=False):
        # f-string instead of `+` so numeric IDs do not raise TypeError.
        image_name = f"{image_id}.jpg"  # Assuming images are named using ID + .jpg
        image_path = os.path.join(image_dir, image_name)

        # Parse annotation data from CSV, dropping rows whose polygon is invalid
        annotations_for_image = []
        for _, row in rows.iterrows():
            polygon_coords = parse_polygon(row['polygon'])
            if polygon_coords is None:
                continue  # Skip if polygon parsing fails
            annotations_for_image.append(
                (polygon_coords, row['pan_nbr'], row['boil_nbr'], row['placement'])
            )

        if not annotations_for_image:
            continue  # Nothing drawable for this image

        # Load image
        image = load_image(image_path)
        if image is None:
            continue  # Skip if image is missing

        # Annotate image with every polygon recorded for it
        annotated_image = image
        for annotations in annotations_for_image:
            annotated_image = draw_annotations(annotated_image, annotations)

        # Save the annotated image (converted back to OpenCV's BGR order)
        output_path = os.path.join(output_dir, image_name)
        saved = cv2.imwrite(output_path, cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR))
        if not saved:
            # imwrite reports failure through its return value.
            print(f"Failed to write annotated image: {output_path}")

        # Display the image (optional)
        if display:
            plt.figure(figsize=(10, 10))
            plt.imshow(annotated_image)
            plt.axis("off")
            plt.show()


In [None]:

# Annotate both CSV files; each one writes into its own output folder.
process_dataset(csv_path=train_csv_path, output_dir=output_train_dir, display=False)
process_dataset(csv_path=test_csv_path, output_dir=output_test_dir, display=False)

print("Processing complete! Annotated images are saved in their respective output directories.")
