## Composite Transformations

In this notebook we will explore how to do a composition of transformations which will reflect our synthetic data pipeline. 

We will investigate:
- How to rotate our bounding box coordinates along with the image
    - Recall we are using the YOLO ml algorithm for our model. 
    - So if we rotate the image we will need to perserve the coordinates for the classification i.e. the top left corner of the card that holds the value and suit
    - we need to also perserve the coordinates for the card outline. These coordinates will not be labels for the training set but they are needed to construct a masking for the background changes
- How the transformations differ when we apply them in different order
    -  For e.g. we know that: blur -> contrast -> noise =/= contrast -> noise ->  blur 

## Rotate corner bounding box:

In [101]:
import numpy as np
import cv2
import os

img_path = "../../../data/raw/as.jpg" # path to image 
path = os.path.abspath(img_path)
img = cv2.imread(path) # load image

h, w = img.shape[:2] 
class_id, x_cent, y_cent, box_w, box_h = 9, 0.2671875, 0.1625, 0.059375, 0.09765625  # Classificaiton label i.e. Top-left corner

original image with the bounding box highlighted

In [102]:
# Convert to pixels for drawing
x_min = int((x_cent - box_w / 2) * w)  # ~97
x_max = int((x_cent + box_w / 2) * w)  # ~388
y_min = int((y_cent - box_h / 2) * h)  # ~103
y_max = int((y_cent + box_h / 2) * h)  # ~412

# Draw lines connecting corners (rectangle)
cv2.line(img, (x_min, y_min), (x_max, y_min), (0, 255, 0), 2)  # Top
cv2.line(img, (x_max, y_min), (x_max, y_max), (0, 255, 0), 2)  # Right
cv2.line(img, (x_max, y_max), (x_min, y_max), (0, 255, 0), 2)  # Bottom
cv2.line(img, (x_min, y_max), (x_min, y_min), (0, 255, 0), 2)  # Left

# Save
cv2.imwrite("original_with_box.jpg", img)
print(f"Original label: {class_id} {x_cent} {y_cent} {box_w} {box_h}")

Original label: 9 0.2671875 0.1625 0.059375 0.09765625


rotated image with the rotated bounding box highlighted

In [103]:
img_path = "../../../data/raw/as.jpg" # path to image 
path = os.path.abspath(img_path)
img = cv2.imread(path) # load image

h, w = img.shape[:2]  # 2061, 1940
class_id, x_cent, y_cent, box_w, box_h = 6, 0.2671875, 0.1625, 0.059375, 0.09765625  # Classificaiton label i.e. Top-left corner

In [104]:
# Rotation
theta = 30  # Degrees, adjust as needed
center = (w // 2, h // 2)  # Rotate around image center
M = cv2.getRotationMatrix2D(center, theta, 1.0)
rotated_img = cv2.warpAffine(img, M, (w, h))

# Rotate classification box center
x_cent_px = x_cent * w
y_cent_px = y_cent * h
center_point = np.array([x_cent_px, y_cent_px, 1])  # Homogeneous coords
new_center = M.dot(center_point)
new_x_cent = new_center[0] / w  # Normalize back
new_y_cent = new_center[1] / h

# Width/height stay the same (upright box approximation)
new_box_w, new_box_h = box_w, box_h
new_label = f"{class_id} {new_x_cent} {new_y_cent} {new_box_w} {new_box_h}"

# Convert to pixels for drawing
x_min = int((new_x_cent - new_box_w / 2) * w)
x_max = int((new_x_cent + new_box_w / 2) * w)
y_min = int((new_y_cent - new_box_h / 2) * h)
y_max = int((new_y_cent + new_box_h / 2) * h)

# Draw lines connecting corners (rectangle)
cv2.line(rotated_img, (x_min, y_min), (x_max, y_min), (0, 255, 0), 2)  # Top
cv2.line(rotated_img, (x_max, y_min), (x_max, y_max), (0, 255, 0), 2)  # Right
cv2.line(rotated_img, (x_max, y_max), (x_min, y_max), (0, 255, 0), 2)  # Bottom
cv2.line(rotated_img, (x_min, y_max), (x_min, y_min), (0, 255, 0), 2)  # Left

# Save
cv2.imwrite("rotated_with_box.jpg", rotated_img)
print(f"New label: {new_label}")

New label: 6 0.15060178436322855 0.3405530033846654 0.059375 0.09765625


## Rotate outline bounding box:

In [105]:
import cv2
import numpy as np

# Load image and background
img = cv2.imread("../../../data/raw/as.jpg")
bg = cv2.imread("../backgrounds/bg.jpg")  # Replace with your bg file
if img is None: raise Exception("Image not found")
if bg is None: raise Exception("Background not found")

h, w = img.shape[:2]  

# Original bounding box (card outline for masking)
class_id, x_cent, y_cent, box_w, box_h = 0, 0.471875, 0.496875, 0.490625, 0.78828125

rotate the image and bounding box coordinate with a theta value. Use that theta and the transformed coordinates to make a rotated mask.

In [None]:
theta = np.random.uniform(-30, 30)
center = (w // 2, h // 2)
M = cv2.getRotationMatrix2D(center, theta, 1.0)

# Rotate image
rotated_img = cv2.warpAffine(img, M, (w, h))

# Create mask with original coords (before rotation)
orig_x_min = int((x_cent - box_w / 2) * w)  # ~434
orig_x_max = int((x_cent + box_w / 2) * w)  # ~1412
orig_y_min = int((y_cent - box_h / 2) * h)  # ~210
orig_y_max = int((y_cent + box_h / 2) * h)
mask = np.zeros_like(img, dtype=np.uint8)
mask[orig_y_min:orig_y_max, orig_x_min:orig_x_max] = 255

# Rotate mask with theta
rotated_mask = cv2.warpAffine(mask, M, (w, h))

# Background swap (single photo)
bg = cv2.resize(bg, (w, h))
result = np.where(rotated_mask == 255, rotated_img, bg)

# Save
cv2.imwrite("rotated_masked_card.jpg", result)
print(f"Theta: {theta}")

Theta: -25.676350864411962
