In [2]:
import os
import cv2
import torch
import numpy as np
from ultralytics import YOLO

# Input images and the root under which all generated artifacts are written
input_folder = 'C:/Users/kalya/Desktop/Desktop/ML/lama/lama/test_aug'
output_folder = 'C:/Users/kalya/Desktop/Desktop/ML/lama/lama'

# One sub-folder per artifact type: noisy images, binary masks, object crops
augmented_folder, mask_folder, objects_folder = (
    os.path.join(output_folder, _sub)
    for _sub in ('augmented_images', 'masks', 'extracted_objects')
)

# Make sure every output folder exists before the processing loop writes to it
for _folder in (augmented_folder, mask_folder, objects_folder):
    os.makedirs(_folder, exist_ok=True)

# Load the "yolo11n-seg.pt" checkpoint and run it on the GPU when one is
# available, otherwise on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = YOLO("yolo11n-seg.pt")
model.to(device)

# Gaussian noise function
def add_gaussian_noise(image, mean=0, std=25):
    """Return a copy of ``image`` with additive Gaussian noise.

    The noise is added in floating point and the result is rounded and
    clipped to [0, 255] before being cast to ``uint8``. Casting the signed
    noise to ``uint8`` first (as the previous version did) wraps negative
    samples around to large positive values, so the noise could only ever
    brighten pixels.

    Args:
        image: NumPy array holding the image; treated as 8-bit data
            (values outside 0..255 are clipped).
        mean: Mean of the Gaussian noise.
        std: Standard deviation of the Gaussian noise (must be >= 0).

    Returns:
        A new ``uint8`` array with the same shape as ``image``.
    """
    noise = np.random.normal(mean, std, image.shape)
    # Work in float so negative noise darkens pixels instead of wrapping.
    noisy = image.astype(np.float64) + noise
    noisy_image = np.clip(np.rint(noisy), 0, 255).astype(np.uint8)
    return noisy_image

# Process each image in the input folder: detect objects, save their crops,
# build a bounding-box mask, and save a noisy copy of the resized image.
image_files = [f for f in os.listdir(input_folder) if f.endswith(('.jpg', '.jpeg', '.png'))]
ctr = 0
for image_file in image_files:
    # Load the original image
    image_path = os.path.join(input_folder, image_file)
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals an unreadable/corrupt file by returning None
        # rather than raising; skip it instead of crashing in cv2.resize.
        print(f"Skipping {image_file}: could not read image.")
        continue

    # Resize the image to 640x640 for YOLO
    resized_image = cv2.resize(image, (640, 640))
    ctr += 1

    # Run detection; the model-side threshold is kept low and the stricter
    # 0.25 cut-off is applied per box below.
    results = model(resized_image, conf=0.2)
    detections = results[0].boxes

    # Mask starts fully black; detected regions are painted white (255)
    mask = np.zeros(resized_image.shape[:2], dtype=np.uint8)
    height, width = mask.shape

    for i, box in enumerate(detections):
        # Move bounding box data to CPU, convert to ints, clamp to the image
        x1, y1, x2, y2 = map(int, box.xyxy[0].cpu().numpy())
        x1, x2 = max(0, x1), min(width, x2)
        y1, y2 = max(0, y1), min(height, y2)
        confidence = float(box.conf.cpu().numpy()[0])

        if confidence <= 0.25:
            continue
        if x2 <= x1 or y2 <= y1:
            # Integer truncation can collapse a thin box to zero area;
            # cv2.imwrite cannot write an empty image.
            continue

        obj_image = resized_image[y1:y2, x1:x2]
        if ctr % 2 == 0:
            # Every second image gets noisy object crops. The previous code
            # computed the noisy crop but wrote the clean one to disk.
            obj_image = add_gaussian_noise(obj_image)
        obj_path = os.path.join(objects_folder, f'obj_{i}_{image_file}')
        cv2.imwrite(obj_path, obj_image)

        # Mark the detected object's bounding box as white on the mask
        mask[y1:y2, x1:x2] = 255

    # Apply Gaussian noise to the resized image after object detection and save
    augmented_image = add_gaussian_noise(resized_image)
    augmented_image_path = os.path.join(augmented_folder, f'{image_file}')
    cv2.imwrite(augmented_image_path, augmented_image)

    # Save the mask as "<stem>_mask<suffix>.png".
    # NOTE(review): the suffix is whatever follows the last "e" in the file
    # name (e.g. "image17.png" -> "17"); kept unchanged in case downstream
    # tooling depends on this naming.
    file_name = str(image_file)
    img = file_name.split(".")[0]
    num = file_name.split("e")[-1].split(".")[0]
    mask_path = os.path.join(mask_folder, f'{img}_mask{num}.png')
    cv2.imwrite(mask_path, mask)
    print(f"Processed {image_file}: Augmented image saved to {augmented_image_path}, Mask saved to {mask_path}, Objects saved in {objects_folder}")
print("Process Completed for all Images")


0: 640x640 (no detections), 62.9ms
Speed: 15.6ms preprocess, 62.9ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)
Processed image1.png: Augmented image saved to C:/Users/kalya/Desktop/Desktop/ML/lama/lama\augmented_images\image1.png, Mask saved to C:/Users/kalya/Desktop/Desktop/ML/lama/lama\masks\image1_mask1.png, Objects saved in C:/Users/kalya/Desktop/Desktop/ML/lama/lama\extracted_objects

0: 640x640 1 cow, 1 zebra, 35.2ms
Speed: 16.5ms preprocess, 35.2ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)
Processed image17.png: Augmented image saved to C:/Users/kalya/Desktop/Desktop/ML/lama/lama\augmented_images\image17.png, Mask saved to C:/Users/kalya/Desktop/Desktop/ML/lama/lama\masks\image17_mask17.png, Objects saved in C:/Users/kalya/Desktop/Desktop/ML/lama/lama\extracted_objects

0: 640x640 1 bed, 31.7ms
Speed: 14.3ms preprocess, 31.7ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)
Processed image19.png: Augmented image save

In [8]:
# import cv2
# import numpy as np
# import os

# # Directory paths
# input_folder = 'C:/Users/kalya/Desktop/Desktop/ML/lama/lama/test_aug'
# output_folder = 'C:/Users/kalya/Desktop/Desktop/ML/lama/lama/output'
# augmented_folder = os.path.join(output_folder, 'augmented_images')
# mask_folder = os.path.join(output_folder, 'masks')
# objects_folder = os.path.join(output_folder, 'extracted_objects')

# # Create directories for output, masks, and objects
# os.makedirs(augmented_folder, exist_ok=True)
# os.makedirs(mask_folder, exist_ok=True)
# os.makedirs(objects_folder, exist_ok=True)
# os.makedirs(output_folder, exist_ok=True)

# # List of all images in the input folder
# image_files = [f for f in os.listdir(input_folder) if f.endswith(('.jpg', '.jpeg', '.png'))]

# # Bounding box coordinates
# bbox = []
# drawing = False  # Flag to indicate if we're currently drawing the bounding box

# # Gaussian noise function
# def add_gaussian_noise(image, mean=0, std=25):
#     noise = np.random.normal(mean, std, image.shape).astype(np.uint8)
#     noisy_image = cv2.add(image, noise)
#     return noisy_image


# def draw_bbox(event, x, y, flags, param):
#     """Mouse callback function to draw bounding box dynamically."""
#     global bbox, drawing, image, clone

#     if event == cv2.EVENT_LBUTTONDOWN:
#         # Start drawing the bounding box
#         bbox = [(x, y)]
#         drawing = True

#     elif event == cv2.EVENT_MOUSEMOVE:
#         # Update the bounding box while the mouse is moving
#         if drawing:
#             temp_image = clone.copy()  # Copy of the original image to reset drawing on each move
#             cv2.rectangle(temp_image, bbox[0], (x, y), (0, 255, 0), 2)
#             cv2.imshow("Image", temp_image)

#     elif event == cv2.EVENT_LBUTTONUP:
#         # Finalize the bounding box
#         bbox.append((x, y))
#         drawing = False
#         cv2.rectangle(image, bbox[0], bbox[1], (0, 255, 0), 2)
#         cv2.imshow("Image", image)

# # Process each image in the folder
# ctr=0
# for image_file in image_files:
#     # Load the image
#     image_path = os.path.join(input_folder, image_file)
#     image = cv2.imread(image_path)
#     clone = image.copy()  # Keep a clone of the original image
#     bbox.clear()
    
#     # Display image and set mouse callback
#     cv2.namedWindow("Image")
#     cv2.setMouseCallback("Image", draw_bbox)
#     cv2.imshow("Image", image)
#     print(f"Draw a bounding box for {image_file} and press any key when done.")
    
#     # Wait for the user to draw the bounding box and press a key
#     cv2.waitKey(0)
#     cv2.destroyAllWindows()
    
#     # Skip if bounding box is not drawn
#     if len(bbox) != 2:
#         print(f"Skipping {image_file} as no bounding box was drawn.")
#         # continue
    
#     # Create the mask
#     mask = np.zeros(image.shape[:2], dtype=np.uint8)
#     x0, y0 = bbox[0]
#     x1, y1 = bbox[1]
#     mask[y0:y1, x0:x1] = 255  # Set the bounding box region to white for inpainting
    
#     # Save the mask and object
#     mask_path = os.path.join(mask_folder, f'mask_{image_file}')
#     cv2.imwrite(mask_path, mask)
    
#     object_img = clone[y0:y1, x0:x1]
#     # Apply Gaussian noise to the original image after object derection and save
#     ctr+=1
#     if ctr%2==0:
#         augmented_object_image = add_gaussian_noise(object_img)
#         augmented_objects_folder_image_path = os.path.join(objects_folder, f'object_{image_file}')
#         cv2.imwrite(augmented_objects_folder_image_path, augmented_object_image)
#     else:
#         object_image_path = os.path.join(objects_folder, f'object_{object_img}')
#         cv2.imwrite(object_image_path, object_img)

#     # Always apply noise to the backround image
#     augmented_original_image = add_gaussian_noise(image_file)
#     augmented_image_path = os.path.join(augmented_folder, f'{image_file}')
#     cv2.imwrite(augmented_image_path, augmented_original_image)
    
#     print(f"Processed {image_file}: Mask saved to {mask_path}, Object saved to {object_path}")

# print("Processing complete for all images.")


## Manual Selection of Bounding Boxes

In [14]:
import cv2
import numpy as np
import os

# Where the source images live and where every generated artifact goes
input_folder = 'C:/Users/kalya/Desktop/Desktop/ML/lama/lama/test_aug'
output_folder = 'C:/Users/kalya/Desktop/Desktop/ML/lama/lama/outputs_all_images'

# One sub-folder per artifact type: noisy images, binary masks, object crops
augmented_folder, mask_folder, objects_folder = (
    os.path.join(output_folder, _sub)
    for _sub in ('augmented_images', 'masks', 'extracted_objects')
)

# Create the output root first, then each artifact folder
for _folder in (output_folder, augmented_folder, mask_folder, objects_folder):
    os.makedirs(_folder, exist_ok=True)

# Every .jpg / .jpeg / .png file found directly inside the input folder
image_files = [
    name
    for name in os.listdir(input_folder)
    if name.endswith(('.jpg', '.jpeg', '.png'))
]

# State shared with the mouse callback: the corner points collected so far,
# and whether a drag is currently in progress.
bbox = []
drawing = False

# Gaussian noise function
def add_gaussian_noise(image, mean=0, std=25):
    """Return a copy of ``image`` with additive Gaussian noise.

    The noise is added in floating point and the result is rounded and
    clipped to [0, 255] before being cast to ``uint8``. Casting the signed
    noise to ``uint8`` first (as the previous version did) wraps negative
    samples around to large positive values, so the noise could only ever
    brighten pixels.

    Args:
        image: NumPy array holding the image; treated as 8-bit data
            (values outside 0..255 are clipped).
        mean: Mean of the Gaussian noise.
        std: Standard deviation of the Gaussian noise (must be >= 0).

    Returns:
        A new ``uint8`` array with the same shape as ``image``.
    """
    noise = np.random.normal(mean, std, image.shape)
    # Work in float so negative noise darkens pixels instead of wrapping.
    noisy = image.astype(np.float64) + noise
    noisy_image = np.clip(np.rint(noisy), 0, 255).astype(np.uint8)
    return noisy_image

def draw_bbox(event, x, y, flags, param):
    """Mouse callback that lets the user drag out one bounding box.

    Left-button-down stores the first corner in the global ``bbox`` list,
    mouse-move shows a live preview, and left-button-up stores the second
    corner. All drawing is done on a fresh copy of the global ``clone`` so
    rectangles from earlier attempts never accumulate in the window.

    Args:
        event: OpenCV mouse event code.
        x: Cursor x coordinate reported by OpenCV.
        y: Cursor y coordinate reported by OpenCV.
        flags: Unused; part of the OpenCV callback signature.
        param: Unused; part of the OpenCV callback signature.
    """
    global bbox, drawing

    if event == cv2.EVENT_LBUTTONDOWN:
        # Start a new box; any previously drawn box is discarded
        bbox = [(x, y)]
        drawing = True

    elif event == cv2.EVENT_MOUSEMOVE:
        # Live preview while the button is held down
        if drawing and bbox:
            temp_image = clone.copy()
            cv2.rectangle(temp_image, bbox[0], (x, y), (0, 255, 0), 2)
            cv2.imshow("Image", temp_image)

    elif event == cv2.EVENT_LBUTTONUP:
        # Ignore a release that has no matching press (e.g. the button went
        # down outside the window); otherwise bbox would hold one point and
        # indexing bbox[1] would raise IndexError.
        if not drawing or len(bbox) != 1:
            drawing = False
            return
        bbox.append((x, y))
        drawing = False
        final_image = clone.copy()
        cv2.rectangle(final_image, bbox[0], bbox[1], (0, 255, 0), 2)
        cv2.imshow("Image", final_image)

# Process each image in the folder: let the user draw one bounding box, then
# save the box mask, the cropped object, and a noisy copy of the full image.
ctr = 0
for image_file in image_files:
    # Load the image
    image_path = os.path.join(input_folder, image_file)
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals an unreadable/corrupt file by returning None
        # rather than raising; skip it instead of crashing on image.copy().
        print(f"Skipping {image_file}: could not read image.")
        continue
    clone = image.copy()  # Pristine copy; the callback draws on copies of it
    bbox.clear()

    # Display image and set mouse callback
    cv2.namedWindow("Image")
    cv2.setMouseCallback("Image", draw_bbox)
    cv2.imshow("Image", image)
    print(f"Draw a bounding box for {image_file} and press any key when done.")

    # Wait for the user to draw the bounding box and press a key
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    # Skip if bounding box is not drawn
    if len(bbox) != 2:
        print(f"Skipping {image_file} as no bounding box was drawn.")
        continue

    # Normalise the two corners so the box is valid whichever direction the
    # user dragged, and clamp to the image because the mouse can report
    # coordinates outside the frame.
    height, width = image.shape[:2]
    x0, x1 = sorted((bbox[0][0], bbox[1][0]))
    y0, y1 = sorted((bbox[0][1], bbox[1][1]))
    x0, x1 = max(0, x0), min(width, x1)
    y0, y1 = max(0, y0), min(height, y1)
    if x1 <= x0 or y1 <= y0:
        # A click without a drag gives a zero-area box; cv2.imwrite cannot
        # write the resulting empty crop.
        print(f"Skipping {image_file} as the bounding box is empty.")
        continue

    stem = os.path.splitext(image_file)[0]

    # Create the mask: white inside the box (region to inpaint), black elsewhere
    mask = np.zeros(image.shape[:2], dtype=np.uint8)
    mask[y0:y1, x0:x1] = 255

    # Save the mask and object
    mask_path = os.path.join(mask_folder, f'mask_{stem}.png')
    cv2.imwrite(mask_path, mask)

    object_img = clone[y0:y1, x0:x1]
    ctr += 1
    if ctr % 2 == 0:
        # Every second processed image gets a noisy object crop
        augmented_object_image = add_gaussian_noise(object_img)
        augmented_object_path = os.path.join(objects_folder, f'object_{stem}_aug.png')
        cv2.imwrite(augmented_object_path, augmented_object_image)
    else:
        object_image_path = os.path.join(objects_folder, f'object_{stem}.png')
        cv2.imwrite(object_image_path, object_img)

    # Add Gaussian noise to the background image
    augmented_original_image = add_gaussian_noise(clone)
    augmented_image_path = os.path.join(augmented_folder, f'{stem}.png')
    cv2.imwrite(augmented_image_path, augmented_original_image)

    print(f"Processed {image_file}: Mask saved to {mask_path}, Object saved.")

print("Processing complete for all images.")


Draw a bounding box for image1.png and press any key when done.
Processed image1.png: Mask saved to C:/Users/kalya/Desktop/Desktop/ML/lama/lama/outputs_all_images\masks\mask_image1.png, Object saved.
Draw a bounding box for image17.png and press any key when done.
Processed image17.png: Mask saved to C:/Users/kalya/Desktop/Desktop/ML/lama/lama/outputs_all_images\masks\mask_image17.png, Object saved.
Draw a bounding box for image19.png and press any key when done.
Processed image19.png: Mask saved to C:/Users/kalya/Desktop/Desktop/ML/lama/lama/outputs_all_images\masks\mask_image19.png, Object saved.
Draw a bounding box for image2.png and press any key when done.
Processed image2.png: Mask saved to C:/Users/kalya/Desktop/Desktop/ML/lama/lama/outputs_all_images\masks\mask_image2.png, Object saved.
Draw a bounding box for image20.png and press any key when done.
Processed image20.png: Mask saved to C:/Users/kalya/Desktop/Desktop/ML/lama/lama/outputs_all_images\masks\mask_image20.png, Objec

## Model Object Detection

In [30]:
import cv2
import numpy as np
import os
from ultralytics import YOLO
import torch

# Source images (COCO val2017) and the root for every generated artifact.
# The smaller '.../lama/lama/test_aug' folder was used as input in earlier cells.
input_folder = 'C:/Users/kalya/Desktop/Desktop/ML/lama/lama/coco_val2017'
output_folder = 'C:/Users/kalya/Desktop/Desktop/ML/lama/lama/outputs_all_images'

# One sub-folder per artifact type: augmented images, binary masks, object crops
augmented_folder, mask_folder, objects_folder = (
    os.path.join(output_folder, _sub)
    for _sub in ('augmented_images', 'masks', 'extracted_objects')
)

# Create the output root first, then each artifact folder
for _folder in (output_folder, augmented_folder, mask_folder, objects_folder):
    os.makedirs(_folder, exist_ok=True)

# Every .jpg / .jpeg / .png file found directly inside the input folder
image_files = [
    name
    for name in os.listdir(input_folder)
    if name.endswith(('.jpg', '.jpeg', '.png'))
]

# Load the "yolo11n-seg.pt" checkpoint and run it on the GPU when one is
# available, otherwise on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = YOLO("yolo11n-seg.pt")
model.to(device)

# Gaussian noise function
def add_gaussian_noise(image, mean=0, std=25):
    """Return a copy of ``image`` with additive Gaussian noise.

    The noise is added in floating point and the result is rounded and
    clipped to [0, 255] before being cast to ``uint8``. Casting the signed
    noise to ``uint8`` first (as the previous version did) wraps negative
    samples around to large positive values, so the noise could only ever
    brighten pixels.

    Args:
        image: NumPy array holding the image; treated as 8-bit data
            (values outside 0..255 are clipped).
        mean: Mean of the Gaussian noise.
        std: Standard deviation of the Gaussian noise (must be >= 0).

    Returns:
        A new ``uint8`` array with the same shape as ``image``.
    """
    noise = np.random.normal(mean, std, image.shape)
    # Work in float so negative noise darkens pixels instead of wrapping.
    noisy = image.astype(np.float64) + noise
    noisy_image = np.clip(np.rint(noisy), 0, 255).astype(np.uint8)
    return noisy_image

# Process each image in the folder: detect objects at native resolution, save
# their crops, build a bounding-box mask, and save the full image. Noise
# alternates by image: even-numbered images get noisy object crops and a clean
# full image; odd-numbered images get clean crops and a noisy full image.
ctr = 0
for image_file in image_files:
    # Load the image
    image_path = os.path.join(input_folder, image_file)
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals an unreadable/corrupt file by returning None
        # rather than raising; skip it instead of crashing on image.copy().
        print(f"Skipping {image_file}: could not read image.")
        continue

    stem = os.path.splitext(image_file)[0]

    # Object detection; only boxes with confidence >= 0.5 are returned
    results = model(image, conf=0.5)
    detections = results[0].boxes

    # Mask starts fully black; detected regions are painted white (255)
    mask = np.zeros(image.shape[:2], dtype=np.uint8)
    height, width = mask.shape

    ctr += 1
    add_noise_to_objects = ctr % 2 == 0

    for i, box in enumerate(detections):
        # Move bounding box data to CPU, then convert to integers
        x1, y1, x2, y2 = map(int, box.xyxy[0].cpu().numpy())
        print(f"for image number = {image_file} x1 = {x1} y1 = {y1} x2 = {x2} y2 = {y2}")

        # Clamp to the image and drop zero-area boxes: integer truncation can
        # collapse a thin box, and cv2.imwrite cannot write an empty crop.
        x1, x2 = max(0, x1), min(width, x2)
        y1, y2 = max(0, y1), min(height, y2)
        if x2 <= x1 or y2 <= y1:
            continue

        obj_image = image[y1:y2, x1:x2]
        if add_noise_to_objects:
            obj_image = add_gaussian_noise(obj_image)
        obj_path = os.path.join(objects_folder, f'object_{i}_{stem}.png')
        cv2.imwrite(obj_path, obj_image)

        # Mark the detected object's bounding box as white on the mask
        mask[y1:y2, x1:x2] = 255

    # Save the full image: untouched when the objects were noised, noisy otherwise
    output_image = image if add_noise_to_objects else add_gaussian_noise(image)
    output_image_path = os.path.join(augmented_folder, f'{stem}.png')
    cv2.imwrite(output_image_path, output_image)

    # Save the mask as "<stem>_mask<suffix>.png".
    # NOTE(review): the suffix is whatever follows the last "e" in the file
    # name (the whole stem when there is no "e", as with the COCO ids); kept
    # unchanged in case downstream tooling depends on this naming.
    file_name = str(image_file)
    img = file_name.split(".")[0]
    num = file_name.split("e")[-1].split(".")[0]
    mask_path = os.path.join(mask_folder, f'{img}_mask{num}.png')
    cv2.imwrite(mask_path, mask)

print("Processing complete for all images.")



0: 448x640 3 chairs, 1 tv, 1 vase, 42.8ms
Speed: 4.3ms preprocess, 42.8ms inference, 15.6ms postprocess per image at shape (1, 3, 448, 640)
for image number = 000000000139.jpg x1 = 6 y1 = 166 x2 = 154 y2 = 263
for image number = 000000000139.jpg x1 = 293 y1 = 216 x2 = 353 y2 = 321
for image number = 000000000139.jpg x1 = 549 y1 = 302 x2 = 586 y2 = 401
for image number = 000000000139.jpg x1 = 391 y1 = 219 x2 = 443 y2 = 307
for image number = 000000000139.jpg x1 = 361 y1 = 217 x2 = 417 y2 = 311

0: 640x608 1 bear, 39.1ms
Speed: 14.9ms preprocess, 39.1ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 608)
for image number = 000000000285.jpg x1 = 17 y1 = 68 x2 = 585 y2 = 640

0: 512x640 2 potted plants, 1 bed, 15.2ms
Speed: 14.3ms preprocess, 15.2ms inference, 15.7ms postprocess per image at shape (1, 3, 512, 640)
for image number = 000000000632.jpg x1 = 0 y1 = 279 x2 = 401 y2 = 476
for image number = 000000000632.jpg x1 = 339 y1 = 214 x2 = 430 y2 = 351
for image number = 000