In [3]:
import cv2
import os
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image, ImageOps


# Contrast-limited adaptive histogram equalization object (clip limit 1.9, 4x4 tile grid).
# NOTE(review): not referenced anywhere in the visible code - confirm it is still needed.
CLAHE = cv2.createCLAHE(clipLimit=1.9, tileGridSize=(4,4))
# Output size handed to PIL as (width, height): 224 px wide, 448 px tall.
tarSize = (224, 448)

def load_image(image_path):
	"""Read the image file at ``image_path`` with OpenCV.

	Returns exactly what ``cv2.imread`` returns for that path.
	"""
	image = cv2.imread(image_path)
	return image

def detect_background(image):
	"""Classify the background of a BGR image as ``'white'`` or ``'black'``.

	The image is converted to grayscale and its mean pixel intensity is
	compared with 127: a mean strictly above 127 gives ``'white'``, anything
	else gives ``'black'``.
	"""
	grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
	is_bright = np.mean(grayscale) > 127
	return 'white' if is_bright else 'black'

def preprocess_image(image, background):
	"""Build a binary foreground mask from a BGR image.

	Args:
		image: BGR image accepted by ``cv2.cvtColor(..., cv2.COLOR_BGR2GRAY)``.
		background: ``'white'`` selects inverted Gaussian adaptive
			thresholding; any other value (callers pass ``'black'``) selects
			a fixed threshold at 50 OR-ed with Canny edges.

	Returns:
		Single-channel mask in which selected pixels are set to 255.
	"""
	gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

	# sigmaY is passed by keyword on purpose: in the Python binding the fourth
	# positional parameter of GaussianBlur is ``dst``, not ``sigmaY``.
	# NOTE(review): a (1, 1) kernel has a single tap, so this call leaves the
	# pixel values unchanged; widen ksize if real smoothing is wanted (the
	# thresholds below were chosen against the unsmoothed image).
	blurred = cv2.GaussianBlur(gray, (1, 1), sigmaX=2, sigmaY=2)

	if background == 'white':
		# Dark object on a bright background: inverted adaptive threshold
		# (block size 11, constant 2) so the object becomes the "on" region.
		thresh_gray = cv2.adaptiveThreshold(
			blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2
		)
	else:
		# Bright object on a dark background: fixed threshold, then add Canny
		# edges to the mask.
		_, thresh_gray = cv2.threshold(blurred, 50, 255, cv2.THRESH_BINARY)
		edges = cv2.Canny(blurred, 50, 150)
		thresh_gray = cv2.bitwise_or(thresh_gray, edges)

	return thresh_gray

def remove_noise(thresh_gray):
	"""Clean a mask with a morphological close followed by an open.

	Both operations use the same 5x5 all-ones ``uint8`` kernel and run for
	two iterations each.
	"""
	structuring_element = np.ones((5, 5), dtype=np.uint8)
	closed = cv2.morphologyEx(
		thresh_gray, cv2.MORPH_CLOSE, structuring_element, iterations=2
	)
	return cv2.morphologyEx(
		closed, cv2.MORPH_OPEN, structuring_element, iterations=2
	)

def find_largest_contour(cleaned, min_area=1000):
	"""Return the external contour with the largest area, or ``None``.

	Args:
		cleaned: Single-channel mask passed to ``cv2.findContours``.
		min_area: Contours whose area is not strictly greater than this value
			are ignored. Defaults to 1000, the previously hard-coded cutoff.

	Returns:
		The largest remaining contour, or ``None`` when no contour exceeds
		``min_area``.
	"""
	# findContours returns (contours, hierarchy) on OpenCV 2.x/4.x but
	# (image, contours, hierarchy) on 3.x; the contour list is always the
	# second-to-last element.
	contours = cv2.findContours(cleaned, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

	# Measure each contour once and keep only those above the cutoff.
	measured = [(cv2.contourArea(contour), contour) for contour in contours]
	candidates = [pair for pair in measured if pair[0] > min_area]
	if not candidates:
		return None

	# max() keeps the first of equally large contours.
	return max(candidates, key=lambda pair: pair[0])[1]

def crop_bounding_rect(image, contour):
	"""Return the part of ``image`` inside the contour's upright bounding rectangle."""
	left, top, width, height = cv2.boundingRect(contour)
	return image[top:top + height, left:left + width]

def display_images(original, thresh, result):
	"""Show the original, the thresholded mask and the result side by side.

	``original`` is converted from BGR to RGB before display, ``thresh`` is
	drawn with the gray colormap, and ``result`` is shown as given.
	"""
	plt.figure(figsize=(12, 6))
	panels = (
		("Original Image", cv2.cvtColor(original, cv2.COLOR_BGR2RGB), {}),
		("Thresholded Image", thresh, {'cmap': 'gray'}),
		("Extracted Object", result, {}),
	)
	# One row, three columns; subplot positions are 1-based.
	for position, (title, picture, imshow_kwargs) in enumerate(panels, start=1):
		plt.subplot(1, 3, position)
		plt.title(title)
		plt.imshow(picture, **imshow_kwargs)
	plt.show()


## Loop over every sub-folder and cut the detected object out of each image


##############################################################################
################# Put the path of the images folder here #####################
##############################################################################
## Step 2
path = r'.\PNG-Images' # <--- The Path

# Results go to a "Data-Inference" folder located next to the input folder.
# os.path.join keeps this relative even when `path` has no directory part
# (an empty dirname plus a hand-written separator would point at the root).
dest_path = os.path.join(os.path.dirname(path), 'Data-Inference')

# Create the destination folder; exist_ok makes this a no-op when it exists.
os.makedirs(dest_path, exist_ok=True)

for sub_folder in os.listdir(path):
	src_dir = os.path.join(path, sub_folder)
	# Stray files next to the sub-folders would make os.listdir() below raise.
	if not os.path.isdir(src_dir):
		continue

	# Mirror the sub-folder in the destination tree.
	out_dir = os.path.join(dest_path, sub_folder)
	os.makedirs(out_dir, exist_ok=True)

	# Loop through all images
	for image_file in os.listdir(src_dir):
		image_path = os.path.join(path, sub_folder, image_file)

		# cv2.imread signals an unreadable / non-image file by returning None
		# instead of raising, so check before handing the result to cvtColor.
		img = load_image(image_path)
		if img is None:
			print(f"Could not read image: {image_path}")
			continue

		# Build the mask on the image as loaded.
		background = detect_background(img)
		thresh_gray = preprocess_image(img, background)
		cleaned = remove_noise(thresh_gray)
		largest_contour = find_largest_contour(cleaned)

		# Dark-background images are inverted before cropping; the mask above
		# was computed on the original pixel values.
		if background == 'black':
			img = cv2.bitwise_not(img)

		if largest_contour is not None:
			extracted_object = crop_bounding_rect(img, largest_contour)
			gray = cv2.cvtColor(extracted_object, cv2.COLOR_BGR2GRAY)

			# resize() stretches the crop to exactly tarSize (the aspect ratio
			# is not preserved), so pad_left below evaluates to 0 and the crop
			# fills the whole black canvas.
			cropped_image = Image.fromarray(gray)
			cropped_image = cropped_image.resize(tarSize, resample=Image.HAMMING)

			pad_left = tarSize[0] - cropped_image.width
			pad_top = 0

			background_color = (0, 0, 0)
			new_image = Image.new('RGB', tarSize, background_color)
			new_image.paste(cropped_image, (pad_left, pad_top))

			## Save the image
			new_image.save(os.path.join(out_dir, image_file))
		else:
			print(f"No contours found for image: {image_path}")

		print(f"Processed: {image_path}")


Processed: .\PNG-Images\20191\RCC.png
Processed: .\PNG-Images\20191\RMLO.png
Processed: .\PNG-Images\20215\RCC.png
Processed: .\PNG-Images\20215\RMLO.png
Processed: .\PNG-Images\20465\RCC.png
Processed: .\PNG-Images\20465\RMLO.png
Processed: .\PNG-Images\20472\RCC.png
Processed: .\PNG-Images\20472\RMLO.png
Processed: .\PNG-Images\20565\RCC.png
Processed: .\PNG-Images\20565\RMLO.png
Processed: .\PNG-Images\20667\RCC.png
Processed: .\PNG-Images\20667\RMLO.png
Processed: .\PNG-Images\21047\LCC.png
Processed: .\PNG-Images\21047\LMLO.png
Processed: .\PNG-Images\21075\LCC.png
Processed: .\PNG-Images\21075\LMLO.png
Processed: .\PNG-Images\21123\LCC.png
Processed: .\PNG-Images\21123\LMLO.png
Processed: .\PNG-Images\21173\LCC.png
Processed: .\PNG-Images\21173\LMLO.png
Processed: .\PNG-Images\21201\RCC.png
Processed: .\PNG-Images\21201\RMLO.png
Processed: .\PNG-Images\21229\RCC.png
Processed: .\PNG-Images\21229\RMLO.png
Processed: .\PNG-Images\21320\LCC.png
Processed: .\PNG-Images\21320\LMLO.png