In [2]:
import kagglehub
import numpy as np 
import cv2
import os
import imutils
import tqdm
from tqdm import tqdm

In [3]:
# Root directory of the already-split dataset; later cells join it with the
# 'train', 'test' and 'val' subfolder names.
data_set = '../data/AlzheimersData_Split'

In [4]:
def crop_img(img, add_pixels=0):
	"""
	Crop an image to the extreme points of its largest bright region.

	The image is converted to grayscale, blurred, thresholded at 45 and cleaned
	with two erosions followed by two dilations. The left-/right-/top-/bottom-most
	points of the largest external contour then define the crop rectangle.

	Args:
		img: Image array as returned by ``cv2.imread`` (BGR channel order), or an
			already single-channel 2-D array.
		add_pixels: Optional margin, in pixels, added on every side of the crop
			rectangle. The rectangle is clamped to the image bounds so a margin
			can never produce negative (wrapping) slice indices.

	Returns:
		A copy of the cropped region. When no contour is found, or the crop
		rectangle would be empty, a copy of the whole input image is returned.
	"""
	# cv2.imread delivers BGR, so BGR2GRAY applies the right channel weights.
	if img.ndim == 2:
		gray = img
	else:
		gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
	gray = cv2.GaussianBlur(gray, (3, 3), 0)

	# threshold the image, then perform a series of erosions +
	# dilations to remove any small regions of noise
	thresh = cv2.threshold(gray, 45, 255, cv2.THRESH_BINARY)[1]
	thresh = cv2.erode(thresh, None, iterations=2)
	thresh = cv2.dilate(thresh, None, iterations=2)

	# find contours in thresholded image, then grab the largest one
	cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
	cnts = imutils.grab_contours(cnts)
	if len(cnts) == 0:
		# Nothing brighter than the threshold survived: max() would raise on
		# an empty sequence, so keep the image as it is.
		return img.copy()
	c = max(cnts, key=cv2.contourArea)

	# find the extreme coordinates of the contour
	xs = c[:, :, 0]
	ys = c[:, :, 1]
	height, width = img.shape[:2]
	top = max(int(ys.min()) - add_pixels, 0)
	bottom = min(int(ys.max()) + add_pixels, height)
	left = max(int(xs.min()) - add_pixels, 0)
	right = min(int(xs.max()) + add_pixels, width)

	if bottom <= top or right <= left:
		# A one-pixel-wide/high contour gives an empty slice, which a later
		# cv2.resize cannot handle; fall back to the uncropped image.
		return img.copy()

	return img[top:bottom, left:right].copy()

In [5]:
# Paths of the three dataset splits underneath the dataset root.
training, testing, validate = (
	os.path.join(data_set, subset) for subset in ('train', 'test', 'val')
)

# Directory listing of each split folder, in the same order.
training_dir, testing_dir, validate_dir = (
	os.listdir(split_path) for split_path in (training, testing, validate)
)


In [6]:
# Side length, in pixels, of the square images produced by the cv2.resize
# call in preprocess_and_write.
IMG_SIZE = 256

In [7]:
def preprocess_and_write(src_dir: str, dest_dir: str, img_name: str):
	"""
	Read one image, crop it, resize it to IMG_SIZE x IMG_SIZE and save it.

	Args:
		src_dir: Directory that contains the source image.
		dest_dir: Directory the processed image is written to; created
			(including parents) when it does not exist yet.
		img_name: File name of the image, used for both reading and writing.

	Raises:
		ValueError: If the source file cannot be read/decoded as an image.
		OSError: If the processed image could not be written.
	"""
	src_file = os.path.join(src_dir, img_name)
	image = cv2.imread(src_file)
	# cv2.imread signals failure by returning None instead of raising.
	if image is None:
		raise ValueError(f"Could not read image: {src_file}")

	new_img = crop_img(image)
	new_img = cv2.resize(new_img, (IMG_SIZE, IMG_SIZE))

	# exist_ok avoids the check-then-create race of exists() + makedirs().
	os.makedirs(dest_dir, exist_ok=True)

	dest_file = os.path.join(dest_dir, img_name)
	# cv2.imwrite reports failure through its boolean return value.
	if not cv2.imwrite(dest_file, new_img):
		raise OSError(f"Could not write image: {dest_file}")


In [8]:
dest_dir = '../data/Preprocessed'

# Visit every (split, class) folder of the source dataset and write the
# preprocessed images to the same relative location underneath dest_dir.
for split, cl_name in (
	(split_name, class_name)
	for split_name in ('train', 'test', 'val')
	for class_name in ('MildDemented', 'ModerateDemented', 'NonDemented', 'VeryMildDemented')
):
	src_path = os.path.join(data_set, split, cl_name)
	save_path = os.path.join(dest_dir, split, cl_name)

	image_dir = os.listdir(src_path)
	for img in tqdm(image_dir, desc=f"Copy {split} {cl_name} images"):
		preprocess_and_write(src_path, save_path, img)


Copy train MildDemented images: 100%|██████████| 7000/7000 [00:08<00:00, 847.95it/s]
Copy train ModerateDemented images: 100%|██████████| 7000/7000 [00:08<00:00, 848.11it/s]
Copy train NonDemented images: 100%|██████████| 8960/8960 [00:10<00:00, 837.67it/s]
Copy train VeryMildDemented images: 100%|██████████| 7839/7839 [00:09<00:00, 837.33it/s]
Copy test MildDemented images: 100%|██████████| 1500/1500 [00:01<00:00, 824.42it/s]
Copy test ModerateDemented images: 100%|██████████| 1500/1500 [00:01<00:00, 803.57it/s]
Copy test NonDemented images: 100%|██████████| 1920/1920 [00:02<00:00, 798.67it/s]
Copy test VeryMildDemented images: 100%|██████████| 1680/1680 [00:02<00:00, 744.29it/s]
Copy val MildDemented images: 100%|██████████| 1500/1500 [00:01<00:00, 822.39it/s]
Copy val ModerateDemented images: 100%|██████████| 1500/1500 [00:01<00:00, 823.11it/s]
Copy val NonDemented images: 100%|██████████| 1920/1920 [00:02<00:00, 822.62it/s]
Copy val VeryMildDemented images: 100%|██████████| 1681/16