In [3]:
import kagglehub
import numpy as np 
import cv2
import os
import imutils
# from tqdm import tqdm

In [8]:
# Root directory of the source dataset; the 'train', 'test' and 'val'
# sub-directories are joined onto this path further down.
data_set = '../data/AlzheimersData_Split'

In [9]:
def crop_img(img, add_pixels=0):
	"""
	Crop an image to the rectangle spanned by its largest bright region.

	The image is converted to grayscale, blurred with a 3x3 Gaussian kernel,
	thresholded at 45 and cleaned with two erosions followed by two dilations.
	The extreme points of the largest external contour of that mask define the
	crop rectangle.

	Args:
		img: image array. A 2-D array is used as-is as the grayscale image;
			anything else is passed through cv2.cvtColor.
		add_pixels: padding, in pixels, added on every side of the rectangle.
			The padded rectangle is clamped to the image bounds so the slice
			indices can never go negative and wrap around.

	Returns:
		A copy of the cropped region. When no contour is found (for example an
		all-dark image) or the rectangle has zero width or height, a copy of the
		whole input image is returned instead of raising.
	"""
	# NOTE(review): cv2.imread yields BGR data while this converts with
	# COLOR_RGB2GRAY; the result is the same when all channels are equal --
	# confirm before feeding real colour images.
	gray = img if img.ndim == 2 else cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
	gray = cv2.GaussianBlur(gray, (3, 3), 0)

	# threshold the image, then perform a series of erosions +
	# dilations to remove any small regions of noise
	thresh = cv2.threshold(gray, 45, 255, cv2.THRESH_BINARY)[1]
	thresh = cv2.erode(thresh, None, iterations=2)
	thresh = cv2.dilate(thresh, None, iterations=2)

	# find contours in thresholded image, then grab the largest one
	cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
	cnts = imutils.grab_contours(cnts)
	if len(cnts) == 0:
		# nothing brighter than the threshold: max() below would raise ValueError
		return img.copy()
	c = max(cnts, key=cv2.contourArea)

	# extreme coordinates of the contour, padded and clamped to the image
	xs = c[:, :, 0]
	ys = c[:, :, 1]
	height, width = img.shape[:2]
	top = max(int(ys.min()) - add_pixels, 0)
	bottom = min(int(ys.max()) + add_pixels, height)
	left = max(int(xs.min()) - add_pixels, 0)
	right = min(int(xs.max()) + add_pixels, width)

	if bottom <= top or right <= left:
		# degenerate contour (single row/column): an empty crop cannot be resized later
		return img.copy()

	return img[top:bottom, left:right].copy()

In [10]:
# Paths of the three dataset splits below the dataset root.
training, testing, validate = (
	os.path.join(data_set, split_name) for split_name in ('train', 'test', 'val')
)

# Directory entries found directly inside each split directory.
training_dir, testing_dir, validate_dir = (
	os.listdir(split_path) for split_path in (training, testing, validate)
)


In [11]:
# Width and height, in pixels, that preprocess_and_write resizes every image to.
IMG_SIZE = 256

In [15]:
def preprocess_and_write(src_dir: str, dest_dir: str, img_name: str) -> bool:
	"""
	Read one image, crop it, resize it to IMG_SIZE x IMG_SIZE and save it.

	Args:
		src_dir: directory containing the source image.
		dest_dir: directory the processed image is written to; created
			(including parents) when it does not exist yet.
		img_name: file name of the image, used for both reading and writing.

	Returns:
		True when the processed image was written, False when the source could
		not be read as an image or the write failed. A message is printed in
		both failure cases.
	"""
	src_file = os.path.join(src_dir, img_name)
	image = cv2.imread(src_file)
	# cv2.imread reports failure by returning None rather than raising
	if image is None:
		print(f'Skipping unreadable image: {src_file}')
		return False

	new_img = crop_img(image)
	new_img = cv2.resize(new_img, (IMG_SIZE, IMG_SIZE))

	# exist_ok avoids the check-then-create race of a separate os.path.exists test
	os.makedirs(dest_dir, exist_ok=True)

	dest_file = os.path.join(dest_dir, img_name)
	# cv2.imwrite returns False when the file could not be saved
	if not cv2.imwrite(dest_file, new_img):
		print(f'Failed to write image: {dest_file}')
		return False

	return True


In [18]:
# Preprocess every image of every split/class and mirror the directory layout
# of the source dataset under dest_dir.
dest_dir = '../data/Preprocessed'

for split in ['train', 'test', 'val']:
	for cl_name in ['MildDemented', 'ModerateDemented', 'NonDemented', 'VeryMildDemented']:
		save_path = os.path.join(dest_dir, split, cl_name)
		src_path = os.path.join(data_set, split, cl_name)

		# os.listdir also returns sub-directories and hidden files (e.g. .DS_Store,
		# .ipynb_checkpoints) that cannot be decoded as images, so skip them.
		# Sorting makes the processing order deterministic across runs.
		image_dir = sorted(
			name for name in os.listdir(src_path)
			if not name.startswith('.') and os.path.isfile(os.path.join(src_path, name))
		)
		for img in image_dir:
			preprocess_and_write(src_dir=src_path, dest_dir=save_path, img_name=img)
