In [None]:
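# Optional one-time setup: install dlib from a local prebuilt wheel
# (the wheel name targets CPython 3.10 on 64-bit Windows). Uncomment to run:
# !pip install "dlib-19.22.99-cp310-cp310-win_amd64.whl"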

In [1]:
import os
import tarfile
import dlib
import matplotlib.pyplot as plt
import cv2
from pathlib import Path
from joblib import Parallel, delayed
import time

In [None]:
# Archives holding the raw images; they are opened relative to the current directory.
tar_files = ['part1.tar.gz', 'part2.tar.gz', 'part3.tar.gz']
# Every JPEG is flattened into this single folder, whatever its path inside the archive.
output_folder = os.path.join(os.getcwd(), 'all_images')

os.makedirs(output_folder, exist_ok=True)

for tar_file in tar_files:
    with tarfile.open(tar_file, 'r:gz') as tar:
        # Keep regular files only: a directory, symlink or hard link whose name
        # merely ends in '.jpg' is not image data and must not be extracted.
        jpg_members = [
            member for member in tar.getmembers()
            if member.isfile() and member.name.endswith('.jpg')
        ]
        for member in jpg_members:
            # Remove the leading directory name from the member's name.
            # NOTE: members that share a basename end up at the same path in
            # output_folder, so the one extracted last overwrites the others.
            member.name = os.path.basename(member.name)
        tar.extractall(path=output_folder, members=jpg_members)

In [2]:
def hogDetectFaces(image, display = True):
    """Detect a single frontal face with dlib's HOG detector and crop it.

    Args:
        image: 3-channel BGR image array, as returned by ``cv2.imread``.
        display: If True, plot the original image next to a copy with the
            detected face outlined and return nothing. If False, return the
            cropped face instead of plotting.

    Returns:
        ``[]`` when the detector reports zero or more than one face (in both
        modes). Otherwise the face region cut out of a copy of ``image`` when
        ``display`` is False, or ``None`` after plotting when ``display`` is True.

    Raises:
        ValueError: If ``image`` is None, which is what ``cv2.imread`` returns
            for a file it cannot read.
    """
    if image is None:
        raise ValueError("image is None; cv2.imread could not read the file")

    height, width, _ = image.shape
    output_image = image.copy()

    # OpenCV reads images in BGR format by default; the detector is given RGB.
    imgRGB = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    hog_face_detector = dlib.get_frontal_face_detector()
    # Second argument is the upsampling count (0 here) -- see the dlib docs.
    results = hog_face_detector(imgRGB, 0)

    # Only images with exactly one detected face are usable.
    if len(results) != 1:
        return []
    bbox = results[0]

    # Some detections extend past the image borders: clamp every coordinate
    # into the image so both the crop and the drawn rectangle stay inside it.
    x1 = min(max(0, bbox.left()), width)
    y1 = min(max(0, bbox.top()), height)
    x2 = min(max(0, bbox.right()), width)
    y2 = min(max(0, bbox.bottom()), height)

    if not display:
        return output_image[y1:y2, x1:x2]

    # Outline the detection on the copy only in display mode, so crops returned
    # in the other mode never carry the green border. The thickness scales with
    # the image width but is kept at 1 pixel or more for narrow images.
    cv2.rectangle(output_image, pt1=(x1, y1), pt2=(x2, y2), color=(0, 255, 0), thickness=max(1, width // 200))

    plt.figure(figsize=[15,15])
    # [:, :, ::-1] reverses the channel order (BGR -> RGB) for matplotlib.
    plt.subplot(121);plt.imshow(image[:,:,::-1]);plt.title("Original Image");plt.axis('off');
    plt.subplot(122);plt.imshow(output_image[:,:,::-1]);plt.title("Output");plt.axis('off');

In [3]:
# Input and output directories, both resolved against the current working
# directory and kept as Path objects for the steps that follow.
output_folder, cropped_folder = (
    Path(os.getcwd()) / folder_name
    for folder_name in ('all_images', 'cropped_images')
)
# Make sure the destination for the face crops exists before anything is written.
os.makedirs(cropped_folder, exist_ok=True)

# Wall-clock start of the run; the elapsed time is computed from it later.
s = time.time()
def process_image(image_path, target_size=180):
    """Crop the single face in one image and save it as a square thumbnail.

    Args:
        image_path: ``pathlib.Path`` of the source image.
        target_size: Side length, in pixels, of the saved square crop.

    Side effects:
        * A file that ``cv2.imread`` cannot read is skipped and left on disk.
        * An image without exactly one usable face is deleted from disk.
        * Otherwise the resized crop is written to ``cropped_folder`` under the
          same file name; the source image itself is kept.
    """
    image = cv2.imread(str(image_path))
    if image is None:
        # cv2.imread returns None for unreadable / non-image files. Skip them
        # instead of letting one bad file abort the whole batch.
        return

    cropped_image = hogDetectFaces(image, display=False)

    # hogDetectFaces returns [] when it does not find exactly one face; a list
    # has no ``size`` so the default 0 applies. A crop with zero area also has
    # size 0 and cannot be resized, so it is treated the same way.
    if getattr(cropped_image, 'size', 0) == 0:
        os.remove(image_path)
        return

    # Interpolation: cubic when either side is at or below the target size,
    # area-based when both sides are larger.
    crop_height, crop_width = cropped_image.shape[:2]
    if crop_height <= target_size or crop_width <= target_size:
        interpolation = cv2.INTER_CUBIC
    else:
        interpolation = cv2.INTER_AREA
    resized_image = cv2.resize(cropped_image, (target_size, target_size), interpolation=interpolation)

    # Save the crop next to the other crops, keeping the original file name.
    cropped_path = cropped_folder / image_path.name
    cv2.imwrite(str(cropped_path), resized_image)

# Only hand real image files to the workers: glob('*') also yields
# sub-directories and hidden/system files (e.g. .DS_Store, Thumbs.db) that
# cv2.imread cannot decode. The list is built up front (and sorted for a
# reproducible order) so the work set is fixed before workers start deleting
# rejected images from the same folder.
image_suffixes = {'.jpg', '.jpeg', '.png', '.bmp'}
image_paths = sorted(
    path for path in output_folder.glob('*')
    if path.is_file() and path.suffix.lower() in image_suffixes
)

# n_jobs=-4 leaves three CPU cores free (joblib uses n_cpus + 1 + n_jobs workers).
Parallel(n_jobs=-4)(delayed(process_image)(image_path) for image_path in image_paths)
# Elapsed wall-clock seconds since the timer `s` was started.
time.time()-s

2096.007228374481