UWAGA: Skrypt przyjmuje, że znajduje się w folderze, w którym jest folder "generated_dataset" utworzony dzięki test1.ipynb
Wszystkie funkcje później operują na tym folderze lub folderach utworzonych na jego podstawie
Wyjątkami są funkcje na samym dole, służące do podzielenia zbioru na zbiory do uczenia i testowania, oraz do dodania obróconych kopii obrazków w danym zbiorze - w tych funkcjach sugerowane jest podanie nazwy zbioru danych (folderu), który chcemy przetworzyć

In [2]:
import sys
sys.executable
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import shutil
import cv2
import mediapipe as mp
mp_face_detection = mp.solutions.face_detection

Nazwy folderów - zakresy wiekowe

In [3]:
# Age-range labels; each label is used as a sub-folder name inside every dataset folder processed below.
# NOTE(review): "balanced" presumably means the ranges hold similar numbers of images - confirm against test1.ipynb.
balanced_ranges = ['1-2', '3-11', '12-21', '22-25', '26-27', '28-31', '32-38', '39-48', '49-58', '59-80']       

Wycinanie twarzy Mediapipem

In [16]:
def CropImage(image, xmin, ymin, xwidth, yheight):
    """Crop a rectangular region described in normalized coordinates.

    Args:
        image: Source image array; ``shape[0]`` is the number of rows and
            ``shape[1]`` the number of columns.
        xmin, ymin: Normalized top-left corner of the crop (fractions of the
            image width and height).
        xwidth, yheight: Normalized width and height of the crop.

    Returns:
        The slice ``image[top:bottom, left:right]``. The box is clamped to the
        image bounds first, so a box that lies partly outside the image yields
        the visible part and a box entirely outside yields an empty array.
    """
    img_height, img_width = image.shape[:2]

    left = int(xmin * img_width)
    top = int(ymin * img_height)
    right = left + int(xwidth * img_width)
    bottom = top + int(yheight * img_height)

    # Negative indices would wrap around to the opposite edge when slicing,
    # so clamp every edge into [0, dimension] before indexing.
    left = min(max(left, 0), img_width)
    right = min(max(right, 0), img_width)
    top = min(max(top, 0), img_height)
    bottom = min(max(bottom, 0), img_height)

    return image[top:bottom, left:right]

def cut_out_faces(keyranges):
    """Detect faces in ./generated_dataset and save 200x200 crops to ./dataset_mediapipe.

    The destination folder is deleted and recreated on every call. For each
    label in ``keyranges`` the images of ``./generated_dataset/<label>`` are
    run through MediaPipe face detection; every detection whose crop is
    square and larger than 20x20 pixels is resized to 200x200 and written to
    ``./dataset_mediapipe/<label>/<filename>``.

    Args:
        keyranges: Iterable of sub-folder names (age-range labels).

    Raises:
        FileNotFoundError: If a source sub-folder does not exist.
    """
    dst_root = './dataset_mediapipe'
    if os.path.isdir(dst_root):
        shutil.rmtree(dst_root)
    os.mkdir(dst_root)

    jpeg_params = [int(cv2.IMWRITE_JPEG_QUALITY), 100]

    with mp_face_detection.FaceDetection(
        model_selection=1, min_detection_confidence=0.5) as face_detection:
        for age_range in keyranges:
            dst_directory = os.path.join(dst_root, age_range)
            os.mkdir(dst_directory)
            src_directory = os.path.join('./generated_dataset', age_range)
            for filename in os.listdir(src_directory):
                src_path = os.path.join(src_directory, filename)
                if not os.path.isfile(src_path):
                    continue

                image = cv2.imread(src_path)
                if image is None:
                    # Not a decodable image (e.g. Thumbs.db); skip instead of crashing in cvtColor.
                    continue

                # Convert the BGR image to RGB and process it with MediaPipe Face Detection.
                results = face_detection.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
                if not results.detections:
                    continue

                for detection in results.detections:
                    box = detection.location_data.relative_bounding_box
                    cropped_image = CropImage(image, box.xmin, box.ymin, box.width, box.height)
                    crop_height, crop_width = cropped_image.shape[:2]
                    # Keep only square crops bigger than 20x20 pixels.
                    if crop_height > 20 and crop_width > 20 and crop_height == crop_width:
                        cropped_image = cv2.resize(cropped_image, (200, 200))
                        # Every accepted detection is written to the same path, so when
                        # several faces pass the filter the last one written remains.
                        cv2.imwrite(
                            os.path.join(dst_directory, filename),
                            cropped_image,
                            jpeg_params,
                        )

cut_out_faces(balanced_ranges)

Konwersja do odcieni szarości

In [17]:
def filter_dataset_greyscale(keyranges):
    """Convert every image of ./dataset_mediapipe to greyscale in ./dataset_greyscale.

    The destination folder is deleted and recreated on every call. Files that
    OpenCV cannot decode are skipped.

    Args:
        keyranges: Iterable of sub-folder names (age-range labels).

    Raises:
        FileNotFoundError: If a source sub-folder does not exist.
    """
    dst_root = "./dataset_greyscale"
    if os.path.isdir(dst_root):
        shutil.rmtree(dst_root)
    os.mkdir(dst_root)

    jpeg_params = [int(cv2.IMWRITE_JPEG_QUALITY), 100]

    for age_range in keyranges:
        dst_directory = os.path.join(dst_root, age_range)
        os.mkdir(dst_directory)
        src_directory = os.path.join('./dataset_mediapipe', age_range)
        for filename in os.listdir(src_directory):
            src_path = os.path.join(src_directory, filename)
            if not os.path.isfile(src_path):
                continue

            image = cv2.imread(src_path)
            if image is None:
                # Not a decodable image; skip instead of crashing in cvtColor.
                continue

            # Write the greyscale version to dataset_greyscale/<label>/<filename>.
            image_filtered = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            cv2.imwrite(
                os.path.join(dst_directory, filename),
                image_filtered,
                jpeg_params,
            )
            
filter_dataset_greyscale(balanced_ranges)

Filtrowanie Canny edges na obrazkach w odcieniach szarości

In [11]:
def filter_dataset_canny_edges(keyranges):
    """Apply the Canny edge detector to ./dataset_greyscale, writing ./dataset_canny_edges.

    The destination folder is deleted and recreated on every call. Files that
    OpenCV cannot decode are skipped.

    Args:
        keyranges: Iterable of sub-folder names (age-range labels).

    Raises:
        FileNotFoundError: If a source sub-folder does not exist.
    """
    dst_root = "./dataset_canny_edges"
    if os.path.isdir(dst_root):
        shutil.rmtree(dst_root)
    os.mkdir(dst_root)

    jpeg_params = [int(cv2.IMWRITE_JPEG_QUALITY), 100]

    for age_range in keyranges:
        dst_directory = os.path.join(dst_root, age_range)
        os.mkdir(dst_directory)
        src_directory = os.path.join('./dataset_greyscale', age_range)
        for filename in os.listdir(src_directory):
            src_path = os.path.join(src_directory, filename)
            if not os.path.isfile(src_path):
                continue

            image = cv2.imread(src_path)
            if image is None:
                # Not a decodable image; skip instead of crashing in Canny.
                continue

            # 50 and 75 are the two hysteresis thresholds passed to Canny.
            image_filtered = cv2.Canny(image, 50, 75)
            cv2.imwrite(
                os.path.join(dst_directory, filename),
                image_filtered,
                jpeg_params,
            )
            
filter_dataset_canny_edges(balanced_ranges)

Rozmycie Gaussa (w praktyce pełni rolę odszumiania)

In [16]:
def filter_dataset_gaussian_blur(keyranges):
    """Blur every image of ./dataset_greyscale with a 5x5 Gaussian into ./dataset_gaussian_blur.

    The destination folder is deleted and recreated on every call. Files that
    OpenCV cannot decode are skipped.

    Args:
        keyranges: Iterable of sub-folder names (age-range labels).

    Raises:
        FileNotFoundError: If a source sub-folder does not exist.
    """
    dst_root = "./dataset_gaussian_blur"
    if os.path.isdir(dst_root):
        shutil.rmtree(dst_root)
    os.mkdir(dst_root)

    # The blur has always used a 5x5 kernel; the former unused ``ksize = 3``
    # local suggested otherwise and was removed.
    kernel_size = (5, 5)
    jpeg_params = [int(cv2.IMWRITE_JPEG_QUALITY), 100]

    for age_range in keyranges:
        dst_directory = os.path.join(dst_root, age_range)
        os.mkdir(dst_directory)
        src_directory = os.path.join('./dataset_greyscale', age_range)
        for filename in os.listdir(src_directory):
            src_path = os.path.join(src_directory, filename)
            if not os.path.isfile(src_path):
                continue

            image = cv2.imread(src_path)
            if image is None:
                # Not a decodable image; skip instead of crashing in GaussianBlur.
                continue

            # sigmaX is passed as 0, as in the original call.
            image_filtered = cv2.GaussianBlur(image, kernel_size, 0)
            cv2.imwrite(
                os.path.join(dst_directory, filename),
                image_filtered,
                jpeg_params,
            )
            
filter_dataset_gaussian_blur(balanced_ranges)

Canny Edges po rozmyciu Gaussa

In [22]:
def filter_dataset_canny_edges_gaussian_blur(keyranges):
    """Apply Canny to ./dataset_gaussian_blur, writing ./dataset_canny_edges_gaussian_blur.

    The destination folder is deleted and recreated on every call. Files that
    OpenCV cannot decode are skipped.

    Args:
        keyranges: Iterable of sub-folder names (age-range labels).

    Raises:
        FileNotFoundError: If a source sub-folder does not exist.
    """
    dst_root = "./dataset_canny_edges_gaussian_blur"
    if os.path.isdir(dst_root):
        shutil.rmtree(dst_root)
    os.mkdir(dst_root)

    jpeg_params = [int(cv2.IMWRITE_JPEG_QUALITY), 100]

    for age_range in keyranges:
        dst_directory = os.path.join(dst_root, age_range)
        os.mkdir(dst_directory)
        src_directory = os.path.join('./dataset_gaussian_blur', age_range)
        for filename in os.listdir(src_directory):
            src_path = os.path.join(src_directory, filename)
            if not os.path.isfile(src_path):
                continue

            image = cv2.imread(src_path)
            if image is None:
                # Not a decodable image; skip instead of crashing in Canny.
                continue

            # 50 and 75 are the two hysteresis thresholds passed to Canny.
            image_filtered = cv2.Canny(image, 50, 75)
            cv2.imwrite(
                os.path.join(dst_directory, filename),
                image_filtered,
                jpeg_params,
            )
            
filter_dataset_canny_edges_gaussian_blur(balanced_ranges)

Filtrowanie Sobel

In [17]:
def filter_dataset_sobel_x(keyranges):
    """Apply the Sobel x-derivative to ./dataset_greyscale, writing ./dataset_sobel_x.

    The destination folder is deleted and recreated on every call. Files that
    OpenCV cannot decode are skipped.

    Args:
        keyranges: Iterable of sub-folder names (age-range labels).

    Raises:
        FileNotFoundError: If a source sub-folder does not exist.
    """
    dst_root = "./dataset_sobel_x"
    if os.path.isdir(dst_root):
        shutil.rmtree(dst_root)
    os.mkdir(dst_root)

    ksize = 3
    jpeg_params = [int(cv2.IMWRITE_JPEG_QUALITY), 100]

    for age_range in keyranges:
        dst_directory = os.path.join(dst_root, age_range)
        os.mkdir(dst_directory)
        src_directory = os.path.join('./dataset_greyscale', age_range)
        for filename in os.listdir(src_directory):
            src_path = os.path.join(src_directory, filename)
            if not os.path.isfile(src_path):
                continue

            image = cv2.imread(src_path)
            if image is None:
                # Not a decodable image; skip instead of crashing in Sobel.
                continue

            # First derivative along x (dx=1, dy=0), computed as float32.
            # NOTE(review): the float32 result can be negative; how cv2.imwrite maps it
            # to an 8-bit JPEG is not visible here - confirm negative gradients are not
            # silently lost (cv2.convertScaleAbs is the usual explicit conversion).
            image_filtered = cv2.Sobel(image, ddepth=cv2.CV_32F, dx=1, dy=0, ksize=ksize)
            cv2.imwrite(
                os.path.join(dst_directory, filename),
                image_filtered,
                jpeg_params,
            )
            
filter_dataset_sobel_x(balanced_ranges)

In [18]:
def filter_dataset_sobel_y(keyranges):
    """Apply the Sobel y-derivative to ./dataset_greyscale, writing ./dataset_sobel_y.

    The destination folder is deleted and recreated on every call. Files that
    OpenCV cannot decode are skipped.

    Args:
        keyranges: Iterable of sub-folder names (age-range labels).

    Raises:
        FileNotFoundError: If a source sub-folder does not exist.
    """
    dst_root = "./dataset_sobel_y"
    if os.path.isdir(dst_root):
        shutil.rmtree(dst_root)
    os.mkdir(dst_root)

    ksize = 3
    jpeg_params = [int(cv2.IMWRITE_JPEG_QUALITY), 100]

    for age_range in keyranges:
        dst_directory = os.path.join(dst_root, age_range)
        os.mkdir(dst_directory)
        src_directory = os.path.join('./dataset_greyscale', age_range)
        for filename in os.listdir(src_directory):
            src_path = os.path.join(src_directory, filename)
            if not os.path.isfile(src_path):
                continue

            image = cv2.imread(src_path)
            if image is None:
                # Not a decodable image; skip instead of crashing in Sobel.
                continue

            # First derivative along y (dx=0, dy=1), computed as float32.
            # NOTE(review): the float32 result can be negative; how cv2.imwrite maps it
            # to an 8-bit JPEG is not visible here - confirm negative gradients are not
            # silently lost (cv2.convertScaleAbs is the usual explicit conversion).
            image_filtered = cv2.Sobel(image, ddepth=cv2.CV_32F, dx=0, dy=1, ksize=ksize)
            cv2.imwrite(
                os.path.join(dst_directory, filename),
                image_filtered,
                jpeg_params,
            )
            
filter_dataset_sobel_y(balanced_ranges)

Filtrowanie Sobel po obrazkach wygładzonych Gaussem

In [None]:
def filter_dataset_sobel_x_gauss(keyranges):
    """Apply the Sobel x-derivative to ./dataset_gaussian_blur, writing ./dataset_sobel_x_gauss.

    The destination folder is deleted and recreated on every call. Files that
    OpenCV cannot decode are skipped.

    Args:
        keyranges: Iterable of sub-folder names (age-range labels).

    Raises:
        FileNotFoundError: If a source sub-folder does not exist.
    """
    dst_root = "./dataset_sobel_x_gauss"
    if os.path.isdir(dst_root):
        shutil.rmtree(dst_root)
    os.mkdir(dst_root)

    ksize = 3
    jpeg_params = [int(cv2.IMWRITE_JPEG_QUALITY), 100]

    for age_range in keyranges:
        dst_directory = os.path.join(dst_root, age_range)
        os.mkdir(dst_directory)
        src_directory = os.path.join('./dataset_gaussian_blur', age_range)
        for filename in os.listdir(src_directory):
            src_path = os.path.join(src_directory, filename)
            if not os.path.isfile(src_path):
                continue

            image = cv2.imread(src_path)
            if image is None:
                # Not a decodable image; skip instead of crashing in Sobel.
                continue

            # First derivative along x (dx=1, dy=0), computed as float32.
            # NOTE(review): the float32 result can be negative; how cv2.imwrite maps it
            # to an 8-bit JPEG is not visible here - confirm negative gradients are not
            # silently lost (cv2.convertScaleAbs is the usual explicit conversion).
            image_filtered = cv2.Sobel(image, ddepth=cv2.CV_32F, dx=1, dy=0, ksize=ksize)
            cv2.imwrite(
                os.path.join(dst_directory, filename),
                image_filtered,
                jpeg_params,
            )
            
filter_dataset_sobel_x_gauss(balanced_ranges)

In [None]:
def filter_dataset_sobel_y_gauss(keyranges):
    """Apply the Sobel y-derivative to ./dataset_gaussian_blur, writing ./dataset_sobel_y_gauss.

    The destination folder is deleted and recreated on every call. Files that
    OpenCV cannot decode are skipped.

    Args:
        keyranges: Iterable of sub-folder names (age-range labels).

    Raises:
        FileNotFoundError: If a source sub-folder does not exist.
    """
    dst_root = "./dataset_sobel_y_gauss"
    if os.path.isdir(dst_root):
        shutil.rmtree(dst_root)
    os.mkdir(dst_root)

    ksize = 3
    jpeg_params = [int(cv2.IMWRITE_JPEG_QUALITY), 100]

    for age_range in keyranges:
        dst_directory = os.path.join(dst_root, age_range)
        os.mkdir(dst_directory)
        src_directory = os.path.join('./dataset_gaussian_blur', age_range)
        for filename in os.listdir(src_directory):
            src_path = os.path.join(src_directory, filename)
            if not os.path.isfile(src_path):
                continue

            image = cv2.imread(src_path)
            if image is None:
                # Not a decodable image; skip instead of crashing in Sobel.
                continue

            # First derivative along y (dx=0, dy=1), computed as float32.
            # NOTE(review): the float32 result can be negative; how cv2.imwrite maps it
            # to an 8-bit JPEG is not visible here - confirm negative gradients are not
            # silently lost (cv2.convertScaleAbs is the usual explicit conversion).
            image_filtered = cv2.Sobel(image, ddepth=cv2.CV_32F, dx=0, dy=1, ksize=ksize)
            cv2.imwrite(
                os.path.join(dst_directory, filename),
                image_filtered,
                jpeg_params,
            )
            
filter_dataset_sobel_y_gauss(balanced_ranges)

Podzielenie zbioru danych

In [None]:
def split_dataset(dataset_name, percent_learn, shuffle=True, seed=0):
    """Copy a per-class dataset folder into ``<name>_learn`` and ``<name>_validation``.

    Both destination folders are deleted and recreated on every call. For
    each class sub-folder of ``./<dataset_name>`` the first ``percent_learn``
    percent of its files (rounded up) go to the learn set and the rest to the
    validation set.

    Args:
        dataset_name: Name of the dataset folder located in the working directory.
        percent_learn: Percentage (0-100) of each class copied to the learn set.
        shuffle: When True (default) the sorted file list of each class is
            shuffled before splitting, so the split does not follow the
            file-name order (file names that share a prefix would otherwise
            land in the same subset).
        seed: Seed of the shuffle; the same seed reproduces the same split.

    Raises:
        FileNotFoundError: If ``./<dataset_name>`` does not exist.
    """
    import random

    src_root = f"./{dataset_name}"
    learn_root = f"./{dataset_name}_learn"
    validation_root = f"./{dataset_name}_validation"

    for dst_root in (learn_root, validation_root):
        if os.path.isdir(dst_root):
            shutil.rmtree(dst_root)
        os.mkdir(dst_root)

    rng = random.Random(seed)

    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    for dirname in sorted(os.listdir(src_root)):
        class_dir = os.path.join(src_root, dirname)
        if not os.path.isdir(class_dir):
            # A stray file next to the class folders is not a class; skip it.
            continue

        learn_dir = os.path.join(learn_root, dirname)
        validation_dir = os.path.join(validation_root, dirname)
        os.mkdir(learn_dir)
        os.mkdir(validation_dir)

        filenames = sorted(
            name for name in os.listdir(class_dir)
            if os.path.isfile(os.path.join(class_dir, name))
        )
        if shuffle:
            rng.shuffle(filenames)

        learn_threshold = (percent_learn / 100) * len(filenames)
        for index, filename in enumerate(filenames):
            target_dir = learn_dir if index < learn_threshold else validation_dir
            shutil.copyfile(
                src=os.path.join(class_dir, filename),
                dst=os.path.join(target_dir, filename),
            )

split_dataset("dataset_sobel_y", 80)

Utworzenie obróconych wersji obrazków dla danego zbioru

In [25]:
def augment_images(dataset_name, keyranges):
    """Add rotated and mirrored-then-rotated copies of every image in a dataset.

    For each image found in ``./<dataset_name>/<label>`` eight JPEG copies are
    written next to it: rotations by -30, -15, 15 and 30 degrees of the
    original (``<prefix>_<angle>.jpg``) and of its horizontal mirror
    (``<prefix>_<angle>_flipped.jpg``). ``<prefix>`` is the file name up to
    its first dot. Files that OpenCV cannot decode are skipped.

    The rotation centre (100, 100) and output size (200, 200) are fixed.
    Running the function again on the same folder also augments the copies
    produced by the previous run.

    Args:
        dataset_name: Name of the dataset folder located in the working directory.
        keyranges: Iterable of sub-folder names (age-range labels).

    Raises:
        FileNotFoundError: If a sub-folder for one of ``keyranges`` does not exist.
    """
    image_size = (200, 200)
    image_center = (100, 100)
    jpeg_params = [int(cv2.IMWRITE_JPEG_QUALITY), 100]

    # One rotation matrix per angle. In OpenCV a positive angle rotates
    # counter-clockwise, so -30/-15 are the clockwise rotations.
    rotation_matrices = {
        angle: cv2.getRotationMatrix2D(image_center, angle, 1)
        for angle in (-30, -15, 15, 30)
    }

    for age_range in keyranges:
        directory = os.path.join(f"./{dataset_name}", age_range)
        # os.listdir returns a list up front, so files written below are not revisited in this run.
        for filename in os.listdir(directory):
            src_path = os.path.join(directory, filename)
            if not os.path.isfile(src_path):
                continue

            image = cv2.imread(src_path)
            if image is None:
                # Not a decodable image; skip instead of crashing in warpAffine.
                continue

            filename_prefix = filename.split(".")[0]
            # (source image, file-name suffix): the original and its horizontal mirror.
            variants = (
                (image, ""),
                (cv2.flip(image, 1), "_flipped"),
            )
            for variant, suffix in variants:
                for angle, matrix in rotation_matrices.items():
                    rotated = cv2.warpAffine(variant, matrix, image_size)
                    cv2.imwrite(
                        f"./{dataset_name}/{age_range}/{filename_prefix}_{angle}{suffix}.jpg",
                        rotated,
                        jpeg_params,
                    )

augment_images("dataset_test", balanced_ranges)

FileNotFoundError: [WinError 3] The system cannot find the path specified: './dataset_test\\3-11'

Konwertuj pokatalogowany dataset na dataset zbiorczy

In [29]:
def merge_datasets(dataset, keyranges, image_limit = -1):
    """Flatten a per-label dataset into the single folder ``./data_merged_<dataset>``.

    The destination folder is deleted and recreated on every call. Every
    decodable image of ``./<dataset>/<label>`` is re-written as a JPEG under
    its original file name directly inside the destination folder.

    Args:
        dataset: Name of the dataset folder located in the working directory.
        keyranges: Iterable of sub-folder names (age-range labels).
        image_limit: When greater than 0, at most this many images are copied
            per age, where the age is the integer before the first ``_`` of
            the file name. Values <= 0 disable the limit.

    Raises:
        FileNotFoundError: If a source sub-folder does not exist.
        ValueError: If ``image_limit`` > 0 and a file name does not start with
            an integer age followed by ``_``.
    """
    dst_root = f"./data_merged_{dataset}"
    if os.path.isdir(dst_root):
        shutil.rmtree(dst_root)
    os.mkdir(dst_root)

    jpeg_params = [int(cv2.IMWRITE_JPEG_QUALITY), 100]
    # Copies made so far per age. A per-age table keeps the limit correct even
    # when files of one age are not listed consecutively.
    copied_per_age = {}

    for age_range in keyranges:
        src_directory = os.path.join(f'./{dataset}', age_range)
        for filename in os.listdir(src_directory):
            src_path = os.path.join(src_directory, filename)
            if not os.path.isfile(src_path):
                continue

            age = None
            if image_limit > 0:
                age = int(filename.split("_")[0])
                if copied_per_age.get(age, 0) >= image_limit:
                    continue

            image = cv2.imread(src_path)
            if image is None:
                # Not a decodable image; skip instead of crashing in imwrite.
                continue

            # NOTE(review): the image is decoded and re-encoded rather than copied
            # byte-for-byte; kept as in the original - confirm whether a plain file
            # copy would be acceptable downstream.
            cv2.imwrite(
                os.path.join(dst_root, filename),
                image,
                jpeg_params,
            )
            if age is not None:
                copied_per_age[age] = copied_per_age.get(age, 0) + 1
            
merge_datasets("dataset_greyscale", balanced_ranges, 1)

KeyboardInterrupt: 