In [1]:
import os
import cv2
import numpy as np

## Definindo funções de carregamento/salvamento das imagens

In [22]:
def load_images(folder, img_size=(96, 96)):
    """Load every readable image in ``folder`` as a resized grayscale image.

    Entries are visited in sorted filename order so the position of each
    image in the returned list is the same on every run and every machine
    (``os.listdir`` itself returns entries in arbitrary order).

    Args:
        folder: Directory whose entries are passed to ``cv2.imread``.
        img_size: Target size handed to ``cv2.resize`` as its ``dsize``
            argument (OpenCV interprets it as ``(width, height)``).

    Returns:
        A list with one grayscale image per entry that OpenCV could decode.
        Entries for which ``cv2.imread`` returns ``None`` (non-image files,
        sub-directories, corrupt files) are skipped silently.
    """
    images = []
    for file_name in sorted(os.listdir(folder)):
        img = cv2.imread(os.path.join(folder, file_name))

        # cv2.imread signals "could not read/decode" with None, not an exception.
        if img is None:
            continue

        img = cv2.resize(img, img_size)
        # imread yields BGR channel order by default, hence BGR2GRAY.
        images.append(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY))

    return images

In [23]:
def save_images(images, folder):
    """Write each image to ``folder`` as ``<index>.jpg``.

    The folder (including missing parents) is created first, because
    ``cv2.imwrite`` does not create directories and reports failure only
    through its boolean return value.

    Args:
        images: Iterable of images accepted by ``cv2.imwrite``.
        folder: Destination directory.

    Raises:
        OSError: If ``cv2.imwrite`` reports that a file could not be written.
    """
    if folder:
        os.makedirs(folder, exist_ok=True)

    for i, img in enumerate(images):
        path = os.path.join(folder, "{}.jpg".format(i))
        # imwrite returns False instead of raising when the write fails.
        if not cv2.imwrite(path, img):
            raise OSError("cv2.imwrite could not write " + path)

## Processando as imagens de homens e mulheres

In [25]:
# Preprocess the male images: read the raw files, then write the
# resized grayscale versions to the consolidated output folder.
male_src_dir = "./gender/consolidated/male"
male_out_dir = "./gender/img_totais/homens"

male_imgs = load_images(male_src_dir)
save_images(male_imgs, male_out_dir)

In [26]:
# Preprocess the female images: read the raw files, then write the
# resized grayscale versions to the consolidated output folder.
female_src_dir = "./gender/consolidated/female"
female_out_dir = "./gender/img_totais/mulheres"

female_imgs = load_images(female_src_dir)
save_images(female_imgs, female_out_dir)

## Dividindo os dados em treino, validação e teste

In [31]:
import random
import shutil

def split_dataset(folder_or, folder_des, label, train_sz=0.7, test_sz=0.15, val_sz=0.15, seed=None):
    """Randomly copy the files of one class into train/validation/test folders.

    Files are read from ``folder_or/label`` and copied to
    ``folder_des/<split>/label`` where ``<split>`` is ``'treino'``,
    ``'validacao'`` or ``'teste'``. Destination folders are created when
    missing. Files already present in the destination are not removed, so
    re-running with a different shuffle accumulates files across splits.

    Args:
        folder_or: Root folder containing one sub-folder per label.
        folder_des: Root folder under which the split folders are written.
        label: Name of the class sub-folder to split.
        train_sz: Fraction of files assigned to the training split.
        test_sz: Accepted for backward compatibility but not used: the test
            split always receives every file left after train and validation.
        val_sz: Fraction of files assigned to the validation split.
        seed: Optional seed for a reproducible shuffle. When ``None`` the
            global ``random`` module state is used.

    Raises:
        ValueError: If ``train_sz`` or ``val_sz`` is negative, or if together
            they exceed 1.
    """
    if train_sz < 0 or val_sz < 0 or train_sz + val_sz > 1 + 1e-9:
        raise ValueError(
            "train_sz and val_sz must be non-negative and sum to at most 1"
        )

    source_dir = os.path.join(folder_or, label)

    # Sort before shuffling: os.listdir order is arbitrary, so a fixed seed
    # alone would not reproduce the split. Sub-directories are skipped
    # because shutil.copyfile only handles regular files.
    files = sorted(
        name for name in os.listdir(source_dir)
        if os.path.isfile(os.path.join(source_dir, name))
    )
    rng = random if seed is None else random.Random(seed)
    rng.shuffle(files)

    n_total = len(files)               # total number of images
    n_train = int(n_total * train_sz)  # size of the training split
    n_val = int(n_total * val_sz)      # size of the validation split

    splits = {
        'treino': files[:n_train],
        'validacao': files[n_train:n_train + n_val],
        'teste': files[n_train + n_val:],  # remainder goes to test
    }

    for split_name, split_files in splits.items():
        dest_dir = os.path.join(folder_des, split_name, label)
        os.makedirs(dest_dir, exist_ok=True)
        for file_name in split_files:
            shutil.copyfile(
                os.path.join(source_dir, file_name),
                os.path.join(dest_dir, file_name),
            )

In [33]:
# Split each class folder (men first, then women) from the consolidated
# image folder into the split folders under ./gender.
for label in ("homens", "mulheres"):
    split_dataset("./gender/img_totais", "./gender", label)