### Importações

In [1]:
import os
import cv2
import boto3
import numpy as np
from PIL import Image
from mtcnn import MTCNN

In [2]:
def convert_name(name):
    return name.lower().replace(" ", "_").replace(".", "")

In [6]:
session = boto3.Session(
    )

s3 = session.client('s3')

### Pegar as imagens do dataset

In [9]:
local_dir = './images'
bucket_name = 'seekinglost-dados-treino-raw'

user_count = 0
image_count_start = 10
current_prefix = None
names = [""]

file_name = "list_actors.json"
actors = []

# Ler o conteúdo do arquivo de texto de volta para um array de strings
with open(file_name, "r", encoding="utf-8") as file:
    actors = file.read().splitlines()

# Exibir o array de strings
print(actors)

for actor in actors:
    cv_name = convert_name(actor)
    print(cv_name)
    response = s3.list_objects_v2(Bucket=bucket_name, Prefix=cv_name) # Prefix='timothée_chalamet/'

    if 'Contents' in response:
        for item in response['Contents']:
            file_name = item['Key']

            prefix = file_name.split('/')[0]

            if prefix != current_prefix:
                current_prefix = prefix
                user_count += 1
                user_image_count = 10
                user_image_count = image_count_start
                names.append(prefix)

            # Define o caminho completo para o arquivo de destino
            destination_path = os.path.join(local_dir, f"User.{user_count}.{user_image_count}.jpg")

            # Cria diretórios se não existirem
            os.makedirs(os.path.dirname(destination_path), exist_ok=True)

            # Baixa o arquivo
            s3.download_file(bucket_name, file_name, destination_path)

            print(f"Arquivo {file_name} baixado para {destination_path}")
            user_image_count += 1
    else:
        print("O bucket está vazio ou não contém arquivos.")

print("Download concluído.")

['Sydney Sweeney', 'Timothée Chalamet', 'Zendaya', 'Tom Holland', 'Theo James', 'Tom Cruise', 'Lebron James', 'Adam Sandler', 'Dwayne Johnson', 'Quentin Tarantino', 'Johnny Depp', 'Will Smith', 'Jennifer Lawrence', 'Leonardo DiCaprio', 'Channing Tatum', 'Nicolas Cage', 'Tom Cruise', 'Robert Downey Jr.', 'Jackie Chan', 'Keanu Reeves']
sydney_sweeney
Arquivo sydney_sweeney/Image_1.jpg baixado para ./images\User.1.10.jpg
Arquivo sydney_sweeney/Image_10.jpg baixado para ./images\User.1.11.jpg
Arquivo sydney_sweeney/Image_100.jpg baixado para ./images\User.1.12.jpg
Arquivo sydney_sweeney/Image_11.jpg baixado para ./images\User.1.13.jpg
Arquivo sydney_sweeney/Image_12.jpg baixado para ./images\User.1.14.jpg
Arquivo sydney_sweeney/Image_13.jpg baixado para ./images\User.1.15.jpg
Arquivo sydney_sweeney/Image_14.jpg baixado para ./images\User.1.16.jpg
Arquivo sydney_sweeney/Image_15.jpg baixado para ./images\User.1.17.jpg
Arquivo sydney_sweeney/Image_16.jpg baixado para ./images\User.1.18.jpg
A

In [10]:
print(names)

['', 'sydney_sweeney', 'timothée_chalamet', 'zendaya', 'tom_holland', 'theo_james', 'tom_cruise', 'lebron_james', 'adam_sandler', 'dwayne_johnson', 'quentin_tarantino', 'johnny_depp', 'will_smith', 'jennifer_lawrence', 'leonardo_dicaprio', 'channing_tatum', 'nicolas_cage', 'tom_cruise', 'robert_downey_jr.', 'jackie_chan', 'keanu_reeves']


### Pré Processamento das imagens

In [None]:
input_path = 'images'
output_path = 'processed_images'

if not os.path.exists(output_path):
    os.makedirs(output_path)

face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")


def resize_face(image, face_coordinates):
    x, y, w, h = face_coordinates
    face_roi = image[y:y+h, x:x+w]
    resized_face = cv2.resize(face_roi, (100, 100))
    return resized_face


def resize_face_with_margin(image, face_coordinates, margin):
    x, y, w, h = face_coordinates

    x -= margin
    y -= margin
    w += 2 * margin
    h += 2 * margin

    x = max(0, x)
    y = max(0, y)

    w = min(w, image.shape[1] - x)
    h = min(h, image.shape[0] - y)

    face_roi = image[y:y+h, x:x+w]
    resized_face = cv2.resize(face_roi, (260, 260))
    return resized_face


for filename in os.listdir(input_path):
    input_image_path = os.path.join(input_path, filename)
    image = cv2.imread(input_image_path)
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    equalized_gray_image = cv2.equalizeHist(gray_image)
    
    faces = face_cascade.detectMultiScale(
        gray_image,
        scaleFactor=1.5,
        minNeighbors=8,
        minSize=(30, 30)
        )

    for i, (x, y, w, h) in enumerate(faces):
        resized_face = resize_face_with_margin(gray_image, (x, y, w, h), margin=80)
        output_filename = f"{os.path.splitext(filename)[0]}_face_{i}.png"
        output_image_path = os.path.join(output_path, output_filename)

        cv2.imwrite(output_image_path, resized_face)

        
print("Processamento concluído. As imagens processadas foram salvas em:", output_path)

### Pré processamento de imagens com augment

In [3]:
input_path = 'dataset/train'
output_path = 'processed_images/train'

if not os.path.exists(output_path):
    os.makedirs(output_path)

# Inicializar detector de faces MTCNN
detector = MTCNN()

def preprocess_image(image):
    """Aplica pré-processamento básico à imagem"""
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    equalized_image = cv2.equalizeHist(gray_image)
    return equalized_image


def extract_faces(image):
    """Detecta e extrai faces da imagem usando MTCNN"""
    pixels = np.asarray(image)
    results = detector.detect_faces(pixels)
    faces = []
    for result in results:
        x, y, width, height = result['box']
        x, y = abs(x), abs(y)
        face = pixels[y:y+height, x:x+width]
        face = Image.fromarray(face).resize((100, 100))
        face = np.asarray(face)
        faces.append(face)
    return faces


def augment_image(image):
    """Aumenta a imagem aplicando várias transformações"""
    augmented_images = [image]

    # Flip horizontal
    augmented_images.append(cv2.flip(image, 1))

    # Rotate 10 degrees
    rows, cols = image.shape[:2]
    M = cv2.getRotationMatrix2D((cols/2, rows/2), 10, 1)
    augmented_images.append(cv2.warpAffine(image, M, (cols, rows)))

    # Rotate -10 degrees
    M = cv2.getRotationMatrix2D((cols/2, rows/2), -10, 1)
    augmented_images.append(cv2.warpAffine(image, M, (cols, rows)))

    # Brightness adjustment
    bright_image = cv2.convertScaleAbs(image, alpha=1.2, beta=30)
    augmented_images.append(bright_image)

    # Contrast adjustment
    contrast_image = cv2.convertScaleAbs(image, alpha=1.5, beta=0)
    augmented_images.append(contrast_image)

    # Zoom in
    zoom_factor = 1.2
    zoomed_image = cv2.resize(image, None, fx=zoom_factor, fy=zoom_factor)
    center_x, center_y = zoomed_image.shape[1]//2, zoomed_image.shape[0]//2
    crop_x, crop_y = cols//2, rows//2
    zoomed_image = zoomed_image[center_y-crop_y:center_y+crop_y, center_x-crop_x:center_x+crop_x]
    augmented_images.append(zoomed_image)

    # Shift (Translation)
    shift_x, shift_y = 20, 20
    M = np.float32([[1, 0, shift_x], [0, 1, shift_y]])
    shifted_image = cv2.warpAffine(image, M, (cols, rows))
    augmented_images.append(shifted_image)

    # Gaussian blur
    blurred_image = cv2.GaussianBlur(image, (5, 5), 0)
    augmented_images.append(blurred_image)

    return augmented_images


def augment_image_small(image):
    """Aumenta a imagem aplicando várias transformações"""
    augmented_images = [image]
    augmented_images.append(cv2.flip(image, 1))  # Flip horizontal
    rows, cols = image.shape[:2]
    M = cv2.getRotationMatrix2D((cols/2, rows/2), 10, 1)
    augmented_images.append(cv2.warpAffine(image, M, (cols, rows)))  # Rotate
    M = cv2.getRotationMatrix2D((cols/2, rows/2), -10, 1)
    augmented_images.append(cv2.warpAffine(image, M, (cols, rows)))  # Rotate
    return augmented_images


k = 1
for subdir in os.listdir(input_path):
    directory = os.path.join(input_path, subdir)

    current_subdir = subdir

    if subdir != current_subdir:
        k = 1

    for filename in os.listdir(directory):
        input_image_path = os.path.join(directory, filename)
        image = cv2.imread(input_image_path)

        if image is None:
            continue
        
        preprocessed_image = preprocess_image(image)
        
        faces = extract_faces(image)
        if not faces:
            print(f"Rosto não encontrado para a seguinte imagem {input_image_path}")
            continue

        for i, face in enumerate(faces):
            image_files = [f for f in os.listdir(directory)]

            # Verificar se há mais de 400 imagens
            # if len(image_files) >= 400:
            #     augmented_faces = augment_image_small(face)
            # else:
            #     augmented_faces = augment_image(face)
            
            augmented_faces = augment_image(face)

            for j, aug_face in enumerate(augmented_faces):
                print(subdir)
                final_output_path = os.path.join(output_path, subdir)

                if not os.path.exists(final_output_path):
                    os.makedirs(final_output_path)
                
                output_filename = f"Image_{k}_face_{i}_aug_{j}.png"
                output_image_path = os.path.join(final_output_path, output_filename)
                cv2.imwrite(output_image_path, aug_face)

        k = k + 1

print("Processamento concluído. As imagens processadas foram salvas em:", output_path)


tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise
tom_cruise

In [3]:
path = 'processed_images'

recognizer = cv2.face.LBPHFaceRecognizer_create()
detector = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml");

def trainingModel(path):
    paths = [os.path.join(path, f) for f in os.listdir(path)]     
    faceSamples = []
    ids = []

    for imagePath in paths:
        if os.path.isfile(imagePath):
            PIL_img = Image.open(imagePath).convert('L')
            img_uint8 = np.array(PIL_img, 'uint8')

            id = int(os.path.split(imagePath)[-1].split(".")[1])
            faces = detector.detectMultiScale(img_uint8)

            for (x, y, w, h) in faces:
                faceSamples.append(img_uint8[y:y+h, x:x+w])
                ids.append(id)
            

            print("Rosto treinado para {0}".format(id))

    return faceSamples, ids

print ("Treinando modelo...")
faces, ids = trainingModel(path)
recognizer.train(faces, np.array(ids))

recognizer.write('trainer/trainer.yml')



print("{0} rostos treinados.".format(len(np.unique(ids))))

Treinando modelo...
Rosto treinado para 1
Rosto treinado para 1
Rosto treinado para 1
Rosto treinado para 1
Rosto treinado para 1
Rosto treinado para 1
Rosto treinado para 1
Rosto treinado para 1
Rosto treinado para 1
Rosto treinado para 1
Rosto treinado para 1
Rosto treinado para 1
Rosto treinado para 1
Rosto treinado para 1
Rosto treinado para 1
Rosto treinado para 1
Rosto treinado para 1
Rosto treinado para 1
Rosto treinado para 1
Rosto treinado para 1
Rosto treinado para 1
Rosto treinado para 1
Rosto treinado para 1
Rosto treinado para 1
Rosto treinado para 1
Rosto treinado para 1
Rosto treinado para 1
Rosto treinado para 1
Rosto treinado para 1
Rosto treinado para 1
Rosto treinado para 1
Rosto treinado para 1
Rosto treinado para 1
Rosto treinado para 1
Rosto treinado para 1
Rosto treinado para 1
Rosto treinado para 1
Rosto treinado para 1
Rosto treinado para 1
Rosto treinado para 1
Rosto treinado para 1
Rosto treinado para 1
Rosto treinado para 1
Rosto treinado para 1
Rosto treina

### Treinamento do modelo com extração de caracteriscas com MTCNN

In [68]:
import os
import cv2
import numpy as np
from PIL import Image
from mtcnn import MTCNN

path = 'processed_images'
output_model_path = 'trainer/trainer.yml'

# Inicializar o reconhecedor de faces LBPH
recognizer = cv2.face.LBPHFaceRecognizer_create()
# Inicializar o detector de faces MTCNN
detector = MTCNN()


def preprocess_image(image):
    """Aplica pré-processamento básico à imagem"""
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    equalized_image = cv2.equalizeHist(gray_image)
    return equalized_image


def extract_faces(image):
    """Detecta e extrai faces da imagem usando MTCNN"""
    try:
        pixels = np.asarray(image)
        results = detector.detect_faces(pixels)
        faces = []
        for result in results:
            print("Result:", result)
            if 'box' in result:  # Verifica se 'box' está presente nos resultados
                x, y, width, height = result['box']
                x, y = abs(x), abs(y)
                face = pixels[y:y+height, x:x+width]
                face = Image.fromarray(face).resize((100, 100))
                face = np.asarray(face)
                faces.append(face)
            else:
                print("Resultado inválido:", result)
        return faces
    except Exception as e:
        print(f"Erro ao extrair faces: {e}")
        return []


def extract_id_from_filename(filename):
    """Extrai o ID do nome do arquivo"""
    try:
        name, ext = os.path.splitext(filename)
        parts = name.split(".")
        if len(parts) > 0:
            id = parts[1]
            return id
        else:
            raise ValueError(f"Formato de nome de arquivo inesperado: {filename}")
    except Exception as e:
        print(f"Erro ao extrair ID do nome do arquivo {filename}: {e}")
    return None


def trainingModel(path):
    paths = [os.path.join(path, f) for f in os.listdir(path)]     
    faceSamples = []
    ids = []

    for imagePath in paths:
        try:
            if os.path.isfile(imagePath):
                PIL_img = Image.open(imagePath)
                if PIL_img.mode == 'L':
                    PIL_img = Image.open(imagePath).convert('L')
                    img_uint8 = np.array(PIL_img, 'uint8')
                else:
                    print(f"A imagem já está em escala de cinza: {imagePath}")
                    img_uint8 = np.array(PIL_img, 'uint8')

                if img_uint8 is None:
                    print(f"Erro ao carregar imagem {imagePath}")
                    continue

                preprocessed_image = preprocess_image(img_uint8)

                if preprocessed_image.size == 0:
                    print(f"Processamento com erro: {filename}")
                    continue

                id = os.path.split(imagePath)[-1].split(".")[1]
                print("Id: " + id)
                
                faces = extract_faces(preprocessed_image)
                if not faces:
                    print(f"Nenhuma face encontrada na imagem {imagePath}")
                    continue
                else:
                    print(faces)

                for (x, y, w, h) in faces:
                    faceSamples.append(img_uint8[y:y+h, x:x+w])
                    ids.append(id)
                

                print("Rosto treinado para {0}".format(id))
        except Exception as e:
            print(f"Erro ao processar imagem {imagePath}: {e}")

    return faceSamples, ids

print("Treinando modelo...")
faces, ids = trainingModel(path)

if len(faces) > 0:
    recognizer.train(faces, np.array(ids))
    if not os.path.exists('trainer'):
        os.makedirs('trainer')
    recognizer.write(output_model_path)
    print(f"{len(np.unique(ids))} rostos treinados e modelo salvo em {output_model_path}")
else:
    print("Nenhum rosto foi treinado.")


Treinando modelo...
A imagem já está em escala de cinza: processed_images\User.1.100_face_0_aug_0.png
Id: 1
Erro ao extrair faces: not enough values to unpack (expected 3, got 2)
Nenhuma face encontrada na imagem processed_images\User.1.100_face_0_aug_0.png
A imagem já está em escala de cinza: processed_images\User.1.100_face_0_aug_1.png
Id: 1
Erro ao extrair faces: not enough values to unpack (expected 3, got 2)
Nenhuma face encontrada na imagem processed_images\User.1.100_face_0_aug_1.png
A imagem já está em escala de cinza: processed_images\User.1.100_face_0_aug_2.png
Id: 1
Erro ao extrair faces: not enough values to unpack (expected 3, got 2)
Nenhuma face encontrada na imagem processed_images\User.1.100_face_0_aug_2.png
A imagem já está em escala de cinza: processed_images\User.1.100_face_0_aug_3.png
Id: 1
Erro ao extrair faces: not enough values to unpack (expected 3, got 2)
Nenhuma face encontrada na imagem processed_images\User.1.100_face_0_aug_3.png
A imagem já está em escala d

In [12]:
file_name = "names.txt"
with open(file_name, "w", encoding="utf-8") as file:
    for string in names:
        file.write(string + "\n")

In [9]:
directory = "./processed_images"
bucket_name = "seekinglost-dados-raw-augment"
for root, dirs, files in os.walk(directory):
        for file in files:
            file_path = os.path.join(root, file)
            s3_key = os.path.relpath(file_path, directory)
            s3.upload_file(file_path, bucket_name, s3_key)
            print(f"Uploaded {file_path} to s3://{bucket_name}/{s3_key}")

Uploaded ./processed_images\User.1.100_face_0_aug_0.png to s3://seekinglost-dados-raw-augment/User.1.100_face_0_aug_0.png
Uploaded ./processed_images\User.1.100_face_0_aug_1.png to s3://seekinglost-dados-raw-augment/User.1.100_face_0_aug_1.png
Uploaded ./processed_images\User.1.100_face_0_aug_2.png to s3://seekinglost-dados-raw-augment/User.1.100_face_0_aug_2.png
Uploaded ./processed_images\User.1.100_face_0_aug_3.png to s3://seekinglost-dados-raw-augment/User.1.100_face_0_aug_3.png
Uploaded ./processed_images\User.1.101_face_0_aug_0.png to s3://seekinglost-dados-raw-augment/User.1.101_face_0_aug_0.png
Uploaded ./processed_images\User.1.101_face_0_aug_1.png to s3://seekinglost-dados-raw-augment/User.1.101_face_0_aug_1.png
Uploaded ./processed_images\User.1.101_face_0_aug_2.png to s3://seekinglost-dados-raw-augment/User.1.101_face_0_aug_2.png
Uploaded ./processed_images\User.1.101_face_0_aug_3.png to s3://seekinglost-dados-raw-augment/User.1.101_face_0_aug_3.png
Uploaded ./processed_ima

In [10]:
s3.upload_file("./trainer/trainer.yml", "seekinglost-modelos", "trainer/trainer.yml")

In [13]:
!jupyter nbconvert --to script face_training.ipynb

[NbConvertApp] Converting notebook face_training.ipynb to script
[NbConvertApp] Writing 4938 bytes to face_training.py
