# 이미지 전처리


In [1]:
import pathlib
import uuid
from collections import Counter

import cv2
import numpy as np
from PIL import Image

# 데이터셋 초기 전처리

나온 얼굴만 뽑아내기 (얼굴 여러개 포함)


In [3]:
from facenet_pytorch import MTCNN


def face_extraction(image, mtcnn):
    # image = np.array(image)

    # Check the number of dimensions in the image
    if len(image.shape) == 3 and image.shape[2] == 4:
        # Convert the image from RGBA to RGB
        image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)

    # Detect faces
    boxes, _ = mtcnn.detect(image)

    faces = []
    if boxes is not None and len(boxes) > 0:
        for i, box in enumerate(boxes):
            box = [int(b) for b in box]
            face = image[box[1]: box[3], box[0]: box[2]]
            faces.append(face)
    return faces


mtcnn = MTCNN()

face_counter = Counter()

name_list = ["이승기", "남주혁", "박보영", "서강준"]

for who in name_list:
    pathlib.Path(f"faces/{who}_faces").mkdir(parents=True, exist_ok=True)
    for file in pathlib.Path(f"imgs/{who}_images").iterdir():
        image = cv2.imread(str(file))
        # image = Image.open(file)
        print(f"processing {file}")
        faces = face_extraction(image, mtcnn)
        for i, face in enumerate(faces):
            if face is not None and face.size != 0:
                cv2.imwrite(f"faces/{who}_faces/{uuid.uuid4()}_{i}.png", face)

processing imgs\이승기_images\이승기_0.jpeg


AttributeError: 'NoneType' object has no attribute 'shape'

In [None]:
# from sklearn.metrics.pairwise import cosine_similarity

# 얼굴 빈도가 적은 파일 삭제 (패스함. 그대신 손으로 했음)


In [None]:
# import face_recognition

# # Load known face encodings and labels
# known_face_encodings = ...
# known_face_labels = ...

# def face_extraction_and_recognition(image, mtcnn):
#     # Detect faces
#     boxes, _ = mtcnn.detect(image)

#     faces = []
#     if boxes is not None:
#         for i, box in enumerate(boxes):
#             box = [int(b) for b in box]
#             face = image[box[1] : box[3], box[0] : box[2]]
#             # Compute face encoding
#             face_encoding = face_recognition.face_encodings(face)
#             # Compare face encoding with known face encodings
#             matches = face_recognition.compare_faces(known_face_encodings, face_encoding)
#             if True in matches:
#                 matched_label = known_face_labels[matches.index(True)]
#                 faces.append((face, matched_label))
#     return faces

# face_counter = Counter()

# for who in name_list:
#     pathlib.Path(f"faces/{who}_faces").mkdir(parents=True, exist_ok=True)
#     for file in pathlib.Path(f"imgs/{who}_images").iterdir():
#         image = cv2.imread(str(file))
#         print(f"processing {file}")
#         faces = face_extraction_and_recognition(image, mtcnn)
#         for i, (face, label) in enumerate(faces):
#             if face is not None and face.size != 0 and label == who:
#                 cv2.imwrite(f"faces/{who}_faces/{uuid.uuid4()}_{i}.png", face)
#                 face_counter[who] += 1

In [None]:
# name_list = ["이승기", "남주혁", "박보영", "서강준"]

# # for other survived files, grayscale and resize
# for who in name_list:
#     pathlib.Path(f"faces/{who}_faces_resized").mkdir(parents=True, exist_ok=True)
#     for file in pathlib.Path(f"faces/{who}_faces").iterdir():
#         image = cv2.imread(str(file))
#         gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY, cv2.IMREAD_GRAYSCALE)
#         resized = cv2.resize(gray, (299, 299))
#         cv2.imwrite(f"faces/{who}_faces_resized/{file.name}", resized)

# 파이토치로 학습


In [2]:
import torch
from torchvision.datasets import ImageFolder
from torchvision import transforms

# Load your images
data_dir = "faces"
dataset = ImageFolder(data_dir, transform=transforms.ToTensor())

# Calculate the mean and std
mean = torch.stack([t.mean(1).mean(1) for t, _ in dataset]).mean(0)
std = torch.stack([t.std(1).std(1) for t, _ in dataset]).std(0)

print(mean)
print(std)
# tensor([0.6144, 0.4978, 0.4530])
# tensor([0.0212, 0.0162, 0.0165])

tensor([0.6144, 0.4978, 0.4530])
tensor([0.0212, 0.0162, 0.0165])


In [2]:
from torch.utils.data import DataLoader
from torchvision import transforms
from torch.utils.data import random_split
from torchvision.datasets import ImageFolder

data_dir = "faces"

# Define your transformations
train_transforms = transforms.Compose(
    [
        transforms.Resize((299, 299)),
        transforms.RandomResizedCrop(299),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(40),
        transforms.RandomAffine(0, shear=20, scale=(0.8, 1.2)),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
        transforms.ToTensor(),
        transforms.Normalize([0.6144, 0.4978, 0.4530], [0.0212, 0.0162, 0.0165]),
    ]
)

validation_transforms = transforms.Compose(
    [
        transforms.Resize((299, 299)),
        transforms.CenterCrop(299),
        transforms.ToTensor(),
        transforms.Normalize([0.6144, 0.4978, 0.4530], [0.0212, 0.0162, 0.0165]),
    ]
)

# Load your images and labels
dataset = ImageFolder(data_dir, transform=train_transforms)

# Split the data into training and validation sets
train_size = int(0.8 * len(dataset))
valid_size = len(dataset) - train_size
train_dataset, valid_dataset = random_split(dataset, [train_size, valid_size])

# Apply the validation transformations to the validation dataset
valid_dataset.dataset.transform = validation_transforms

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=64, shuffle=True)

print(ImageFolder(data_dir).class_to_idx)

{'남주혁_faces': 0, '박보영_faces': 1, '서강준_faces': 2, '이승기_faces': 3}


In [3]:
import torch
from torch import nn, optim
from torchvision import models

print(torch.cuda.is_available())
print(torch.cuda.get_device_name(0))
# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load pre-trained model
model = models.inception_v3(weights="IMAGENET1K_V1")

# Freeze parameters so we don't backprop through them
for param in model.parameters():
    param.requires_grad = False

# Modify the last layer
num_classes = len(dataset.classes)  # 4

model.fc = nn.Sequential(
    nn.Flatten(),
    nn.Linear(model.fc.in_features, 1024),
    nn.RReLU(),
    nn.Dropout(0.128),
    nn.Linear(1024, num_classes),
    nn.LogSoftmax(dim=1),
)

model = model.to(device)
# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.016)

# Train the model
epochs = 40
for epoch in range(epochs):
    model.train()
    train_loss = 0
    for inputs, labels in train_loader:
        inputs, labels = (
            inputs.to(device),
            labels.to(device),
        )  # Move inputs and labels to device
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs.logits, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    # Validate the model
    model.eval()
    valid_loss = 0
    accuracy = 0
    with torch.no_grad():
        for inputs, labels in valid_loader:
            inputs, labels = (
                inputs.to(device),
                labels.to(device),
            )  # Move inputs and labels to device
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            valid_loss += loss.item()

            ps = torch.exp(outputs)
            top_p, top_class = ps.topk(1, dim=1)
            equals = top_class == labels.view(*top_class.shape)
            accuracy += torch.mean(equals.type(torch.FloatTensor))

    print(
        f"Epoch: {epoch + 1}/{epochs}.. Training Loss: {train_loss / len(train_loader):.3f}.. Validation Loss: {valid_loss / len(valid_loader):.3f}.. Accuracy: {accuracy / len(valid_loader):.3f}"
    )

True
NVIDIA GeForce RTX 4090
Epoch: 1/40.. Training Loss: 1.583.. Validation Loss: 1.469.. Accuracy: 0.281
Epoch: 2/40.. Training Loss: 1.290.. Validation Loss: 1.328.. Accuracy: 0.352
Epoch: 3/40.. Training Loss: 1.177.. Validation Loss: 1.590.. Accuracy: 0.211
Epoch: 4/40.. Training Loss: 1.104.. Validation Loss: 1.082.. Accuracy: 0.508
Epoch: 5/40.. Training Loss: 0.975.. Validation Loss: 1.329.. Accuracy: 0.469
Epoch: 6/40.. Training Loss: 0.970.. Validation Loss: 1.251.. Accuracy: 0.438
Epoch: 7/40.. Training Loss: 0.847.. Validation Loss: 1.195.. Accuracy: 0.438
Epoch: 8/40.. Training Loss: 0.793.. Validation Loss: 1.033.. Accuracy: 0.602
Epoch: 9/40.. Training Loss: 0.723.. Validation Loss: 0.957.. Accuracy: 0.586
Epoch: 10/40.. Training Loss: 0.680.. Validation Loss: 1.061.. Accuracy: 0.453
Epoch: 11/40.. Training Loss: 0.682.. Validation Loss: 0.918.. Accuracy: 0.656
Epoch: 12/40.. Training Loss: 0.560.. Validation Loss: 0.872.. Accuracy: 0.625
Epoch: 13/40.. Training Loss: 0.

In [4]:
# Initialize the number of correct predictions
correct_preds = 0
total_preds = 0

# No need to track gradients for validation, we're not optimizing.
with torch.no_grad():
    for inputs, labels in valid_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        # Forward pass
        outputs = model(inputs)

        # Get the predicted class with the highest score
        _, predictions = torch.max(outputs, 1)

        # Count number of correct predictions
        correct_preds += (predictions == labels).sum().item()
        total_preds += labels.size(0)

# Calculate accuracy
accuracy = correct_preds / total_preds
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 73.61%


In [11]:
from PIL import Image

# Define the transformations
transform = transforms.Compose(
    [
        transforms.Resize((299, 299)),
        transforms.ToTensor(),
        transforms.Normalize([0.6144, 0.4978, 0.4530], [0.0212, 0.0162, 0.0165]),
    ]
)

# Load the image
image = cv2.imread("/home/ubuntu/ai_service_project2/app/uploads/NamJoohyuk17.jpg")
image = cv2.cvtColor(image, cv2.COLOR_BGRA2BGR)
image = Image.fromarray(image)

# Apply the transformations
input_image = transform(image)

# Add an extra dimension for the batch
input_image = input_image.unsqueeze(0)

# Move the input to the same device as the model
input_image = input_image.to(device)

# Make the prediction
with torch.no_grad():
    model.eval()
    outputs = model(input_image)
    predictions = torch.exp(outputs)

# Get the class labels
class_labels = ["남주혁", "박보영", "서강준", "이승기"]

# Get the index of the class with the highest probability
_, predict_class_index = torch.max(predictions, 1)

# Get the name of the class
predict_label = class_labels[predict_class_index]
print(predictions)

print(
    f"당신이 {predict_label}일 확률은 {predictions[0][predict_class_index].item() * 100:.2f}% 입니다."
)

tensor([[0.6831, 0.0360, 0.1716, 0.1092]], device='cuda:0')
당신이 남주혁일 확률은 68.31% 입니다.


In [20]:
pathlib.Path("./model").mkdir(parents=True, exist_ok=True)
torch.save(model, "./model/model.pth")