In [2]:
import cv2
import torch
import torch.optim as optim
import torch.functional as F
import matplotlib.pyplot as plt

from torch import nn
from torchvision import transforms, models, datasets


In [3]:
import torch.utils.data.dataloader


data_dir = '../../data/face-recog'

train_transform = transforms.Compose([transforms.Resize((640, 640)),
                                       transforms.RandomHorizontalFlip(),
                                       transforms.RandomRotation(30),
                                       transforms.ToTensor(),
                                       transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

valid_transform = transforms.Compose([transforms.Resize((640, 640)),
                                       transforms.RandomHorizontalFlip(),
                                       transforms.ToTensor(),
                                       transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

train_data = datasets.ImageFolder(data_dir + '/train', transform=train_transform)
valid_data = datasets.ImageFolder(data_dir + '/valid', transform=valid_transform)

trainloader = torch.utils.data.DataLoader(train_data, batch_size=32, shuffle=True)
validloader = torch.utils.data.DataLoader(valid_data, batch_size=32)


In [4]:
from ultralytics import YOLO

yolo_model = YOLO("yolo11n.pt")

In [5]:
def detect_face(image_path):
    results = yolo_model(image_path)
    detections = results[0].xyxy

    img = cv2.imread(image_path)
    for box in detections:
        padding = 10
        x1, y1 = max(0, x1 - padding), max(0, y1 - padding)
        x2, y2 = min(img.shape[1], x2 + padding), min(img.shape[0], y2 + padding)
        face = img[y1:y2, x1:x2]
        face = img[y1:y2, x1:x2]
        return face
    
    return None

In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [7]:
num_classes = len(train_data.classes)
print(num_classes)

model = models.resnet34(pretrained=True)

model.fc = nn.Sequential(nn.Linear(model.fc.in_features, 256),
                         nn.ReLU(),
                         nn.Dropout(0.2),
                         nn.Linear(256, 128),
                         nn.ReLU(),
                         nn.Dropout(0.2),
                         nn.Linear(128, 64),
                         nn.ReLU(),
                         nn.Dropout(0.2),
                         nn.Linear(64, num_classes))

3




In [8]:
for param in model.fc.parameters():
    param.requires_grad = True

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)

model.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [9]:
epochs = 100
steps = 0
running_loss = 0
print_every = 5
# stop_at_step = 30
for epoch in range(epochs):
    for inputs, labels in trainloader:
        steps += 1
        inputs, labels = inputs.to(device), labels.to(device)

        logps = model.forward(inputs)
        loss = criterion(logps, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        if steps % print_every == 0:
            valid_loss = 0
            accuracy = 0
            model.eval()

            with torch.no_grad():
                for inputs, labels in validloader:
                    inputs, labels = inputs.to(device), labels.to(device)
                    
                    logps = model.forward(inputs)
                    batch_loss = criterion(logps, labels)
                    valid_loss += batch_loss.item()

                    ps = torch.exp(logps)
                    top_p, top_class = ps.topk(1, dim=1)
                    equals = top_class == labels.view(*top_class.shape)
                    accuracy += torch.mean(equals.type(torch.FloatTensor)).item()

            print(f"Epoch {epoch+1}/{epochs} | "
                  f"Step {steps}/{len(trainloader) * epochs} | "
                  f"Train Loss: {running_loss/print_every:.3f} | "
                  f"Test Loss: {valid_loss/len(validloader):.3f} | "
                  f"Test Accuracy: {accuracy/len(validloader):.3f}")
            running_loss = 0
            model.train()

        # if steps >= stop_at_step:
        #     print(f"Stopping early at step {steps}")
        #     break

path = 'trained_face-recog.pth'
torch.save(model.state_dict(), 'trained_face-recog.pth')
print(f'Model saved to {path}')

Epoch 5/100 | Step 5/100 | Train Loss: 1.011 | Test Loss: 1.107 | Test Accuracy: 0.250
Epoch 10/100 | Step 10/100 | Train Loss: 0.892 | Test Loss: 1.181 | Test Accuracy: 0.500
Epoch 15/100 | Step 15/100 | Train Loss: 0.808 | Test Loss: 1.138 | Test Accuracy: 0.250
Epoch 20/100 | Step 20/100 | Train Loss: 0.695 | Test Loss: 0.991 | Test Accuracy: 0.750
Epoch 25/100 | Step 25/100 | Train Loss: 0.574 | Test Loss: 0.949 | Test Accuracy: 0.500
Epoch 30/100 | Step 30/100 | Train Loss: 0.406 | Test Loss: 0.926 | Test Accuracy: 0.500
Epoch 35/100 | Step 35/100 | Train Loss: 0.276 | Test Loss: 0.716 | Test Accuracy: 0.500
Epoch 40/100 | Step 40/100 | Train Loss: 0.247 | Test Loss: 0.639 | Test Accuracy: 0.500
Epoch 45/100 | Step 45/100 | Train Loss: 0.185 | Test Loss: 0.486 | Test Accuracy: 0.750
Epoch 50/100 | Step 50/100 | Train Loss: 0.153 | Test Loss: 0.982 | Test Accuracy: 0.500
Epoch 55/100 | Step 55/100 | Train Loss: 0.139 | Test Loss: 0.614 | Test Accuracy: 0.500
Epoch 60/100 | Step 60/

KeyboardInterrupt: 

In [10]:
model.load_state_dict(torch.load('trained_face-recog.pth'))
model.to(device)
model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [11]:
test_transform = transforms.Compose([transforms.Resize((640, 640)),
                                       transforms.RandomHorizontalFlip(),
                                       transforms.ToTensor(),
                                       transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

In [12]:
def predict_face(image):
    class_names = train_data.classes
    image = valid_transform(image).unsqueeze(0).to(device)
    with torch.no_grad():
        outputs = model(image)
        _, predicted = torch.max(outputs, 1)
    return class_names[predicted.item()]

face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")

In [13]:
def check_camera(index):
    cap = cv2.VideoCapture(index)
    if not cap.isOpened():
        print(f"no camera detected")
        return None
    print(f"camera exists")
    return cap

In [14]:
from PIL import Image

cap = check_camera(3) 
if cap is None:
    print("no cam")

width, height = 960, 540 
cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)

while True:
    ret, frame = cap.read()
    if not ret:
        break

    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.3, 5)

    for (x, y, w, h) in faces:
        face = frame[y:y+h, x:x+w]
        face = Image.fromarray(cv2.cvtColor(face, cv2.COLOR_BGR2RGB))

        prediction = predict_face(face)

        # LABEL BOX AND CHANGE COLOR ETC
        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 0, 255), 2)
        cv2.putText(frame, prediction, (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)

    cv2.imshow("HELLO FRIENDS", frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

camera exists
