In [2]:
import numpy as np
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision.transforms.autoaugment import AutoAugmentPolicy, AutoAugment

from torchsampler.imbalanced import ImbalancedDatasetSampler

In [3]:

train_dir = './train_images'
test_dir = './test_images'

transform = transforms.Compose(
    [transforms.AutoAugment(AutoAugmentPolicy.CIFAR10),
    transforms.Grayscale(),
    transforms.ToTensor(), 
    transforms.Normalize(mean=(0,),std=(1,))])

train_data = torchvision.datasets.ImageFolder(train_dir, transform=transform)
test_data = torchvision.datasets.ImageFolder(test_dir, transform=transform)

# 20% of the train dataset will be used as a validation exercise
valid_size = 0.2
batch_size = 32

num_train = len(train_data)
# (0, 1, 2, 3, ..., num_train)
indices_train = list(range(num_train))
# Reorders the indexes randomly (so we get 7, 4, 19...)
np.random.shuffle(indices_train)

# The first valid_size% of indexes will be for validation purposes
split_tv = int(np.floor(valid_size * num_train))

# Get the indexes, split between training and validation
train_new_idx, valid_idx = indices_train[split_tv:],indices_train[:split_tv]

train_sampler = ImbalancedDatasetSampler(train_data,train_new_idx)
valid_sampler = ImbalancedDatasetSampler(train_data,valid_idx)

train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, sampler=train_sampler, num_workers=1)
valid_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, sampler=valid_sampler, num_workers=1)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, shuffle=True, num_workers=1)
classes = ('noface','face')

In [4]:
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 18, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(18, 32, 5)
        self.fc1 = nn.Linear(32* 6 * 6, 48)
        self.fc2 = nn.Linear(48, 32)
        self.fc3 = nn.Linear(32, 16)
        self.fc4 = nn.Linear(16, 2)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 32 * 6 * 6)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return x

In [7]:
import torch.optim as optim

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

network = Net()
network.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(network.parameters(), lr = 0.001, momentum=0.9)

In [24]:
epoch = 1
max_epoch = 10
print_every_n_batch = 400

best_network: Net = network
best_loss_validation = 999999999
steps_since_last_best = 0
threshold_early_stopping = 4 # Stop after 4 iterations without a new best network
best_model_found = False

print(type(best_network))

while not best_model_found and epoch <= max_epoch:

    total_loss_training = 0.0
    i = 0
    
    for data_training, target_training in train_loader:

        optimizer.zero_grad()
        inputs, labels = data_training.to(device), target_training.to(device)

        outputs = network(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss_training += loss.item()

        #Every 400 batches, we test the model on the validation data
        if i % print_every_n_batch == print_every_n_batch - 1:    # print every n mini-batches
            running_loss_training = total_loss_training / print_every_n_batch
            total_loss_training = 0.0
            total_loss_validation = 0.0


            # We feed the validation data to the network
            for data_valid, labels_valid in valid_loader:
                images, labels = data_valid.to(device), labels_valid.to(device)
                outputs = network(images)
                loss = criterion(outputs,labels)
                total_loss_validation += loss.item()
            
            running_loss_validation = total_loss_validation / len(valid_loader)

            print(epoch, i+1, "-", "Training loss: ", running_loss_training, " - Validation loss:", running_loss_validation)
            
            if(running_loss_validation < best_loss_validation):
                best_loss_validation = running_loss_validation
                best_network = network
                steps_since_last_best = 0
            else:
                steps_since_last_best += 1

            if steps_since_last_best >= threshold_early_stopping:
                best_model_found = True
                break

        i += 1

    epoch += 1

<class 'collections.OrderedDict'>


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x0000019EB8D86310>
Traceback (most recent call last):
  File "c:\Users\maxen\AppData\Local\Programs\Python\Python39\lib\site-packages\torch\utils\data\dataloader.py", line 1510, in __del__
    self._shutdown_workers()
  File "c:\Users\maxen\AppData\Local\Programs\Python\Python39\lib\site-packages\torch\utils\data\dataloader.py", line 1474, in _shutdown_workers
    w.join(timeout=_utils.MP_STATUS_CHECK_INTERVAL)
  File "c:\Users\maxen\AppData\Local\Programs\Python\Python39\lib\multiprocessing\process.py", line 149, in join
    res = self._popen.wait(timeout)
  File "c:\Users\maxen\AppData\Local\Programs\Python\Python39\lib\multiprocessing\popen_spawn_win32.py", line 108, in wait
    res = _winapi.WaitForSingleObject(int(self._handle), msecs)
KeyboardInterrupt: 


TypeError: 'collections.OrderedDict' object is not callable

In [7]:
print(best_loss_validation)

0.06220278132089109


In [5]:
# Saving the model
network = best_network
torch.save(network.state_dict(), "./face_recognizer.pt")

NameError: name 'best_network' is not defined

In [8]:
# Loading the model
network = Net()
network.load_state_dict(torch.load("./face_recognizer.pt"))
network.eval()

network.to(device)

Net(
  (conv1): Conv2d(1, 18, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(18, 32, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=1152, out_features=48, bias=True)
  (fc2): Linear(in_features=48, out_features=32, bias=True)
  (fc3): Linear(in_features=32, out_features=16, bias=True)
  (fc4): Linear(in_features=16, out_features=2, bias=True)
)

In [35]:
correct = 0
total = 0
with torch.no_grad():
    for imgs, labls in test_loader:
        images, labels = imgs.to(device), labls.to(device)
        outputs = network(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %2.2f %%' % (
    100 * correct / total))

Accuracy of the network on the 10000 test images: 95.60 %


In [23]:
import cv2 as cv
from PIL import Image

original_image = cv.imread('./class_picture.jpg')

# Create the sub-images of the original image with the sliding window
window_size_x = 36
window_size_y = 36

transform = transforms.Compose(
    [ transforms.Grayscale(),
    transforms.ToTensor(), 
    transforms.Normalize(mean=(0,),std=(1,))])

to_detect = original_image 

total_scaling = 1

while(to_detect.shape[0] > 36 and to_detect.shape[1] > 36) :
    for y_window in range(0, to_detect.shape[0],window_size_y):
        for x_window in range(0, to_detect.shape[1], window_size_x):
            # Extract image:
            sub_image = to_detect[
                y_window:y_window+window_size_y,
                x_window:x_window+window_size_x,
            ]
            sub_image_PIL = Image.fromarray(sub_image)
            sub_image_PIL = sub_image_PIL.resize((36,36))
            sub_image_PIL = transform(sub_image_PIL)

            input = sub_image_PIL.to(device)
            outputs = network(input)
            predicted = torch.softmax(outputs.data, 1)
            face_probability = predicted[0][1].item()

            if(face_probability > 0.50):
                x_back_to_scale = int((x_window+18)/total_scaling)
                y_back_to_scale = int((y_window+18)/total_scaling)
                cv.circle(original_image,(x_back_to_scale,y_back_to_scale), 4 , (0,255,0), -1)
            x_window += 4
        
        y_window += 4

     
    step_scaling = 0.98   # percent of original size
    total_scaling = total_scaling*step_scaling
    width = int(to_detect.shape[1] * step_scaling)
    height = int(to_detect.shape[0] * step_scaling)
    dim = (width, height)

    to_detect = cv.resize(to_detect, dim, interpolation = cv.INTER_AREA)

cv.imshow("img",original_image)
cv.waitKey(0)

103