# Machine Learning Project

## Introduction

The goal of the project is to recognize whether an image shows a face or not.

The images are 36x36-pixel greyscale images.

To reach the goal, we will try to train a convolutional neural network.

In [1]:
from deep_learning_project.load_data import basic_load, imbalanced_load, get_transform, get_both_transform
from deep_learning_project.net import FirstNeuralNetwork, LinearRegressionNetwork, SecondNeuralNetwork, ThirdNeuralNetwork, FourthNeuralNetwork
from deep_learning_project.torchsampler import ImbalancedDatasetSampler
import torch
import matplotlib.pyplot as plt
from torch import nn
from deep_learning_project.trainers import BaseTrainer
from torchvision.transforms import InterpolationMode
import torchvision
import os
import json
import datetime
import torchvision.transforms as T
from tqdm import tqdm
from deep_learning_project.utils import Exporter
import cv2 as cv
from cv2 import IMREAD_GRAYSCALE, IMREAD_COLOR
import math
import numpy as np
import random

# Base folder for relative paths: the directory the notebook is run from.
CURRENT_FOLDER = '.'
# Folder given to exporter.prepare_export further down in the notebook.
MODEL_FOLDERS = os.path.join(CURRENT_FOLDER, 'models')

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def predict(model, img):
    """Run ``model`` on ``img`` without tracking gradients.

    Args:
        model: callable returning logits with the classes on dimension 1.
        img: input forwarded unchanged to ``model``.

    Returns:
        A ``(y_pred, pred_probab)`` tuple: the index of the most probable
        class as a Python number, and the softmax probabilities tensor.
        ``.item()`` is used, so the arg-max must hold exactly one element
        (i.e. a single sample).
    """
    with torch.no_grad():
        probabilities = torch.softmax(model(img), dim=1)
        # position of the largest probability along the class dimension
        best_class = probabilities.argmax(1).item()
    return (best_class, probabilities)

def load_background_images():
    """Load every readable image of the texture folder.

    The regular files found directly in ``./deep_learning_project/textures/``
    are read with ``cv.imread(..., IMREAD_COLOR)``.

    Returns:
        A list with one image array per file that OpenCV could decode,
        ordered by file name.
    """
    TEXTURE_FOLDER = os.path.abspath('./deep_learning_project/textures/')

    # os.listdir returns entries in arbitrary order; sorting makes the result
    # reproducible from one run / machine to the next.
    filenames = sorted(
        f for f in os.listdir(TEXTURE_FOLDER)
        if os.path.isfile(os.path.join(TEXTURE_FOLDER, f))
    )

    img_datas = []

    for filename in filenames:
        img_src = cv.samples.findFile(os.path.join(TEXTURE_FOLDER, filename))
        img = cv.imread(img_src, IMREAD_COLOR)
        # cv.imread signals an unreadable / non-image file by returning None
        # rather than raising; skip it so callers never receive a None entry.
        if img is None:
            continue
        img_datas.append(img)

    return img_datas

def gather_false_positive(model, background_images, rescale=0.8, threshold=0.8, stride=1, limit=1000, per_image_limit=50, shuffle=True, device='cpu'):
    """Collect 36x36 crops of background images that the model scores as faces.

    Each background image is converted with ``get_transform()`` and scanned
    with a sliding 36x36 window. When the scan of one scale ends without
    reaching a limit, the image is shrunk by ``rescale`` and scanned again,
    until it becomes smaller than the window. A crop is kept when its class-1
    probability (0 = noface, 1 = face) is at least ``threshold``.

    Images are visited in turn and the list is cycled through again as long
    as ``limit`` has not been reached. Collection also stops when a complete
    pass over all images yields no crop at all, so the function always
    terminates.

    Args:
        model: classifier forwarded to ``predict``; it receives crops reshaped
            to (1, 1, 36, 36) and moved to ``device``.
        background_images: sequence of images accepted by ``T.ToPILImage``
            (presumably images without faces - confirm with the caller).
            The sequence itself is not modified.
        rescale: factor applied to height and width between two scales.
        threshold: minimum class-1 probability for a crop to be kept.
        stride: step, in pixels, of the sliding window in both directions.
        limit: total number of crops after which collection stops.
        per_image_limit: number of crops after which the scan moves on to the
            next image (any number; compared with ``>=``).
        shuffle: visit the images in random order when True.
        device: device the crops are moved to before prediction.

    Returns:
        numpy array of shape (N, 36, 36, 1) holding the collected crops;
        N is 0 when there are no background images or nothing was collected.
    """
    retina = 36  # side length, in pixels, of the window fed to the model

    # Work on a copy so shuffling does not reorder the caller's list.
    background_images = list(background_images)
    if not background_images:
        return torch.empty((0, retina, retina, 1)).numpy()

    if shuffle:
        random.shuffle(background_images)

    images = []  # this list will contain the false positive crops
    num_fp = 0
    # The image index is initialised once, before the loop: resetting it on
    # every iteration would make the scan look at the first image forever.
    i = 0
    found_in_pass = 0  # crops collected since the index last wrapped to 0
    shouldStop = False

    while not shouldStop:
        image = background_images[i]
        transform = get_transform()
        transformed_image = transform(T.ToPILImage()(image))

        nextImage = False
        num_im = 0

        while True:
            height = transformed_image.size()[1]
            width = transformed_image.size()[2]

            # "+ 1": the last valid top-left corner is (size - retina), which
            # range() would otherwise exclude.
            for y in range(0, height - retina + 1, stride):
                for x in range(0, width - retina + 1, stride):

                    # crop and prepare the cropped image
                    new_img = transformed_image[:, y:y + retina, x:x + retina]
                    torch_new_img = new_img.reshape((1, 1, retina, retina))

                    (_, pred_probab) = predict(model, torch_new_img.to(device))

                    # 0 = noface, 1 = face
                    if pred_probab.squeeze()[1] >= threshold:
                        images.append(new_img.reshape(retina, retina))
                        num_fp += 1
                        num_im += 1
                        found_in_pass += 1

                        if num_fp >= limit:
                            shouldStop = True
                            break
                        if num_im >= per_image_limit:
                            nextImage = True
                            break

                if shouldStop or nextImage:
                    break

            if shouldStop or nextImage:
                break

            new_height = math.ceil(height * rescale)
            new_width = math.ceil(width * rescale)

            # stop the loop if the image is smaller than the retina
            if new_height < retina or new_width < retina:
                break

            transformed_image = T.Resize((new_height, new_width), interpolation=InterpolationMode.BILINEAR)(transformed_image)

        if shouldStop:
            break

        i += 1
        if i == len(background_images):
            # A whole pass without a single hit cannot produce hits on the
            # next pass either; stop instead of looping forever.
            if found_in_pass == 0:
                break
            i = 0
            found_in_pass = 0

    # torch.stack rejects an empty list, so return an empty batch directly.
    if not images:
        return torch.empty((0, retina, retina, 1)).numpy()

    images = torch.stack(images)
    images = images.reshape(images.size()[0], 1, retina, retina).permute((0, 2, 3, 1)).numpy()
    return images

# Root folder of the collected false-positive crops.
TEXTURE_FP_FOLDER = os.path.abspath('./deep_learning_project/texturesfp/')
def save_images(dir_path, images):
    """Write ``images`` as numbered ``.pgm`` files into ``<dir_path>/0``.

    The sub-folder is created when missing. File numbers continue after the
    number of entries already present in it, so repeated calls append to the
    folder.

    Args:
        dir_path: parent folder; files go into its ``0`` sub-folder.
        images: iterable of images handed one by one to ``cv.imwrite``.
            NOTE(review): they are written as received - confirm that their
            dtype and value range are what ``cv.imwrite`` expects.
    """
    target_dir = os.path.join(dir_path, '0')
    os.makedirs(target_dir, exist_ok=True)

    # number of entries already in the folder = first free file number
    first_number = len(os.listdir(target_dir))

    for number, picture in enumerate(images, start=first_number):
        cv.imwrite(os.path.join(target_dir, str(number) + ".pgm"), picture)

def delete_files_in_dir(dir_path):
    """Remove every regular file located directly in ``dir_path``.

    Sub-directories and their content are left untouched.
    """
    entry_paths = (os.path.join(dir_path, name) for name in os.listdir(dir_path))
    # Decide what to delete before deleting anything.
    files_to_delete = [path for path in entry_paths if os.path.isfile(path)]
    for path in files_to_delete:
        os.remove(path)

In [3]:
# --- Initial training ---
min_epochs = 10          # given to trainer.fit as min_epochs
epochs = 20              # given to trainer.fit as max_epochs
learning_rate = 0.0005
momentum = 0.90
weight_decay = 0
valid_size = 0.2         # given to imbalanced_load as valid_size
batch_size = 64

# --- Bootstrapping rounds (fine-tuning on collected false positives) ---
bootstrap_min_epoch = 3
bootstrap_max_epoch = 10
# fine-tuning uses a much smaller step than the initial training
bootstrap_lr = learning_rate * 0.005

## Data

Data is separated in 3 datasets.

Train : to train the ML model.

Valid : to validate the ML model.

Test : to test the ML model.

What is the difference between the validation and test datasets? The main difference is when they are used: the validation set is used during the training process, whereas the test set is used once the training is complete. Why use different datasets to do the same thing (test the generalization of the model)? Some people validate with the test dataset, but this is not scientifically correct because it introduces a bias into the model. If we train the model until the test dataset error is the lowest, we effectively train the model for the test dataset... This is why we use two different datasets.

In [4]:
# Use the first CUDA device when one is available, the CPU otherwise.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
# Several GPUs visible -> the model gets wrapped in DataParallel later on.
parallel = device != "cpu" and torch.cuda.device_count() > 1

print(f"Running on {device}. Parallel={parallel}.")

# imbalanced_load returns an indexable result; its first six entries are
# unpacked in order below.
data = imbalanced_load(valid_size=valid_size, batch_size=batch_size, device=device)
(train_loader, valid_loader, test_loader,
 classes, train_data, train_idx) = (data[position] for position in range(6))

background_images = load_background_images()

Running on cuda:0. Parallel=False.


Printing one image of the training set.


## Initializing the FourthNeuralNetwork network

We initialize the network.

Print the configuration.

Then predict a random input.

---

FirstNeuralNetwork is the neural network given by the teacher; the cell below instantiates FourthNeuralNetwork instead.

In [5]:
# Loss applied to the raw class scores produced by the network.
loss_fn = nn.CrossEntropyLoss()

# Wrap the network in DataParallel only when several GPUs were detected.
model = nn.DataParallel(FourthNeuralNetwork()) if parallel else FourthNeuralNetwork()
# Last expression of the cell: moves the model and displays its layout.
model.to(device)

FourthNeuralNetwork(
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Conv2d(64, 64, kernel_size=(7, 7), stride=(1, 1))
  (fc2): Conv2d(64, 2, kernel_size=(1, 1), stride=(1, 1))
)

## Optimizing the network

In [6]:
# Exporter is the project helper later used for checkpoints, stats and weights.
exporter = Exporter()
# Export location is derived from MODEL_FOLDERS and the model's class name.
# NOTE(review): presumably this sets exporter.folder, which the trainer uses
# as checkpoints_path - confirm in Exporter.
exporter.prepare_export(MODEL_FOLDERS, str(model.__class__.__name__))

In [8]:
# Alternative optimizer, kept for reference:
# optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
# Active optimizer: SGD with momentum, using the hyper-parameters defined above.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=weight_decay, momentum=momentum)
# The trainer receives exporter.folder as its checkpoints_path.
trainer = BaseTrainer(model, loss_fn, optimizer, checkpoints_path=exporter.folder)

In [9]:
%%time

# Initial training: between min_epochs and epochs epochs, with early stopping
# keyed on 'valid'. The test loader is passed as well; the recorded output
# reports test metrics after every epoch.
trainer.fit(train_loader=train_loader,
            valid_loader=valid_loader,
            test_loader=test_loader,
            min_epochs=min_epochs,
            max_epochs=epochs,
            early_stopping='valid',
            device=device)

Size of train dataset=73376, train batches=1147, valid dataset=18344, valid batches=287, test dataset=7628, test batches=120


1147it [01:25, 13.40it/s]


epoch 1 of 20 : train_loss: 0.55311, train_accuracy: 71.89%, valid_loss: 0.40419, valid_accuracy: 84.29%, test_loss: 0.23297, test_accuracy: 90.06%


1147it [00:44, 25.97it/s]


epoch 2 of 20 : train_loss: 0.40680, train_accuracy: 81.73%, valid_loss: 0.36516, valid_accuracy: 85.31%, test_loss: 0.18222, test_accuracy: 91.99%


1147it [00:28, 40.06it/s]


epoch 3 of 20 : train_loss: 0.30076, train_accuracy: 87.44%, valid_loss: 0.22037, valid_accuracy: 91.24%, test_loss: 0.18023, test_accuracy: 92.03%


1147it [00:23, 49.41it/s]


epoch 4 of 20 : train_loss: 0.23240, train_accuracy: 91.05%, valid_loss: 0.20990, valid_accuracy: 92.65%, test_loss: 0.09823, test_accuracy: 96.45%


1147it [00:20, 55.99it/s]


epoch 5 of 20 : train_loss: 0.19386, train_accuracy: 92.75%, valid_loss: 0.22261, valid_accuracy: 91.80%, test_loss: 0.08210, test_accuracy: 97.14%


1147it [00:19, 60.21it/s]


epoch 6 of 20 : train_loss: 0.16887, train_accuracy: 93.73%, valid_loss: 0.16401, valid_accuracy: 94.27%, test_loss: 0.07448, test_accuracy: 97.46%


1147it [00:18, 61.97it/s]


epoch 7 of 20 : train_loss: 0.14568, train_accuracy: 94.68%, valid_loss: 0.11952, valid_accuracy: 95.76%, test_loss: 0.08746, test_accuracy: 97.08%


1147it [00:18, 63.38it/s]


epoch 8 of 20 : train_loss: 0.13394, train_accuracy: 95.10%, valid_loss: 0.11327, valid_accuracy: 96.12%, test_loss: 0.06636, test_accuracy: 97.85%


1147it [00:17, 64.13it/s]


epoch 9 of 20 : train_loss: 0.12088, train_accuracy: 95.60%, valid_loss: 0.09580, valid_accuracy: 96.57%, test_loss: 0.06548, test_accuracy: 97.95%


1147it [00:18, 62.18it/s]


epoch 10 of 20 : train_loss: 0.11235, train_accuracy: 95.96%, valid_loss: 0.09366, valid_accuracy: 96.61%, test_loss: 0.05984, test_accuracy: 98.05%


1147it [00:18, 62.80it/s]


epoch 11 of 20 : train_loss: 0.10853, train_accuracy: 96.10%, valid_loss: 0.08459, valid_accuracy: 97.03%, test_loss: 0.06853, test_accuracy: 97.80%


1147it [00:18, 63.03it/s]


epoch 12 of 20 : train_loss: 0.10114, train_accuracy: 96.33%, valid_loss: 0.08093, valid_accuracy: 97.20%, test_loss: 0.07250, test_accuracy: 97.55%


1147it [00:18, 62.71it/s]


epoch 13 of 20 : train_loss: 0.09655, train_accuracy: 96.55%, valid_loss: 0.09504, valid_accuracy: 96.68%, test_loss: 0.06346, test_accuracy: 97.80%
CPU times: total: 1min 50s
Wall time: 12min 27s


In [10]:
def make_dataset_equal_or_less(dataset, num_samples):
    """Return a random subset of at most ``num_samples`` face samples.

    A sample is eligible when its target is 1 (face) and its index belongs to
    the notebook-level ``train_idx``, i.e. it is part of the training split.

    Args:
        dataset: dataset exposing a ``targets`` sequence aligned with its
            indices.
        num_samples: maximum number of samples to keep; when there are fewer
            eligible samples, all of them are kept.

    Returns:
        A ``torch.utils.data.Subset`` of ``dataset`` over the selected
        indices, in random order (shuffled with ``np.random``).
    """
    # Build the lookup once: testing `idx in train_idx` directly would rescan
    # the whole index collection for every candidate sample.
    # int() normalises numpy / tensor scalars so they compare equal to the
    # plain ints produced by enumerate().
    train_indices = {int(idx) for idx in train_idx}

    # indices of the faces that belong to the training split
    indices = [
        idx for idx, target in enumerate(dataset.targets)
        if target == 1 and idx in train_indices
    ]
    np.random.shuffle(indices)

    # keep num_samples of them, or the whole set when it is smaller
    if num_samples < len(indices):
        indices = indices[:num_samples]

    return torch.utils.data.Subset(dataset, indices)


In [12]:
# Folder in which save_images stores the crops (sub-folder "0" of TEXTURE_FP_FOLDER).
dir_path = os.path.join(TEXTURE_FP_FOLDER, "0")
# Uncomment to start from an empty folder; while it stays commented out, crops
# saved by earlier runs remain in the folder and are loaded again below.
# delete_files_in_dir(dir_path)
#  
# Bootstrapping: one round per threshold, from strict to permissive. Each round
# collects crops of the background images that the current model scores as
# faces, appends them to the folder, and fine-tunes the model on the training
# split plus everything in that folder.
for threshold in [0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1, 0]:
    print("Starting bootstrapping threshold={0}".format(str(threshold)))
    # stride=36 equals the window size, so the scanned windows do not overlap.
    # per_image_limit spreads the 4000-crop budget over the background images
    # (the division yields a float).
    fp_images = gather_false_positive(
        model,
        background_images,
        rescale=0.8,
        stride=36,
        threshold=threshold,
        limit=4000,
        per_image_limit=4000/len(background_images),
        shuffle=True,
        device=device)
    
    # Appended to the files already present (save_images numbers after them).
    save_images(TEXTURE_FP_FOLDER, fp_images)

    # Re-reads the whole folder, i.e. the crops of every round so far.
    # NOTE(review): ImageFolder presumably labels the single "0" sub-folder as
    # class 0 (noface) - confirm.
    fp_data = torchvision.datasets.ImageFolder(TEXTURE_FP_FOLDER, transform=get_both_transform())
    
    # face_imgs = make_dataset_equal_or_less(train_data, len(fp_data))
    # Despite its name, face_imgs is the complete training split (train_idx),
    # not only the face samples.
    face_imgs = torch.utils.data.Subset(train_data, train_idx)

    both_data = torch.utils.data.ConcatDataset([face_imgs, fp_data])
    # The callback returns the label stored at position 1 of a dataset item.
    both_sampler = ImbalancedDatasetSampler(both_data, callback_get_label=(lambda x, y: x[y][1]))

    # Worker processes and pinned memory are only requested when running on CUDA.
    kwargs = {'num_workers': 4, 'pin_memory': True} if 'cuda' in device else {}
    loader = torch.utils.data.DataLoader(both_data, batch_size=batch_size, sampler=both_sampler, **kwargs)
    
    # trainer.optimizer = torch.optim.Adam(model.parameters(), lr=bootstrap_lr, weight_decay=weight_decay)
    # A fresh SGD optimizer at the lower bootstrap_lr replaces the trainer's
    # optimizer at every round.
    trainer.optimizer = torch.optim.SGD(model.parameters(), lr=bootstrap_lr, weight_decay=weight_decay, momentum=momentum)
    trainer.fit(train_loader=loader,
            valid_loader=valid_loader,
            test_loader=test_loader,
            min_epochs=bootstrap_min_epoch,
            max_epochs=bootstrap_max_epoch,
            early_stopping='valid',
            device=device,
            verbose=False)

    # fit ran with verbose=False; print one evaluation line per round.
    trainer._print_evaluation(valid_loader, test_loader)
    print()

# fullset = torchvision.datasets.ImageFolder(TEXTURE_FP_FOLDER, transform=get_transform())
# both_data_full = torch.utils.data.ConcatDataset([torch.utils.data.Subset(train_data, train_idx), fp_data])
# full_both_sampler = ImbalancedDatasetSampler(both_data_full, callback_get_label=(lambda x, y: x[y][1]))
# kwargs = {'num_workers': 4, 'pin_memory': True} if 'cuda' in device else {}
# full_loader = torch.utils.data.DataLoader(both_data_full, batch_size=batch_size, sampler=full_both_sampler, **kwargs)

# trainer.fit(train_loader=full_loader,
#             valid_loader=valid_loader,
#             test_loader=test_loader,
#             epochs=epochs,
#             device=device)

Starting bootstrapping threshold=0.9
Size of train dataset=121376, train batches=1897, valid dataset=18344, valid batches=287, test dataset=7628, test batches=120


1897it [00:32, 58.13it/s]
1897it [00:29, 63.53it/s]
1897it [00:30, 62.57it/s]


epoch 3 of 10 : train_loss: 0.04646, train_accuracy: 98.41%, valid_loss: 0.07573, valid_accuracy: 97.14%, test_loss: 0.07755, test_accuracy: 97.51%

Starting bootstrapping threshold=0.8
Size of train dataset=125376, train batches=1959, valid dataset=18344, valid batches=287, test dataset=7628, test batches=120


1959it [00:31, 62.22it/s]
1959it [00:31, 62.33it/s]
1959it [00:30, 63.23it/s]


epoch 3 of 10 : train_loss: 0.04476, train_accuracy: 98.46%, valid_loss: 0.07464, valid_accuracy: 97.38%, test_loss: 0.07548, test_accuracy: 97.55%

Starting bootstrapping threshold=0.7
Size of train dataset=129376, train batches=2022, valid dataset=18344, valid batches=287, test dataset=7628, test batches=120


2022it [00:56, 35.62it/s]
2022it [00:41, 48.46it/s]
2022it [00:35, 57.46it/s]


epoch 3 of 10 : train_loss: 0.04300, train_accuracy: 98.51%, valid_loss: 0.07612, valid_accuracy: 97.14%, test_loss: 0.07581, test_accuracy: 97.56%

Starting bootstrapping threshold=0.6
Size of train dataset=133376, train batches=2084, valid dataset=18344, valid batches=287, test dataset=7628, test batches=120


2084it [00:34, 60.75it/s]
2084it [00:32, 63.74it/s]
2084it [00:32, 63.49it/s]


epoch 3 of 10 : train_loss: 0.04080, train_accuracy: 98.57%, valid_loss: 0.07285, valid_accuracy: 97.30%, test_loss: 0.07601, test_accuracy: 97.55%

Starting bootstrapping threshold=0.5
Size of train dataset=137376, train batches=2147, valid dataset=18344, valid batches=287, test dataset=7628, test batches=120


2147it [00:34, 62.89it/s]
2147it [00:33, 63.74it/s]
2147it [00:33, 63.36it/s]


epoch 3 of 10 : train_loss: 0.04073, train_accuracy: 98.59%, valid_loss: 0.07489, valid_accuracy: 97.27%, test_loss: 0.07672, test_accuracy: 97.54%

Starting bootstrapping threshold=0.4
Size of train dataset=141376, train batches=2209, valid dataset=18344, valid batches=287, test dataset=7628, test batches=120


2209it [01:48, 20.30it/s]
2209it [01:01, 36.00it/s]
2209it [00:44, 49.12it/s]


epoch 3 of 10 : train_loss: 0.03944, train_accuracy: 98.63%, valid_loss: 0.07638, valid_accuracy: 97.24%, test_loss: 0.07858, test_accuracy: 97.48%

Starting bootstrapping threshold=0.3
Size of train dataset=145376, train batches=2272, valid dataset=18344, valid batches=287, test dataset=7628, test batches=120


2272it [01:30, 25.11it/s]
2272it [01:14, 30.36it/s]
2272it [00:50, 44.98it/s]


epoch 3 of 10 : train_loss: 0.03853, train_accuracy: 98.66%, valid_loss: 0.07759, valid_accuracy: 97.03%, test_loss: 0.08245, test_accuracy: 97.27%

Starting bootstrapping threshold=0.2
Size of train dataset=149376, train batches=2334, valid dataset=18344, valid batches=287, test dataset=7628, test batches=120


2334it [00:36, 64.12it/s]
2334it [00:36, 63.35it/s]
2334it [00:36, 63.84it/s]


epoch 3 of 10 : train_loss: 0.03776, train_accuracy: 98.70%, valid_loss: 0.07696, valid_accuracy: 97.12%, test_loss: 0.07872, test_accuracy: 97.46%

Starting bootstrapping threshold=0.1
Size of train dataset=153376, train batches=2397, valid dataset=18344, valid batches=287, test dataset=7628, test batches=120


2397it [00:37, 64.13it/s]
2397it [00:37, 63.97it/s]
2397it [00:37, 63.98it/s]


epoch 3 of 10 : train_loss: 0.03746, train_accuracy: 98.70%, valid_loss: 0.07921, valid_accuracy: 97.08%, test_loss: 0.08421, test_accuracy: 97.10%

Starting bootstrapping threshold=0
Size of train dataset=157376, train batches=2459, valid dataset=18344, valid batches=287, test dataset=7628, test batches=120


2459it [00:38, 63.55it/s]
2459it [00:38, 64.06it/s]
2459it [00:38, 63.70it/s]


epoch 3 of 10 : train_loss: 0.03643, train_accuracy: 98.72%, valid_loss: 0.07729, valid_accuracy: 97.08%, test_loss: 0.08571, test_accuracy: 97.08%



In [None]:
# Summary of the run handed to the exporter together with the trainer's stats.
# The values describe the initial training set-up: `optimizer` and
# `learning_rate` are the objects defined before the bootstrapping rounds.
stats_file_data = {
    "device": device,
    "network": str(model.__class__.__name__),
    "epochs_number": epochs,
    "learning_rate": learning_rate,
    "batch_size": batch_size,
    "train_len_data": len(train_loader.dataset), 
    "test_len_data": len(test_loader.dataset),
    "trainer": str(trainer.__class__.__name__),
    "optimizer": str(optimizer.__class__.__name__),
    "pytorch_version": torch.__version__,
    "loss_function": str(loss_fn.__class__.__name__),
    "performances" : trainer.get_stats(),
}

exporter.export_stat_file(stats_file_data)

# Export the model in its current state (after bootstrapping) under
# 'weights.pt', then the models the trainer recorded as best.
exporter.export_model(model, 'weights.pt')
exporter.export_best_models(trainer.get_best_models())

In [None]:
# load model
# ld_model = torch.load('model.pt')

# test_loop(test_loader, ld_model, loss_fn)