# Machine Learning Project

## Introduction

The goal of this project is to recognize whether an image shows a face or not.

The images are 36x36-pixel greyscale images.

To reach the goal, we will try to train a convolutional neural network.

In [1]:
from deep_learning_project.load_data import basic_load, imbalanced_load, get_transform, get_both_transform
from deep_learning_project.net import FirstNeuralNetwork, LinearRegressionNetwork, SecondNeuralNetwork, ThirdNeuralNetwork, FourthNeuralNetwork, FifthNeuralNetwork
from deep_learning_project.torchsampler import ImbalancedDatasetSampler
import torch
import matplotlib.pyplot as plt
from torch import nn
from torch.utils.data import TensorDataset
from deep_learning_project.trainers import BaseTrainer
from torchvision.transforms import InterpolationMode
import torchvision
import os
import json
import datetime
import torchvision.transforms as T
from tqdm import tqdm
from deep_learning_project.utils import Exporter
import cv2 as cv
from cv2 import IMREAD_GRAYSCALE, IMREAD_COLOR
import math
import numpy as np
import random
from shutil import rmtree
import time

# Base folder from which the models folder below is resolved.
CURRENT_FOLDER = '.'
# Folder handed to the exporter when preparing the model export.
MODEL_FOLDERS = os.path.join(CURRENT_FOLDER, 'models')
# Absolute path of the folder that receives the bootstrapped false-positive crops.
TEXTURE_FP_FOLDER = os.path.abspath('./deep_learning_project/texturesfp/')

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def predict(model, img):
    """Return the softmax class probabilities of `model` for the batch `img`.

    The forward pass runs under ``torch.no_grad()`` so no gradient graph is
    built. The raw logits are dropped and the CUDA cache is released before
    returning.

    Args:
        model: callable that maps `img` to logits with the classes on dim 1.
        img: input batch passed to `model` unchanged.

    Returns:
        Tensor of probabilities (softmax over dim 1 of the logits).
    """
    # NOTE(review): the model's train/eval mode is not changed here; if the
    # network contains dropout, confirm the caller has switched it to eval().
    softmax = nn.Softmax(dim=1)
    with torch.no_grad():
        scores = model(img)
        probabilities = softmax(scores)

        # free the logits right away to keep GPU memory usage low
        del scores
        torch.cuda.empty_cache()
    return probabilities

def load_background_images():
    """Load every file of the textures folder as a colour image.

    Files are read in sorted filename order so the result does not depend on
    the order in which the file system lists the folder. Files that OpenCV
    cannot decode (``cv.imread`` returns ``None`` for them) are skipped with a
    warning instead of being returned as ``None`` entries.

    Returns:
        list: one array per readable file, as returned by ``cv.imread`` with
        ``IMREAD_COLOR``.
    """
    import warnings

    TEXTURE_FOLDER = os.path.abspath('./deep_learning_project/textures/')

    filenames = sorted(
        f for f in os.listdir(TEXTURE_FOLDER)
        if os.path.isfile(os.path.join(TEXTURE_FOLDER, f))
    )

    img_datas = []

    for filename in filenames:
        img_src = cv.samples.findFile(os.path.join(TEXTURE_FOLDER, filename))
        img = cv.imread(img_src, IMREAD_COLOR)
        if img is None:
            # imread signals a decoding failure with None rather than raising
            warnings.warn("Skipping unreadable background image: {0}".format(img_src))
            continue
        img_datas.append(img)

    return img_datas

def gather_false_positive(model, background_images, rescale=0.8, threshold=0.8, stride=1, limit=1000, per_image_limit=50, timelimit=1800, timelimit_per_image=600, shuffle=True, device='cpu'):
    """Collect 36x36 crops of background images that the model scores as faces.

    Each background image is transformed with ``get_transform()``, scanned
    with a sliding 36x36 window, then repeatedly shrunk by `rescale` and
    scanned again until it is smaller than the window. Windows whose
    class-1 probability is at least `threshold` are kept. Images are visited
    cyclically until `limit` crops were collected or `timelimit` has elapsed.

    Args:
        model: network passed to ``predict``; index 1 of its output is the
            face class (0 = noface, 1 = face).
        background_images: sequence of images accepted by ``T.ToPILImage``.
            The sequence itself is not modified.
        rescale: factor applied to height and width between two scans.
        threshold: minimum face probability for a window to be kept.
        stride: step in pixels of the sliding window.
        limit: maximum number of crops returned overall.
        per_image_limit: maximum number of crops taken from one visit of an image.
        timelimit: overall time budget in seconds.
        timelimit_per_image: time budget in seconds for one visit of an image.
        shuffle: whether to visit the images in random order.
        device: device on which the windows are classified.

    Returns:
        numpy array of shape (N, 36, 36, 1) holding the crops mapped with
        ``(x * 0.5 + 0.5) * 255``; N is 0 when nothing was found.
        (assumes the transform normalises with mean 0.5 / std 0.5 and yields
        one channel — TODO confirm against get_transform)
    """
    torch.cuda.empty_cache()

    window = 36       # side length of the crops fed to the model
    batch_size = 512  # number of windows classified per forward pass

    images = []  # this list will contain the false positive crops

    # Shuffle a copy so the caller's list keeps its order.
    background_images = list(background_images)
    if shuffle:
        random.shuffle(background_images)

    transform = get_transform()
    num_fp = 0
    image_index = 0
    # nothing to scan when no background image was given
    should_stop = len(background_images) == 0

    global_deadline = time.time() + timelimit

    while not should_stop:
        image = background_images[image_index]
        transformed_image = transform(T.ToPILImage()(image)).to(device)

        # One deadline per visited image (not per scale), capped by the global one.
        image_deadline = min(time.time() + timelimit_per_image, global_deadline)
        next_image = False
        num_im = 0

        while True:
            height = transformed_image.size()[1]
            width = transformed_image.size()[2]

            # "+ 1" so that the last valid window position is included
            current_windows = []
            for y in range(0, height - window + 1, stride):
                for x in range(0, width - window + 1, stride):
                    current_windows.append(transformed_image[:, y:y + window, x:x + window])
                if time.time() > image_deadline:
                    break

            for batch_start in range(0, len(current_windows), batch_size):
                current_torch = torch.stack(current_windows[batch_start:batch_start + batch_size])

                pred_probab = predict(model, current_torch.to(device))
                pred_cpu = pred_probab.cpu()

                del current_torch
                del pred_probab
                torch.cuda.empty_cache()

                # 0 = noface, 1 = face
                for offset, pred in enumerate(pred_cpu):
                    if pred[1] >= threshold:
                        # offset is relative to the batch, so add the batch start
                        images.append(current_windows[batch_start + offset].cpu())
                        num_fp += 1
                        num_im += 1

                        if num_fp >= limit:
                            should_stop = True
                            break
                        if num_im >= per_image_limit:
                            next_image = True
                            break

                del pred_cpu

                if time.time() > image_deadline:
                    next_image = True
                if should_stop or next_image:
                    break

            if should_stop or next_image or time.time() > image_deadline:
                break

            # scale down
            new_height = math.ceil(height * rescale)
            new_width = math.ceil(width * rescale)

            # stop the loop if the image is smaller than the retina
            if new_height < window or new_width < window:
                break
            # stop as well when rounding (or rescale >= 1) no longer shrinks the image
            if new_height >= height and new_width >= width:
                break

            transformed_image = T.Resize((new_height, new_width), interpolation=InterpolationMode.BILINEAR)(transformed_image)

        if time.time() > global_deadline:
            should_stop = True

        image_index = (image_index + 1) % len(background_images)

    if not images:
        # torch.stack refuses an empty list; return an empty batch instead
        return np.empty((0, window, window, 1), dtype=np.float32)

    images = torch.stack(images)
    images = images.reshape(images.size()[0], 1, 36, 36).permute((0, 2, 3, 1)).numpy()
    images = (images*0.5 + 0.5) * 255
    return images

def save_images(dir_path, images):
    """Write `images` as numbered ``.pgm`` files into the ``0`` sub-folder of `dir_path`.

    The sub-folder is created when missing. Numbering continues after the
    number of entries already present in it, so existing files are kept.

    Args:
        dir_path: parent folder; files go to ``<dir_path>/0``.
        images: iterable of images accepted by ``cv.imwrite``.
    """
    target_dir = os.path.join(dir_path, '0')
    os.makedirs(target_dir, exist_ok=True)

    first_index = len(os.listdir(target_dir))

    # NOTE(review): the boolean returned by cv.imwrite is not checked, so a
    # failed write goes unnoticed.
    for index, img in enumerate(images, start=first_index):
        cv.imwrite(os.path.join(target_dir, str(index) + ".pgm"), img)

In [3]:
# Hyper-parameters of the initial training run.
min_epochs = 10
epochs = 20
learning_rate = 0.001
momentum = 0.90
weight_decay = 0
valid_size = 0.2
batch_size = 64

# Hyper-parameters of the bootstrapping rounds (reduced learning rate).
bootstrap_min_epoch = 1
bootstrap_max_epoch = 10
bootstrap_lr = learning_rate * 0.1

## Data

The data is split into 3 datasets.

Train : to train the ML model.

Valid : to validate the ML model.

Test : to test the ML model.

What is the difference between the valid and test datasets? The main difference is when they are used: the valid dataset is used during the training process, whereas the test dataset is used once training is complete. Why use two different datasets to do the same thing (testing how well the model generalizes)? Some people validate with the test dataset, but this is not scientifically sound because it biases the model: if we train the model until the error on the test dataset is lowest, we are effectively fitting the model to the test dataset. This is why we use two different datasets.

In [4]:
# Use the first GPU when CUDA is available; flag multi-GPU setups for DataParallel.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
parallel = device != "cpu" and torch.cuda.device_count() > 1

print(f"Running on {device}. Parallel={parallel}.")

# imbalanced_load returns the loaders, the classes, the training dataset and
# the training indices, in that order (positions 0 to 5).
data = imbalanced_load(valid_size=valid_size, batch_size=batch_size, device=device)
(train_loader,
 valid_loader,
 test_loader,
 classes,
 train_data,
 train_idx) = (data[position] for position in range(6))

# Background textures scanned later for false positives.
background_images = load_background_images()

Running on cuda:0. Parallel=False.


Printing one image of the training set.


## Initializing the network (FifthNeuralNetwork)

We initialize the network.

Print the configuration.

Then predict a random input.

---

FirstNeuralNetwork is the neural network given by the teacher.

In [5]:
# Cross-entropy loss for the two-class (noface / face) problem.
loss_fn = nn.CrossEntropyLoss()

# Wrap the network in DataParallel only when several GPUs were detected.
model = nn.DataParallel(FifthNeuralNetwork()) if parallel else FifthNeuralNetwork()
model.to(device)

FifthNeuralNetwork(
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (dropout): Dropout(p=0.5, inplace=False)
  (conv1): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
  (conv3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1))
  (conv4): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Conv2d(256, 256, kernel_size=(6, 6), stride=(1, 1))
  (fc2): Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1))
  (fc3): Conv2d(512, 2, kernel_size=(1, 1), stride=(1, 1))
)

## Optimizing the network

In [6]:
# The exporter is given the models folder and the class name of the network.
exporter = Exporter()
exporter.prepare_export(MODEL_FOLDERS, type(model).__name__)

In [7]:
# Adam is the optimizer in use; the SGD variant is kept as a commented-out alternative.
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=learning_rate,
    weight_decay=weight_decay,
)
# optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=weight_decay, momentum=momentum)

# Checkpoints are written to the folder managed by the exporter.
trainer = BaseTrainer(
    model,
    loss_fn,
    optimizer,
    checkpoints_path=exporter.folder,
)

In [8]:
%%time

# Initial training run: at least `min_epochs` and at most `epochs` epochs.
# NOTE(review): early_stopping='valid' presumably stops on the validation
# metrics — confirm in BaseTrainer.fit.
trainer.fit(train_loader=train_loader,
            valid_loader=valid_loader,
            test_loader=test_loader,
            min_epochs=min_epochs,
            max_epochs=epochs,
            early_stopping='valid',
            device=device)

Size of train dataset=73376, train batches=1147, valid dataset=18344, valid batches=287, test dataset=7628, test batches=120


1147it [01:30, 12.74it/s]


epoch 1 of 20 : train_loss: 0.16940, train_accuracy: 93.06%, valid_loss: 0.05499, valid_accuracy: 98.11%, test_loss: 0.07714, test_accuracy: 97.63%


1147it [00:41, 27.61it/s]


epoch 2 of 20 : train_loss: 0.05045, train_accuracy: 98.29%, valid_loss: 0.03096, valid_accuracy: 98.99%, test_loss: 0.06133, test_accuracy: 98.06%


1147it [00:28, 39.74it/s]


epoch 3 of 20 : train_loss: 0.03801, train_accuracy: 98.75%, valid_loss: 0.02634, valid_accuracy: 99.10%, test_loss: 0.05073, test_accuracy: 98.41%


1147it [00:23, 48.70it/s]


epoch 4 of 20 : train_loss: 0.03022, train_accuracy: 98.98%, valid_loss: 0.01847, valid_accuracy: 99.42%, test_loss: 0.06569, test_accuracy: 98.09%


1147it [00:22, 51.03it/s]


epoch 5 of 20 : train_loss: 0.02716, train_accuracy: 99.16%, valid_loss: 0.01583, valid_accuracy: 99.53%, test_loss: 0.05048, test_accuracy: 98.65%


1147it [00:22, 51.74it/s]


epoch 6 of 20 : train_loss: 0.02216, train_accuracy: 99.31%, valid_loss: 0.02243, valid_accuracy: 99.30%, test_loss: 0.05164, test_accuracy: 98.68%


1147it [00:22, 51.49it/s]


epoch 7 of 20 : train_loss: 0.02011, train_accuracy: 99.38%, valid_loss: 0.01360, valid_accuracy: 99.48%, test_loss: 0.08283, test_accuracy: 97.89%


1147it [00:22, 51.91it/s]


epoch 8 of 20 : train_loss: 0.02172, train_accuracy: 99.38%, valid_loss: 0.01500, valid_accuracy: 99.54%, test_loss: 0.05684, test_accuracy: 98.58%


1147it [00:22, 52.03it/s]


epoch 9 of 20 : train_loss: 0.01863, train_accuracy: 99.44%, valid_loss: 0.01465, valid_accuracy: 99.51%, test_loss: 0.05544, test_accuracy: 98.57%


1147it [00:22, 51.99it/s]


epoch 10 of 20 : train_loss: 0.01865, train_accuracy: 99.42%, valid_loss: 0.01463, valid_accuracy: 99.57%, test_loss: 0.06665, test_accuracy: 98.23%


1147it [00:22, 52.01it/s]


epoch 11 of 20 : train_loss: 0.02007, train_accuracy: 99.41%, valid_loss: 0.04679, valid_accuracy: 98.51%, test_loss: 0.11521, test_accuracy: 97.29%
CPU times: total: 4min 37s
Wall time: 11min 43s


In [9]:
def make_dataset_equal_or_less(dataset, num_samples, allowed_indices=None):
    """Return a random subset of at most `num_samples` face samples of `dataset`.

    Only samples whose target equals 1 (face) and whose index is listed in
    `allowed_indices` are eligible. When fewer than `num_samples` samples are
    eligible, all of them are returned.

    Args:
        dataset: dataset exposing a ``targets`` sequence aligned with its indices.
        num_samples: maximum number of samples in the returned subset.
        allowed_indices: iterable of dataset indices that may be selected;
            defaults to the notebook-level ``train_idx``.

    Returns:
        torch.utils.data.Subset over the selected indices, in shuffled order.
    """
    if allowed_indices is None:
        allowed_indices = train_idx

    # A set makes each membership test O(1) instead of scanning a list for
    # every candidate; int() normalises numpy / tensor integer scalars.
    allowed = {int(idx) for idx in allowed_indices}

    # find the indices of faces that belong to the allowed set, then shuffle them
    indices = [idx for idx, target in enumerate(dataset.targets) if target == 1 and idx in allowed]
    np.random.shuffle(indices)

    # keep at most num_samples indices, or the whole set when it is smaller
    if num_samples < len(indices):
        indices = indices[:num_samples]

    return torch.utils.data.Subset(dataset, indices)


In [10]:
class CustomTensorDataset(TensorDataset):
    """TensorDataset whose items are ``(sample, label)`` with the label as a Python scalar.

    Only the first two wrapped tensors are used: the first provides the
    sample, the second the label, which is converted with ``.item()``.
    """

    def __init__(self, *tensors: torch.Tensor) -> None:
        super().__init__(*tensors)

    def __getitem__(self, index):
        samples, labels = self.tensors[0], self.tensors[1]
        return samples[index], labels[index].item()

In [11]:
# bootstrap_data = torch.utils.data.Subset(train_data, range(0,10))

# fp_images = gather_false_positive(
#     model,
#     background_images,
#     rescale=0.8,
#     stride=36,
#     threshold=0,
#     limit=10,
#     per_image_limit=5000/len(background_images),
#     shuffle=True,
#     device=device)
    
# fp_tensor = torch.Tensor(fp_images).permute((0,3,1,2))
# nonface = torch.zeros(fp_tensor.size(0), dtype=int)

# fp_data = CustomTensorDataset(fp_tensor, nonface)



# both_data = torch.utils.data.ConcatDataset([bootstrap_data, fp_data])
# both_sampler = ImbalancedDatasetSampler(both_data, callback_get_label=(lambda x, y: x[y][1] if type(x[y][1]) == int else x[y][1].item()))

# kwargs = {'num_workers': 0, 'pin_memory': True} if 'cuda' in device else {}
# loader = torch.utils.data.DataLoader(both_data, batch_size=batch_size, sampler=both_sampler, **kwargs)
# loader = torch.utils.data.DataLoader(both_data, batch_size=batch_size, sampler=both_sampler)

# trainer.optimizer = torch.optim.Adam(model.parameters(), lr=bootstrap_lr, weight_decay=weight_decay)
# trainer.optimizer = torch.optim.SGD(model.parameters(), lr=bootstrap_lr, weight_decay=weight_decay, momentum=momentum)
# trainer.fit(train_loader=loader,
#     valid_loader=valid_loader,
#     test_loader=test_loader,
#     min_epochs=bootstrap_min_epoch,
#     max_epochs=bootstrap_max_epoch,
#     early_stopping=None,
#     device=device,
#     verbose=False)


In [12]:
# Bootstrapping: at each round, crops of background textures that the model
# scores as faces are collected with a decreasing threshold, labelled as
# non-faces, added to the training data, and the model is fine-tuned on the
# enlarged set.
if os.path.exists(TEXTURE_FP_FOLDER):
    rmtree(TEXTURE_FP_FOLDER) # delete old bootstrap

bootstrap_data = torch.utils.data.Subset(train_data, train_idx)

for i, threshold in enumerate([0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1, 0]):
    print("Starting bootstrapping threshold={0}".format(str(threshold)))
    fp_images = gather_false_positive(
        model,
        background_images,
        rescale=0.8,
        stride=1,
        threshold=threshold,
        limit=5000,
        per_image_limit=5000/len(background_images),
        shuffle=True,
        device=device)

    if len(fp_images) == 0:
        # nothing to add for this threshold, so there is nothing to retrain on
        print("No false positive found, skipping this round")
        print()
        continue
    
    save_path = os.path.join(TEXTURE_FP_FOLDER, str(i))
    
    save_images(save_path, fp_images)

    # gather_false_positive returns pixel values mapped to [0, 255] with
    # (x * 0.5 + 0.5) * 255 for saving; invert that mapping so the crops are
    # fed to training in the same range the model saw when classifying them.
    fp_tensor = torch.Tensor(fp_images).permute((0,3,1,2))
    fp_tensor = (fp_tensor / 255 - 0.5) / 0.5
    nonface = torch.zeros(fp_tensor.size(0), dtype=int)

    fp_data = CustomTensorDataset(fp_tensor, nonface)

    # fp_data = torchvision.datasets.ImageFolder(save_path, transform=get_both_transform())
    
    # face_imgs = make_dataset_equal_or_less(train_data, len(fp_data))
    

    both_data = torch.utils.data.ConcatDataset([bootstrap_data, fp_data])
    both_sampler = ImbalancedDatasetSampler(both_data, callback_get_label=(lambda x, y: x[y][1]))

    kwargs = {'num_workers': 0, 'pin_memory': True} if 'cuda' in device else {}
    loader = torch.utils.data.DataLoader(both_data, batch_size=batch_size, sampler=both_sampler, **kwargs)
    
    # a fresh optimizer with the reduced bootstrap learning rate for every round
    trainer.optimizer = torch.optim.Adam(model.parameters(), lr=bootstrap_lr, weight_decay=weight_decay)
    # trainer.optimizer = torch.optim.SGD(model.parameters(), lr=bootstrap_lr, weight_decay=weight_decay, momentum=momentum)
    trainer.fit(train_loader=loader,
        valid_loader=valid_loader,
        test_loader=test_loader,
        min_epochs=bootstrap_min_epoch,
        max_epochs=bootstrap_max_epoch,
        early_stopping=None,
        device=device,
        verbose=False)

    trainer._print_evaluation(valid_loader, test_loader)
    # the next round trains on everything gathered so far
    bootstrap_data = both_data
    print()

# fullset = torchvision.datasets.ImageFolder(TEXTURE_FP_FOLDER, transform=get_transform())
# both_data_full = torch.utils.data.ConcatDataset([torch.utils.data.Subset(train_data, train_idx), fp_data])
# full_both_sampler = ImbalancedDatasetSampler(both_data_full, callback_get_label=(lambda x, y: x[y][1]))
# kwargs = {'num_workers': 4, 'pin_memory': True} if 'cuda' in device else {}
# full_loader = torch.utils.data.DataLoader(both_data_full, batch_size=batch_size, sampler=full_both_sampler, **kwargs)

# trainer.fit(train_loader=full_loader,
#             valid_loader=valid_loader,
#             test_loader=test_loader,
#             epochs=epochs,
#             device=device)

Starting bootstrapping threshold=0.9


: 

In [None]:
# Summary of the run written next to the exported weights.
stats_file_data = {
    "device": device,
    "network": type(model).__name__,
    "epochs_number": epochs,
    "learning_rate": learning_rate,
    "batch_size": batch_size,
    "train_len_data": len(train_loader.dataset),
    "test_len_data": len(test_loader.dataset),
    "trainer": type(trainer).__name__,
    "optimizer": type(optimizer).__name__,
    "pytorch_version": torch.__version__,
    "loss_function": type(loss_fn).__name__,
    "performances": trainer.get_stats(),
}

exporter.export_stat_file(stats_file_data)

# Export the current weights, then the best models tracked by the trainer.
exporter.export_model(model, 'weights.pt')
exporter.export_best_models(trainer.get_best_models())

In [None]:
# model.load_state_dict(torch.load('models/20221127_2300_ThirdNeuralNetwork/best_valid_weights.pt'))

In [None]:
# fp_data = torchvision.datasets.ImageFolder(TEXTURE_FP_FOLDER, transform=get_both_transform())
    
# # face_imgs = make_dataset_equal_or_less(train_data, len(fp_data))
# face_imgs = torch.utils.data.Subset(train_data, train_idx)

# both_data = torch.utils.data.ConcatDataset([face_imgs, fp_data])
# both_sampler = ImbalancedDatasetSampler(both_data, callback_get_label=(lambda x, y: x[y][1]))

# kwargs = {'num_workers': 4, 'pin_memory': True} if 'cuda' in device else {}
# loader = torch.utils.data.DataLoader(both_data, batch_size=batch_size, sampler=both_sampler, **kwargs)

# trainer.optimizer = torch.optim.Adam(model.parameters(), lr=0.000005, weight_decay=weight_decay)
# # trainer.optimizer = torch.optim.SGD(model.parameters(), lr=bootstrap_lr, weight_decay=weight_decay, momentum=momentum)
# trainer.fit(train_loader=loader,
#         valid_loader=valid_loader,
#         test_loader=test_loader,
#         min_epochs=bootstrap_min_epoch,
#         max_epochs=bootstrap_max_epoch,
#         early_stopping=None,
#         device=device)

In [None]:
# load model
# ld_model = torch.load('model.pt')

# test_loop(test_loader, ld_model, loss_fn)