# Process Data
---



In [None]:
pip install -q kaggle

In [None]:
from google.colab import files

# Upload the `kaggle.json` API token that the next cell copies into ~/.kaggle.
# The result is bound to a name on purpose: `files.upload()` returns the
# uploaded file contents, and leaving the call as the cell's last expression
# would echo the secret token into the saved notebook output.
uploaded_files = files.upload()

In [None]:
! mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json
!kaggle datasets download -d ntl0601/casia-webface

Downloading casia-webface.zip to /content
100% 2.53G/2.53G [02:07<00:00, 24.2MB/s]
100% 2.53G/2.53G [02:07<00:00, 21.4MB/s]


In [None]:
from zipfile import ZipFile
# Unpack the downloaded archive into the current working directory.
Casia_path = "/content/casia-webface.zip"
with ZipFile(Casia_path) as myzip:  # read mode is ZipFile's default
    myzip.extractall()
print('Done unzipping CASIA-WebFace.zip')

Done unzipping CASIA-WebFace.zip


In [None]:
%cd /content/drive/MyDrive/MobileFaceNet
!git clone https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB

/content/drive/MyDrive/MobileFaceNet
Cloning into 'Ultra-Light-Fast-Generic-Face-Detector-1MB'...
remote: Enumerating objects: 953, done.[K
remote: Counting objects: 100% (169/169), done.[K
remote: Compressing objects: 100% (65/65), done.[K
remote: Total 953 (delta 120), reused 104 (delta 104), pack-reused 784[K
Receiving objects: 100% (953/953), 37.29 MiB | 16.41 MiB/s, done.
Resolving deltas: 100% (483/483), done.
Checking out files: 100% (230/230), done.


In [None]:
%cd /content/drive/MyDrive/MobileFaceNet/Ultra-Light-Fast-Generic-Face-Detector-1MB
!pip install -r requirements.txt

In [None]:
# Image pre-processing helpers: convert images into the input format the models expect
import os
import cv2
import numpy as np
import vision.utils.box_utils_numpy as box_utils
import onnxruntime as ort


def pre_process(img):
    """Turn a BGR image into the input tensor fed to the RFB-320 face detector.

    The image is converted to RGB, resized to 320x240 (width x height),
    shifted by 127 and divided by 128 per channel, reordered from
    (height, width, channel) to (channel, height, width) and given a leading
    batch axis of size 1.

    Args:
        img: image array in BGR channel order, as accepted by ``cv2.cvtColor``.

    Returns:
        ``np.float32`` array with axes (batch, channel, height, width).
    """
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    resized = cv2.resize(rgb, (320, 240))  # input size of the RFB-320 model
    # Subtracting an integer *array* (not a scalar) promotes uint8 pixels to a
    # wider integer type first, so values below 127 cannot wrap around.
    channel_mean = np.array([127, 127, 127])
    normalized = (resized - channel_mean) / 128
    channels_first = normalized.transpose(2, 0, 1)
    return channels_first[np.newaxis, ...].astype(np.float32)

# Non-maximum suppression: discard the bounding boxes that are not needed
def non_max_sup(width,height,confidences,boxes,prob_threshold, iou_threshold = 0.3, top_k = 1):
    """Filter detector output by score, run hard NMS per class and scale boxes to pixels.

    Only the first batch element of `confidences` and `boxes` is used, and
    class index 0 is skipped.

    Args:
        width, height: factors applied to the x and y box coordinates
            (the caller passes the original image size).
        confidences: array indexed as [batch, candidate, class].
        boxes: array indexed as [batch, candidate, 4].
        prob_threshold: candidates must score strictly above this value.
        iou_threshold, top_k: forwarded unchanged to ``box_utils.hard_nms``.

    Returns:
        Tuple ``(boxes, labels, probs)``: int32 boxes with four columns, the
        class index of each kept box, and its score. Three empty arrays are
        returned when no candidate passes the threshold.
    """
    boxes = boxes[0]
    confidences = confidences[0]

    kept_per_class = []
    kept_labels = []
    for class_index in range(1, confidences.shape[1]):
        # Keep only the candidates whose confidence exceeds prob_threshold.
        class_scores = confidences[:, class_index]
        keep = class_scores > prob_threshold  # boolean mask
        if not keep.any():
            continue

        candidates = np.concatenate(
            [boxes[keep, :], class_scores[keep].reshape(-1, 1)], axis=1
        )
        survivors = box_utils.hard_nms(candidates, iou_threshold, top_k)
        kept_per_class.append(survivors)
        kept_labels.extend([class_index] * survivors.shape[0])

    if not kept_per_class:
        return np.array([]), np.array([]), np.array([])

    merged = np.concatenate(kept_per_class)
    # Columns 0-3 hold the box corners; the x columns are scaled by `width`
    # and the y columns by `height`. Column 4 is the score.
    for column, scale in enumerate((width, height, width, height)):
        merged[:, column] *= scale
    return merged[:, :4].astype(np.int32), np.array(kept_labels), merged[:, 4]

In [None]:
# Crop the face out of every CASIA-WebFace image and mirror the folder layout
# (one sub-folder per identity) under `new_dataset`.
dataset_path = "/content/casia-webface"
new_dataset = "New_CASIA-WebFace"  # relative: created under the current working directory

# Initialise the ONNX face detector. The model path is relative, so the
# current working directory must be the detector checkout.
onnx_path = "models/onnx/version-RFB-320.onnx"
ort_session = ort.InferenceSession(onnx_path)
input_name = ort_session.get_inputs()[0].name
threshold = 0.7  # minimum detection confidence passed to non_max_sup

list_folder = os.listdir(dataset_path)

for folder_path in list_folder:
    img_path = os.path.join(dataset_path, folder_path)
    if not os.path.isdir(img_path):
        # Stray files next to the identity folders cannot be listed as folders.
        continue

    new_data_path = os.path.join(new_dataset, folder_path)
    os.makedirs(new_data_path, exist_ok=True)

    list_file = os.listdir(img_path)

    for file_path in list_file:
        image_path = os.path.join(img_path, file_path)
        print('Image path: ', image_path)

        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread signals an unreadable or non-image file by returning
            # None; skip it instead of crashing the whole run.
            print('Skipping unreadable image: ', image_path)
            continue

        image = pre_process(img)
        confidences, boxes = ort_session.run(None, {input_name: image})
        boxes, labels, probs = non_max_sup(img.shape[1], img.shape[0], confidences, boxes, threshold)

        # Default to the untouched image; it is replaced by a crop only when a
        # usable detection exists.
        face = img
        if len(boxes) > 0:
            img_h, img_w = img.shape[:2]
            # Clamp all four corners into the image. A negative corner would
            # otherwise be read as an index counted from the end when slicing.
            boxes[:, [0, 2]] = np.clip(boxes[:, [0, 2]], 0, img_w)
            boxes[:, [1, 3]] = np.clip(boxes[:, [1, 3]], 0, img_h)
            # Pick the box with the largest true area (width * height).
            areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
            best = int(np.argmax(areas))
            if areas[best] > 0:
                x1, y1, x2, y2 = boxes[best]
                face = img[y1:y2, x1:x2]

        # Exactly one write per input image.
        cv2.imwrite(os.path.join(new_data_path, file_path), face)

# Train the model

In [None]:
from google.colab import files

# Upload the `kaggle.json` API token that the next cell copies into ~/.kaggle.
# The result is bound to a name on purpose: `files.upload()` returns the
# uploaded file contents, and leaving the call as the cell's last expression
# would echo the secret token into the saved notebook output.
uploaded_files = files.upload()

In [None]:
! mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json
!kaggle datasets download -d unkownhihi/casiawebface-cropped

Downloading casiawebface-cropped.zip to /content
100% 6.76G/6.76G [05:44<00:00, 23.0MB/s]
100% 6.76G/6.76G [05:44<00:00, 21.0MB/s]


## Create a CSV file from the cropped dataset

In [None]:
from zipfile import ZipFile
# Unpack the cropped-face archive into the current working directory.
Casia_path = "/content/casiawebface-cropped.zip"
with ZipFile(Casia_path) as myzip:  # read mode is ZipFile's default
    myzip.extractall()
print('Done unzipping New-CASIA-WebFace.zip')

Done unzipping New-CASIA-WebFace.zip


In [None]:
import csv
import os

# Every sub-folder of `root` is treated as one class (person ID). The sorted
# order matters because the position in this list is later used as the label.
root = 'CASIA-WebFace_cropped/'
folder_name_list = sorted(os.listdir(root))
print('Number of classes (or nuber of person ID): ', len(folder_name_list))

Number of classes (or nuber of person ID):  10575


In [None]:
# Write one "<image path>,<label>" row per image, without a header row. The
# label is the position of the identity folder in `folder_name_list`.
# Mode 'w' (not 'a') keeps the cell idempotent: appending duplicated every row
# each time the cell was re-run. newline='' is what the csv module asks for so
# that it controls line endings itself.
with open('data_train.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    for i, folder_name in enumerate(folder_name_list):
        folder_path = os.path.join(root, folder_name)
        # Sorted so the row order does not depend on the file system.
        img_name_list = sorted(os.listdir(folder_path))

        for img_name in img_name_list:
            img_path = os.path.join(root, folder_name, img_name)
            label = i
            writer.writerow([img_path, label])

In [None]:
import pandas as pd

# The CSV is written without a header row, so the column names are supplied here.
df = pd.read_csv('data_train.csv',names=["img_path","label"])
# NOTE(review): head(-1) returns every row except the last one, not a short
# preview. Confirm that `df.head()` was not what was intended.
df.head(-1)

Unnamed: 0,img_path,label
0,CASIA-WebFace_cropped/0000045/003.jpg,0
1,CASIA-WebFace_cropped/0000045/013.jpg,0
2,CASIA-WebFace_cropped/0000045/008.jpg,0
3,CASIA-WebFace_cropped/0000045/011.jpg,0
4,CASIA-WebFace_cropped/0000045/015.jpg,0
...,...,...
494408,CASIA-WebFace_cropped/6573530/052.jpg,10574
494409,CASIA-WebFace_cropped/6573530/023.jpg,10574
494410,CASIA-WebFace_cropped/6573530/026.jpg,10574
494411,CASIA-WebFace_cropped/6573530/017.jpg,10574


## Preprocess data

In [None]:
import numpy as np
import os
import torch
import pandas as pd
import cv2

class CASIA_Face(object):
    """Map-style dataset over the (image path, label) rows of a header-less CSV.

    Each item is an RGB image resized to `image_size`, randomly mirrored
    left-right, scaled to [0, 1] and returned channel-first, together with its
    integer class label.

    Args:
        root_path: path of the CSV file; column 0 is the image path and
            column 1 the integer label. The file must not contain a header row.
        image_size: target size passed to ``cv2.resize``.
    """

    def __init__(self, root_path='data_train.csv', image_size=(112, 112)):
        # header=None: the CSV has no header row. Without it pandas consumes
        # the first image row as column names and that sample is silently lost.
        self.data_path = pd.read_csv(root_path, header=None, names=["img_path", "label"])
        self.num_images = len(self.data_path)      # one row per image
        self.image_size = image_size               # size of the network input

    def __getitem__(self, index):
        """Load, augment and convert the sample stored in row `index`.

        Returns:
            ``(img, label)``: a float32 tensor laid out (channel, height, width)
            and a 0-dim int64 tensor holding the class index.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        image_path = self.data_path.iloc[index, 0]

        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread returns None instead of raising; fail with the path so
            # the broken sample can be identified.
            raise FileNotFoundError('Could not read image: {}'.format(image_path))

        # cv2.imread with default flags always yields a 3-channel BGR image, so
        # no separate grayscale handling is needed before the colour conversion.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, self.image_size)

        # Label used by the cross-entropy loss.
        label_cross = int(self.data_path.iloc[index, 1])

        # Random horizontal flip. torch's RNG is used because DataLoader seeds
        # it differently in every worker process; NumPy's global RNG state is
        # copied into forked workers, which then draw identical flip sequences.
        if torch.rand(1).item() < 0.5:
            img = img[:, ::-1, :]
        img = img / 255

        # (height, width, channel) -> (channel, height, width)
        img = np.ascontiguousarray(img.transpose(2, 0, 1))
        img = torch.from_numpy(img).float()

        # np.long was removed from NumPy; build the int64 label tensor directly.
        return img, torch.tensor(label_cross, dtype=torch.long)

    def __len__(self):
        """Number of images listed in the CSV."""
        return self.num_images

In [None]:
from torch import nn
import torch
import torch.nn.functional as F
import math
from torch.nn import Parameter
from torch.nn import Linear, Conv2d, BatchNorm2d, PReLU, Sequential, Module, Parameter
# L2 normalisation
def l2_norm(input, axis = 1):
    """Divide `input` by its Euclidean (L2) norm taken along `axis`.

    The norm keeps the reduced dimension so that it broadcasts against
    `input`. No epsilon is added, so an all-zero slice divides by zero.
    """
    return input / input.norm(p=2, dim=axis, keepdim=True)

class Flatten(Module):
    """Collapse every dimension after the first one into a single dimension."""

    def forward(self, input):
        leading = input.shape[0]
        return input.view(leading, -1)

class Conv_block(Module):
    """Bias-free 2-D convolution followed by batch normalisation and PReLU.

    Args:
        in_c, out_c: input and output channel counts.
        kernel, stride, padding, groups: forwarded to ``Conv2d``.
    """

    def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1):
        super().__init__()
        self.conv = Conv2d(
            in_c,
            out_c,
            kernel,
            stride=stride,
            padding=padding,
            groups=groups,
            bias=False,
        )
        self.bn = BatchNorm2d(out_c)
        self.prelu = PReLU(out_c)  # one learnable slope per output channel

    def forward(self, x):
        return self.prelu(self.bn(self.conv(x)))

class Linear_block(Module):
    """Bias-free 2-D convolution followed by batch normalisation, with no activation.

    Args:
        in_c, out_c: input and output channel counts.
        kernel, stride, padding, groups: forwarded to ``Conv2d``.
    """

    def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1):
        super().__init__()
        self.conv = Conv2d(
            in_c,
            out_c,
            kernel,
            stride=stride,
            padding=padding,
            groups=groups,
            bias=False,
        )
        self.bn = BatchNorm2d(out_c)

    def forward(self, x):
        return self.bn(self.conv(x))

# Basic block of MobileFaceNet, combining the ideas of depthwise convolution and ResNet
class Depth_Wise_Res(Module):
    """1x1 expansion -> depthwise convolution -> 1x1 linear projection, with an optional skip.

    Args:
        in_c, out_c: input and output channel counts.
        residual: when true, the block input is added to the projected output.
            That addition only works if the block keeps the tensor shape
            (``in_c == out_c`` and a stride of 1).
        kernel, stride, padding: settings of the depthwise convolution.
        groups: width of the expanded representation. It is used both as the
            channel count between the two 1x1 layers and as the group count of
            the depthwise convolution.
    """

    def __init__(self, in_c, out_c, residual = False, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=1):
        super().__init__()
        self.conv = Conv_block(in_c, out_c=groups, kernel=(1, 1), stride=(1, 1), padding=(0, 0))
        self.conv_dw = Conv_block(groups, groups, kernel=kernel, stride=stride, padding=padding, groups=groups)
        self.project = Linear_block(groups, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0))
        self.residual = residual

    def forward(self, x):
        branch = self.project(self.conv_dw(self.conv(x)))
        if not self.residual:
            return branch
        return x + branch

class Multi_Res(Module):
    """Stack of `num_block` residual ``Depth_Wise_Res`` blocks that all keep `c` channels.

    Args:
        c: channel count going into and out of every block.
        num_block: number of blocks chained in sequence.
        groups, kernel, stride, padding: forwarded to every block.
    """

    def __init__(self, c, num_block, groups, kernel=(3, 3), stride=(1, 1), padding=(1, 1)):
        super().__init__()
        blocks = (
            Depth_Wise_Res(c, c, residual=True, kernel=kernel, padding=padding, stride=stride, groups=groups)
            for _ in range(num_block)
        )
        self.model = Sequential(*blocks)

    def forward(self, x):
        return self.model(x)

class MobileFaceNet(Module):
    """MobileFaceNet backbone mapping a face image to an embedding vector.

    Four layers use stride 2 (conv1, conv_23, conv_34, conv_45). The final
    unpadded 7x7 depthwise convolution must reduce the feature map to 1x1 so
    that the flattened tensor has the 512 features the linear layer expects,
    which corresponds to a 112x112 input image.

    Args:
        embedding_size: length of the returned embedding.
        class_num: accepted but not used by this class.
    """

    def __init__(self, embedding_size=512, class_num=1):
        super().__init__()
        k3, s1, s2, p1 = (3, 3), (1, 1), (2, 2), (1, 1)

        # Stem
        self.conv1 = Conv_block(3, 64, kernel=k3, stride=s2, padding=p1)
        self.conv2_dw = Conv_block(64, 64, kernel=k3, stride=s1, padding=p1, groups=64)

        # Alternating down-sampling blocks and residual stacks
        self.conv_23 = Depth_Wise_Res(64, 64, kernel=k3, stride=s2, padding=p1, groups=128)
        self.conv_3 = Multi_Res(64, num_block=4, groups=128, kernel=k3, stride=s1, padding=p1)
        self.conv_34 = Depth_Wise_Res(64, 128, kernel=k3, stride=s2, padding=p1, groups=256)
        self.conv_4 = Multi_Res(128, num_block=6, groups=256, kernel=k3, stride=s1, padding=p1)
        self.conv_45 = Depth_Wise_Res(128, 128, kernel=k3, stride=s2, padding=p1, groups=512)
        self.conv_5 = Multi_Res(128, num_block=2, groups=256, kernel=k3, stride=s1, padding=p1)

        # Head: widen to 512 channels, depthwise 7x7 convolution, then embed
        self.conv_6_sep = Conv_block(128, 512, kernel=(1, 1), stride=s1, padding=(0, 0))
        self.conv_6_dw = Linear_block(512, 512, groups=512, kernel=(7,7), stride=s1, padding=(0, 0))
        self.conv_6_flatten = Flatten()
        self.linear = Linear(512, embedding_size, bias=False)

    def forward(self, x):
        stages = (
            self.conv1, self.conv2_dw,
            self.conv_23, self.conv_3,
            self.conv_34, self.conv_4,
            self.conv_45, self.conv_5,
            self.conv_6_sep, self.conv_6_dw,
            self.conv_6_flatten, self.linear,
        )
        out = x
        for stage in stages:
            out = stage(out)
        return out

class ArcMarginProduct(nn.Module):
    """Additive angular margin (ArcFace) classification head.

    Computes the cosine between L2-normalised embeddings and L2-normalised
    class weight vectors, replaces the target-class cosine ``cos(theta)`` by
    ``cos(theta + m)`` and multiplies all logits by `s`.

    Args:
        in_features: embedding length.
        out_features: number of classes.
        s: scale applied to the logits.
        m: angular margin in radians.
    """

    def __init__(self, in_features=512, out_features=10575, s=32.0, m=0.50):
        super(ArcMarginProduct, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.s = s  # logit scale
        self.m = m  # angular margin
        self.weight = Parameter(torch.empty(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

        self.cos_m = math.cos(m)
        self.sin_m = math.sin(m)
        # cos(theta + m) stops decreasing once theta + m exceeds pi. Beyond the
        # threshold `th` the linear penalty `cosine - mm` is used instead.
        self.th = math.cos(math.pi - m)
        self.mm = math.sin(math.pi - m) * m

    def forward(self, x, label):
        """Return margin-adjusted, scaled logits.

        Args:
            x: embeddings, one row per sample.
            label: class index of every sample.

        Returns:
            Tensor with one row per sample and one column per class, on the
            same device and with the same dtype as the computed cosines.
        """
        cosine = F.linear(F.normalize(x), F.normalize(self.weight))
        # Rounding can push |cosine| marginally above 1; clamping keeps the
        # square root from producing NaN.
        sine = torch.sqrt((1.0 - torch.pow(cosine, 2)).clamp(0, 1))
        # cos(theta + m) = cos(theta) cos(m) - sin(theta) sin(m)
        phi = cosine * self.cos_m - sine * self.sin_m
        phi = torch.where(cosine > self.th, phi, cosine - self.mm)

        # Build the one-hot mask next to `cosine` instead of hard-coding
        # 'cuda', so the layer follows whatever device its inputs are on.
        one_hot = torch.zeros_like(cosine)
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        # Margin-adjusted value for the target class, plain cosine elsewhere.
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.s

        return output


In [None]:
# Hyper-parameters and run configuration used by the training cell.
BATCH_SIZE = 256  # samples per optimisation step
TOTAL_EPOCH = 20  # index of the last epoch; the training loop runs epochs 0..TOTAL_EPOCH inclusive
IMG_SIZE = (112, 112)  # size every training image is resized to
SAVE_DIR = '/content/drive/MyDrive/MobileFaceNet/saved_models'  # where checkpoints are written
NUM_CLASSES = 10575  # number of identities, i.e. output size of the ArcFace head

SAVED_EPOCH = 3 # save the model's weights every 3 epochs
CASIA_DATA_DIR = './data_train.csv'  # despite the name, this is the path of the CSV listing image paths and labels

LEARNING_RATE = 0.001  # initial learning rate of the SGD optimiser

In [None]:
import os
import torch.utils.data
from torch import nn
from torch.optim import lr_scheduler
import torch.optim as optim
import time

In [None]:
import torch
# Release cached GPU memory that PyTorch's allocator holds but no tensor is using.
torch.cuda.empty_cache()

In [None]:
# Pick the first GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f'Using device: {device}')
print()

# GPU name and memory figures are only available when CUDA is in use.
if device.type == 'cuda':
    print(torch.cuda.get_device_name(0))
    print('Memory Usage:')
    print(f'Allocated: {round(torch.cuda.memory_allocated(0)/1024**3,1)} GB')
    print(f'Cached:    {round(torch.cuda.memory_reserved(0)/1024**3,1)} GB')

Using device: cuda:0

Tesla T4
Memory Usage:
Allocated: 0.0 GB
Cached:    0.0 GB


In [None]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Training data: (image tensor, identity label) pairs listed in the CSV.
trainset = CASIA_Face(root_path=CASIA_DATA_DIR, image_size = IMG_SIZE)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE,
                                          shuffle=True, num_workers=2, drop_last=False)

# Backbone producing 512-d embeddings, and the ArcFace head turning them into
# per-identity logits.
net = MobileFaceNet()
ArcMargin = ArcMarginProduct(512, NUM_CLASSES)

net = net.to(device)
ArcMargin = ArcMargin.to(device)

# Parameter groups with different weight decay. `ignored_params` holds the ids
# of the backbone parameters that must NOT land in the default group: the
# embedding layer and every PReLU slope. ArcMargin.weight is not part of
# `net.parameters()`, so it needs no entry here. The previous
# `map(id, ArcMargin.weight)` iterated over the rows of the weight matrix and
# collected ids of temporary tensors.
ignored_params = {id(p) for p in net.linear.parameters()}
prelu_params = []
for m in net.modules():
    if isinstance(m, nn.PReLU):
        prelu_params += list(m.parameters())
ignored_params.update(id(p) for p in prelu_params)
base_params = [p for p in net.parameters() if id(p) not in ignored_params]

optimizer_ft = optim.SGD([
    {'params': base_params, 'weight_decay': 4e-5},
    {'params': net.linear.parameters(), 'weight_decay': 4e-4},
    {'params': [ArcMargin.weight], 'weight_decay': 4e-4},
    {'params': prelu_params, 'weight_decay': 0.0}
], lr=LEARNING_RATE, momentum=0.9, nesterov=True)

# The scheduler is stepped once per batch (see the loop below), so the
# milestone counts optimisation steps, not epochs.
# NOTE(review): the learning rate only drops after 200000 steps. Confirm the
# run is long enough for that to ever happen.
exp_lr_scheduler = lr_scheduler.MultiStepLR(optimizer_ft, milestones=[200000], gamma=0.1)

# Cross-entropy over the margin-adjusted identity logits.
criterion = torch.nn.CrossEntropyLoss()

# Create the checkpoint directory (and missing parents) once, up front.
os.makedirs(SAVE_DIR, exist_ok=True)

best_acc = 0.0
best_epoch = 0
iters = 0  # optimisation steps performed so far, across all epochs
# NOTE(review): the range is inclusive, so TOTAL_EPOCH + 1 epochs are trained.
for epoch in range(0, TOTAL_EPOCH+1):
    print('Train Epoch: {}/{} ...'.format(epoch, TOTAL_EPOCH))
    net.train()

    train_total_loss = 0.0
    total = 0
    since = time.time()
    # Prints the cumulative step count reached before this epoch starts.
    print('iteration for 1 epoch: ', iters)
    for data in trainloader:
        iters = iters + 1
        # Follow `device` instead of calling .cuda(), so the loop uses the
        # same device the models were moved to.
        img, label = data[0].to(device), data[1].to(device)
        batch_size = img.size(0)
        optimizer_ft.zero_grad()

        raw_logits = net(img)

        output = ArcMargin(raw_logits, label)
        total_loss = criterion(output, label)
        total_loss.backward()
        optimizer_ft.step()
        exp_lr_scheduler.step()

        # Weight by batch size so the epoch figure is a per-sample mean even
        # when the last batch is smaller.
        train_total_loss += total_loss.item() * batch_size
        total += batch_size

    train_total_loss = train_total_loss / total
    time_elapsed = time.time() - since
    loss_msg = 'total_loss: {:.4f} time: {:.0f}m {:.0f}s'\
        .format(train_total_loss, time_elapsed // 60, time_elapsed % 60)
    print(loss_msg)

    # Checkpoint every SAVED_EPOCH epochs and always after the final epoch.
    # Without the second condition the fully trained weights were discarded
    # whenever TOTAL_EPOCH is not a multiple of SAVED_EPOCH.
    if epoch % SAVED_EPOCH == 0 or epoch == TOTAL_EPOCH:
        torch.save(net.state_dict(),
            os.path.join(SAVE_DIR, '%03d.pth' % epoch))
print('finishing training')
