import library

In [1]:
import numpy as np
import torch
import torch.nn as nn
from torch.nn import Parameter, DataParallel
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms as T
from torchvision.utils import make_grid
from torchvision.models import resnet18
import shutil
import os
from PIL import Image
import pandas as pd
import math
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tqdm.notebook import tqdm
import time
import matplotlib.pyplot as plt
import random

In [2]:
# TODO:
# lr scheduler
# weight decay (optimizer)
# weight decay (lr scheduler)

# validation


In [3]:
def get_path_df(identity_root, identity_list):
    path_list = []
    identity_label_list = []
    for identity in tqdm(identity_list):
        image_list = os.listdir(os.path.join(identity_root, identity)) 
        for img in image_list:
            path = os.path.join(identity_root, identity, img)
            path_list.append(path)
            identity_label_list.append(identity) 

    data_df = pd.DataFrame({'img_path': path_list, 'identity': identity_label_list})
    data_df['identity_code'] = pd.Categorical(data_df['identity']).codes # convert identity to unique code(int)
    data_df['identity_code'] = data_df['identity_code'].astype('int32')
    return data_df

# save numpy array from path
def get_img_npy(path_df, img_shape):
    img_npy = np.zeros((len(path_df), img_shape[0], img_shape[1], img_shape[2]), dtype=np.uint8)
    label_npy = np.zeros((len(path_df), 1), dtype=np.uint8)
    for i in tqdm(range(len(path_df)), desc='get_img_npy'):
        img_path = path_df.iloc[i]['img_path']
        img = Image.open(img_path)
        img_npy[i] = np.array(img)
        label_npy[i] = path_df.iloc[i]['identity_code']
    return img_npy, label_npy 

def save_chunk_npy(path_df, num_chunk, img_shape, root):
    path_df_list = np.array_split(path_df, num_chunk)
    for i in range(len(path_df_list)):
        img_npy, label_npy = get_img_npy(path_df_list[i], img_shape)
        np.save(os.path.join(root, 'data_{}.npy'.format(i)), img_npy)
        np.save(os.path.join(root, 'label_{}.npy'.format(i)), label_npy)

def get_pair_npy(pair_df, img_shape):
    img_npy1 = np.zeros((len(pair_df), img_shape[0], img_shape[1], img_shape[2]), dtype=np.uint8)
    img_npy2 = np.zeros((len(pair_df), img_shape[0], img_shape[1], img_shape[2]), dtype=np.uint8)
    label_npy = np.zeros((len(pair_df), 1), dtype=np.uint8)
    for i in tqdm(range(len(pair_df)), desc='get_pair_npy'):
        img_path1 = pair_df.iloc[i]['img1_path']
        img_path2 = pair_df.iloc[i]['img2_path']
        img1 = Image.open(img_path1)
        img2 = Image.open(img_path2)
        img_npy1[i] = np.array(img1)
        img_npy2[i] = np.array(img2)
        label_npy[i] = pair_df.iloc[i]['label']
    return img_npy1, img_npy2, label_npy

def save_chunk_pair_npy(pair_df, num_chunk, img_shape, root):
    pair_df_list = np.array_split(pair_df, num_chunk)
    for i in range(num_chunk):
        img_npy1, img_npy2, label_npy = get_pair_npy(pair_df_list[i], img_shape)
        np.save(os.path.join(root, 'img1_{}.npy'.format(i)), img_npy1)
        np.save(os.path.join(root, 'img2_{}.npy'.format(i)), img_npy2)
        np.save(os.path.join(root, 'label_{}.npy'.format(i)), label_npy)

In [4]:
# buid the dataframe from the the path
identity_root = 'lfw_funneled'
identity_list = os.listdir(identity_root)
identity_list = [identity for identity in identity_list if os.path.isdir(os.path.join(identity_root, identity))] #only folder is identity

# total data for test
path_df = get_path_df(identity_root, identity_list)

# train test val split
train_df, test_df = train_test_split(path_df, test_size=0.2, random_state=42)
test_df, val_df = train_test_split(test_df, test_size=0.5, random_state=42)
print('train: {}, val: {}, test: {}'.format(len(train_df), len(val_df), len(test_df)))

# split the path_df into 10 chunks
num_chunk = 1
npy_root = 'lfw_funneled_npy'
img_shape = (250, 250, 3)
save_chunk_npy(train_df, num_chunk, img_shape, npy_root)

  0%|          | 0/5749 [00:00<?, ?it/s]

train: 10585, val: 1324, test: 1323


get_img_npy:   0%|          | 0/10585 [00:00<?, ?it/s]

In [5]:
class FaceDataset(Dataset):
    def __init__(self, img_npy_list, label_npy_list, input_shape, phase="train"):
        self.img_npy = np.vstack(img_npy_list)
        self.label_npy = np.vstack(label_npy_list)
        self.phase = phase
        self.input_shape = input_shape
        if self.phase == 'train':
            self.transforms = T.Compose([
                T.RandomCrop(self.input_shape[1:]),
                T.RandomHorizontalFlip(),
                T.ToTensor(),
                T.Normalize(mean=[0.5], std=[0.5])
            ])
        else:   
            self.transforms = T.Compose([
                T.CenterCrop(self.input_shape[1:]),
                T.ToTensor(),
                T.Normalize(mean=[0.5], std=[0.5])
            ])
            
    def __len__(self):
        return self.label_npy.shape[0]
    
    def __getitem__(self, index):
        data = self.img_npy[index]
        label = self.label_npy[index]
        data = Image.fromarray(data)
        data = data.convert('RGB')
        data = self.transforms(data)
        return data.float(), label.squeeze()
    
class PairFaceDataset(Dataset):
    def __init__(self, img_npy_list1, img_npy_list2, label_npy_list, input_shape):
        self.img_npy1 = np.vstack(img_npy_list1)
        self.img_npy2 = np.vstack(img_npy_list2)
        self.label_npy = np.vstack(label_npy_list)
        self.input_shape = input_shape
        self.transforms = T.Compose([
            T.CenterCrop(self.input_shape[1:]),
            T.ToTensor(),
            T.Normalize(mean=[0.5], std=[0.5])
        ])
            
    def __len__(self):
        return self.label_npy.shape[0]
    
    def __getitem__(self, index):
        data1 = self.img_npy1[index]
        data1 = Image.fromarray(data1)
        data1 = data1.convert('RGB')
        data1 = self.transforms(data1)

        data2 = self.img_npy2[index]
        data2 = Image.fromarray(data2)
        data2 = data2.convert('RGB')
        data2 = self.transforms(data2)
        label = self.label_npy[index]
        return data1.float(), data2.float(), label.squeeze()

In [6]:
# dataloader
img_npy_list = [np.load(os.path.join(npy_root, 'data_1.npy'))]
label_npy_list = [np.load(os.path.join(npy_root, 'label_1.npy'))]

input_shape = (1, 250, 250)
phase = 'train'
dataset = FaceDataset(img_npy_list, label_npy_list, input_shape, phase)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)
num_classes = len(np.unique(path_df['identity'].values))
print(num_classes)

5749


In [7]:
# # prepare data for cosine similarity test
# data_df = test_df.copy()

# # find positive pairs
# anchor_indexs = data_df.groupby('identity_code').filter(lambda x: len(x) > 1).index
# anchor_df = data_df.loc[data_df.index.isin(anchor_indexs)]
# anchors = np.unique(anchor_df['identity_code'].values)
# anchor_path1s = []
# anchor_path2s = []

# for anchor in anchors:
#     anchor_identities = anchor_df[anchor_df['identity_code'] == anchor]
#     anchor_path1, anchor_path2 = np.random.choice(anchor_identities['img_path'].values, size=2, replace=False)
#     anchor_path1s.append(anchor_path1)
#     anchor_path2s.append(anchor_path2)
# labels = [1] * len(anchor_path1s)


# # find negative pairs
# size = len(anchor_path1s)
# anchors = np.unique(data_df['identity_code'].values)
# for i in range(size):
#     anchor1, anchor2 = np.random.choice(anchors, size=2, replace=False)
#     anchor_identities1 = data_df[data_df['identity_code'] == anchor1]
#     anchor_identities2 = data_df[data_df['identity_code'] == anchor2]
#     anchor_path1 = np.random.choice(anchor_identities1['img_path'].values)
#     anchor_path2 = np.random.choice(anchor_identities2['img_path'].values)
#     anchor_path1s.append(anchor_path1)
#     anchor_path2s.append(anchor_path2)

# labels += [0] * size
# pair_df = pd.DataFrame({'img1_path':anchor_path1s, 'img2_path':anchor_path2s, 'label':labels})
# pair_df = pair_df.copy()
# img_shape = (250, 250, 3)
# num_chunks = 1
# root = 'data/test_pair'
# save_chunk_pair_npy(pair_df, num_chunks, img_shape, root)
# pair_df.head()

In [9]:
# pair dataloader
root = 'data/test_pair'
img_npy_list1 = [np.load(os.path.join(root, 'img1_0.npy'))]
img_npy_list2 = [np.load(os.path.join(root, 'img2_0.npy'))]
label_npy_list = [np.load(os.path.join(root, 'label_0.npy'))]

input_shape = (3, 250, 250)
pair_dataset = PairFaceDataset(img_npy_list1, img_npy_list2, label_npy_list, input_shape)
pair_dataloader = DataLoader(pair_dataset, batch_size=16, shuffle=True)
num_classes = 2

build model

In [10]:
# load pretrained model
model_resnet = resnet18(pretrained=False)
# change first and last layer
model_resnet.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
model_resnet.fc = nn.Linear(512, 512)



loss function

In [11]:
class ArcMarginProduct(nn.Module):
    r"""Implement of large margin arc distance: :
        Args:
            in_features: size of each input sample
            out_features: size of each output sample
            s: norm of input feature
            m: margin

            cos(theta + m)
        """
    def __init__(self, in_features, out_features, s=30.0, m=0.50, easy_margin=False):
        super(ArcMarginProduct, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.s = s
        self.m = m
        self.weight = Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

        self.easy_margin = easy_margin
        self.cos_m = math.cos(m)
        self.sin_m = math.sin(m)
        self.th = math.cos(math.pi - m)
        self.mm = math.sin(math.pi - m) * m

    def forward(self, input, label):
        # --------------------------- cos(theta) & phi(theta) ---------------------------
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        sine = torch.sqrt((1.0 - torch.pow(cosine, 2)).clamp(0, 1))
        phi = cosine * self.cos_m - sine * self.sin_m
        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where(cosine > self.th, phi, cosine - self.mm)
        # --------------------------- convert label to one-hot ---------------------------
        # one_hot = torch.zeros(cosine.size(), requires_grad=True, device='cuda')
        one_hot = torch.zeros(cosine.size(), device='cuda')
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        # -------------torch.where(out_i = {x_i if condition_i else y_i) -------------
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)  # you can use torch.where if your torch.__version__ is 0.4
        output *= self.s
        return output

train

In [12]:
import tensorflow as tf
import tensorboard as tb
tf.io.gfile = tb.compat.tensorflow_stub.io.gfile

In [13]:
def get_feature(model, img1, img2):
    with torch.no_grad():
        img1 = img1.cuda()
        img2 = img2.cuda()
        feature1 = model(img1)
        feature2 = model(img2)
    return feature1, feature2

def cosin_metric(feature1, feature2):
    return F.cosine_similarity(feature1, feature2)

def get_acc(y_score, y_true):
    thresholds = sorted(set(y_score), reverse=True)
    best_acc = 0
    best_th = 0
    for th in thresholds:
        y_pred = (y_score >= th).astype(int)
        acc = accuracy_score(y_true, y_pred)
        if acc > best_acc:
            best_acc = acc
            best_th = th
    return best_acc, best_th

In [14]:
# test the training step
data_df = val_df.copy()    
num_classes = len(np.unique(data_df['identity'].values))
model = DataParallel(model_resnet)
metric_fc = DataParallel(ArcMarginProduct(512, num_classes, s=30, m=0.5, easy_margin=False))
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD([{'params': model.parameters()}, {'params': metric_fc.parameters()}], lr=0.001)
model = model.train()

for data, label in dataloader:
    feature = model(data)
    output = metric_fc(feature, label)
    loss = criterion(output, label.long().cuda())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    output = output.data.cpu().numpy()
    output = np.argmax(output, axis=1)
    label = label.data.cpu().numpy()
    acc = np.mean((output == label).astype(int))
    break
print('Okay')

Okay


In [15]:
# hyper parameters
data_df = data_df.copy()
num_epochs = 2
batch_size = 32
learning_rate = 0.001
validate_every = 1

# dataloader
train_dataset = FaceDataset(img_npy_list, label_npy_list, input_shape, 'train')
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# model, metric_fc, criterion, optimizer
num_classes = len(np.unique(data_df['identity'].values))
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
classes = data_df['identity_code'].unique()

# model, metric_fc, criterion, optimizer
model = DataParallel(model_resnet)
metric_fc = DataParallel(ArcMarginProduct(512, num_classes, s=30, m=0.5, easy_margin=False))
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD([{'params': model.parameters()}, {'params': metric_fc.parameters()}], lr=learning_rate)

# writer
exist_path = os.listdir('log')
writer_version = int(exist_path[-1][-1]) + 1 if len(exist_path) > 0 else 0
writer = SummaryWriter(log_dir="log/v" + str(writer_version))

In [16]:
# train
model.train()
step = 0
for epoch in range(num_epochs):
    for data, train_label in tqdm(dataloader, desc='Epoch {}/{}'.format(epoch+1, num_epochs)):
        # forward
        data = data.to(device)
        train_label = train_label.to(device)
        feature = model(data).to(device)
        output = metric_fc(feature, train_label).to(device)
        loss = criterion(output, train_label.long().cuda())

        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # validate
        if step % validate_every == 0:
            model.eval()
            similarity_list = []
            label_list = []
            with torch.no_grad():
                for data1, data2, label in pair_dataloader:
                    data1 = data1.to(device)
                    data2 = data2.to(device)
                    label = label.to(device)
                    feature1, feature2 = get_feature(model, data1, data2)
                    similarity = cosin_metric(feature1, feature2)
                    similarity_list.append(similarity.cpu().numpy())
                    label_list.append(label.cpu().numpy())
                similarity_list = np.concatenate(similarity_list)
                label_list = np.concatenate(label_list)
                acc, th = get_acc(similarity_list, label_list)   

            # visualize
            img_grid = make_grid(data)
            img = data.cpu().numpy()
            img = img.reshape(img.shape[0], -1)
            output = output.data.cpu().numpy()
            output = np.argmax(output, axis=1)
            train_label = train_label.data.cpu().numpy()

            # writer.add_histogram('fc', model_resnet18.fc.weight, epoch)
            writer.add_image('image', img_grid, step)
            writer.add_scalar('loss', loss.item(), step)
            writer.add_scalar('acc', acc, step)
            writer.add_scalar('threshold', th, step)
            writer.add_embedding(img, metadata=train_label, label_img=data, global_step=step)
            step += 1

Epoch 1/2:   0%|          | 0/34 [00:00<?, ?it/s]

Epoch 2/2:   0%|          | 0/34 [00:00<?, ?it/s]

In [None]:
# save model
model_path = 'model_save/model_resnet18.pth'
metric_fc_path = 'model_save/metric_fc.pth'
torch.save(model.state_dict(), model_path)
torch.save(metric_fc.state_dict(), metric_fc_path) 