In [1]:
import torch

In [2]:
from face_lib.dataset_classes.lightning_datasets import MXFaceDataset
from torch.utils.data import DataLoader
# Dataset and loader configuration for one sequential pass over the data.
batch_size = 37
ms1mv3_ds = MXFaceDataset("/app/datasets/ms1m/")

# Fixed order (no shuffling) and no dropped tail batch, so every sample is
# yielded exactly once and in dataset order.
ms1mv3_loader = DataLoader(
    ms1mv3_ds,
    batch_size=batch_size,
    shuffle=False,
    drop_last=False,
    num_workers=40,
)



In [3]:
from face_lib.models.lightning_wrappers import ResNet
# Build the pretrained backbone and place it on the first CUDA device.
device = torch.device('cuda:0')
backbone = ResNet(
    'iresnet50_normalized',
    '/app/model_weights/backbone/ms1mv3_arcface_r50/backbone.pth',
    False,  # NOTE(review): meaning of this positional flag is not visible here - confirm against ResNet's signature
)
# Switch to evaluation mode before running inference (the printed architecture
# contains BatchNorm2d layers, whose behaviour differs between train and eval).
backbone.eval()
# Left as the cell's last expression, so the notebook displays the module repr.
backbone.to(device)

ResNet(
  (backbone): IResNetNorm(
    (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (prelu): PReLU(num_parameters=64)
    (layer1): Sequential(
      (0): IBasicBlock(
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (prelu): PReLU(num_parameters=64)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (downsample): Sequential(
          (0): Conv2d(64, 64, kernel_size=(1, 1), stride=(2, 2), bias=False)
          (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True,

In [4]:
# Total number of samples in the dataset; the extraction cell uses this to size its result arrays.
len(ms1mv3_ds)

5822653

In [5]:
from tqdm import tqdm
import numpy as np
# Run the backbone over every batch of the loader and store, per sample:
#   * backbone_outputs["bottleneck_feature"] -> bottleneck_features (25088 columns)
#   * backbone_outputs["feature"]            -> features (512 columns)
#   * the label yielded by the loader        -> labels_all
# Rows are written in loader order, which matches dataset order because the
# loader is created with shuffle=False.
num_samples = len(ms1mv3_ds)

# NOTE(review): (num_samples, 25088) float32 is roughly 584 GB for the
# 5,822,653-sample dataset. Confirm the host can really hold this, or back the
# buffer with a disk-based array (e.g. np.memmap) instead.
bottleneck_features = np.zeros((num_samples, 25088), np.float32)
features = np.zeros((num_samples, 512), np.float32)
# `np.int` was deprecated in NumPy 1.20 and removed in 1.24. int64 is spelled
# out explicitly; it matches the torch.int64 labels produced by the loader.
labels_all = np.zeros(num_samples, dtype=np.int64)

# Running write position. Using the actual batch length (rather than
# i * batch_size) keeps the rows aligned even if the notebook-level
# `batch_size` is changed after the loader was built, or if the last batch is
# shorter than the others.
offset = 0

# Inference only: disabling autograd avoids building a graph for every forward
# pass, which saves GPU memory and time.
with torch.no_grad():
    # `i` is kept as the loop index because later cells inspect it.
    for i, batch in enumerate(tqdm(ms1mv3_loader)):
        images, labels = batch
        images = images.to(device)
        backbone_outputs = backbone(images)

        end = offset + len(labels)
        # Slice assignment already copies into the destination buffer, so no
        # extra .copy() of the intermediate array is needed.
        bottleneck_features[offset:end] = backbone_outputs["bottleneck_feature"].detach().cpu().numpy()
        features[offset:end] = backbone_outputs["feature"].detach().cpu().numpy()
        labels_all[offset:end] = labels.detach().cpu().numpy()
        offset = end

        # Drop the reference to the GPU tensors before the next forward pass.
        del backbone_outputs
    

  2%|‚ñè         | 3098/157369 [01:03<52:32, 48.93it/s] 


KeyboardInterrupt: 

In [None]:
# dtype of the label tensor from the most recently processed batch.
labels.dtype

torch.int64

In [None]:
# Index of the batch the extraction loop was working on when it stopped.
i

414

In [None]:
# Bottleneck feature vector stored for the first sample (row 0 of the result array).
bottleneck_features[0]

array([ 5.8258479e-03, -3.9811176e-04, -3.4052644e-02, ...,
        6.1517689e-02, -1.1023444e-02,  3.3121618e-05], dtype=float32)

In [None]:
# Last 100 label slots. The extraction loop fills the array front to back, so
# slots it has not reached yet still hold the zeros they were initialised with.
# NOTE(review): the saved output shows floats, which does not match the integer
# dtype requested in the extraction cell - it was likely produced by an earlier
# version of that cell; re-run to refresh.
labels_all[-100:]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [None]:
# L2 norm of the first stored feature vector.
# NOTE(review): presumably a sanity check that the backbone emits unit-length
# features (the model is named 'iresnet50_normalized') - confirm.
np.linalg.norm(features[0])

0.9999999721507012

In [None]:
# Shape of the feature array: (number of samples, feature dimension).
features.shape

(5822653, 512)

In [None]:
# dtype of the image tensor from the most recently processed batch (after the move to `device`).
images.dtype

torch.float32

In [None]:

def _stack_batches(chunks):
    """Return the collected results as a single ndarray.

    A list/tuple of per-batch arrays is concatenated along axis 0 (the
    original behaviour of this cell). An ndarray is returned unchanged:
    calling np.concatenate on a 2-D array would treat each row as a separate
    1-D array and flatten the result to 1-D, and on a 1-D array it raises
    ValueError because its elements are zero-dimensional.
    """
    if isinstance(chunks, (list, tuple)):
        return np.concatenate(chunks)
    return np.asarray(chunks)


# Works both when the results were gathered as lists of per-batch arrays and
# when they were written into preallocated arrays; re-running the cell is a no-op.
features = _stack_batches(features)
bottleneck_features = _stack_batches(bottleneck_features)
labels_all = _stack_batches(labels_all)

In [None]:
# Shapes of the three result arrays; their first dimensions should all agree.
features.shape, bottleneck_features.shape, labels_all.shape

((4200, 512), (4200, 25088), (4200,))

In [None]:
# Preview of the collected labels (NumPy abbreviates long arrays when displaying them).
labels_all

array([ 0,  0,  0, ..., 54, 54, 54])

In [None]:
# Second element of the most recent batch, i.e. its labels (the loop unpacks a batch as `images, labels`).
batch[1]

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])