In [1]:
!pip install tensorflow
!pip install transformers tensorboard --upgrade

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Collecting tensorboard<2.12,>=2.11
  Downloading tensorboard-2.11.2-py3-none-any.whl (6.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.0/6.0 MB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting tensorboard-data-server<0.7.0,>=0.6.0
  Downloading tensorboard_data_server-0.6.1-py3-none-manylinux2010_x86_64.whl (4.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.9/4.9 MB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Installing collected packages: tensorboard-data-server, tensorboard
  Attempting uninstall: tensorboard-data-server
    Found existing installation: tensorboard-data-server 0.7.0
    Uninstalling tensorboard-data-server-0.7.0:
      Successfully uninstalled tensorboard-data-server-0.7.0
  Attempting uninstall: tensorboard
    Found existing installation: tensorboard 2.12.0
    Uninstalling tensorboard-2.12.0:
      Succes

### DigiFace-1M Dataset

https://github.com/microsoft/DigiFace1M

The DigiFace-1M dataset is a collection of over one million diverse synthetic face images for face recognition.

It was introduced in our paper DigiFace-1M: 1 Million Digital Face Images for Face Recognition and can be used to train deep learning models for facial recognition.

The dataset contains:

    720K images with 10K identities (72 images per identity). For each identity, 4 different sets of accessories are sampled and 18 images are rendered for each set.
    500K images with 100K identities (5 images per identity). For each identity, only one set of accessories is sampled.

The DigiFace-1M dataset can be used for non-commercial research, and is licensed under the license found in LICENSE.

In [1]:
import os

home=os.path.abspath(os.getcwd())
data_path=os.path.join(home, 'data')

#global data
pairs=[]
classes=[]
IMAGE_DIMS = 112 # was 224 pixels

for file in os.listdir(data_path):
    classes.append(file)
    

In [13]:
# print(os.listdir(data_path))

In [2]:
print(len(classes))

2000


In [11]:
import numpy as np

class DataGenerator(keras.utils.Sequence):
    'Generates data for Keras'
    def __init__(self, list_IDs, labels, batch_size=32, dim=(IMAGE_DIMS,IMAGE_DIMS), n_channels=1,
                 n_classes=2, shuffle=True):
        'Initialization'
        self.dim = dim
        self.batch_size = batch_size
        self.labels = labels
        self.list_IDs = list_IDs
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        'Denotes the number of batches per epoch'
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        'Generate one batch of data'
        # Generate indexes of the batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        # Find list of IDs
        list_IDs_temp = [self.list_IDs[k] for k in indexes]
        # Generate data
        X = self.__data_generation(list_IDs_temp)
        return X

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        'Generates data containing batch_size samples'
        x1 = np.empty((self.batch_size, IMAGE_DIMS,IMAGE_DIMS,3))
        x2 = np.empty((self.batch_size, IMAGE_DIMS,IMAGE_DIMS,3))
        y = np.empty((self.batch_size, 1))

        # Generate data
        for i, ID in enumerate(list_IDs_temp):
            # print(self.labels[int(ID)])
            path1, path2, label, s1, s2 = self.labels[int(ID)]

            _x1 = cv2.imread(path1)
            _x2 = cv2.imread(path2)
            _x1 = preprocess(_x1, s1)
            _x1 = scale_back(_x1) / 255.
            _x2 = preprocess(_x2, s2)
            _x2 = scale_back(_x2) / 255.

            x1[i,] = _x1
            x2[i,] = _x2
            y[i,] = label
        return [x1, x2], y



In [5]:
import random
import numpy as np

def make_pairs():#makes pairs of data
    global pairs, classes, labels
    # pairs = np.array(pairs).astype("float32")
    # labels = np.array(labels).astype("float32")
    # pairs = []
    for class_ in classes:
        class_path = os.path.join(data_path, class_)
        for img_path in os.listdir(class_path):
            if np.random.uniform()<=0.25:#rescale images
                image1 = os.path.join(class_path, img_path)
                image_select=random.choice(os.listdir(class_path))
                image2 = os.path.join(class_path, image_select)
                scale = np.random.uniform(0.3,0.6)#scaling factor
                select_index = random.choice([1,2])
                if select_index==1:
                    s1=int(scale*IMAGE_DIMS)#scale down
                    s2 = IMAGE_DIMS
                    scale_flag=1
                else:
                    s2=int(scale*IMAGE_DIMS)#scale down
                    s1 = IMAGE_DIMS
                    scale_flag=2
                pairs+=[[image1, image2, 1, s1, s2]]

                class_select = random.choice(classes)
                while class_select == class_:# keep trying if select the current class
                    class_select = random.choice(classes)
                class_path2 = os.path.join(data_path, class_select)
                image_select=random.choice(os.listdir(class_path2))
                image2 = os.path.join(class_path2, image_select)
                if scale_flag ==1:
                    s1 = IMAGE_DIMS
                    if np.random.uniform()<0.5:
                        s2=int(scale*IMAGE_DIMS)#scale down
                    else:
                        s2 = IMAGE_DIMS
                elif scale_flag ==2:
                    if np.random.uniform()<0.5:
                        select_index = random.choice([1,2])
                        if select_index==1:
                            s1=int(scale*IMAGE_DIMS)#scale down
                            s2 = IMAGE_DIMS
                        else:
                            s2=int(scale*IMAGE_DIMS)#scale down
                            s1 = IMAGE_DIMS
                scale_flag=0
                pairs+=[[image1, image2, 0, s1, s2]]

            image1 = os.path.join(class_path, img_path)
            image_select=random.choice(os.listdir(class_path))
            image2 = os.path.join(class_path, image_select)
            # image1=preprocess(image1)
            # image2=preprocess(image2)
            pairs+=[[image1, image2, 1, IMAGE_DIMS, IMAGE_DIMS]]


            class_select = random.choice(classes)
            while class_select == class_:# keep trying if select the current class
                class_select = random.choice(classes)
            class_path2 = os.path.join(data_path, class_select)
            image_select=random.choice(os.listdir(class_path2))
            image2 = os.path.join(class_path2, image_select)
            # image2=preprocess(image2)
            pairs+=[[image1, image2, 0, IMAGE_DIMS, IMAGE_DIMS]]




In [6]:
make_pairs()

In [7]:
pairs

[['/home/halim/Documents/ECE 613 Image Processing/GitHub/Face_id_meta/data/1634/68.png',
  '/home/halim/Documents/ECE 613 Image Processing/GitHub/Face_id_meta/data/1634/0.png',
  1,
  112,
  112],
 ['/home/halim/Documents/ECE 613 Image Processing/GitHub/Face_id_meta/data/1634/68.png',
  '/home/halim/Documents/ECE 613 Image Processing/GitHub/Face_id_meta/data/1112/14.png',
  0,
  112,
  112],
 ['/home/halim/Documents/ECE 613 Image Processing/GitHub/Face_id_meta/data/1634/13.png',
  '/home/halim/Documents/ECE 613 Image Processing/GitHub/Face_id_meta/data/1634/29.png',
  1,
  112,
  112],
 ['/home/halim/Documents/ECE 613 Image Processing/GitHub/Face_id_meta/data/1634/13.png',
  '/home/halim/Documents/ECE 613 Image Processing/GitHub/Face_id_meta/data/827/24.png',
  0,
  112,
  112],
 ['/home/halim/Documents/ECE 613 Image Processing/GitHub/Face_id_meta/data/1634/28.png',
  '/home/halim/Documents/ECE 613 Image Processing/GitHub/Face_id_meta/data/1634/7.png',
  1,
  112,
  112],
 ['/home/hali

In [11]:
print(len(pairs))
print(pairs[0])

360910
['/home/halim/Documents/ECE 613 Image Processing/GitHub/Face_id_meta/data/1634/68.png', '/home/halim/Documents/ECE 613 Image Processing/GitHub/Face_id_meta/data/1634/0.png', 1, 112, 112]


In [13]:
print(pairs[360909])

['/home/halim/Documents/ECE 613 Image Processing/GitHub/Face_id_meta/data/256/55.png', '/home/halim/Documents/ECE 613 Image Processing/GitHub/Face_id_meta/data/1924/26.png', 0, 112, 112]


### Pre-trained Model

In this example are we going to fine-tune the [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) a Vision Transformer (ViT) pre-trained on ImageNet-21k (14 million images, 21,843 classes) at resolution 224x224.

You can easily adjust the `model_id` to another Vision Transformer model, e.g. `google/vit-base-patch32-384`

In [1]:
model_id = "google/vit-base-patch16-224-in21k"

### Pre-processing

To train our model we need to convert our "Images" to `pixel_values`. This is done by a [🤗 Transformers Feature Extractor](https://huggingface.co/docs/transformers/master/en/main_classes/feature_extractor#feature-extractor) which allows us to `augment` and convert the images into a 3D Array to be fed into our model.

In [2]:
from transformers import ViTFeatureExtractor
from tensorflow import keras 
from tensorflow.keras import layers


feature_extractor = ViTFeatureExtractor.from_pretrained(model_id)

# learn more about data augmentation here: https://www.tensorflow.org/tutorials/images/data_augmentation
data_augmentation = keras.Sequential(
    [
        layers.Resizing(feature_extractor.size, feature_extractor.size),
        layers.Rescaling(1./255),
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(factor=0.02),
        layers.RandomZoom(
            height_factor=0.2, width_factor=0.2
        ),
    ],
    name="data_augmentation",
)
# use keras image data augementation processing
def augmentation(examples):
    # print(examples["img"])
    examples["pixel_values"] = [data_augmentation(image) for image in examples["img"]]
    return examples


# basic processing (only resizing)
def process(examples):
    examples.update(feature_extractor(examples['img'], ))
    return examples

2023-02-25 01:42:48.579796: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
class L1Dist(layers.Layer):
    def __init__(self, **kwargs):
        super().__init__()
        
    def call(self, embedding1, embedding2):
        return tf.math.abs(embedding1-embedding2)


class L1Dist_mod(layers.Layer):
    def __init__(self, **kwargs):
        super().__init__()
        
    def call(self, embedding1, embedding2):
        return tf.math.reduce_sum(tf.math.abs(embedding1-embedding2), axis=1, keepdims=True)

class L2Dist(layers.Layer):
    def __init__(self, **kwargs):
        super().__init__()
        
    def call(self, embedding1, embedding2):
        sum_square = tf.math.reduce_sum(tf.math.square(embedding1 - embedding2), axis=1, keepdims=True)
        return tf.math.sqrt(tf.math.maximum(sum_square, tf.keras.backend.epsilon()))

class cosine(layers.Layer):
    def __init__(self, **kwargs):
        super().__init__()
        
    def call(self, embedding1, embedding2):
        return 1-tf.keras.losses.cosine_similarity(embedding1,embedding2)

class TF_L2Dist(layers.Layer):
    def __init__(self, **kwargs):
        super().__init__()
        
    def call(self, embedding1, embedding2):
        return tf.math.reduce_euclidean_norm(embedding1,embedding2)

### Download the pretrained transformer model and fine-tune it. 

In [None]:
# from transformers import TFViTForImageClassification, create_optimizer
# import tensorflow as tf

# # create optimizer wight weigh decay
# num_train_steps = len(tf_train_dataset) * num_train_epochs
# optimizer, lr_schedule = create_optimizer(
#     init_lr=learning_rate,
#     num_train_steps=num_train_steps,
#     weight_decay_rate=weight_decay_rate,
#     num_warmup_steps=num_warmup_steps,
# )

# # load pre-trained ViT model
# model = TFViTForImageClassification.from_pretrained(
#     model_id,
#     num_labels=len(img_class_labels),
#     id2label=id2label,
#     label2id=label2id,
# )

# # define loss
# loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# # define metrics 
# metrics=[
#     tf.keras.metrics.SparseCategoricalAccuracy(name="accuracy"),
#     tf.keras.metrics.SparseTopKCategoricalAccuracy(3, name="top-3-accuracy"),
# ]

# # compile model
# model.compile(optimizer=optimizer,
#               loss=loss,
#               metrics=metrics
#               )

If you want to create you own classification head or if you want to add the augmentation/processing layer to your model, you can directly use the [functional Keras API](https://keras.io/guides/functional_api/). Below you find an example on how you would create a classification head.

In [4]:
# import tensorflow as tf
# from transformers import TFViTModel

# base_model = TFViTModel.from_pretrained('google/vit-base-patch16-224-in21k')

All model checkpoint layers were used when initializing TFViTModel.

All the layers of TFViTModel were initialized from the model checkpoint at google/vit-base-patch16-224-in21k.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFViTModel for predictions without further training.


In [5]:
# base_model.summary()

Model: "tf_vi_t_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vit (TFViTMainLayer)        multiple                  86389248  
                                                                 
Total params: 86,389,248
Trainable params: 86,389,248
Non-trainable params: 0
_________________________________________________________________


In [5]:
# alternatively create Image Classification model using Keras Layer and ViTModel 
# here you can also add the processing layers of keras

import tensorflow as tf
from transformers import TFViTModel
from tensorflow.keras import regularizers

base_model = TFViTModel.from_pretrained('google/vit-base-patch16-224-in21k')
base_model.summary()

# inputs
pixel_values1 = tf.keras.layers.Input(shape=(3,IMAGE_DIMS,IMAGE_DIMS), name='pixel_values', dtype='float32')
pixel_values2 = tf.keras.layers.Input(shape=(3,IMAGE_DIMS,IMAGE_DIMS), name='pixel_values', dtype='float32')

# model layer
# vit = base_model.vit(pixel_values)[0]
vit1 = base_model.vit(pixel_values1)[0]
vit2 = base_model.vit(pixel_values2)[0]

# x1 = layers.Dense(1024, activation='sigmoid',kernel_regularizer=regularizers.l2(1e-3))(vit1)
# x2 = layers.Dense(1024, activation='sigmoid',kernel_regularizer=regularizers.l2(1e-3))(vit2)

siamese_layer=L1Dist_mod()
distances=siamese_layer(vit1, vit2)

# classifier = tf.keras.layers.Dense(1, activation='softmax', name='outputs')(distances)
classifier = tf.keras.layers.Dense(1, activation='sigmoid', name='outputs')(distances)

# model
keras_model = tf.keras.Model(inputs=(pixel_values1,pixel_values2) , outputs=classifier)

Downloading:   0%|          | 0.00/502 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/346M [00:00<?, ?B/s]

All model checkpoint layers were used when initializing TFViTModel.

All the layers of TFViTModel were initialized from the model checkpoint at google/vit-base-patch16-224-in21k.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFViTModel for predictions without further training.
