In [1]:
!pip install keras
!pip install tensorflow
!pip install numpy
!pip install matplotlib



In [2]:
# Import libraries and modules
import numpy as np
import tensorflow as tf
import keras.backend as K
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.models import Model
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers import Lambda
from keras.layers import Input
from keras.layers import Embedding
from keras.layers import Resizing
from tensorflow.keras import layers
from keras.layers import GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from keras.applications.mobilenet_v2 import MobileNetV2
from tensorflow.keras.utils import to_categorical
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.models import load_model
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
import seaborn as sns

In [3]:
def normalize_pixels(image):
    """normalize pixels to be between 0 and 1"""

	# convert from integers to floats
    image_norm = image.astype('float32')
	# normalize to range -1 and 1
    image_norm = (image_norm - 127.5) / 127.5

	# return normalized images
    return image_norm

def de_normalize_pixels(image, _from = 0, _to = 1):
    """de-normalize pixels to be between 0 and 255"""

    # Normalize between 0 and 1
    image_de_norm = (image * 127.5) + 127.5

    # Normalize between 0 and 1
    image_de_norm = image_de_norm/255 

    # Normalize between _from and _to
    image_de_norm = (image_de_norm * (_to - _from)) + _from
    
    return image_de_norm

def load_image(path):
    """load image from path and convert to array"""

    img = load_img(path, target_size=(224, 224), interpolation='bilinear')
    x = img_to_array(img)
    x = normalize_pixels(x)
    x = np.expand_dims(x, axis=0)
    return x

def unison_shuffled_copies(a, b):
    """shuffle two arrays in unison"""

    assert len(a) == len(b)
    p = np.random.permutation(len(a))
    return a[p], b[p]

test_image_1 = load_image(r'./data/dataset/all_souls/all_souls_000002.jpg')
test_image_1_de = de_normalize_pixels(test_image_1, 0, 255)
print(np.min(test_image_1))
print(np.max(test_image_1))
print(np.min(test_image_1_de))
print(np.max(test_image_1_de))

-1.0
1.0
0.0
255.0


In [4]:
# Variables

DATA_PATH = './data/dataset'
DATA_TRAIN_SPLIT = 0.8
DATA_VALIDATION_SPLIT = 0.1
# The TEST split will be the remaining of the two splits

#amount_of_positive_pairs_per_class = 50

In [5]:
# Load images into arrays
import pathlib

data_dir = pathlib.Path(DATA_PATH)

image_count = len(list(data_dir.glob('*/*.jpg')) + list(data_dir.glob('*/*.jpeg')) + list(data_dir.glob('*/*.png')))
print("Total amount of images: " + str(image_count))

folders = [x for x in data_dir.iterdir() if x.is_dir()]

img_array_data = []

for i, folder in enumerate(folders):
    img_array_data.append([])

    for j, img in enumerate(folder.iterdir()):
        img_array_data[i].append(load_image(img))

Total amount of images: 555


In [6]:
data = []
labels = []

# Create image pairs
def create_image_pairs(images):
    for i, array in enumerate(images):
        for j in range(len(array)):
            # True
            data.append([
                images[i][j], 
                images[i][np.random.randint(0, len(images[i]) - 1)]])
            labels.append(1)

            x_1 = np.random.randint(0, len(images) - 1)
            x_2 = np.random.randint(0, len(images[x_1]) - 1)

            # False
            data.append([
                images[i][j], 
                images[x_1][x_2]])
            labels.append(0)

create_image_pairs(img_array_data)

data = np.array(data)
labels = np.array(labels)

In [7]:
# Shuffle Dataset
data, labels = unison_shuffled_copies(data, labels)

show_amount = 10
print(data[:show_amount])
print(labels[:show_amount])

[[[[[[ 0.11372549  0.39607844  0.6862745 ]
     [ 0.11372549  0.39607844  0.6862745 ]
     [ 0.11372549  0.40392157  0.6862745 ]
     ...
     [ 0.21568628  0.4509804   0.6784314 ]
     [ 0.21568628  0.4509804   0.6784314 ]
     [ 0.22352941  0.44313726  0.67058825]]

    [[ 0.11372549  0.40392157  0.6862745 ]
     [ 0.12156863  0.4117647   0.6862745 ]
     [ 0.11372549  0.40392157  0.69411767]
     ...
     [ 0.21568628  0.4509804   0.6784314 ]
     [ 0.21568628  0.4509804   0.6784314 ]
     [ 0.20784314  0.4509804   0.6784314 ]]

    [[ 0.12156863  0.4117647   0.6862745 ]
     [ 0.12156863  0.4117647   0.6862745 ]
     [ 0.12156863  0.4117647   0.69411767]
     ...
     [ 0.23137255  0.4509804   0.6784314 ]
     [ 0.21568628  0.4509804   0.6784314 ]
     [ 0.2         0.45882353  0.6784314 ]]

    ...

    [[-0.8980392  -0.88235295 -0.88235295]
     [-0.827451   -0.827451   -0.8509804 ]
     [-0.7882353  -0.7882353  -0.8117647 ]
     ...
     [-0.88235295 -0.88235295 -0.8980392 ]
   

In [8]:
# Split data into train, validation, and test

train_split = int(DATA_TRAIN_SPLIT * len(data))
validation_split = train_split+int(DATA_VALIDATION_SPLIT * len(data))

print("TRAIN SPLIT: " + "0 - " + str(train_split) + " | " + str(train_split) + " | " + str(DATA_TRAIN_SPLIT*100) + '%')
print("VALIDATION SPLIT: " + str(train_split) + " - " + str(validation_split) + " | " + str(validation_split-train_split) + " | " + str(DATA_VALIDATION_SPLIT*100) + '%')
print("TEST SPLIT: " + str(validation_split) + " - " + str(len(data)) + " | " + str(len(data) - validation_split) + " | " + str(100 - DATA_TRAIN_SPLIT*100 - DATA_VALIDATION_SPLIT*100) + '%')

data_train = data[0:train_split]
data_validation = data[train_split:validation_split]
data_test = data[validation_split:]

labels_train = labels[0:train_split]
labels_validation = labels[train_split:validation_split]
labels_test = labels[validation_split:]

print("---")
print("Training Data Shape: " + str(data_train.shape))
print("Validation Data Shape: " + str(data_validation.shape))
print("Test Data Shape: " + str(data_test.shape))
print("---")
print("Training Labels Shape: " + str(labels_train.shape))
print("Validation Labels Shape: " + str(labels_validation.shape))
print("Test Labels Shape: " + str(labels_test.shape))

TRAIN SPLIT: 0 - 888 | 888 | 80.0%
VALIDATION SPLIT: 888 - 999 | 111 | 10.0%
TEST SPLIT: 999 - 1110 | 111 | 10.0%
---
Training Data Shape: (888, 2, 1, 224, 224, 3)
Validation Data Shape: (111, 2, 1, 224, 224, 3)
Test Data Shape: (111, 2, 1, 224, 224, 3)
---
Training Labels Shape: (888,)
Validation Labels Shape: (111,)
Test Labels Shape: (111,)


## Feature Extractor M1

In [11]:
input_shape = (224, 224, 3)

# embedding extractor
embedding_extractor = Sequential()
base_model = MobileNetV2(input_shape=input_shape, include_top = False, weights='imagenet')
base_model.trainable = False
embedding_extractor.add(base_model)

embedding_extractor.add(GlobalAveragePooling2D())

embedding_extractor.summary()

# Input pair
input_image_1 = Input(input_shape)
input_image_2 = Input(input_shape)

# Output pair
encoded_image_1 = embedding_extractor(input_image_1)
encoded_image_2 = embedding_extractor(input_image_2)

# Model
feature_extractor = Model(inputs=[input_image_1, input_image_2], outputs=[encoded_image_1, encoded_image_2])
feature_extractor.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 mobilenetv2_1.00_224 (Funct  (None, 7, 7, 1280)       2257984   
 ional)                                                          
                                                                 
 global_average_pooling2d_1   (None, 1280)             0         
 (GlobalAveragePooling2D)                                        
                                                                 
Total params: 2,257,984
Trainable params: 0
Non-trainable params: 2,257,984
_________________________________________________________________
Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_5 (InputLayer)           [(None, 224, 224, 3  0           []                          

## M3

In [12]:

def euclidean_distance(vects):
    x = vects[0]
    y = vects[1]
    return K.sqrt(K.sum(K.square(x - y), axis=1, keepdims=True))

m3 = Sequential()

ex = feature_extractor

m3.add(ex)
# m3.summary()

# m3.add(Lambda(euclidean_distance))


AssertionError: Exception encountered when calling layer "model_1" (type Functional).

Could not compute output KerasTensor(type_spec=TensorSpec(shape=(None, 1280), dtype=tf.float32, name=None), name='sequential_2/global_average_pooling2d_1/Mean:0', description="created by layer 'sequential_2'")

Call arguments received:
  • inputs=tf.Tensor(shape=(None, 224, 224, 3), dtype=float32)
  • training=False
  • mask=None