In [1]:
from Manager import *
### Import TensorFlow explicitly so this cell does not depend on the star
### import above happening to expose a name called `tf`.
import tensorflow as tf

### Eager execution has to be enabled before any other TensorFlow work is done.
### The top-level switch only exists on TF 1.x; TF 2.x is eager by default.
if hasattr(tf, "enable_eager_execution"):
    tf.enable_eager_execution()

In [2]:
"""
Author: Tyrel Cadogan
Email: shaqc777@yahoo.com
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from matplotlib import patches, patheffects
from tensorflow.keras.models import model_from_json, load_model
from mtcnn.mtcnn import MTCNN
from pathlib import Path as path
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import json
import pdb ### Python debuger
import os ### Navigate Through Dirrectory
import pandas as pd
import collections
import cv2
import random
import itertools
from VisionUtils import *
from sklearn.model_selection import train_test_split


class ImageManager:
    """
    Pipeline object for pair-wise face recognition experiments.

    It keeps a list of image paths relative to a root folder (written as
    "<identity folder>/<file name>"), detects and crops faces, extracts one
    feature per image with `feature_extractor`, caches the features on disk
    as ".npy" files, and draws index pairs of same / different identities.
    """

    ### Class Constructor
    def __init__(self,
                 feature_extractor = None, face_detector = None,
                 img_original_rtpath = None, img_aug_rtpath = None,
                 feat_original_rtpath = None, feat_aug_rt_path = None,
                 augment_mode = False, imgpaths = None):
        """
        Args:
            feature_extractor: model exposing `input_shape` and `predict`.
            face_detector: object exposing `detect_faces(img)`.
            img_original_rtpath: root folder of the original images.
            img_aug_rtpath: root folder of the augmented images.
            feat_original_rtpath: root folder of features of original images.
            feat_aug_rt_path: root folder of features of augmented images.
            augment_mode: when True the augmented roots become the active
                roots (`img_rtpath` / `feat_rtpath`), otherwise the originals.
            imgpaths: relative image paths; either a DataFrame whose first
                column holds the paths or a plain iterable of strings.
                Added last so existing positional calls keep working.
        """
        self.feature_extractor = feature_extractor
        self.face_detector = face_detector
        ### Input shape expected by the extractor (None when no model is given)
        self.im_sz = (feature_extractor.input_shape
                      if feature_extractor is not None else None)

        self.imgpaths = self._as_path_list(imgpaths)
        self.img_original_rtpath = img_original_rtpath
        self.img_aug_rtpath = img_aug_rtpath
        self.feat_original_rtpath = feat_original_rtpath
        self.feat_aug_rtpath = feat_aug_rt_path
        ### Active roots, chosen from the four roots above
        self.img_rtpath = None
        self.feat_rtpath = None
        self._select_roots(augment_mode)

        ### Class of each image, list of classes and class -> index mapping
        self._index_classes(self.imgpaths)

        ### indexes of the random sample of each feature
        self.sample_feat1 = []
        self.sample_feat2 = []

        ### Features
        self.feat1 = None
        self.feat2 = None
        ### labels
        self.sample_labels = []
        ### Face Detector performance
        self.undetected_faces = []
        self.detected_faces = []
        self.face_detector_error = None

        self.xtrain, self.xtest = None, None
        self.ytrain, self.ytest = None, None

        self.xtrainp, self.xtestp = None, None
        self.ytrainp, self.ytestp = None, None

        ### Image Augmentation
        from tensorflow.keras.preprocessing.image import ImageDataGenerator
        self.datagen = ImageDataGenerator(
            brightness_range = [0.00000001, 2],
            rotation_range = 15)

    ### ------------------------------------------------------------------
    ### Private helpers
    ### ------------------------------------------------------------------
    @staticmethod
    def _as_path_list(imgpaths):
        """Turns a DataFrame (first column) or an iterable of strings into a list."""
        if imgpaths is None:
            return []
        rows = getattr(imgpaths, "values", imgpaths)
        if callable(rows):  ### e.g. dict.values is a method, not an array
            rows = imgpaths
        return [row if isinstance(row, str) else str(row[0]) for row in rows]

    @staticmethod
    def _class_of(rel_path):
        """Returns the identity folder, i.e. the text before the first "/"."""
        return rel_path.split("/")[0]

    @staticmethod
    def _ensure_parent(file_path):
        """Creates the folder that will contain `file_path` if it is missing."""
        parent = os.path.dirname(file_path)
        if parent:
            os.makedirs(parent, exist_ok = True)

    def _select_roots(self, augment):
        """Points `img_rtpath` / `feat_rtpath` at the augmented or original roots."""
        if augment:
            self.feat_rtpath = self.feat_aug_rtpath
            self.img_rtpath = self.img_aug_rtpath
        else:
            self.feat_rtpath = self.feat_original_rtpath
            self.img_rtpath = self.img_original_rtpath

    def _feature_file(self, rel_path):
        """
        Returns the ".npy" file that caches the feature of `rel_path`.
        The last four characters of the path are dropped, i.e. a three letter
        extension such as ".jpg" is assumed (same rule the notebook cells use).
        """
        return os.path.join(self.feat_rtpath, rel_path[:-4] + ".npy")

    def _index_classes(self, paths):
        """Rebuilds `c`, `classes`, `min_class_count` and `classmap` from `paths`."""
        labels = [self._class_of(p) for p in paths]
        self.c = np.array(labels, dtype = object)  ### The class of each image
        self.classes = list(dict.fromkeys(labels))  ### classes, first-seen order
        counts = collections.Counter(labels)
        self.min_class_count = min(counts.values()) if counts else 0
        ### class to index mapping (np.where tuple, indexes are in element 0)
        self.classmap = {cls: np.where(self.c == cls) for cls in self.classes}

    def _stack_features(self, sample_idx):
        """
        Loads the cached feature of every sampled face and stacks them row-wise.
        Missing files are recorded in `undetected_faces` and skipped.
        Returns None when nothing could be loaded.
        """
        faces = np.array(self.detected_faces)
        loaded = []
        for rel_path in faces[np.asarray(sample_idx, dtype = int)]:
            feature_file = self._feature_file(rel_path)
            if os.path.isfile(feature_file):
                loaded.append(np.load(feature_file))
            else:
                self.undetected_faces.append(feature_file)
        return np.vstack(loaded) if loaded else None

    def _single_face_feature(self, img):
        """Returns the feature of `img` when exactly one face is found, else None."""
        boxes = self.detect_faces(img)
        if len(boxes) != 1:
            return None
        return self.extract_feature(img, boxes[0])

    ### ------------------------------------------------------------------
    ### Public API
    ### ------------------------------------------------------------------
    def getImagList(self):
        """
        Gets sub paths ("<folder>/<file>") for all the images found one level
        below the active image root `img_rtpath`, in sorted order.
        Returns an empty list when the root is unset or is not a folder.
        """
        root = self.img_rtpath
        if root is None or not os.path.isdir(root):
            return []
        found = []
        for folder in sorted(os.listdir(root)):
            folder_path = os.path.join(root, folder)
            if not os.path.isdir(folder_path):
                continue
            for name in sorted(os.listdir(folder_path)):
                found.append(folder + "/" + name)
        return found

    def split(self, train_size):
        """
        Splits the sampled path pairs and their labels into train / test parts
        and stores them in xtrainp, xtestp, ytrainp and ytestp.
        Args:
            train_size: forwarded to train_test_split.
        """
        paths = self.get_sample_image_paths()
        ### get_sample_image_paths just rebuilt the labels; split a copy
        labels = list(self.sample_labels)

        self.xtrainp, self.xtestp, self.ytrainp, self.ytestp = train_test_split(
            pd.DataFrame(paths),
            labels,
            train_size = train_size,
            shuffle = True,
            random_state = 42
        )

    def load_features(self, augment_mode = False):
        """
        loads in extracted features and stores it to the
        feat1 and feat2 attribute of the object

        NOTE(review): a sampled face whose feature file is missing is skipped,
        so feat1 / feat2 can end up with fewer rows than sample_labels.
        """
        self._select_roots(augment_mode)
        self.feat1 = self._stack_features(self.sample_feat1)
        self.feat2 = self._stack_features(self.sample_feat2)

    def detect_faces(self, img):
        """
        Returns BBox list delimiting all the faces  in the image
        """
        return [found["box"] for found in self.face_detector.detect_faces(img)]

    def extract_feature(self, img, bb):
        """
        Crops the face delimited by `bb`, resizes it to the extractor input
        size, scales the pixels and returns the extractor prediction.
        NOTE(review): `resize` is not defined in this file; presumably it comes
        from the VisionUtils star import - confirm its output range, since the
        result is divided by 255 afterwards.
        """
        img = self.crop_face(img, bb)
        img = resize(img, (self.im_sz[1], self.im_sz[2]))
        batch_shape = (1, self.im_sz[1], self.im_sz[2], self.im_sz[3])
        return self.feature_extractor.predict(self.normalize(img).reshape(batch_shape))

    def extract_save_features(self, brange = (10, 20, 30, 40, 60, 80, 100)):
        """
        Extracts features of augmented versions of every original image.
        Args:
            brange: brightness values handed to `increase_brightness`.
        Returns:
            (im1, im2): im1 stacks the features of the keras-transformed
            images, im2 the features of the brightness-shifted images. Either
            is None when no face could be used.

        NOTE(review): the previous version of this method could not run; this
        is a reconstruction of what it appears to have been aiming for and it
        writes nothing to disk - confirm the intended output location.
        """
        keras_feats, bright_feats = [], []
        for rel_path in self.imgpaths:
            img = plt.imread(os.path.join(self.img_original_rtpath, rel_path))

            feature = self._single_face_feature(
                self.datagen.random_transform(img, seed = 42))
            if feature is not None:
                keras_feats.append(feature)

            for brightness in brange:
                feature = self._single_face_feature(
                    increase_brightness(img, brightness))
                if feature is not None:
                    bright_feats.append(feature)

        im1 = np.vstack(keras_feats) if keras_feats else None
        im2 = np.vstack(bright_feats) if bright_feats else None
        return im1, im2

    def augment_images(self, method = 'keras', brange = ()):
        """
        Writes an augmented copy of every image under `img_aug_rtpath`.
        Images whose augmented copy already exists are skipped.
        Args:
            method: 'keras' applies the ImageDataGenerator transform, any other
                value applies `increase_brightness` once per entry of `brange`
                (the shifts are applied one after another on the same image).
            brange: brightness values used when method is not 'keras'.
        """
        for rel_path in self.imgpaths:
            target = os.path.join(self.img_aug_rtpath, rel_path)
            ###Check to see if the augmented image was already written
            if os.path.isfile(target):
                continue

            img = plt.imread(os.path.join(self.img_original_rtpath, rel_path))
            if method == 'keras':
                img = self.datagen.random_transform(img, seed = 42)
            else:
                for brightness in brange:
                    img = increase_brightness(img, brightness)
            ### Swap the first and third channel before handing over to cv2
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            self._ensure_parent(target)
            cv2.imwrite(target, img)

    def extract_all_features(self, augment = False): ###To be tested
        """
        Extracts all the features in image batch and caches each one as a
        ".npy" file under the active feature root. Images that already have a
        cached feature, or where the detector does not return exactly one
        face, are skipped.
        Args:
            augment: read the augmented images / write the augmented features.
        """
        self._select_roots(augment)
        for rel_path in self.imgpaths:
            target = self._feature_file(rel_path)
            ###Check to see if image feature is already extracted
            if os.path.isfile(target):
                continue

            img = plt.imread(os.path.join(self.img_rtpath, rel_path))
            feature = self._single_face_feature(img)
            if feature is None:
                continue
            self._ensure_parent(target)
            np.save(target, feature)

    def normalize(self, img):
        """
        Normalize the image pixels values between  1 and 0
        """
        return img/255

    def crop_face(self, img, bb):
        """
        Crop the area delimited by the the bounding box bb
        Args:
            bb: (x, y, width, height); negative x / y are replaced by their
                absolute value before slicing.
        """
        x1, y1, width, height = bb
        x1, y1 = abs(x1), abs(y1)
        x2, y2 = x1 + width, y1 + height
        return img[y1:y2, x1:x2]

    def random_sample(self, label, classidx, k):
        """
        Returns k random indexes (drawn with replacement) of a particular
        class, or of all classes other than the one indicated by classidx
        Args:
            label: 1 samples from the indicated class, 0 from every other class.
            classidx: which class to sample
            k: Sample size
        Returns:
            list of k indexes, or None for any other label value.
        """
        if label not in (0, 1):
            print("OOPS it broke")
            return None
        cls = self.classes[classidx]
        if label == 1:
            return random.choices(self.classmap[cls][0], k = k)
        ### every index that does not belong to the indicated class
        return random.choices(np.flatnonzero(self.c != cls), k = k)

    def update_samples(self, sample, label):
        """
        Update sample values(Mutator funtion)
        Args:
            Sample: Tuple containing the feature pairs in form of a list
                    eg.(feature_idx_list1, feature_idx_list2)
            label: not used here; the labels are derived from the sampled
                   paths in get_sample_image_paths
        """
        self.sample_feat1 = self.sample_feat1 + list(sample[0])
        self.sample_feat2 = self.sample_feat2 + list(sample[1])

    def balance_random_sample(self, k):
        """
        Sample k same-class pairs and k other-class pairs for each class
        Args:
            k: Number of pairs of each kind per class

        NOTE(review): both sides of an "other-class" pair are drawn from the
        classes other than the current one, so the pair may still share an
        identity; the final label comes from the paths, not from this call.
        """
        for clss_idx in range(len(self.classes)):
            self.update_samples((self.random_sample(1, clss_idx, k),
                                 self.random_sample(1, clss_idx, k)), 1)
            self.update_samples((self.random_sample(0, clss_idx, k),
                                 self.random_sample(0, clss_idx, k)), 0)

    def get_features(self):
        """Returns (feat1, feat2, sample_labels)."""
        return (self.feat1, self.feat2, self.sample_labels)

    def face_detection_check(self):
        """
        Gets all the detected and undetected faces: an image counts as
        detected when its cached feature file exists under `feat_rtpath`.
        Both lists are rebuilt from scratch, so calling this repeatedly does
        not duplicate entries.
        """
        self.detected_faces = []
        self.undetected_faces = []
        for rel_path in self.imgpaths:
            if os.path.isfile(self._feature_file(rel_path)):
                self.detected_faces.append(rel_path)
            else:
                self.undetected_faces.append(rel_path)

    def update_metainfo(self):
        """
        Updates meta info about dataset i.e. self.classmap etc with the values of the detected faces
        since the face_detector may have some errors
        """
        self.face_detection_check()
        self._index_classes(self.detected_faces)

    def get_sample_image_paths(self):
        """
        Returns the paths of the sampled image pairs as
        {"path1": ..., "path2": ...} and rebuilds `sample_labels`
        (1 when both paths share the identity folder, otherwise 0).
        """
        faces = np.array(self.detected_faces)
        paths = {"path1": faces[np.asarray(self.sample_feat1, dtype = int)],
                 "path2": faces[np.asarray(self.sample_feat2, dtype = int)]}
        ### Update Labels (replaced, not appended, so repeated calls are safe)
        self.sample_labels = [
            1 if self._class_of(first) == self._class_of(second) else 0
            for first, second in zip(paths["path1"], paths["path2"])
        ]
        return paths

In [3]:
### Root folders. The trailing "/" matters: later cells build file names by
### plain string concatenation (ROOT + "<identity>/<file>").
IMG_RT_PATH = 'data/images/'
FEATURE_RT_PATH = "data/image_features/"
FEATURE_AUG_RT_PATH = "data/image_aug_features/"
IMG_AUG_RT_PATH = 'data/images_aug/'

#img_list = pd.read_csv(IMG_RT_PATH + "test_list.TXT")
### Forward slashes only: the previous "Models\FaceNet" contained the invalid
### escape sequence "\F" and only resolved on Windows.
feature_extractor = load_model("Models/FaceNet/Facenet_keras.h5")
face_detector = MTCNN()

W0819 02:59:55.454645  2908 hdf5_format.py:221] No training configuration found in save file: the model was *not* compiled. Compile it manually.
W0819 02:59:55.462807  2908 deprecation_wrapper.py:119] From C:\Users\user\Anaconda3\lib\site-packages\mtcnn\mtcnn.py:187: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.

W0819 02:59:55.462807  2908 deprecation_wrapper.py:119] From C:\Users\user\Anaconda3\lib\site-packages\mtcnn\mtcnn.py:193: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.

W0819 02:59:55.484754  2908 deprecation_wrapper.py:119] From C:\Users\user\Anaconda3\lib\site-packages\mtcnn\network.py:43: The name tf.variable_scope is deprecated. Please use tf.compat.v1.variable_scope instead.

W0819 02:59:55.485751  2908 deprecation_wrapper.py:119] From C:\Users\user\Anaconda3\lib\site-packages\mtcnn\layer_factory.py:88: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0819 02:59:55.48

In [4]:
import os
### Collect every image as "<identity folder>/<file name>", relative to IMG_RT_PATH.
img_path_list = []
### sorted(): os.listdir returns entries in arbitrary order, which would make
### the index-based sampling done later differ between runs / machines.
folder_list = sorted(os.listdir(IMG_RT_PATH))
for FOLDER in folder_list:
    folder_path = os.path.join(IMG_RT_PATH, FOLDER)
    ### Skip stray files sitting next to the identity folders
    if not os.path.isdir(folder_path):
        continue
    image_list_names = sorted(os.listdir(folder_path))
    for NAME in image_list_names:
        img_path_list.append(FOLDER + '/' + NAME)

In [5]:
### One-column table of relative image paths; ImageManager reads the first column.
img_list = pd.DataFrame(img_path_list, columns = ["path"])

In [6]:
### Build the pipeline object from the models and root folders defined above.
iman = ImageManager(
    imgpaths = img_list,
    feature_extractor = feature_extractor,
    face_detector =  face_detector, 
    img_original_rtpath = IMG_RT_PATH,
    img_aug_rtpath = IMG_AUG_RT_PATH,
    feat_original_rtpath = FEATURE_RT_PATH,
    feat_aug_rt_path = FEATURE_AUG_RT_PATH,
    augment_mode = True
)

### Write an augmented copy of every listed image under IMG_AUG_RT_PATH
### (images whose augmented copy already exists are skipped).
iman.augment_images()

### Detect faces in the augmented images and cache one ".npy" feature per image;
### images that already have a cached feature are skipped.
### NOTE(review): the original author marked this call "dont run" - presumably
### because it is slow on a full dataset; confirm before re-running.
iman.extract_all_features(augment = True) ### dont run

In [7]:
### Sort the listed images into detected / undetected faces.
### NOTE(review): update_metainfo() in the next cell calls face_detection_check()
### itself, so this explicit call looks redundant - TODO confirm it can be dropped.
iman.face_detection_check()

In [8]:
### Start from a clean slate so that re-running this cell (or having run the
### face detection check before) does not pile up duplicate faces / samples.
iman.detected_faces, iman.undetected_faces = [], []
iman.sample_feat1, iman.sample_feat2, iman.sample_labels = [], [], []

### balance_random_sample draws with random.choices; seed Python's RNG so the
### sampled pairs are the same on every run (the split already uses a fixed seed).
random.seed(42)

iman.update_metainfo()
iman.balance_random_sample(100)
iman.split(train_size = 0.75)



In [18]:
### Number of training pairs produced by the split
len(iman.xtrainp["path1"])

18750

In [None]:
### Load the cached augmented-image features of every training pair and stack
### them into two row-aligned matrices (row k of both belongs to pair k).
feat_list1 = []
feat_list2 = []
t1 = []  ### 1 = both images share the identity folder, 0 = they do not
for PATH1, PATH2 in zip(iman.xtrainp["path1"].values, iman.xtrainp["path2"].values):
    file1 = FEATURE_AUG_RT_PATH + PATH1[:-4] + ".npy"
    file2 = FEATURE_AUG_RT_PATH + PATH2[:-4] + ".npy"
    ### Keep a pair only when both of its features were cached
    if os.path.isfile(file1) and os.path.isfile(file2):
        feat_list1.append(np.load(file1))
        feat_list2.append(np.load(file2))
        ### Compare the identity folders (text before the first "/")
        t1.append(1 if PATH1.split("/")[0] == PATH2.split("/")[0] else 0)

### Later cells read these matrices under the lower-case names
train_aug_feat1 = np.vstack(feat_list1)
train_aug_feat2 = np.vstack(feat_list2)
### Previous spelling, kept as aliases of the same arrays
train_AUG_feat1 = train_aug_feat1
train_AUG_feat2 = train_aug_feat2

del feat_list1
del feat_list2

In [22]:
### Load the cached augmented-image features of every training pair and stack
### them into two row-aligned matrices (row k of both belongs to pair k).
feat_list1 = []
feat_list2 = []
t1 = []  ### 1 = both images share the identity folder, 0 = they do not
for PATH1, PATH2 in zip(iman.xtrainp["path1"].values, iman.xtrainp["path2"].values):
    file1 = FEATURE_AUG_RT_PATH + PATH1[:-4] + ".npy"
    file2 = FEATURE_AUG_RT_PATH + PATH2[:-4] + ".npy"
    ### Keep a pair only when both of its features were cached
    if os.path.isfile(file1) and os.path.isfile(file2):
        feat_list1.append(np.load(file1))
        feat_list2.append(np.load(file2))
        ### Compare the identity folders (text before the first "/")
        t1.append(1 if PATH1.split("/")[0] == PATH2.split("/")[0] else 0)

### Later cells read these matrices under the lower-case names
train_aug_feat1 = np.vstack(feat_list1)
train_aug_feat2 = np.vstack(feat_list2)
### Previous spelling, kept as aliases of the same arrays
train_AUG_feat1 = train_aug_feat1
train_AUG_feat2 = train_aug_feat2

del feat_list1
del feat_list2

In [23]:
### Load the cached original-image features of every training pair and stack
### them into two row-aligned matrices (row k of both belongs to pair k).
feat_list1 = []
feat_list2 = []
t2 = []  ### 1 = both images share the identity folder, 0 = they do not
for PATH1, PATH2 in zip(iman.xtrainp["path1"].values, iman.xtrainp["path2"].values):
    file1 = FEATURE_RT_PATH + PATH1[:-4] + ".npy"
    file2 = FEATURE_RT_PATH + PATH2[:-4] + ".npy"
    ### Keep a pair only when both of its features were cached
    if os.path.isfile(file1) and os.path.isfile(file2):
        feat_list1.append(np.load(file1))
        feat_list2.append(np.load(file2))
        ### Compare the identity folders (text before the first "/")
        t2.append(1 if PATH1.split("/")[0] == PATH2.split("/")[0] else 0)

train_feat1 = np.vstack(feat_list1)
train_feat2 = np.vstack(feat_list2)

del feat_list1
del feat_list2

In [24]:
### Load the cached original-image features of every test pair and stack
### them into two row-aligned matrices (row k of both belongs to pair k).
feat_list1 = []
feat_list2 = []
y = []   ### labels produced by the split, for the pairs that were kept
t3 = []  ### labels recomputed from the paths: 1 = same identity folder
test_pairs = zip(iman.xtestp["path1"].values, iman.xtestp["path2"].values)
for i, (PATH1, PATH2) in enumerate(test_pairs):
    file1 = FEATURE_RT_PATH + PATH1[:-4] + ".npy"
    file2 = FEATURE_RT_PATH + PATH2[:-4] + ".npy"
    ### Keep a pair only when both of its features were cached
    if os.path.isfile(file1) and os.path.isfile(file2):
        feat_list1.append(np.load(file1))
        feat_list2.append(np.load(file2))
        ### i is the position of the pair inside the test split
        y.append(iman.ytestp[i])
        ### Compare the identity folders (text before the first "/")
        t3.append(1 if PATH1.split("/")[0] == PATH2.split("/")[0] else 0)

test_feat1 = np.vstack(feat_list1)
test_feat2 = np.vstack(feat_list2)

del feat_list1
del feat_list2

In [25]:
### Labels in the same order the feature matrices are counted below:
### augmented train pairs, original train pairs, test pairs.
labels_aug = np.array([*t1, *t2, *t3])

In [33]:
### Total number of labels
labels_aug.shape[0]

42956

In [35]:
### Peek at the stacked labels (numpy abbreviates long arrays in the output)
labels_aug

array([0, 0, 0, ..., 0, 1, 1])

In [29]:
### Total number of feature rows; should equal the number of labels above
sum(len(block) for block in (train_aug_feat1, train_feat1, test_feat1))

42956

In [34]:
import pandas as pd
### np.save / to_csv do not create missing folders, so make sure the
### output folder exists before writing into it.
os.makedirs("features", exist_ok = True)

### Features of the augmented training pairs
np.save("features/train_aug_feat1.npy", train_aug_feat1)
np.save("features/train_aug_feat2.npy", train_aug_feat2)

### Features of the original training pairs
np.save("features/train_feat1.npy", train_feat1)
np.save("features/train_feat2.npy", train_feat2)

### Features of the test pairs
np.save("features/test_feat1.npy", test_feat1)
np.save("features/test_feat2.npy", test_feat2)

np.save("features/labels_aug.npy", labels_aug)

### Paths of every sampled pair (before the train / test split)
paths = iman.get_sample_image_paths()
paths_df = pd.DataFrame(paths)
paths_df.to_csv("features/image_paths.csv")