In [13]:
import cv2
import numpy as np
import pkg_resources
import tensorflow as tf
from tensorflow.python.saved_model import tag_constants
import os
from layer_factory import LayerFactory
from network import Network
from exceptions import InvalidImage

class PNet(Network):
    """
    Network to propose areas with faces.
    """
    def _config(self):
        layer_factory = LayerFactory(self)

        layer_factory.new_feed(name='data', layer_shape=(None, 12, 12, 3))
        layer_factory.new_conv(name='conv1', kernel_size=(3, 3), channels_output=10, stride_size=(1, 1),
                            padding='SAME', relu=False)
        layer_factory.new_prelu(name='prelu1')
        layer_factory.new_max_pool(name='pool1', kernel_size=(2, 2), stride_size=(2, 2))
        layer_factory.new_conv(name='conv2', kernel_size=(3, 3), channels_output=16, stride_size=(1, 1), #kernel size 3,3
                               padding='SAME', relu=True)
        layer_factory.new_prelu(name='prelu2')
        layer_factory.new_conv(name='conv3', kernel_size=(3, 3), channels_output=32, stride_size=(1, 1), #kernel size 3,3
                               padding='SAME', relu=True)
        layer_factory.new_prelu(name='prelu3')
        layer_factory.new_conv(name='conv4-1', kernel_size=(1, 1), channels_output=2, stride_size=(1, 1), relu=False)
        layer_factory.new_softmax(name='prob1', axis=3)
        layer_factory.new_conv(name='conv4-2', kernel_size=(1, 1), channels_output=4, stride_size=(1, 1),
                               input_layer_name='prelu3', relu=False)
    

    def _feed(self, image):
        print('Running Pnet!')
        return self._session.run(['pnet/conv4-2/BiasAdd:0', 'pnet/prob1:0'], feed_dict={'pnet/input:0': image})

def read_pos_images():
    #Read positive images:
    path, __, filenames = next(os.walk("./pos_train/"))
    file_count = len(filenames)
    images = np.empty([0,12,3])
    for i in range(file_count):
        j=i+1
        img=cv2.imread(f"{path}{j}.bmp")
        images=np.append(images,img,axis=0)
    #Create list of probabilities:
    prob=[]
    for i in range(file_count):
        prob.append([[[0.0,1.0]]])
    #Create list of coordinates:
    coordinates=[]
    file = open('./coordinates.txt','r')
    lines = file.readlines()
    lines = [line[:-1] for line in lines]
    idx=[1,0,3,2]
    for line in lines:
        line = line.split(" ")
        line = line[1]
        line=line[1:-1]
        line = line.split(",")
        #Transpose coordinates
        x=0
        nline=[]
        for i in idx:
            nline.append(line[i])
            x=x+1
        line=[[[float(c) for c in nline]]]
        coordinates.append(line)
    #Return images, probs, and coordinates
    return images, prob, coordinates

def read_neg_images():
    #Read negative images:
    path, __, filenames = next(os.walk("./neg_train/"))
    file_count = len(filenames)
    images = np.empty([0,12,3])
    for i in range(file_count):
        j=i+1
        img=cv2.imread(f"{path}{j}.bmp")
        images=np.append(images,img,axis=0)
    #Create list of probabilities:
    prob=[]
    for i in range(file_count):
        prob.append([[[1.0,0.0]]])
    #Create list of coordinates:
    coordinates=[]
    for i in range(file_count):
        coordinates.append([[[0.0,0.0,0.0,0.0]]])
    #Return images, prob, coordinates
    return images, prob, coordinates


#Read in all images, probabilities, and coordinates
pimages, pprob, pcoordinates = read_pos_images()
nimages, nprob, ncoordinates = read_neg_images()
o_images=np.append(pimages,nimages,axis=0)
o_images=np.reshape(o_images,(-1,12,12,3))
o_prob=pprob+nprob
o_coordinates=pcoordinates+ncoordinates

#Shuffle them up using an index
idx=np.arange(len(o_prob))
np.random.shuffle(idx)
images=np.empty_like(o_images)
c=0
for i in idx:
    images[c]=o_images[i]
    c=c+1
images=(images-127.5)/128.0
images = np.transpose(images, (0, 2, 1, 3)) #Transpose images
prob=[]
for i in idx:
    prob.append(o_prob[i])
coordinates=[]
for i in idx:
    coordinates.append(o_coordinates[i])

with tf.compat.v1.Session() as sess:
    with tf.Graph().as_default():
        
        #Initialize training
        sess = tf.compat.v1.Session()
        train_net=PNet(sess)
        bimg=tf.compat.v1.placeholder(tf.float32, shape=(100,12,12,3))
        bprob=tf.compat.v1.placeholder(tf.float32, shape=(100,1,1,2))
        bprobmask=tf.compat.v1.placeholder(tf.float32, shape=(100,1))
        bcoord=tf.compat.v1.placeholder(tf.float32, shape=(100,1,1,4))
        loss=tf.reduce_mean(tf.square(bprob-train_net.get_layer('conv4-1')))+bprobmask*0.5*tf.reduce_mean(tf.square(bcoord-train_net.get_layer('conv4-2')))
        optimizer = tf.compat.v1.train.AdamOptimizer()
        train = optimizer.minimize(loss)
        init = tf.compat.v1.global_variables_initializer()
        sess.run(init)
        saver = tf.compat.v1.train.Saver()
        
        #Test
        img=cv2.imread("1.bmp")
        img1=(img-127.5)/128.0
        img2=np.expand_dims(img1, 0)
        print(train_net.feed(img2))
        
        #Grab a batch of images, probs, and coordinates, and feed into training
        for j in range(10):
            i=0
            f=100
            while f<len(prob):
                batchimg=images[i:f]
                batchprob=prob[i:f]
                k=np.array(prob[i:f])
                k1=np.reshape(k,(100,2))
                k2=k1[:,1]*1.0
                k3=np.reshape(k2,(100,1))
                batchprobmask=k3
                batchcoord=coordinates[i:f]
                i=i+100
                f=f+100
                sess.run(train,feed_dict={'pnet/input:0': batchimg, bprob: batchprob, bcoord: batchcoord,bprobmask:batchprobmask})
            print(train_net.feed(img2))
        
        # Load existing weights
        wt = np.load('./data/mtcnn_weights.npy', allow_pickle=True).item()

        # Assuming `sess` is your TensorFlow session
        with tf.compat.v1.Session() as sess:
            # Initialize variables if not already initialized
            sess.run(tf.compat.v1.global_variables_initializer())

            # Get trainable variables
            trainable_vars = tf.compat.v1.trainable_variables()

            if isinstance(wt['pnet'], dict):
                # 'pnet' is a dictionary
                for layer_name, layer_values in wt['pnet'].items():
                    for var_name, _ in layer_values.items():
                        var_full_name = f"pnet/{layer_name}/{var_name}:0"
                        var_list = [v for v in trainable_vars if v.name == var_full_name]
                        if var_list:
                            var_value = sess.run(var_list[0])
                            wt['pnet'][layer_name][var_name] = var_value
            elif isinstance(wt['pnet'], np.ndarray): #np.ndarray
                # 'pnet' is a list
                for layer_values in wt['pnet']:
                    for var_name, var_value in layer_values.items():
                        var_full_name = f"pnet/{var_name}:0"
                        var_list = [v for v in trainable_vars if v.name == var_full_name]
                        if var_list:
                            wt['pnet'][layer_name][var_name] = sess.run(var_list[0])

            # Save updated weights
            np.save('./new_weights.npy', wt)

Running Pnet!
[array([[[[ 1.92913875e-01, -2.01348051e-01, -1.17081523e-01,
          -1.01517522e+00],
         [ 2.52451628e-01, -8.86579156e-02, -1.08815521e-01,
          -1.03785455e+00],
         [ 2.11455122e-01, -8.69828165e-02, -1.54920340e-01,
          -1.04884613e+00],
         [ 2.12247565e-01, -1.21255815e-01, -1.54018879e-01,
          -1.03497601e+00],
         [ 2.49417871e-01, -1.27684474e-01, -1.82239056e-01,
          -1.08372664e+00],
         [ 2.72667110e-01, -1.88323379e-01, -6.56434894e-02,
          -8.59537899e-01]],

        [[ 8.74897540e-02, -1.24535650e-01, -4.50477898e-02,
          -1.01014423e+00],
         [ 6.86218143e-02, -1.57740414e-01,  3.04086506e-02,
          -9.58631694e-01],
         [ 5.51436245e-02, -1.43437862e-01, -3.37539315e-02,
          -9.92604733e-01],
         [ 4.05781269e-02, -1.26649737e-01, -4.33047414e-02,
          -9.85453367e-01],
         [-7.04413652e-03, -2.01756656e-01, -4.28169966e-02,
          -1.01857173e+00],
    

Running Pnet!
[array([[[[ 0.0933138 ,  0.025996  ,  0.37000895,  0.1784442 ],
         [ 0.09091917,  0.08336765,  0.5628703 ,  0.39581937],
         [ 0.09595586,  0.12519121,  0.67836535,  0.5006427 ],
         [ 0.07585821,  0.11242834,  0.7167007 ,  0.5091403 ],
         [ 0.07254653,  0.0846411 ,  0.61209613,  0.49691194],
         [ 0.07370032,  0.04793468,  0.52542853,  0.29083556]],

        [[ 0.03530341,  0.1231271 ,  0.5162028 ,  0.47113138],
         [-0.00330389,  0.14050862,  0.6812158 ,  0.6451227 ],
         [ 0.01584736,  0.1807929 ,  0.7175108 ,  0.6351879 ],
         [ 0.00658494,  0.19836584,  0.6990391 ,  0.66520387],
         [ 0.02746391,  0.1078575 ,  0.61593103,  0.6309516 ],
         [ 0.06645882,  0.12899214,  0.601007  ,  0.5096554 ]],

        [[ 0.03876558,  0.0649299 ,  0.46746516,  0.43684393],
         [ 0.00438979,  0.07351157,  0.5945932 ,  0.58238786],
         [ 0.02389014,  0.18162206,  0.6324822 ,  0.5198794 ],
         [ 0.0657284 ,  0.22728965, 

Running Pnet!
[array([[[[ 4.02780175e-02,  7.22741485e-02,  5.11628985e-01,
           3.63503456e-01],
         [ 6.58179373e-02,  1.45491898e-01,  7.05474615e-01,
           5.45554161e-01],
         [ 9.15946811e-02,  1.65078044e-01,  8.18508029e-01,
           6.53748751e-01],
         [ 6.26291484e-02,  1.65486276e-01,  8.17296565e-01,
           6.23683095e-01],
         [ 7.16522187e-02,  1.38662338e-01,  6.93439662e-01,
           5.50732613e-01],
         [ 5.86954057e-02,  1.01933479e-01,  5.81420422e-01,
           3.94044101e-01]],

        [[ 3.27429175e-02,  1.47795022e-01,  6.34667754e-01,
           5.73450089e-01],
         [ 3.16640735e-02,  1.95523679e-01,  7.37477899e-01,
           6.94118142e-01],
         [ 3.67778242e-02,  1.61998749e-01,  8.24642360e-01,
           7.39415407e-01],
         [ 2.58560181e-02,  2.05541670e-01,  8.20660114e-01,
           8.18794250e-01],
         [ 7.27524757e-02,  1.55248821e-01,  7.36977398e-01,
           7.27218032e-01],
    

Running Pnet!
[array([[[[ 0.04782352,  0.09178174,  0.5513321 ,  0.40839207],
         [ 0.05663325,  0.17419484,  0.72862303,  0.5934832 ],
         [ 0.08318737,  0.18987092,  0.8260429 ,  0.67558587],
         [ 0.04842129,  0.20263645,  0.82886004,  0.6589191 ],
         [ 0.05842696,  0.15074188,  0.7084955 ,  0.5607593 ],
         [ 0.03922114,  0.11732653,  0.6100882 ,  0.4620229 ]],

        [[ 0.06057365,  0.14999092,  0.6826726 ,  0.59647906],
         [ 0.04560724,  0.21791402,  0.7355329 ,  0.6843529 ],
         [ 0.03305519,  0.17776176,  0.83714247,  0.7152512 ],
         [ 0.0311076 ,  0.24939492,  0.8141668 ,  0.7910199 ],
         [ 0.08393642,  0.18976077,  0.7180864 ,  0.6828239 ],
         [ 0.07500164,  0.20490512,  0.6873938 ,  0.64787114]],

        [[ 0.0655892 ,  0.0679684 ,  0.6486237 ,  0.5819509 ],
         [ 0.03414881,  0.10314494,  0.7243481 ,  0.64733994],
         [ 0.05240487,  0.16216096,  0.76033986,  0.6283984 ],
         [ 0.07899009,  0.19867912, 

In [10]:
#Testing an image
img=cv2.imread("1.bmp")
img1=(img-127.5)/128.0
img2=np.expand_dims(img1, 0)
print(train_net.feed(img2))

Running Pnet!
[array([[[[0.09913689, 0.19981617, 0.7198129 , 0.6677117 ]]]],
      dtype=float32), array([[[[0.37227347, 0.6277265 ]]]], dtype=float32)]


Correct coordinates: 1.bmp: [0.25,0,1,1]