In [1]:
import os
import pickle
import tensorflow as tf
import numpy as np
from numpy import genfromtxt

from DataPrep.data_io import DataFormatter
from nn.network import getModel,  summaryBuilder

from nn.load_params import layer_name, convShape, getWeights

## LOAD WEIGHTS

In [2]:
parentPath = "/Users/sam/All-Program/App-DataSet/Deep-Neural-Nets/Models/FaceNet-Inception"
moduleWeightDict = getWeights(parentPath)

conv1
(7, 7, 3, 64)
(64,)
bn1
(64,) (64,) (64,) (64,)
conv2
(1, 1, 64, 64)
(64,)
bn2
(64,) (64,) (64,) (64,)
conv3
(3, 3, 64, 192)
(192,)
bn3
(192,) (192,) (192,) (192,)
inception_3a_1x1_conv
(1, 1, 192, 64)
(64,)
inception_3a_1x1_bn
(64,) (64,) (64,) (64,)
inception_3a_5x5_conv1
(1, 1, 192, 16)
(16,)
inception_3a_5x5_conv2
(5, 5, 16, 32)
(32,)
inception_3a_5x5_bn1
(16,) (16,) (16,) (16,)
inception_3a_5x5_bn2
(32,) (32,) (32,) (32,)
inception_3a_3x3_conv1
(1, 1, 192, 96)
(96,)
inception_3a_3x3_conv2
(3, 3, 96, 128)
(128,)
inception_3a_3x3_bn1
(96,) (96,) (96,) (96,)
inception_3a_3x3_bn2
(128,) (128,) (128,) (128,)
inception_3a_pool_conv
(1, 1, 192, 32)
(32,)
inception_3a_pool_bn
(32,) (32,) (32,) (32,)
inception_3b_1x1_conv
(1, 1, 256, 64)
(64,)
inception_3b_1x1_bn
(64,) (64,) (64,) (64,)
inception_3b_3x3_conv1
(1, 1, 256, 96)
(96,)
inception_3b_3x3_conv2
(3, 3, 96, 128)
(128,)
inception_3b_3x3_bn1
(96,) (96,) (96,) (96,)
inception_3b_3x3_bn2
(128,) (128,) (128,) (128,)
inception_3b_5x

## INITIALIZE NETWORK

In [3]:
def reset_graph():  # Reset the graph
    if 'sess' in globals() and sess:
        sess.close()
    tf.reset_default_graph()

def initNetwork(weightDict):
    reset_graph()
    tensorDict = getModel([96,96,3], params=weightDict)
    return tensorDict

## CREATE ENCODINGS FOR VERIFICATION DATA

In [4]:
dataPath = "/Users/sam/All-Program/App-DataSet/DeepFaceRecognition/data_models"
encodingPath = "/Users/sam/All-Program/App-DataSet/DeepFaceRecognition/data_models/encodings/"
verification_data = 'verification_imgarr.pickle'
verification_encoding_file = 'verification_img_encodings.pickle'

tensorDict = initNetwork(moduleWeightDict)
dataX, dataY, labelDict = DataFormatter.getPickleFile(dataPath, verification_data, getStats=True)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    dataNorm = dataX/255
    x = sess.run([tensorDict['output']], feed_dict={tensorDict['inpTensor']:dataNorm})
    
    DataFormatter.dumpPickleFile(x, dataY, labelDict=labelDict, folderPath=encodingPath,
                                 picklefileName=verification_encoding_file, getStats=True)
#     print (x[0].shape)

inpTensor  (?, 96, 96, 3)
conv1:  (?, 48, 48, 64)
conv1 Zero-Padding + MAXPOOL  (?, 50, 50, 64)
conv1 Zero-Padding + MAXPOOL  (?, 24, 24, 64)
conv2:  (?, 24, 24, 64)
conv2 Zero-Padding + MAXPOOL  (?, 26, 26, 64)
conv3:  (?, 24, 24, 192)
conv3 Zero-Padding + MAXPOOL  (?, 26, 26, 192)
conv3 Zero-Padding + MAXPOOL  (?, 12, 12, 192)
inception_3x3 Chain 2:  (?, 12, 12, 128)
inception_5x5 Chain 3:  (?, 12, 12, 32)
inception_pool Chain 4:  (?, 12, 12, 32)
inception_1x1: Chain 1:  (?, 12, 12, 64)
inception3a:  (?, 12, 12, 256)
inception_3x3 Chain 2:  (?, 12, 12, 128)
inception_5x5 Chain 3:  (?, 12, 12, 64)
inception_pool Chain 4:  (?, 12, 12, 64)
inception_1x1: Chain 1:  (?, 12, 12, 64)
inception3b:  (?, 12, 12, 320)
inception_3x3 Chain 2:  (?, 6, 6, 256)
inception_5x5 Chain 3:  (?, 6, 6, 64)
pool_pad Chain 4:  (?, 6, 6, 320)
inception3c:  (?, 6, 6, 640)
Inside Inception module1:  (?, 6, 6, 640)
inception_3x3 Chain 2:  (?, 6, 6, 192)
inception_5x5 Chain 3:  (?, 6, 6, 64)
inception_pool Chain 4

## CREATE ENCODINGS FOR TRAINING DATA

In [5]:
summaryOutputPath = "/Users/sam/All-Program/App-DataSet/DeepFaceRecognition/data_models/summary"
dataPath = "/Users/sam/All-Program/App-DataSet/DeepFaceRecognition/data_models"
encodingPath = "/Users/sam/All-Program/App-DataSet/DeepFaceRecognition/data_models/encodings/"
training_data = 'training_imgarr.pickle'
training_encoding_file = 'training_img_encodings.pickle'

tensorDict = initNetwork(moduleWeightDict)
dataX, dataY, labelDict = DataFormatter.getPickleFile(dataPath, training_data)
print(dataX.shape, dataY.shape)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    mergedSummary, writer = summaryBuilder(sess, summaryOutputPath)
    dataNorm = dataX/255
    x = sess.run([tensorDict['output']], feed_dict={tensorDict['inpTensor']:dataNorm})
    writer = tf.summary.FileWriter(summaryOutputPath, sess.graph)
    writer.close()
    DataFormatter.dumpPickleFile(x, dataY, labelDict=labelDict, folderPath=encodingPath,
                                 picklefileName=training_encoding_file)


inpTensor  (?, 96, 96, 3)
conv1:  (?, 48, 48, 64)
conv1 Zero-Padding + MAXPOOL  (?, 50, 50, 64)
conv1 Zero-Padding + MAXPOOL  (?, 24, 24, 64)
conv2:  (?, 24, 24, 64)
conv2 Zero-Padding + MAXPOOL  (?, 26, 26, 64)
conv3:  (?, 24, 24, 192)
conv3 Zero-Padding + MAXPOOL  (?, 26, 26, 192)
conv3 Zero-Padding + MAXPOOL  (?, 12, 12, 192)
inception_3x3 Chain 2:  (?, 12, 12, 128)
inception_5x5 Chain 3:  (?, 12, 12, 32)
inception_pool Chain 4:  (?, 12, 12, 32)
inception_1x1: Chain 1:  (?, 12, 12, 64)
inception3a:  (?, 12, 12, 256)
inception_3x3 Chain 2:  (?, 12, 12, 128)
inception_5x5 Chain 3:  (?, 12, 12, 64)
inception_pool Chain 4:  (?, 12, 12, 64)
inception_1x1: Chain 1:  (?, 12, 12, 64)
inception3b:  (?, 12, 12, 320)
inception_3x3 Chain 2:  (?, 6, 6, 256)
inception_5x5 Chain 3:  (?, 6, 6, 64)
pool_pad Chain 4:  (?, 6, 6, 320)
inception3c:  (?, 6, 6, 640)
Inside Inception module1:  (?, 6, 6, 640)
inception_3x3 Chain 2:  (?, 6, 6, 192)
inception_5x5 Chain 3:  (?, 6, 6, 64)
inception_pool Chain 4

## Find the most closest Picture using only encodings:

In [14]:
training_encoding_path = "/Users/sam/All-Program/App-DataSet/DeepFaceRecognition/data_models/encodings"
verification_encoding_path = "/Users/sam/All-Program/App-DataSet/DeepFaceRecognition/data_models/encodings"    
 
trainX, trainY, trainLabelDict = DataFormatter.getPickleFile(training_encoding_path, 
                                                             'training_img_encodings.pickle')
verX, verY, verLabelDict = DataFormatter.getPickleFile(training_encoding_path, 
                                                       'verification_img_encodings.pickle')
print (trainX.shape, trainY.shape) #

AttributeError: 'list' object has no attribute 'shape'

In [12]:
trainLabelDict

{'0': 'ritu',
 '1': 'sam',
 '2': 'jetha',
 '3': 'pradyot',
 '4': 'soham',
 '5': 'chetan',
 '6': 'navneet',
 '7': 'raj'}

In [17]:
for t_encoding, t_label in zip(trainX, trainY):
    print (t_label)
    print ('TrainingData: ', trainLabelDict[str(t_label[0])])
    print (t_encoding)
    distArr = []
    labelArr = []
    for v_encoding, v_label in zip(verX, verY):
        dist = np.linalg.norm(t_encoding - v_encoding)
        distArr.append(dist)
        labelArr.append(trainLabelDict[str(v_label[0])])
#         print('distance is: 'dist)
#         print (trainLabelDict[str(v_label[0])])
    print(np.min(distArr))
    break

[0]
TrainingData:  ritu
[ 0.12038152 -0.014854   -0.03519159  0.0298566   0.07734559  0.08589125
  0.18633814  0.0638134  -0.0186901  -0.1612308  -0.05174623  0.03851372
  0.10089715 -0.1247714  -0.04237328 -0.09391901 -0.03459681 -0.00593167
 -0.16921839  0.15582454  0.08683969 -0.05168987  0.03058793  0.14142306
 -0.09239607 -0.24168749 -0.1542576  -0.14177261 -0.06083471  0.10674828
 -0.03752242 -0.02685932 -0.07723442  0.10103063 -0.00530118 -0.06769846
  0.01051617  0.03453248 -0.18141574 -0.03448711  0.03758086 -0.02653159
  0.01966289  0.02551536 -0.0915257  -0.01531907  0.02790756  0.02061272
 -0.04679441  0.18818331 -0.02952771 -0.21056528  0.01529649  0.01914737
  0.04706831 -0.01424129 -0.08025786  0.09960169 -0.01191272 -0.01406639
 -0.00418146  0.11671023  0.04186565 -0.15259305  0.02494658 -0.11609522
  0.0165531  -0.13106598 -0.18608896 -0.15082873 -0.02143332 -0.00638953
  0.01496287  0.0348852  -0.06435134  0.09323955  0.07846075 -0.05588973
  0.08066323  0.08887311 -0

  ret = sqrt(sqnorm)


### Previous Checkpoint

In [3]:
picPath = '/Users/sam/All-Program/App-DataSet/Deep-Neural-Nets/Models/FaceRecognition/camera_0.jpg'

from scipy import misc
from nn.Network import getModel

# Read an Image
def readImage(imagePath):
    '''
        The input data is is in the shape of [nh, nw, nc], convert it to [nc, nh, nw]
    '''
    image = misc.imread(picPath)
    
    img = np.around(image/255.0, decimals=12) #(2,0,1) = [nc, nh, nw]
    print (image.shape)
    return img

img = readImage(picPath)

# Reset previously built graph
def reset_graph():  # Reset the graph
    if 'sess' in globals() and sess:
        sess.close()
    tf.reset_default_graph()

# Get the inception Model
reset_graph()
tensorDict = getModel(img.shape, params=moduleWeightDict)#(img, conv1_w, conv1_b, s=2, pad='SAME',  scope_name='conv1', isTrainable=False)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
#     print (img.shape)
    img1 = np.reshape(img, (1,img.shape[0], img.shape[1], img.shape[2]))
    print (img1.shape)
    x = sess.run([tensorDict['output']], feed_dict={tensorDict['inpTensor']:img1})
    print (x[0].shape)

(96, 96, 3)
inpTensor  (?, 96, 96, 3)
conv1:  (?, 48, 48, 64)
conv1 Zero-Padding + MAXPOOL  (?, 50, 50, 64)
conv1 Zero-Padding + MAXPOOL  (?, 24, 24, 64)
conv2:  (?, 24, 24, 64)
conv2 Zero-Padding + MAXPOOL  (?, 26, 26, 64)
conv3:  (?, 24, 24, 192)
conv3 Zero-Padding + MAXPOOL  (?, 26, 26, 192)
conv3 Zero-Padding + MAXPOOL  (?, 12, 12, 192)
inception_3x3 Chain 2:  (?, 12, 12, 128)
inception_5x5 Chain 3:  (?, 12, 12, 32)
inception_pool Chain 4:  (?, 12, 12, 32)
inception_1x1: Chain 1:  (?, 12, 12, 64)
inception3a:  (?, 12, 12, 256)
inception_3x3 Chain 2:  (?, 12, 12, 128)
inception_5x5 Chain 3:  (?, 12, 12, 64)
inception_pool Chain 4:  (?, 12, 12, 64)
inception_1x1: Chain 1:  (?, 12, 12, 64)
inception3b:  (?, 12, 12, 320)
inception_3x3 Chain 2:  (?, 6, 6, 256)
inception_5x5 Chain 3:  (?, 6, 6, 64)
pool_pad Chain 4:  (?, 6, 6, 320)
inception3c:  (?, 6, 6, 640)
Inside Inception module1:  (?, 6, 6, 640)
inception_3x3 Chain 2:  (?, 6, 6, 192)
inception_5x5 Chain 3:  (?, 6, 6, 64)
inception_

## ROUGH:

FROM THE FACE NET PAPER

Choosing triplets (anchor, positive and negative) randomly then the loss function constraint is 
easily satisfied and we may end up learning a particular type of image and make our mode prone to 
overfitting the data.

So we choose triplets that are hard to train on such that select an anchor compare to all pasotive
examples and select the one whose encoding is furthest form the anchor, this is called Hardest Positive. 
Similarly while selecting a negative image select the image whose encoding is the closest to the 
achor image, this is called as selecting hardest negative.

In practise it is often preferable to we use all anchor- positive pairs in a mini-batch 
while still selecting the hard negatives.

* To Do's
   * Extract faces out of the images using Haar Cascades
   * Create encoding for each images and test if the encoding are by themself able to distinguish between 
     difrerent people
   * Then check on Fine tuning the network:
   * Creating Triplet batches.
      * First we create all pairs of ancors and positive.
      * Then we create all negative examples for each (anchor and positive), where the triplet violates.
      * For a incoming batch, we choise 1st record as anchor, get all its positive pairs, then
        we randomly sample a negative example from the negatve points found in prvious step and calculate
        the triplet loss

In [2]:
import numpy as np
import cv2
# face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
# eye_cascade = cv2.CascadeClassifier('haarcascade_eye.xml')

imagePath = "/Users/sam/All-Program/App-DataSet/DeepFaceRecognition/original/1.png"
img = cv2.imread(imagePath)
imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
print (img)
# gray = cv2.cvtColor(img)
# img

error: /opt/concourse/volumes/live/65bc8d8a-a7db-4b09-4d80-394825911c99/volume/opencv_1512681450376/work/modules/imgproc/src/color.cpp:11016: error: (-215) scn == 3 || scn == 4 in function cvtColor


In [1]:
import cv2

In [3]:
cv2.imshow('image',img)
cv2.waitKey(0)
cv2.destroyAllWindows()

error: /Users/jenkins/miniconda/1/x64/conda-bld/conda_1486587097465/work/opencv-3.1.0/modules/highgui/src/window.cpp:281: error: (-215) size.width>0 && size.height>0 in function imshow


In [9]:
import tensorflow as tf
a = np.array([[1,2],[4,2],[1,1],[6,2],[4,3],[8,4],[1,2]])
a_tf = tf.cast(a, dtype=tf.float32)

sess = tf.Session()

# reset_graph()

In [12]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    b = tf.nn.l2_normalize(a_tf, dim=1, epsilon=1e-5)
    print (b.eval())

[[ 0.44721359  0.89442718]
 [ 0.89442718  0.44721359]
 [ 0.70710671  0.70710671]
 [ 0.94868326  0.31622776]
 [ 0.80000001  0.60000002]
 [ 0.89442718  0.44721359]
 [ 0.44721359  0.89442718]]


In [19]:
a = np.array([8,4])
a / pow(max(sum(a**2), 1e-5), 0.5)

array([ 0.89442719,  0.4472136 ])