In [None]:
import pandas
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score
from skimage.feature import local_binary_pattern # Local Binary Pattern function
from skimage import io
from scipy.stats import itemfreq # To calculate a normalized histogram 
from sklearn.preprocessing import normalize
from PIL import Image
import glob
import cv2
from natsort import natsorted, ns
import cvutils
import csvs
import os
import dlib
from sklearn.ensemble import IsolationForest
from IPython import display
from IPython.display import HTML
import progressbar
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.ensemble import AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.utils import to_categorical
from keras.preprocessing import image as kimage
from keras.utils import to_categorical
from tqdm import tqdm


In [None]:
# Load the label workbook and keep only the image-name and gender columns
image_dic = pandas.read_excel('../Datasets/labels_A.xlsx')[['img_name.jpg', 'gender']]
df = pandas.DataFrame(image_dic)
image_dic_list = df.values.tolist()  # one [image name, gender] row per image

# Collect every .jpg in img_A and put the paths into natural sort order,
# so the path list follows the numeric order of the file names
file_paths = natsorted(glob.glob("../Datasets/img_A/*.jpg"))


In [None]:
# Cropping only - the images seem to have been aligned already
def detect_faces(image, face_detector=None):
    """Return the bounding boxes of all faces dlib finds in ``image``.

    Args:
        image: Image array handed straight to the dlib detector.
        face_detector: Optional detector callable taking ``(image, upsample)``.
            When omitted, a dlib frontal face detector is created on the first
            call and reused afterwards instead of being rebuilt per image.

    Returns:
        A list of ``(left, top, right, bottom)`` tuples, one per detected face
        (empty when no face is found).
    """
    if face_detector is None:
        # Build the detector once and keep it on the function object.
        face_detector = getattr(detect_faces, "_detector", None)
        if face_detector is None:
            face_detector = dlib.get_frontal_face_detector()
            detect_faces._detector = face_detector
    # The second argument (1) is forwarded to the detector unchanged
    # (dlib's upsample count).
    return [(rect.left(), rect.top(), rect.right(), rect.bottom())
            for rect in face_detector(image, 1)]

# Set up progress bar
max_val = len(file_paths)
pb = display.ProgressBar(max_val)
pb.display()
i = 0

# Accumulators: cropped faces, the paths they came from, and the rejected paths
faces = []
imgs_used = []
imgs_not_used = []
face_counter = 0

for i, file_path in enumerate(file_paths, start=1):
    # Load image as grayscale
    image = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread returns None (it does not raise) for an unreadable file;
    # treat such a file like an image without a usable face.
    detected_faces = detect_faces(image) if image is not None else []
    # Keep only images in which exactly one face is detected
    if len(detected_faces) == 1:
        # Crop the image to the single bounding box
        face = Image.fromarray(image).crop(detected_faces[0])
        faces.append(face)
        imgs_used.append(file_path)
        face_counter += 1
    else:
        imgs_not_used.append(file_path)
    # Update progress bar (i counts the images processed so far)
    pb.progress = i


In [None]:
# Quick sanity check: how many images were rejected, and what type a crop is
rejected_count = len(imgs_not_used)
print(rejected_count)
first_face_type = type(faces[0])
print(first_face_type)

In [None]:
# Updating image label list and path list

print("Original path list length:")
print(len(file_paths))
print("Number of images used:")
print(len(imgs_used))
print("Number of images not used:")
print(len(imgs_not_used))

# Remove the rejected paths in a single pass. A set lookup replaces the
# repeated list.remove() calls (and the bare except that hid any error).
# Slice assignment keeps the same list object.
unused_paths = set(imgs_not_used)
file_paths[:] = [path for path in file_paths if path not in unused_paths]

print("New path list length:")
print(len(file_paths))

# Remove the [name, label] rows of rejected images; rows are matched on the
# file's base name, exactly as before, but without deleting from the list
# while iterating over it.
unused_names = {os.path.basename(path) for path in imgs_not_used}
image_dic_list[:] = [row for row in image_dic_list if row[0] not in unused_names]
        



In [None]:
# Show the first ten [image name, gender] rows that remain
label_preview = image_dic_list[:10]
print(label_preview)

In [None]:
# Persist the filtered [image name, gender] rows as an Excel workbook
# (DataFrame.to_excel returns None, so export_csv is only a placeholder name)
label_columns = ['img_name.jpg', 'gender']
df = pandas.DataFrame(image_dic_list, columns=label_columns)
export_csv = df.to_excel(r'../Datasets/labels_A_updated_2.xlsx')


In [None]:
# Saving cropped and gray-scale images to new directory for later use

new_directory = '../Datasets/cropped2_img_A/'
print("writing faces to disk...")
if os.path.exists(new_directory):
    # An existing directory is taken to mean the faces were already written.
    print("Path already exists")
else:
    print('creating output directory: %s'%(new_directory))
    os.mkdir(new_directory)
    basewidth = 256
    # faces[k] was cropped from imgs_used[k], so iterate the two together
    for face, source_path in zip(faces, imgs_used):
        # Scale to a fixed width while preserving the aspect ratio
        wpercent = basewidth / float(face.size[0])
        hsize = int(float(face.size[1]) * wpercent)
        # Image.LANCZOS is the same filter as the old Image.ANTIALIAS alias,
        # which newer Pillow releases no longer provide.
        face = face.resize((basewidth, hsize), Image.LANCZOS)
        # Keep the bottom 255 rows so every saved image is 256 x 255 (W x H)
        face = face.crop((0, hsize - 255, basewidth, hsize))
        face.save(os.path.join(new_directory, os.path.basename(source_path)))
    print("wrote %d faces out of %d"%(len(faces), max_val))


In [None]:
# Create updated list of image names and corresponding gender classifications.
# Read 'labels_A_updated_2.xlsx': that is the workbook this notebook writes for
# the cropped2_img_A images, so labels and image files stay in step.
image_dic = pandas.read_excel('../Datasets/labels_A_updated_2.xlsx')
image_dic = image_dic[['img_name.jpg', 'gender']] # Choose columns which are of importance
df = pandas.DataFrame(image_dic)
image_dic_list = df.values.tolist()

# Create naturally sorted list of file paths for each cropped gray-scale image
updated_file_paths = glob.glob ("../Datasets/cropped2_img_A/*.jpg") #find all paths which match the given path
updated_file_paths = natsorted(updated_file_paths) #sort the list of file names such that the image list will be in the correct order

print(updated_file_paths[:10])
print(image_dic_list[:10])

In [None]:
# Verify that every cropped image on disk has the expected (rows, cols) shape
width = 256
height = 255
changed = 0  # number of files whose shape differs from (height, width)
for file in updated_file_paths:
    image = cv2.imread(file, cv2.IMREAD_GRAYSCALE)
    # cv2.imread returns None for a file it cannot decode; count that as a
    # mismatch as well instead of failing on `.shape`.
    if image is None or image.shape != (height, width):
        # The original `print(false)` raised NameError on the first mismatch.
        print(False, file)
        changed += 1
print(changed)
# Spot-check a single image and display it
image = cv2.imread(updated_file_paths[6], cv2.IMREAD_GRAYSCALE)
print(image.shape == (255,256))
plt.imshow(image)

In [None]:
# align cropped images

def detect_faces_for_alignement(image, face_detector=None):
    """Return the raw dlib detections for ``image`` (used by the landmark step).

    Args:
        image: Image array handed straight to the dlib detector.
        face_detector: Optional detector callable taking ``(image, upsample)``.
            When omitted, a dlib frontal face detector is created on the first
            call and reused afterwards instead of being rebuilt per image.

    Returns:
        Whatever the detector returns: the detected face rectangles, which can
        be passed on to a dlib shape predictor.
    """
    if face_detector is None:
        # Build the detector once and keep it on the function object.
        face_detector = getattr(detect_faces_for_alignement, "_detector", None)
        if face_detector is None:
            face_detector = dlib.get_frontal_face_detector()
            detect_faces_for_alignement._detector = face_detector
    # The unused tuple conversion of the rectangles was dropped; callers only
    # ever received the raw detections.
    return face_detector(image, 1)

faces = []
imgs_used_2 = []
imgs_not_used_2 = []
face_counter = 0
width = 256
height = 255
# Load the landmark model once instead of re-reading it for every image
predictor = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')
for file_path in updated_file_paths:
    # Load image as grayscale; cv2.imread returns None for an unreadable file
    image = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
    detected_faces = detect_faces_for_alignement(image) if image is not None else []
    # Only images with exactly one detected face are aligned
    if len(detected_faces) != 1:
        imgs_not_used_2.append(file_path)
        continue

    shape = predictor(image, detected_faces[0]) # get facial features
    shape = np.array([(shape.part(j).x, shape.part(j).y) for j in range(shape.num_parts)])

    # center and scale face around mid point between eyes
    # (builtin int: the np.int alias no longer exists in current NumPy)
    center_eyes = shape[27].astype(int)
    eyes_d = np.linalg.norm(shape[36]-shape[45])
    face_size_x = int(eyes_d * 2.)
    if face_size_x == 0:
        # Coincident eye landmarks would cause a division by zero below
        imgs_not_used_2.append(file_path)
        continue

    # rotate to normalized angle
    d = (shape[45] - shape[36]) / eyes_d # normalized eyes-difference vector (direction)
    a = np.rad2deg(np.arctan2(d[1],d[0])) # angle
    scale_factor = float(width) / float(face_size_x * 2.) # scale to fit in output width
    # rotation (around center_eyes) + scale transform; the centre is passed as
    # plain Python floats rather than NumPy scalars
    rotation_center = (float(center_eyes[0]), float(center_eyes[1]))
    M = np.append(cv2.getRotationMatrix2D(rotation_center,a,scale_factor),[[0,0,1]], axis=0)
    # apply shift from center_eyes to middle of the output image
    M1 = np.array([[1.,0.,-center_eyes[0]+width/2.],
                   [0.,1.,-center_eyes[1]+height/2.],
                   [0,0,1.]])
    # concatenate transforms (rotation-scale + translation)
    M = M1.dot(M)[:2]
    # warp; the output is already (width, height), so no further resize is needed
    face = cv2.warpAffine(image, M, (width, height), borderMode=cv2.BORDER_REPLICATE)
    faces.append(face)
    face_counter += 1
    print(file_path)
    imgs_used_2.append(file_path)
print(face_counter)
faces = np.asarray(faces)


In [None]:
# Quick look at the alignment outcome: count of aligned images, a sample of
# the rejected paths, and the shape of the first aligned face
aligned_count = len(imgs_used_2)
print(aligned_count)
rejected_sample = imgs_not_used_2[:10]
print(rejected_sample)
first_face_shape = faces[0].shape
print(first_face_shape)


In [None]:
# Save the aligned gray-scale faces, each cropped to its central region
new_directory = '../Datasets/again_aligned_cropped_img_A/'
print("writing faces to disk...")
if not os.path.exists(new_directory):
    print('creating output directory: %s'%(new_directory))
    os.mkdir(new_directory)
    for i, face in enumerate(faces):
        rows, cols = face.shape[0], face.shape[1]
        # Box spanning the middle half of the image in both directions
        crop_box = (cols/4, rows/4, cols*(3/4), rows*(3/4))
        face = Image.fromarray(face).crop(crop_box)
        # faces[i] was produced from imgs_used_2[i]; reuse its file name
        target_path = ''.join([new_directory,os.path.basename(imgs_used_2[i])])
        face.save(target_path)
else:
    print("Path already exists")


In [None]:
# Remove the [name, label] rows of images rejected by the alignment pass.
# Rows are matched on the file's base name; a set lookup replaces the nested
# loop that deleted from the list while iterating over it.
rejected_names = {os.path.basename(path) for path in imgs_not_used_2}
image_dic_list[:] = [row for row in image_dic_list if row[0] not in rejected_names]

# Save the remaining rows as an Excel workbook
# (DataFrame.to_excel returns None, so export_csv is only a placeholder name)
df = pandas.DataFrame(image_dic_list, columns= ['img_name.jpg', 'gender'])
export_csv = df.to_excel (r'../Datasets/again_aligned_labels_A_updated_2.xlsx')

In [None]:
# Reload the pruned labels, keeping only the image-name and gender columns
image_dic = pandas.read_excel('../Datasets/labels_A_updated_2.xlsx')[['img_name.jpg', 'gender']]
df = pandas.DataFrame(image_dic)
image_dic_list = df.values.tolist()

# Naturally sorted paths of the cropped gray-scale images
updated_file_paths = natsorted(glob.glob("../Datasets/cropped2_img_A/*.jpg"))

# Show a sample of each list next to its length so the two can be compared
for listing in (updated_file_paths, image_dic_list):
    print(listing[:10])
    print(len(listing))

In [None]:
# CNN

# (rows, cols) each image is resized to on load; the same values define the
# network's input shape below so the two cannot drift apart.
# NOTE(review): the cropping cell saves 256 x 255 (W x H) images, so 258 rows
# means every image is resampled on load - confirm this is intended.
input_rows, input_cols = 258, 256

images = []
y = []
for i, file_path in enumerate(updated_file_paths):
    # color_mode='grayscale' replaces the deprecated grayscale=True flag, and
    # target_size is the (height, width) pair load_img expects.
    img = kimage.load_img(file_path, target_size=(input_rows, input_cols), color_mode='grayscale')
    img = kimage.img_to_array(img)
    img = img/255  # scale pixel values to [0, 1]
    images.append(img)
    # Labels are matched to images by position in the two lists
    y.append(image_dic_list[i][1]) # Append class label

X = np.array(images)

# Remap label -1 to 0 before one-hot encoding
# (assumes the labels are -1 / 1 - TODO confirm)
y = [0 if label == -1 else label for label in y]

print(y[:10])
y = to_categorical(y)
print(y[:10])

print(X.shape)

X_train, X_test, y_train, y_test = train_test_split(X,y)

model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),activation='relu',input_shape=(input_rows, input_cols, 1)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(2, activation='softmax'))
model.compile(loss='categorical_crossentropy',optimizer='Adam',metrics=['accuracy'])
model.fit(X_train, y_train, epochs=10)


In [None]:
# Uniform LBP histograms of the cropped gray-scale images

images_list, image_path, image_inputs, image_labels = [], [], [], []

# LBP settings are the same for every image, so define them once
radius = 3
no_points = 8*radius  # number of neighbouring points sampled around each pixel
eps=1e-7              # keeps the normalisation from dividing by zero

for i, file_path in enumerate(updated_file_paths):
    image = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)  # read as grayscale
    lbp = local_binary_pattern(image, no_points, radius, method='uniform')
    # Histogram of the LBP codes, then normalised so the bins sum to ~1
    counts, _ = np.histogram(lbp.ravel(), bins=np.arange(0, no_points + 3), range=(0, no_points + 2))
    hist = counts.astype("float")
    hist = hist / (hist.sum() + eps)
    hist_norm = hist
    images_list.append(image)
    image_path.append(file_path)
    image_inputs.append(hist_norm)
    # Labels are matched to images by position in the two lists
    image_labels.append(image_dic_list[i][1])


In [None]:
# Alternative feature set: flattened raw pixels, L1-normalised per image.
# This rebinds image_inputs / image_labels and resets the other two lists.
images_list, image_path = [], []
image_labels = []
pixel_rows = []

for i, file_path in enumerate(updated_file_paths):
    image = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)  # read as grayscale
    # Flatten to one row of 255*256 values; any other image size raises here
    pixel_rows.append(np.reshape(image, 255*256))
    # Labels are matched to images by position in the two lists
    image_labels.append(image_dic_list[i][1])

image_inputs = normalize(pixel_rows, axis=1, norm='l1')



In [None]:
# Peek at the first feature row and the first ten labels
first_feature_row = image_inputs[:1]
print(first_feature_row)
first_labels = image_labels[:10]
print(first_labels)
# Bare expression: shown as the cell's output
np.arange(11, 17, 0.5).tolist()


In [None]:
# Hold out half of the data for the final evaluation
X_train, X_test, y_train, y_test = train_test_split(image_inputs,image_labels, test_size=0.5)

# Candidate hyper-parameters, one grid per kernel
rbf_grid = {'kernel': ['rbf'],
            'gamma': np.arange(9,10,0.1).tolist(),
            'C': np.arange(1400,1420,5).tolist(),
            'tol': [1e-5,1e-4,1e-3, 1e-2, 1e-1]}
linear_grid = {'kernel': ['linear'], 'C': [1, 10, 100, 1000, 10000]}
tuned_parameters = [rbf_grid, linear_grid]

scores = ['precision', 'recall']

# Run one 5-fold grid search per metric and report it on the held-out half
for score in scores:
    print("# Tuning hyper-parameters for %s" % score)
    print()
    clf = GridSearchCV(SVC(), tuned_parameters, cv=5, scoring='%s_macro' % score)
    clf.fit(X_train, y_train)
    print("Best parameters set found on development set:")
    print()
    print(clf.best_params_)
    print()
    print("Grid scores on development set:")
    print()
    cv_results = clf.cv_results_
    means = cv_results['mean_test_score']
    stds = cv_results['std_test_score']
    for mean, std, params in zip(means, stds, cv_results['params']):
        print("%0.3f (+/-%0.03f) for %r"
              % (mean, std * 2, params))
    print()
    print("Detailed classification report:")
    print()
    print("The model is trained on the full development set.")
    print("The scores are computed on the full evaluation set.")
    print()
    y_true = y_test
    y_pred = clf.predict(X_test)
    print(classification_report(y_true, y_pred))
    print()
    

In [None]:
# Train/test split with train_test_split's default proportions
# (75% training, 25% testing); shared by the classifier cells
split_parts = train_test_split(image_inputs, image_labels)
xTrain, xTest, yTrain, yTest = split_parts

In [None]:
# AdaBoost with its default base learner: 50 estimators, learning rate 1
abc = AdaBoostClassifier(learning_rate=1, n_estimators=50)

# Fit on the training split
model = abc.fit(xTrain, yTrain)

# Predict the test split and report accuracy
y_pred = model.predict(xTest)
test_accuracy = accuracy_score(yTest,y_pred)
print(test_accuracy)

In [None]:
# RBF SVC used as AdaBoost's base learner; probability=True enables
# predict_proba on the SVC
c=1405
gamma = 9.7
tol = 0.01
svc = SVC(C=c, kernel='rbf', gamma = gamma, tol = tol, probability = True)

# Create AdaBoost classifier object. The base learner is passed positionally:
# its keyword is `base_estimator` in older scikit-learn releases and
# `estimator` in newer ones, but it is the first parameter in both.
abc = AdaBoostClassifier(svc, n_estimators=50, learning_rate=1)

# Train AdaBoost classifier
model = abc.fit(xTrain, yTrain)

# Predict the response for the test dataset
y_pred = model.predict(xTest)

print(accuracy_score(yTest,y_pred))

In [None]:
# Gaussian naive Bayes baseline
gnb = GaussianNB()

# Fit on the training split (fit returns the estimator itself)
model = gnb.fit(xTrain, yTrain)

# Predict the test split and report accuracy
y_pred = model.predict(xTest)
test_accuracy = accuracy_score(yTest,y_pred)
print(test_accuracy)

In [None]:
# Gaussian naive Bayes used as AdaBoost's base learner
gnb = GaussianNB()

# Create AdaBoost classifier object. The base learner is passed positionally:
# its keyword is `base_estimator` in older scikit-learn releases and
# `estimator` in newer ones, but it is the first parameter in both.
abc = AdaBoostClassifier(gnb, n_estimators=50, learning_rate=1)

# Train AdaBoost classifier
model = abc.fit(xTrain, yTrain)

# Predict the response for the test dataset
y_pred = model.predict(xTest)

print(accuracy_score(yTest,y_pred))

In [None]:
# Sweep the learning rate of a small gradient-boosting model and report
# training vs. held-out accuracy for each value
lr_list = [0.05, 0.075, 0.1, 0.25, 0.5, 0.75, 1]

for learning_rate in lr_list:
    gb_clf = GradientBoostingClassifier(
        n_estimators=20,
        learning_rate=learning_rate,
        max_features=2,
        max_depth=2,
        random_state=0,
    )
    gb_clf.fit(xTrain, yTrain)

    print("Learning rate: ", learning_rate)
    train_accuracy = gb_clf.score(xTrain, yTrain)
    print("Accuracy score (training): {0:.3f}".format(train_accuracy))
    validation_accuracy = gb_clf.score(xTest, yTest)
    print("Accuracy score (validation): {0:.3f}".format(validation_accuracy))

In [None]:
# XGBoost baseline with default settings
xgb_clf = XGBClassifier()
# Convert both splits to arrays: the training data was already converted, and
# the test data must have the same form (the splits may be plain lists).
model = xgb_clf.fit(np.asarray(xTrain), np.asarray(yTrain))
score = model.score(np.asarray(xTest), np.asarray(yTest))
print(score)


In [None]:
# RBF SVC with fixed hyper-parameters
c, gamma, tol = 1405, 9.7, 0.01
clf = SVC(kernel='rbf', C=c, gamma=gamma, tol=tol)
clf.fit(xTrain, yTrain)

# Predict the test split and report accuracy
yPredict = clf.predict(xTest)
test_accuracy = accuracy_score(yTest,yPredict)
print(test_accuracy)

In [None]:
#trying to understand the structure
#images_array = np.array(images_list)
#print(images_array.shape) #(number of images x height x width x number of channels) => note that since it is grayscale, there is only 1 channel
#print(np.array(image_path).shape)
#print(np.array(image_inputs).shape)
#print(np.array(image_labels).shape)

In [None]:
# Alignment cell, attempt 1 (disabled: the code below is kept inside a string literal)
'''def face_data_normalizer(input_unaligned, 
                         output_aligned, 
                         output_size= 256, 
                         align_faces_= True, 
                         limit_num_faces_= None,
                         limit_num_files_= None,
                         remove_outliers_ = False):
    
    def write_faces_to_disk(new_directory, faces):
        print("writing faces to disk...")
        if os.path.exists(new_directory):
            shutil.rmtree(new_directory)
        print('creating output new_directory: %s'%(new_directory))
        os.mkdir(new_directory)
        for i in range(faces.shape[0]):
            cv2.imwrite(''.join([new_directory,"%03d.jpg"%i]),faces[i,:,:,::-1])
        print("wrote %d faces"%(faces.shape[0]))
     
    if input_unaligned[-1] != '/':
        input_unaligned += '/'
    if output_aligned[-1] != '/':
        output_aligned += '/'
 
    faces = []
 
    if os.path.exists(output_aligned):
        print('data already preprocessed? loading preprocessed files...')
        #for img_idx,img_file in enumerate(os.listdir(output_aligned)):
            # load the input image, resize it, and convert it to grayscale
        #    image = cv2.imread(''.join([output_aligned,img_file]))
         #   if image is None: continue
         #   image = image[:,:,::-1] #BGR to RGB
         #   faces.append(np.expand_dims(image,0))
        #faces = np.asarray(faces)
        #print('loaded %d preprocessed images'%(faces.shape[0]))
        #if remove_outliers_:
        #    faces,num_outliers = remove_outliers(faces)
        #write_faces_to_disk(output_aligned,faces)
        return faces
     
    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')
         
    max_val = len(os.listdir(input_unaligned)) if limit_num_files_ is None else limit_num_files_
    print(max_val)
    pb = display.ProgressBar(max_val)
    pb.display()
         
    face_counter = 0
    for img_idx,img_file in enumerate(os.listdir(input_unaligned)):
        # load the input image, resize it, and convert it to grayscale
        image = cv2.imread(''.join([input_unaligned,img_file]))
 
        #if image is None:
        #    continue
 
        image = image[:,:,::-1] #BGR to RGB
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
 
        # detect faces in the grayscale image
        rects = detector(gray, 1)
        #print(len(rects))
 
        if len(rects) > 0:
            # loop over the face detections
            for (i, rect) in enumerate(rects):
                if align_faces_:
                    #print("Aligning")
                    ######### Align with facial features detector #########
 
                    shape = predictor(gray, rect) # get facial features
                    shape = np.array([(shape.part(j).x, shape.part(j).y) for j in range(shape.num_parts)])
 
                    # center and scale face around mid point between eyes
                    center_eyes = shape[27].astype(np.int)
                    eyes_d = np.linalg.norm(shape[36]-shape[45])
                    face_size_x = int(eyes_d * 2.)
                    #if face_size_x < 50: continue
 
                    # rotate to normalized angle
                    d = (shape[45] - shape[36]) / eyes_d # normalized eyes-differnce vector (direction)
                    a = np.rad2deg(np.arctan2(d[1],d[0])) # angle
                    scale_factor = float(output_size) / float(face_size_x * 2.) # scale to fit in output_size
                    # rotation (around center_eyes) + scale transform
                    M = np.append(cv2.getRotationMatrix2D((center_eyes[0], center_eyes[1]),a,scale_factor),[[0,0,1]], axis=0)
                    # apply shift from center_eyes to middle of output_size 
                    M1 = np.array([[1.,0.,-center_eyes[0]+output_size/2.],
                                   [0.,1.,-center_eyes[1]+output_size/2.],
                                   [0,0,1.]])
                    # concatenate transforms (rotation-scale + translation)
                    M = M1.dot(M)[:2]
                    # warp
                    face = cv2.warpAffine(image, M, (output_size, output_size), borderMode=cv2.BORDER_REPLICATE)
                    #try:
                    #   face = cv2.warpAffine(image, M, (output_size, output_size), borderMode=cv2.BORDER_REPLICATE)
                    #except:
                    #    continue
                    face_counter += 1
                    face = cv2.resize(face,(output_size,output_size))
                    faces.append(face)
                else:
                    ######### "No align" with just the detector #########
                    #print("Not Aligning")
                    #if rect.width() < 50: continue
                     
                    # find scale factor
                    scale_factor = float(output_size) / float(rect.width() * 2.) # scale to fit in output_size
                     
                    # scale around the center of the face (shift a bit for the approximate y-position of the eyes)
                    M = np.append(cv2.getRotationMatrix2D((rect.center().x,rect.center().y-rect.height()/6.),0,scale_factor),[[0,0,1]], axis=0)
                    # apply shift from center_eyes to middle of output_size 
                    M1 = np.array([[1.,0.,-rect.center().x+output_size/2.],
                                   [0.,1.,-rect.center().y+output_size/2.+rect.height()/6.],
                                   [0,0,1.]])
                    # concatenate transforms (rotation-scale + translation)
                    M = M1.dot(M)[:2]
                    face = cv2.warpAffine(image, M, (output_size, output_size), borderMode=cv2.BORDER_REPLICATE)
                    #try:
                    #    face = cv2.warpAffine(image, M, (output_size, output_size), borderMode=cv2.BORDER_REPLICATE)
                    #except:
                    #    continue
                    face_counter += 1
 
                    faces.append(face)
                 
        pb.progress = img_idx+1
        if face_counter > 50:
            break
        if limit_num_faces_ is not None and faces.shape[0] > limit_num_faces_:
            break
        if limit_num_files_ is not None and img_idx >= limit_num_files_:
            break
    print(type(faces))
    print(type(faces[0]))
    print(len(faces[0]))
    faces = np.asarray(faces)
    print(type(faces))
    print(type(faces[0]))
    print(len(faces[0]))
    print(faces.shape[0])

    write_faces_to_disk(output_aligned,faces)
     
    return faces
    
faces_align = face_data_normalizer('../Datasets/img_A/',
                                   '../Datasets/aligned_img_A/', 
                                   align_faces_=True, 
                                   remove_outliers_= False,
                                   limit_num_faces_= None,
                                   limit_num_files_= None,
                                   output_size=256);

'''

In [None]:
# Align & crop, attempt 2 - cropping cell (disabled: the code below is kept inside a string literal)
'''
def detect_faces(image):

    # Create a face detector
    face_detector = dlib.get_frontal_face_detector()

    # Run detector and get bounding boxes of the faces on image.
    detected_faces = face_detector(image, 1)
    face_frames = [(x.left(), x.top(), x.right(), x.bottom()) for x in detected_faces]
    return detected_faces, face_frames

faces = []
face_counter = 0
output_size = 256
for file_path in file_paths:
    
    # Load image
    image = io.imread(file_path)
    image = image[:,:,::-1] #BGR to RGB
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    #image = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE) #read the image as grayscale
    # Detect & predict faces
    detected_faces, face_frames = detect_faces(image)
    predictor = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')

    shape = predictor(image, detected_faces[0]) # get facial features
    shape = np.array([(shape.part(j).x, shape.part(j).y) for j in range(shape.num_parts)])

    # center and scale face around mid point between eyes
    center_eyes = shape[27].astype(np.int)
    eyes_d = np.linalg.norm(shape[36]-shape[45])
    face_size_x = int(eyes_d * 2.)
    #if face_size_x < 50: continue

    # rotate to normalized angle
    d = (shape[45] - shape[36]) / eyes_d # normalized eyes-differnce vector (direction)
    a = np.rad2deg(np.arctan2(d[1],d[0])) # angle
    scale_factor = float(output_size) / float(face_size_x * 2.) # scale to fit in output_size
    # rotation (around center_eyes) + scale transform
    M = np.append(cv2.getRotationMatrix2D((center_eyes[0], center_eyes[1]),a,scale_factor),[[0,0,1]], axis=0)
    # apply shift from center_eyes to middle of output_size 
    M1 = np.array([[1.,0.,-center_eyes[0]+output_size/2.],
                   [0.,1.,-center_eyes[1]+output_size/2.],
                   [0,0,1.]])
    # concatenate transforms (rotation-scale + translation)
    M = M1.dot(M)[:2]
    # warp
    image_aligned = cv2.warpAffine(image, M, (output_size, output_size), borderMode=cv2.BORDER_REPLICATE)
    face = Image.fromarray(image_aligned).crop(face_frames[0])
    #face = cv2.resize(face,(output_size,output_size))
    faces.append(face)
    face_counter += 1
    
print(face_counter)
faces = np.asarray(faces)
'''