In [8]:
"""
STEP 1: RUN to pull all of the needed Libraries 
"""
import json
import numpy as np
import matplotlib.pyplot as plt
import tensorflow.keras as keras
from tensorflow.keras import backend as K
from IPython.display import display
from mpl_toolkits.mplot3d import axes3d
%matplotlib inline
import face_recognition
import cv2
import time

IMAGESIZE = [200, 200]  # [width, height]: resize_image() scales every image to this size before it is pushed to the network.

In [9]:
"""

STEP 2: RUN to pull in the META data files 


"""
# Load the metadata files that list the images of each data set.
# NOTE(review): each entry looks like [gender, age, face_location, image_path]
# (that is how read_images_gender_Age indexes it) - confirm against the script
# that generated these JSON files.
# `with` closes each file handle as soon as it has been parsed.
with open("imdb_outputdata.json") as imdb_handle:
    imdb_jsonFile = json.load(imdb_handle)
with open("wiki_outputdata.json") as wiki_handle:
    wiki_jsonFile = json.load(wiki_handle)

# directories the image paths stored in the metadata are relative to
imdb_file_location = '../imdb/'
wiki_file_location = '../wiki/'

imdbLen = len(imdb_jsonFile)
wikiLen = len(wiki_jsonFile)
print(imdbLen, " ", wikiLen)

460594   62328


In [10]:
"""

STEP 3: RUN to define all the functions needed to play with the models 


"""
# usage print(get_face_locations('nm0000001_rm946909184_1899-5-10_1968.jpg'))
def get_face_locations(imagePath):
    """Load the image stored at `imagePath` and return the faces found in it.

    The file is read with face_recognition.load_image_file and the result of
    face_recognition.face_locations on that image is returned unchanged.
    """
    loaded_image = face_recognition.load_image_file(imagePath)
    locations = face_recognition.face_locations(loaded_image)
    return locations

# crop the image down to the face location
def crop_image(image, face_loc):
    """Return the sub-array of `image` selected by `face_loc`.

    face_loc[1] and face_loc[3] bound the first axis (rows), face_loc[0] and
    face_loc[2] bound the second axis (columns).
    """
    row_start, row_stop = face_loc[1], face_loc[3]
    col_start, col_stop = face_loc[0], face_loc[2]
    return image[row_start:row_stop, col_start:col_stop]

# resize the image to the network input size (IMAGESIZE)
def resize_image(image):
    """Return `image` resized with cv2.resize to IMAGESIZE.

    IMAGESIZE[0] is passed as the width and IMAGESIZE[1] as the height of the
    `dsize` argument.
    """
    target_width, target_height = IMAGESIZE[0], IMAGESIZE[1]
    return cv2.resize(image, (target_width, target_height))

# turn a raw label into a class index, or None when it cannot be used
def _class_index(label, num_classes):
    """Return `label` as an int in [0, num_classes), or None if it is not one.

    Non-numeric values, NaN and infinities yield None instead of raising, so the
    caller can skip the single bad entry.
    """
    try:
        index = int(float(label))
    except (TypeError, ValueError, OverflowError):
        return None
    if 0 <= index < num_classes:
        return index
    return None


# function to fetch the images from index start (inclusive) to end (exclusive) and return three arrays
def read_images_gender_Age(start, end, JSON_File, images_location,
                           num_gender_classes=2, num_age_classes=120):
    """Read, crop and resize the images of entries start..end-1 of `JSON_File`.

    Each entry is indexed as: [0] gender, [1] age, [2] face location,
    [3] image path relative to `images_location`.

    Parameters:
        start, end: index range into JSON_File (end is exclusive).
        JSON_File: indexable collection of metadata entries.
        images_location: prefix that is concatenated with the stored image path.
        num_gender_classes: width of the gender one-hot encoding (default 2).
        num_age_classes: width of the age one-hot encoding (default 120).

    Returns:
        (X, Y, Z): X is the array of resized face images, Y the one-hot gender
        labels and Z the one-hot age labels of the entries that were kept.

    Entries are skipped (and counted as corrupted) when the image cannot be
    read, has one of the known corrupted shapes, has gender "nan", or carries a
    gender/age label outside the encodable range. Any other per-entry error is
    printed and the entry is skipped.
    """
    X = []
    Y = []
    Z = []
    corrupted_entry = 0
    for x in range(start, end):
        try:
            entry = JSON_File[x]
            image = cv2.imread(images_location + entry[3])

            # cv2.imread returns None (it does not raise) when the file is
            # missing or unreadable; treat that like the other corrupted images.
            if (image is None
                    or image.shape == (47, 100, 3)
                    or image.shape == (1, 1, 3)
                    or entry[0] == "nan"):
                corrupted_entry += 1
                continue

            # Validate the labels here: one unusable label would otherwise make
            # to_categorical fail (or wrap around) for the whole batch below.
            gender = _class_index(entry[0], num_gender_classes)
            age = _class_index(entry[1], num_age_classes)
            if gender is None or age is None:
                corrupted_entry += 1
                continue

            cropped_image = crop_image(image, entry[2])
            resized_image = resize_image(cropped_image)
            X.append(resized_image)
            Y.append(gender)
            Z.append(age)
            if x % 1000 == 0:
                print('read one 1000')
        except Exception as e:
            print('ran into exception, skipping this entry, Error:', e)
            continue

    X = np.array(X)
    # A fixed num_classes keeps the label width independent of which labels
    # happen to occur in this batch (and also works for an empty batch).
    Y = np.array(Y).astype('float32')
    Y = keras.utils.to_categorical(Y, num_classes=num_gender_classes)
    Z = np.array(Z).astype('float32')
    Z = keras.utils.to_categorical(Z, num_classes=num_age_classes)
    print('Read from ', start, ' to ', end, '. There were ', corrupted_entry, ' corrupted entries avoided')
    return X, Y, Z

#store the model and the history and which data entries were processed
def store_history_model(model, history, start, end, duration, historyFilelocation, model_name):
    """Save `model` as an .h5 file and append a record of the run to the history file.

    Parameters:
        model: object with a save(path) method.
        history: object whose .history dict has the keys 'categorical_accuracy',
            'val_categorical_accuracy', 'loss' and 'val_loss'.
        start, end: index range of the data entries that were processed.
        duration: value stored as the run duration.
        historyFilelocation: path of the JSON history file (a list of records).
        model_name: used for the target directory "./<model_name>/" and as a
            suffix of the .h5 file name.

    Returns:
        1 on success, 0 if anything failed (the error is printed).

    The model is saved and the new history is serialized before the history
    file is opened for writing, so a failure cannot wipe the existing history.
    """
    import os  # local import: `os` is not among the notebook's top-level imports

    try:
        # A missing or unparsable history file simply starts a fresh history.
        try:
            with open(historyFilelocation) as existing_file:
                json_object = json.load(existing_file)
        except (OSError, ValueError):
            json_object = []

        h5filename = str(start) + '-' + str(end) + model_name + '.h5'
        h5_location = "./" + model_name + "/" + h5filename

        # make sure the target directory exists before saving into it
        os.makedirs("./" + model_name, exist_ok=True)
        model.save(h5_location)

        json_object.append({"h5Filename": h5filename,
                            "start": start,
                            "end": end,
                            "duration": duration,
                            "h5_location": h5_location,
                            "categorical_accuracy": str(history.history['categorical_accuracy']),
                            "val_categorical_accuracy": str(history.history['val_categorical_accuracy']),
                            "loss": str(history.history['loss']),
                            "val_loss": str(history.history['val_loss'])
                            })

        # Serialize first: if this raises, the history file is left untouched.
        serialized = json.dumps(json_object)
        with open(historyFilelocation, 'w') as storage_file:
            storage_file.write(serialized)
    except Exception as e:
        print('ran into exception while trying to store model after fitting for index ', start, ' to index ', end, ' , Error:', e)
        return 0

    return 1

# pull the latest model checkpoint
def pull_latest_model(historyFilelocation):
    """Load the model referenced by the last record of the history file.

    Parameters:
        historyFilelocation: path of the JSON history file (a list of records,
            each with an 'h5_location' key).

    Returns:
        The model returned by keras.models.load_model for the last record, or 0
        when the history file cannot be read/parsed or contains no records.
        Errors raised by load_model itself are not caught here.
    """
    try:
        with open(historyFilelocation) as history_file:
            json_object = json.load(history_file)
    except Exception as e:
        print('could not get the latest model, Error:', e)
        return 0

    # An empty history means nothing has been stored yet; report it with the
    # same sentinel as a missing file instead of failing on the index lookup.
    if not json_object:
        print('could not get the latest model, Error: the history file has no entries')
        return 0

    return keras.models.load_model(json_object[-1]['h5_location'])

# shared builder: the gender and the age model use the same architecture
def _build_softmax_cnn(X, Y):
    """Build, compile and summarize the base convolutional classifier.

    The input shape is taken from X.shape[1:4] and the width of the softmax
    output layer from Y.shape[1]. The model is compiled with categorical
    cross-entropy, the Adam optimizer and the categorical-accuracy metric.
    """
    model = keras.Sequential()
    model.add(keras.layers.Conv2D(64, kernel_size=(10, 10),
                                  activation='relu',
                                  input_shape=[X.shape[1],
                                               X.shape[2],
                                               X.shape[3]]))
    model.add(keras.layers.Conv2D(128, (10, 10), activation='relu'))
    model.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))
    model.add(keras.layers.Dropout(0.25))
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(128, activation='relu'))
    model.add(keras.layers.Dense(128, activation='relu'))
    model.add(keras.layers.Dropout(0.5))
    model.add(keras.layers.Dense(Y.shape[1], activation='softmax'))
    model.compile(loss=keras.losses.CategoricalCrossentropy(),
                  optimizer=keras.optimizers.Adam(),
                  metrics=[keras.metrics.CategoricalAccuracy()])
    model.summary()
    return model


#get the base gender model
def gender_model(X,Y):
    """Return a new, compiled (untrained) gender classifier.

    X supplies the input shape (X.shape[1:4]); Y supplies the number of output
    classes (Y.shape[1]). The model summary is printed as a side effect.
    """
    return _build_softmax_cnn(X, Y)


#get the base age model
def age_model(X,Y):
    """Return a new, compiled (untrained) age classifier.

    X supplies the input shape (X.shape[1:4]); Y supplies the number of output
    classes (Y.shape[1]). The model summary is printed as a side effect.
    """
    return _build_softmax_cnn(X, Y)
                                   
def fit_model(model, X, Y, batch_size=25, epochs=1, validation_split=0.2):
    """Fit `model` on (X, Y) and return whatever model.fit returns.

    Parameters:
        model: object with a Keras-style fit method.
        X, Y: training inputs and targets, passed through unchanged.
        batch_size: samples per gradient update (default 25).
        epochs: number of passes over the data (default 1).
        validation_split: fraction of the data held out for validation
            (default 0.2).

    The defaults reproduce the values that used to be hard-coded here.
    """
    history = model.fit(X, Y,
                        batch_size=batch_size,
                        epochs=epochs,
                        verbose=1,
                        validation_split=validation_split)
    return history

#pick up from where you last left off and start fitting
#start_time = time.time()
#time.sleep(75)
#print((time.time() - start_time))
def train_models(start, end, JSON_FILE, images_location, historyFile ):
    """Train the gender model on entries start..end-1 and store the result.

    Continues from the latest model recorded in `historyFile`; if none can be
    loaded, a new gender model is created.

    Parameters:
        start, end: index range into JSON_FILE (end is exclusive).
        JSON_FILE: metadata entries, e.g. the loaded imdb list.
        images_location: directory prefix of the images, e.g. "../imdb/".
        historyFile: path of the JSON history file, e.g. "./genderHistory.json".

    Returns:
        1 if the batch was trained and the model and history were stored,
        0 otherwise (the reason is printed).
    """
    try:
        start_time = time.time()
        # the age labels (third value) are not used for the gender model
        X, Y, _ = read_images_gender_Age(start, end, JSON_FILE, images_location)
        if len(X) == 0:
            print('no usable images between index ', start, ' and index ', end, ', nothing to train on')
            return 0

        latest_gender_model = pull_latest_model(historyFile)
        # pull_latest_model signals "nothing stored" with the integer 0
        if isinstance(latest_gender_model, int) and latest_gender_model == 0:
            print("couldn't find a stored model, generating a new one")
            latest_gender_model = gender_model(X, Y)
        else:
            print("Found previously stored model")

        history = fit_model(latest_gender_model, X, Y)
        duration = (time.time() - start_time)
        # store_history_model returns 0 when it could not persist the results
        stored = store_history_model(latest_gender_model, history, start, end, duration, historyFile, 'gender')

    except Exception as e:
        print('ran into exception while training index ', start, ' to index ', end, ' did not train this set, Error:', e)
        return 0

    # do not report success when the trained model was not persisted
    return 1 if stored else 0

# TODO: fetch the last 20 percent of the imdb (368,475 -- 92,119) and wiki (49,862 --- 12,466)
# data sets, run model.predict on them and report the accuracy rate.
def test_model():
    """Placeholder for the model evaluation; not implemented yet, always returns 0."""
    return 0

In [None]:
"""

STEP 4: Train and play with the models

""" 
    
# Read a small sample of imdb entries and continue training the latest stored
# gender model (or start a new one when nothing has been stored yet).
X,Y,Z = read_images_gender_Age(0,50, imdb_jsonFile, "../imdb/")
latest_gender_model =  pull_latest_model("./genderHistory.json")
if(latest_gender_model == 0):
    print("couldn't find a stored model, generating a new one")
    latest_gender_model = gender_model(X,Y)
else:
    print("Found previously stored model")
# fit_model needs the training data as well as the model
history = fit_model(latest_gender_model, X, Y)


print(X.shape)
print(Y.shape)

In [11]:
#my_latest = pull_latest_model("./genderHistory.json")
# Train the gender model on imdb entries 60000..79999 and append the run to ./genderHistory.json.
# NOTE(review): the saved output of this cell reports "Read from 40000 to 60000",
# i.e. it comes from a run with different arguments - re-run the cell to refresh it.
train_models(60000, 80000, imdb_jsonFile, "../imdb/", "./genderHistory.json")
#print(my_latest)

read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
Read from  40000  to  60000 . There were  3  corrupted entries avoided
Found previously stored model


1

In [7]:
# Exploratory model with 2x2 convolution kernels.
# Requires X and Y to be defined already: the input shape comes from
# X.shape[1:4] and the number of output classes from Y.shape[1].
model = keras.Sequential([
    keras.layers.Conv2D(
        64,
        kernel_size=(2, 2),
        activation='relu',
        input_shape=[X.shape[1], X.shape[2], X.shape[3]],
    ),
    keras.layers.Conv2D(128, (2, 2), activation='relu'),
    keras.layers.MaxPooling2D(pool_size=(2, 2)),
    keras.layers.Dropout(0.25),
    keras.layers.Flatten(),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(Y.shape[1], activation='softmax'),
])
model.compile(
    loss=keras.losses.CategoricalCrossentropy(),
    optimizer=keras.optimizers.Adam(),
    metrics=[keras.metrics.CategoricalAccuracy()],
)
model.summary()

# hyperparameters read by the model.fit cell
batch_size = 25
epochs = 10
validation_split = 0.2

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 199, 199, 64)      832       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 198, 198, 128)     32896     
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 99, 99, 128)       0         
_________________________________________________________________
dropout (Dropout)            (None, 99, 99, 128)       0         
_________________________________________________________________
flatten (Flatten)            (None, 1254528)           0         
_________________________________________________________________
dense (Dense)                (None, 128)               160579712 
_________________________________________________________________
dense_1 (Dense)              (None, 128)               1

In [64]:
# Train the exploratory model; `history` is read by the plotting cell.
history = model.fit(
    X,
    Y,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_split=validation_split,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [65]:
# Predict on the training images and collect the indices of the samples whose
# true class (column 1 resp. column 0 of Y) got a predicted probability below 0.5.
predicitions = model.predict(X[:, :, :, :])

men_classified_as_women = [
    idx for idx in range(len(Y))
    if float(Y[idx][1]) == 1.0 and float(predicitions[idx][1]) < 0.5
]

women_classified_as_men = [
    idx for idx in range(len(Y))
    if float(Y[idx][0]) == 1.0 and float(predicitions[idx][0]) < 0.5
]

In [None]:
# number of misclassified samples in each direction (lists built in the previous cell)
print('men_classified_as_women ', len(men_classified_as_women))
print('women_classified_as_men ',  len(women_classified_as_men))

In [None]:
# Show every sample recorded in women_classified_as_men together with its label
# and the model's prediction.
for x in range(len(women_classified_as_men)):
    sample_index = women_classified_as_men[x]
    print('showing Image at index -', sample_index)
    print(Y[sample_index])
    print(predicitions[sample_index])
    # The images were read with cv2.imread, which stores channels as BGR, while
    # plt.imshow expects RGB: reverse the channel axis so colours display correctly.
    plt.imshow(X[sample_index, :, :, ::-1])
    plt.show()

In [None]:
# Plot the training history: accuracy in the top panel, loss in the bottom panel.
fig, (accuracy_ax, loss_ax) = plt.subplots(2, 1)

# summarize history for accuracy
accuracy_ax.plot(history.history['categorical_accuracy'])
accuracy_ax.plot(history.history['val_categorical_accuracy'])
accuracy_ax.set_title('model accuracy')
accuracy_ax.set_ylabel('accuracy')
accuracy_ax.set_xlabel('epoch')
accuracy_ax.legend(['train', 'val'], loc='upper left')

# summarize history for loss
loss_ax.plot(history.history['loss'])
loss_ax.plot(history.history['val_loss'])
loss_ax.set_title('model loss')
loss_ax.set_ylabel('loss')
loss_ax.set_xlabel('epoch')
loss_ax.legend(['train', 'val'], loc='upper left')

fig.tight_layout()
plt.show()

In [None]:
#type(imdb_jsonFile) THIS IS JUST FOR TESTING
# Preview the first four usable imdb images after cropping and resizing.
counter = 0
corrupted_images = 0

for x in imdb_jsonFile:
    # stop once four images were shown instead of walking the whole list
    if counter >= 4:
        break

    print(x)
    print(x[3])
    # the stored path is relative to the imdb image directory
    image = cv2.imread(imdb_file_location + x[3])

    # skip unreadable files (cv2.imread returns None) and the known corrupted images
    if(image is None or image.shape == (47,100,3) or image.shape == (1,1,3)):
        corrupted_images +=1
        continue

    face_loc = x[2]
    cropped_image = crop_image(image, face_loc)
    resized_image = resize_image(cropped_image)
    print(resized_image.shape)
    # cv2 stores channels as BGR, plt.imshow expects RGB: reverse the channel axis
    plt.imshow(resized_image[:, :, ::-1])
    plt.show()
    counter+=1


In [22]:
#start_time = time.time()
#time.sleep(75)
#print((time.time() - start_time))

In [16]:

# Print the 1-based index range of every 1000-entry batch; batches that start
# on a multiple of 25000 are additionally marked with ' saving'.
for batch_start in range(0, 50000, 1000):
    marker = (' saving',) if batch_start % 25000 == 0 else ()
    print(batch_start + 1, ' ', batch_start + 1000, *marker)
 
   

1   1000  saving
1001   2000
2001   3000
3001   4000
4001   5000
5001   6000
6001   7000
7001   8000
8001   9000
9001   10000
10001   11000
11001   12000
12001   13000
13001   14000
14001   15000
15001   16000
16001   17000
17001   18000
18001   19000
19001   20000
20001   21000
21001   22000
22001   23000
23001   24000
24001   25000
25001   26000  saving
26001   27000
27001   28000
28001   29000
29001   30000
30001   31000
31001   32000
32001   33000
33001   34000
34001   35000
35001   36000
36001   37000
37001   38000
38001   39000
39001   40000
40001   41000
41001   42000
42001   43000
43001   44000
44001   45000
45001   46000
46001   47000
47001   48000
48001   49000
49001   50000
