In [1]:
import json
import numpy as np
import matplotlib.pyplot as plt
import tensorflow.keras as keras
from tensorflow.keras import backend as K
from IPython.display import display
from mpl_toolkits.mplot3d import axes3d
%matplotlib inline
#import face_recognition
import cv2
import time
import math

from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.xception import preprocess_input, decode_predictions
#global variables of the picture size
IMAGESIZE = [299, 299]  # width (0) Height (1) Images are resized to the this before getting push to the network.

In [2]:
# load up the meta data Image paths 
imdb_jsonFile = json.load(open("imdb_outputdata.json"))
wiki_jsonFile = json.load(open("wiki_outputdata.json"))
imdb_file_location = '../imdb/'
wiki_file_location = '../wiki/'
imdbLen = len(imdb_jsonFile)
wikiLen = len(wiki_jsonFile)
print(imdbLen, " ", wikiLen)

460594   62328


In [3]:
# usage print(get_face_locations('nm0000001_rm946909184_1899-5-10_1968.jpg'))
def get_face_locations(imagePath): 
    image = face_recognition.load_image_file(imagePath)
    return face_recognition.face_locations(image)

# crop the image to just read the face location  
def crop_image(image, face_loc):
    return image[face_loc[1]:face_loc[3], face_loc[0]:face_loc[2]]  #  1 , 3, 0, 2

# resize the image to match the 
def resize_image(image):
    dsize = (IMAGESIZE[0], IMAGESIZE[1]) # width (0) Height (1)  
    return cv2.resize(image, dsize)

In [2]:
#get the base gender model                                  
def gender_model(X,Y):
    base_model = keras.applications.Xception(weights='imagenet',include_top=False)
    new_model = base_model.output
    new_model = keras.layers.GlobalAveragePooling2D()(new_model)
    new_model = keras.layers.Dense(128,activation='relu')(new_model)
    new_model = keras.layers.Dense(Y.shape[1],activation='softmax')(new_model)
    model = keras.Model(inputs=base_model.input,outputs=new_model)
    model.compile(loss=keras.losses.CategoricalCrossentropy(),
                optimizer=keras.optimizers.Adam(),
             metrics=[keras.metrics.CategoricalAccuracy()])
    model.summary()
    return model         
#get the base age model                                   
def age_model(X,Y):
    base_model = keras.applications.Xception(weights='imagenet',include_top=False)
    new_model = base_model.output
    new_model = keras.layers.GlobalAveragePooling2D()(new_model)
    new_model = keras.layers.Dense(128,activation='relu')(new_model)
    new_model = keras.layers.Dense(Y.shape[1],activation='softmax')(new_model)
    model = keras.Model(inputs=base_model.input,outputs=new_model)
    model.compile(loss=keras.losses.CategoricalCrossentropy(),
                optimizer=keras.optimizers.Adam(),
             metrics=[keras.metrics.CategoricalAccuracy()])
    model.summary()
    return model                                
                                   

In [3]:
# function to  fetch the IMAGES from X(start) to Y(end) and returns three arrays  
def read_images_gender_Age(start, end, JSON_File, images_location):
    X=[]
    Y=[]
    Z=[]
    corrupted_entry = 0 
    corrupted_age = 0 
    undetected_faces = 0
    for x in range(start, end):
        try:
            image = cv2.imread(images_location+JSON_File[x][3])    
                # if the image is one of the corrupted image, skip this entry 
            if(image is None or image.shape == (47,100,3) or image.shape == (1,1,3) or JSON_File[x][0] == "nan"):
                corrupted_entry +=1 
                continue
            face_loc = JSON_File[x][2]
            cropped_image = crop_image(image, face_loc)
            resized_image = resize_image(cropped_image)
            X.append(resized_image)
            Y.append(JSON_File[x][0]) # gender 
            Z.append(int(JSON_File[x][1])//5) # age
            #print(resized_image.shape)
            #plt.imshow(resized_image) 
            #plt.show()
            if(x % 1000 == 0):
                print('read one 1000')
        except Exception as e:
            print('ran into exception, skipping this photo entry, Error:', e)
            continue
            
    X = np.array(X)
    Y = np.array(Y).astype('float32')
    Y = keras.utils.to_categorical(Y)
    Z = np.array(Z).astype('float32')
    Z = keras.utils.to_categorical(Z, num_classes = 26)
    print('Read from ', start, ' to ', end, '. There were ',corrupted_entry)
    return X,Y,Z

#                       ['00','01','02','03','04','05']  
def read_images_byfile(files, JSON_File, images_location):
    X=[]
    Y=[]
    Z=[]
    corrupted_entry = 0 
    corrupted_age = 0 
    undetected_faces = 0
    count =0 
    for x in range(len(JSON_File)):
        if ((JSON_File[x][3]).split("/")[0] in files):
            try:
                image = cv2.imread(images_location+JSON_File[x][3])
                
                    # if the image is one of the corrupted image, skip this entry 
                if(image is None or image.shape == (47,100,3) or image.shape == (1,1,3) or JSON_File[x][0] == "nan"):
                    corrupted_entry +=1 
                    continue
                face_loc = JSON_File[x][2]
                cropped_image = crop_image(image, face_loc)
                resized_image = resize_image(cropped_image)
                X.append(resized_image)
                Y.append(JSON_File[x][0]) # gender 
                Z.append(int(JSON_File[x][1])//5) # age
                count +=1
                
                if(count % 1000 == 0):
                    print('read one 1000')
            except Exception as e:
                print('ran into exception, skipping this photo entry, Error:', e)
                continue
    
    X = np.array(X)
    Y = np.array(Y).astype('float32')
    Y = keras.utils.to_categorical(Y)
    Z = np.array(Z).astype('float32')
    Z = keras.utils.to_categorical(Z, num_classes = 26)
    print('read_images_by_file read', count, 'images')
    print('read these files' , str(files),'There were ',corrupted_entry')
    return X,Y,Z


#store the model and the history and which data entries were processed 
def store_history_model(model, history, start, end, duration, historyFilelocation, model_name, dataSetName):
    try:
        try:
            json_object = json.load(open(historyFilelocation))
        except:
            json_object = []

        storage_file = open(historyFilelocation, 'w')

        h5filename = str(start) + '-' + str(end)+ model_name + "-" + str(dataSetName)+ '.h5'
        h5_location = "./" + model_name + "/" + h5filename;

        model.save(h5_location);

        json_object.append({"h5Filename": h5filename,
                "start": start,
                "end": end,
                "duration": duration,
                "h5_location": h5_location, 
                "categorical_accuracy": str(history.history['categorical_accuracy']),          
                "val_categorical_accuracy": str(history.history['val_categorical_accuracy']),          
                "loss": str(history.history['loss']),
                "val_loss": str(history.history['val_loss'])
                           })

        json.dump(json_object, storage_file)
        storage_file.close()
    except Exception as e:
        print('ran into exception while trying to store model after fitting for index ', start, ' to index ', end, ' , Error:', e)
        return 0
    
    return 1
def store_history_model_byFiles(model, history, files, duration, historyFilelocation, model_name, dataSetName):
    try:
        try:
            json_object = json.load(open(historyFilelocation))
        except:
            json_object = []

        storage_file = open(historyFilelocation, 'w')

        h5filename = str(files)+ model_name + "-" + str(dataSetName)+ '.h5'
        h5_location = "./" + model_name + "/" + h5filename;

        model.save(h5_location);

        json_object.append({"h5Filename": h5filename,
                "files": str(files),
                "duration": duration,
                "h5_location": h5_location, 
                "categorical_accuracy": str(history.history['categorical_accuracy']),          
                "val_categorical_accuracy": str(history.history['val_categorical_accuracy']),          
                "loss": str(history.history['loss']),
                "val_loss": str(history.history['val_loss'])
                           })

        json.dump(json_object, storage_file)
        storage_file.close()
    except Exception as e:
        print('ran into exception while trying to store model Error:', e)
        return 0
    
    return 1

# pull the latest model checkpoint    
def pull_latest_model(historyFilelocation):
    try:
        json_object = json.load(open(historyFilelocation))
    except Exception as e:
        print('could not get the latest model, Error:', e)
        return 0
    print('trying to load model from', json_object[len(json_object)-1]['h5_location'])
    return keras.models.load_model(json_object[len(json_object)-1]['h5_location'])


def fit_model(model, X, Y, num_epochs):
    batch_size = 40
    epochs = num_epochs
    validation_split = 0.2
    history = model.fit(X, Y,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=2,
                    validation_split = validation_split)
    return history


def train_gender_model(start, end, JSON_FILE, images_location, historyFile, dataSetName):
    try:
        print('Training gender model')
        start_time = time.time()
        X,Y,Z = read_images_gender_Age(start, end, JSON_FILE, images_location) #"../imdb/"
        latest_gender_model =  pull_latest_model(historyFile)  # "./genderHistory.json"
        if(latest_gender_model == 0):
            print("couldn't find a stored model, generating a new one")
            latest_gender_model = gender_model(X,Y)
        else:
            print("Found previously stored model from ", historyFile)
        
        history = fit_model(latest_gender_model, X, Y, 1)
        duration = (time.time() - start_time)
        store_history_model(latest_gender_model,history,start,end,duration,historyFile,'gender',dataSetName)
        
        
    except Exception as e:
        print('ran into exception while training index ', start, ' to index ', end, ' did not train this set, Error:', e)
        return 0
        
    return X,Y,Z

def train_age_model(start, end, JSON_FILE, images_location, historyFile, dataSetName):
    try:
        print('Training age model')
        start_time = time.time()
        X,Y,Z = read_images_gender_Age(start, end, JSON_FILE, images_location) #"../imdb/"
        latest_age_model =  pull_latest_model(historyFile)  # "./genderHistory.json"
        if(latest_age_model == 0):
            print("couldn't find a stored model, generating a new one")
            latest_age_model = age_model(X,Z)
        else:
            print("Found previously stored model from ", historyFile)
        
        history = fit_model(latest_age_model, X, Z, 2)
        duration = (time.time() - start_time)
        store_history_model(latest_age_model,history,start,end,duration,historyFile,'age',dataSetName)
          
    except Exception as e:
        print('ran into exception while training index ', start, ' to index ', end, ' did not train this set, Error:', e)
        return 0
        
    return X,Y,Z 

def train_gender_model_byfile(files, JSON_FILE, images_location, historyFile, dataSetName):
    try:
        print('Training gender model')
        start_time = time.time()
        X,Y,Z = read_images_byfile(files, JSON_FILE, images_location) #"../imdb/"
        latest_gender_model =  pull_latest_model(historyFile)  # "./genderHistory.json"
        if(latest_gender_model == 0):
            print("couldn't find a stored model, generating a new one")
            latest_gender_model = gender_model(X,Y)
        else:
            print("Found previously stored model from ", historyFile)
        
        history = fit_model(latest_gender_model, X, Y, 1)
        duration = (time.time() - start_time)
        store_history_model_byFiles(latest_gender_model,history,files,duration,historyFile,'gender',dataSetName)
        
        
    except Exception as e:
        print('ran into exception while training did not train this set, Error:', e)
        return 0
        
    return X,Y,Z 

def train_age_model_byfile(files, JSON_FILE, images_location, historyFile, dataSetName):
    try:
        print('Training age model')
        start_time = time.time()
        X,Y,Z = read_images_byfile(files, JSON_FILE, images_location) #"../imdb/"
        latest_age_model =  pull_latest_model(historyFile)  # "./genderHistory.json"
        if(latest_age_model == 0):
            print("couldn't find a stored model, generating a new one")
            latest_age_model = age_model(X,Z)
        else:
            print("Found previously stored model from ", historyFile)
            
        history = fit_model(latest_age_model, X, Z, 2)
        duration = (time.time() - start_time)
        store_history_model_byFiles(latest_age_model,history,files,duration,historyFile,'age',dataSetName)
          
    except Exception as e:
        print('ran into exception while training did not train this set, Error:', e)
        return 0
        
    return X,Y,Z 

#fetch the last 20 percent from the imdb(368,475  --  92,119) & wiki(49,862 --- 12,466) data sets, and run model.predict and see what's the accuracy rate.     
def test_model():
    
    return 0 

In [5]:
X,Y,Z = train_age_model_byfile(["35","36","37","38","39","40"], imdb_jsonFile, "../imdb/", "./ageHistory2.json", "IMDB")

Training age model
read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
read_images_by_file read 26334 images
read these files ['35', '36', '37', '38', '39', '40'] There were  310  corrupted entries avoided, also avoided corrupted 0 age entries, also  4069  undetected faces skip
trying to load model from ./age/0-62328age-wikidataset2.h5
ran into exception while training did not train this set, Error: SavedModel file does not exist at: ./age/0-62328age-wikidataset2.h5/{saved_model.pbtxt|saved_model.pb}


TypeError: cannot unpack non-iterable int object

In [20]:
train_gender_model(40000, 49862, wiki_jsonFile, "../wiki/", "./genderHistory.json", "wikidataset")

read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
read one 1000
Read from  40000  to  49862 . There were  1710  corrupted entries avoided, also avoided corrupted 5 age entries
Found previously stored model


1

In [None]:
train_age_model_byfile(["00"], wiki_jsonFile, "./", "./ageHistory.json", "wikidataset")

Training age model
read_images_by_file read 489 images
There were  110  corrupted entries avoided, also avoided corrupted 1 age entries
could not get the latest model, Error: [Errno 2] No such file or directory: './ageHistory.json'
couldn't find a stored model, generating a new one
Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, None, None,  0                                            
__________________________________________________________________________________________________
block1_conv1 (Conv2D)           (None, None, None, 3 864         input_1[0][0]                    
__________________________________________________________________________________________________
block1_conv1_bn (BatchNormaliza (None, None, None, 3 128         block1_conv1[0][0]               
_________

In [7]:
model = keras.Sequential()
model.add(keras.layers.Conv2D(64, kernel_size=(2, 2),
 activation='relu',
 input_shape=[X.shape[1],
 X.shape[2],
 X.shape[3]]))
model.add(keras.layers.Conv2D(128, (2, 2), activation='relu'))
model.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))
model.add(keras.layers.Dropout(0.25))
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(128, activation='relu'))
model.add(keras.layers.Dense(128, activation='relu'))
model.add(keras.layers.Dropout(0.5))
model.add(keras.layers.Dense(Y.shape[1], activation='softmax'))
model.compile(loss=keras.losses.CategoricalCrossentropy(),
 optimizer=keras.optimizers.Adam(),
 metrics=[keras.metrics.CategoricalAccuracy()])
model.summary()

batch_size = 25
epochs = 10
validation_split = 0.2

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 199, 199, 64)      832       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 198, 198, 128)     32896     
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 99, 99, 128)       0         
_________________________________________________________________
dropout (Dropout)            (None, 99, 99, 128)       0         
_________________________________________________________________
flatten (Flatten)            (None, 1254528)           0         
_________________________________________________________________
dense (Dense)                (None, 128)               160579712 
_________________________________________________________________
dense_1 (Dense)              (None, 128)               1

In [65]:
predicitions  = model.predict(X[:,:,:,:])
men_classified_as_women = []
women_classified_as_men = []
for x in range(len(Y)):
    if(float(Y[x][1]) == 1.0 and float(predicitions[x][1]) < 0.5):
        men_classified_as_women.append(x)
        
for x in range(len(Y)):
    if(float(Y[x][0]) == 1.0 and float(predicitions[x][0]) < 0.5):
        women_classified_as_men.append(x)

In [None]:
print('men_classified_as_women ', len(men_classified_as_women))
print('women_classified_as_men ',  len(women_classified_as_men))

In [None]:
for x in range(len(women_classified_as_men)):
    print('showing Image at index -', women_classified_as_men[x])
    print(Y[women_classified_as_men[x]])
    print(predicitions[women_classified_as_men[x]])
    plt.imshow(X[women_classified_as_men[x],:,:,:])
    plt.show()

In [182]:

files = ['00','01','02','03','04','05']

name = '01/nm0000001_rm124825600_1899-5-10_1968.jpg'

str(files)

"['00', '01', '02', '03', '04', '05']"