In [31]:
import os

import tensorflow as tf

import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline

from PIL import Image

from skimage.feature import hog
from skimage.color import rgb2grey

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier

In [6]:
# Load the per-image label table: tab-separated, with the first column used as
# the index. Later cells iterate over this index to locate each image file and
# read the `gender` and `smiling` columns as classification targets.
labels = pd.read_csv("Datasets/celeba/labels.csv", index_col=0, sep='\t')

In [56]:
# Function to get and preprocess (crop) the images
def resize_get_image(row_id, root="Datasets/celeba/img", crop_box=(57, 45, 121, 173)):
    """Load one image by id and return a fixed crop of it as a NumPy array.

    Despite the name, no resizing takes place: the image is only cropped.

    Parameters
    ----------
    row_id : int or str
        Image identifier; the file read is ``<root>/<row_id>.jpg``.
    root : str
        Directory that holds the image files.
    crop_box : tuple of int
        ``(left, top, right, bottom)`` pixel box handed to ``Image.crop``.
        The default keeps a 64x128 (width x height) window, on the assumption
        that every face sits in the middle of the frame.

    Returns
    -------
    numpy.ndarray
        Pixel data of the cropped region.
    """
    file_path = os.path.join(root, "{}.jpg".format(row_id))

    # Open through a context manager so the underlying file handle is released
    # right away; this function is called once per image in a long loop, and
    # leaving handles to the garbage collector can exhaust file descriptors.
    with Image.open(file_path) as img_ori:
        # Convert to an array while the file is still open so the pixel data
        # is fully read before the handle is closed.
        return np.array(img_ori.crop(crop_box))

In [12]:
def create_features(img, include_hog=False):
    """Turn an image array into a 1-D feature vector.

    The previous implementation computed HOG features, stored them in a
    misspelled local and then returned only the flattened pixels, so the HOG
    work was wasted on every call. The default here keeps that returned value
    (flattened pixels only) so existing results stay reproducible, while
    ``include_hog=True`` appends the HOG descriptor as originally intended.

    Parameters
    ----------
    img : numpy.ndarray
        Image array; it is flattened as-is, and converted to greyscale with
        ``rgb2grey`` only when HOG features are requested.
    include_hog : bool
        When True, HOG features of the greyscale image are appended after the
        flattened pixel values.

    Returns
    -------
    numpy.ndarray
        1-D feature vector. With ``include_hog=False`` it is a flattened copy
        of ``img`` with the same dtype; with ``include_hog=True`` the pixel
        values are concatenated with the HOG values.
    """
    # flatten() already returns a fresh 1-D copy, so no extra stacking is
    # needed for the pixel-only case.
    color_features = img.flatten()
    if not include_hog:
        return color_features

    img_grey = rgb2grey(img)
    hog_features = hog(img_grey,
                       block_norm="L2-Hys",
                       pixels_per_cell=(16, 16),
                       cells_per_block=(2, 2))
    # hstack needs a sequence of arrays: pixels first, then the HOG descriptor.
    return np.hstack((color_features, hog_features))

In [13]:
# Build the feature matrix, one row per labelled image
def create_feature_matrix(label_dataframe):
    """Load every image named by the frame's index and stack their features.

    Parameters
    ----------
    label_dataframe : object with an ``index`` attribute
        Each index value is passed to ``resize_get_image`` as the image id.

    Returns
    -------
    numpy.ndarray
        Array built from the per-image feature vectors, in index order.
    """
    rows = []
    for position, img_id in enumerate(label_dataframe.index):
        rows.append(create_features(resize_get_image(img_id)))
        # Progress marker on the first image and every 1000th one after it.
        if position % 1000 == 0:
            print("Done: ", position)

    return np.array(rows)

# Build the raw feature matrix for every image listed in `labels`
# (one row per index entry, in index order).
feature_matrix = create_feature_matrix(labels)

Done:  0
Done:  1000
Done:  2000
Done:  3000
Done:  4000


In [14]:
# Report the raw feature matrix dimensions before any transformation.
print("Feature Matrix Shape is: ", feature_matrix.shape)

# Standardise the features with StandardScaler.
# NOTE(review): the scaler (and the PCA below) are fitted on the full matrix,
# before the train/test splits made in later cells, so held-out rows influence
# the fitted statistics — consider fitting on the training rows only.
ss = StandardScaler()
people_stand = ss.fit_transform(feature_matrix)

# PCA with default settings (no n_components given), then project the
# standardised features onto the fitted components.
# NOTE(review): the recorded output shows the projection is as wide as the
# number of samples, i.e. no dimensionality is actually being dropped here.
pca = PCA()
people_pca = pca.fit_transform(people_stand)
print("PCA Matrix Shape is: ", people_pca.shape)

Feature Matrix Shape is:  (5000, 24576)




PCA Matrix Shape is:  (5000, 5000)


# Split Dataset - Gender

In [25]:
# Features are the PCA projection; the target is the `gender` label column.
X = pd.DataFrame(people_pca)
y = pd.Series(labels.gender.values)

# 70/30 train/test split with a fixed seed so the partition is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=.3,
    random_state=1234123,
)

# Class balance of the training targets (shown as the cell output).
pd.Series(y_train).value_counts()

-1    1782
 1    1718
dtype: int64

## SVM - Gender

In [26]:
# Linear-kernel SVM trained on the gender training split.
# NOTE(review): probability=True adds extra fitting cost (see the scikit-learn
# SVC docs), yet no probability output is used in the cells shown — consider
# dropping it unless predict_proba is needed elsewhere.
svm = SVC(kernel='linear', probability=True, random_state=42)
gender_model = svm.fit(X_train, y_train)

In [27]:
# Predict gender labels for the held-out split.
y_pred = svm.predict(X_test)

# Fraction of held-out samples whose predicted label matches the true label.
accuracy = accuracy_score(y_test, y_pred)
print("Model Accuracy is: ", accuracy)

Model Accuracy is:  0.8973333333333333


## MLP - Gender

In [54]:
# scikit-learn MLP for gender: two hidden layers of 50 units, ReLU, plain SGD.
# verbose=10 produces the per-iteration loss log below; max_iter=20 stops
# training after 20 iterations (the logged loss is still decreasing at that
# point, so the model has probably not converged).
mlp = MLPClassifier(
    hidden_layer_sizes=(50,50),
    activation='relu',
    solver='sgd',
    learning_rate_init=0.001,
    alpha=1e-4,
    max_iter=20,
    random_state=1,
    verbose=10,
)
gender_mlp_model = mlp.fit(X_train, y_train)

Iteration 1, loss = 0.68984559
Iteration 2, loss = 0.39215813
Iteration 3, loss = 0.30616732
Iteration 4, loss = 0.26079627
Iteration 5, loss = 0.22840465
Iteration 6, loss = 0.20511576
Iteration 7, loss = 0.18645466
Iteration 8, loss = 0.17110500
Iteration 9, loss = 0.15656403
Iteration 10, loss = 0.14414799
Iteration 11, loss = 0.13462009
Iteration 12, loss = 0.12438311
Iteration 13, loss = 0.11646397
Iteration 14, loss = 0.10700429
Iteration 15, loss = 0.10073194
Iteration 16, loss = 0.09579492
Iteration 17, loss = 0.08912325
Iteration 18, loss = 0.08331787
Iteration 19, loss = 0.07862276
Iteration 20, loss = 0.07353616




In [55]:
# Summary of the fitted gender MLP: held-out accuracy followed by a few
# attributes of the trained estimator, one per output line.
print("\n".join([
    "Gender MLP Model Acc: " + str(mlp.score(X_test, y_test)),
    "No. Layers: " + str(mlp.n_layers_),
    "No. Iterations: " + str(mlp.n_iter_),
    "Loss: " + str(mlp.loss_),
    "Output Activation Function: " + str(mlp.out_activation_),
]))

Gender MLP Model Acc: 0.9133333333333333
No. Layers: 4
No. Iterations: 20
Loss: 0.07353615707748115
Output Activation Function: logistic


# Split Dataset - Smiling

In [28]:
# Same PCA features as before; the target is now the `smiling` label column.
X_smile = pd.DataFrame(people_pca)
y_smile = pd.Series(labels.smiling.values)

# 70/30 train/test split with a fixed seed so the partition is repeatable.
X_smile_train, X_smile_test, y_smile_train, y_smile_test = train_test_split(
    X_smile,
    y_smile,
    test_size=.3,
    random_state=1234123,
)

# Class balance of the training targets (shown as the cell output).
pd.Series(y_smile_train).value_counts()

-1    1753
 1    1747
dtype: int64

## SVM - Smiling

In [29]:
# Linear-kernel SVM trained on the smiling training split.
# NOTE(review): probability=True adds extra fitting cost (see the scikit-learn
# SVC docs), yet no probability output is used in the cells shown — consider
# dropping it unless predict_proba is needed elsewhere.
svm_smile = SVC(kernel='linear', probability=True, random_state=42)
smile_model = svm_smile.fit(X_smile_train, y_smile_train)

In [30]:
# Predict smiling labels for the held-out split.
y_smile_pred = svm_smile.predict(X_smile_test)

# Fraction of held-out samples whose predicted label matches the true label.
accuracy_smile = accuracy_score(y_smile_test, y_smile_pred)
print("Model Accuracy is: ", accuracy_smile)

Model Accuracy is:  0.8466666666666667


## MLP - Smiling

In [49]:
# scikit-learn MLP for smiling: two hidden layers of 50 units, ReLU, plain SGD.
# verbose=10 produces the per-iteration loss log below; max_iter=20 stops
# training after 20 iterations (the logged loss is still decreasing at that
# point, so the model has probably not converged).
mlp_smile = MLPClassifier(
    hidden_layer_sizes=(50,50),
    activation='relu',
    solver='sgd',
    learning_rate_init=0.001,
    alpha=1e-4,
    max_iter=20,
    random_state=1,
    verbose=10,
)
smile_mlp_model = mlp_smile.fit(X_smile_train, y_smile_train)

Iteration 1, loss = 0.71070306
Iteration 2, loss = 0.42078322
Iteration 3, loss = 0.33774067
Iteration 4, loss = 0.29315993
Iteration 5, loss = 0.26267296
Iteration 6, loss = 0.24044985
Iteration 7, loss = 0.22377056
Iteration 8, loss = 0.20652556
Iteration 9, loss = 0.19473451
Iteration 10, loss = 0.18184694
Iteration 11, loss = 0.17201466
Iteration 12, loss = 0.16207474
Iteration 13, loss = 0.15386548
Iteration 14, loss = 0.14571056
Iteration 15, loss = 0.13868039
Iteration 16, loss = 0.13262778
Iteration 17, loss = 0.12545761
Iteration 18, loss = 0.11905927
Iteration 19, loss = 0.11356846
Iteration 20, loss = 0.10924265




In [53]:
# Summary of the fitted smiling MLP: held-out accuracy followed by a few
# attributes of the trained estimator, one per output line.
print("\n".join([
    "Smiling MLP Model Acc: " + str(mlp_smile.score(X_smile_test, y_smile_test)),
    "No. Layers: " + str(mlp_smile.n_layers_),
    "No. Iterations: " + str(mlp_smile.n_iter_),
    "Loss: " + str(mlp_smile.loss_),
    "Output Activation Function: " + str(mlp_smile.out_activation_),
]))

Smiling MLP Model Acc: 0.8626666666666667
No. Layers: 4
No. Iterations: 20
Loss: 0.1092426542437643
Output Activation Function: logistic


# MLP - TensorFlow (Gender)

In [24]:
def allocate_weights_and_biases(input_shape=(68, 2), n_classes=2,
                                n_hidden_1=2048, n_hidden_2=2048, stddev=0.01):
    """Create the placeholders and trainable variables of a two-hidden-layer MLP.

    All sizes used to be hard-coded (68x2 inputs, 2 classes, 2048-unit hidden
    layers), which made the network unusable for inputs of any other width.
    They are now parameters whose defaults reproduce the original graph.

    Parameters
    ----------
    input_shape : sequence of int
        Shape of one input sample, without the batch axis, e.g. ``(68, 2)``
        or ``(n_features,)``.
    n_classes : int
        Width of the label placeholder and of the output layer.
    n_hidden_1, n_hidden_2 : int
        Number of units in the first and second hidden layers.
    stddev : float
        Standard deviation of the normal distribution used to initialise
        every weight and bias.

    Returns
    -------
    tuple
        ``(weights, biases, X, Y, img_flat)`` where ``weights`` and ``biases``
        are dicts of ``tf.Variable`` (keys ``'hidden_layer1'``,
        ``'hidden_layer2'``, ``'out'`` and ``'bias_layer1'``, ``'bias_layer2'``,
        ``'out'``), ``X``/``Y`` are the input/label placeholders, and
        ``img_flat`` is ``X`` reshaped to ``[batch, n_inputs]``.
    """
    input_shape = tuple(int(dim) for dim in input_shape)

    # Number of scalar inputs per sample once all non-batch axes are merged.
    n_inputs = 1
    for dim in input_shape:
        n_inputs *= dim

    X = tf.placeholder("float", [None] + list(input_shape))
    Y = tf.placeholder("float", [None, n_classes])

    # Merge every non-batch axis into one; a plain reshape does the same job
    # as the contrib flatten layer without depending on tf.contrib.
    img_flat = tf.reshape(X, [-1, n_inputs])

    weights = {
        'hidden_layer1': tf.Variable(tf.random_normal([n_inputs, n_hidden_1], stddev=stddev)),
        'hidden_layer2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2], stddev=stddev)),
        'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes], stddev=stddev))
    }

    biases = {
        'bias_layer1': tf.Variable(tf.random_normal([n_hidden_1], stddev=stddev)),
        'bias_layer2': tf.Variable(tf.random_normal([n_hidden_2], stddev=stddev)),
        'out': tf.Variable(tf.random_normal([n_classes], stddev=stddev))
    }

    return weights, biases, X, Y, img_flat

In [22]:
def model_mlp():
    """Assemble the forward pass of the two-hidden-layer sigmoid MLP.

    Returns
    -------
    tuple
        ``(out_layer, X, Y)``: the un-normalised output of the final layer,
        plus the input and label placeholders created by
        ``allocate_weights_and_biases``.
    """
    weights, biases, X, Y, img_flat = allocate_weights_and_biases()

    # Both hidden layers share one recipe: affine transform, then sigmoid.
    activations = img_flat
    for weight_key, bias_key in (('hidden_layer1', 'bias_layer1'),
                                 ('hidden_layer2', 'bias_layer2')):
        pre_activation = tf.add(tf.matmul(activations, weights[weight_key]),
                                biases[bias_key])
        activations = tf.math.sigmoid(pre_activation)

    # The output layer stays linear; no activation is applied here.
    out_layer = tf.matmul(activations, weights['out']) + biases['out']

    return out_layer, X, Y

In [23]:
# Learning parameters for the TensorFlow MLP.
learning_rate = 0.00001
training_epochs = 500

# Print the training accuracy every `display_accuracy_step` epochs.
display_accuracy_step = 2
    

# Re-create the gender split from the PCA features (same test_size and seed as
# the scikit-learn gender split earlier in the notebook).
# NOTE(review): model_mlp() below is called without arguments, so its input
# placeholder is [None, 68, 2] and its label placeholder is [None, 2], while
# the data prepared here is a 2-D PCA DataFrame and a 1-D Series of gender
# values. The recorded feed error in the session cell comes from that
# mismatch; the labels also look like they need one-hot encoding for the
# softmax loss — confirm and fix before running.
X_diu = pd.DataFrame(people_pca)
y_diu = pd.Series(labels.gender.values)
training_images, test_images, training_labels, test_labels = train_test_split(X_diu, y_diu, test_size = .3, random_state=1234123)
# Earlier data source, kept for reference:
# training_images, training_labels, test_images, test_labels = get_data()
logits, X, Y = model_mlp()

# Define loss and optimizer: mean softmax cross-entropy between the logits and
# the label placeholder, minimised with Adam.
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits=logits, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)

# Define the training graph operation.
train_op = optimizer.minimize(loss_op)

# Graph operation that initialises all variables.
init = tf.global_variables_initializer()

ValueError: Dimensions must be equal, but are 10000 and 136 for 'MatMul_3' (op: 'MatMul') with input shapes: [5000,10000], [136,2048].

In [20]:
# Build the evaluation ops once, before training starts. Creating them inside
# the epoch loop (as was done before) adds a fresh softmax/argmax/mean
# sub-graph on every display step, so the graph keeps growing while training.
# The same ops serve both training and test accuracy; only the feed changes.
pred = tf.nn.softmax(logits)  # class scores from the output logits
# A prediction is correct when the top-scoring class matches the label's.
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

# NOTE(review): the feeds below pass training_images/training_labels straight
# into X and Y; the recorded error output shows their shapes do not match the
# placeholders, which has to be resolved where the data and model are built.
with tf.Session() as sess:
    # Run the weights/biases initialisation op.
    sess.run(init)

    # Training loop: each epoch is one full-batch optimisation step.
    for epoch in range(training_epochs):
        # Run the optimisation op (backprop) and fetch the loss value.
        _, cost = sess.run([train_op, loss_op], feed_dict={X: training_images,
                                                           Y: training_labels})

        # Display logs per epoch step.
        print("Epoch:", '%04d' % (epoch + 1), "cost={:.9f}".format(cost))

        if epoch % display_accuracy_step == 0:
            # Training accuracy, evaluated with the pre-built op.
            print("Accuracy: {:.3f}".format(accuracy.eval({X: training_images, Y: training_labels})))

    print("Optimization Finished!")

    # Test accuracy uses the same op, fed with the held-out split.
    print("Test Accuracy:", accuracy.eval({X: test_images, Y: test_labels}))


ValueError: Cannot feed value of shape (3500, 5000) for Tensor 'Placeholder:0', which has shape '(?, 68, 2)'

In [None]:
# def features_extraction_with_labels(root="Datasets/celeba"):
#     # Read labels from csv
#     labels = pd.read_csv(root+"labels.csv", index_col=0, sep='\t')
    
#     img_paths = [os.path.join(root, "img"+file_name) for file_name in os.listdir(root)]
    
#     if os.path.isdir(root+"/img"):
#         img_features = []
#         img_labels = []
#         gender_labels_series = pd.Series(labels.gender.values)
        
#         for img_path in img_paths:
#             file_no = img_path.split('/')[-1].split('.')[0]
            
#             img = image.img_to_array(image.load_img(img_path, 
#                                                     target_size=None, 
#                                                     interpolation='bicubic'))
            
#             features, _ = img_preprocessing(img)
            
#             if features is not None:
#                 img_features.append(features) 
#                 img_labels.append(gender_labels_series[int(file_no)])
    
#     landmark_features = np.array(img_features)
#     gender_labels_new = (np.array(img_labels) + 1) / 2
    
#     return landmark_features, gender_labels_new
    