In [45]:
import os, math, import_ipynb, cv2, dlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.preprocessing import image

from sklearn.metrics import classification_report,accuracy_score
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier

from utils import *

In [2]:
os.chdir("/Users/kenyew/Documents/Git/AMLSassignment19_20/AMLS_19-20_Ken_Yew_Piong_SN16004444/")
os.getcwd()

'/Users/kenyew/Documents/Git/AMLSassignment19_20/AMLS_19-20_Ken_Yew_Piong_SN16004444'

In [3]:
df = pd.read_csv("./Datasets/celeba/labels.csv")
df = split_df(df)
print(df)

      img_name gender smiling
0        0.jpg     -1       1
1        1.jpg     -1       1
2        2.jpg      1      -1
3        3.jpg     -1      -1
4        4.jpg     -1      -1
...        ...    ...     ...
4995  4995.jpg      1       1
4996  4996.jpg      1       1
4997  4997.jpg      1       1
4998  4998.jpg      1       1
4999  4999.jpg      1       1

[5000 rows x 3 columns]


In [4]:
img_name_data = df['img_name']
gender_data = df['gender']
smiling_data = df['smiling']

In [5]:
def extract_features_labels_from_celeba():
    """
    This funtion extracts the landmarks features for all images in the folder 'dataset/celeba'.
    It also extract the gender label for each image.
    :return:
        landmark_features:  an array containing 68 landmark points for each image in which a face was detected
        gender_labels:      an array containing the gender label (male=0 and female=1) for each image in
                            which a face was detected
    """
    image_paths = [os.path.join(images_dir, l) for l in os.listdir(images_dir)]
    target_size = None
    labels_file = open(os.path.join(basedir, labels_filename), 'r')
    lines = labels_file.readlines()
    lines = [line.strip('"\n') for line in lines[:]]
    gender_labels = {line.split('\t')[0] : int(line.split('\t')[2]) for line in lines[1:]}
    smiling_labels = {line.split('\t')[0] : int(line.split('\t')[3]) for line in lines[1:]}
    if os.path.isdir(images_dir):
        all_features = []
        all_gender_labels = []
        all_smiling_labels = []
        for img_path in image_paths:
            if not img_path.endswith('.jpg'):
                continue
            file_name= img_path.split('.')[1].split('/')[-1]

            # load image
            img = image.img_to_array(
                image.load_img(img_path,
                               target_size=target_size,
                               interpolation='bicubic'))
            features, _ = run_dlib_shape(img)
            if features is not None:
                all_features.append(features)
                all_gender_labels.append(gender_labels[file_name])
                all_smiling_labels.append(smiling_labels[file_name])

    landmark_features = np.array(all_features)
    gender_labels = (np.array(all_gender_labels) + 1)/2 # simply converts the -1 into 0, so male=0 and female=1
    smiling_labels = (np.array(all_smiling_labels) + 1)/2
    return landmark_features, gender_labels, smiling_labels

landmark_features, gender_labels, smiling_labels = extract_features_labels()

In [41]:
def get_data():

    X, y, _ = extract_features_labels_from_celeba()
    Y = np.array([y, -(y - 1)]).T
    tr_X = X[:100]
    tr_Y = Y[:100]
    te_X = X[100:]
    te_Y = Y[100:]

    return tr_X, tr_Y, te_X, te_Y

tr_X, tr_Y, te_X, te_Y= get_data()

In [42]:
# sklearn functions implementation
def img_SVM(training_images, training_labels, test_images, test_labels):
    classifier = SVC(kernel = "linear")
    classifier.fit(training_images, training_labels)
    pred = classifier.predict(test_images)
    print("Accuracy:", accuracy_score(test_labels, pred))
#     print(pred)
    return pred

def scale_data(training_images, test_images):
    scaler = StandardScaler()
    scaler.fit(training_images)
    tr_X = scaler.transform(training_images)
    te_X = scaler.transform(test_images)

    return tr_X, te_X

def img_MLP(training_images, training_labels, test_images, test_labels): 
    model = MLPClassifier(solver = 'adam', alpha = 1e-5, hidden_layer_sizes = (3,2), random_state = 1)
    tr_X, te_X = scale_data(training_images, test_images)
    model.fit(training_images, training_labels)
    pred = model.predict(test_images)
    print("Accuracy:", accuracy_score(test_labels, pred))
    
    return pred

In [43]:
pred_img_SVM = img_SVM(tr_X.reshape((100, 68*2)), list(zip(*tr_Y))[0], te_X.reshape((4700, 68*2)), list(zip(*te_Y))[0])

Accuracy: 0.8489361702127659


In [46]:
pred_img_MLP = img_MLP(tr_X.reshape((100, 68*2)), list(zip(*tr_Y))[0], te_X.reshape((4700, 68*2)), list(zip(*te_Y))[0])

Accuracy: 0.5048936170212766


In [None]:
# basedir = './Datasets/celeba'
# images_dir = os.path.join(basedir,'img')
# labels_filename = 'labels.csv'
# detector = dlib.get_frontal_face_detector()
# predictor = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')
# image_paths = [os.path.join(images_dir, l) for l in os.listdir(images_dir)]
# target_size = None
# labels_file = open(os.path.join(basedir, labels_filename), 'r')
# lines = labels_file.readlines()
# lines = [line.strip('"\n') for line in lines[:]]

# my_lines = [int(line.split('\t')[3]) for line in lines[1:]]
# gender_labels = {line.split('\t')[0] : int(line.split('\t')[2]) for line in lines[1:]}
# smiling_labels = {line.split('\t')[0] : int(line.split('\t')[3]) for line in lines[1:]}