# Data preprocessing

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cmx
import matplotlib.colors as colors
from mpl_toolkits.mplot3d import Axes3D
import cv2
import os

# parameters
# Number of image files enumerated per PIE subject folder (1.jpg .. 170.jpg).
NUM_EACH_SUBJECT = 170
# Number of selfie image files enumerated under selfimg/ (1.jpg .. 10.jpg).
NUM_SELFIES = 10
# Fraction of each path list that is assigned to the training split.
TRAIN_TEST_RATIO = 0.7
# Seed used for the random train/test selection (also the default seed of the helpers below).
RANDOM_SEED = 22
# Seed NumPy's global random generator once at configuration time.
np.random.seed(RANDOM_SEED)

def choose_random_idx(num, vmin, vmax, seed=RANDOM_SEED):
    """Draw `num` distinct integers from the half-open range [vmin, vmax).

    The global NumPy random generator is re-seeded with `seed` on every call,
    so the same arguments always produce the same selection.

    Args:
        num: how many indices to keep from the shuffled range.
        vmin: first candidate value (inclusive).
        vmax: end of the candidate range (exclusive).
        seed: value passed to np.random.seed before shuffling.

    Returns:
        The selected indices as a list sorted in ascending order.
    """
    # Note: this resets the *global* NumPy RNG state as a side effect.
    np.random.seed(seed)
    candidates = list(range(vmin, vmax))
    shuffled = np.random.permutation(candidates)
    picked = shuffled[:num]
    return sorted(picked)

def get_train_test_list(input_list, ratio, seed=RANDOM_SEED):
    """Split a list into train and test parts by random index selection.

    round(len(input_list) * ratio) positions are chosen with
    choose_random_idx; the items at those positions form the training list
    and all remaining items form the test list. Both outputs keep the
    relative order of `input_list`.

    Args:
        input_list: sequence of items to split.
        ratio: fraction of items that go to the training list.
        seed: seed forwarded to choose_random_idx, so different seeds give
            different splits and the same seed gives the same split.

    Returns:
        (train_list, test_list)
    """
    total = len(input_list)
    # Forward the caller's seed rather than the module constant, so a
    # non-default `seed` argument actually takes effect.
    chosen = choose_random_idx(num=round(total * ratio), vmin=0, vmax=total, seed=seed)
    # A set of plain ints gives constant-time membership checks in the loop.
    train_idx = {int(i) for i in chosen}

    train_list = []
    test_list = []
    for i, item in enumerate(input_list):
        if i in train_idx:
            train_list.append(item)
        else:
            test_list.append(item)
    return train_list, test_list

def get_pie_list(data_idx):
    """Build the PIE image path lists for the given subject ids.

    For every subject id the paths 'PIE/<id>/1.jpg' .. 'PIE/<id>/<NUM_EACH_SUBJECT>.jpg'
    are generated and split per subject with get_train_test_list. Because the
    same seed is used for every subject, the same positions are selected for
    training in each subject's list.

    Args:
        data_idx: iterable of subject ids (used as folder names).

    Returns:
        (pie_list, pie_train_list, pie_test_list): all paths, the training
        paths and the test paths, each concatenated over subjects in the
        order of `data_idx`.
    """
    all_paths, train_paths, test_paths = [], [], []

    for subj_idx in data_idx:
        subj_dir = 'PIE/' + str(subj_idx) + '/'
        subj_paths = [subj_dir + str(n) + '.jpg' for n in range(1, NUM_EACH_SUBJECT + 1)]
        subj_train, subj_test = get_train_test_list(
            subj_paths, ratio=TRAIN_TEST_RATIO, seed=RANDOM_SEED
        )
        all_paths += subj_paths
        train_paths += subj_train
        test_paths += subj_test

    return all_paths, train_paths, test_paths

def get_self_list():
    """Build the selfie image path lists.

    Generates 'selfimg/1.jpg' .. 'selfimg/<NUM_SELFIES>.jpg' and splits them
    with get_train_test_list using TRAIN_TEST_RATIO and RANDOM_SEED.

    Returns:
        (self_list, self_train_list, self_test_list)
    """
    selfie_paths = []
    for n in range(1, NUM_SELFIES + 1):
        selfie_paths.append('selfimg/' + str(n) + '.jpg')

    selfie_train, selfie_test = get_train_test_list(
        selfie_paths, ratio=TRAIN_TEST_RATIO, seed=RANDOM_SEED
    )
    return selfie_paths, selfie_train, selfie_test

def get_img(input_list):
    """Load images with OpenCV and derive an integer label from each path.

    The label comes from the path itself:
      * 'PIE/<subject>/<file>'  -> int(<subject>)
      * 'selfimg/<file>'        -> 0

    Paths with any other leading component are reported with a printed error
    and skipped, so the returned images and labels always stay aligned
    one-to-one.

    Args:
        input_list: sequence of '/'-separated image paths.

    Returns:
        (img_a, labels_a): NumPy array of the loaded images (as returned by
        cv2.imread, i.e. not converted to grayscale or flattened) and NumPy
        array of the matching integer labels.

    Raises:
        OSError: if cv2.imread cannot load one of the images (it returns
            None for missing or undecodable files).
    """
    img_v = []
    labels = []
    for path in input_list:
        pathsplit = path.split('/')

        # Resolve the label first: an unrecognised path must not contribute
        # an image without a label.
        if pathsplit[0] == 'PIE':
            label = int(pathsplit[1])
        elif pathsplit[0] == 'selfimg':
            label = 0  # label of selfimg is set as 0
        else:
            print('Error: Wrong path list!')
            continue

        img = cv2.imread(path)
        if img is None:
            # Fail loudly here instead of letting a None entry corrupt the array.
            raise OSError('Could not read image: ' + path)

        img_v.append(img)
        labels.append(label)

    img_a = np.array(img_v)
    labels_a = np.array(labels)

    return img_a, labels_a

#============================================================
#data_idx = choose_random_idx(num=25, vmin=1, vmax=68, seed=RANDOM_SEED)
# Subject ids 1..24 of the PIE set are used.
# NOTE(review): 24 subjects plus label 0 for the selfies gives 25 distinct
# labels, while the classifiers in this notebook use 26 output units —
# confirm whether range(1, 26) was intended.
data_idx = list(range(1, 25))

pie_list, pie_train_list, pie_test_list = get_pie_list(data_idx)
self_list, self_train_list, self_test_list = get_self_list()

# Combined path lists (PIE images first, then selfies).
list_img = pie_list + self_list
train_list = pie_train_list + self_train_list
test_list = pie_test_list + self_test_list


# Summary of the split sizes.
print('data_idx',data_idx)
for _caption, _paths in (
    ('Number of PIE images:', pie_list),
    ('Number of PIE train images:', pie_train_list),
    ('Number of PIE test images:', pie_test_list),
    ('Number of self images:', self_list),
    ('Number of self train images:', self_train_list),
    ('Number of self test images:', self_test_list),
    ('Number of whole train images:', train_list),
    ('Number of whole test images:', test_list),
):
    print(_caption, len(_paths))

data_idx [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
Number of PIE images: 4080
Number of PIE train images: 2856
Number of PIE test images: 1224
Number of self images: 10
Number of self train images: 7
Number of self test images: 3
Number of whole train images: 2863
Number of whole test images: 1227


# CNN

In [2]:
import tensorflow as tf


In [3]:
# load dataset
# Read every image listed in train_list / test_list from disk via get_img,
# which also derives the integer class label from each path.
# Expected data.shape = (Num, 32, 32, 3), matching the input_shape of the
# models below — assumes all source images are 32x32 colour files; TODO confirm.
# No pixel rescaling is applied here; the arrays are used as loaded.
train_data, train_label = get_img(train_list)
test_data, test_label = get_img(test_list)

In [23]:
# Baseline CNN: two 5x5 conv + 2x2 max-pool stages (32x32 -> 16x16 -> 8x8),
# then a 500-unit dense layer and a 26-way softmax classifier.
cnn_model = tf.keras.models.Sequential()
cnn_model.add(tf.keras.layers.Conv2D(
    filters=20, kernel_size=5, padding='same',
    input_shape=(32, 32, 3), activation='relu'))
cnn_model.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))
cnn_model.add(tf.keras.layers.Conv2D(
    filters=50, kernel_size=5, padding='same', activation='relu'))
cnn_model.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))
cnn_model.add(tf.keras.layers.Flatten())
cnn_model.add(tf.keras.layers.Dense(500, activation='relu'))
cnn_model.add(tf.keras.layers.Dense(26, activation='softmax'))

# Labels are plain integers, hence the sparse categorical cross-entropy loss.
cnn_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
cnn_model.fit(train_data, train_label, epochs=10)

# Evaluate on the held-out test split and report loss / accuracy.
loss, accuracy = cnn_model.evaluate(test_data, test_label, batch_size=32, verbose=2)
print('The loss is: ',loss)
print('The accuracy is: {}%'.format(np.round(accuracy*100, 2)))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
39/39 - 0s - loss: 0.0550 - accuracy: 0.9845 - 311ms/epoch - 8ms/step
The loss is:  0.05500802770256996
The accuracy is: 98.45%


In [12]:
# change network architectures
# Deeper VGG-like network: eight 3x3 conv layers and three dense layers.
# Only three 2x2 max-pool layers are used (none after the first conv layer
# and none after the last conv pair), so the 32x32 input is reduced
# 32 -> 16 -> 8 -> 4 before flattening.
VGG11_model = tf.keras.models.Sequential()

# Feature extractor
VGG11_model.add(tf.keras.layers.Conv2D(
    filters=64, kernel_size=3, padding='same',
    input_shape=(32, 32, 3), activation='relu'))
VGG11_model.add(tf.keras.layers.Conv2D(
    filters=128, kernel_size=3, padding='same', activation='relu'))
VGG11_model.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))
VGG11_model.add(tf.keras.layers.Conv2D(
    filters=256, kernel_size=3, padding='same', activation='relu'))
VGG11_model.add(tf.keras.layers.Conv2D(
    filters=256, kernel_size=3, padding='same', activation='relu'))
VGG11_model.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))
VGG11_model.add(tf.keras.layers.Conv2D(
    filters=512, kernel_size=3, padding='same', activation='relu'))
VGG11_model.add(tf.keras.layers.Conv2D(
    filters=512, kernel_size=3, padding='same', activation='relu'))
VGG11_model.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))
VGG11_model.add(tf.keras.layers.Conv2D(
    filters=512, kernel_size=3, padding='same', activation='relu'))
VGG11_model.add(tf.keras.layers.Conv2D(
    filters=512, kernel_size=3, padding='same', activation='relu'))

# Classifier head with dropout before each of the two 2048-unit dense layers
VGG11_model.add(tf.keras.layers.Flatten())
VGG11_model.add(tf.keras.layers.Dropout(rate=0.5))
VGG11_model.add(tf.keras.layers.Dense(2048, activation='relu'))
VGG11_model.add(tf.keras.layers.Dropout(rate=0.5))
VGG11_model.add(tf.keras.layers.Dense(2048, activation='relu'))
VGG11_model.add(tf.keras.layers.Dense(26, activation='softmax'))

VGG11_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
VGG11_model.fit(train_data, train_label, epochs=15)

# Evaluate on the held-out test split and report loss / accuracy.
loss, accuracy = VGG11_model.evaluate(test_data, test_label, batch_size=32, verbose=2)
print('The loss is: ',loss)
print('The accuracy is: {}%'.format(np.round(accuracy*100, 2)))

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
39/39 - 4s - loss: 0.0635 - accuracy: 0.9813 - 4s/epoch - 95ms/step
The loss is:  0.06349285691976547
The accuracy is: 98.13%
