## Data 270 Group 2
## Data Process

In [1]:
# Make sure to install the cv2 package first by running the command below:
# pip3 install opencv-python
import cv2
import os
import numpy as np
from PIL import Image as im
from sklearn.decomposition import IncrementalPCA

In [2]:
# Locations of the raw datasets, the Haar cascade model and the output file,
# all resolved relative to the current working directory.
CUR_DIR = './'
JAFFE_DIR = os.path.join(CUR_DIR, 'jaffe')
CKP_DIR = os.path.join(CUR_DIR, 'ck+')
FACE_CASCADE_FILE = os.path.join(CUR_DIR, 'haarcascade_frontalface_default.xml')
PROCESSED_DATA_FILE = os.path.join(CUR_DIR, 'processed_data')

#### NE = neutral = 0; HA = happiness = 1; AN = angry = 2; DI = disgust = 3; FE = fear = 4; SA = sadness = 5; SU = surprise = 6; CO = contempt = 7; anything else = N/A = -1

In [3]:
# Two-letter emotion codes mapped to integer class labels; the position of
# each code in the tuple is its label.
LABEL_CODE = {
    code: index
    for index, code in enumerate(
        ('NE', 'HA', 'AN', 'DI', 'FE', 'SA', 'SU', 'CO')
    )
}

# Integer class labels mapped to readable names; -1 stands for an unknown
# (N/A) label.
LABEL_WORD = dict(enumerate((
    'NEUTRAL',
    'HAPPINESS',
    'ANGRY',
    'DISGUST',
    'FEAR',
    'SADNESS',
    'SURPRISE',
    'CONTEMPT',
)))
LABEL_WORD[-1] = 'N/A'

In [4]:
# Accumulator for the (image, label) tuples produced by both dataset loops.
IMG_LST = []
# Scratch values that are overwritten for every processed file.
CODE, LABEL = '', -1
# Target size handed to cv2.resize (also the shape of the normalization array).
DIM = (256, 256)
# Number of principal components kept by the PCA step.
PCA_COMPONENTS = 26

In [5]:
# detect face
# Load the pretrained Haar cascade used for frontal-face detection.
face_cascade = cv2.CascadeClassifier(FACE_CASCADE_FILE)
# CascadeClassifier does not raise when the XML file is missing or invalid; it
# simply stays empty and fails later inside detectMultiScale. Fail early with
# a clear message instead.
if face_cascade.empty():
    raise IOError(
        'Unable to load face cascade file: {}'.format(FACE_CASCADE_FILE)
    )

In [6]:
# Zero-filled float array with the target image shape; it is handed to
# cv2.normalize as its destination argument.
norm = np.zeros(shape=DIM, dtype=float)

In [7]:
# Shared PCA model; transform_data calls fit_transform on it for every image.
ipca = IncrementalPCA(
    n_components=PCA_COMPONENTS,
)

In [8]:
# data transformation
def transform_data(img):
    """Turn a raw BGR image into a normalized, PCA-compressed face crop.

    Steps, in order: grayscale conversion, Haar-cascade face detection and
    cropping to the first detected face, resize to ``DIM``, min-max
    normalization to the 0..255 range, and a PCA projection followed by
    reconstruction using the module-level ``ipca`` model.

    Args:
        img: BGR image array, e.g. the result of ``cv2.imread``.

    Returns:
        The reconstructed face image as returned by
        ``ipca.inverse_transform``.

    Raises:
        ValueError: if ``img`` is ``None`` (``cv2.imread`` returns ``None``
            for files it cannot read) or if no face is detected.
    """
    if img is None:
        raise ValueError('transform_data received None instead of an image')

    # data reduction - convert to grayscale
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # detect face (scaleFactor=1.1, minNeighbors=5)
    faces = face_cascade.detectMultiScale(gray, 1.1, 5)
    # The result is an empty tuple or an array of rectangles, so test the
    # length rather than truthiness (ambiguous for arrays).
    if len(faces) == 0:
        raise ValueError('no face detected in image')

    # crop to the first detected rectangle (x, y, width, height)
    x, y, w, h = faces[0]
    face = gray[y:y + h, x:x + w]

    # resize the crop to DIM; the crop is stretched to fit, its aspect ratio
    # is not explicitly preserved
    face = cv2.resize(face, DIM, interpolation=cv2.INTER_AREA)

    # data normalization - min-max scale pixel values to 0..255
    face = cv2.normalize(face, norm, 0, 255, cv2.NORM_MINMAX)

    # data reduction - PCA: fit on this image's rows, project, and rebuild the
    # image from the retained components
    face = ipca.inverse_transform(ipca.fit_transform(face))

    return face

### Read and process JAFFE dataset

In [9]:
# open jaffe directory, read each image file, and extract label
for filename in os.listdir(JAFFE_DIR):
    # skip hidden files (e.g. .DS_Store); they are not images
    if filename.startswith('.'):
        continue

    # example of jaffe filename format: KA.AN1.39.tiff
    # The second dot-separated field starts with the two-letter emotion code.
    # A name without that field gets an empty code and therefore label -1.
    name_parts = filename.split('.')
    CODE = name_parts[1] if len(name_parts) > 1 else ''

    # Every key of LABEL_CODE is two characters long, so looking up the first
    # two characters is the same as testing startswith() for each code.
    # Anything unrecognized is labeled N/A (-1).
    LABEL = LABEL_CODE.get(CODE[:2], -1)

    # read image; cv2.imread returns None for files it cannot decode
    img = cv2.imread(os.path.join(JAFFE_DIR, filename))
    if img is None:
        print('Skipping unreadable file: {}'.format(
            os.path.join(JAFFE_DIR, filename)))
        continue

    # data transformation
    img = transform_data(img)

    # data augmentation - flipping image
    # flipping image for neutral expression only because it contains less images
    # flip code 1 mirrors the image around its vertical axis
    if CODE.startswith('NE'):
        flipped_img = cv2.flip(img, 1)
        IMG_LST.append((flipped_img, LABEL))

    # append a tuple (image array at index 0 and label at index 1) into the image list
    IMG_LST.append((img, LABEL))

### Read and process CK+ dataset

In [10]:
# open each emotion directory in ck+ directory, read each image file, and extract label
for dirname in os.listdir(CKP_DIR):
    emotion_dir = os.path.join(CKP_DIR, dirname)

    # skip hidden entries and stray regular files; only directories hold images
    if dirname.startswith('.') or not os.path.isdir(emotion_dir):
        continue

    # directory names are the two-letter emotion codes of LABEL_CODE;
    # anything else is labeled N/A (-1)
    LABEL = LABEL_CODE.get(dirname, -1)

    for filename in os.listdir(emotion_dir):
        # skip hidden file
        if filename.startswith('.'):
            continue

        # read image; cv2.imread returns None for files it cannot decode
        img = cv2.imread(os.path.join(emotion_dir, filename))
        if img is None:
            print('Skipping unreadable file: {}'.format(
                os.path.join(emotion_dir, filename)))
            continue

        # data transformation
        img = transform_data(img)

        # data augmentation - flipping image
        # flipping image for contempt expression only because it contains less images
        # flip code 1 mirrors the image around its vertical axis
        if dirname == 'CO':
            flipped_img = cv2.flip(img, 1)
            IMG_LST.append((flipped_img, LABEL))

        # append a tuple (image array at index 0 and label at index 1) into the image list
        IMG_LST.append((img, LABEL))

#### Save the processed image list into a npy file

In [11]:
# Each record pairs an image array (stored as a Python object) with its
# integer label.
dt = np.dtype('object,int')
# Write the whole list of (image, label) records to PROCESSED_DATA_FILE.
np.save(
    file=PROCESSED_DATA_FILE,
    arr=np.array(IMG_LST, dtype=dt),
)