In [1]:
import numpy as np
import cv2, random, os
import matplotlib.pyplot as plt

from PIL import Image
from pathlib import Path
from numpy import linalg as LA
from numpy.linalg import inv
from libsvm.svm import svm_problem, svm_parameter
from libsvm.svmutil import svm_train, svm_predict

In [2]:
# Build the PIE train/test split: pick NUM_SUBJECTS random subject folders and,
# inside each one, use TRAIN_IMAGES_PER_SUBJECT random images for training and
# every remaining .jpg for testing. The folder number doubles as the class label.
# NOTE: the `random` module is not seeded in the visible cells, so the chosen
# subjects and the split differ between runs.
NUM_SUBJECTS = 25
TRAIN_IMAGES_PER_SUBJECT = 119

# NOTE(review): range(1, 68) yields folder ids 1..67 -- confirm whether a
# folder named 68 exists and should be eligible.
folders = random.sample(range(1, 68), NUM_SUBJECTS)
training_set_list = []
test_set_list = []
training_label_list = []
test_label_list = []


def _read_flat_gray(img_path):
    """Read `img_path` as a grayscale image and return it reshaped to (1, 1024)."""
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread reports an unreadable or missing file by returning None
        raise FileNotFoundError("could not read image: %s" % img_path)
    # flatten the pixel values from (32, 32) to (1, 1024)
    return img.reshape(1, 1024)


for folder in folders:
    folder_path = os.path.join(os.getcwd(), "PIE", str(folder))
    # sorted: os.listdir order is arbitrary, so sort to make the sampling
    # population independent of the filesystem
    jpg_files = sorted(f for f in os.listdir(folder_path) if f.endswith('.jpg'))
    if len(jpg_files) < TRAIN_IMAGES_PER_SUBJECT:
        raise ValueError(
            "folder %s holds %d .jpg files, need at least %d"
            % (folder_path, len(jpg_files), TRAIN_IMAGES_PER_SUBJECT)
        )

    sub_training_img_file = random.sample(jpg_files, TRAIN_IMAGES_PER_SUBJECT)
    # set membership keeps the hold-out filter linear in the number of files
    training_names = set(sub_training_img_file)
    sub_test_img_file = [f for f in jpg_files if f not in training_names]

    for single_file in sub_training_img_file:
        single_img = _read_flat_gray(os.path.join(folder_path, single_file))
        training_set_list.append(single_img)
        training_label_list.append(folder)

    for single_file in sub_test_img_file:
        single_img = _read_flat_gray(os.path.join(folder_path, single_file))
        test_set_list.append(single_img)
        test_label_list.append(folder)

In [3]:
# Add the self-portrait class, draw the random training subset and fit PCA on it.
SELF_LABEL = 100        # class id used for the self images
N_SELF_TRAIN = 7        # the first 7 self images train, the remainder test
N_TRAIN_SAMPLES = 500   # size of the random training subset
N_COMPONENTS = 200      # number of leading singular vectors kept

# Read every file whose name starts with 'self', convert it to grayscale,
# resize it to 32x32 and flatten it to a (1, 1024) row.
self_path = os.getcwd()+'/self'
self_img_list = []
# sorted: os.listdir order is arbitrary; sorting fixes which images train/test
for sub_self_img in sorted(os.listdir(self_path)):
    if sub_self_img.startswith('self'):
        color_image = cv2.imread(self_path+'/'+sub_self_img)
        if color_image is None:
            # cv2.imread reports an unreadable file by returning None
            raise FileNotFoundError("could not read image: %s" % sub_self_img)

        # convert colour image to gray-scale image
        gray_image = cv2.cvtColor(color_image, cv2.COLOR_BGR2GRAY)
        # resize image to 32*32 pixels
        resized_image = cv2.resize(gray_image, (32, 32))
        resized_image = resized_image.reshape(1, 1024)
        self_img_list.append(resized_image)

# Split the self images into train and test.
# NOTE: these appends extend module-level lists, so re-running this cell
# without re-running the PIE loading cell adds the self images again.
for single_self_img in self_img_list[:N_SELF_TRAIN]:
    training_set_list.append(single_self_img)
    training_label_list.append(SELF_LABEL)
for single_self_img in self_img_list[N_SELF_TRAIN:]:
    # append the self image itself (previously a stale PIE image was appended here)
    test_set_list.append(single_self_img)
    test_label_list.append(SELF_LABEL)

# Draw the training subset over ALL available indices (0 .. len-1); the range
# is derived from the list so index 0 is eligible and no size is hard-coded.
samples = random.sample(range(len(training_set_list)), N_TRAIN_SAMPLES)
X_train = np.array([training_set_list[i] for i in samples])  # (n, 1, 1024)
y_train = [training_label_list[i] for i in samples]

# PCA via SVD of the mean-centred data, arranged as (pixels, samples).
mean_X = np.mean(X_train, axis=0)
centered_X = X_train - mean_X
centered_X = centered_X.reshape(len(samples), -1).T
U, D, VT = np.linalg.svd(centered_X, full_matrices=False)
D_diag = np.diag(D)
U_200 = U[:, :N_COMPONENTS]

X_train = X_train.reshape(len(samples), -1)  # (n, 1024) raw pixel rows

# NOTE: the projection is applied to the un-centred pixel rows. The test-set
# projection in this notebook is computed the same way, so this is left
# unchanged to keep both feature spaces consistent.
X_restructure_200 = U_200.T @ X_train.T
X_restructure_200 = X_restructure_200.T  # (n, N_COMPONENTS)

In [4]:
# Stack the held-out images into one 2-D matrix with one flattened image per
# row, and take a matching copy of their labels. The row count comes from the
# data itself, so the cell keeps working when the split size changes.
test_y = list(test_label_list)
test_x = np.array(test_set_list)
test_x = test_x.reshape(len(test_set_list), -1)

In [5]:
# Mean-centred copy of the test matrix, stored transposed as (pixels, samples).
# No fixed-size reshape is needed: subtracting the per-column mean keeps
# test_x's own 2-D shape.
# NOTE: centered_testX is not used by the projection below.
mean_testX = np.mean(test_x, axis = 0)
centered_testX = (test_x - mean_testX).T

# Project the raw (un-centred) test rows onto the retained PCA directions,
# matching how the training features were projected.
full_X_pca_test = (U_200.T @ test_x.T).T

In [6]:
# Sanity check: projected test matrix shape alongside the number of test labels
(full_X_pca_test.shape, len(test_y))

((1278, 200), 1278)

 ## SVM

1. raw image classification

In [15]:
def SVM_raw(y_train, X_train, t, C, X_test=None, y_test=None):
    """Train a LIBSVM classifier on raw pixel features and evaluate it.

    Parameters
    ----------
    y_train : sequence of training labels.
    X_train : training feature rows, one row per sample.
    t : LIBSVM kernel type passed to `-t` (0 linear, 1 polynomial, 2 RBF, 3 sigmoid).
    C : cost value passed to `-c`.
    X_test, y_test : optional evaluation set. When omitted, the notebook-level
        `test_x` / `test_y` are used, which is what this function always did.

    Returns
    -------
    (p_labs, p_acc, p_vals) exactly as returned by `svm_predict`.
    `svm_predict` also prints the accuracy line as a side effect.
    """
    # Resolve the defaults at call time so the globals may be rebuilt between calls
    if X_test is None:
        X_test = test_x
    if y_test is None:
        y_test = test_y

    # -b 1 is used for both training and prediction
    param = svm_parameter('-t %s -c %s -b 1' % (t, C))
    prob = svm_problem(y_train, X_train)
    model = svm_train(prob, param)

    p_labs, p_acc, p_vals = svm_predict(y_test, X_test, model, '-b 1')
    return p_labs, p_acc, p_vals

In [16]:
def SVM_200(y, X, t, C, X_test=None, y_test=None):
    """Train a LIBSVM classifier on the PCA features and evaluate it.

    Parameters
    ----------
    y : sequence of training labels.
    X : training feature rows in the PCA space, one row per sample.
    t : LIBSVM kernel type passed to `-t` (0 linear, 1 polynomial, 2 RBF, 3 sigmoid).
    C : cost value passed to `-c`.
    X_test, y_test : optional evaluation set. When omitted, the notebook-level
        `full_X_pca_test` / `test_y` are used, which is what this function
        always did.

    Returns
    -------
    (p_labs, p_acc, p_vals) exactly as returned by `svm_predict`.
    `svm_predict` also prints the accuracy line as a side effect.
    """
    # Resolve the defaults at call time so the globals may be rebuilt between calls
    if X_test is None:
        X_test = full_X_pca_test
    if y_test is None:
        y_test = test_y

    # -b 1 is used for both training and prediction
    param = svm_parameter('-t %s -c %s -b 1' % (t, C))
    prob = svm_problem(y, X)
    model = svm_train(prob, param)

    p_labs, p_acc, p_vals = svm_predict(y_test, X_test, model, '-b 1')
    return p_labs, p_acc, p_vals

In [17]:
def SVM_80(y, X, t, C, X_test=None, y_test=None):
    """Train a LIBSVM classifier on the first 80 PCA features and evaluate it.

    Parameters
    ----------
    y : sequence of training labels.
    X : training feature rows, already truncated by the caller to the columns
        that should be used.
    t : LIBSVM kernel type passed to `-t` (0 linear, 1 polynomial, 2 RBF, 3 sigmoid).
    C : cost value passed to `-c`.
    X_test, y_test : optional evaluation set. When omitted, the first 80 columns
        of the notebook-level `full_X_pca_test` and `test_y` are used, which is
        what this function always did.

    Returns
    -------
    (p_labs, p_acc, p_vals) exactly as returned by `svm_predict`.
    `svm_predict` also prints the accuracy line as a side effect.
    """
    # Resolve the defaults at call time so the globals may be rebuilt between calls
    if X_test is None:
        X_test = full_X_pca_test[:, :80]
    if y_test is None:
        y_test = test_y

    # -b 1 is used for both training and prediction
    param = svm_parameter('-t %s -c %s -b 1' % (t, C))
    prob = svm_problem(y, X)
    model = svm_train(prob, param)

    p_labs, p_acc, p_vals = svm_predict(y_test, X_test, model, '-b 1')
    return p_labs, p_acc, p_vals

1. RAW image classification

In [24]:
# Sweep every (penalty, kernel type) pair on the raw pixel features;
# the penalty varies slowest, matching the order of the printed results.
raw_grid = [(C, t) for C in [0.01, 0.1, 1] for t in [0, 1]]
for C, t in raw_grid:
    print('with penalty C %s and parameter t %s' % (C, t))
    SVM_raw(y_train, X_train, t, C)

with penalty C 0.01 and parameter t 0
Accuracy = 84.8983% (1085/1278) (classification)
with penalty C 0.01 and parameter t 1
Accuracy = 56.8858% (727/1278) (classification)
with penalty C 0.1 and parameter t 0
Accuracy = 84.8983% (1085/1278) (classification)
with penalty C 0.1 and parameter t 1
Accuracy = 56.4945% (722/1278) (classification)
with penalty C 1 and parameter t 0
Accuracy = 84.82% (1084/1278) (classification)
with penalty C 1 and parameter t 1
Accuracy = 56.7293% (725/1278) (classification)


2. 200 dimension image classification

In [25]:
# Sweep every (penalty, kernel type) pair on the 200-dimensional PCA features;
# the penalty varies slowest, matching the order of the printed results.
pca200_grid = [(C, t) for C in [0.01, 0.1, 1] for t in [0, 2]]
for C, t in pca200_grid:
    print('with penalty C %s and parameter t %s' % (C, t))
    SVM_200(y_train, X_restructure_200, t, C)

with penalty C 0.01 and parameter t 0
Accuracy = 84.1941% (1076/1278) (classification)
with penalty C 0.01 and parameter t 2
Accuracy = 3.99061% (51/1278) (classification)
with penalty C 0.1 and parameter t 0
Accuracy = 84.507% (1080/1278) (classification)
with penalty C 0.1 and parameter t 2
Accuracy = 3.99061% (51/1278) (classification)
with penalty C 1 and parameter t 0
Accuracy = 83.9593% (1073/1278) (classification)
with penalty C 1 and parameter t 2
Accuracy = 3.99061% (51/1278) (classification)


3. 80 dimension image classification 

In [26]:
# Sweep every (penalty, kernel type) pair on the first 80 PCA features;
# the penalty varies slowest, matching the order of the printed results.
pca80_grid = [(C, t) for C in [0.01, 0.1, 1] for t in [0, 3]]
for C, t in pca80_grid:
    print('with penalty C %s and parameter t %s' % (C, t))
    SVM_80(y_train, X_restructure_200[:, :80], t, C)

with penalty C 0.01 and parameter t 0
Accuracy = 83.0203% (1061/1278) (classification)
with penalty C 0.01 and parameter t 3
Accuracy = 3.99061% (51/1278) (classification)
with penalty C 0.1 and parameter t 0
Accuracy = 82.7074% (1057/1278) (classification)
with penalty C 0.1 and parameter t 3
Accuracy = 3.99061% (51/1278) (classification)
with penalty C 1 and parameter t 0
Accuracy = 82.5509% (1055/1278) (classification)
with penalty C 1 and parameter t 3
Accuracy = 3.99061% (51/1278) (classification)
