In [22]:
# import libraries
import numpy as np 
from scipy.io import loadmat
import matplotlib.pyplot as plt
from numpy.linalg import matrix_rank
from numpy.linalg import inv
from numpy.linalg import norm

In [2]:
data = loadmat("face_emotion_data.mat")
print([key for key in data])

['__header__', '__version__', '__globals__', 'y', 'X']


In [20]:
face_matrix = data['X']
label_vec = data['y']

np.shape(face_matrix) # get the shape (128, 9)
matrix_rank(face_matrix) # get the rank(face_matrix) = 9

# so the face_matrix is full rank then the inverse of A^TA exist

# get the classifier weights 
w = inv(face_matrix.transpose() @ face_matrix) @ face_matrix.transpose() @ label_vec
w

array([[ 0.94366942],
       [ 0.21373778],
       [ 0.26641775],
       [-0.39221373],
       [-0.00538552],
       [-0.01764687],
       [-0.16632809],
       [-0.0822838 ],
       [-0.16644364]])

In [15]:
np.shape(label_vec)

(128, 1)

In [36]:
# each colomn of train set has been normalized, and its norm is 11.313708498984763
print(norm(face_matrix[:, 0]))
print(norm(face_matrix[:, 1]))

copied_matrix = np.copy(face_matrix)
copied_matrix

# calculate the least squared probrom value with original face_matrix
original_min = norm ((face_matrix @ w) - label_vec)
print("original:", original_min)

# index for making a unit vector
index = 1/np.sqrt(128)
copied_matrix[:, 0] = index
copied_matrix 
max_value = 0 # initialized maximal differences 
j = 0 # the index of the maximal differences 

# substitute norm vector for x1...x9 respectively
for i in range(9):
    copied_matrix = np.copy(face_matrix)
    copied_matrix[:, i] = index
    w = inv(copied_matrix.transpose() @ copied_matrix) @ copied_matrix.transpose() @ label_vec
    substituted_min = norm((copied_matrix @ w) - label_vec)
    print(i, "column vector changed, the minimal value is:", substituted_min)
    print("The absolute value of the difference between this value and the original solution is", np.absolute(substituted_min - original_min))
    if (np.absolute(substituted_min - original_min) > max_value):
        max_value = np.absolute(substituted_min - original_min)
        j = i

print("The most important vector is x", j+1, "The differences between it and the original solution is:",  max_value)



11.313708498984763
11.313708498984763
original: 12.060498275867596
0 column vector changed, the minimal value is: 7.950503332657295
The absolute value of the difference between this value and the original solution is 4.109994943210301
1 column vector changed, the minimal value is: 4.997434793801424
The absolute value of the difference between this value and the original solution is 7.063063482066172
2 column vector changed, the minimal value is: 5.018933019731187
The absolute value of the difference between this value and the original solution is 7.041565256136409
3 column vector changed, the minimal value is: 5.041292237038698
The absolute value of the difference between this value and the original solution is 7.019206038828898
4 column vector changed, the minimal value is: 4.980798737214211
The absolute value of the difference between this value and the original solution is 7.079699538653386
5 column vector changed, the minimal value is: 4.981755420792947
The absolute value of the di

In [37]:
for i in range(9):
    copied_matrix = np.copy(face_matrix)
    copied_matrix[:, i] = index
    w = inv(copied_matrix.transpose() @ copied_matrix) @ copied_matrix.transpose() @ label_vec
    substituted_min = norm((copied_matrix @ w) - label_vec)
    print(i, "column vector changed, the minimal value is:", substituted_min)
    print("The absolute value of the difference between this value and the original solution is", np.absolute(substituted_min - original_min))

0 column vector changed, the minimal value is: 7.950503332657295
The absolute value of the difference between this value and the original solution is 4.109994943210301
1 column vector changed, the minimal value is: 4.997434793801424
The absolute value of the difference between this value and the original solution is 7.063063482066172
2 column vector changed, the minimal value is: 5.018933019731187
The absolute value of the difference between this value and the original solution is 7.041565256136409
3 column vector changed, the minimal value is: 5.041292237038698
The absolute value of the difference between this value and the original solution is 7.019206038828898
4 column vector changed, the minimal value is: 4.980798737214211
The absolute value of the difference between this value and the original solution is 7.079699538653386
5 column vector changed, the minimal value is: 4.981755420792947
The absolute value of the difference between this value and the original solution is 7.07874285

In [71]:
# 50% data to train, 50% data to verify, we need to promise the randomness when we choose the data set
from sklearn.model_selection import train_test_split
# split the train set and validation set
x_train, x_eval, y_train, y_eval = train_test_split(face_matrix, label_vec, test_size = 0.5)

# classifier 1: use 9 paras 
w_opt = inv(x_train.transpose() @ x_train) @ x_train.transpose() @ y_train
# cross validation
y_hat = np.sign(x_eval @ w_opt)
# error calculation
error_vec = [0 if i[0]==i[1] else 1 for i in np.hstack((y_hat, y_eval))]
print("Errors:", sum(error_vec))
print("Error Proportion: %4.2f" % (sum(error_vec)/64), "%")

Errors: 4
Error Proportion: 0.06 %


In [78]:
# We just use para w_1, w_7, w_9 to train the model
x_train_x1 = np.ones((64,1))
x_train_x1[:, 0] = x_train[:, 0] 

x_train_x7 = np.ones((64, 1))
x_train_x7[:, 0] = x_train[:, 6]

x_train_x9 = np.ones((64, 1))
x_train_x9[:, 0] = x_train[:, 8]

x_train_3_para =np.concatenate((x_train_x1, x_train_x7, x_train_x9), axis = 1)

x_eval_x1 = np.ones((64,1))
x_eval_x1[:, 0] = x_eval[:, 0] 

x_eval_x7 = np.ones((64,1))
x_eval_x7[:, 0] = x_eval[:, 6] 

x_eval_x9 = np.ones((64,1))
x_eval_x9[:, 0] = x_eval[:, 8] 
x_eval_3_para = np.concatenate((x_eval_x1, x_eval_x7, x_eval_x9), axis = 1)


# classifier 2: use 3 paras
w_opt = inv(x_train_3_para.transpose() @ x_train_3_para) @ x_train_3_para.transpose() @ y_train

# cross validation 
y_hat = np.sign(x_eval_3_para @ w_opt)

# error calculation
error_vec = [0 if i[0]==i[1] else 1 for i in np.hstack((y_hat, y_eval))]
print("Errors:", sum(error_vec))
print("Error Proportion: %4.2f" % (sum(error_vec)/50), "%")


Errors: 4
Error Proportion: 0.08 %


In [94]:
for i in range(8):
    # seperate validation set and training set
    extracted_eval_data = face_matrix[16 * i : 16 * (i + 1), :]
    extracted_eval_label = label_vec[16 * i : 16 * (i + 1), :]
    remaining_train_data = np.delete(face_matrix, np.s_[16*i : 16*(i+1)], axis = 0)
    remaining_train_label = np.delete(face_matrix, np.s_[16*i : 16*(i+1)], axis = 0)
    
    # calculate the solution to least squared problem
    w_opt = inv(remaining_train_data.transpose() @ remaining_train_data) @ remaining_train_data.transpose() @ remaining_train_label
    # prediction
    y_hat = np.sign(extracted_eval_data @ w_opt)
    
    # Validation
    error_vec = [0 if i[0]==i[1] else 1 for i in np.hstack((y_hat, extracted_eval_label))]
    print("The", i+1, "Cross Validation's Results:")
    print("Errors:", sum(error_vec))
    print("Error Proportion: %4.2f" % (sum(error_vec)/64), "%")
    print("=" * 60)
    


The 1 Cross Validation's Results:
Errors: 6
Error Proportion: 0.09 %
The 2 Cross Validation's Results:
Errors: 4
Error Proportion: 0.06 %
The 3 Cross Validation's Results:
Errors: 7
Error Proportion: 0.11 %
The 4 Cross Validation's Results:
Errors: 5
Error Proportion: 0.08 %
The 5 Cross Validation's Results:
Errors: 0
Error Proportion: 0.00 %
The 6 Cross Validation's Results:
Errors: 2
Error Proportion: 0.03 %
The 7 Cross Validation's Results:
Errors: 2
Error Proportion: 0.03 %
The 8 Cross Validation's Results:
Errors: 7
Error Proportion: 0.11 %


In [88]:
extracted_eval_data = face_matrix[0 : 16, :]
remaining_train_data = np.delete(face_matrix, np.s_[0:16], axis = 0)
remaining_train_data
np.shape(remaining_train_data)


(112, 9)