In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import cv2

## Eigen Face
from sklearn.decomposition import PCA 

import pickle

In [None]:
# Load the pickled dataset: flattened 100x100 face images plus an 'age' label column.
# NOTE(security): pickle.load can execute arbitrary code -- only load files from a trusted source.
# The context manager makes sure the file handle is closed once loading finishes.
with open('./AP_data/expression_data_images_100_100.pickle', mode='rb') as data_file:
    data = pickle.load(data_file)

In [None]:
# Preview the first rows to confirm the frame loaded as expected
data.head()

##### Eigen Face

In [None]:
# Feature matrix: every column except the 'age' label, as a NumPy array (one row per image)
X = data.drop('age',axis=1).values # all images

In [None]:
# Inspect the raw feature matrix
X

In [None]:
# Target vector: the 'age' label for each row of X
y = data['age'].values

# balancing the dataset

In [None]:
from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler

def balance_dataset(X, y, method='oversample', random_state=None):
    """Balance the class counts of a dataset by random over- or under-sampling.

    Parameters
    ----------
    X : array-like
        Feature matrix; passed unchanged to the sampler's ``fit_resample``.
    y : array-like
        Class labels aligned with the rows of ``X``.
    method : {'oversample', 'undersample'}, default 'oversample'
        'oversample' uses ``RandomOverSampler``; 'undersample' uses
        ``RandomUnderSampler``.
    random_state : int, RandomState instance or None, default None
        Forwarded to the sampler. Pass a fixed value to make the resampling
        reproducible across runs; ``None`` keeps the previous unseeded behaviour.

    Returns
    -------
    tuple
        ``(X_resampled, y_resampled)`` as returned by ``fit_resample``.

    Raises
    ------
    ValueError
        If ``method`` is neither 'oversample' nor 'undersample'.
    """
    if method == 'oversample':
        sampler = RandomOverSampler(random_state=random_state)
    elif method == 'undersample':
        sampler = RandomUnderSampler(random_state=random_state)
    else:
        raise ValueError("Invalid method. Choose either 'oversample' or 'undersample'.")

    X_resampled, y_resampled = sampler.fit_resample(X, y)
    return X_resampled, y_resampled


# Rebalance the classes by random oversampling; this rebinds X and y to the resampled arrays
X, y = balance_dataset(X, y, method='oversample')



In [None]:
# Class distribution of the 'age' labels, shown side by side as a bar chart and a pie chart.
# The frame's only column is 'age', so that is the column whose values are counted.
dist_expression = pd.DataFrame(y,columns=['age'])['age'].value_counts()
fig,ax = plt.subplots(nrows=1,ncols=2)
dist_expression.plot(kind='bar',ax=ax[0])
dist_expression.plot(kind='pie',ax=ax[1],autopct='%0.0f%%')
plt.show()

In [None]:
# Per-pixel mean over all rows of X; the result stays flattened (one value per pixel column)
mean_face = X.mean(axis=0) # flattened mean face
mean_face.shape

In [None]:
# Render the flattened mean face as a 100x100 grayscale image with the axes hidden
mean_face_image = np.reshape(mean_face, (100, 100))
plt.imshow(mean_face_image, cmap='gray')
plt.axis('off')
plt.show()

**Subtract the mean face from the data**

In [None]:
# Centre the data: subtract the mean face from every row (broadcast across rows)
X_t = X - mean_face # transformed data

In [None]:
# Fit a PCA without limiting the number of components (n_components=None) so the
# explained-variance spectrum can be inspected before choosing how many to keep
pca = PCA(n_components=None,whiten=True,svd_solver='auto')
pca.fit(X_t)

In [None]:
# Tabulate the per-component and cumulative explained-variance ratios,
# numbering the principal components from 1
exp_var_df = (
    pd.DataFrame({'explained_var': pca.explained_variance_ratio_})
    .assign(
        cum_explained_var=lambda frame: frame['explained_var'].cumsum(),
        principal_components=lambda frame: np.arange(1, len(frame) + 1),
    )
)

exp_var_df.head()

In [None]:
# Use the 1-based component number as the index. The guard keeps the cell re-runnable:
# once the column has been moved into the index, a second set_index would raise KeyError.
if 'principal_components' in exp_var_df.columns:
    exp_var_df = exp_var_df.set_index('principal_components')

In [None]:
# Visualize the explained variance of the first 100 principal components:
# per-component ratio on top, cumulative ratio below
fig, ax = plt.subplots(nrows=2,figsize=(15,12))

exp_var_df['explained_var'].head(100).plot(kind='line',marker='o',ax=ax[0])
exp_var_df['cum_explained_var'].head(100).plot(kind='line',marker='o',ax=ax[1])

# Label both panels so the figure can be read on its own
ax[0].set(title='Explained variance ratio per principal component',
          xlabel='Principal component', ylabel='Explained variance ratio')
ax[1].set(title='Cumulative explained variance ratio',
          xlabel='Principal component', ylabel='Cumulative explained variance ratio')
plt.show()

From the plot above, it is clear that approximately **50 principal components give about 80%** coverage of the variance.

In [None]:
# Refit PCA keeping only 50 components (chosen from the explained-variance plot)
# and project the centred data onto them
pca_50 = PCA(n_components=50,whiten=True,svd_solver='auto')
pca_data = pca_50.fit_transform(X_t)

In [None]:
# Sanity check: expect (number of samples, 50)
pca_data.shape

In [None]:
# Save the PCA features and labels. Because the arrays are passed positionally they are
# stored under the keys 'arr_0' (pca_data) and 'arr_1' (y); NumPy appends the '.npz' extension.
np.savez('./AP_data/expression_data_pca_50_target',pca_data,y)

In [None]:
# Bundle the fitted 50-component PCA with the mean face it was centred on,
# so both can be saved and reused together
pca_dict = {'pca':pca_50,'mean_face':mean_face}

In [None]:
# Persist the PCA bundle; the context manager guarantees the file is flushed and closed
with open('AP_model/pca_dict.pickle','wb') as model_file:
    pickle.dump(pca_dict,model_file)

### Visualize Eigen Image

In [None]:
# Map the 50-component projections back to pixel space (approximate reconstruction of X_t)
pca_data_inv = pca_50.inverse_transform(pca_data)

In [None]:
# Sanity check: the reconstruction should have the same shape as X_t
pca_data_inv.shape

In [None]:
# Take the reconstruction of the first sample and reshape it to 100x100 for display
eig_img = pca_data_inv[0,:].reshape((100,100))
eig_img.shape

In [None]:
# Show the reconstructed image in grayscale with the axes hidden
plt.imshow(eig_img,cmap='gray')
plt.axis('off')
plt.show()  # render explicitly so the return value of plt.axis is not echoed as cell output

In [None]:
# Compare randomly chosen original faces with their 50-component PCA reconstructions.
np.random.seed(1001)  # fixed seed so the same faces are picked on every run
# Draw indices from the actual number of rows rather than a hard-coded 1519, so every
# index is valid and every row is reachable whatever size the (resampled) data has.
pics = np.random.randint(0,len(X),20)


def _plot_face_grid(images, labels, indices, nrows=4, ncols=10):
    """Plot the rows of ``images`` selected by ``indices`` on an nrows x ncols grid.

    Each selected row is reshaped to 100x100, drawn in grayscale without ticks,
    and titled with the matching entry of ``labels``.
    """
    plt.figure(figsize=(15,8))
    for i,pic in enumerate(indices):
        plt.subplot(nrows,ncols,i+1)
        plt.imshow(images[pic].reshape(100,100),cmap='gray')
        plt.title('{}'.format(labels[pic]))
        plt.xticks([])
        plt.yticks([])
    plt.show()


# Original images
_plot_face_grid(X, y, pics)

# Reconstructions of the same samples from the 50 principal components
print("="*20+'Eigen Images'+"="*20)
_plot_face_grid(pca_data_inv, y, pics)