# Inverse PCA

In [None]:
!wget -q https://github.com/DM-GY-9103-2024F-H/9103-utils/raw/main/src/data_utils.py
!wget -q https://github.com/DM-GY-9103-2024F-H/9103-utils/raw/main/src/image_utils.py

!wget -qO- https://github.com/DM-GY-9103-2024F-H/9103-utils/releases/latest/download/att-faces.tar.gz | tar xz
!wget -qO- https://github.com/DM-GY-9103-2024F-H/9103-utils/releases/latest/download/metfaces.tar.gz | tar xz

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

from numpy.random import normal as np_normal, randint
from os import listdir, path
from PIL import Image as PImage

from data_utils import PCA, StandardScaler
from image_utils import make_image, open_image

## Load Dataset

In [None]:
img_width = 92

# one flat pixel list per image, gathered subject by subject:
# 40 subject directories (s1..s40), each holding 10 images (1.pgm..10.pgm)
face_pixels = [
  open_image(f"./data/image/att-faces/s{subject}/{shot}.pgm").pixels
  for subject in range(1, 41)
  for shot in range(1, 11)
]

## Start Processing

In [None]:
# sanity check: render the first loaded face at the dataset's image width
first_face = make_image(face_pixels[0], width=img_width)
display(first_face)

In [None]:
# fit PCA on the pixel lists, keeping 256 components, and project every face onto them
pca = PCA(n_components=256)
faces_df = pca.fit_transform(face_pixels)

# report what pca.explained_variance() returns for this fit
explained = pca.explained_variance()
print(explained)

In [None]:
# Reconstruction: map the PCA-space values back into pixel space
pca_pixels = pca.inverse_transform(faces_df)

# show 4 randomly chosen faces, each as original followed by its reconstruction
for sample in range(4):
  pick = randint(0, len(face_pixels))
  for pixels in (face_pixels[pick], pca_pixels.loc[pick]):
    display(make_image(pixels, width=img_width))

### Common Factors

These are the common faces extracted from the dataset.

All reconstructions are built from linear combinations of these.

In [None]:
# render the first 4 entries of pca.components_ as images
for component in pca.components_[:4]:
  lo, hi = component.min(), component.max()
  # min-max rescale: the smallest value maps to 0 and the largest to 255
  rescaled = 255 * (component - lo) / (hi - lo)
  display(make_image(rescaled, width=img_width))

### Standardize the PC values

Standardizing makes the distribution of the PC values easier to understand, and makes it easier to pick sensible random values later.

In [None]:
# fit a scaler on the PCA-space values and keep it around: later cells call
# pca_scaler.inverse_transform to map standardized values back to PC values
pca_scaler = StandardScaler()
faces_pca_std_df = pca_scaler.fit_transform(faces_df)

### Plot distribution of PCs

Plot first 32 PCs for all faces and highlight the values for the first 4 faces.

In [None]:
num_pcs = 32
num_faces = 4
cmap = plt.get_cmap("Set1")

plt.figure(figsize=(8,8))

# background: one horizontal strip per PC holding that PC's value for every face
face_count = faces_pca_std_df.shape[0]
for pc in range(num_pcs):
  strip_y = [pc] * face_count
  plt.scatter(faces_pca_std_df[f"PC{pc}"], strip_y, alpha=0.25, color='#2280fa')

# foreground: the first num_faces faces, each drawn in its own colormap color
pc_rows = range(num_pcs)
for face in range(num_faces):
  face_color = cmap(face / num_faces)
  face_pcs = faces_pca_std_df.iloc[face].values[:num_pcs]
  plt.scatter(face_pcs, pc_rows, c=[face_color] * num_pcs)

plt.show()

### Most Average Face

Set all PCs to their average value.

In [None]:
# "Most average" face: a single row in which every standardized PC value is 0.
# The row is built explicitly as zeros; no random draw is involved, so this cell
# is deterministic and leaves the random generator state untouched.
# presumably the scaler maps a standardized 0 back to each PC's mean - confirm in data_utils
# NOTE: the rand_* names are kept because the Random Face cell reassigns the same names.
rand_std_pcs = pd.DataFrame(0.0, index=range(1), columns=faces_df.columns)

# undo the standardization, then map from PCA space back to pixel space
rand_pcs = pca_scaler.inverse_transform(rand_std_pcs)
rand_img = pca.inverse_transform(rand_pcs)

display(make_image(rand_img.loc[0], width=img_width))

### Random Face

Pick `num_ifaces` sets of random PCs ($4$ in the cell below), based on the distribution of the dataset's PC values.

In [None]:
num_ifaces = 4

# draw standardized PC values from a normal distribution with scale 0.666,
# one row per face to generate and one column per principal component
sample_shape = [num_ifaces, pca.n_components_]
rand_std_pcs = pd.DataFrame(np_normal(scale=0.666, size=sample_shape), columns=faces_df.columns)

# undo the standardization, then map from PCA space back to pixel space
rand_pcs = pca_scaler.inverse_transform(rand_std_pcs)
rand_img = pca.inverse_transform(rand_pcs)

for face in range(num_ifaces):
  display(make_image(rand_img.loc[face], width=img_width))

## Bonus Dataset

Run this cell and then repeat the process starting from the _Start Processing_ cell above.

In [None]:
img_width = 256
MET_PATH = f"./data/image/metfaces/{img_width}"

# .jpg file names in sorted order, so faces are loaded in a stable order
jpg_names = sorted(name for name in listdir(MET_PATH) if name.endswith(".jpg"))

# open each file, convert it to mode "L", and flatten it into a plain list of pixel values
face_pixels = [
  list(PImage.open(path.join(MET_PATH, name)).convert("L").getdata())
  for name in jpg_names
]