# Link to Drive, Set File Paths

In [None]:
import os
# All data paths are anchored to the directory the notebook is launched from.
base_directory = os.getcwd()
# Homework data is expected in a 'Homework_Two_Data' folder below that
# directory; note that this path string already ends with a '/'.
assignment_two_data = f"{base_directory}/Homework_Two_Data/"

In [None]:
# Load core libraries and utilities
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import colors
from skimage import io

# Jupyter notebook "magic"
%matplotlib inline

# Helper Functions

In [None]:
# We need to be able to pass in a directory and build an image list
def file_builder(directory):
    """Load every image found under ``directory`` as a grayscale array.

    The directory tree is walked recursively and every file whose name is not
    exactly "Readme" is read with ``io.imread(..., as_gray=True)``.

    Sub-directories and file names are visited in sorted order. ``os.walk``
    otherwise yields them in whatever order the file system reports, which
    would make the order of the returned list (and of anything indexed by
    image number downstream) differ from machine to machine.

    Parameters
    ----------
    directory : str
        Root folder to search.

    Returns
    -------
    list
        One entry per file read, in traversal order. The list is empty when
        ``directory`` contains no files or does not exist.
    """
    image_list = []
    for root, dirs, files in os.walk(directory):
        # With the default top-down walk, sorting ``dirs`` in place controls
        # the order in which os.walk descends into the sub-directories.
        dirs.sort()
        for filename in sorted(files):
            if filename != "Readme":
                image_list.append(io.imread(os.path.join(root, filename), as_gray=True))
    return image_list

In [None]:
# We need a tool that converts images into grayscale (i.e. all pixel values in [0, 1])
def gray_scale_convert(image):
    """Linearly rescale a 2-D image so that its values span [0, 1].

    The minimum pixel value is mapped to 0 and the maximum to 1. The input is
    copied into a new float array first, so ``image`` itself is not modified.

    A constant image has zero range; it is returned as all zeros instead of
    being divided by zero (which would fill the result with NaN).

    Parameters
    ----------
    image : array-like with a 2-D ``shape``
        Pixel values to rescale.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(image.shape[0], image.shape[1])``.
    """
    rescaled_image = np.zeros((image.shape[0], image.shape[1]))
    rescaled_image[:, :] = image
    rescaled_image -= np.min(rescaled_image)
    value_range = np.max(rescaled_image)
    # After the shift the maximum equals the range of the data; only divide
    # when that range is non-zero.
    if value_range > 0:
        rescaled_image /= value_range
    return rescaled_image

# Problem Set

In [None]:
# Build image lists for cropped and uncropped images
# os.path.join avoids the doubled separator that plain string concatenation
# produces here (assignment_two_data already ends with '/'). The trailing ''
# keeps a trailing separator on each directory path.
cropped_directory = os.path.join(assignment_two_data, 'CroppedYale', '')
uncropped_directory = os.path.join(assignment_two_data, 'yalefaces_uncropped', '')

cropped_image_list = file_builder(cropped_directory)
# Uncomment the next line to load the uncropped images as well.
#uncropped_image_list = file_builder(uncropped_directory)

So, using the code so far, you have built lists of grayscale matrices which encode a variety of images.  Now we want to "learn" from these images using the SVD.  

**Problem 1**: Following the model below (which handles the cropped images), build a matrix from the uncropped images, each of whose columns is a flattened grayscale image.  

In [None]:
# Problem 1 Model
# Data-matrix dimensions: one row per pixel, one column per image.
# The shape of the first image is used as the shape of every image in the list.
num_crp_images = len(cropped_image_list)
crp_row, crp_col = np.shape(cropped_image_list[0])
crp_image_mat = np.zeros((crp_row * crp_col, num_crp_images), dtype=np.float64)

# Unroll each image (row by row) into its own column of the matrix.
for col_idx in range(num_crp_images):
    crp_image_mat[:, col_idx] = np.ravel(cropped_image_list[col_idx])

In [None]:
# Problem 1 answer goes here.


**Problem 2**: Following the discussion in lecture, subtract the mean/average column (the "average face") from every column of each image matrix. Note, you'll need to look up how to use `np.mean()` and `np.tile()` to make this efficient.  

In [None]:
# Averaging along axis 1 (across the image columns) gives one mean value per
# pixel row, i.e. the average image as a 1-D vector.
crp_avg = crp_image_mat.mean(axis=1)
# Note: crp_avg.reshape(-1, 1) is another way to turn crp_avg into a column vector.
# The column is tiled once per image and subtracted in place, so running this
# cell a second time would recompute crp_avg from the already-centered matrix.
crp_image_mat -= np.tile(crp_avg[:, np.newaxis], (1, num_crp_images))

**Problem 3**: Using `.reshape()`, plot the average face for both the cropped and uncropped images.  

In [None]:
# Problem 3 code goes here.

**Problem 4**: For both the zero-average cropped and uncropped image matrices, find the SVD of each, rescale the singular values by the largest one and plot them on a log scale, and then characterize what percentage of the singular values are within a factor of 10, 100, and 1000 of the largest singular value (i.e. at least 1/10, 1/100, and 1/1000 of its size).   

In [None]:
# Reduced SVD of the cropped-image matrix: with full_matrices=False, u_crp has
# one column per singular value rather than being a full square matrix.
u_crp, s_crp, vh_crp = np.linalg.svd(
    crp_image_mat,
    full_matrices=False,
)
# Problem 4 code goes here

Now, how can we start to analyze our image collections using the SVD/PCA?  We're going to take a more naive approach than the book does to answer this.  First, plot the first three columns of `u_crp` as images in grayscale.  Explain what you are finding.  

In [None]:
# Problem 4 (continued): code to plot the first three columns of u_crp goes here

Next, let's see how the various projections look across all of the images.

In [None]:
num_modes = 14 # why did I choose 14 here?
# Project every column (image) of the mean-subtracted matrix onto the first
# num_modes columns of u_crp; row jj holds the Mode jj coefficient of each image.
pca_proj_crp = u_crp[:, :num_modes].T @ crp_image_mat

# Derive the two-row grid from num_modes (2 x 7 for 14 modes) so that changing
# num_modes above does not run past the available panels.
num_plot_rows = 2
num_plot_cols = max(1, -(-num_modes // num_plot_rows))  # ceiling division
fig, axes = plt.subplots(num_plot_rows, num_plot_cols, figsize=(20, 10), squeeze=False)
ax = axes.ravel()
for jj in range(num_modes):
    ax[jj].plot(pca_proj_crp[jj, :])
    ax[jj].set_title(f"Mode {jj}")
    ax[jj].set_xlabel("Image index")
# Hide any panel left over when num_modes does not fill the grid exactly.
for unused_ax in ax[num_modes:]:
    unused_ax.set_visible(False)
plt.tight_layout()

So since we're trying to take a stab at "clustering" our data, we might also want to look at the same plots but now as histograms.  

In [None]:
# Histogram of the projection coefficients of all images, one panel per mode.
# The two-row grid is derived from num_modes (2 x 7 for 14 modes) so that a
# different num_modes does not run past the available panels.
num_hist_rows = 2
num_hist_cols = max(1, -(-num_modes // num_hist_rows))  # ceiling division
fig, axes = plt.subplots(num_hist_rows, num_hist_cols, figsize=(20, 5), squeeze=False)
ax = axes.ravel()
num_bins = 15
for jj in range(num_modes):
    ax[jj].hist(pca_proj_crp[jj, :], bins=num_bins)
    ax[jj].set_title(f"Mode {jj}")
    ax[jj].set_xlabel("Projection coefficient")
# Hide any panel left over when num_modes does not fill the grid exactly.
for unused_ax in ax[num_modes:]:
    unused_ax.set_visible(False)
plt.tight_layout()

**Problem 5**: From the mode plots above, which modes do you think are the most meaningful?  To help you answer this, think about which plots might help you label images.  To do this, you need strong contrasts in the plots.  For example, how would you compare Mode 3 to Mode 13?  Which gives you more insight into underlying features of the image dataset?

**Problem 6** (Graduate/Extra Credit): The above is for cropped images.  Plot equivalent results for the uncropped images.  

**Problem 7**: Now, for the cropped image set, choose the 3 most interesting modes and plot them against one another using the code below.  What else can you infer by looking at this plot?  Note, you might want to try a few different choices here.  Maybe even plot some 'bad' choices just so you have some points of comparison here.  

In [None]:
# Each inner list names three modes (row indices of pca_proj_crp) to plot
# against one another. The empty template `[[, , ], [, , ]]` is not valid
# Python, so the values below are placeholders that let the cell run as-is --
# replace them with your own choices.
comb_list = [[0, 1, 2], [3, 4, 5]] # add your particular choices here

fig = plt.figure(figsize=(10, 5))
for jj, (mode_x, mode_y, mode_z) in enumerate(comb_list):
    x = pca_proj_crp[mode_x, :]
    y = pca_proj_crp[mode_y, :]
    z = pca_proj_crp[mode_z, :]
    # One 3-D panel per combination, laid out in a single row; you might need
    # to adjust this depending on how many combinations you plot at once.
    ax = fig.add_subplot(1, len(comb_list), jj + 1, projection='3d')
    ax.scatter(x, y, z, s=2.)
    ax.set_title(f"Modes: ({mode_x}, {mode_y}, {mode_z})")
    ax.set_xlabel(f"Mode {mode_x}")
    ax.set_ylabel(f"Mode {mode_y}")
    ax.set_zlabel(f"Mode {mode_z}")

plt.tight_layout()

**Problem 8** (Graduate/Extra Credit): Repeat the above analysis for the uncropped image set.  What differences do you see relative to the cropped image set?  How might you explain that?  