In [55]:
# NOTE: Images must already be in './image/' directory. This can be done using the query_bing_images.py script

#%load preprocess_images.py
import numpy as np
import Image
import os

STANDARD_SIZE = (300, 167)
def img_to_matrix(filename, verbose=False):
    """ 
    Turns filename into numpy array of RGB pixels
    """
    img = Image.open(filename)
    if verbose==True:
        print "Changing size from %s to %s" % (str(img.size), str(STANDARD_SIZE))
    img = img.resize(STANDARD_SIZE)
    img = list(img.getdata())
    img = map(list, img)
    img = np.array(img)
    return img

def flatten_image(img):
    """ 
    Takes in (m,n) numpy array and flattens it into an array of shape (1, m*n)
    """
    s = img.shape[0] * img.shape[1]
    img_wide = img.reshape(1, s)
    return img_wide[0]

In [56]:
# %load processing_all_the_images.py
img_dir = "images/"
images = [img_dir + f for f in os.listdir(img_dir)]
labels = ["check" if "check" in f.split('/')[-1] else "drivers_license" for f in images]

data = []
for image in images:
    img = img_to_matrix(image)
    img = flatten_image(img)
    data.append(img)

data = np.array(data)

In [57]:
# %load randomized_pca_2d.py
from sklearn.decomposition import RandomizedPCA
import pandas as pd
import matplotlib.pyplot as pl

pca = RandomizedPCA(n_components=2)
X = pca.fit_transform(data)
df = pd.DataFrame({"x": X[:, 0], "y": X[:, 1], "label":labels})
colors = ["red", "yellow"]
for label, color in zip(df['label'].unique(), colors):
    mask = df['label']==label
    pl.scatter(df[mask]['x'], df[mask]['y'], c=color, label=label)
pl.legend()
pl.show()

              label             x             y
0             check   8174.441118   6117.749615
1             check   9253.592719  10348.001487
2             check   9252.294724  10344.699792
3             check   2939.646777   8086.697761
4             check  -1304.963526   5604.346280
5   drivers_license   4210.159544  -4605.140747
6   drivers_license   8568.823817   9340.143134
7   drivers_license   8290.818792   2066.060728
8             check   5896.116411  -3300.048806
9   drivers_license   -766.156756 -12465.612082
10  drivers_license   9141.046664   4486.709497
11            check  10384.756851   9866.666029
12            check   8401.607628   2225.719224
13  drivers_license  -2408.230740 -20017.116166
14            check  -7141.546174  -3620.065401
15            check -12319.673512   8972.425840
16            check -23013.263842   5194.637467
17            check  -1291.756803   4886.403818
18  drivers_license   1077.268877 -10578.351833
19  drivers_license   9229.999768   4916