<h1> DS200A Computer Vision Assignment</h1>

<h2>  Part Two: Exploratory data analysis and feature extraction. </h2>	


<h5> In this section, compute at least 15 image features (a method for each), including the following (NOTE: at least 10 of these must be scalar features and 2 must be matrix-based features): (i) image size, (ii) average of the red-channel intensity, (iii) aspect ratio. This will require significant exploratory research and data analysis. The first one is already implemented for you, and the next two are pre-specified. Additional requirements are specified in the PDF. </h5>

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib
import skimage
from skimage import data
from skimage import io
from skimage import feature

import os
import seaborn as sns
import cv2


In [None]:
# Read the table stored under the "data" key of data.h5, report its row
# count, and preview the first rows as the cell output.
data = pd.read_hdf("data.h5", key="data")
print(len(data))
data.head()

#### Display three of the learning set images.

In [None]:
# Keep the first row of every 'Encoding' group, then display the picture of
# the first three groups, one figure each.
subsample = data.groupby('Encoding').agg(lambda group: group.iloc[0])
for class_pos in range(3):
    picture = subsample['Pictures'].iloc[class_pos]
    skimage.io.imshow(picture)
    plt.show()

In [None]:
# Show the picture of the first 'Encoding' group on its own.
skimage.io.imshow(subsample['Pictures'].iloc[0])

#### Provide graphical summaries of the sizes of the images, pixel intensities, and class frequencies.

In [None]:
# Class frequency: number of images per 'Encoding' value, drawn as a bar chart.
class_frequency = data.groupby('Encoding').size()

# Create the figure explicitly. A leading plt.clf() would first open (and
# clear) a throw-away figure, leaving an empty extra figure next to the plot.
fig, ax = plt.subplots(figsize=(6, 4), dpi=120)
class_frequency.plot.bar(ax=ax)
ax.set_xlabel("Encoding")
ax.set_ylabel("number of images in each class")
ax.set_title("Class frequencies")

plt.show()

In [None]:
# sizes of the images
def size_image(img):
    """Return the number of elements stored in `img` (its ``size`` attribute)."""
    return img.size


data['size'] = data['Pictures'].apply(size_image)

# Create the figure explicitly instead of plt.clf() + plt.figure(): clf() on
# its own opens an extra, empty figure before the real one is created.
fig, ax = plt.subplots(figsize=(8, 6), dpi=120)
sns.boxplot(x='Encoding', y='size', data=data, ax=ax)
# The y-axis is capped at 3e6, so larger sizes fall outside the visible range.
ax.set_ylim(0, 3e6)
ax.set_title("Image size per class")
plt.show()

In [None]:
# pixel intensity: mean and standard deviation of each of the first three
# channels of every picture, followed by one boxplot per statistic.

def mean_intensity_channel0(img):
    """Mean of channel 0 of `img`."""
    channel = img[:, :, 0]
    return channel.mean()


def mean_intensity_channel1(img):
    """Mean of channel 1 of `img`."""
    channel = img[:, :, 1]
    return channel.mean()


def mean_intensity_channel2(img):
    """Mean of channel 2 of `img`."""
    channel = img[:, :, 2]
    return channel.mean()


def std_intensity_channel0(img):
    """Standard deviation of channel 0 of `img`."""
    channel = img[:, :, 0]
    return channel.std()


def std_intensity_channel1(img):
    """Standard deviation of channel 1 of `img`."""
    channel = img[:, :, 1]
    return channel.std()


def std_intensity_channel2(img):
    """Standard deviation of channel 2 of `img`."""
    channel = img[:, :, 2]
    return channel.std()


# Column name -> summary function, in the order the columns are created and plotted.
intensity_columns = {
    'mean_channel0': mean_intensity_channel0,
    'mean_channel1': mean_intensity_channel1,
    'mean_channel2': mean_intensity_channel2,
    'std_channel0': std_intensity_channel0,
    'std_channel1': std_intensity_channel1,
    'std_channel2': std_intensity_channel2,
}

for column_name, summary_fn in intensity_columns.items():
    data[column_name] = data['Pictures'].apply(summary_fn)

# One figure per statistic, grouped by class.
for column_name in intensity_columns:
    sns.boxplot(x='Encoding', y=column_name, data=data)
    plt.show()

#### Provide functions that summarize pixel intensity data (e.g., https://docs.opencv.org/3.0-beta/doc/py_tutorials/py_feature2d/py_table_of_contents_feature2d/py_table_of_contents_feature2d.html#py-table-of-content-feature2d). Compute at least 15 such image features (a method for each), including the following (NOTE: At least 10 of these must be scalar features and 2 matrix-based features): (i) image size, (ii) average of the red-channel intensity, (iii) aspect ratio.

In [None]:
# image size
def feature_size(image):
    """
    Image size feature.

    Returns the ``size`` attribute of `image`, i.e. the total number of
    array elements counted across all of its axes.
    """
    n_elements = image.size
    return n_elements

def feature_avg_red(image):
    """
    Average intensity of the red channel.

    The red channel is taken to be index 0 of the last axis
    (assumes RGB channel order -- TODO confirm against the data source).
    """
    red_plane = image[:, :, 0]
    return red_plane.mean()

def feature_avg_green(image):
    """
    Average intensity of the green channel.

    The green channel is taken to be index 1 of the last axis
    (assumes RGB channel order -- TODO confirm against the data source).
    """
    green_plane = image[:, :, 1]
    return green_plane.mean()

def feature_avg_blue(image):
    """
    Average intensity of the blue channel.

    The blue channel is taken to be index 2 of the last axis
    (assumes RGB channel order -- TODO confirm against the data source).
    """
    blue_plane = image[:, :, 2]
    return blue_plane.mean()

def feature_aspect_ratio(image):
    """
    return the aspect ratio of the image, i.e. its height (``shape[0]``)
    divided by its width (``shape[1]``)

    NOTE: this cell defines ``feature_aspect_ratio`` a second time further
    down; at runtime the later definition replaces this one.
    """
    height, width = image.shape[0], image.shape[1]
    # The ratio must be returned. `raise <float>` fails with a TypeError
    # because only exception instances/classes can be raised.
    return height / width

    
    
    
def feature_std_red(image):
    """
    Standard deviation of the red channel (index 0 of the last axis).
    """
    red_plane = image[:, :, 0]
    return red_plane.std()

def feature_std_green(image):
    """
    Standard deviation of the green channel (index 1 of the last axis).
    """
    green_plane = image[:, :, 1]
    return green_plane.std()

def feature_std_blue(image):
    """
    Standard deviation of the blue channel (index 2 of the last axis).
    """
    blue_plane = image[:, :, 2]
    return blue_plane.std()

def feature_avg_gray(image):
    """
    return mean value of grayscale, where the gray value of a pixel is the
    unweighted average of its first three channels, (c0 + c1 + c2) / 3
    """
    # The mean over the three channel planes equals the mean of the per-pixel
    # (c0 + c1 + c2) / 3 values. Computing it this way fixes two problems of
    # `c0 + c1 + c2 / 3`: only the last channel was divided by 3 (operator
    # precedence), and adding integer channels wraps around for uint8 pictures.
    return image[:, :, :3].mean()

def feature_aspect_ratio(image):
    """
    Aspect ratio feature: the height of the image (``shape[0]``)
    divided by its width (``shape[1]``).
    """
    n_rows = image.shape[0]
    n_cols = image.shape[1]
    return n_rows / n_cols

def short_side_resize(image, length=256):
    """
    resize the image so that its shorter side is exactly `length` pixels
    while keeping the aspect ratio

    Parameters
    ----------
    image : array with at least two axes; axis 0 is treated as height and
        axis 1 as width
    length : target size of the shorter side, in pixels

    Returns
    -------
    The result of ``skimage.transform.resize`` for the computed
    (new_height, new_width) output shape.
    """
    # Only the first two axes are read, so 2-D (single-channel) input is
    # accepted as well as height x width x channels input.
    height, width = image.shape[:2]
    # Pin the short side to `length` and round the long side. Truncating
    # `side * (length / side)` with int() can give `length - 1` because of
    # floating-point error (e.g. side = 49).
    if height < width:
        new_height = length
        new_width = int(round(width * length / height))
    else:
        new_width = length
        new_height = int(round(height * length / width))
    return skimage.transform.resize(image, (new_height, new_width), mode='reflect', anti_aliasing=True)

def center_crop(image, length=224):
    """
    crop the center patch of the image with length * length

    Parameters
    ----------
    image : array with at least two axes (height, width, ...)
    length : side of the square patch, in pixels

    Returns
    -------
    The centered `length` x `length` patch. Along an axis where the image is
    shorter than `length`, the whole extent of that axis is returned.
    """
    height, width = image.shape[:2]
    center_row, center_col = height // 2, width // 2
    # Clamp the start at 0: a negative start would be interpreted as an
    # index from the end and produce a wrong (possibly empty) slice.
    top = max(center_row - length // 2, 0)
    left = max(center_col - length // 2, 0)
    return image[top:top + length, left:left + length]

def feature_harris(image):
    """
    Fraction of pixels with a positive Harris corner response.

    The picture is first passed through short_side_resize and center_crop
    (both with their default sizes), converted to gray scale and scaled to
    8-bit before cv2.cornerHarris is applied.
    """
    patch = center_crop(short_side_resize(image))
    gray_float = skimage.color.rgb2gray(patch)
    # Scaling by 255 presumes rgb2gray yields values in [0, 1] -- TODO confirm.
    gray_u8 = (gray_float * 255).astype(np.uint8)
    response = cv2.cornerHarris(gray_u8, blockSize=2, ksize=3, k=0.04)
    n_positive = np.count_nonzero(response > 0)
    return n_positive / response.size

def feature_dog(image):
    """
    return the fraction of pixels whose difference-of-Gaussians (DoG)
    response exceeds 5% of the maximum response

    The DoG is the gray-scale image smoothed with sigma=0.3 minus the same
    image smoothed with sigma=0.5 (these are the `sigma` arguments of the
    Gaussian filter, not variances).
    """
    gray = skimage.color.rgb2gray(image)
    fine = np.asarray(skimage.filters.gaussian(gray, sigma=0.3))
    coarse = np.asarray(skimage.filters.gaussian(gray, sigma=0.5))
    dog = fine - coarse
    strong = dog > 0.05 * dog.max()
    # Count with the array's own sum: builtin sum(sum(...)) walks the rows in
    # Python and is needlessly slow on full-size images.
    return strong.sum() / dog.size

def feature_avg_y(image):
    """
    Mean of the luminance (Y) plane, i.e. channel 0 of the image after
    conversion with skimage.color.rgb2ycbcr.
    """
    ycbcr = skimage.color.rgb2ycbcr(image)
    luma = ycbcr[:, :, 0]
    return luma.mean()


def feature_avg_cb(image):
    """
    Mean of the blue chroma (Cb) plane, i.e. channel 1 of the image after
    conversion with skimage.color.rgb2ycbcr.
    """
    ycbcr = skimage.color.rgb2ycbcr(image)
    chroma_blue = ycbcr[:, :, 1]
    return chroma_blue.mean()

def feature_avg_cr(image):
    """
    Mean of the red chroma (Cr) plane, i.e. channel 2 of the image after
    conversion with skimage.color.rgb2ycbcr.
    """
    ycbcr = skimage.color.rgb2ycbcr(image)
    chroma_red = ycbcr[:, :, 2]
    return chroma_red.mean()

def feature_std_y(image):
    """
    Standard deviation of the luminance (Y) plane, i.e. channel 0 of the
    image after conversion with skimage.color.rgb2ycbcr.
    """
    ycbcr = skimage.color.rgb2ycbcr(image)
    luma = ycbcr[:, :, 0]
    return luma.std()

def feature_std_cb(image):
    """
    Standard deviation of the blue chroma (Cb) plane, i.e. channel 1 of the
    image after conversion with skimage.color.rgb2ycbcr.
    """
    ycbcr = skimage.color.rgb2ycbcr(image)
    chroma_blue = ycbcr[:, :, 1]
    return chroma_blue.std()

def feature_std_cr(image):
    """
    Standard deviation of the red chroma (Cr) plane, i.e. channel 2 of the
    image after conversion with skimage.color.rgb2ycbcr.
    """
    ycbcr = skimage.color.rgb2ycbcr(image)
    chroma_red = ycbcr[:, :, 2]
    return chroma_red.std()

def feature_avg_hog(image):
    """
    Mean of the Histogram of Oriented Gradients (HOG) descriptor that
    skimage.feature.hog returns for the image with its default settings.
    """
    descriptor = skimage.feature.hog(image)
    return descriptor.mean()

def feature_std_hog(image):
    """
    Standard deviation of the Histogram of Oriented Gradients (HOG)
    descriptor that skimage.feature.hog returns for the image with its
    default settings.
    """
    descriptor = skimage.feature.hog(image)
    return descriptor.std()

 We expect all external sources to be cited, and significant evidence of EDA. 

<h4> Graphs </h4>

In [None]:
def feature_frame(df):
    """
    Build the feature table for `df`.

    Every extractor below is applied to each entry of the ``Pictures``
    column and stored under the listed column name. The returned frame holds
    all other columns of `df` plus the feature columns, without ``Pictures``.

    The input frame is left untouched (it is neither extended nor stripped of
    ``Pictures``), so the calling cell can be re-run on the same frame.
    """
    # Column name -> extractor, in the order the columns are added.
    extractors = {
        "size": feature_size,
        "avg_red": feature_avg_red,
        "avg_green": feature_avg_green,
        "avg_blue": feature_avg_blue,
        "aspect_ratio": feature_aspect_ratio,
        "harris": feature_harris,
        "dog": feature_dog,
        "avg_y": feature_avg_y,
        "avg_cb": feature_avg_cb,
        "avg_cr": feature_avg_cr,
        "std_y": feature_std_y,
        "std_cb": feature_std_cb,
        "std_cr": feature_std_cr,
        "avg_hog": feature_avg_hog,
        "std_hog": feature_std_hog,
    }
    pictures = df["Pictures"]
    # drop() returns a new frame, so the assignments below never touch `df`.
    features = df.drop(columns=["Pictures"])
    for column, extractor in extractors.items():
        features[column] = pictures.apply(extractor)
    return features

In [None]:
# Compute the feature table for the loaded data with feature_frame.
feature_df = feature_frame(data)


In [None]:
# One boxplot per feature, grouped by class, each in its own figure.
feature_columns = [
    'size', 'avg_red', 'avg_green', 'avg_blue', 'aspect_ratio',
    'harris', 'dog',
    'avg_y', 'avg_cb', 'avg_cr',
    'std_y', 'std_cb', 'std_cr',
    'avg_hog', 'std_hog',
]
for feature_name in feature_columns:
    sns.boxplot(x='Encoding', y=feature_name, data=feature_df)
    plt.show()


<h4> Sources </h4>

<h4> DataFrame Creation </h4>

In [None]:
def feature_frame(df):
    # NOTE(review): this re-defines feature_frame. Once this cell runs, it
    # replaces the implementation from the "Graphs" section, and this version
    # hands `df` back unchanged without computing any feature.
    return df
    #Returns data-frame with all the features now inside, and calculated


In [None]:
# NOTE(review): `data_from_nb1` is not defined in any visible cell --
# presumably the frame produced by the previous notebook; confirm it is
# loaded before this cell runs.
feature_frame(data_from_nb1)

#### Examine how these image features vary between classes.