In [None]:
import time
import cv2
import datetime
import numpy as np
import matplotlib.pyplot as plt

from keras.datasets import cifar10
from scipy.spatial.distance import cdist
from skimage.feature import hog, local_binary_pattern
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report, accuracy_score
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF, Matern, RationalQuadratic, ExpSineSquared, DotProduct, WhiteKernel

In [None]:
# Load the CIFAR-10 train/test split through Keras.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Collapse both label arrays to 1-D so they can be sliced per sample.
y_train, y_test = y_train.flatten(), y_test.flatten()

# Sanity check: print the shape of every array, one per line.
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape, sep='\n')

In [None]:
# Class names; the list position is the integer class id used to index it.
label = [
    'airplane',
    'automobile',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
]

In [None]:
def showimage(image, train):
    """Display one image without axes and return its class name.

    Parameters
    ----------
    image : int
        Index of the sample inside the selected split.
    train : bool
        When equal to ``True`` the sample is taken from ``x_train``/``y_train``,
        otherwise from ``x_test``/``y_test``.

    Returns
    -------
    str
        The entry of ``label`` that corresponds to the sample's class id.
    """
    # The explicit `== True` is kept on purpose: only True (or 1) selects the
    # training split, any other value falls through to the test split.
    images, targets = (x_train, y_train) if train == True else (x_test, y_test)

    plt.imshow(images[image])
    plt.axis('off')
    plt.show()

    return label[targets[image]]

In [None]:
# Show the first training image; the returned class name is the cell output.
showimage(image=0, train=True)

# Grayscale

In [None]:
def gray(train, test):
    """Convert two batches of RGB images to single-channel grayscale.

    Parameters
    ----------
    train, test : iterable of ndarray
        Colour images in RGB channel order (the order in which this notebook
        hands the same arrays to ``plt.imshow``).

    Returns
    -------
    tuple of ndarray
        ``(train_gray, test_gray)``: the converted images of each batch,
        stacked in input order.
    """

    def _to_gray(images):
        # The inputs are RGB, so the RGB conversion code is required. With
        # COLOR_BGR2GRAY the red and blue luminance weights would be swapped.
        return np.array([cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) for image in images])

    return _to_gray(train), _to_gray(test)

# Flatten

In [None]:
def flatten(train, test):
    """Turn every sample of both arrays into a single feature vector.

    The leading (sample) axis is preserved; all remaining axes are merged
    into one.

    Returns
    -------
    tuple of ndarray
        ``(train_2d, test_2d)``, each of shape ``(n_samples, n_features)``.
    """
    flat_train = train.reshape((train.shape[0], -1))
    flat_test = test.reshape((test.shape[0], -1))
    return flat_train, flat_test

# PCA

In [None]:
def pca(train, test, variance):
    """Project both image sets onto principal components learned from ``train``.

    Parameters
    ----------
    train, test : ndarray
        Image batches; they are flattened to 2-D with ``flatten`` first.
    variance
        Passed unchanged as ``n_components`` to ``PCA``.

    Returns
    -------
    tuple of ndarray
        ``(train_projected, test_projected)``.
    """
    flat_train, flat_test = flatten(train, test)

    # The projection is learned from the training samples only and then
    # applied to both sets.
    reducer = PCA(n_components=variance)
    reducer.fit(flat_train)

    projected_train = reducer.transform(flat_train)
    projected_test = reducer.transform(flat_test)
    return projected_train, projected_test

# SIFT

In [None]:
def _sift_descriptors(extractor, images):
    """Return one SIFT descriptor matrix per image (``None`` where the extractor yields none)."""
    return [extractor.detectAndCompute(image, None)[1] for image in images]


def _bow_histograms(descriptor_sets, vocabulary):
    """Count, per image, how many descriptors lie nearest to each visual word."""
    num_words = len(vocabulary)
    histograms = []
    for descriptors in descriptor_sets:
        if descriptors is None:
            # No descriptors for this image: all-zero histogram.
            counts = np.zeros(num_words, dtype=int)
        else:
            nearest = np.argmin(cdist(descriptors, vocabulary), axis=1)
            # bincount replaces the manual "+= 1" loop; minlength keeps empty
            # trailing words in the histogram.
            counts = np.bincount(nearest, minlength=num_words).astype(int)
        histograms.append(counts)
    return np.array(histograms)


def sift(train, test, num_cluster, random_state=None):
    """Encode images as bag-of-visual-words histograms of SIFT descriptors.

    A k-means vocabulary of ``num_cluster`` visual words is fitted on the
    descriptors of ``train`` only; every image of both sets is then described
    by the number of its descriptors assigned to each word.

    Parameters
    ----------
    train, test : iterable of ndarray
        Images handed to ``cv2.SIFT_create().detectAndCompute``.
    num_cluster : int
        Vocabulary size (number of k-means clusters).
    random_state : optional
        Forwarded to ``KMeans`` so the vocabulary can be made reproducible.
        The default ``None`` keeps the previous, unseeded behaviour.

    Returns
    -------
    tuple of ndarray
        ``(train_hist, test_hist)`` with one row of ``num_cluster`` integer
        counts per image.

    Raises
    ------
    ValueError
        If no descriptor at all was extracted from ``train``, so there is
        nothing to cluster.
    """
    extractor = cv2.SIFT_create()
    train_descriptors = _sift_descriptors(extractor, train)
    test_descriptors = _sift_descriptors(extractor, test)

    # Pool every training descriptor; images without descriptors are skipped.
    all_descriptors = [
        row
        for descriptors in train_descriptors
        if descriptors is not None
        for row in descriptors
    ]
    if not all_descriptors:
        raise ValueError("no SIFT descriptors found in the training images; cannot build a vocabulary")

    kmeans = KMeans(n_clusters=num_cluster, random_state=random_state)
    kmeans.fit(all_descriptors)
    vocabulary = kmeans.cluster_centers_

    return (
        _bow_histograms(train_descriptors, vocabulary),
        _bow_histograms(test_descriptors, vocabulary),
    )

# HOG

In [None]:
def c_hog(train, test, orientations, ppc, cpb):
    """Compute HOG feature vectors for two batches of multichannel images.

    Parameters
    ----------
    train, test : iterable of ndarray
        Images whose last axis is the channel axis.
    orientations : int
        Number of orientation bins passed to ``hog``.
    ppc : int
        Side length of a square cell, passed as ``pixels_per_cell=(ppc, ppc)``.
    cpb : int
        Side length of a square block, passed as ``cells_per_block=(cpb, cpb)``.

    Returns
    -------
    tuple of ndarray
        ``(train_hog, test_hog)`` with one descriptor row per image.
    """

    def _describe(images):
        # visualize=False: only the descriptor is needed. Asking for the
        # rendered HOG image made every call do extra work whose result was
        # thrown away.
        return np.array([
            hog(
                image,
                orientations=orientations,
                pixels_per_cell=(ppc, ppc),
                cells_per_block=(cpb, cpb),
                visualize=False,
                channel_axis=-1,
            )
            for image in images
        ])

    return _describe(train), _describe(test)

# LBP

In [None]:
def lbp(train, test, radius):
    """Describe images by normalised histograms of uniform local binary patterns.

    Both batches are converted with ``gray`` first. Each image is then coded
    with ``local_binary_pattern`` using ``8 * radius`` sampling points at the
    given radius ('uniform' method) and summarised as a density histogram with
    ``8 * radius + 2`` bins.

    Returns
    -------
    tuple of ndarray
        ``(train_lbp, test_lbp)`` with one histogram row per image.
    """
    n_points = 8 * radius
    n_bins = n_points + 2

    def _histograms(images):
        rows = []
        for picture in images:
            codes = local_binary_pattern(picture, n_points, radius, 'uniform')
            density, _ = np.histogram(codes, bins=n_bins, range=(0, n_bins), density=True)
            rows.append(density)
        return np.array(rows)

    train_gray, test_gray = gray(train, test)
    return _histograms(train_gray), _histograms(test_gray)

# Optimize

### PCA

In [None]:
# Candidate values for the PCA `variance` argument.
var = [0.5, 0.6, 0.7, 0.8, 0.9, 0.99]
for ratio in var:
    for fold in range(5):
        start = time.time()

        # Each fold is a block of 10,000 consecutive training images:
        # the first 2,000 are used for fitting, the remaining 8,000 for validation.
        lo = fold * 10000
        cut = lo + 2000
        hi = lo + 10000

        train_x, valid_x = pca(x_train[lo:cut], x_train[cut:hi], ratio)
        train_y, valid_y = y_train[lo:cut], y_train[cut:hi]

        kernel = RationalQuadratic()
        gpc = GaussianProcessClassifier(kernel=kernel)
        gpc.fit(train_x, train_y)
        pred_y = gpc.predict(valid_x)

        end = time.time()
        # Report: elapsed time, [parameter, fold number], validation accuracy.
        print(datetime.timedelta(seconds=int(end - start)))
        print([ratio, fold + 1])
        print(accuracy_score(valid_y, pred_y))

In [None]:
# Recorded validation accuracy of the PCA search above:
# one row per `variance` value, one entry per fold (folds 1-5).
# 0.5 [0.259375,0.2665,0.263,0.26625,0.27725]
# 0.6 [0.320625,0.313125,0.3235,0.302,0.327125]
# 0.7 [0.3465,0.34225,0.35,0.339625,0.341375]
# 0.8 [0.367625,0.3595,0.372625,0.36725,0.35875]
# 0.9 [0.372375,0.365625,0.363125,0.371375,0.367125]
# 0.99 [0.368375,0.3645,0.374625,0.371125,0.364625]

### SIFT

In [None]:
# The extractor is deliberately NOT named `sift`: that name belongs to the
# sift() feature function defined earlier, and rebinding it here would make
# every later sift(...) call fail.
sift_extractor = cv2.SIFT_create()

# One descriptor matrix per training image; an entry may be None, which the
# pooling below (and the histogram code) skips.
sift_descriptors_train = [
    sift_extractor.detectAndCompute(image, None)[1] for image in x_train
]

# Flat pool of all individual training descriptors, used to fit the vocabulary.
sift_all_descriptors = [
    des
    for descriptor in sift_descriptors_train
    if descriptor is not None
    for des in descriptor
]

In [None]:
# Vocabulary sizes (number of k-means clusters) to evaluate.
num = list(range(190, 201, 10))
for num_cluster in num:

    # --- Vocabulary and per-image histograms: built once per vocabulary size ---
    start = time.time()
    # Fixed seed: k-means initialisation is random, so without it every run
    # produces a different vocabulary and the accuracies are not reproducible.
    kmeans = KMeans(n_clusters=num_cluster, random_state=0)
    kmeans.fit(sift_all_descriptors)
    sift_bow_dict = kmeans.cluster_centers_

    train_sift = []
    for image in sift_descriptors_train:
        if image is None:
            # No descriptors for this image: all-zero histogram.
            features = np.zeros(num_cluster, dtype=int)
        else:
            distance = cdist(image, sift_bow_dict)
            argmin = np.argmin(distance, axis=1)
            # Count the descriptors assigned to each visual word.
            features = np.bincount(argmin, minlength=num_cluster).astype(int)
        train_sift.append(features)
    end = time.time()
    print('vocabulary:', datetime.timedelta(seconds=int(end - start)))

    # --- 5 folds: 10,000 consecutive images each, 2,000 to fit and 8,000 to validate ---
    for j in range(0, 5):

        # Restart the timer for every fold so the printed duration is per
        # fold (as in the other search cells) rather than cumulative.
        start = time.time()

        train_x = train_sift[j*10000:j*10000+2000]
        valid_x = train_sift[j*10000+2000:(j+1)*10000]
        train_y = y_train[j*10000:j*10000+2000]
        valid_y = y_train[j*10000+2000:(j+1)*10000]

        kernel = RationalQuadratic()
        gpc = GaussianProcessClassifier(kernel=kernel)
        gpc.fit(train_x, train_y)

        pred_y = gpc.predict(valid_x)
        end = time.time()
        print(datetime.timedelta(seconds = int(end-start)))
        print([num_cluster, j+1])
        print(accuracy_score(valid_y, pred_y))

In [None]:
# Recorded validation accuracy of the SIFT search above:
# one row per number of clusters, one entry per fold (folds 1-5).
# NOTE(review): four of the five folds sit at ~0.10 for every setting, which is
# chance level for 10 classes - worth checking whether the classifier collapsed
# to a single class on these features.
# 10 [0.098125,0.097,0.12525,0.100125,0.096875]
# 20 [0.098125,0.097,0.12925,0.100125,0.096875]
# 30 [0.098125,0.097,0.134,0.100125,0.096875]
# 40 [0.098125,0.097,0.136125,0.100125,0.096875]
# 50 [0.098125,0.097,0.138125,0.100125,0.096875]
# 60 [0.098125,0.097,0.13875,0.100125,0.096875]
# 70 [0.098125,0.097,0.14,0.100125,0.096875]
# 80 [0.098125,0.097,0.140125,0.100125,0.096875]
# 90 [0.098125,0.097,0.139375,0.100125,0.096875]
# 100 [0.098125,0.097,0.137,0.100125,0.096875]
# 110 [0.098125,0.097,0.13925,0.100125,0.096875]
# 120 [0.098125,0.097,0.137,0.100125,0.096875]
# 130 [0.098125,0.097,0.137,0.100125,0.096875]
# 140 [0.098125,0.097,0.1355,0.100125,0.096875]
# 150 [0.098125,0.097,0.1365,0.100125,0.096875]
# 160 [0.098125,0.097,0.1385,0.100125,0.096875]
# 170 [0.098125,0.097,0.133875,0.100125,0.096875]
# 180 [0.098125,0.097,0.136,0.100125,0.096875]
# 190 [0.098125,0.097,0.1365,0.100125,0.096875]
# 200 [0.098125,0.097,0.134875,0.100125,0.096875]

### HOG

In [None]:
# HOG grid: orientation bins, pixels per cell (square), cells per block (square).
ori = [8, 9]
ppc = [4, 8]
cpb = [1, 2, 3]
for n_orient in ori:
    for cell_px in ppc:
        for block_cells in cpb:
            for fold in range(5):

                start = time.time()

                # Each fold is a block of 10,000 consecutive training images:
                # the first 2,000 are used for fitting, the remaining 8,000 for validation.
                lo = fold * 10000
                cut = lo + 2000
                hi = lo + 10000

                train_x, valid_x = c_hog(x_train[lo:cut], x_train[cut:hi], n_orient, cell_px, block_cells)
                train_y, valid_y = y_train[lo:cut], y_train[cut:hi]

                kernel = RationalQuadratic()
                gpc = GaussianProcessClassifier(kernel=kernel)
                gpc.fit(train_x, train_y)
                pred_y = gpc.predict(valid_x)

                end = time.time()
                # Report: elapsed time, [parameters..., fold number], validation accuracy.
                print(datetime.timedelta(seconds=int(end - start)))
                print([n_orient, cell_px, block_cells, fold + 1])
                print(accuracy_score(valid_y, pred_y))

In [None]:
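# Recorded validation accuracy of the HOG search above, one entry per fold (folds 1-5).
# Row key = orientations, pixels per cell, cells per block written together,
# e.g. 843 = 8 orientations, 4x4 pixels per cell, 3x3 cells per block.
# NOTE(review): the rows with 4 pixels per cell sit mostly at ~0.10, which is
# chance level for 10 classes - worth checking whether the classifier collapsed
# to a single class for those settings.
# 841 [0.098125,0.097,0.162875,0.100125,0.096875]
# 842 [0.098125,0.097,0.1635,0.100125,0.096875]
# 843 [0.098125,0.177625,0.1635,0.100125,0.096875]
# 881 [0.409625,0.403125,0.41,0.40275,0.39925]
# 882 [0.48875,0.487875,0.486625,0.47775,0.486125]
# 883 [0.486625,0.499,0.49025,0.47875,0.48475]
# 941 [0.098125,0.097,0.163125,0.100125,0.096875]
# 942 [0.098125,0.097,0.163875,0.100125,0.096875]
# 943 [0.098125,0.097,0.164125,0.100125,0.096875]
# 981 [0.411875,0.41275,0.412375,0.4075,0.4025]
# 982 [0.4865,0.490125,0.488,0.47725,0.48525]
# 983 [0.49275,0.4925,0.49325,0.481375,0.48975]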

### LBP

In [None]:
# LBP radii to evaluate (1 through 10).
rad = list(range(1, 11))
for radius in rad:
    for fold in range(5):

        start = time.time()

        # Each fold is a block of 10,000 consecutive training images:
        # the first 2,000 are used for fitting, the remaining 8,000 for validation.
        lo = fold * 10000
        cut = lo + 2000
        hi = lo + 10000

        train_x, valid_x = lbp(x_train[lo:cut], x_train[cut:hi], radius)
        train_y, valid_y = y_train[lo:cut], y_train[cut:hi]

        kernel = RationalQuadratic()
        gpc = GaussianProcessClassifier(kernel=kernel)
        gpc.fit(train_x, train_y)
        pred_y = gpc.predict(valid_x)

        end = time.time()
        # Report: elapsed time, [parameter, fold number], validation accuracy.
        print(datetime.timedelta(seconds=int(end - start)))
        print([radius, fold + 1])
        print(accuracy_score(valid_y, pred_y))