In [8]:
!pip install imutils



In [3]:
# import the necessary packages
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from imutils import paths
import numpy as np
import pickle
import imutils
import cv2
import os

In [6]:
def image_to_feature_vector(image, size = (100, 50)):
    """Return the raw pixel intensities of `image` as a flat 1D array.

    The image is first resized with `cv2.resize` to the fixed target `size`
    and the resized array is then flattened.

    Parameters:
        image: image array as accepted by `cv2.resize`.
        size:  target size handed unchanged to `cv2.resize`.

    Returns:
        The flattened pixel values of the resized image.
    """
    resized = cv2.resize(image, size)
    return resized.flatten()

In [5]:
def extract_color_histogram(image, bins = (8, 8, 8)):
    """Describe `image` by a normalized, flattened 3D HSV color histogram.

    Parameters:
        image: image array; it is converted with `cv2.COLOR_BGR2HSV`, so it
               is treated as being in BGR channel order.
        bins:  number of histogram bins for each of the three HSV channels.

    Returns:
        The normalized histogram flattened into a 1D feature vector.
    """
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    # histogram over all three channels, using the ranges
    # [0, 180) for the first channel and [0, 256) for the other two
    channels = [0, 1, 2]
    ranges = [0, 180, 0, 256, 0, 256]
    histogram = cv2.calcHist([hsv_image], channels, None, bins, ranges)

    if not imutils.is_cv2():
        # newer OpenCV: the result is written into the array passed as the
        # second argument, i.e. the histogram is normalized in place
        cv2.normalize(histogram, histogram)
    else:
        # OpenCV 2.4.X: normalize returns the normalized histogram
        histogram = cv2.normalize(histogram)

    # the flattened histogram is the feature vector
    return histogram.flatten()

In [12]:
# root directory of the image dataset
dataset = 'cheekForKNN'

# three independent, initially empty accumulators:
#   rawImages - raw pixel intensity vectors
#   features  - feature vectors
#   labels    - class labels
rawImages, features, labels = [], [], []

In [13]:
# loop over the input images: every directory below `dataset` is treated as
# one class and the directory name is used as the label
# (expected layout: /dataset/{class}/{image_num}.jpg)

# start from empty containers so that re-running this cell does not append
# the same images a second time
rawImages, features, labels = [], [], []

for dirname, dirnames, filenames in os.walk(dataset):
    # the dataset root itself is not a class directory
    if dirname == dataset:
        continue

    # the class label is the last component of the directory path
    label = os.path.basename(dirname)

    # grab the list of images that we'll be describing
    # NOTE(review): paths.list_images presumably also descends into
    # sub-directories - if class folders are nested, the same image could be
    # collected under more than one label; verify against the dataset layout
    imagePaths = list(paths.list_images(dirname))
    for (i, imagePath) in enumerate(imagePaths):
        image = cv2.imread(imagePath)

        # cv2.imread returns None (it does not raise) when a file cannot be
        # read or decoded; skip such files instead of failing in cv2.resize
        if image is None:
            print("[WARN] could not read {}, skipping".format(imagePath))
            continue

        # extract raw pixel intensity "features", followed by a color
        # histogram to characterize the color distribution of the pixels
        # in the image
        pixels = image_to_feature_vector(image)
        hist = extract_color_histogram(image)

        # update the raw images, features, and labels matrices,
        # respectively
        rawImages.append(pixels)
        features.append(hist)
        labels.append(label)

        # show an update every 1,000 images
        if i > 0 and i % 1000 == 0:
            print("[INFO] processed {}/{}".format(i, len(imagePaths)))

In [14]:
# hold out 20% of the samples for testing and train on the remaining 80%;
# the same test_size and random_state are used for both feature sets
split_options = dict(test_size = 0.2, random_state = 51)
(train_rX, test_rX, train_rY, test_rY) = train_test_split(rawImages, labels, **split_options)
(train_fX, test_fX, train_fY, test_fY) = train_test_split(features, labels, **split_options)

# k-NN classifier on the raw pixel intensities
# (a commented-out `for i in range(1,101)` loop used to sit here,
# presumably left over from a search over n_neighbors)
model_r = KNeighborsClassifier(n_neighbors = 28, n_jobs = 3).fit(train_rX, train_rY)
acc_r = model_r.score(test_rX, test_rY)
print("rgb accuracy: {:.2f}%".format(acc_r * 100))

# k-NN classifier on the histogram representations
# (the same commented-out loop over range(1,101) preceded this model)
model_f = KNeighborsClassifier(n_neighbors = 53, n_jobs = 3).fit(train_fX, train_fY)
acc_f = model_f.score(test_fX, test_fY)
print("hist accuracy: {:.2f}%".format(acc_f * 100))

rgb accuracy: 77.00%
hist accuracy: 72.00%


In [36]:
# classify a single image with both trained models
image = cv2.imread('schannel/S__45334544.jpg')

# raw-pixel model: reshape(1, -1) turns the flat vector into a single-row
# 2D array before it is handed to predict / predict_proba
pixels = image_to_feature_vector(image)
pixel_row = pixels.reshape(1, -1)
print(model_r.predict(pixel_row))
print(model_r.predict_proba(pixel_row))

# histogram model: same procedure on the color histogram
hist = extract_color_histogram(image)
hist_row = hist.reshape(1, -1)
print(model_f.predict(hist_row))
print(model_f.predict_proba(hist_row))

['notgood']
[[ 0.25  0.75]]
['good']
[[ 0.71698113  0.28301887]]


In [37]:
# persist the trained raw-pixel model; pickle protocol 2 is the newest
# protocol that Python 2 can still read (see the "_py2" file name).
# The file is opened in a `with` block so the handle is flushed and closed
# even if pickling fails.
# Histogram-model variant, kept disabled as in the original cell:
#with open("20180408_final_knn_hsv_schannel_py2", "wb") as model_file:
#    pickle.dump(model_f, model_file, protocol = 2)
with open("20180408_final_knn_rgb_cheek_py2", "wb") as model_file:
    pickle.dump(model_r, model_file, protocol = 2)

In [9]:
# reload the saved models and classify one image with each of them
# NOTE: pickle.load can execute arbitrary code contained in the file - only
# load model files that you created yourself.
# NOTE(review): the ".pkl" files read here have different names from the
# "..._py2" file written by the dump cell - confirm they are the intended ones.

# histogram model
with open("20180408_final_knn_hsv_schannel.pkl", "rb") as model_file:
    model_1 = pickle.load(model_file)
image_1 = cv2.imread('schannel/S__45334562.jpg')
hist = extract_color_histogram(image_1)
print(model_1.predict(hist.reshape(1, -1))[0])
print(model_1.predict_proba(hist.reshape(1, -1)))

# raw-pixel model
with open("20180408_final_knn_rgb_cheek.pkl", "rb") as model_file:
    model_2 = pickle.load(model_file)
image_2 = cv2.imread('acne/S__45334562.jpg')
pixels = image_to_feature_vector(image_2)
print(model_2.predict(pixels.reshape(1, -1))[0])
print(model_2.predict_proba(pixels.reshape(1, -1)))

oil
[[ 0.  0.  1.]]

notgood
[[ 0.25  0.75]]
