In [1]:
# import the necessary packages
import numpy as np
import imutils
import cv2
import os
import skimage
import itertools
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.cross_validation import train_test_split
from sklearn import metrics
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, recall_score
from imutils import paths



In [2]:
def image_to_feature_vector(image, size=(32, 32)):
	# resize the image to a fixed size, then flatten the image into
	# a list of raw pixel intensities
	return cv2.resize(image, size).flatten()

def extract_color_histogram(image, bins=(8, 8, 8)):
	# extract a 3D color histogram from the HSV color space using
	# the supplied number of `bins` per channel
	#hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
	hist = cv2.calcHist(image, [0, 1, 2], None, bins,[0, 180, 0, 256, 0, 256])
    
	# handle normalizing the histogram if we are using OpenCV 2.4.X
	if imutils.is_cv2():
		hist = cv2.normalize(hist)

	# otherwise, perform "in place" normalization in OpenCV 3
	else:
		cv2.normalize(hist, hist)
        
	# return the flattened histogram as the feature vector
	return hist.flatten()

In [3]:
# grab the list of images that we'll be describing
print("[INFO] describing images...")
imagePaths = list(paths.list_images("C:\\Users\\JBWV9182\\Desktop\\train_vega"))

# initialize the raw pixel intensities matrix, the features matrix,
# and labels list
rawImages = []
features = []
labels = []

# loop over the input images
for (i, imagePath) in enumerate(imagePaths):
	# load the image and extract the class label (assuming that our
	# path as the format: /path/to/dataset/{class}.{image_num}.jpg
	image = cv2.imread(imagePath)
	image_flip = cv2.flip(image,1)
	for i in range(1, 11):
		image_dist = np.uint8(image + i*0.2 * image.std() * np.random.random(image.shape))
	label = imagePath.split(os.path.sep)[-1].split(".")[0]
    
	# extract raw pixel intensity "features", followed by a color
	# histogram to characterize the color distribution of the pixels
	# in the image
	pixels = image_to_feature_vector(image)
	pixels_flip = image_to_feature_vector(image_flip)
	pixels_dist = image_to_feature_vector(image_dist)
	hist = extract_color_histogram(image)
	hist_flip = extract_color_histogram(image_flip)
	hist_dist = extract_color_histogram(image_dist)

	# update the raw images, features and labels matricies,
	# respectively
	rawImages.append(pixels)
	rawImages.append(pixels_flip)
	rawImages.append(pixels_dist)
	features.append(hist)
	features.append(hist_flip)
	features.append(hist_dist)
	labels.append(label)
	labels.append(label)
	labels.append(label)


	# show an update every 1,000 images
	if i > 0 and i % 1000 == 0:
		print("[INFO] processed {}/{}".format(i, len(imagePaths)))

[INFO] describing images...


In [4]:
# partition the data into training and testing splits, using 75%
# of the data for training and the remaining 25% for testing
(trainRI, testRI, trainRL, testRL) = train_test_split(
	rawImages, labels, test_size=0.25, random_state=42)
(trainFeat, testFeat, trainLabels, testLabels) = train_test_split(
	features, labels, test_size=0.25, random_state=42)

In [5]:
# recherche du meilleur k 
neighbors = list(range(1,11))

# train and evaluate a k-NN classifer on the raw pixel intensities
print("[INFO] evaluating raw pixel accuracy...")
for k in neighbors :
    model = KNeighborsClassifier(n_neighbors=k, n_jobs=10)
    model.fit(trainRI, trainRL)
    acc = model.score(testRI, testRL)
    print("[INFO] raw pixel accuracy for ",k," neighbors : {:.2f}% ". format(acc * 100))
    print(metrics.classification_report(testRL, model.predict(testRI)))
    print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")

    
print("-----------------------------------------------------------------")
print("-----------------------------------------------------------------")

# train and evaluate a k-NN classifer on the histogram
# representations
print("[INFO] evaluating histogram accuracy...")
for k in neighbors : 
    model = KNeighborsClassifier(n_neighbors=k, n_jobs=10)
    model.fit(trainFeat, trainLabels)
    acc = model.score(testFeat, testLabels)
    print("[INFO] histogram accuracy for ",k," neighbors : {:.2f}% ". format(acc * 100))
    print(metrics.classification_report(testLabels, model.predict(testFeat)))
    print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")

[INFO] evaluating raw pixel accuracy...
[INFO] raw pixel accuracy for  1  neighbors : 20.24% 


  'precision', 'predicted', average, warn_for)


             precision    recall  f1-score   support

       Ankh       0.00      0.00      0.00        15
      Plume       1.00      0.23      0.38        13
     bouche       1.00      0.18      0.31        11
      hibou       0.50      0.06      0.11        17
      lapin       0.06      1.00      0.11         3
       oeil       0.00      0.00      0.00         9
      skara       0.31      0.50      0.38        16

avg / total       0.45      0.20      0.20        84

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[INFO] raw pixel accuracy for  2  neighbors : 19.05% 
             precision    recall  f1-score   support

       Ankh       0.00      0.00      0.00        15
      Plume       1.00      0.31      0.47        13
     bouche       0.67      0.18      0.29        11
      hibou       0.50      0.06      0.11        17
      lapin       0.05      1.00      0.09         3
       oeil       1.00      0.11      0.20         9
      skara       0.42      0.31

             precision    recall  f1-score   support

       Ankh       0.25      0.53      0.34        15
      Plume       0.20      0.38      0.26        13
     bouche       0.17      0.09      0.12        11
      hibou       0.00      0.00      0.00        17
      lapin       0.25      0.33      0.29         3
       oeil       0.12      0.11      0.12         9
      skara       1.00      0.19      0.32        16

avg / total       0.31      0.23      0.20        84

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[INFO] histogram accuracy for  6  neighbors : 25.00% 
             precision    recall  f1-score   support

       Ankh       0.34      0.73      0.47        15
      Plume       0.17      0.31      0.22        13
     bouche       0.11      0.09      0.10        11
      hibou       0.09      0.06      0.07        17
      lapin       0.50      0.33      0.40         3
       oeil       0.00      0.00      0.00         9
      skara       1.00      0.19