为了创建一个目标识别系统，需要从每张图像中提取特征向量。每张图像需要有一个标识标志，以用于匹配。     
我们用一个叫**视觉码本**的概念来创建图像的识别标志。这个码本本质上是一个字典，用来为训练数据集中的图像生成描述（特征表示）。     
我们用**向量量化**方法将很多特征点进行聚类并得出中心点。这些中心点作为视觉码本的元素。    

本例提供了包含3个类的示例训练数据集，每一类包含20幅图像，这些图像可以在[网址](http://www.vision.caltech.edu/html-files/archive.html)下载  

为了创建一个健壮的目标识别系统，需要数万幅图像。该领域有一个非常著名的数据集叫Caltech256，它包括256类图像，共约3万幅，每一类至少包含80幅示例图像。可以在[网址](http://www.vision.caltech.edu/Image_Datasets/Caltech256)下载。

In [None]:
import os
import sys
import pickle
import json
import cv2
import numpy as np
from sklearn.cluster import KMeans

class StarFeatureDetector(object):
    """Small wrapper that owns a Star keypoint detector instance."""

    def __init__(self):
        # The detector is created through the cv2.xfeatures2d namespace.
        star = cv2.xfeatures2d.StarDetector_create()
        self.detector = star

    def detect(self, img):
        """Run the wrapped detector on ``img`` and return its result unchanged."""
        found = self.detector.detect(img)
        return found
    
class FeatureBuilder(object):
    """Extracts local descriptors from images and clusters them into a codebook."""

    def extract_features(self, img):
        """Return the descriptors of ``img``.

        Keypoints are located with the Star detector, then SIFT descriptors
        are computed at those locations via ``compute_sift_features``.
        The result is passed through as-is; callers should be prepared for
        ``None`` when no descriptors were produced (OpenCV behaviour --
        confirm against the installed version).
        """
        keypoints = StarFeatureDetector().detect(img)
        keypoints, feature_vectors = compute_sift_features(img, keypoints)
        return feature_vectors

    def get_codewords(self, input_map, scaling_size, max_samples=12):
        """Cluster descriptors from a sample of the images into codewords.

        Args:
            input_map: iterable of dicts with ``'object_class'`` and
                ``'image_path'`` keys. Items of the same class are expected
                to be adjacent, because the per-class counter restarts
                whenever the class label changes.
            scaling_size: size forwarded to ``resize_image``.
            max_samples: maximum number of images per class that contribute
                descriptors. Only a subset is used because the centroids do
                not change much with more images.

        Returns:
            The ``(kmeans, centroids)`` pair produced by
            ``BagOfWords().cluster``.

        Raises:
            ValueError: if no descriptors could be collected at all.
        """
        keypoints_all = []

        count = 0
        cur_class = ''
        for item in input_map:
            object_class = item['object_class']

            # Restart the quota on every class change, so that a class with
            # fewer than max_samples images cannot eat into the next class.
            if object_class != cur_class:
                cur_class = object_class
                count = 0
            if count >= max_samples:
                continue

            # cv2.imread reports an unreadable file by returning None.
            img = cv2.imread(item['image_path'])
            if img is None:
                print("Skipping unreadable image", item['image_path'])
                continue
            img = resize_image(img, scaling_size)

            feature_vectors = self.extract_features(img)
            # An image without descriptors contributes nothing; extending
            # the list with None would raise a TypeError.
            if feature_vectors is None or len(feature_vectors) == 0:
                continue
            keypoints_all.extend(feature_vectors)

            # Only images that actually contributed count towards the quota.
            count += 1
            if count == max_samples:
                print("Built centroids for", object_class)

        if not keypoints_all:
            raise ValueError("No feature descriptors could be extracted "
                             "from the input images")

        # Vector-quantize the pooled descriptors.
        kmeans, centroids = BagOfWords().cluster(keypoints_all)
        return kmeans, centroids
    
class BagOfWords(object):
    """Bag-of-words model: k-means vector quantization plus histogram features."""

    def __init__(self, num_clusters=32):
        self.num_dims = 128
        self.num_clusters = num_clusters
        self.num_retries = 10

    def cluster(self, datapoints):
        """Quantize ``datapoints`` with k-means.

        Returns:
            ``(kmeans, centroids)``: the fitted estimator and its
            ``cluster_centers_``.
        """
        kmeans = KMeans(n_clusters=self.num_clusters,
                        n_init=max(self.num_retries, 1),
                        max_iter=10, tol=1.0)
        res = kmeans.fit(datapoints)
        centroids = res.cluster_centers_
        return kmeans, centroids

    def normalize(self, input_data):
        """Scale ``input_data`` so that it sums to 1.

        Input whose sum is not positive is returned unchanged, which avoids
        dividing by zero.
        """
        sum_input = np.sum(input_data)

        if sum_input > 0:
            return input_data / sum_input
        return input_data

    def construct_feature(self, img, kmeans, centroids):
        """Build the normalized codeword histogram for ``img``.

        Args:
            img: image passed to the Star detector and to
                ``compute_sift_features``.
            kmeans: fitted estimator whose ``predict`` maps descriptors to
                codeword indices.
            centroids: unused here; kept for interface compatibility.

        Returns:
            An array of shape ``(1, self.num_clusters)``, or ``None`` when
            the image produced no descriptors.
        """
        keypoints = StarFeatureDetector().detect(img)
        keypoints, feature_vectors = compute_sift_features(img, keypoints)

        # Without descriptors there is nothing to predict on. Returning None
        # lets callers skip the image instead of crashing inside predict().
        if feature_vectors is None or len(feature_vectors) == 0:
            return None

        labels = kmeans.predict(feature_vectors)

        # Count how many descriptors fall into each codeword. np.add.at
        # accumulates repeated indices correctly, unlike fancy-index "+=".
        feature_vector = np.zeros(self.num_clusters)
        np.add.at(feature_vector, labels, 1)

        feature_vector_img = np.reshape(feature_vector,
                                        (1, feature_vector.shape[0]))
        return self.normalize(feature_vector_img)

def get_feature_map(input_map, kmeans, centroids, scaling_size):
    """Compute a bag-of-words feature vector for every image in ``input_map``.

    Args:
        input_map: iterable of dicts with ``'object_class'`` and
            ``'image_path'`` keys.
        kmeans: fitted clustering estimator forwarded to
            ``BagOfWords.construct_feature``.
        centroids: codebook centroids forwarded to
            ``BagOfWords.construct_feature``.
        scaling_size: size forwarded to ``resize_image``.

    Returns:
        A list of dicts with ``'object_class'`` and ``'feature_vector'``
        keys. Images that cannot be read, or whose feature vector is
        ``None``, are left out.
    """
    feature_map = []

    for item in input_map:
        object_class = item['object_class']
        image_path = item['image_path']

        print("Extracting features for", image_path)
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread reports an unreadable file by returning None;
            # resizing it would fail with an AttributeError.
            print("Skipping unreadable image", image_path)
            continue
        img = resize_image(img, scaling_size)

        feature_vector = BagOfWords().construct_feature(
            img, kmeans, centroids)

        if feature_vector is not None:
            feature_map.append({'object_class': object_class,
                                'feature_vector': feature_vector})

    return feature_map

def compute_sift_features(img, keypoints):
    """Compute SIFT descriptors for ``img`` at the given keypoints.

    The image is converted from BGR to grayscale before the descriptors
    are computed.

    Returns:
        The ``(keypoints, descriptors)`` pair returned by the SIFT
        extractor's ``compute`` call.

    Raises:
        TypeError: if ``img`` is ``None``.
    """
    if img is None:
        raise TypeError('Invalid input image')

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    sift = cv2.xfeatures2d.SIFT_create()
    kept_keypoints, descs = sift.compute(gray, keypoints)
    return kept_keypoints, descs

def resize_image(input_img, new_size):
    """Resize ``input_img`` so that its shorter side becomes ``new_size``.

    The other side is scaled by the same factor, so the aspect ratio is
    kept (up to integer truncation of the target dimensions).
    """
    height, width = input_img.shape[:2]

    # The shorter of the two sides determines the scaling factor.
    shorter_side = width if width < height else height
    scale = new_size / float(shorter_side)

    # cv2.resize receives the target size as (width, height).
    target = (int(width * scale), int(height * scale))
    return cv2.resize(input_img, target)

def load_training_data(input_folder):
    """Collect all ``.jpg`` files under ``input_folder`` with their class labels.

    The class label of an image is the name of the directory that directly
    contains it.

    Args:
        input_folder: root directory that is walked recursively.

    Returns:
        A list of ``{'object_class': ..., 'image_path': ...}`` dicts, ordered
        by directory and then by file name.

    Raises:
        IOError: if ``input_folder`` is not an existing directory.
    """
    if not os.path.isdir(input_folder):
        raise IOError("The folder " + input_folder + " doesn't exist")

    training_data = []
    for root, dirs, files in os.walk(input_folder):
        # Sorting in place makes os.walk descend in a deterministic order.
        dirs.sort()

        # Take the label from the directory itself rather than splitting the
        # path on '/', so that it also works with OS-native separators and
        # with a trailing slash on the input folder.
        object_class = os.path.basename(os.path.normpath(root))

        for filename in sorted(files):
            if not filename.endswith('.jpg'):
                continue
            training_data.append({'object_class': object_class,
                                  'image_path': os.path.join(root, filename)})

    return training_data

if __name__=='__main__':
    # Input folder, target image size and output pickle files.
    data_folder = "./training_images/"
    scaling_size = 200
    codebook_file = "codebook.pkl"
    feature_map_file = "feature_map.pkl"

    # Load the labelled training image paths.
    training_data = load_training_data(data_folder)

    # Build the visual codebook and persist it if an output path is set.
    print("====== Building visual codebook ======")
    builder = FeatureBuilder()
    kmeans, centroids = builder.get_codewords(training_data, scaling_size)
    if codebook_file:
        with open(codebook_file, 'wb') as f:
            pickle.dump((kmeans, centroids), f)

    # Extract a feature vector for every training image and persist the map.
    print("\n====== Building the feature map ======")
    feature_map = get_feature_map(
        training_data, kmeans, centroids, scaling_size)
    if feature_map_file:
        with open(feature_map_file, 'wb') as f:
            pickle.dump(feature_map, f)

