#### Part1 Load Data

In [2]:
# -*- coding: utf-8 -*-
import pickle
from PIL import Image
import cv2
import numpy as np
import os
from keras.utils import np_utils
from sklearn.model_selection import train_test_split
import random
from keras import backend as K

images = []
labels = []
IMAGE_SIZE = 200


# 按照指定图像大小调整尺寸
def resize_image(image, height=IMAGE_SIZE, width=IMAGE_SIZE):
    return cv2.resize(image, (height, width))


def read_path(path_name):
    for dir_item in os.listdir(path_name):
        full_path = os.path.abspath(os.path.join(path_name, dir_item))

        if os.path.isdir(full_path):  # 如果是文件夹，继续递归调用
            read_path(full_path)
        else:  # 文件
            if dir_item.endswith('.jpg') or dir_item.endswith('.JPG') or dir_item.endswith('.png'):
                image = cv2.imread(full_path)
                image = resize_image(image)
                images.append(image)
                labels.append(path_name)

    return images, labels


def load_dataset(path_name):
    images, labels = read_path(path_name)

    images = np.array(images)
    print(images.shape)
    category = []
    for i in labels:
        category.append(i.split('/')[-1])
    temp = list(set(category))
    dic = {}
    for i in range(len(temp)):
        dic[temp[i]] = i
    for i in range(len(category)):
        labels[i] = dic[category[i]]
    labels = np.array(labels)
    print(labels.shape)
    return images, labels


class Dataset:
    def __init__(self, path_name):
        # 训练集
        self.train_images = None
        self.train_lb = None

        # 测试集
        self.test_images = None
        self.test_lb = None

        # 数据集加载路径
        self.path_name = path_name

        # 当前库采用的维度顺序
        self.input_shape = None

    # 加载数据集并按照交叉验证的原则划分数据集并进行相关预处理工作
    def load(self, img_rows=IMAGE_SIZE, img_cols=IMAGE_SIZE,
             img_channels=3, nb_classes=102):
        # 加载数据集到内存
        images, labels = load_dataset(self.path_name)

        train_images, test_images, train_labels, test_labels = train_test_split(images, labels, test_size=0.3,
                                                                                random_state=random.randint(0, 100))

        # 当前的维度顺序如果为'th'，则输入图片数据时的顺序为：channels,rows,cols，否则:rows,cols,channels
        if K.image_data_format() == 'channels_first':
            train_images = train_images.reshape(train_images.shape[0], img_channels, img_rows, img_cols)
            test_images = test_images.reshape(test_images.shape[0], img_channels, img_rows, img_cols)
            self.input_shape = (img_channels, img_rows, img_cols)
        else:
            train_images = train_images.reshape(train_images.shape[0], img_rows, img_cols, img_channels)
            test_images = test_images.reshape(test_images.shape[0], img_rows, img_cols, img_channels)
            self.input_shape = (img_rows, img_cols, img_channels)

            # 输出训练集、验证集、测试集的数量
            print(train_images.shape[0], 'train samples')
            print(test_images.shape[0], 'test samples')

            self.train_lb = train_labels
            self.test_lb = test_labels

            # 像素数据浮点化以便归一化
            train_images = train_images.astype('float32')
            test_images = test_images.astype('float32')

            # 将其归一化,图像的各像素值归一化到0~1区间
            train_images /= 255
            test_images /= 255

            self.train_images = train_images
            self.test_images = test_images


data = Dataset('./kaggle/caltech-101/')

data.load()

(9144, 200, 200, 3)
(9144,)
6400 train samples
2744 test samples


#### Part2 TODOs

TODO 利用SIFT从训练图像中提取特征  

如果有需要，你也可以在pass之外的地方填写相关代码，请自便，下同。

vec_dict 第i项： i为类别，对应的字典为所有属于该类的sift特征点的信息。注意：kp与des一一对应。

In [8]:
vec_dict = {i:{'kp':[], 'des':[]} for i in range(102)}
# des 为128维度的描述向量
# 102 是数据集的类别个数...QAQ
sift = cv2.SIFT_create()
# print(data.train_images.shape[0])  # 6400
for i in range(data.train_images.shape[0]):
    # if i==10:
    #     break
    tep = cv2.normalize(data.train_images[i], None, 0, 255, cv2.NORM_MINMAX).astype('uint8')
    kp_vector, des_vector = sift.detectAndCompute(tep, None)
    class_num = data.train_lb[i]
    
    kp_vector = list(kp_vector)
    des_vector = list(des_vector)
    vec_dict[class_num]['kp'].extend(kp_vector)
    # print(len(vec_dict[class_num]['kp']))
    vec_dict[class_num]['des'].extend(des_vector)
    # print(len(vec_dict[class_num]['kp']))
    
    # print(len(des_vector[0]))

# print(vec_dict)

In [6]:
print(vec_dict[0]['kp'][0])
print(vec_dict[0]['des'][0])


< cv2.KeyPoint 000002B8C25822E0>
[ 47.  78. 120.  70.  19.   8.   1.   0. 136.  88.  21.   8.   1.   4.
  10.  17.  16.  58.  20.   1.   1.  19.  14.   6.  50.  50.   3.   0.
   0.   1.   0.   8. 131.  29.   5.   9. 142.  70.   6.   1. 152.  39.
   1.   1.   7.  24.  45. 112.  41.   9.   2.   3.  74. 152.  68.  43.
  86.  64.   1.   2.  13.  17.   3.   4.  54.   7.   0.   2.  95.  13.
   1.   0. 152.  21.   0.   2.  13.   5.   4.  17.  42.   3.   0.  17.
 149.  40.   6.   8.  28.  34.   2.   7.  30.   5.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.]


In [10]:
bneck_value = float("inf")
for i in range(102):
    if len(vec_dict[i]['kp']) < bneck_value:
        bneck_value = len(vec_dict[i]['kp'])
        # print(bneck_value)
  
for i in range(102):
    kp_list = vec_dict[i]['kp'] = sorted((vec_dict[i]['kp']),
                                         key=lambda x: x.response,
                                         reverse=True)

TODO 为每个类别选择同样多的特征点用于聚类。特征点个数bneck_value

In [13]:
vec_list = vec_dict[0]['des'][0:bneck_value]
# print(bneck_value) 3379
for i in range(1, 102):
    vec_list = vec_list + vec_dict[i]['des'][0:bneck_value]
vec_list = np.float64(vec_list)
# print(vec_list.shape) (344658, 128)

In [176]:

# for i in range(1, 102):
#     #####
#     pass
#     #####
# vec_list = np.float64(vec_list)

vec_list = []
vec_list.append(np.array(vec_dict[0]['des'][0:bneck_value], dtype=np.float64))
# print(len(vec_list))
for i in range(1, 102):
    vec_list.append(np.array(vec_dict[i]['des'][0:bneck_value], dtype=np.float64))  # 不知道对不对 0.0
vec_list = np.float64(vec_list)
print(vec_list.shape)


(102, 3326, 128)


TODO 对提取出的特征点使用Kmeans聚类，设定合适的聚类中心个数

In [102]:
from sklearn.cluster import KMeans
#####
N_clusters = 50 # just 4 try
kmeans = KMeans(n_clusters=N_clusters)
kmeans.fit(vec_list)
#####
   
   

TODO 利用直方图统计每张图像中的特征点所属聚类中心的个数，将直方图归一化后便得到图像的特征向量。

In [103]:
num_images = data.train_images.shape[0]
hist_vector = np.zeros((num_images, N_clusters))
for i in range(num_images):
    # if i == 2:
    #     break
    tep = cv2.normalize(data.train_images[i], None, 0, 255, cv2.NORM_MINMAX).astype('uint8')
    kp_vector, des_vector = sift.detectAndCompute(tep, None)
    des_vector = np.array(des_vector, dtype=np.float64)
    label_test = kmeans.predict(des_vector)
    # print(label_test)
    for label in label_test:
        hist_vector[i][label] = hist_vector[i][label] + 1
    
    # dst = np.zeros_like(hist_vector[i])
    # print(hist_vector)

    # print(hist_vector[i])
    cv2.normalize(hist_vector[i], hist_vector[i], 0, 255, cv2.NORM_MINMAX)
    # print(hist_vector[i])
# print(hist_vector[1])

In [104]:
# print(data.train_lb[100])
print(hist_vector[19])
print(data.train_lb[6399])

[120.78947368  93.94736842  67.10526316 107.36842105  67.10526316
  26.84210526  53.68421053  67.10526316  40.26315789  40.26315789
 161.05263158  53.68421053  13.42105263  53.68421053  26.84210526
   0.          67.10526316   0.           0.         120.78947368
  53.68421053  40.26315789  40.26315789  40.26315789  80.52631579
  26.84210526  13.42105263  13.42105263  40.26315789  67.10526316
  53.68421053  53.68421053  80.52631579 255.          80.52631579
  40.26315789 120.78947368  53.68421053  80.52631579  93.94736842
  53.68421053  26.84210526  40.26315789  40.26315789  67.10526316
  67.10526316 107.36842105  67.10526316  80.52631579  67.10526316]
70


In [105]:
# Train classifier
from sklearn import svm
classifier = svm.SVC(probability=True)
classifier.fit(hist_vector, data.train_lb)

TODO 构建测试集并计算模型准确率

In [106]:
# 使用SVM构建分类器
# 你可以自行构建分类器，也可以使用SVM
# TODO 构建测试集并计算模型准确率
num_test_images = data.test_images.shape[0]
hist_test_vector = np.zeros((num_test_images, N_clusters))
# print(num_test_images)
for i in range(num_test_images):
    # if i==100:
    #     break
    tep = cv2.normalize(data.test_images[i], None, 0, 255, cv2.NORM_MINMAX).astype('uint8')
    kp_vector, des_vector = sift.detectAndCompute(tep, None)
    des_vector = np.array(des_vector, dtype=np.float64)
    
    label_pred_test = kmeans.predict(des_vector)
    # label_pred_test = classifier.predict(vec_test)
    for label in label_pred_test:
        hist_test_vector[i][label] = hist_test_vector[i][label] + 1
        
    cv2.normalize(hist_test_vector[i], hist_test_vector[i], 0, 255, cv2.NORM_MINMAX)
    # print(hist_test_vector[i])
    # print()


acc = classifier.predict(hist_test_vector)-data.test_lb
tep = len(acc[acc==0])
# print(tep)
# print(len(data.test_lb))
print('accuracy', tep/len(data.test_lb))

accuracy 0.3261661807580175


In [107]:
pred = classifier.predict(hist_test_vector)

In [108]:
for i in range(10):
    print(pred[i])

66
20
9
52
3
17
94
97
47
49


In [109]:
print(hist_test_vector[1])

[255.  17.  34.  17.  17.  34.   0.  17.  17.  34.  17.  17.  51.   0.
  51. 170.   0.  51.   0.  51.  51.  17.  34.  17.   0.  17.   0.   0.
  34. 153.  17.  17.  17. 136.   0.  17.   0.  51.  68.  34.   0.  17.
  17.  17.  34.  34.  17.  17.  17.   0.]
