#### Part1 Load Data

In [15]:
# -*- coding: utf-8 -*-
import pickle
from PIL import Image
import cv2
import numpy as np
import os
from keras.utils import np_utils
from sklearn.model_selection import train_test_split
import random
from keras import backend as K

images = []
labels = []
IMAGE_SIZE = 200


# 按照指定图像大小调整尺寸
def resize_image(image, height=IMAGE_SIZE, width=IMAGE_SIZE):
    return cv2.resize(image, (height, width))


def read_path(path_name):
    for dir_item in os.listdir(path_name):
        full_path = os.path.abspath(os.path.join(path_name, dir_item))

        if os.path.isdir(full_path):  # 如果是文件夹，继续递归调用
            read_path(full_path)
        else:  # 文件
            if dir_item.endswith('.jpg') or dir_item.endswith('.JPG') or dir_item.endswith('.png'):
                image = cv2.imread(full_path)
                image = resize_image(image)
                images.append(image)
                labels.append(path_name)

    return images, labels


def load_dataset(path_name):
    images, labels = read_path(path_name)

    images = np.array(images)
    print(images.shape)
    category = []
    for i in labels:
        category.append(i.split('/')[-1])
    temp = list(set(category))
    dic = {}
    for i in range(len(temp)):
        dic[temp[i]] = i
    for i in range(len(category)):
        labels[i] = dic[category[i]]
    labels = np.array(labels)
    print(labels.shape)
    return images, labels


class Dataset:
    def __init__(self, path_name):
        # 训练集
        self.train_images = None
        self.train_lb = None

        # 测试集
        self.test_images = None
        self.test_lb = None

        # 数据集加载路径
        self.path_name = path_name

        # 当前库采用的维度顺序
        self.input_shape = None

    # 加载数据集并按照交叉验证的原则划分数据集并进行相关预处理工作
    def load(self, img_rows=IMAGE_SIZE, img_cols=IMAGE_SIZE,
             img_channels=3, nb_classes=102):
        # 加载数据集到内存
        images, labels = load_dataset(self.path_name)

        train_images, test_images, train_labels, test_labels = train_test_split(images, labels, test_size=0.3,
                                                                                random_state=random.randint(0, 100))

        # 当前的维度顺序如果为'th'，则输入图片数据时的顺序为：channels,rows,cols，否则:rows,cols,channels
        if K.image_data_format() == 'channels_first':
            train_images = train_images.reshape(train_images.shape[0], img_channels, img_rows, img_cols)
            test_images = test_images.reshape(test_images.shape[0], img_channels, img_rows, img_cols)
            self.input_shape = (img_channels, img_rows, img_cols)
        else:
            train_images = train_images.reshape(train_images.shape[0], img_rows, img_cols, img_channels)
            test_images = test_images.reshape(test_images.shape[0], img_rows, img_cols, img_channels)
            self.input_shape = (img_rows, img_cols, img_channels)

            # 输出训练集、验证集、测试集的数量
            print(train_images.shape[0], 'train samples')
            print(test_images.shape[0], 'test samples')

            self.train_lb = train_labels
            self.test_lb = test_labels

            # 像素数据浮点化以便归一化
            train_images = train_images.astype('float32')
            test_images = test_images.astype('float32')

            # 将其归一化,图像的各像素值归一化到0~1区间
            train_images /= 255
            test_images /= 255

            self.train_images = train_images
            self.test_images = test_images


data = Dataset('./kaggle/caltech-101/')

data.load()

(9144, 200, 200, 3)
(9144,)
6400 train samples
2744 test samples


#### Part2 TODOs

TODO 利用SIFT从训练图像中提取特征  

如果有需要，你也可以在pass之外的地方填写相关代码，请自便，下同。

vec_dict 第i项： i为类别，对应的字典为所有属于该类的sift特征点的信息。注意：kp与des一一对应。

In [16]:
vec_dict = {i:{'kp':[], 'des':[]} for i in range(102)}
# des 为128维度的描述向量
# 102 是数据集的类别个数...QAQ
sift = cv2.SIFT_create()
# print(data.train_images.shape[0])  # 6400
for i in range(data.train_images.shape[0]):
    # if i==10:
    #     break
    tep = cv2.normalize(data.train_images[i], None, 0, 255, cv2.NORM_MINMAX).astype('uint8')
    kp_vector, des_vector = sift.detectAndCompute(tep, None)
    class_num = data.train_lb[i]
    
    kp_vector = list(kp_vector)
    des_vector = list(des_vector)
    vec_dict[class_num]['kp'].extend(kp_vector)
    # print(len(vec_dict[class_num]['kp']))
    vec_dict[class_num]['des'].extend(des_vector)
    # print(len(vec_dict[class_num]['kp']))
    
    # print(len(des_vector[0]))

# print(vec_dict)

In [17]:
print(vec_dict[0]['kp'][0])
print(vec_dict[0]['des'][0])


< cv2.KeyPoint 00000234AFEBD620>
[ 29.   0.   0.   0.   0.   0.   0.   0. 204.   7.   0.   0.   0.   0.
   0.  11. 137.   7.   0.   0.   0.   0.   1.  35.  33.  11.  12.  10.
   2.   0.   0.   7.  39.   0.   0.   0.   0.   0.   0.   1. 204.   5.
   0.   0.   0.   0.   0.  46. 134.   6.   5.   6.   1.   1.   7.  66.
   5.   5.  14.   8.   2.   4.   3.   4.  39.   0.   0.   0.   0.   0.
   0.   0. 204.  41.   1.   0.   0.   0.   0.   6. 132.  39.  16.  13.
   1.   0.   3.  14.   2.   7.  16.   4.   0.   0.  24.  20.  29.   0.
   0.   0.   0.   0.   0.   0. 204.   8.   0.   0.   0.   0.   0.  19.
 124.   7.   1.   0.   0.   0.   3.  67.   9.   1.   2.   2.   1.   1.
  24.  36.]


In [18]:
bneck_value = float("inf")
for i in range(102):
    if len(vec_dict[i]['kp']) < bneck_value:
        bneck_value = len(vec_dict[i]['kp'])
        # print(bneck_value)
  
for i in range(102):
    kp_list = vec_dict[i]['kp'] = sorted((vec_dict[i]['kp']),
                                         key=lambda x: x.response,
                                         reverse=True)

TODO 为每个类别选择同样多的特征点用于聚类。特征点个数bneck_value

In [19]:
vec_list = vec_dict[0]['des'][0:bneck_value]
# print(bneck_value) 3379
for i in range(1, 102):
    vec_list = vec_list + vec_dict[i]['des'][0:bneck_value]
vec_list = np.float64(vec_list)
print(vec_list.shape) #(344658, 128)

(324258, 128)


TODO 对提取出的特征点使用Kmeans聚类，设定合适的聚类中心个数

In [20]:
from sklearn.cluster import KMeans
#####
N_clusters = 50 # just 4 try
kmeans = KMeans(n_clusters=N_clusters)
kmeans.fit(vec_list)
#####
   
   

TODO 利用直方图统计每张图像中的特征点所属聚类中心的个数，将直方图归一化后便得到图像的特征向量。

In [21]:
num_images = data.train_images.shape[0]
hist_vector = np.zeros((num_images, N_clusters))
for i in range(num_images):
    # if i == 2:
    #     break
    tep = cv2.normalize(data.train_images[i], None, 0, 255, cv2.NORM_MINMAX).astype('uint8')
    kp_vector, des_vector = sift.detectAndCompute(tep, None)
    # print(des_vector.shape)
    des_vector = np.array(des_vector, dtype=np.float64)
    label_test = kmeans.predict(des_vector)
    # print(label_test)
    for label in label_test:
        hist_vector[i][label] = hist_vector[i][label] + 1
    # for si in des_vector:
    #     si = np.array(si.reshape(1, -1), dtype=np.float64)
    #     # print(si.shape)
    #     si_label = kmeans.predict(si)
    #     # print(si_label)
    #     hist_vector[i][si_label] = hist_vector[i][si_label] + 1
    
    # dst = np.zeros_like(hist_vector[i])
    # print(hist_vector)

    cv2.normalize(hist_vector[i], hist_vector[i], 0, 1, cv2.NORM_MINMAX) # 归一化
    # print(hist_vector[i])
# print(hist_vector[1])

In [22]:
# print(data.train_lb[100])
print(hist_vector[0])
print(data.train_lb[6399])

[0.0212766  1.         0.27659574 0.04255319 0.21276596 0.27659574
 0.0212766  0.10638298 0.08510638 0.29787234 0.08510638 0.17021277
 0.12765957 0.0212766  0.08510638 0.46808511 0.04255319 0.
 0.10638298 0.08510638 0.06382979 0.10638298 0.17021277 0.25531915
 0.0212766  0.04255319 0.36170213 0.04255319 0.12765957 0.19148936
 0.06382979 0.38297872 0.04255319 0.57446809 0.12765957 0.12765957
 0.         0.10638298 0.38297872 0.17021277 0.12765957 0.
 0.65957447 0.17021277 0.10638298 0.10638298 0.25531915 0.04255319
 0.04255319 0.12765957]
27


In [23]:
# Train classifier SVM
from sklearn import svm
classifier = svm.SVC(probability=True)
classifier.fit(hist_vector, data.train_lb)

TODO 构建测试集并计算模型准确率

In [24]:
# 使用SVM构建分类器
# 你可以自行构建分类器，也可以使用SVM
# TODO 构建测试集并计算模型准确率
num_test_images = data.test_images.shape[0]
hist_test_vector = np.zeros((num_test_images, N_clusters))
# print(num_test_images)
for i in range(num_test_images):
    # if i==2:
    #     break
    tep = cv2.normalize(data.test_images[i], None, 0, 255, cv2.NORM_MINMAX).astype('uint8')
    kp_vector, des_vector = sift.detectAndCompute(tep, None)
    des_vector = np.array(des_vector, dtype=np.float64)
    
    label_pred_test = kmeans.predict(des_vector)
    # label_pred_test = classifier.predict(vec_test)
    # print()
    for label in label_pred_test:
        hist_test_vector[i][label] = hist_test_vector[i][label] + 1
        
    cv2.normalize(hist_test_vector[i], hist_test_vector[i], 0, 1, cv2.NORM_MINMAX)
    # print(hist_test_vector[i])
    # print()


acc = classifier.predict(hist_test_vector)-data.test_lb
tep = len(acc[acc==0])
# print(tep)
# print(len(data.test_lb))
print('Accuracy of SVM:', tep/len(data.test_lb))

Accuracy of SVM: 0.33090379008746357


In [25]:
class My_Dataset(Dataset):

    def __init__(self, features, labels):
        self.features = features
        self.labels   = labels

    def __getitem__(self, index):
        
        feature = self.features[i]
        label   = self.labels[i]
        
        return feature, label

    def __len__(self):
        return len(self.labels)

In [124]:
import torch.nn as nn
from torch.utils.data import DataLoader
import torch

train_set = My_Dataset(hist_vector, data.train_lb)
test_set  = My_Dataset(hist_test_vector, data.test_lb)

train_loader = DataLoader(train_set, batch_size=16)
test_loader  = DataLoader(test_set, batch_size=16)

model = nn.Sequential(
    nn.Linear(in_features=50, out_features=256), 
    nn.ReLU(), 
    nn.Linear(256, 1024), 
    nn.ReLU(), 
    nn.Dropout(0.2), 
    nn.Linear(1024, 102),
    nn.Softmax()
)

# opt = torch.optim.Adam(model.parameters(), lr=0.01)
opt = torch.optim.SGD(model.parameters(), lr=0.0005, momentum=0.8, weight_decay=1e-5)
loss = nn.CrossEntropyLoss()

In [125]:
model.train()
for epoch in range(100):
    for batch_idx, (X,y) in enumerate(train_loader):
        opt.zero_grad()
        # print(X.shape)
        # print(y.shape)
        X = torch.tensor(X, dtype=torch.float32)
        y = torch.tensor(y, dtype=torch.long)
        y_pred = model(X)
        ls = loss(y_pred, y)
        ls.backward()
        opt.step()
        if (batch_idx+1)%100 == 0 :
            print('epoch: {}, batch:{}, loss:{}'.format(
                epoch+1, batch_idx+1, ls
            ))

  X = torch.tensor(X, dtype=torch.float32)
  y = torch.tensor(y, dtype=torch.long)


epoch: 1, batch:100, loss:4.624363899230957
epoch: 1, batch:200, loss:4.624178886413574
epoch: 1, batch:300, loss:4.623955726623535
epoch: 1, batch:400, loss:4.623724937438965
epoch: 2, batch:100, loss:4.6234822273254395
epoch: 2, batch:200, loss:4.623290061950684
epoch: 2, batch:300, loss:4.623076438903809
epoch: 2, batch:400, loss:4.622590065002441
epoch: 3, batch:100, loss:4.6223955154418945
epoch: 3, batch:200, loss:4.622158527374268
epoch: 3, batch:300, loss:4.621729850769043
epoch: 3, batch:400, loss:4.621478080749512
epoch: 4, batch:100, loss:4.621064186096191
epoch: 4, batch:200, loss:4.620717525482178
epoch: 4, batch:300, loss:4.620311737060547
epoch: 4, batch:400, loss:4.619791030883789
epoch: 5, batch:100, loss:4.6192450523376465
epoch: 5, batch:200, loss:4.619004726409912
epoch: 5, batch:300, loss:4.618350028991699
epoch: 5, batch:400, loss:4.617878437042236
epoch: 6, batch:100, loss:4.617140769958496
epoch: 6, batch:200, loss:4.616617202758789
epoch: 6, batch:300, loss:4.6

In [126]:
# test
model.eval()
def accuracy(y_hat, y):
    """计算预测正确的数量"""
    if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
        y_hat = y_hat.argmax(axis=1)  #每行中最大的数作为预测类别
    cmp = y_hat.type(y.dtype) == y #转换数据类型后作比较
    return float(cmp.type(y.dtype).sum())

metric = [0., 0.]
for batch_idx, (X,y) in enumerate(test_loader):
    # print(X.shape)
    # print(y.shape)
    X = torch.tensor(X, dtype=torch.float32)
    y = torch.tensor(y, dtype=torch.long)
    y_hat = model(X)
    metric[0] += accuracy(y_hat, y)
    metric[1] += len(y)

print(metric[0] / metric[1])
# 表现不是很理想 QAQ

0.0


  X = torch.tensor(X, dtype=torch.float32)
  y = torch.tensor(y, dtype=torch.long)
