# 数据准备

In [1]:
#导入相关库
import random
import numpy as np
import matplotlib.pyplot as plt
import pickle
import os
from imageio import imread

In [2]:
# Global matplotlib display settings for the notebook.
# Render figures inline in the notebook output.
%matplotlib inline
plt.rcParams['figure.figsize'] = (12.0, 10.0) # set default size of plots
# Show image pixels without smoothing between them.
plt.rcParams['image.interpolation'] = 'nearest'
# Default colormap used when an image has no colour channels.
plt.rcParams['image.cmap'] = 'gray'

In [3]:
# Load IPython's autoreload extension and switch it to mode 2, so edited
# modules are re-imported before each cell runs.
%load_ext autoreload
%autoreload 2

In [4]:
# Functions for loading the CIFAR dataset.
# Load a single batch file.
def load_CIFAR_batch(filename):
    """Load one pickled CIFAR batch file.

    Args:
        filename: Path of the batch file. The file must contain a pickled
            dict with a 'data' entry (one flattened 3x32x32 image per row)
            and a 'labels' entry.

    Returns:
        A tuple ``(X, Y)``: ``X`` is a float array of shape (N, 32, 32, 3)
        (channels moved to the last axis) and ``Y`` is an array with the N
        labels.
    """
    with open(filename, 'rb') as f:
        # NOTE: pickle can execute arbitrary code while loading; only open
        # batch files that come from a trusted source.
        datadict = pickle.load(f, encoding='latin1')
    X = np.asarray(datadict['data'])
    Y = np.array(datadict['labels'])
    # Infer the number of images from the data instead of assuming 10000,
    # so batches of any size load correctly.
    X = X.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1).astype("float")
    return X, Y
def load_CIFAR10(ROOT):
    """Load the five training batches and the test batch found under ROOT.

    Returns:
        ``(Xtr, Ytr, Xte, Yte)``: the concatenated data and labels of
        'data_batch_1' .. 'data_batch_5', followed by the data and labels of
        'test_batch'.
    """
    batch_paths = [os.path.join(ROOT, 'data_batch_%d' % (b, )) for b in range(1, 6)]
    loaded = [load_CIFAR_batch(path) for path in batch_paths]
    Xtr = np.concatenate([data for data, _ in loaded])
    Ytr = np.concatenate([labels for _, labels in loaded])
    Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, 'test_batch'))
    return Xtr, Ytr, Xte, Yte

In [5]:
# Load the CIFAR-10 data from disk.
cifar10_dir = 'cifar-10-batches-py'
X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
# Report the sizes of the CIFAR-10 training and test sets.
for _caption, _array in (
    ('训练集数据大小：', X_train),
    ('训练集标签大小：', y_train),
    ('测试集数据大小：', X_test),
    ('测试集标签大小：', y_test),
):
    print(_caption, _array.shape)

In [6]:
# Draw a grid of random training images: one column per class,
# samples_per_class rows.
classes = ['airplane','automobile','bird','cat','deer','dog','frog','horse','ship','truck']
num_class = len(classes)
samples_per_class = 10
for y, cls in enumerate(classes):
    # Pick samples_per_class distinct training images whose label is y.
    idxs = np.random.choice(np.flatnonzero(y_train == y), samples_per_class, replace=False)
    for i, idx in enumerate(idxs):
        # Subplot numbers are 1-based and run row by row.
        plt.subplot(samples_per_class, num_class, i * num_class + y + 1)
        plt.imshow(X_train[idx].astype('uint8'))
        plt.axis('off')
        if not i:
            # Only the first row carries the class name as a title.
            plt.title(cls)
plt.show()

In [7]:
# Split the CIFAR-10 training data into training, validation and development
# subsets; the test data is kept as the test set.
num_training = 46000
num_validation = 2000
num_test = 10000
num_dev = 10000

# Validation set: the num_validation samples right after the training range.
# This has to happen before X_train / y_train are truncated below.
_val_stop = num_training + num_validation
X_val, y_val = X_train[num_training:_val_stop], y_train[num_training:_val_stop]

# Training set: the first num_training samples.
X_train, y_train = X_train[:num_training], y_train[:num_training]

# Development set: num_dev distinct samples drawn at random from the training set.
indices = np.random.choice(num_training, num_dev, replace = False)
X_dev, y_dev = X_train[indices], y_train[indices]

# Test set: the first num_test test samples.
X_test, y_test = X_test[:num_test], y_test[:num_test]

# Print the shape of every subset.
for _caption, _array in (
    ('Train data shape: ', X_train),
    ('Train labels shape: ', y_train),
    ('Validation data shape: ', X_val),
    ('Validation labels shape: ', y_val),
    ('Development data shape: ', X_dev),
    ('Development labels shape: ', y_dev),
    ('Test data shape: ', X_test),
    ('Test labels shape: ', y_test),
):
    print(_caption, _array.shape)

In [8]:
# Preprocessing: flatten every image into a single row vector.
X_train = X_train.reshape(X_train.shape[0], -1)
X_val = X_val.reshape(X_val.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)
X_dev = X_dev.reshape(X_dev.shape[0], -1)

for _caption, _array in (
    ('Training data shape: ', X_train),
    ('Validation data shape: ', X_val),
    ('Test data shape: ', X_test),
    ('dev data shape: ', X_dev),
):
    print(_caption, _array.shape)

In [9]:
# Mean-image subtraction.
# The mean image is computed from the training set only, so that no
# information from the validation / test data leaks into preprocessing.
mean_image = X_train.mean(axis = 0)
print(mean_image[:10])
plt.figure(figsize=(4,4))
plt.imshow(mean_image.reshape(32,32,3).astype('uint8'))
plt.show()

# Centre every subset in place with the training-set mean image.
for _subset in (X_train, X_val, X_test, X_dev):
    _subset -= mean_image

# Append a constant column of ones to every subset (bias term).
X_train = np.concatenate([X_train, np.ones((X_train.shape[0], 1))], axis=1)
X_val = np.concatenate([X_val, np.ones((X_val.shape[0], 1))], axis=1)
X_test = np.concatenate([X_test, np.ones((X_test.shape[0], 1))], axis=1)
X_dev = np.concatenate([X_dev, np.ones((X_dev.shape[0], 1))], axis=1)

print(X_train.shape,X_val.shape,X_test.shape,X_dev.shape)


# 线性SVM

In [10]:
class LinearSVM:
    """Multiclass linear classifier trained with a hinge loss (margin 1) and
    L2 regularisation, optimised by minibatch SGD."""

    def __init__(self):
        # Weight matrix of shape (dim, num_classes); created by train() on first use.
        self.W = None

    def loss(self, X, y, reg):
        """Return the hinge loss and its gradient for one batch.

        Args:
            X: Array of shape (N, dim) with one sample per row.
            y: Integer array of length N with the correct class of each sample.
            reg: L2 regularisation strength.

        Returns:
            ``(loss, dW)`` where ``dW`` has the same shape as ``self.W``.
        """
        n_samples = X.shape[0]
        rows = np.arange(n_samples)

        scores = X.dot(self.W)
        true_scores = scores[rows, y].reshape(-1, 1)
        margins = np.maximum(0, scores - true_scores + 1)
        # The correct class never contributes to its own loss.
        margins[rows, y] = 0
        loss = np.sum(margins) / n_samples + 0.5 * reg * np.sum(self.W**2)

        # 1 for every class that violates the margin; the correct class gets
        # minus the number of violations in its row.
        violations = (margins > 0).astype(scores.dtype)
        violations[rows, y] = -np.sum(violations, axis=1)
        dW = X.T.dot(violations) / n_samples + reg * self.W

        return loss, dW

    def train(self, X, y, learning_rate, reg, num_iters, batch_size, verbose=False):
        """Run ``num_iters`` SGD steps and return the per-step batch losses.

        The weights are initialised with small random values only when
        ``self.W`` is still None; otherwise training continues from the
        current weights. Batches are sampled with replacement.
        """
        num_train, dim = X.shape
        num_classes = np.max(y) + 1
        if self.W is None:
            self.W = 0.001 * np.random.randn(dim, num_classes)

        loss_history = []
        for it in range(num_iters):
            picked = np.random.choice(num_train, batch_size, replace=True)
            loss, grad = self.loss(X[picked], y[picked], reg)
            loss_history.append(loss)
            self.W -= learning_rate * grad

            if verbose and it % 100 == 0:
                print(f'Iteration {it} / {num_iters}: Loss {loss}')
        return loss_history

    def predict(self, X):
        """Return, for every row of X, the index of the highest-scoring class."""
        scores = X.dot(self.W)
        return np.argmax(scores, axis=1)


In [18]:
from tqdm import tqdm

# Grid search over learning rate and regularisation strength on the raw-pixel
# features. For every (lr, reg) pair one model is trained and evaluated at the
# cumulative iteration counts listed in num_iters.
learning_rates = [1e-3, 1e-4, 1e-5, 1e-6, 1e-7]
regularizations = [1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4]
num_iters = [500, 1000, 1500, 2000]  # checkpoints; must be strictly increasing
batch_size = 200

# (lr, reg) -> (checkpoints, last batch loss, train accuracy, validation accuracy)
results = {}
total_configurations = len(learning_rates) * len(regularizations) * len(num_iters)

# The context manager closes the progress bar even if training raises.
with tqdm(total=total_configurations, desc="Training progress") as progress_bar:
    for lr in learning_rates:
        for reg in regularizations:
            svm = LinearSVM()
            loss_history = []
            train_accuracies = []
            val_accuracies = []
            trained_iters = 0  # SGD steps this model has already taken
            for iters in num_iters:
                # LinearSVM.train continues from the current weights, so only
                # run the steps still missing to reach this checkpoint.
                # Training `iters` more steps each time would make the real
                # totals 500/1500/3000/5000 instead of the recorded ones.
                loss_hist = svm.train(X_train, y_train, learning_rate=lr, reg=reg, num_iters=iters - trained_iters, batch_size=batch_size, verbose=False)
                trained_iters = iters
                y_train_pred = svm.predict(X_train)
                y_val_pred = svm.predict(X_val)
                train_accuracy = np.mean(y_train_pred == y_train)
                val_accuracy = np.mean(y_val_pred == y_val)
                loss_history.append(loss_hist[-1])
                train_accuracies.append(train_accuracy)
                val_accuracies.append(val_accuracy)
                progress_bar.update(1)
            results[(lr, reg)] = (num_iters, loss_history, train_accuracies, val_accuracies)


In [24]:
# Three rows of subplots, one per metric (loss, training accuracy,
# validation accuracy), and one column per learning rate.
fig, axes = plt.subplots(3, len(learning_rates), figsize=(10 * len(learning_rates), 10 * 3))  # keeps every subplot square

# metric title -> (position of its values inside a results entry, marker, line style)
curve_styles = {
    'Loss': (1, 'o', '-'),
    'Train Acc': (2, '^', '--'),
    'Val Acc': (3, 's', '-.'),
}

for idx_lr, lr in enumerate(learning_rates):
    for idx_metric, (metric, (slot, marker, linestyle)) in enumerate(curve_styles.items()):
        ax = axes[idx_metric, idx_lr]
        # One curve per regularisation strength.
        for reg in regularizations:
            entry = results[(lr, reg)]
            ax.plot(entry[0], entry[slot], marker=marker, linestyle=linestyle, label=f'Reg={reg}')

        ax.set_title(f'{metric} for LR={lr}')
        ax.set_xlabel('Number of Iterations')
        ax.set_ylabel(metric)

        # Put the legend outside the axes, on the right-hand side.
        ax.legend(loc='upper left', bbox_to_anchor=(1.05, 1), title='Regularization')

# Adjust the layout so the legends fit.
plt.tight_layout()
plt.subplots_adjust(right=0.85)  # leave extra room on the right for the legends
plt.show()


In [25]:
# Best validation accuracy seen so far and the configuration that produced it.
best_val_accuracy = 0
best_svm_model = None
best_lr = None
best_reg = None
best_iters = None

# Scan all results for the configuration with the highest validation accuracy.
for (lr, reg), (iters, loss_history, train_accuracies, val_accuracies) in results.items():
    max_idx = np.argmax(val_accuracies)  # checkpoint with the highest validation accuracy
    if val_accuracies[max_idx] > best_val_accuracy:
        best_val_accuracy = val_accuracies[max_idx]
        best_lr = lr
        best_reg = reg
        best_iters = iters[max_idx]  # iteration count that gave that accuracy

# Fail with a clear message instead of calling predict() on None below.
if best_lr is None:
    raise RuntimeError('No configuration reached a validation accuracy above 0.')

# Retrain once, after the winner is known, instead of on every improvement
# inside the loop. best_val_accuracy is the value measured during the search,
# not the accuracy of this freshly trained model.
best_svm_model = LinearSVM()
best_svm_model.train(X_train, y_train, learning_rate=best_lr, reg=best_reg, num_iters=best_iters, batch_size=batch_size, verbose=False)

print(f"Best model found with LR={best_lr}, Reg={best_reg}, Iters={best_iters}, Validation Accuracy={best_val_accuracy}")

# Evaluate the best model on the test set.
y_test_pred = best_svm_model.predict(X_test)
test_accuracy = np.mean(y_test == y_test_pred)
print('Linear SVM on raw pixels final test set accuracy: %f' % test_accuracy)


# SVM+HOG

In [13]:
#SVM+HOG
import cv2
from tqdm import tqdm
from math import sqrt, atan2
from sklearn.preprocessing import StandardScaler

def getHOGfeat(image, stride=8, orientations=8, pixels_per_cell=(8, 8), cells_per_block=(2, 2)):
    """Compute a HOG (histogram of oriented gradients) descriptor.

    Args:
        image: 2-D greyscale image of any numeric dtype.
        stride: Unused; kept so existing callers keep working.
        orientations: Number of orientation bins covering [0, 360) degrees.
        pixels_per_cell: Cell size in pixels; the first value is applied
            along axis 0 (rows), the second along axis 1 (columns).
        cells_per_block: Block size in cells; the first value is applied
            along axis 1 (columns), the second along axis 0 (rows).
            NOTE: this is the opposite order from pixels_per_cell. It is
            kept as-is so existing results for square layouts do not change.

    Returns:
        1-D float array with the L2-normalised block histograms, blocks
        ordered row by row.
    """
    cx, cy = pixels_per_cell
    bx, by = cells_per_block
    # Work in floating point: subtracting unsigned integer pixels (e.g. uint8)
    # wraps around on negative differences and corrupts the gradients.
    image = np.asarray(image, dtype=np.float64)
    sx, sy = image.shape
    eps = 1e-5

    # Central-difference gradients; the border rows/columns stay 0.
    gx = np.zeros(image.shape, dtype=np.float32)
    gy = np.zeros(image.shape, dtype=np.float32)
    gx[:, 1:-1] = image[:, 2:] - image[:, :-2]
    gy[1:-1, :] = image[2:, :] - image[:-2, :]
    magnitude = np.sqrt(gx**2 + gy**2)
    orientation = np.rad2deg(np.arctan2(gy, gx + eps)) % 360

    # Per-cell orientation histograms, weighted by gradient magnitude.
    n_cell_rows, n_cell_cols = int(sx / cx), int(sy / cy)
    orientation_histogram = np.zeros((n_cell_rows, n_cell_cols, orientations))
    for i in range(orientations):
        in_bin = ((orientation >= (i * 360 / orientations)) &
                  (orientation < ((i + 1) * 360 / orientations)))
        votes = np.where(in_bin, magnitude, 0)
        # Drop pixels that do not fill a whole cell, then sum each cell at once.
        votes = votes[:n_cell_rows * cx, :n_cell_cols * cy]
        orientation_histogram[:, :, i] = votes.reshape(
            n_cell_rows, cx, n_cell_cols, cy).sum(axis=(1, 3))

    # Slide a block over the cell grid and L2-normalise each block. Block
    # counts are derived per axis so non-square cell grids work as well.
    n_block_rows = n_cell_rows - by + 1
    n_block_cols = n_cell_cols - bx + 1
    normalised_blocks = np.zeros((n_block_rows, n_block_cols, by * bx * orientations))
    for r in range(n_block_rows):
        for c in range(n_block_cols):
            block = orientation_histogram[r:r + by, c:c + bx, :].flatten()
            normalised_blocks[r, c, :] = block / np.sqrt(np.sum(block**2) + eps)

    return normalised_blocks.ravel()

def extract_hog_features(images, mean_image=None):
    """Compute a HOG descriptor for every image in ``images``.

    Args:
        images: Iterable of flattened 32x32x3 RGB images (3072 values each).
            A row with 3073 values is treated as carrying one extra trailing
            element, which is dropped.
        mean_image: Optional flat array of 3072 values that is added back to
            every image before conversion. Pass it when ``images`` were
            mean-centred, otherwise the negative values cannot be converted
            to valid pixels.

    Returns:
        2-D array with one HOG feature vector per image.
    """
    if mean_image is not None:
        mean_image = np.asarray(mean_image, dtype=np.float64).reshape(-1)
    hog_features = []
    for image in tqdm(images, desc="Extracting HOG features"):
        if len(image) == 3073:  # Assuming the last element is extra and needs to be removed
            image = image[:-1]  # Remove the last element
        if mean_image is not None:
            image = image + mean_image  # Undo the mean subtraction
        image = image.reshape(32, 32, 3)  # Reshape flat array into 32x32x3 RGB image
        if image.dtype != np.uint8:
            # Round and clip before the cast: a plain astype() truncates
            # (254.999 -> 254) and does not give meaningful pixels for
            # values outside [0, 255].
            image = np.clip(np.rint(image), 0, 255).astype(np.uint8)
        image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)  # Convert to grayscale
        image = cv2.resize(image, (64, 64))  # Resize image to 64x64 for HOG
        hog_features.append(getHOGfeat(image))
    return np.array(hog_features)

# Extract HOG features. X_train / X_val / X_test were mean-centred earlier in
# the notebook, so the training-set mean image is passed along to restore the
# real pixel values first.
X_train_hog = extract_hog_features(X_train, mean_image=mean_image)
X_val_hog = extract_hog_features(X_val, mean_image=mean_image)
X_test_hog = extract_hog_features(X_test, mean_image=mean_image)

# Concatenate the HOG features with the (centred) raw-pixel features.
X_train_combined = np.hstack([X_train_hog, X_train])
X_val_combined = np.hstack([X_val_hog, X_val])
X_test_combined = np.hstack([X_test_hog, X_test])

# Standardise the features; the scaler is fitted on the training set only.
# NOTE(review): X_* still carries the constant bias column appended earlier.
# StandardScaler centres that column to 0, so the combined features
# effectively have no bias input. Confirm whether this is intended.
scaler = StandardScaler()
X_train_combined = scaler.fit_transform(X_train_combined)
X_val_combined = scaler.transform(X_val_combined)
X_test_combined = scaler.transform(X_test_combined)


In [None]:
# Persist the fitted feature scaler to 'scaler.pkl' so the same
# standardisation can be reused outside this notebook.
import joblib
joblib.dump(scaler, 'scaler.pkl')

In [15]:
# Initialise a model (not used by the search below, which builds its own).
svm_model = LinearSVM()
# Minibatch size.
batch_size = 200

# Grid search over learning rate and regularisation strength on the combined
# HOG + pixel features. For every (lr, reg) pair one model is trained and
# evaluated at the cumulative iteration counts listed in num_iters.
learning_rates = [1e-3, 1e-4, 1e-5, 1e-6, 1e-7]
regularizations = [1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4]
num_iters = [500, 1000, 1500, 2000]  # checkpoints; must be strictly increasing

# (lr, reg) -> (checkpoints, last batch loss, train accuracy, validation accuracy)
results = {}
total_configurations = len(learning_rates) * len(regularizations) * len(num_iters)

# The context manager closes the progress bar even if training raises.
with tqdm(total=total_configurations, desc="Training progress") as progress_bar:
    for lr in learning_rates:
        for reg in regularizations:
            svm = LinearSVM()
            loss_history = []
            train_accuracies = []
            val_accuracies = []
            trained_iters = 0  # SGD steps this model has already taken
            for iters in num_iters:
                # LinearSVM.train continues from the current weights, so only
                # run the steps still missing to reach this checkpoint.
                # Training `iters` more steps each time would make the real
                # totals 500/1500/3000/5000 instead of the recorded ones.
                loss_hist = svm.train(X_train_combined, y_train, learning_rate=lr, reg=reg, num_iters=iters - trained_iters, batch_size=batch_size, verbose=False)
                trained_iters = iters
                y_train_pred = svm.predict(X_train_combined)
                y_val_pred = svm.predict(X_val_combined)
                train_accuracy = np.mean(y_train_pred == y_train)
                val_accuracy = np.mean(y_val_pred == y_val)
                loss_history.append(loss_hist[-1])
                train_accuracies.append(train_accuracy)
                val_accuracies.append(val_accuracy)
                progress_bar.update(1)
            results[(lr, reg)] = (num_iters, loss_history, train_accuracies, val_accuracies)

In [18]:
# Three rows of subplots, one per metric (loss, training accuracy,
# validation accuracy), and one column per learning rate.
fig, axes = plt.subplots(3, len(learning_rates), figsize=(10 * len(learning_rates), 10 * 3))  # keeps every subplot square

for idx_lr, lr in enumerate(learning_rates):
    for idx_metric, metric in enumerate(['Loss', 'Train Acc', 'Val Acc']):
        ax = axes[idx_metric, idx_lr]
        for reg in regularizations:
            iters, loss_history, train_accuracies, val_accuracies = results[(lr, reg)]
            if metric == 'Loss':
                values = loss_history
                marker = 'o'
                linestyle = '-'
            elif metric == 'Train Acc':
                values = train_accuracies
                marker = '^'
                linestyle = '--'
            else:
                values = val_accuracies
                marker = 's'
                linestyle = '-.'

            label = f'Reg={reg}'
            # Use the checkpoints stored with the results rather than the
            # global num_iters, which a later cell rebinds to a single int.
            ax.plot(iters, values, marker=marker, linestyle=linestyle, label=label)

        ax.set_title(f'{metric} for LR={lr}')
        ax.set_xlabel('Number of Iterations')
        ax.set_ylabel(metric)

        # Put the legend outside the axes, on the right-hand side.
        ax.legend(loc='upper left', bbox_to_anchor=(1.05, 1), title='Regularization')

# Adjust the layout so the legends fit.
plt.tight_layout()
plt.subplots_adjust(right=0.85)  # leave extra room on the right for the legends
plt.show()

In [20]:
# Find the best model: track the best validation accuracy and its configuration.
best_val_accuracy = 0
best_svm_model = None
best_lr = None
best_reg = None
best_iters = None

for (lr, reg), (iters, loss_history, train_accuracies, val_accuracies) in results.items():
    max_idx = np.argmax(val_accuracies)  # checkpoint with the highest validation accuracy
    if val_accuracies[max_idx] > best_val_accuracy:
        best_val_accuracy = val_accuracies[max_idx]
        best_lr = lr
        best_reg = reg
        best_iters = iters[max_idx]

# Fail with a clear message instead of calling predict() on None below.
if best_lr is None:
    raise RuntimeError('No configuration reached a validation accuracy above 0.')

# Retrain once, after the winner is known, instead of on every improvement
# inside the loop. best_val_accuracy is the value measured during the search,
# not the accuracy of this freshly trained model.
best_svm_model = LinearSVM()
best_svm_model.train(X_train_combined, y_train, learning_rate=best_lr, reg=best_reg, num_iters=best_iters, batch_size=batch_size, verbose=False)

print(f"Best model found with LR={best_lr}, Reg={best_reg}, Iters={best_iters}, Validation Accuracy={best_val_accuracy}")

# Evaluate the best model on the test set.
y_test_pred_hog = best_svm_model.predict(X_test_combined)
test_accuracy_hog = np.mean(y_test_pred_hog == y_test)
print('Test Accuracy with HOG features:', test_accuracy_hog)


In [21]:
# Train an SVM with the already-known best hyperparameters.
learning_rate, reg = 1e-3, 1e-2
num_iters, batch_size = 2000, 200

svm_model = LinearSVM()
svm_model.train(
    X_train_combined,
    y_train,
    learning_rate=learning_rate,
    reg=reg,
    num_iters=num_iters,
    batch_size=batch_size,
    verbose=True,
)

# Save the trained model to a file.
with open('svm_model_hog.pkl', 'wb') as f:
    pickle.dump(svm_model, f)

print("Model training complete and saved to 'svm_model_hog.pkl'.")

# Evaluate the model on the test set.
y_test_pred_hog = svm_model.predict(X_test_combined)
test_accuracy_hog = (y_test_pred_hog == y_test).mean()
print('Test Accuracy with HOG features:', test_accuracy_hog)