In [7]:
import re
def encode_label(label_list: list) -> (list, list):
    """Replace attribute values by integer codes and collect the vocabularies.

    Every non-None entry of ``label_list`` is indexed positionally: all
    positions except the last hold a single value, and position 4 holds
    either None or an iterable of property values. Each position has its
    own vocabulary; a value's code is its index in that vocabulary, in
    order of first appearance.

    Returns a pair ``(label_name, label_encoded_list)`` where ``label_name``
    is the list of five vocabularies and ``label_encoded_list`` mirrors
    ``label_list`` (None entries stay None). The encoded property slot is a
    list of codes, or ``[None]`` when the label carries no properties.
    """
    label_name = [[] for _ in range(5)]

    def code_for(slot, value):
        # Return the code of `value` in vocabulary `slot`, registering the
        # value at the end of the vocabulary when it has not been seen yet.
        vocabulary = label_name[slot]
        if value in vocabulary:
            return vocabulary.index(value)
        vocabulary.append(value)
        return len(vocabulary) - 1

    label_encoded_list = []
    for label in label_list:
        if label is None:
            label_encoded_list.append(None)
            continue

        # Single-valued attributes: everything except the last position.
        codes = [code_for(pos, label[pos]) for pos in range(len(label) - 1)]

        # Multi-valued property slot.
        properties = label[4]
        if properties is None:
            codes.append([None])
        else:
            codes.append([code_for(4, item) for item in properties])

        label_encoded_list.append(codes)
    return label_name, label_encoded_list


def read_label(path: str) -> (list, list):
    """Parse a label file into per-image attribute lists and image names.

    For every non-blank line:
      * the first five characters, stripped, are recorded as the image name;
      * a line containing ``_missing descriptor`` yields a ``None`` label;
      * otherwise every parenthesised group except the last contributes its
        last space-separated token, and the quoted group ``'( ... )`` is
        split on spaces into a property list (``None`` when it is empty).

    Blank or whitespace-only lines are skipped, so a trailing empty line no
    longer raises IndexError or adds an empty image name.

    Returns ``(label_list, img_name)``; both lists have one entry per record.
    Raises IndexError if a descriptor line has no quoted ``'( ... )`` group.
    """
    # Compiled once up front instead of once per line.
    group_pattern = re.compile(r'\((.*?)\)')
    prop_pattern = re.compile(r'\'\((.*?)\)')

    label_list = []
    img_name = []
    with open(path, "r") as f:
        for line in f:
            if not line.strip():
                # Nothing to parse on an empty line.
                continue

            img_name.append(line[:5].strip())

            if '_missing descriptor' in line:
                label_list.append(None)
                continue

            # The last match is the property group, which is handled by
            # prop_pattern below, so it is excluded here.
            groups = group_pattern.findall(line)
            label = [group.split(" ")[-1].strip() for group in groups[:-1]]

            props = prop_pattern.findall(line)[0].strip()
            label.append(props.split(" ") if props else None)

            label_list.append(label)
    return label_list, img_name


def one_hot_prop(label_encoded_list: list, prop_num: int) -> list:
    """Turn the trailing property-code list of each label into a multi-hot vector.

    Each non-None label is expected to end with a list of property codes
    (``None`` codes are ignored). The result contains, for every label, a
    new list with the same leading entries and a final vector of length
    ``prop_num`` holding 1 at every listed code and 0 elsewhere. ``None``
    labels are passed through as ``None``.

    The input lists are left untouched, so calling the function again on the
    same data gives the same result.

    Raises IndexError if a label is empty or a code is outside the vector.
    """
    label_list = []
    for label in label_encoded_list:
        if label is None:
            label_list.append(None)
            continue

        prop_one_hot = [0] * prop_num
        for code in label[-1]:
            if code is not None:
                prop_one_hot[code] = 1

        # Build a fresh list rather than popping/appending on the caller's.
        label_list.append(label[:-1] + [prop_one_hot])
    return label_list


def one_hot(index, num):
    """Return a list of ``num`` zeros with position ``index`` set to 1."""
    vector = [0] * num
    vector[index] = 1
    return vector


def one_hot_label(label_list: list, label_name) -> list:
    """Flatten every encoded label into a single 0/1 vector.

    For each non-None label, the first ``len(label_name) - 1`` entries are
    expanded with ``one_hot`` (each sized by its vocabulary in
    ``label_name``) and concatenated, then the elements of the label's last
    entry are appended. ``None`` labels stay ``None`` in the result.
    """
    encoded = []
    for label in label_list:
        if label is None:
            encoded.append(None)
            continue

        row = []
        for pos in range(len(label_name) - 1):
            row.extend(one_hot(label[pos], len(label_name[pos])))
        # The last entry is already a vector; splice its elements in.
        row.extend(label[-1])
        encoded.append(row)
    return encoded


def label_pre_one_hot(path1, path2):
    """Read two label files and return their labels as flat one-hot vectors.

    The labels and image names of ``path1`` and ``path2`` are concatenated
    (``path1`` first), encoded with ``encode_label``, and expanded with
    ``one_hot_prop`` and ``one_hot_label``.

    Returns ``(label_one_hot, img_name, label_name)``: the per-image vectors
    (``None`` where the label is missing), the image names, and the
    per-attribute vocabularies produced by ``encode_label``.
    """
    labels_first, names_first = read_label(path1)
    labels_second, names_second = read_label(path2)

    label_name, label_encoded_list = encode_label(labels_first + labels_second)

    # The property vocabulary is the last one; its size fixes the vector width.
    with_prop_vectors = one_hot_prop(label_encoded_list, len(label_name[-1]))
    label_one_hot = one_hot_label(with_prop_vectors, label_name)

    return label_one_hot, names_first + names_second, label_name


In [None]:
import raw_data_read_test as rd
import label_pre as lp
import cv2
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, GridSearchCV
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

# Load the images and labels.
channels = 1  # number of image channels; 1 means grayscale
path_rawdata = r"./rawdata"
path_label1 = r"./faceDR"
path_label2 = r"./faceDS"
# NOTE: the third value returned by label_pre_one_hot is the per-attribute
# vocabulary, not a per-image list, so it cannot be used to find missing
# samples. The variable name is kept for the rest of the notebook.
label_one_hot, img_name, label_list = label_pre_one_hot(path_label1, path_label2)
img_list = rd.read_rawdata(channels, path_rawdata, img_name)  # image matrices

# Drop every sample whose image or label is missing. Names, images and labels
# are filtered together so they cannot drift out of alignment.
# Assumes rd.read_rawdata returns one entry per name in img_name, with None
# for unreadable images - TODO confirm against raw_data_read_test.
samples = [
    (name, img, label)
    for name, img, label in zip(img_name, img_list, label_one_hot)
    if img is not None and label is not None
]
img_name = [name for name, _, _ in samples]

# Divide pixel values by 255 and resize every image to 100x100.
train_images = np.array(
    [cv2.resize(img / 255.0, (100, 100)) for _, img, _ in samples]
)

# The first element of each one-hot vector is used as the class label.
train_labels = np.array([int(label[0]) for _, _, label in samples])

# Split into training and test sets.
X_train, X_test, y_train, y_test = train_test_split(train_images, train_labels, test_size=0.2, random_state=3)

# Flatten each image into a feature vector.
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)

# Dimensionality reduction; PCA is fitted on the training split only.
pca = PCA(n_components=85)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

# Parameter grid.
# NOTE(review): per the scikit-learn SVC documentation gamma is the kernel
# coefficient for 'rbf', 'poly' and 'sigmoid' only, so with 'linear' every
# gamma value is expected to give the same score.
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': [0.001, 0.01, 0.1, 1],
    'kernel': ['linear']
}

# Create the SVM classifier.
svm = SVC()

# Create the GridSearchCV object.
grid_search = GridSearchCV(svm, param_grid, cv=3, scoring='accuracy')

# Run the grid search.
grid_search.fit(X_train_pca, y_train)

# Report the best parameters and the best score.
print("最佳参数：", grid_search.best_params_)
print("最佳分数：", grid_search.best_score_)

# Fetch the grid-search results.
results = grid_search.cv_results_

# Line chart of the mean accuracy.
plt.figure(figsize=(10, 6))
plt.title('SVM Performance with Different Parameters')
plt.xlabel('Parameter C')
plt.ylabel('Mean Test Accuracy')

# One line per gamma: its scores, in cv_results_ order, plotted against C.
for gamma in param_grid['gamma']:
    mean_test_scores = [
        score
        for score, result_gamma in zip(results['mean_test_score'], results['param_gamma'])
        if result_gamma == gamma
    ]
    plt.plot(param_grid['C'], mean_test_scores, label=f'gamma={gamma}')

plt.legend()
plt.grid(True)
plt.show()

# Predict on the test split with the best model found.
best_svm = grid_search.best_estimator_
y_pred = best_svm.predict(X_test_pca)

# Compute and print the accuracy.
accuracy = accuracy_score(y_test, y_pred)
print(f'SVM分类准确率：{accuracy * 100:.2f}%')

数据缺失： 1228
数据缺失： 1232
数据缺失： 1808
数据缺失： 4056
数据缺失： 4135
数据缺失： 4136
数据缺失： 5004


In [17]:
# Print the mean cross-validated accuracy of every parameter combination.
for params, mean_test_score in zip(results['params'], results['mean_test_score']):
    print(f"参数组合：{params}，平均测试准确率：{mean_test_score:.4f}")

参数组合：{'C': 0.1, 'gamma': 0.001, 'kernel': 'linear'}，平均测试准确率：0.7986
参数组合：{'C': 0.1, 'gamma': 0.01, 'kernel': 'linear'}，平均测试准确率：0.7986
参数组合：{'C': 0.1, 'gamma': 0.1, 'kernel': 'linear'}，平均测试准确率：0.7986
参数组合：{'C': 0.1, 'gamma': 1, 'kernel': 'linear'}，平均测试准确率：0.7986
参数组合：{'C': 1, 'gamma': 0.001, 'kernel': 'linear'}，平均测试准确率：0.7999
参数组合：{'C': 1, 'gamma': 0.01, 'kernel': 'linear'}，平均测试准确率：0.7999
参数组合：{'C': 1, 'gamma': 0.1, 'kernel': 'linear'}，平均测试准确率：0.7999
参数组合：{'C': 1, 'gamma': 1, 'kernel': 'linear'}，平均测试准确率：0.7999
参数组合：{'C': 10, 'gamma': 0.001, 'kernel': 'linear'}，平均测试准确率：0.8002
参数组合：{'C': 10, 'gamma': 0.01, 'kernel': 'linear'}，平均测试准确率：0.8002
参数组合：{'C': 10, 'gamma': 0.1, 'kernel': 'linear'}，平均测试准确率：0.8002
参数组合：{'C': 10, 'gamma': 1, 'kernel': 'linear'}，平均测试准确率：0.8002
参数组合：{'C': 100, 'gamma': 0.001, 'kernel': 'linear'}，平均测试准确率：0.7994
参数组合：{'C': 100, 'gamma': 0.01, 'kernel': 'linear'}，平均测试准确率：0.7994
参数组合：{'C': 100, 'gamma': 0.1, 'kernel': 'linear'}，平均测试准确率：0.7994
参数组合：{'C': 100, 'gamma': 1, 'kernel': 'linear'}，平均测试准确率：0.7994

In [None]:
import raw_data_read_test as rd
import label_pre as lp
import cv2
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, GridSearchCV
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

# Load the images and labels.
channels = 1  # number of image channels; 1 means grayscale
path_rawdata = r"./rawdata"
path_label1 = r"./faceDR"
path_label2 = r"./faceDS"
# NOTE(review): if lp.label_pre_one_hot matches the definition earlier in
# this notebook, its third return value is the per-attribute vocabulary and
# never contains None, so it cannot be used to detect missing samples.
# Verify against label_pre. The variable name is kept as is.
label_one_hot, img_name, label_list = lp.label_pre_one_hot(path_label1, path_label2)
img_list = rd.read_rawdata(channels, path_rawdata, img_name)  # image matrices

# Remove invalid samples. A sample is dropped when its image or its label is
# None; names, images and labels are filtered together so they stay aligned.
# Assumes rd.read_rawdata returns one entry per name in img_name, with None
# for unreadable images - TODO confirm against raw_data_read_test.
samples = [
    (name, img, label)
    for name, img, label in zip(img_name, img_list, label_one_hot)
    if img is not None and label is not None
]
img_name = [name for name, _, _ in samples]
img_list = [img for _, img, _ in samples]
label_one_hot = [label for _, _, label in samples]

# Divide pixel values by 255 and resize every image to 100x100.
train_images = np.array([cv2.resize(img / 255.0, (100, 100)) for img in img_list])

# The first element of each one-hot vector is used as the integer class label.
train_labels = np.array([int(label[0]) for label in label_one_hot])

# Split into training and test sets.
X_train, X_test, y_train, y_test = train_test_split(train_images, train_labels, test_size=0.2, random_state=3)

# Flatten each image into a feature vector.
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)

# Dimensionality reduction; PCA is fitted on the training split only.
pca = PCA(n_components=85)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

# Parameter grid.
# NOTE(review): per the scikit-learn SVC documentation gamma is not used by
# the 'linear' kernel, so its rows are expected to repeat for every gamma.
param_grid = {
    'C': [0.01, 0.1, 1, 10, 100],
    'gamma': [0.0001, 0.001, 0.01, 0.1, 1],
    'kernel': ['rbf', 'linear', 'poly', 'sigmoid']
}

# Create the SVM classifier.
svm = SVC()

# Create the GridSearchCV object.
grid_search = GridSearchCV(svm, param_grid, cv=5, scoring='accuracy', verbose=1)

# Run the grid search.
grid_search.fit(X_train_pca, y_train)

# Report the best parameters and the best score.
print("最佳参数：", grid_search.best_params_)
print("最佳分数：", grid_search.best_score_)

# Fetch the grid-search results.
results = grid_search.cv_results_

# Print the mean accuracy of every parameter combination.
for params, mean_test_score in zip(results['params'], results['mean_test_score']):
    print(f"参数组合：{params}，平均测试准确率：{mean_test_score:.4f}")

# Predict on the test split with the best model found.
best_svm = grid_search.best_estimator_
y_pred = best_svm.predict(X_test_pca)

# Compute and print the accuracy.
accuracy = accuracy_score(y_test, y_pred)
print(f'SVM分类准确率：{accuracy * 100:.2f}%')

# Line chart of the mean accuracy.
plt.figure(figsize=(12, 8))
plt.title('SVM Performance with Different Parameters')
plt.xlabel('Parameter C')
plt.ylabel('Mean Test Accuracy')

# One line per (kernel, gamma) pair: its scores, in cv_results_ order,
# plotted against C.
for kernel in param_grid['kernel']:
    for gamma in param_grid['gamma']:
        mean_test_scores = [
            score
            for score, result_gamma, result_kernel in zip(
                results['mean_test_score'],
                results['param_gamma'],
                results['param_kernel'],
            )
            if result_gamma == gamma and result_kernel == kernel
        ]
        plt.plot(param_grid['C'], mean_test_scores, label=f'kernel={kernel}, gamma={gamma}')

plt.legend(loc='best')
plt.grid(True)
plt.show()