In [2]:
pip install scikit-image

Looking in indexes: http://repo.myhuaweicloud.com/repository/pypi/simple
Collecting scikit-image
  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/2d/ba/63ce953b7d593bd493e80be158f2d9f82936582380aee0998315510633aa/scikit_image-0.19.3-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (13.5 MB)
[K     |████████████████████████████████| 13.5 MB 21.3 MB/s eta 0:00:01
Collecting tifffile>=2019.7.26
  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/d8/38/85ae5ed77598ca90558c17a2f79ddaba33173b31cf8d8f545d34d9134f0d/tifffile-2021.11.2-py3-none-any.whl (178 kB)
[K     |████████████████████████████████| 178 kB 18.2 MB/s eta 0:00:01
Collecting PyWavelets>=1.1.1
  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/ae/56/4441877073d8a5266dbf7b04c7f3dc66f1149c8efb9323e0ef987a9bb1ce/PyWavelets-1.3.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.4 MB)
[K     |█████████████████████████

In [3]:
import os
import numpy as np
import cv2
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import math
from skimage import feature as skif
from matplotlib import pyplot as plt
from tqdm import tqdm
from sklearn import decomposition
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score


In [4]:
def loadDataProcessing(file):
    with open(file) as f:
        s = f.read().splitlines()  # 读取逐行数据放入list列表中
    allMessage = []
    for i in range(len(s)):
        # print(s[9])#打印第九行
        temp = s[i]
        temp = temp.split(' ')  # 按空格划分字符串为list列表
        while '' in temp:  # 删除list中的空元素
            temp.remove('')
        # print(temp)

        # 对单行进行赋值处理
        if len(temp) > 10:
            id = temp[0]
            sex = temp[2][:-1]
            age = temp[4][:-1]
            race = temp[6][:-1]
            face = temp[8][:-1]
            prop = temp[10:]
            prop[0] = prop[0][2:]

            prop.pop(-1)
            # 判断prop是否为空，并且根据prop特征的个数来用空格进行拼接
            if len(prop) != 0:
                str = ''
                for i in range(len(prop) - 1):
                    str = str + prop[i] + ' '
                str = str + prop[len(prop) - 1]
                prop = str
            else:
                prop = 'NaN'

            # print(prop)
            temp = [id, sex, age, race, face, prop]
            allMessage.append(temp)
        # print(temp)
        else:
            pass
        # id = temp[0]
        # temp = [id, "missing descriptor"]
        # print(temp)
        # allMessage.append(temp)
    return allMessage


In [5]:
def roadLabels(path):
    # 将字符串标签“male”，“female”处理为1，0
    if (type(path).__name__ == 'list'):  # 用于处理列表数据
        labels = []
        for i in range(len(y)):
            labels.append(y[i][1])
    else:  # 用于处理csv文件
        df = pd.read_csv(path)
        labels = df.iloc[:, 1].values.tolist()

    # 给标签编码
    le = LabelEncoder()
    labels = le.fit_transform(labels)
    # print(labels)
    return labels


def roadData(path):
    # 读取文件夹下的文件，返回一个list列表
    path = path  # 文件夹目录
    files = os.listdir(path)  # 得到文件夹下的所有文件名称
    # f = open(path+"/"+files[0],'rb')
    # s=np.fromfile(f,dtype=np.ubyte)
    # s=np.int_(s)
    # flag=0
    s = []
    for file in files:  # 遍历文件夹
        with open(path + '/' + file, 'rb') as f:
            x = np.fromfile(f, dtype=np.uint8)

            if len(x) == 128 * 128:
                x = x.reshape(128, 128)

            s.append(x)

    return s

In [7]:
# 构建Gabor滤波器
def build_filters():
    filters = []
    ksize = [7, 9, 11, 13, 15, 17]  # gabor尺度 6个
    lamda = np.pi / 2.0  # 波长

    for theta in np.arange(0, np.pi, np.pi / 4):  # gabor方向 0 45 90 135
        for k in range(6):
            kern = cv2.getGaborKernel((ksize[k], ksize[k]), 1.0, theta, lamda, 0.5, 0, ktype=cv2.CV_32F)
            kern /= 1.5 * kern.sum()
            filters.append(kern)
    return filters


# 滤波过程
def process(img, filters):
    accum = np.zeros_like(img)
    for kern in filters:
        fimg = cv2.filter2D(img, cv2.CV_8UC3, kern)
        np.maximum(accum, fimg, accum)
    return accum

In [8]:
# 池化
def mean_pooling(img, size):
    """
    :param img:
    :param size:
    :return:
    """
    n, m = img.shape[0], img.shape[1]
    fn, fm = int(n / size), int(m / size)
    fimg = np.zeros((fn, fm), dtype=float)
    for i in range(fn):
        for j in range(fm):
            sum = 0
            for x in range(i * size, i * size + size):
                for y in range(j * size, j * size + size):
                    sum += img[x, y]
            #                 print(img[x,y])
            fimg[i, j] = sum / (size * size)
    return fimg


# 特征图生成
def getGabor(img, filters, reduction=1):
    # image = Image.open(img)
    img_ndarray = np.asarray(img)

    res = []  # 滤波结果
    for i in range(len(filters)):
        res1 = process(img_ndarray, filters[i])
        res1 = mean_pooling(res1, reduction)
        res.append(np.asarray(res1))

    return res

In [9]:
def gray_norm(img):
    """
    Grayscale normalization
    :param img:
    :return:
    """
    min_value = np.min(img)
    max_value = np.max(img)
    #     data_z.all()==data_x.all()
    if max_value == min_value:
        return img
    (n, m) = img.shape
    for i in range(n):
        for j in range(m):
            img[i, j] = np.int8(255 * (img[i][j] - min_value) / (max_value - min_value))
    return img


def adaptive_histogram_equalization(img):
    """
    Adaptive Histogram Equalization
    :param img:
    :return:
    """
    img.dtype = 'uint8'
    # create a CLAHE object (Arguments are optional)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    img = clahe.apply(img)
    return img


In [None]:

if __name__ == '__main__':
    # 将性别与图片相匹配
    x = roadData("./rawdata")  # 读取文件夹下的文件
    # 使用原始数据标签
    y1 = loadDataProcessing('faceDR')
    y2 = loadDataProcessing('faceDS')
    y = y1 + y2
    # 只留性别标签
    y = roadLabels(y)
    # 剔除超过128*128的数据和标签
    x.pop(1190)
    x.pop(1186)
    y = np.delete(y, 1190)
    y = np.delete(y, 1186)

    gendata = []
    for i in range(len(y)):
        for j in range(len(x)):
            if i == j:
                t = (y[i], x[j],)
                gendata.append(t)
    print(gendata)

    # 特征提取
    g_feature = []
    g_lable = []
    filters = build_filters()
    print("feature extracting[with GARBOR]...")

    for img in tqdm(gendata):
        #     dst, hist =  brint_s(img[1], 2, 32)
        img = np.float32(img[1])
        x_val = gray_norm(img)
        x_tmp = adaptive_histogram_equalization(x_val)
        #     ga = getGabor(x_val, filters, 8)
        hist = getGabor(img, filters, 8)
        #     hist = np.array(hist).reshape(-1)
        g_feature.append(hist)
        lable = img[0]
        g_lable.append(lable)
    print("extraction completed !")

    # print(g_feature)
    # print(g_lable)
    Gfeature = np.array(g_feature)
    print('Gfeature_shape:', Gfeature.shape)
    print('Gfeature[0]:', Gfeature[0])

    # re_Gfeature：后续可以直接处理的特征
    re_Gfeature = []
    for mat in Gfeature:
        res = np.array(mat).reshape(-1)
        re_Gfeature.append(res)
    # print(re_Gfeature)
    re_Gfeature = np.array(re_Gfeature)
    print('re_fea_shape:', re_Gfeature.shape)
    print('re_Gfeature[0]:', re_Gfeature[0])

    # 标准化————————————————————————————————————————————————————————————————
    scaler = MinMaxScaler()
    scaler.fit(re_Gfeature)
    re_Gfeature = scaler.transform(re_Gfeature)

    # PCA降维————————————————————————————————————————————————————————————————
    pca = decomposition.PCA(n_components=2)
    pca.fit(re_Gfeature)
    # 观察占比可以选择降多少维
    print(pca.explained_variance_ratio_)
    # X为降维后数据
    X = pca.fit_transform(re_Gfeature)
    # print(X)

    # SVM分类————————————————————————————————————————————————————————————————
    # 划分训练集与测试集
    y = g_lable
    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2019)

    # 分类
    # svc = SVC(kernel='linear', gamma='auto')
    svc = SVC(kernel='rbf', gamma='auto')
    svc.fit(x_train, y_train)

    # 预测结果与性能评价
    y_hat = svc.predict(x_test)
    y_score = svc.decision_function(x_test)

    print("混淆矩阵：")
    print(confusion_matrix(y_test, y_hat))
    print("准确率：", accuracy_score(y_test, y_hat))
    print("精确率：", precision_score(y_test, y_hat, pos_label=1, average='binary'))
    print("召回率：", recall_score(y_test, y_hat, pos_label=1, average='binary'))
    print("F1值：", f1_score(y_test, y_hat, pos_label=1, average='binary'))


[(1, array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)), (1, array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)), (1, array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)), (1, array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)), (1, array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
   

100%|██████████| 3991/3991 [40:30<00:00,  1.64it/s]


extraction completed !
Gfeature_shape: (3991, 24, 16, 64)
Gfeature[0]: [[[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 ...

 [[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 