In [10]:
import cv2
import numpy as np
import face_recognition
import pandas as pd
import os


def getFaceEncoding(src):
    """Return the encoding of the first face detected in an image file.

    The image is upscaled by a factor of two, median-blurred and denoised
    before face detection is run on it.

    Args:
        src: Path of the image file to load.

    Returns:
        The first entry returned by ``face_recognition.face_encodings`` for
        the detected face locations, or an empty ``np.ndarray``
        (``size == 0``) when no face is detected.
    """
    image = face_recognition.load_image_file(src)
    # cv2.resize takes the target size as (width, height), hence shape[1] first.
    height, width = image.shape[:2]
    image = cv2.resize(image, (width * 2, height * 2))
    image = cv2.medianBlur(image, 7)
    image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)

    face_locations = face_recognition.face_locations(image)
    if not face_locations:
        # Callers detect "no face" through the empty array's size == 0.
        return np.array([])
    return face_recognition.face_encodings(image, face_locations)[0]


def getEuDist(img_encoding1, img_encoding2):
    """Return the Euclidean distance between two encodings.

    Both arguments are converted to NumPy arrays, so any array-like input
    of matching shape is accepted.
    """
    delta = np.asarray(img_encoding1) - np.asarray(img_encoding2)
    # Square root of the sum of squared component differences.
    return np.sqrt(np.square(delta).sum())


def getSimDist(img_encoding1, img_encoding2):
    """Return a similarity score computed as ``1 / (1 + distance)``.

    ``distance`` is the norm of the difference between the two encodings,
    so identical encodings score 1.0 and the score shrinks towards 0 as
    the encodings move apart.
    """
    first, second = (np.array(enc) for enc in (img_encoding1, img_encoding2))
    gap = np.linalg.norm(first - second)
    return 1.0 / (gap + 1.0)


# Not used by the code in this cell; kept so that anything relying on the name still finds it.
id_imgGroup = []


# Each sub-directory of test_path is treated as one image pair: its name is
# converted with int() to pick the output row, and two of its files are compared.
test_path = "../init_data/toUser/test/data"
subdirs = os.listdir(test_path)
# One row per entry of test_path; rows of pairs that cannot be scored stay NaN.
test_data = pd.DataFrame(columns=['EuDist', 'SimDist'], index=range(len(subdirs)))

# Sorted so the processing (and printing) order is stable across runs.
for sub_dir_name in sorted(subdirs):
    pair_dir = os.path.join(test_path, sub_dir_name)
    if not os.path.isdir(pair_dir):
        continue

    # Sort the names so the same two files are chosen on every run, and skip
    # hidden files (e.g. OS metadata) that are not images.
    image_names = sorted(
        name for name in os.listdir(pair_dir)
        if not name.startswith(".") and os.path.isfile(os.path.join(pair_dir, name))
    )
    if len(image_names) < 2:
        # Indexing the second file would raise IndexError; leave the row as NaN instead.
        print("skipping %s: fewer than two files" % sub_dir_name)
        continue

    img_encoding1 = getFaceEncoding(os.path.join(pair_dir, image_names[0]))
    img_encoding2 = getFaceEncoding(os.path.join(pair_dir, image_names[1]))
    if (img_encoding1.size == 0) or (img_encoding2.size == 0):
        # getFaceEncoding found no face in at least one image: leave the row as NaN.
        continue

    print(sub_dir_name)
    pair_id = int(sub_dir_name)
    test_data.at[pair_id, "EuDist"] = getEuDist(img_encoding1, img_encoding2)
    test_data.at[pair_id, "SimDist"] = getSimDist(img_encoding1, img_encoding2)

test_data.to_csv("../init_data/temp_data/test_data.csv", index_label="id")


0
10
2
3
5
8
9


In [23]:
import numpy as np
import pandas as pd

# 自动生成训练集和测试集模块
from sklearn.model_selection import train_test_split
# 计算auc模块
from sklearn.metrics import roc_auc_score
# K近邻分类器、决策树分类器、高斯朴素贝叶斯函数
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
# 打乱数据模块
from sklearn.utils import shuffle
# 输出模型模块
import joblib


def load_testdata(testdata_csv_path, eu_dist=1):
    """Load one distance feature from the test CSV as a list of 1-element rows.

    Missing values in the ``EuDist`` and ``SimDist`` columns are replaced by
    the mean of the respective column before the feature is extracted.

    Args:
        testdata_csv_path: Path of a CSV file containing ``EuDist`` and
            ``SimDist`` columns.
        eu_dist: ``1`` (default) selects the ``EuDist`` column; any other
            value selects ``SimDist``.

    Returns:
        A list with one ``[value]`` entry per CSV row, i.e. a single-feature
        sample matrix.

    Raises:
        KeyError: If either feature column is missing from the CSV.
    """
    all_data = pd.read_csv(testdata_csv_path)
    # Only the two feature columns take part in the mean/fill, so unrelated
    # (possibly non-numeric) columns and the column order cannot interfere.
    features = all_data[["EuDist", "SimDist"]]
    features = features.fillna(features.mean())

    column = "EuDist" if eu_dist == 1 else "SimDist"
    # Selecting with a one-element list keeps the data 2-D: [[v0], [v1], ...].
    return features[[column]].values.tolist()


# Feature rows for the test pairs, read back from the CSV of pair distances.
x_test = load_testdata("../init_data/temp_data/test_data.csv")

# Load the decision-tree classifier and run it on the test set.
# The K-nearest-neighbours and Gaussian naive Bayes variants follow the same
# steps with "model/knn_model.h5" and "model/gnb_model.h5" respectively.
dt = joblib.load("model/dt_model.h5")
dt_predict = dt.predict(x_test)  # classification result for each row
dt_predict_proba = dt.predict_proba(x_test)

# Column 1 of the probability matrix is written out as "label",
# one row per test sample, with the row number as "id".
result_table = pd.DataFrame(
    {"label": pd.DataFrame(dt_predict_proba)[1]},
    index=range(len(x_test)),
)
result_table.to_csv("../result/result.csv", index_label="id")