In [1]:
import numpy as np
import pandas as pd
import datetime
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
def load_code_and_label(path):
    database_code = np.sign(np.load(path + "database_code.npy"))
    database_labels = np.load(path + "database_labels.npy")
    test_codes = np.sign(np.load(path + "test_code.npy"))
    test_labels = np.load(path + "test_labels.npy")

    return {"database_code": database_code, "database_labels": database_labels, "test_code": test_codes,
            "test_labels": test_labels}

In [3]:
# 计算海明距离, 输入为numpy矩阵, Q查询矩阵, DataSet被查询矩阵
def HammingDistance(Q, DataSet):
    codeLength = Q.shape[1]
    hammingDis = 0.5 * (codeLength - Q.dot(DataSet.T))
    return hammingDis

In [4]:
# 构造相似矩阵, 输入为one-hot numpy矩阵, Q_Lable查询标签, Data_Lables被查询标签
def similarityMatrix(Q_Lable, Data_Lables):
    return np.sign(Q_Lable.dot(Data_Lables.T) - 0.5)

In [5]:
# 统计TP,FN,FP,TN
def hash_fast_TP_FN_FP_TN(Distance_M, S, hash_distance_threshold=1):
    if (S.shape!=Distance_M.shape):
        raise ValueError('The shape of Distance_M and S not sample!')
    Distance_M = Distance_M * S
    # print(Distance_M.shape)
    
    TP_FN = (S == 1).sum() # TP+FN
    FN = (Distance_M > hash_distance_threshold).sum()  # FN
    # print('*********', TP_FN, FN)
    TP = TP_FN - FN  # TP
    
    FP_TN = (S == -1).sum() # FP+TN
    TN = (Distance_M < -hash_distance_threshold).sum()  # TN
    # print('*********', FP_TN, TN)
    FP = FP_TN - TN  # FP
    
    return [TP, FN, FP, TN]

In [6]:
# 计算精确率
def hash_accuracy(result):
    return result[0] / (result[0] + result[2])

In [7]:
# 计算召回率
def hash_recall(result):
    return result[0] / (result[0] + result[1])

In [8]:
# 统计topK准确数
def topN(df, N=100):
    length = len(df)
    sumK = 0
    recallN = 0
    for i in range(length):
        topK = df.iloc[i].sort_values().iloc[:N]
        R_Num = (df.iloc[i].index == df.iloc[i].name).sum()
        T_Num = (topK.index == topK.name).sum()
        # print('--',T_Num,'--',R_Num)
        sumK += T_Num
        recallN += R_Num
    return sumK/(length * N), sumK/recallN

In [9]:
def PR_Curve(data, typ, Ham_D, y_s, K):
    Precision = [1]
    Recall = [0]
    for hash_threshold in range(K + 1):
        result = hash_fast_TP_FN_FP_TN(Ham_D, y_s, hash_distance_threshold=hash_threshold)
        Precision.append(hash_accuracy(result))
        Recall.append(hash_recall(result))
    Precision.append(0)
    Recall.append(1)
    with open('%sof%s_%s_PR_%s_bit.txt' % (data, typ, datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), K), 'a+') as f:
        f.write("%s\n %s \n" % (str(Precision), str(Recall)))
    print('PR RESULT SUCCESS')

In [10]:
bit = [16, 32, 48, 64]
dataSet = ['imagenet', 'nus_wide','cifar10']
# models = ['cross loss', 'nomal']
data = dataSet[2]
# typ = models[1]
K = bit[3]
R = 1000
path = '/home/yun/work/data/%s-models/%s/' % (data, K)
print('Under computer the mAP.......')
print(path)

code_and_label = load_code_and_label(path=path)

database_code = code_and_label['database_code']
validation_code = code_and_label['test_code']
database_labels = code_and_label['database_labels']
validation_labels = code_and_label['test_labels']

Under computer the mAP.......
/home/yun/work/data/cifar10-models/64/


In [11]:
print(database_code.shape, validation_code.shape, database_labels.shape, validation_labels.shape)

(50000, 64) (10000, 64) (50000, 10) (10000, 10)


In [12]:
y_s = similarityMatrix(validation_labels, database_labels)

In [13]:
Ham_D = HammingDistance(validation_code, database_code)

In [14]:
hash_fast_TP_FN_FP_TN(Ham_D, y_s, hash_distance_threshold=2)

[37360983, 12639017, 374798, 449625202]

In [15]:
PR_Curve(data, 'NO', Ham_D, y_s, K)

PR RESULT SUCCESS


In [17]:
dfH = pd.DataFrame(Ham_D, index=np.argmax(code_and_label['test_labels'], axis=1), columns=np.argmax(code_and_label['database_labels'], axis=1))

In [19]:
P = []
for N in range(100, 2001, 100):
    prN = topN(dfH, N)
    print(prN)
    P.append(prN)
print(P)
with open('%sof%s_%s_PTopN_%s_bit.txt' % (data, datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), K), 'a+') as f:
        f.write("%s\n" % str(P))
print('PTopN RESULT SUCCESS')

(0.943846, 0.01887692)
(0.9437675, 0.0377507)
(0.9437573333333333, 0.05662544)
(0.94367825, 0.07549426)
(0.9436214, 0.09436214)
(0.9435781666666667, 0.11322938)
(0.9435464285714286, 0.1320965)
(0.943524875, 0.15096398)
(0.94352, 0.1698336)
(0.9435124, 0.18870248)
(0.9435026363636364, 0.20757058)
(0.9435105, 0.22644252)
(0.9435000769230769, 0.24531002)
(0.9434911428571429, 0.26417752)
(0.9434894, 0.28304682)
(0.94348625, 0.3019156)
(0.9434359411764706, 0.32076822)
(0.9434377222222222, 0.33963758)
(0.9434351052631579, 0.35850534)
(0.9434246, 0.37736984)
[(0.943846, 0.01887692), (0.9437675, 0.0377507), (0.9437573333333333, 0.05662544), (0.94367825, 0.07549426), (0.9436214, 0.09436214), (0.9435781666666667, 0.11322938), (0.9435464285714286, 0.1320965), (0.943524875, 0.15096398), (0.94352, 0.1698336), (0.9435124, 0.18870248), (0.9435026363636364, 0.20757058), (0.9435105, 0.22644252), (0.9435000769230769, 0.24531002), (0.9434911428571429, 0.26417752), (0.9434894, 0.28304682), (0.94348625, 0.

TypeError: not enough arguments for format string

In [None]:
dfH.loc[0][0]

In [16]:
bit = [16, 32, 48, 64]
for K in bit:
    print(K)
    R = 1000
    path = '/home/yun/work/data/%s-models/%s/' % (data, K)
    print('Under computer the mAP.......')
    print(path)

    code_and_label = load_code_and_label(path=path)

    database_code = code_and_label['database_code']
    validation_code = code_and_label['test_code']
    database_labels = code_and_label['database_labels']
    validation_labels = code_and_label['test_labels']

    y_s = similarityMatrix(validation_labels, database_labels)
    Ham_D = HammingDistance(validation_code, database_code)

    rs = hash_fast_TP_FN_FP_TN(Ham_D, y_s, hash_distance_threshold=2)
    print(hash_accuracy(rs))

16
Under computer the mAP.......
/home/yun/work/data/cifar10-models/16/
0.9548614791179079
32
Under computer the mAP.......
/home/yun/work/data/cifar10-models/32/
0.9769535236899393
48
Under computer the mAP.......
/home/yun/work/data/cifar10-models/48/
0.9848583452321682
64
Under computer the mAP.......
/home/yun/work/data/cifar10-models/64/
0.9900678350873405
