In [1]:
import json
import numpy as np
from tqdm import tqdm
from pprint import pprint
from sklearn.cluster import KMeans

from label_processor import CStdLib

In [2]:
# Read result_logits.json (UTF-8) and parse it into `logits_json`.
with open('result_logits.json', encoding='utf-8') as f:
    raw_text = f.read()
    logits_json = json.loads(raw_text)

In [3]:
# Tag mapper from the project-local `label_processor` module.
# It is called below as `embed(tag)`; the returned value is used as a sequence
# of logit column indices and may be empty when the tag cannot be mapped.
# NOTE(review): the meaning of single=False is not visible in this notebook —
# confirm against CStdLib.
embed = CStdLib(single=False)

In [4]:
# For every product: cluster its images by their logits with k-means (one
# cluster per optional tag), match each cluster to one of the product's
# optional tags, and overwrite the stored logits string of every image in
# `logits_json` with the chosen tag.
#
# NOTE: this cell mutates `logits_json` in place, so it is not idempotent —
# re-run the loading cell before running it a second time.
pbar = tqdm(logits_json.items())
for key, prod_dict in pbar:
    optional_tags = prod_dict['optional_tags']
    num_class = len(optional_tags)
    imgs_tags = prod_dict['imgs_tags']

    # Each entry of imgs_tags is a one-item dict keyed '<key>_<index>.jpg'
    # whose value is a space-separated string of floats.
    imgs_logits = np.array([
        np.fromstring(img_tag[key + '_{}.jpg'.format(img_idx)], dtype=np.float32, sep=' ')
        for img_idx, img_tag in enumerate(imgs_tags)
    ])

    cluster = KMeans(n_clusters=num_class, random_state=42).fit(imgs_logits)
    labels = cluster.labels_
    # Image indices belonging to each k-means cluster, computed once and
    # reused for both scoring and relabelling.
    cluster_members = [np.where(labels == cluster_id)[0] for cluster_id in range(num_class)]

    # match_score[i, j] is how well k-means cluster i matches optional_tags[j].
    match_score = np.zeros((num_class, num_class))
    embeded_tags = [embed(tag) for tag in optional_tags]
    not_matched = False
    for tag_idx, logits_ids in enumerate(embeded_tags):
        if len(logits_ids) > 0:
            for cluster_id, member_ids in enumerate(cluster_members):
                if len(member_ids) == 0:
                    # k-means can leave a cluster empty (e.g. fewer distinct
                    # logit vectors than tags). np.mean of an empty slice is
                    # NaN, which would turn the whole column norm into NaN and
                    # corrupt the argmax of every row, so keep the score at 0.
                    continue
                match_score[cluster_id, tag_idx] = np.mean(imgs_logits[member_ids][:, logits_ids])
        else:
            # The tag could not be mapped to any logit index: give it the same
            # constant score for every cluster and remember to report it.
            not_matched = True
            match_score[:, tag_idx] = 1

    # Normalise the scores so that every column has unit `ord`-norm, then take
    # the maximum of each row as the tag of that cluster.
    # Normalising by column and maximising by row makes the choice two-sided.
    # A higher-order norm raises the relative score of unmatched tags, which
    # helps acc/em.
    match_score_norm = match_score / np.linalg.norm(match_score, axis=0, ord=5)
    label_id = np.argmax(match_score_norm, axis=1)

    # `imgs_tags` is the same list object as logits_json[key]['imgs_tags'], so
    # this writes the chosen tag back into `logits_json`.
    for cluster_id, member_ids in enumerate(cluster_members):
        for img_idx in member_ids:
            imgs_tags[img_idx][key + '_{}.jpg'.format(img_idx)] = optional_tags[label_id[cluster_id]]

    if not_matched:
        print(key, '\n', match_score, '\n', match_score_norm)
        pprint(logits_json[key])

  5%|▌         | 272/5331 [00:03<00:56, 89.96it/s] 

624952669722 
 [[3.6693418  1.        ]
 [6.44651413 1.        ]] 
 [[0.56262988 0.87055056]
 [0.98846105 0.87055056]]
{'imgs_tags': [{'624952669722_0.jpg': '珊瑚色组合'},
               {'624952669722_1.jpg': '珊瑚色组合'},
               {'624952669722_2.jpg': '橄榄色组合'},
               {'624952669722_3.jpg': '橄榄色组合'},
               {'624952669722_4.jpg': '橄榄色组合'}],
 'optional_tags': ['珊瑚色组合', '橄榄色组合']}


  7%|▋         | 353/5331 [00:03<00:45, 109.17it/s]

614043736149 
 [[ 7.20567131  3.90869713  1.96744847  1.          4.77405357]
 [ 3.43354893  2.63160849 10.63453388  1.          3.20032692]
 [ 3.47113943 10.02686214  2.45175362  1.          2.63745213]
 [ 3.31556606  3.57004356  2.21922946  1.         12.93972588]
 [12.69450951  3.98268509  3.5317955   1.          4.43831062]] 
 [[0.56071533 0.38783741 0.18480999 0.72477966 0.36800376]
 [0.26718448 0.26111929 0.9989426  0.72477966 0.24669441]
 [0.27010961 0.99490755 0.23030263 0.72477966 0.2033057 ]
 [0.25800354 0.35423478 0.20846074 0.72477966 0.99744749]
 [0.98783386 0.39517881 0.33175511 0.72477966 0.34212331]]
{'imgs_tags': [{'614043736149_0.jpg': '条纹'},
               {'614043736149_1.jpg': '桔色'},
               {'614043736149_2.jpg': '红色'},
               {'614043736149_3.jpg': '桔色'},
               {'614043736149_4.jpg': '红色'},
               {'614043736149_5.jpg': '黑色'},
               {'614043736149_6.jpg': '条纹'},
               {'614043736149_7.jpg': '黑色'},
               {

 15%|█▌        | 811/5331 [00:08<00:46, 96.48it/s] 

565607486988 
 [[13.27059841  1.        ]
 [12.74233723  1.        ]] 
 [[0.88749843 0.87055056]
 [0.85216988 0.87055056]]
{'imgs_tags': [{'565607486988_0.jpg': '黑色'},
               {'565607486988_1.jpg': '条纹'},
               {'565607486988_2.jpg': '条纹'},
               {'565607486988_3.jpg': '黑色'},
               {'565607486988_4.jpg': '条纹'},
               {'565607486988_5.jpg': '条纹'},
               {'565607486988_6.jpg': '黑色'}],
 'optional_tags': ['黑色', '条纹']}


 30%|██▉       | 1589/5331 [00:16<00:37, 100.23it/s]

623845128856 
 [[ 6.14779282  1.         15.48163986]
 [11.9930191   1.          3.90564489]
 [ 7.79281902  1.         12.79503059]] 
 [[0.49837747 0.80274156 0.9367188 ]
 [0.97222705 0.80274156 0.23631159]
 [0.63173329 0.80274156 0.77416513]]
{'imgs_tags': [{'623845128856_0.jpg': '黑色'},
               {'623845128856_1.jpg': '黑色'},
               {'623845128856_2.jpg': '条纹'},
               {'623845128856_3.jpg': '白色'},
               {'623845128856_4.jpg': '条纹'},
               {'623845128856_5.jpg': '白色'}],
 'optional_tags': ['黑色', '条纹', '白色']}
605311018824 
 [[ 3.53420258  1.         10.07571316]
 [11.7324934   1.          6.90285778]
 [10.93647003  1.          4.95049381]] 
 [[0.27070251 0.80274156 0.96751129]
 [0.89865121 0.80274156 0.66284071]
 [0.83767974 0.80274156 0.47536671]]
{'imgs_tags': [{'605311018824_0.jpg': '黑色'},
               {'605311018824_1.jpg': '蓝色'},
               {'605311018824_2.jpg': '黑色'},
               {'605311018824_3.jpg': '黑色'},
               {'605311

 32%|███▏      | 1681/5331 [00:17<00:34, 105.96it/s]

621875084587 
 [[12.63118744  1.        ]
 [10.30453682  1.        ]] 
 [[0.94017    0.87055056]
 [0.76699174 0.87055056]]
{'imgs_tags': [{'621875084587_0.jpg': '条纹'},
               {'621875084587_1.jpg': '条纹'},
               {'621875084587_2.jpg': '条纹'},
               {'621875084587_3.jpg': '条纹'},
               {'621875084587_4.jpg': '黑色'},
               {'621875084587_5.jpg': '黑色'},
               {'621875084587_6.jpg': '黑色'}],
 'optional_tags': ['黑色', '条纹']}


 43%|████▎     | 2292/5331 [00:23<00:30, 99.81it/s] 

576888702197 
 [[ 9.62512112  1.          6.15399599 10.57003403]
 [ 6.62946081  1.         12.34895134  9.43474865]
 [ 5.61325979  1.          7.50664902 13.18211746]
 [11.67832756  1.          5.60128498  6.72933578]] 
 [[0.76359583 0.75785828 0.4860558  0.73418111]
 [0.52593922 0.75785828 0.97534666 0.65532564]
 [0.4453203  0.75785828 0.59289124 0.91561311]
 [0.92648416 0.75785828 0.4424015  0.4674111 ]]
{'imgs_tags': [{'576888702197_0.jpg': '黑色'},
               {'576888702197_1.jpg': '黑色'},
               {'576888702197_2.jpg': '黑色'},
               {'576888702197_3.jpg': '白色'},
               {'576888702197_4.jpg': '灰色'},
               {'576888702197_5.jpg': '灰色'},
               {'576888702197_6.jpg': '黑色'},
               {'576888702197_7.jpg': '黑色'},
               {'576888702197_8.jpg': '白色'}],
 'optional_tags': ['黑色', '条纹色', '灰色', '白色']}


 46%|████▋     | 2472/5331 [00:25<00:29, 96.16it/s] 

603281751757 
 [[12.29345703  1.        ]
 [13.96550655  1.        ]] 
 [[0.80865144 0.87055056]
 [0.9186372  0.87055056]]
{'imgs_tags': [{'603281751757_0.jpg': '黑色'},
               {'603281751757_1.jpg': '黑色'},
               {'603281751757_2.jpg': '黑色'},
               {'603281751757_3.jpg': '拼色'},
               {'603281751757_4.jpg': '黑色'},
               {'603281751757_5.jpg': '拼色'},
               {'603281751757_6.jpg': '拼色'}],
 'optional_tags': ['黑色', '拼色']}
617278066189 
 [[7.45881748 1.        ]
 [9.07236958 1.        ]] 
 [[0.77134681 0.87055056]
 [0.93821082 0.87055056]]
{'imgs_tags': [{'617278066189_0.jpg': '条纹套装'},
               {'617278066189_1.jpg': '白色套装'},
               {'617278066189_2.jpg': '条纹套装'},
               {'617278066189_3.jpg': '条纹套装'},
               {'617278066189_4.jpg': '条纹套装'},
               {'617278066189_5.jpg': '条纹套装'}],
 'optional_tags': ['白色套装', '条纹套装']}


 53%|█████▎    | 2814/5331 [00:29<00:27, 93.17it/s]

617521964984 
 [[ 3.46477294 11.332304    1.          7.33220577]
 [11.71621227  4.81689596  1.          4.03708076]
 [ 5.06145716  7.84467602  1.          9.23895073]
 [ 7.55221081  9.10394573  1.          8.74075508]] 
 [[0.28865567 0.92119368 0.75785828 0.68491629]
 [0.97609602 0.39156152 0.75785828 0.37711195]
 [0.42167793 0.63768727 0.75785828 0.86302922]
 [0.62918653 0.74005227 0.75785828 0.81649175]]
{'imgs_tags': [{'617521964984_0.jpg': '灰色'},
               {'617521964984_1.jpg': '黑色'},
               {'617521964984_2.jpg': '黑色'},
               {'617521964984_3.jpg': '白色'},
               {'617521964984_4.jpg': '白色'},
               {'617521964984_5.jpg': '白色'},
               {'617521964984_6.jpg': '灰色'},
               {'617521964984_7.jpg': '白色'}],
 'optional_tags': ['黑色', '灰色', '条纹', '白色']}


 58%|█████▊    | 3083/5331 [00:31<00:21, 103.93it/s]

614491314779 
 [[13.38864613  1.        ]
 [12.6219511   1.        ]] 
 [[0.89466119 0.87055056]
 [0.8434288  0.87055056]]
{'imgs_tags': [{'614491314779_0.jpg': '黑白点'},
               {'614491314779_1.jpg': '竖条纹'},
               {'614491314779_2.jpg': '黑白点'},
               {'614491314779_3.jpg': '竖条纹'},
               {'614491314779_4.jpg': '竖条纹'},
               {'614491314779_5.jpg': '竖条纹'},
               {'614491314779_6.jpg': '黑白点'}],
 'optional_tags': ['黑白点', '竖条纹']}


 64%|██████▍   | 3435/5331 [00:35<00:20, 93.38it/s] 

621931894053 
 [[ 1.          7.39813375 12.08259869]
 [ 1.         12.03983593  8.05167389]
 [ 1.          9.4560957  11.81974983]] 
 [[0.80274156 0.57559973 0.86819587]
 [0.80274156 0.93673979 0.57855352]
 [0.80274156 0.7357161  0.84930885]]
{'imgs_tags': [{'621931894053_0.jpg': '白色半裙'},
               {'621931894053_1.jpg': '白色半裙'},
               {'621931894053_2.jpg': '白色半裙'},
               {'621931894053_3.jpg': '黑色半裙'},
               {'621931894053_4.jpg': '白色半裙'},
               {'621931894053_5.jpg': '白色半裙'},
               {'621931894053_6.jpg': '黑色半裙'}],
 'optional_tags': ['条纹衬衫', '黑色半裙', '白色半裙']}


 70%|██████▉   | 3709/5331 [00:38<00:17, 93.79it/s] 

601450019157 
 [[1.]] 
 [[1.]]
{'imgs_tags': [{'601450019157_0.jpg': '1906纯色'},
               {'601450019157_1.jpg': '1906纯色'},
               {'601450019157_2.jpg': '1906纯色'}],
 'optional_tags': ['1906纯色']}


 82%|████████▏ | 4367/5331 [00:45<00:13, 73.43it/s] 

625820335127 
 [[11.1042223   1.          8.08058834]
 [ 7.16854334  1.         12.92403793]
 [13.86554146  1.          6.23412037]] 
 [[0.7523815  0.80274156 0.61104369]
 [0.48571429 0.80274156 0.97729912]
 [0.93947839 0.80274156 0.47141616]]
{'imgs_tags': [{'625820335127_0.jpg': '白色'},
               {'625820335127_1.jpg': '白色'},
               {'625820335127_2.jpg': '条纹'},
               {'625820335127_3.jpg': '黑色'},
               {'625820335127_4.jpg': '条纹'},
               {'625820335127_5.jpg': '白色'},
               {'625820335127_6.jpg': '条纹'},
               {'625820335127_7.jpg': '黑色'}],
 'optional_tags': ['黑色', '条纹', '白色']}


 85%|████████▍ | 4524/5331 [00:47<00:07, 105.01it/s]

577541680180 
 [[15.08464813  1.          7.01837683  2.43292999]
 [ 2.56085491  1.          2.59838891 12.8776989 ]
 [ 3.69489646  1.         11.64810181  3.12644339]
 [ 4.60691595  1.          4.84250879  9.02220917]] 
 [[0.99926569 0.75785828 0.59197889 0.18308921]
 [0.16964098 0.75785828 0.21916626 0.96910628]
 [0.2447643  0.75785828 0.98248221 0.2352793 ]
 [0.30518001 0.75785828 0.40845099 0.67896288]]
{'imgs_tags': [{'577541680180_0.jpg': '红色'},
               {'577541680180_1.jpg': '红色'},
               {'577541680180_2.jpg': '蓝色'},
               {'577541680180_3.jpg': '蓝色'},
               {'577541680180_4.jpg': '橙色'},
               {'577541680180_5.jpg': '深色'},
               {'577541680180_6.jpg': '橙色'},
               {'577541680180_7.jpg': '蓝色'},
               {'577541680180_8.jpg': '蓝色'}],
 'optional_tags': ['橙色', '深色', '红色', '蓝色']}


 89%|████████▉ | 4765/5331 [00:49<00:06, 89.69it/s] 

612126042525 
 [[ 1.         12.58297062]
 [ 1.         13.06167889]] 
 [[0.87055056 0.85370578]
 [0.87055056 0.88618428]]
{'imgs_tags': [{'612126042525_0.jpg': '6802格子'},
               {'612126042525_1.jpg': '6802格子'},
               {'612126042525_2.jpg': '957黑色'},
               {'612126042525_3.jpg': '6802格子'},
               {'612126042525_4.jpg': '6802格子'},
               {'612126042525_5.jpg': '6802格子'},
               {'612126042525_6.jpg': '6802格子'}],
 'optional_tags': ['6802格子', '957黑色']}


100%|██████████| 5331/5331 [00:56<00:00, 94.80it/s] 


In [5]:
# Persist the relabelled records as JSON.
# encoding='utf-8' is passed explicitly: ensure_ascii=False writes non-ASCII
# tag text verbatim, so relying on the platform default encoding could raise
# UnicodeEncodeError or produce a file that is not UTF-8. This also matches the
# UTF-8 encoding used when reading result_logits.json.
with open('./result_labels.json', 'w', encoding='utf-8') as f:
    json.dump(logits_json, f, indent=4, ensure_ascii=False)