In [1]:
import json
import numpy as np
from tqdm import tqdm
from pprint import pprint
from sklearn.cluster import KMeans

from label_processor import CStdLib

In [2]:
# Read the archived per-product logits file into memory as a dict.
with open('archived/result_logits.json', encoding='utf-8') as logits_file:
    logits_json = json.loads(logits_file.read())

In [3]:
# Instantiate the tag lookup from the project-local label_processor module.
# It is called below as embed(tag); the result is used as a sequence of logit
# column indices, and an empty result is treated as "tag not matched".
# NOTE(review): the meaning of single=False is not visible here — confirm in label_processor.
embed = CStdLib(single=False)

In [4]:
# Relabel every product image with one of that product's optional tags.
#
# For each product:
#   1. parse the per-image logit strings into a (n_images, n_logits) array,
#   2. cluster the images with k-means (one cluster per optional tag),
#   3. score every (cluster, tag) pair by the mean of the cluster's logits over
#      the logit columns returned by embed(tag),
#   4. overwrite each image's logit string in `logits_json` with the
#      best-scoring tag of the cluster the image belongs to.
#
# NOTE: this mutates `logits_json` in place. After it has run, the values are
# tag strings rather than logit strings, so re-run the load cell before
# re-running this one.
pbar = tqdm(logits_json.items())
for key, prod_dict in pbar:
    optional_tags = prod_dict['optional_tags']
    num_class = len(optional_tags)
    imgs_tags = prod_dict['imgs_tags']

    # Each entry of imgs_tags is a dict keyed by '<product key>_<index>.jpg'.
    img_names = [key + '_{}.jpg'.format(img_idx) for img_idx in range(len(imgs_tags))]
    if not img_names or num_class == 0:
        # Nothing to cluster or nothing to assign; KMeans would raise here.
        continue

    imgs_logits = np.array([
        np.fromstring(entry[name], dtype=np.float32, sep=' ')
        for entry, name in zip(imgs_tags, img_names)
    ])

    # KMeans requires n_clusters <= n_samples, so cap the cluster count for
    # products that have fewer images than optional tags.
    n_clusters = min(num_class, len(img_names))
    cluster = KMeans(n_clusters=n_clusters, random_state=42).fit(imgs_logits)
    labels = cluster.labels_
    # Image indices belonging to each cluster, computed once and reused below.
    members = [np.where(labels == cluster_id)[0] for cluster_id in range(n_clusters)]

    embeded_tags = [embed(tag) for tag in optional_tags]

    # match_score[c, t] is the degree of match between k-means cluster c and
    # optional_tags[t].
    match_score = np.zeros((n_clusters, num_class))
    not_matched = False
    for cluster_id, member_ids in enumerate(members):
        cluster_logits = imgs_logits[member_ids]
        for tag_id, logits_ids in enumerate(embeded_tags):
            if len(logits_ids) > 0:
                match_score[cluster_id, tag_id] = np.mean(cluster_logits[:, logits_ids])
            else:
                # embed() returned no logit columns for this tag: use a
                # constant score and flag the product for inspection.
                not_matched = True
                match_score[cluster_id, tag_id] = 1

    # Normalise each tag column by its 3-norm so tags are compared on a common
    # scale, then pick the best tag for every cluster.
    match_score_norm = match_score / np.linalg.norm(match_score, axis=0, ord=3)
    label_id = np.argmax(match_score_norm, axis=1)

    for cluster_id, member_ids in enumerate(members):
        chosen_tag = optional_tags[label_id[cluster_id]]
        for img_idx in member_ids:
            imgs_tags[img_idx][img_names[img_idx]] = chosen_tag

    if not_matched:
        print(key, '\n', match_score, '\n', match_score_norm)
        pprint(prod_dict)

  5%|▌         | 284/5331 [00:03<00:57, 87.86it/s] 

624952669722 
 [[3.54169917 1.        ]
 [6.12443686 1.        ]] 
 [[0.54519422 0.79370053]
 [0.94276995 0.79370053]]
{'imgs_tags': [{'624952669722_0.jpg': '珊瑚色组合'},
               {'624952669722_1.jpg': '珊瑚色组合'},
               {'624952669722_2.jpg': '橄榄色组合'},
               {'624952669722_3.jpg': '橄榄色组合'},
               {'624952669722_4.jpg': '橄榄色组合'}],
 'optional_tags': ['珊瑚色组合', '橄榄色组合']}


  6%|▋         | 341/5331 [00:03<00:48, 103.89it/s]

614043736149 
 [[ 6.78489113  5.1024971   2.56172848  1.          4.02346802]
 [ 2.91709399  2.64869952  9.82762909  1.          3.01735854]
 [ 2.72273445  9.69181824  3.23731184  1.          2.28493643]
 [ 3.47204447  4.16564274  2.93793297  1.         11.85724735]
 [11.45318031  4.11650181  3.28069067  1.          4.18891811]] 
 [[0.54765214 0.47965296 0.25119923 0.58480355 0.32804042]
 [0.23545739 0.24898722 0.96368249 0.58480355 0.24601054]
 [0.21976938 0.91106555 0.31744592 0.58480355 0.18629488]
 [0.28025101 0.3915853  0.28808928 0.58480355 0.96674221]
 [0.92445974 0.38696588 0.32169958 0.58480355 0.34152986]]
{'imgs_tags': [{'614043736149_0.jpg': '条纹'},
               {'614043736149_1.jpg': '桔色'},
               {'614043736149_2.jpg': '红色'},
               {'614043736149_3.jpg': '桔色'},
               {'614043736149_4.jpg': '红色'},
               {'614043736149_5.jpg': '黑色'},
               {'614043736149_6.jpg': '条纹'},
               {'614043736149_7.jpg': '黑色'},
               {

 15%|█▌        | 815/5331 [00:08<00:44, 102.15it/s]

565607486988 
 [[10.7949276   1.        ]
 [11.78563213  1.        ]] 
 [[0.75742233 0.79370053]
 [0.82693477 0.79370053]]
{'imgs_tags': [{'565607486988_0.jpg': '条纹'},
               {'565607486988_1.jpg': '黑色'},
               {'565607486988_2.jpg': '黑色'},
               {'565607486988_3.jpg': '黑色'},
               {'565607486988_4.jpg': '黑色'},
               {'565607486988_5.jpg': '黑色'},
               {'565607486988_6.jpg': '黑色'}],
 'optional_tags': ['黑色', '条纹']}


 30%|██▉       | 1593/5331 [00:16<00:35, 106.42it/s]

623845128856 
 [[ 5.0101409   1.         14.04391861]
 [10.01270103  1.          4.63675213]
 [ 7.21883583  1.         10.98723507]] 
 [[0.4371171  0.69336127 0.87071835]
 [0.87357282 0.69336127 0.28747711]
 [0.62981794 0.69336127 0.68120497]]
{'imgs_tags': [{'623845128856_0.jpg': '黑色'},
               {'623845128856_1.jpg': '黑色'},
               {'623845128856_2.jpg': '条纹'},
               {'623845128856_3.jpg': '白色'},
               {'623845128856_4.jpg': '条纹'},
               {'623845128856_5.jpg': '白色'}],
 'optional_tags': ['黑色', '条纹', '白色']}
605311018824 
 [[10.4801178   1.          4.4920063 ]
 [10.47670078  1.          6.52550602]
 [ 2.47349119  1.          9.62872601]] 
 [[0.79209722 0.69336127 0.41576147]
 [0.79183896 0.69336127 0.60397376]
 [0.18694881 0.69336127 0.89119493]]
{'imgs_tags': [{'605311018824_0.jpg': '黑色'},
               {'605311018824_1.jpg': '蓝色'},
               {'605311018824_2.jpg': '黑色'},
               {'605311018824_3.jpg': '黑色'},
               {'605311

 31%|███▏      | 1672/5331 [00:17<00:36, 100.60it/s]

621875084587 
 [[11.49303055  1.        ]
 [ 9.52917576  1.        ]] 
 [[0.86040244 0.79370053]
 [0.71338243 0.79370053]]
{'imgs_tags': [{'621875084587_0.jpg': '条纹'},
               {'621875084587_1.jpg': '条纹'},
               {'621875084587_2.jpg': '条纹'},
               {'621875084587_3.jpg': '条纹'},
               {'621875084587_4.jpg': '黑色'},
               {'621875084587_5.jpg': '黑色'},
               {'621875084587_6.jpg': '黑色'}],
 'optional_tags': ['黑色', '条纹']}


 43%|████▎     | 2294/5331 [00:23<00:31, 96.41it/s] 

576888702197 
 [[ 8.99164963  1.          6.07917833 10.15758896]
 [ 5.84524012  1.         12.47119045  7.24120283]
 [ 5.90163803  1.          6.64693403 13.20964241]
 [10.3117342   1.          6.65612316  8.3815136 ]] 
 [[0.68837016 0.62996052 0.43375952 0.6236077 ]
 [0.44749173 0.62996052 0.88984025 0.44456119]
 [0.45180937 0.62996052 0.47426983 0.81098327]
 [0.78943135 0.62996052 0.47492549 0.51456861]]
{'imgs_tags': [{'576888702197_0.jpg': '黑色'},
               {'576888702197_1.jpg': '黑色'},
               {'576888702197_2.jpg': '黑色'},
               {'576888702197_3.jpg': '白色'},
               {'576888702197_4.jpg': '灰色'},
               {'576888702197_5.jpg': '灰色'},
               {'576888702197_6.jpg': '黑色'},
               {'576888702197_7.jpg': '黑色'},
               {'576888702197_8.jpg': '白色'}],
 'optional_tags': ['黑色', '条纹色', '灰色', '白色']}


 46%|████▋     | 2472/5331 [00:25<00:30, 92.45it/s] 

603281751757 
 [[10.08445454  1.        ]
 [12.63961983  1.        ]] 
 [[0.69576643 0.79370053]
 [0.8720574  0.79370053]]
{'imgs_tags': [{'603281751757_0.jpg': '黑色'},
               {'603281751757_1.jpg': '黑色'},
               {'603281751757_2.jpg': '黑色'},
               {'603281751757_3.jpg': '拼色'},
               {'603281751757_4.jpg': '黑色'},
               {'603281751757_5.jpg': '拼色'},
               {'603281751757_6.jpg': '拼色'}],
 'optional_tags': ['黑色', '拼色']}
617278066189 
 [[6.77111053 1.        ]
 [9.55943775 1.        ]] 
 [[0.64004145 0.79370053]
 [0.90360899 0.79370053]]
{'imgs_tags': [{'617278066189_0.jpg': '条纹套装'},
               {'617278066189_1.jpg': '白色套装'},
               {'617278066189_2.jpg': '条纹套装'},
               {'617278066189_3.jpg': '条纹套装'},
               {'617278066189_4.jpg': '条纹套装'},
               {'617278066189_5.jpg': '条纹套装'}],
 'optional_tags': ['白色套装', '条纹套装']}


 53%|█████▎    | 2822/5331 [00:29<00:24, 102.37it/s]

617521964984 
 [[ 3.15530992 10.0882349   1.          6.88930798]
 [11.56291103  4.9850049   1.          3.85006595]
 [ 4.67547798  7.18239021  1.          9.39097786]
 [ 6.94367886  8.93373489  1.          8.21610165]] 
 [[0.24983978 0.77169672 0.62996052 0.56986764]
 [0.91555987 0.38132656 0.62996052 0.31846856]
 [0.37020781 0.54941494 0.62996052 0.77679999]
 [0.54980564 0.68338357 0.62996052 0.67961694]]
{'imgs_tags': [{'617521964984_0.jpg': '灰色'},
               {'617521964984_1.jpg': '黑色'},
               {'617521964984_2.jpg': '黑色'},
               {'617521964984_3.jpg': '白色'},
               {'617521964984_4.jpg': '灰色'},
               {'617521964984_5.jpg': '白色'},
               {'617521964984_6.jpg': '灰色'},
               {'617521964984_7.jpg': '灰色'}],
 'optional_tags': ['黑色', '灰色', '条纹', '白色']}


 58%|█████▊    | 3089/5331 [00:31<00:22, 99.33it/s] 

614491314779 
 [[12.45196056  1.        ]
 [12.58227921  1.        ]] 
 [[0.78954743 0.79370053]
 [0.79781061 0.79370053]]
{'imgs_tags': [{'614491314779_0.jpg': '竖条纹'},
               {'614491314779_1.jpg': '黑白点'},
               {'614491314779_2.jpg': '竖条纹'},
               {'614491314779_3.jpg': '黑白点'},
               {'614491314779_4.jpg': '黑白点'},
               {'614491314779_5.jpg': '黑白点'},
               {'614491314779_6.jpg': '竖条纹'}],
 'optional_tags': ['黑白点', '竖条纹']}


 64%|██████▍   | 3437/5331 [00:35<00:17, 109.72it/s]

621931894053 
 [[ 1.         10.26905346  6.61371899]
 [ 1.          7.58517027 10.5323782 ]
 [ 1.          6.29304409  7.30076122]] 
 [[0.69336127 0.84916535 0.53906265]
 [0.69336127 0.62723053 0.85845978]
 [0.69336127 0.52038243 0.59506122]]
{'imgs_tags': [{'621931894053_0.jpg': '白色半裙'},
               {'621931894053_1.jpg': '白色半裙'},
               {'621931894053_2.jpg': '白色半裙'},
               {'621931894053_3.jpg': '黑色半裙'},
               {'621931894053_4.jpg': '条纹衬衫'},
               {'621931894053_5.jpg': '白色半裙'},
               {'621931894053_6.jpg': '黑色半裙'}],
 'optional_tags': ['条纹衬衫', '黑色半裙', '白色半裙']}


 70%|██████▉   | 3717/5331 [00:37<00:15, 105.95it/s]

601450019157 
 [[1.]] 
 [[1.]]
{'imgs_tags': [{'601450019157_0.jpg': '1906纯色'},
               {'601450019157_1.jpg': '1906纯色'},
               {'601450019157_2.jpg': '1906纯色'}],
 'optional_tags': ['1906纯色']}


 82%|████████▏ | 4377/5331 [00:44<00:08, 110.60it/s]

625820335127 
 [[ 9.33763218  1.          7.46607351]
 [ 6.2501893   1.          9.92091751]
 [12.63746452  1.          6.16974735]] 
 [[0.64201708 0.69336127 0.63472523]
 [0.42973724 0.69336127 0.8434228 ]
 [0.86889994 0.69336127 0.52451858]]
{'imgs_tags': [{'625820335127_0.jpg': '白色'},
               {'625820335127_1.jpg': '白色'},
               {'625820335127_2.jpg': '条纹'},
               {'625820335127_3.jpg': '黑色'},
               {'625820335127_4.jpg': '条纹'},
               {'625820335127_5.jpg': '白色'},
               {'625820335127_6.jpg': '条纹'},
               {'625820335127_7.jpg': '黑色'}],
 'optional_tags': ['黑色', '条纹', '白色']}


 85%|████████▌ | 4537/5331 [00:45<00:07, 105.33it/s]

577541680180 
 [[13.46714783  1.          6.60329819  1.92246294]
 [ 3.42867184  1.          2.63183498 11.72202206]
 [ 2.86667657  1.         10.48777962  2.60973549]
 [ 3.29647803  1.          3.67912197  8.85310936]] 
 [[0.98675411 0.62996052 0.57563284 0.14502435]
 [0.25122291 0.62996052 0.22942636 0.88427118]
 [0.21004484 0.62996052 0.91425681 0.19686995]
 [0.24153691 0.62996052 0.32072206 0.66784975]]
{'imgs_tags': [{'577541680180_0.jpg': '红色'},
               {'577541680180_1.jpg': '红色'},
               {'577541680180_2.jpg': '蓝色'},
               {'577541680180_3.jpg': '蓝色'},
               {'577541680180_4.jpg': '橙色'},
               {'577541680180_5.jpg': '蓝色'},
               {'577541680180_6.jpg': '橙色'},
               {'577541680180_7.jpg': '蓝色'},
               {'577541680180_8.jpg': '蓝色'}],
 'optional_tags': ['橙色', '深色', '红色', '蓝色']}


 90%|████████▉ | 4783/5331 [00:48<00:06, 90.21it/s] 

612126042525 
 [[ 1.         11.84072876]
 [ 1.         11.24886417]] 
 [[0.79370053 0.81351158]
 [0.79370053 0.77284781]]
{'imgs_tags': [{'612126042525_0.jpg': '957黑色'},
               {'612126042525_1.jpg': '957黑色'},
               {'612126042525_2.jpg': '957黑色'},
               {'612126042525_3.jpg': '6802格子'},
               {'612126042525_4.jpg': '957黑色'},
               {'612126042525_5.jpg': '957黑色'},
               {'612126042525_6.jpg': '957黑色'}],
 'optional_tags': ['6802格子', '957黑色']}


100%|██████████| 5331/5331 [00:54<00:00, 98.41it/s] 


In [5]:
# Persist the relabelled products. ensure_ascii=False writes the Chinese tags
# as raw characters, so the file is opened explicitly as UTF-8 (matching the
# encoding used when loading) instead of relying on the platform default.
with open('./result_labels.json', 'w', encoding='utf-8') as f:
    json.dump(logits_json, f, indent=4, ensure_ascii=False)