In [1]:
import re
import mmcv
from sklearn.metrics import average_precision_score
import numpy as np
from typing import Tuple

def get_epoch(file_path: str):
# Read the file
  with open(file_path, "r") as file:
      content = file.read()

  # Use regex to extract the number after "mAP:"
  match = re.search(r"mAP:\s*(\d+)", content)
  if match:
      epoch = int(match.group(1))
      return epoch
  else:
      print("Epoch not found.")

def get_gt_pred(file_path: str):
    output_and_gt = mmcv.load(file_path)
    return np.array(output_and_gt[0]['outputs']), np.array(output_and_gt[0]['gt_labels'])
# import numpy as np
# outputs_result = None
# gt_labels_result = None
# for i in range(1,5):
#     file_path = f"coco/coco_uniform_bce_group{i}.txt"
#     epoch = get_epoch(file_path)
#     outputs, gt_labels = get_gt_pred(f"work_dirs/LT_coco_resnet50_pfc_DB_uniform_bce_group{i}/gt_and_results_e{epoch}.pkl")
#     if outputs_result is None:
#         outputs_result = outputs
#     else:
#         outputs_result = np.hstack((np.array(outputs_result), np.array(outputs)))
#     if gt_labels_result is None:
#         gt_labels_result = gt_labels
#     else:
#         gt_labels_result = np.hstack((np.array(gt_labels_result), np.array(gt_labels)))
    

def calculate_mAP(all_preds: np.ndarray, all_labels: np.ndarray) -> Tuple[np.float64, list]:
    label_cnt = len(all_labels[0])
    mAP = 0
    mAP_per_label = []
    # print(label_cnt)
    all_labels = np.array(all_labels)
    all_preds = np.array(all_preds)
    for i in range(label_cnt):
        # print(np.sum(all_labels[:, i]) > 0)
        AP = average_precision_score(all_labels[:, i], all_preds[:, i])
        mAP_per_label.append(AP)
        mAP += AP

    mAP /= label_cnt
    APs = average_precision_score(all_labels, all_preds, average=None) 
    mAP = APs.mean()

    return mAP, mAP_per_label
def get_3_split_map(outputs_result, gt_labels_result):
    map_score1 =  calculate_mAP( np.array(outputs_result)[:, :22], np.array(gt_labels_result)[:, :22])
    # print(f"head mAP: {map_score1[0]:.4f}")
    # print(np.array(map_score[1]))
    map_score2 =  calculate_mAP( np.array(outputs_result)[:, 22:55], np.array(gt_labels_result)[:, 22:55])
    # print(f"mid mAP: {map_score2[0]:.4f}")
    # print(np.array(map_score[1]))
    map_score3 =  calculate_mAP( np.array(outputs_result)[:, 55:], np.array(gt_labels_result)[:, 55:])

    total = calculate_mAP( np.array(outputs_result), np.array(gt_labels_result))
    # print(f"tail mAP: {map_score3[0]:.4f}")
    print([round(map_score1[0], 4), round(map_score2[0], 4), round(map_score3[0], 4), round(total[0], 4)])
    # print(np.array(map_score[1]))


# get_3_split_map(outputs_result, gt_labels_result)

  from .autonotebook import tqdm as notebook_tqdm


In [33]:
import ast
import pandas as pd
original_position = pd.read_csv("/home/mark/Desktop/工研院/multi-label_classification/data/voc/group2_original_positions_per_step.csv")
original_position['original_positions'] = original_position['original_positions'].apply(ast.literal_eval)
def get_original_index(step):
    if step in original_position['step'].values:
        return original_position.loc[original_position['step'] == step, 'original_positions'].values[0]
    else:
        raise ValueError(f"Step {step} not found.")

In [34]:
get_original_index(2)

[0, 1, 2, 3, 4, 5, 6]

In [16]:
from tqdm import tqdm
best_score_list = []
# range_left, range_right = 

for ri in tqdm(range(1, 13)):
    work_dir_str = f"LT_voc_resnet50_pfc_DB_uniform_bce_35_6_group2_step{ri}"
    best_score = 0
    best_epoch = 0
    epoch_list = []
    for i in range(1,81):
        # epoch = get_epoch(file_path)
        outputs, gt_labels = get_gt_pred(f"work_dirs/{work_dir_str}/gt_and_results_e{i}.pkl")
        epoch_list.append((outputs, gt_labels))
        map_score = calculate_mAP(outputs[:,get_original_index(ri)], gt_labels[:,get_original_index(ri) ])
        if map_score[0] > best_score:
            best_score = map_score[0]
            best_epoch = i
    best_score_list.append((best_score, best_epoch))



100%|██████████| 12/12 [00:19<00:00,  1.65s/it]


In [19]:
best_score_list

[(0.8346037063797255, 12),
 (0.8393071344663696, 13),
 (0.8373995909117216, 13),
 (0.831926751194473, 17),
 (0.8327487380526125, 21),
 (0.8323835426872475, 20),
 (0.8347837573769967, 21),
 (0.8330788291013741, 17),
 (0.835680292331155, 21),
 (0.8345102724325374, 40),
 (0.8389643674966738, 25),
 (0.8423272911334329, 55)]

In [10]:
best_score_list

[(0.8939262310542575, 57),
 (0.8781427470405411, 67),
 (0.8912614046843633, 58),
 (0.8823134943038924, 75),
 (0.8914736607246927, 79),
 (0.884778344231243, 50),
 (0.8856452622366715, 73),
 (0.8763451678467632, 61),
 (0.8782622860133957, 62),
 (0.8729433365348827, 80),
 (0.88622317933045, 57),
 (0.8758618001819585, 72)]

In [4]:
best_score_list

[(0.7795285370440815, 6),
 (0.7796272394960821, 7),
 (0.7781604747052577, 8),
 (0.7863789034787636, 8),
 (0.7823346833228019, 10),
 (0.7845061389355167, 10),
 (0.779517082185382, 9),
 (0.7818899549729551, 13),
 (0.7838615805489964, 10),
 (0.7846113375546993, 13),
 (0.7831168454837797, 17),
 (0.7807536562120532, 15),
 (0.7858822841546967, 12),
 (0.7870392774460797, 17)]

In [27]:
last = outputs[:,range_left: 11 ] 

In [28]:
(outputs[:, -30:] == last).every()

array([[ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       ...,
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True]])

In [21]:
best_score_list

[(0.6302228720370208, 68),
 (0.6288683594595657, 71),
 (0.6227333914497694, 58),
 (0.6216182740062414, 64),
 (0.6267958467001103, 74),
 (0.6269370829145641, 72),
 (0.6111917834746652, 77),
 (0.6110707802614683, 78),
 (0.6156081032266659, 68),
 (0.5972553884034454, 75),
 (0.586243483137789, 73),
 (0.5809931827375383, 77)]

In [10]:
best_score_list

[(0.6244537242756895, 9),
 (0.631269683207284, 10),
 (0.6272452772954641, 15),
 (0.6298287881205433, 13),
 (0.6319336448824721, 16),
 (0.6304857578304011, 15),
 (0.6267199275707046, 17),
 (0.6254485757250204, 19),
 (0.6228322864000027, 20),
 (0.6241215528569193, 21),
 (0.6213801213078801, 29),
 (0.6235290169708279, 22),
 (0.6232340825305664, 28),
 (0.6228016584622147, 26),
 (0.6218446863189525, 35),
 (0.6229577895391062, 32),
 (0.6228398396331677, 30)]

## best group1 index 5 (0.6319336448824721, 16)

In [83]:
selected_indexes = [
    28,  # suitcase
    34,  # baseball glove
    78,  # teddy bear
    7,   # boat
    38,  # tennis racket
    14,  # bird
    6,   # train
    17,  # horse
    35,  # skateboard
    54,  # donut
    76,  # scissors
    51,  # hot dog
    12,  # stop sign
    30,  # skis
    10,  # fire hydrant
    29,  # frisbee
    79,  # toothbrush
    19,  # cow
    33,  # kite
    36,  # surfboard
    4,   # airplane
    67,  # toaster
    13,  # parking meter
    20,  # elephant
    31,  # snowboard
    18,  # sheep
    22,  # zebra
    23,  # giraffe
    21,  # bear
    77   # hair drier
]

# selected_indexes = [
#     0,   # person
#     57,  # dining table
#     56,  # chair
#     41,  # cup
#     39,  # bottle
#     44,  # bowl
#     70,  # book
#     2,   # car
#     68,  # sink
#     43,  # knife
#     60   # tv
# ]


In [18]:
best_epoch = 0
best_score = 0
for i in range(1,81):
    # epoch = get_epoch(file_path)
    outputs, gt_labels = get_gt_pred(f"work_dirs/LT_voc_resnet50_pfc_DB_uniform_bce/gt_and_results_e{i}.pkl")
    epoch_list.append((outputs, gt_labels))
    map_score = calculate_mAP(outputs[:, [15, 17, 19, 11, 1, 7, 13]], gt_labels[:, [15, 17, 19, 11, 1, 7, 13]])
    if map_score[0] > best_score:
        best_score = map_score[0]
        best_epoch = i

print(best_score, best_epoch)

0.8348002847072691 46


In [85]:
outputs[:,selected_indexes].shape

(5000, 30)

## score of group3 in bce all  0.5754014458097789 80

In [35]:
outputs[:,selected_indexes].shape

(5000, 30)

# combination: voc (4,8) (2,13) (1, 57) 

In [40]:
# head_score = calculate_mAP(outputs[:, list(class_split["head"])], gt_labels[:, list(class_split["head"])])[0]
# mid_score = calculate_mAP(outputs[:, list(class_split["middle"])], gt_labels[:, list(class_split["middle"])])[0]
# tail_score = calculate_mAP(outputs[:, list(class_split["tail"])], gt_labels[:, list(class_split["tail"])])[0]

outputs1, gt_labels1 = get_gt_pred(f"/media/mark/T7 Shield/work_dirs/LT_voc_resnet50_pfc_DB_uniform_bce_35_6_group1_l0_r4/gt_and_results_e8.pkl")
outputs2, gt_labels2 = get_gt_pred(f"work_dirs/LT_voc_resnet50_pfc_DB_uniform_bce_35_6_group2_step2/gt_and_results_e13.pkl")
outputs3, gt_labels3 = get_gt_pred(f"/media/mark/T7 Shield/work_dirs/LT_voc_resnet50_pfc_DB_uniform_bce_35_6_group3_l1_r0/gt_and_results_e57.pkl")


outputs = np.hstack((outputs1[:, :5], outputs2[:, :7], outputs3[:, -8:]))
gt_labels = np.hstack((gt_labels1[:, :5], gt_labels2[:, :7], gt_labels3[:, -8:]))

head_score = calculate_mAP(outputs[:, :6], gt_labels[:, :6])[0]
mid_score = calculate_mAP(outputs[:, 6:12], gt_labels[:, 6:12])[0]
tail_score = calculate_mAP(outputs[:, 12:], gt_labels[:, 12:])[0]
total_score = calculate_mAP(outputs, gt_labels)[0]

print(f"head mAP: {head_score:.4f}")
print(f"mid mAP: {mid_score:.4f}")
print(f"tail mAP: {tail_score:.4f}")
print(f"total mAP: {total_score:.4f}")
print([round(head_score, 4), round(mid_score, 4), round(tail_score, 4), round(total_score, 4)])


head mAP: 0.7638
mid mAP: 0.8707
tail mAP: 0.8939
total mAP: 0.8479
[0.7638, 0.8707, 0.8939, 0.8479]


In [37]:
outputs.shape

(4952, 25)

In [38]:
outputs2[:, :7].shape

(4952, 7)

In [30]:
calculate_mAP(outputs2, gt_labels2)[0]

0.8470479588123756

In [31]:
gt_labels2.shape

(4952, 9)

In [21]:
class_split = mmcv.load("appendix/coco/longtail2017/class_split.pkl")

outputs, gt_labels = get_gt_pred(f"work_dirs/LT_coco_resnet50_pfc_DB_classaware_DBloss/gt_and_results_e8.pkl")

head_score = calculate_mAP(outputs[:, list(class_split["head"])], gt_labels[:, list(class_split["head"])])[0]
mid_score = calculate_mAP(outputs[:, list(class_split["middle"])], gt_labels[:, list(class_split["middle"])])[0]
tail_score = calculate_mAP(outputs[:, list(class_split["tail"])], gt_labels[:, list(class_split["tail"])])[0]


# head_score = calculate_mAP(outputs[:, :22], gt_labels[:, :22])[0]
# mid_score = calculate_mAP(outputs[:, 22:55], gt_labels[:, 22:55])[0]
# tail_score = calculate_mAP(outputs[:, 55:], gt_labels[:, 55:])[0]
total_score = calculate_mAP(outputs, gt_labels)[0]

print(f"head mAP: {head_score:.4f}")
print(f"mid mAP: {mid_score:.4f}")
print(f"tail mAP: {tail_score:.4f}")
print(f"total mAP: {total_score:.4f}")
print([head_score])


head mAP: 0.5328
mid mAP: 0.6091
tail mAP: 0.5618
total mAP: 0.5733


In [79]:
gt_labels.sum(axis=0)

array([2693,  149,  535,  159,   97,  189,  157,  250,  121,  191,   86,
         69,   37,  235,  125,  184,  177,  128,   65,   87,   89,   49,
         85,  101,  228,  174,  292,  145,  105,   84,  120,   49,  169,
         91,   97,  100,  127,  149,  167,  379,  110,  390,  155,  181,
        153,  314,  103,   76,   98,   85,   71,   81,   51,  153,   62,
        124,  580,  195,  172,  149,  501,  149,  207,  183,   88,  145,
        106,  214,   54,  115,    8,  187,  101,  230,  204,  137,   28,
         94,    9,   34])

In [81]:
import pandas as pd
df = pd.read_csv("/home/mark/Desktop/工研院/multi-label_classification/data/coco/test_data.csv", index_col=0)
np.array(df.sum(axis=0))

array([2693,  149,  535,  159,   97,  189,  157,  250,  121,  191,   86,
         69,   37,  235,  125,  184,  177,  128,   65,   87,   89,   49,
         85,  101,  228,  174,  292,  145,  105,   84,  120,   49,  169,
         91,   97,  100,  127,  149,  167,  379,  110,  390,  155,  181,
        153,  314,  103,   76,   98,   85,   71,   81,   51,  153,   62,
        124,  580,  195,  172,  149,  501,  149,  207,  183,   88,  145,
        106,  214,   54,  115,    8,  187,  101,  230,  204,  137,   28,
         94,    9,   34])

In [102]:
df.columns

Index(['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
       'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
       'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
       'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella',
       'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
       'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
       'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork',
       'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
       'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
       'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
       'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
       'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
       'scissors', 'teddy bear', 'hair drier', 'toothbrush'],
      dtype='object')

In [17]:
# Full COCO class list
coco_classes = "aeroplane,bicycle,bird,boat,bottle,bus,car,cat,chair,cow,diningtable,dog,horse,motorbike,person,pottedplant,sheep,sofa,train,tvmonitor".split(",")

# Target list to find indexes for
# target_items = [
#     "person","chair","car","diningtable","bottle"
# ]

target_items = [
    "pottedplant","sofa","tvmonitor","dog","bicycle","cat","motorbike"
]

# Normalize function: lowercase and replace underscores with spaces
def normalize(label):
    return label.strip().lower().replace("_", " ")

# Build normalized list
normalized_coco = [normalize(label) for label in coco_classes]
normalized_targets = [normalize(label) for label in target_items]

# Get indexes
indexes = [normalized_coco.index(label) for label in normalized_targets]

# Output result
print("Target indexes:", indexes)


Target indexes: [15, 17, 19, 11, 1, 7, 13]


In [106]:
np.array(df.columns)[[0, 60, 56, 41, 39, 45, 73, 2, 71, 43, 62]]

array(['person', 'dining table', 'chair', 'cup', 'bottle', 'bowl', 'book',
       'car', 'sink', 'knife', 'tv'], dtype=object)

In [None]:
"all": [0.5704, 0.636, 0.5447, 0.5894],

In [55]:
calculate_mAP(outputs[:, :11], gt_labels[:, :11])[0]

0.6065970589127846

In [61]:
utputs, gt_labels = get_gt_pred(f"work_dirs/LT_coco_resnet50_pfc_DB_uniform_bce_all/gt_and_results_e{i}.pkl")

In [63]:
gt_labels.sum(axis=0)

array([2693,  149,  535,  159,   97,  189,  157,  250,  121,  191,   86,
         69,   37,  235,  125,  184,  177,  128,   65,   87,   89,   49,
         85,  101,  228,  174,  292,  145,  105,   84,  120,   49,  169,
         91,   97,  100,  127,  149,  167,  379,  110,  390,  155,  181,
        153,  314,  103,   76,   98,   85,   71,   81,   51,  153,   62,
        124,  580,  195,  172,  149,  501,  149,  207,  183,   88,  145,
        106,  214,   54,  115,    8,  187,  101,  230,  204,  137,   28,
         94,    9,   34])

In [74]:
result = mmcv.load("work_dirs/LT_coco_resnet50_pfc_DB/gt_and_results_e10.pkl")
np.array(result[0]['gt_labels']).sum(axis=0)

array([2693,  149,  535,  159,   97,  189,  157,  250,  121,  191,   86,
         69,   37,  235,  125,  184,  177,  128,   65,   87,   89,   49,
         85,  101,  228,  174,  292,  145,  105,   84,  120,   49,  169,
         91,   97,  100,  127,  149,  167,  379,  110,  390,  155,  181,
        153,  314,  103,   76,   98,   85,   71,   81,   51,  153,   62,
        124,  580,  195,  172,  149,  501,  149,  207,  183,   88,  145,
        106,  214,   54,  115,    8,  187,  101,  230,  204,  137,   28,
         94,    9,   34])