In [1]:
import os
import pickle
from pathlib import Path
from glob import glob
import yaml
from collections import defaultdict

In [2]:
label_path = Path("D:/ml/code/yolov5/data/cotton-classes16.yaml")

In [3]:
val_dir = Path("D:/ml/code/datasets/cotton-classes16/val/labels")
pkl_path = Path("D:/ml/code/mm/mmdetection/work_dirs/classes16/cascade-rcnn_convnext-t_fpn_3x_classes16_sgd_auto-scale-lr/unique_ids.pkl")

In [4]:
assert label_path.exists(), f"{label_path} not exists"
assert val_dir.exists(), f"{val_dir} not exists"
assert pkl_path.exists(), f"{pkl_path} not exists"

# load label name

In [None]:
with open(label_path, 'r', encoding='utf-8') as f:
    dataset = yaml.safe_load(f)
names = dataset["names"]
names

{0: 'cao_ling_chong',
 1: 'hong_zhi_zhu',
 2: 'ji_ma',
 3: 'long_kui',
 4: 'ma_chi_xian',
 5: 'mian_hua_he_ban_bing',
 6: 'mian_hua_huang_wei_bing',
 7: 'mian_hua_lao_ye_huang_hua',
 8: 'mian_hua_ye_pian_lao_hua',
 9: 'mian_ling_chong',
 10: 'mian_ya',
 11: 'piao_chong',
 12: 'shi_ya_ying',
 13: 'tian_xuan_hua',
 14: 'yan_fen_shi',
 15: 'ye_xi_gua_miao'}

In [None]:
num_classes = len(names)
num_classes

# 创建id2txt的字典

```python
{
    0: ["pic1", "pic2", "pic3"...],
    1: ["pic2", "pic3", "pic5"...],
    ...
}
```

In [None]:
val_id2stem = defaultdict(list)
pred_id2stem = defaultdict(list)
val_id2stem, pred_id2stem

(defaultdict(list, {}), defaultdict(list, {}))

# 读取txt中的数据

In [6]:
def get_txts_data(path: Path) -> dict[str, list[str]]:
    """获取txt中数据,按照行分割

    Args:
        path (Path): txt文件夹路径

    Returns:
        dict[str, list[str]]:
            {
                img1: [line1],
                img2: [line1, line2],
                ....
            }
    """
    val_txts = path.glob("*.txt")
    data = {}
    for txt in val_txts:
        stem = txt.stem
        with open(txt) as f:
            lines = f.read().splitlines()                   # splitlines可以去除单个空行
            lines = [line for line in lines if line != ""]  # 去除空行
        data[stem] = lines
    return data

In [7]:
val_data = get_txts_data(val_dir)
print(len(val_data.keys()))
k = list(val_data.keys())[0]
print(k, ":", val_data[k])

398
20220711122406 : ['1 0.600446 0.493056 0.59871 0.820767']


# 获取 unique id

In [8]:
def get_txt_unique_ids(data: dict[str, list[str]]) -> dict[str, list[int]]:
    """获取每张图片中的类别id,去重

    Args:
        data (dict[str, list[str]]): get_txts_data 的返回值

    Returns:
        dict[str, list[int]]:
            {
                img1: [0],
                img2: [0, 1],
                ...
            }
    """
    unique_data = {}
    # key: filename
    # value: [line1, line2...]
    for stem, lines in data.items():
        # get line [0] label
        ids = [int(line.split(" ")[0]) for line in lines]
        # unique label
        unique_ids = sorted(set(ids))
        unique_data[stem] = unique_ids
    return unique_data

In [9]:
val_txt_unique_ids = get_txt_unique_ids(val_data)
print(len(val_txt_unique_ids.keys()))
k = list(val_txt_unique_ids.keys())[0]
print(k, ":", val_txt_unique_ids[k])

398
20220711122406 : [1]


In [10]:
with open(pkl_path, mode="rb") as f:
    pred_txt_unique_ids = pickle.load(f)
print(len(pred_txt_unique_ids.keys()))
k = list(pred_txt_unique_ids.keys())[0]
print(k, ":", pred_txt_unique_ids[k])

398
20220711122406 : [1, 5, 8, 9, 10]


# 将txt中的数据转移到id2stem中

In [11]:
val_id2stem[0], val_id2stem[1]

([], [])

In [12]:
def txt_id2stem(txt_unique_ids: dict[str, list[int]], id2stem: dict[int, list[str]]):
    """将uniqueid中的图片数据放入

    Args:
        txt_unique_ids (dict[str, list[int]]): get_txt_unique_ids 的返回值
        id2stem (dict[int, list[str]]):
            {
                0: [img1],
                1: [img2, img4],
                ...
            }
    """
    # 清空id2txt中数组的内容
    for i in id2stem.values():
        i.clear()
    # stem: filename
    # unique_ids: [id0, id1...]
    for stem, unique_ids in txt_unique_ids.items():
        for unique_id in unique_ids:
            # {id0: [img1, img2...]}
            id2stem[unique_id].append(stem)

In [13]:
txt_id2stem(val_txt_unique_ids, val_id2stem)
val_id2stem[1]

['20220711122406',
 '20220711122529',
 '20220711122538',
 '20220711122912',
 '20220711122916']

In [14]:
txt_id2stem(pred_txt_unique_ids, pred_id2stem)
pred_id2stem[1]

['20220711122406',
 '20220711122529',
 '20220711122538',
 '20220711122912',
 '20220711122916',
 'IMG_0923',
 'IMG_1288',
 'IMG_2315',
 'IMG_3355']

In [15]:
for i in range(num_classes):
    print(len(val_id2stem[i]), len(pred_id2stem[i]))

3 8
5 9
4 8
3 19
3 11
35 63
18 50
1 27
37 58
25 70
232 291
163 181
2 4
11 29
3 3
2 5


# 获取评估指标,每个类别和总的召回率,精确率

In [16]:
def get_metrics(val_id2stem: dict[int, list[str]], pred_id2stem: dict[int, list[str]], eps: float = 1e-8) -> tuple[dict, dict[int, dict]]:
    """获取每个类别和总的评估指标

    Args:
        val_id2stem (dict[int, list[str]]):  val
        pred_id2stem (dict[int, list[str]]): pred
        eps (float): 防止除零. Default 1e-8

    Returns:
        tuple[dict, dict[int, dict]]:
            dict:               总指标
            dict[int, dict]:    各类别指标
    """
    total_val = 0       # val total
    total_pred = 0      # pred total
    total_detect = 0    # detect total
    result_for_cls = {} # 每个类别准确率
    for i in range(num_classes):
        # set在这里不为去重,仅仅为了求交集,差集
        val_stem_for_single_id: set = set(val_id2stem[i])   # 单一类别val图片名字
        pred_stem_for_single_id: set = set(pred_id2stem[i]) # 单一类别pred图片名字

        num_val = len(val_stem_for_single_id)
        total_val += num_val
        num_pred = len(pred_stem_for_single_id)
        total_pred += num_pred

        # 检测到的图片名字
        detect      = val_stem_for_single_id.intersection(pred_stem_for_single_id)
        num_detect  = len(detect)
        total_detect += num_detect
        # 没有检测到图片名字
        not_detect  = val_stem_for_single_id.difference(pred_stem_for_single_id)
        # 过度检测到图片名字
        over_detect = pred_stem_for_single_id.difference(val_stem_for_single_id)

        recall = num_detect / (num_val + eps)
        precision = num_detect / (num_pred + eps)
        f1_score = (2 * recall * precision) / (recall + precision + eps)

        # 按照类别id保存
        result_for_cls[i] = {
            "num_val": num_val,
            "num_pred": num_pred,
            "num_detect": num_detect,
            "recall": recall,
            "precision": precision,
            "f1_score": f1_score,
            "detect": detect,
            "not_detect": not_detect,
            "over_detect": over_detect,
        }

    recall = total_detect / total_val
    precision = total_detect / total_pred
    f1_score = (2 * recall * precision) / (recall + precision+ eps)

    total_result = {
        "num_val": total_val,
        "num_pred": total_pred,
        "num_detect": total_detect,
        "recall": recall,
        "precision": precision,
        "f1_score": f1_score,
    }

    return total_result, result_for_cls

In [17]:
total_result, result_for_cls = get_metrics(val_id2stem, pred_id2stem)
total_result

{'num_val': 547,
 'num_pred': 836,
 'num_detect': 527,
 'recall': 0.9634369287020109,
 'precision': 0.6303827751196173}

In [18]:
result_for_cls

{0: {'num_val': 3,
  'num_pred': 8,
  'num_detect': 2,
  'recall': 0.6666666666666666,
  'precision': 0.25,
  'detect': {'IMG_1245', 'IMG_3348'},
  'not_detect': {'IMG_2236'},
  'over_detect': {'IMG_0051',
   'IMG_1040',
   'IMG_1076',
   'IMG_2822',
   'IMG_3026',
   'IMG_9892'}},
 1: {'num_val': 5,
  'num_pred': 9,
  'num_detect': 5,
  'recall': 1.0,
  'precision': 0.5555555555555556,
  'detect': {'20220711122406',
   '20220711122529',
   '20220711122538',
   '20220711122912',
   '20220711122916'},
  'not_detect': set(),
  'over_detect': {'IMG_0923', 'IMG_1288', 'IMG_2315', 'IMG_3355'}},
 2: {'num_val': 4,
  'num_pred': 8,
  'num_detect': 4,
  'recall': 1.0,
  'precision': 0.5,
  'detect': {'DSC02069', 'DSC02071', 'DSC02095', 'IMG_9904'},
  'not_detect': set(),
  'over_detect': {'IMG_0011', 'IMG_1023', 'IMG_1113', 'IMG_9801'}},
 3: {'num_val': 3,
  'num_pred': 19,
  'num_detect': 3,
  'recall': 1.0,
  'precision': 0.15789473684210525,
  'detect': {'IMG_0958', 'IMG_9440', 'longkui1'},

# label name to data

In [20]:
name2acc = {}
for name, acc_cls in zip(names.values(), result_for_cls.values()):
    name2acc[name] = {
        "num_val": acc_cls["num_val"],
        "num_pred": acc_cls["num_pred"],
        "num_detect": acc_cls["num_detect"],
        "recall": acc_cls["recall"],
        "precision": acc_cls["precision"],
        "f1_score": acc_cls["f1_score"],
    }
name2acc

{'cao_ling_chong': {'num_val': 3,
  'num_pred': 8,
  'num_detect': 2,
  'recall': 0.6666666666666666,
  'precision': 0.25},
 'hong_zhi_zhu': {'num_val': 5,
  'num_pred': 9,
  'num_detect': 5,
  'recall': 1.0,
  'precision': 0.5555555555555556},
 'ji_ma': {'num_val': 4,
  'num_pred': 8,
  'num_detect': 4,
  'recall': 1.0,
  'precision': 0.5},
 'long_kui': {'num_val': 3,
  'num_pred': 19,
  'num_detect': 3,
  'recall': 1.0,
  'precision': 0.15789473684210525},
 'ma_chi_xian': {'num_val': 3,
  'num_pred': 11,
  'num_detect': 2,
  'recall': 0.6666666666666666,
  'precision': 0.18181818181818182},
 'mian_hua_he_ban_bing': {'num_val': 35,
  'num_pred': 63,
  'num_detect': 29,
  'recall': 0.8285714285714286,
  'precision': 0.4603174603174603},
 'mian_hua_huang_wei_bing': {'num_val': 18,
  'num_pred': 50,
  'num_detect': 18,
  'recall': 1.0,
  'precision': 0.36},
 'mian_hua_lao_ye_huang_hua': {'num_val': 1,
  'num_pred': 27,
  'num_detect': 0,
  'recall': 0.0,
  'precision': 0.0},
 'mian_hua_y

In [21]:
total_result

{'num_val': 547,
 'num_pred': 836,
 'num_detect': 527,
 'recall': 0.9634369287020109,
 'precision': 0.6303827751196173}

In [22]:
name2acc["total"] = total_result

# pandas

In [23]:
import pandas as pd

In [24]:
df = pd.DataFrame(name2acc, index=["num_val", "num_pred", "num_detect", "recall", "precision", "f1_score"]).T
df

Unnamed: 0,num_val,num_pred,num_detect,recall,precision
cao_ling_chong,3.0,8.0,2.0,0.666667,0.25
hong_zhi_zhu,5.0,9.0,5.0,1.0,0.555556
ji_ma,4.0,8.0,4.0,1.0,0.5
long_kui,3.0,19.0,3.0,1.0,0.157895
ma_chi_xian,3.0,11.0,2.0,0.666667,0.181818
mian_hua_he_ban_bing,35.0,63.0,29.0,0.828571,0.460317
mian_hua_huang_wei_bing,18.0,50.0,18.0,1.0,0.36
mian_hua_lao_ye_huang_hua,1.0,27.0,0.0,0.0,0.0
mian_hua_ye_pian_lao_hua,37.0,58.0,36.0,0.972973,0.62069
mian_ling_chong,25.0,70.0,23.0,0.92,0.328571


In [25]:
df.to_excel("result.xlsx")