# 测试集总体准确率评估指标

## 导入工具包

In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm

## 载入类别名称和ID

In [2]:
# Load the saved index -> class-name mapping from disk.
# allow_pickle=True is presumably required because the file holds a pickled
# Python object (a dict-like mapping) rather than a plain numeric array.
# NOTE(review): unpickling can execute arbitrary code; only load a file you trust.
label_archive = np.load('idx_to_labels.npy', allow_pickle=True)
# .item() unwraps the single stored object from the loaded array.
idx_to_labels = label_archive.item()
# Class names, in the mapping's own iteration order.
classes = [*idx_to_labels.values()]
print(classes)

['乌龟', '仓鼠', '兔子', '狗', '猫', '金鱼', '鹦鹉']


## 载入测试集预测结果表格

In [4]:
# Load the test-set prediction table (one row per image) produced earlier.
df = pd.read_csv('测试集预测结果.csv')

In [5]:
# Show the table: a bare expression at the end of a notebook cell is rendered as output.
df

Unnamed: 0,图像路径,标注类别ID,标注类别名称,top-1-预测ID,top-1-预测名称,top-2-预测ID,top-2-预测名称,top-3-预测ID,top-3-预测名称,top-n预测正确,乌龟-预测置信度,仓鼠-预测置信度,兔子-预测置信度,狗-预测置信度,猫-预测置信度,金鱼-预测置信度,鹦鹉-预测置信度
0,dataset_split/val/乌龟/0.jpg,0,乌龟,0.0,乌龟,5.0,金鱼,3.0,狗,1.0,0.999790,0.000013,5.688636e-07,0.000019,0.000015,0.000151,0.000012
1,dataset_split/val/乌龟/106.jpg,0,乌龟,0.0,乌龟,6.0,鹦鹉,5.0,金鱼,1.0,0.970566,0.000354,9.084896e-04,0.000648,0.001596,0.005402,0.020526
2,dataset_split/val/乌龟/115.jpg,0,乌龟,0.0,乌龟,5.0,金鱼,1.0,仓鼠,1.0,0.963122,0.004735,5.228490e-04,0.002156,0.000909,0.027109,0.001446
3,dataset_split/val/乌龟/123.jpg,0,乌龟,0.0,乌龟,5.0,金鱼,6.0,鹦鹉,1.0,0.991517,0.000895,2.685588e-04,0.000279,0.000088,0.003942,0.003010
4,dataset_split/val/乌龟/164.jpg,0,乌龟,0.0,乌龟,5.0,金鱼,3.0,狗,1.0,0.999598,0.000010,6.147398e-05,0.000088,0.000022,0.000191,0.000030
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
571,dataset_split/val/鹦鹉/77.jpg,6,鹦鹉,6.0,鹦鹉,1.0,仓鼠,2.0,兔子,1.0,0.000218,0.133162,7.737693e-02,0.001951,0.007264,0.002795,0.777234
572,dataset_split/val/鹦鹉/85.jpeg,6,鹦鹉,6.0,鹦鹉,1.0,仓鼠,5.0,金鱼,1.0,0.000292,0.000750,4.152166e-04,0.000036,0.000087,0.000645,0.997774
573,dataset_split/val/鹦鹉/88.jpeg,6,鹦鹉,6.0,鹦鹉,1.0,仓鼠,2.0,兔子,1.0,0.000567,0.083319,2.007725e-02,0.002092,0.013484,0.011417,0.869044
574,dataset_split/val/鹦鹉/90.jpg,6,鹦鹉,6.0,鹦鹉,1.0,仓鼠,2.0,兔子,1.0,0.000228,0.007773,5.680962e-03,0.001086,0.000616,0.000903,0.983713


## 准确率

In [6]:
# Top-1 accuracy: share of rows whose top-1 predicted name equals the annotated name.
top1_match = df['标注类别名称'] == df['top-1-预测名称']
sum(top1_match) / len(df)

0.9722222222222222

## top-n准确率

In [7]:
# Top-n accuracy: total of the 'top-n预测正确' column divided by the number of rows.
# Presumably the column holds 1.0 when the annotated class is among the top-n
# predictions and 0.0 otherwise -- confirm against the cell that produced the CSV.
topn_flags = df['top-n预测正确']
sum(topn_flags) / len(df)

0.9982638888888888

## 各类别其它评估指标

公众号 人工智能小技巧 回复 混淆矩阵

看子豪兄视频和笔记讲解

In [8]:
from sklearn.metrics import classification_report

In [9]:
# Print per-class precision / recall / F1 / support plus the averaged rows.
# labels=classes pins the row order explicitly. Without it, target_names is
# attached by position to whatever label order sklearn infers from the data,
# which only matches `classes` by coincidence of sort order and could silently
# mislabel rows.
print(
    classification_report(
        df['标注类别名称'],
        df['top-1-预测名称'],
        labels=classes,
        target_names=classes,
    )
)

              precision    recall  f1-score   support

          乌龟       1.00      1.00      1.00        78
          仓鼠       0.94      0.95      0.95        87
          兔子       0.96      0.90      0.93        60
           狗       0.96      0.99      0.97        96
           猫       0.97      0.98      0.97        91
          金鱼       1.00      0.97      0.98        67
          鹦鹉       0.98      0.99      0.98        97

    accuracy                           0.97       576
   macro avg       0.97      0.97      0.97       576
weighted avg       0.97      0.97      0.97       576



macro avg 宏平均：直接将每一类的评估指标求和取平均（算术平均值）

weighted avg 加权平均：按样本数量（support）加权计算评估指标的平均值

In [10]:
# Build the same report as a dict so it can be turned into a table.
# labels=classes fixes the per-class row order to the order of `classes`.
# Later cells combine these rows positionally with values computed by iterating
# over `classes`, so the order must not depend on sklearn's own label inference.
report = classification_report(
    df['标注类别名称'],
    df['top-1-预测名称'],
    labels=classes,
    target_names=classes,
    output_dict=True,
)
# Remove the overall 'accuracy' entry so only the per-class rows and the
# 'macro avg' / 'weighted avg' rows remain in the table.
del report['accuracy']
# Transpose so each class / average is a row and each metric is a column.
df_report = pd.DataFrame(report).transpose()

In [11]:
# Show the per-class metrics table (rendered as the cell's output).
df_report

Unnamed: 0,precision,recall,f1-score,support
乌龟,1.0,1.0,1.0,78.0
仓鼠,0.943182,0.954023,0.948571,87.0
兔子,0.964286,0.9,0.931034,60.0
狗,0.959596,0.989583,0.974359,96.0
猫,0.967391,0.978022,0.972678,91.0
金鱼,1.0,0.970149,0.984848,67.0
鹦鹉,0.979592,0.989691,0.984615,97.0
macro avg,0.973435,0.968781,0.970872,576.0
weighted avg,0.972375,0.972222,0.972105,576.0


## 补充：各类别准确率（其实就是recall）

In [12]:
# Per-class accuracy: for each class, the fraction of images annotated with that
# class whose top-1 prediction is correct. Results follow the order of `classes`.
accuracy_list = []
for class_name in tqdm(classes):
    # Keep only the rows whose annotated label is the current class.
    class_rows = df[df['标注类别名称'] == class_name]
    # Count the rows where the top-1 predicted name equals the annotated name.
    hits = sum(class_rows['标注类别名称'] == class_rows['top-1-预测名称'])
    accuracy_list.append(hits / len(class_rows))

100%|██████████| 7/7 [00:00<00:00, 1096.59it/s]


In [13]:
# 计算 宏平均准确率 和 加权平均准确率
acc_macro = np.mean(accuracy_list)
acc_weighted = sum(accuracy_list * df_report.iloc[:-2]['support'] / len(df))

accuracy_list.append(acc_macro)
accuracy_list.append(acc_weighted)

df_report['accuracy'] = accuracy_list

In [14]:
# Show the metrics table again, now including the 'accuracy' column.
df_report

Unnamed: 0,precision,recall,f1-score,support,accuracy
乌龟,1.0,1.0,1.0,78.0,1.0
仓鼠,0.943182,0.954023,0.948571,87.0,0.954023
兔子,0.964286,0.9,0.931034,60.0,0.9
狗,0.959596,0.989583,0.974359,96.0,0.989583
猫,0.967391,0.978022,0.972678,91.0,0.978022
金鱼,1.0,0.970149,0.984848,67.0,0.970149
鹦鹉,0.979592,0.989691,0.984615,97.0,0.989691
macro avg,0.973435,0.968781,0.970872,576.0,0.968781
weighted avg,0.972375,0.972222,0.972105,576.0,0.972222


In [15]:
# Save the metrics table to CSV; index_label names the index column (the row names) '类别'.
df_report.to_csv('各类别准确率评估指标.csv', index_label='类别')