In [1]:
import json
import cv2
import pandas as pd
import os
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report, multilabel_confusion_matrix
import warnings
warnings.filterwarnings('ignore')

In [2]:
results_file = './results.json'
labels_dir = './test/labels/'
img_dir = './test/images/'
models_dir = './models/'

In [3]:
with open(results_file, 'r') as fp:
    results = json.load(fp)

In [21]:
# map_models = {}
# for file in os.scandir(models_dir):
#     if file.is_file() and file.name.startswith('model') and file.name.endswith('.pt'):
#         model_name = file.name.split('model_')[1][:-3]
#         if 'yolo' not in model_name:
#             map_models[model_name] = len(map_models)

units = [['infantry',
          'anti_tank',
          'armour',
          'wheeled',
          'unit_tactical'],
         ['recce',
          'medic',
          'signal',
          'hq_unit',
          'supply',
          'artillery',
          'engineer',
          'mortar',
          'missile',
          'air_defence'],
         ['infantry',
          'anti_tank',
          'recce',
          'sniper',
          'medic',
          'signal'],
         ['motorized', 'cannon'],
         ['team', 'squad', 'half-platoon', 'platoon', 'company',  # Currently repetition because unit sizes are sampled with uniform distirubution
          'battalion']]  # 'brigade', 'regiment', 'division']

units = set([j for sub in units for j in sub])

map_models = {}
for unit in units:
    map_models[unit] = len(map_models)

In [22]:
for i, row in enumerate(results):
    for s in row['symbols']:
        s['used'] = 0

column_names = ["actual", "predicted", "correct",
                "incorrect", "missed"]  # , "x1", "x2", "y1", "y2"]
df = pd.DataFrame(columns=column_names)
for i, row in enumerate(results):
    cur_img = row['img'].strip()
    img = cv2.imread(f"{img_dir}{cur_img}")
    img_txt = cur_img.split('.')[0] + '.txt'
    with open(f'{labels_dir}{img_txt}') as fp:
        for line in fp:
            splits = line.strip().split(" ")
            x_c, y_c, h, w = map(float, splits[1:])
            actual = splits[0].strip().split("__")[1:]
            height, width, channels = img.shape
            x_c, y_c, w, h = float(x_c)*width, float(y_c) * \
                height, float(w)*width, float(h)*height

            predicted = []
            for s in row['symbols']:
                if s['used'] == 0:
                    if s['xmin'] <= x_c and x_c <= s['xmax'] and s['ymin'] <= y_c and y_c <= s['ymax']:
                        predicted = s['labels']
                        s['used'] = 1
                        break

            dif = set(actual) - set(predicted)
            missed = len(dif)
            correct = len(actual) - missed
            incorrect = len(predicted) - correct
            # print(map_labels(map_models, actual))
            # print(map_labels(map_models, predicted))

            df = pd.concat([df, pd.DataFrame(
                [[actual, predicted, correct, incorrect, missed]], columns=column_names)], ignore_index=True)

for row in results:
    for s in row['symbols']:
        if s['used'] == 0:
            df = pd.concat([df, pd.DataFrame([[[], s['labels'], 0, 0, len(
                s['labels'])]], columns=column_names)], ignore_index=True)

In [23]:
df.head()

Unnamed: 0,actual,predicted,correct,incorrect,missed
0,"[armour, half-platoon]","[company, company]",0,2,2
1,"[missile, wheeled, team]","[company, team]",1,1,2
2,"[hq_unit, wheeled, team]","[company, team]",1,1,2
3,"[hq_unit, half-platoon]","[company, company]",0,2,2
4,"[mortar, wheeled, company]","[company, team]",1,1,2


In [24]:
# actual_labels = set([j for sub in df[['actual', 'predicted']].apply(lambda x: x[0] + x[1], axis=1).to_list() for j in sub])
# for label in actual_labels:
#     if label not in map_models:
#         map_models[label] = len(map_models)

In [27]:
def map_labels(map_models, input_list):
    out = [0] * len(map_models)
    for label in input_list:
        if label in map_models:
            out[map_models[label]] = 1
        elif label == 'anti-tank':
            out[map_models['anti_tank']] = 1
        elif label == 'engineers':
            out[map_models['engineer']] = 1
        elif label == 'hq-unit':
            out[map_models['hq_unit']] = 1
        else:
            # out[map_models['other']] = 1
            raise ValueError(f'Add label {label}')
    return out


df['actual_encoded'] = df['actual'].apply(lambda x: map_labels(map_models, x))
df['predicted_encoded'] = df['predicted'].apply(lambda x: map_labels(map_models, x))

In [28]:
df.head()

Unnamed: 0,actual,predicted,correct,incorrect,missed,actual_encoded,predicted_encoded
0,"[armour, half-platoon]","[company, company]",0,2,2,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,"[missile, wheeled, team]","[company, team]",1,1,2,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,"[hq_unit, wheeled, team]","[company, team]",1,1,2,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,"[hq_unit, half-platoon]","[company, company]",0,2,2,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,"[mortar, wheeled, company]","[company, team]",1,1,2,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [29]:
# y_true = np.sum(df['actual_encoded'].to_list(), axis=0)
# y_pred = np.sum(df['predicted_encoded'].to_list(), axis=0)
y_true = df['actual_encoded'].tolist()
y_pred = df['predicted_encoded'].tolist()

In [30]:
sorted_dict = sorted(map_models.items(), key=lambda x: x[1])
sorted_labels = [item[0] for item in sorted_dict]

In [31]:
y_true = np.array(y_true)
# y_true

In [32]:
y_pred = np.array(y_pred)
# y_pred

In [33]:
conf_mat_dict={}
report_dict = {}

for label_col in range(len(sorted_labels)):
    y_true_label = y_true[:, label_col]
    y_pred_label = y_pred[:, label_col]
    conf_mat_dict[sorted_labels[label_col]] = confusion_matrix(y_pred=y_pred_label, y_true=y_true_label)
    report_dict[sorted_labels[label_col]] = classification_report(y_pred=y_pred_label, y_true=y_true_label)


In [35]:
for label, matrix in conf_mat_dict.items():
    print("Confusion matrix for label {}:".format(label))
    print(matrix)

Confusion matrix for label missile:
[[2681   12]
 [  78   31]]
Confusion matrix for label armour:
[[1918    0]
 [ 742  142]]
Confusion matrix for label sniper:
[[2716    0]
 [  86    0]]
Confusion matrix for label squad:
[[2150   63]
 [ 430  159]]
Confusion matrix for label air_defence:
[[2700    0]
 [ 102    0]]
Confusion matrix for label mortar:
[[2682   28]
 [  43   49]]
Confusion matrix for label hq_unit:
[[2568    0]
 [ 149   85]]
Confusion matrix for label cannon:
[[2514    0]
 [ 241   47]]
Confusion matrix for label supply:
[[2452  115]
 [ 146   89]]
Confusion matrix for label anti_tank:
[[2336    1]
 [ 331  134]]
Confusion matrix for label infantry:
[[2325    2]
 [ 352  123]]
Confusion matrix for label medic:
[[2685    0]
 [  50   67]]
Confusion matrix for label signal:
[[2689    0]
 [  95   18]]
Confusion matrix for label platoon:
[[2237    0]
 [ 511   54]]
Confusion matrix for label recce:
[[2694    2]
 [  72   34]]
Confusion matrix for label wheeled:
[[2539    0]
 [ 258    5

In [37]:
for label, matrix in report_dict.items():
    print("Classification report matrix for label {}:".format(label))
    print(matrix)

Classification report matrix for label missile:
              precision    recall  f1-score   support

           0       0.97      1.00      0.98      2693
           1       0.72      0.28      0.41       109

    accuracy                           0.97      2802
   macro avg       0.85      0.64      0.70      2802
weighted avg       0.96      0.97      0.96      2802

Classification report matrix for label armour:
              precision    recall  f1-score   support

           0       0.72      1.00      0.84      1918
           1       1.00      0.16      0.28       884

    accuracy                           0.74      2802
   macro avg       0.86      0.58      0.56      2802
weighted avg       0.81      0.74      0.66      2802

Classification report matrix for label sniper:
              precision    recall  f1-score   support

           0       0.97      1.00      0.98      2716
           1       0.00      0.00      0.00        86

    accuracy                           0