In [27]:
import os
import torch
import json
import glob
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from random import shuffle
import albumentations as A
from albumentations.pytorch import ToTensorV2
import pandas as pd
import cv2
from tqdm import tqdm
from collections import defaultdict
import sys
import dawid_skene
import numpy as np
import shutil

In [2]:
def get_model(path) -> torch.nn.Module:
    """Load a serialized model from ``path`` and prepare it for inference.

    Args:
        path: Filesystem path of the checkpoint to load with ``torch.load``.
            The stored object is assumed to be a complete ``torch.nn.Module``
            (``.to()`` and ``.eval()`` are called on it) -- TODO confirm.

    Returns:
        The loaded model in eval mode, placed on the CUDA device when one is
        available and on the CPU otherwise.

    Raises:
        SystemExit: If ``path`` does not exist.
    """
    if not os.path.exists(path):
        sys.exit(f"Model {path} not found")

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # NOTE(security): torch.load unpickles the file and can therefore execute
    # arbitrary code -- only load checkpoints from trusted sources.
    # map_location places the tensors on the target device directly instead of
    # first restoring them to the device they were saved from.
    model = torch.load(path, map_location=device)
    model.to(device)
    model.eval()
    return model

### 1. Specify the paths to models and data

In [3]:
# Ensemble members: one checkpoint per training run. All runs live under the
# same results folder, so only the run name differs between entries.
models_path = [
    f'/home/local/Attributes-pytorch/Results/04_04_22/{run_name}/best_model.pth'
    for run_name in (
        'Stage7Baselinev3_200',
        'Stage6Baselinev3_200_1e-3_normalized',
        'Stage6Baselinev3_200_1e-3',
        'Stage6Baselinev2_200',
    )
]

# Directory with the images whose annotations are going to be checked
dataset_path = '/home/local/Attributes-pytorch/data/Datasets/Bags/Test/images'

### 2. Build the "matrix of annotations"

In [4]:
# Spatial size (in pixels) every image is resized to before it is fed to the models
w, h = 200, 200

# Test-time preprocessing: resize to (h, w), then normalize each channel with
# the mean/std below (the values commonly used for ImageNet-pretrained networks).
transform_test = A.Compose(
    [
        A.Resize(height=h, width=w),
        A.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [5]:
# For the matrix format, see dawid_skene.py.
# Layout built below: {attribute: {image_name: {model_id: [predicted_label]}}},
# i.e. every model of the ensemble contributes one entry per image.
matrix = defaultdict(lambda: defaultdict(dict))

# Classes. The i-th model output is interpreted as the logits of the i-th attribute.
# NOTE: 'Bakpack' is misspelled, but it is a key of the generated JSON files that
# the later cells read back, so it is intentionally left untouched here.
attributes = ['Bag', 'Bakpack']
multiclass_matrix = dict()
for attribute in attributes:
    multiclass_matrix[attribute] = matrix.copy()

# Create the output folder up front: a missing folder should fail immediately,
# not after the whole (long) inference run has finished.
output_path = "generated/multiclass_matrix.json"
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# The listing does not depend on the model, so read it once. It is sorted to make
# the key order of the resulting JSON deterministic (os.listdir order is arbitrary).
img_names = sorted(os.listdir(dataset_path))

for model_id, model_path in enumerate(models_path):
    model = get_model(model_path)
    # Feed the inputs to whatever device the model actually lives on
    device = next(model.parameters()).device
    for img_name in tqdm(img_names, f'Model - {model_id}'):
        # open image
        img_path = os.path.join(dataset_path, img_name)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising
            raise OSError(f"Could not read image {img_path}")
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # prepare the cnn input: HWC numpy array -> CHW float tensor
        img_rgb = transform_test(image=img_rgb)['image']
        img_rgb = torch.tensor(img_rgb, dtype=torch.float).permute((2, 0, 1))

        # run model on a batch of one image
        x = torch.unsqueeze(img_rgb, 0)
        with torch.no_grad():
            output = model(x.to(device))

        # store output: argmax over the class dimension, kept as a one-element
        # list (the batch holds a single image)
        for i, attribute in enumerate(attributes):
            pr_label = torch.argmax(output[i].to(torch.device('cpu')), dim=1).tolist()
            multiclass_matrix[attribute][img_name][model_id] = pr_label

# Context manager guarantees the file is flushed and closed
with open(output_path, "w") as matrix_file:
    json.dump(multiclass_matrix, matrix_file)

Model - 0: 100%|██████████| 20953/20953 [02:52<00:00, 121.55it/s]
Model - 1: 100%|██████████| 20953/20953 [02:52<00:00, 121.56it/s]
Model - 2: 100%|██████████| 20953/20953 [02:53<00:00, 120.85it/s]
Model - 3: 100%|██████████| 20953/20953 [02:53<00:00, 120.86it/s]


### 3. Apply the Dawid-Skene algorithm

In [7]:
# Reload the annotation matrix produced by the inference cell
with open("generated/multiclass_matrix.json") as matrix_file:
    multiclass_matrix = json.load(matrix_file)

# Classes
attributes = ['Bag', 'Bakpack']
multiclass_soft_pseudo_labels = dict()
for attribute in attributes:
    print("======================================")
    print(attribute)
    print("======================================")
    # NOTE(review): the result rows of dawid_skene.run are matched to image names
    # purely by position. Feeding the responses in sorted key order keeps that
    # pairing correct both if run() preserves the input order and if it sorts
    # the ids internally -- confirm against dawid_skene.py.
    image_names = sorted(multiclass_matrix[attribute])
    responses = {name: multiclass_matrix[attribute][name] for name in image_names}
    posteriors = dawid_skene.run(responses).tolist()
    # zip() would silently truncate on a length mismatch
    assert len(posteriors) == len(image_names), "one posterior row per image expected"
    multiclass_soft_pseudo_labels[attribute] = dict(zip(image_names, posteriors))

# Context manager guarantees the file is flushed and closed
os.makedirs("generated", exist_ok=True)
with open("generated/soft_pseudo_labels.json", "w") as labels_file:
    json.dump(multiclass_soft_pseudo_labels, labels_file)

Bag
num Patients: 20953
Observers: ['0', '1', '2', '3']
Classes: [0, 1, 2, 3]
Iter	log-likelihood	delta-CM	delta-ER
1 	 -72100.60340160299
2 	 -71979.86577971396 	0.008654	0.724661
3 	 -71954.69765545237 	0.006799	0.276119
4 	 -71941.61913657053 	0.005220	0.206811
5 	 -71934.00838217683 	0.004059	0.159891
6 	 -71929.35267358675 	0.003191	0.125150
7 	 -71926.40793700624 	0.002529	0.099164
8 	 -71924.5011381389 	0.002016	0.079488
9 	 -71923.24482424764 	0.001614	0.064277
10 	 -71922.4058386343 	0.001334	0.052332
11 	 -71921.83938041066 	0.001116	0.042863
12 	 -71921.45340712013 	0.000937	0.035273
13 	 -71921.18835179573 	0.000788	0.029138
14 	 -71921.00510172782 	0.000664	0.024176
15 	 -71920.87766258113 	0.000561	0.020125
16 	 -71920.78857935096 	0.000474	0.016787
17 	 -71920.72602567596 	0.000401	0.014028
18 	 -71920.6819256504 	0.000340	0.011747
19 	 -71920.65072605664 	0.000288	0.009851
20 	 -71920.62858476717 	0.000244	0.008271
21 	 -71920.61282889947 	0.000208	0.006953
22 	 -71920.

### 4. Analyse soft pseudo labels

#### 4.1 Concatenate with ground truth

In [17]:
# Soft pseudo labels written by the Dawid-Skene cell:
# {attribute: {filename: [per-class score, ...]}}
with open("generated/soft_pseudo_labels.json") as labels_file:
    multiclass_soft_pseudo_labels = json.load(labels_file)
# Ground-truth markup stored next to the images folder
with open(os.path.join(dataset_path, "..", "markup.json")) as markup_file:
    markup = json.load(markup_file)

attributes = ['Bag', 'Bakpack']
columns = ['filename', 'bag_conf', 'backpack_conf', 'bag_pr', 'backpack_pr', 'bag_gt', 'backpack_gt']

# Collect plain rows and build the frame once: appending to a DataFrame row by
# row re-allocates it on every insertion.
rows = []
for filename in tqdm(list(markup.keys())):
    # predicted values: most probable class and its score, per attribute
    posteriors = [np.asarray(multiclass_soft_pseudo_labels[attribute][filename])
                  for attribute in attributes]
    pr_scores = [float(scores.max()) for scores in posteriors]
    pr_labels = [int(scores.argmax()) for scores in posteriors]

    # assumes markup[filename][0] holds the labels in (bag, backpack) order -- TODO confirm
    gt_labels = list(markup[filename][0])
    rows.append([filename] + pr_scores + pr_labels + gt_labels)

results = pd.DataFrame(rows, columns=columns)
results.to_csv("generated/comcatinated.csv")
results.head()

100%|██████████| 20953/20953 [01:10<00:00, 296.26it/s]


Unnamed: 0,filename,bag_conf,backpack_conf,bag_pr,backpack_pr,bag_gt,backpack_gt
0,019076.jpg,0.84968,0.99972,2,0,-1,1
1,019100.jpg,0.866499,0.997938,1,1,3,1
2,019110.jpg,0.997014,0.997938,3,1,3,1
3,019126.jpg,0.633682,0.947732,0,1,3,1
4,019129.jpg,0.999325,0.997938,1,1,0,1


#### 4.2 Find errors

In [38]:
# Reload the merged predictions / ground-truth table from disk; the first CSV
# column is the row index that to_csv wrote out.
results = pd.read_csv("generated/comcatinated.csv", index_col=0)
results.head(5)

Unnamed: 0,filename,bag_conf,backpack_conf,bag_pr,backpack_pr,bag_gt,backpack_gt
0,019076.jpg,0.84968,0.99972,2,0,-1,1
1,019100.jpg,0.866499,0.997938,1,1,3,1
2,019110.jpg,0.997014,0.997938,3,1,3,1
3,019126.jpg,0.633682,0.947732,0,1,3,1
4,019129.jpg,0.999325,0.997938,1,1,0,1


#### 4.3 Visualize strong errors

In [39]:
# Pseudo-label confidence a prediction must strictly exceed for a disagreement
# with the ground truth to be exported as a "strong error" below.
threshold = 0.9

In [40]:
def to_text(val: int, attribute: str):
    """Translate a numeric class id of ``attribute`` into a readable name.

    Args:
        val: Class index; ``-1`` is a special value checked before anything else.
        attribute: Either ``'bag'`` or ``'backpack'``.

    Returns:
        ``"Непонятно"`` (Russian for "unclear") when ``val`` is ``-1``,
        otherwise the name stored at position ``val`` for ``attribute``.

    Raises:
        KeyError: If ``attribute`` is unknown (and ``val`` is not ``-1``).
        IndexError: If ``val`` is outside the range of known classes.
    """
    if val != -1:
        names_by_attribute = dict(
            bag=('hand_bag', 'shoulder_bag', 'suitcase', 'nothing'),
            backpack=('Yes', 'No'),
        )
        class_names = names_by_attribute[attribute]
        return str(class_names[val])
    return "Непонятно"

In [41]:
# Export every "strong error" -- an image whose pseudo label disagrees with the
# ground truth while its confidence exceeds `threshold` -- into
#   strong_errors/BagsTest/<attribute>/<predicted class>/
attributes_list = ["bag", "backpack"]

base_path = "strong_errors/BagsTest"
os.makedirs(base_path, exist_ok=True)

for attribute in attributes_list:
    values = set(results[f"{attribute}_pr"].tolist())
    os.makedirs(f"{base_path}/{attribute}", exist_ok=True)

    for val in values:
        dir_path = f"{base_path}/{attribute}/{to_text(val, attribute)}"
        os.makedirs(dir_path, exist_ok=True)

        # Drop the export of a previous run. Every exported file starts with
        # "conf_"; matching on that prefix also removes files written with the
        # old naming scheme, which did not end in ".jpg".
        for stale_file in glob.glob(f"{dir_path}/conf_*"):
            if os.path.isfile(stale_file):
                os.remove(stale_file)

        # predicted `val`, ground truth differs, confidence above the threshold
        strong_errors = results[
            (results[f"{attribute}_pr"] == val)
            & (results[f"{attribute}_gt"] != val)
            & (results[f"{attribute}_conf"] > threshold)
        ]
        for name, conf, gt in zip(strong_errors.filename.tolist(),
                                  strong_errors[f"{attribute}_conf"].tolist(),
                                  strong_errors[f"{attribute}_gt"].tolist()):
            # Keep the original extension at the very end of the new name so the
            # copy is still recognised as an image.
            stem, ext = os.path.splitext(os.path.basename(name))
            new_name = f"conf_{int(conf * 100)}_name_{stem}_gt_{to_text(gt, attribute)}{ext}"
            # Copy straight to the final name instead of copying and renaming
            shutil.copy(os.path.join(dataset_path, name), os.path.join(dir_path, new_name))