<a href="https://colab.research.google.com/github/SEOYUNJE/Endoscope-Object-Detection/blob/main/Calibrated-Confidence-Score/3class_filter.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [47]:
!pip install -q opendatasets
import opendatasets as od
od.download('https://www.kaggle.com/datasets/seoyunje/gastroscopy-256x256-resized-png')
od.download('https://www.kaggle.com/datasets/msyu78/gastroscopy-meta')

Skipping, found downloaded files in "./gastroscopy-256x256-resized-png" (use force=True to force download)
Skipping, found downloaded files in "./gastroscopy-meta" (use force=True to force download)


In [48]:
import numpy as np
import pandas as pd

from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns

## Decoding PredicitonString
___

In [49]:
def string_decoder(predict_string):
    s = predict_string.split(' ')
    class_id = [x for x in (s[0::6])]
    score = [x for x in (s[1::6])]
    x_min = [x for x in (s[2::6])]
    y_min = [x for x in (s[3::6])]
    x_max = [x for x in (s[4::6])]
    y_max = [x for x in (s[5::6])]

    return class_id, score, x_min, y_min, x_max, y_max

In [50]:
def format_pred(boxes, scores, labels):
    pred_strings = []
    for label, score, bbox in zip(labels, scores, boxes):
        xmin, ymin, xmax, ymax = bbox.astype(np.float32)
        pred_strings.append(f"{label} {score} {xmin} {ymin} {xmax} {ymax}")
    return " ".join(pred_strings)

In [51]:
wbf_sub = pd.read_csv('/content/submission.csv')

In [52]:
wbf_list = []

for i, row in tqdm(wbf_sub.iterrows(), total=len(wbf_sub)):
    image_id = row['image_id']
    string_decoded = string_decoder(row['PredictionString'])

    for  (class_id, score, xmin, ymin, xmax, ymax) in zip(*string_decoded):
        wbf_list.append(
            {
                'image_id': image_id,
                'lesion_label': int(class_id),
                'score': float(score),
                'x_min': float(xmin),
                'y_min': float(ymin),
                'x_max': float(xmax),
                'y_max': float(ymax),
                }
        )

wbf_submission = pd.DataFrame(wbf_list, columns=['image_id', 'lesion_label', 'score','x_min','y_min', 'x_max', 'y_max'])

100%|██████████| 400/400 [00:00<00:00, 6695.35it/s]


In [53]:
print('Shape of DataFrame: ', wbf_submission.shape)
print(display(wbf_submission))

Shape of DataFrame:  (10139, 7)


Unnamed: 0,image_id,lesion_label,score,x_min,y_min,x_max,y_max
0,1_1_00030.png,0,0.319568,1037.898071,1296.483765,1320.661499,1646.682251
1,1_1_00030.png,0,0.300593,526.985840,579.764526,649.952698,753.868530
2,1_1_00030.png,0,0.286304,1119.544678,1281.931396,1298.300781,1594.814697
3,1_1_00030.png,0,0.284230,845.285156,1094.289795,983.636230,1270.197876
4,1_1_00030.png,0,0.281957,914.202148,1143.990967,1451.361938,1757.526489
...,...,...,...,...,...,...,...
10134,1_3_09997.png,1,0.004059,1149.739746,823.854065,1902.940430,1249.434448
10135,1_3_09997.png,2,0.004009,654.207092,434.990845,1976.143555,1817.424194
10136,1_3_09997.png,2,0.003716,925.213867,866.592407,1996.191406,1911.358276
10137,1_3_09997.png,2,0.003600,1181.596558,419.672546,1986.268921,1433.615479


None


## Apply 3 Class Filter

In [54]:
class_filter = pd.read_csv('/content/gastro_submission.csv')
print(display(class_filter))

Unnamed: 0,ulcer,polyp,cancer,image_id
0,0.967364,0.035951,0.038746,1_1_00030.png
1,0.180033,0.924693,0.026148,1_1_00033.png
2,0.961043,0.031087,0.036898,1_1_00035.png
3,0.983636,0.025277,0.014472,1_1_00040.png
4,0.979165,0.022631,0.025413,1_1_00095.png
...,...,...,...,...
395,0.017086,0.018231,0.987001,1_3_09783.png
396,0.016761,0.034884,0.980200,1_3_09808.png
397,0.011272,0.011916,0.987444,1_3_09826.png
398,0.019842,0.009413,0.990747,1_3_09872.png


None


In [55]:
calibrated_df = wbf_submission.copy()

for _, image_id in tqdm(enumerate(class_filter.image_id.unique())):
    record = class_filter[class_filter['image_id'] == image_id]

    all_score = []
    for _, (score, label) in calibrated_df[calibrated_df['image_id'] == image_id][['score','lesion_label']].iterrows():
        if label == 0:
           score = score**0.7 * record['ulcer'].values**0.3
        elif label == 1:
           score = score**0.7 * record['polyp'].values**0.3
        else:
           score = score**0.7 * record['cancer'].values**0.3
        all_score.append(score)

    calibrated_df.loc[calibrated_df['image_id'] == image_id, 'score'] = all_score

400it [00:02, 136.42it/s]


In [56]:
calibrated_df

Unnamed: 0,image_id,lesion_label,score,x_min,y_min,x_max,y_max
0,1_1_00030.png,0,0.445524,1037.898071,1296.483765,1320.661499,1646.682251
1,1_1_00030.png,0,0.426838,526.985840,579.764526,649.952698,753.868530
2,1_1_00030.png,0,0.412530,1119.544678,1281.931396,1298.300781,1594.814697
3,1_1_00030.png,0,0.410436,845.285156,1094.289795,983.636230,1270.197876
4,1_1_00030.png,0,0.408136,914.202148,1143.990967,1451.361938,1757.526489
...,...,...,...,...,...,...,...
10134,1_3_09997.png,1,0.005920,1149.739746,823.854065,1902.940430,1249.434448
10135,1_3_09997.png,2,0.020866,654.207092,434.990845,1976.143555,1817.424194
10136,1_3_09997.png,2,0.019787,925.213867,866.592407,1996.191406,1911.358276
10137,1_3_09997.png,2,0.019353,1181.596558,419.672546,1986.268921,1433.615479


**Calculating mAP50, mAP75**

In [57]:
!pip install map-boxes



In [58]:
import map_boxes
from map_boxes import mean_average_precision_for_boxes

**After Calibrated Confidence Score**

In [59]:
test_meta = pd.read_csv('/content/gastroscopy-256x256-resized-png/test_meta.csv')
test_id = test_meta['image_id'].unique()
ann = pd.read_csv('/content/gastroscopy-meta/gastroscopy_data2.csv')
ann = ann[ann['image_id'].isin(test_id)].reset_index(drop=True)

det = calibrated_df.copy()

ann = ann[['image_id','lesion_label','x_min','x_max','y_min','y_max']].values

det = det[['image_id','lesion_label', 'score','x_min','x_max','y_min','y_max']].values

mean_ap_50_after, average_precisions_50_after = map(ann, det, iou_threshold=0.5)
mean_ap_75_after, average_precisions_75_after = map(ann, det, iou_threshold=0.75)

Number of files in annotations: 400
Number of files in predictions: 400
Unique classes: 3
Detections length: 400
Annotations length: 400
0                              | 0.622217 |     150
1                              | 0.796367 |     270
2                              | 0.840848 |     231
mAP: 0.753144
Number of files in annotations: 400
Number of files in predictions: 400
Unique classes: 3
Detections length: 400
Annotations length: 400
0                              | 0.254839 |     150
1                              | 0.501178 |     270
2                              | 0.591640 |     231
mAP: 0.449219
