In [15]:
import pandas as pd
import numpy as np
import os.path as osp
from tqdm.notebook import tqdm

from map_boxes import mean_average_precision_for_boxes

def norm_coordinates(df, meta_df):
    '''
    Scale absolute bbox coordinates to fractions of the image size. INPLACE !!!

    `df` is mutated: `height` and `width` columns are added (looked up in
    `meta_df` by `image_id`), then `x_min` / `x_max` are divided by `width`
    and `y_min` / `y_max` are divided by `height`.

    param: df: DataFrame with columns ['image_id', 'x_min', 'x_max', 'y_min', 'y_max']
    param: meta_df: DataFrame indexed by image id, with columns 'rows' (used as height) and 'columns' (used as width)
    return: None, the result is written into `df`
    raises: ValueError if the largest coordinate is <= 1 (treated as already normalized)
    raises: KeyError if an image id from `df` is not present in `meta_df`
    '''
    largest_coordinate = max(df[col].max() for col in ('x_min', 'x_max', 'y_min', 'y_max'))
    if largest_coordinate <= 1:
        raise ValueError('Bbox coordinates are already normalized')

    # One vectorized label lookup per column instead of a Python-level
    # meta_df.loc call for every row; missing ids still raise KeyError.
    image_ids = df['image_id'].to_numpy()
    df['height'] = meta_df.loc[image_ids, 'rows'].to_numpy()
    df['width'] = meta_df.loc[image_ids, 'columns'].to_numpy()

    df['x_min'] = df.x_min / df.width
    df['x_max'] = df.x_max / df.width
    df['y_min'] = df.y_min / df.height
    df['y_max'] = df.y_max / df.height


def get_mean_average_precision(annotation_path, predictions_path, iou_threshold=0.4, meta_path='/home/semyon/data/VinBigData/train_meta.csv', verbose=False):
    '''
    Compute mean average precision of predicted boxes against annotations.

    Both inputs are normalized with norm_coordinates() on private copies /
    freshly read frames, so DataFrames passed by the caller are not mutated.

    param: annotation_path: path to .csv (or a DataFrame) with columns ['image_id', 'class_name', 'x_min', 'x_max', 'y_min', 'y_max']
    param: predictions_path: path to .csv (or a DataFrame) with columns ['image_id', 'class_name', 'rad_id', 'x_min', 'x_max', 'y_min', 'y_max'], where 'rad_id' contains confidence
    param: iou_threshold: forwarded to mean_average_precision_for_boxes
    param: meta_path: path to .csv with an 'image_id' column plus the 'rows' / 'columns' image sizes used for normalization
    param: verbose: forwarded to mean_average_precision_for_boxes
    return: (mean_ap, average_precisions) exactly as returned by mean_average_precision_for_boxes
    raises: ValueError (from norm_coordinates) if the coordinates of either input are already normalized
    '''
    def _as_frame(source):
        # Each input is resolved on its own, so a DataFrame may be mixed with a
        # path; DataFrames are copied because normalization works in place.
        if isinstance(source, pd.DataFrame):
            return source.copy()
        return pd.read_csv(source)

    ann_df = _as_frame(annotation_path)
    pred_df = _as_frame(predictions_path)

    meta_df = pd.read_csv(meta_path).set_index('image_id')

    # inplace norm coordinates
    norm_coordinates(ann_df, meta_df)
    norm_coordinates(pred_df, meta_df)

    # The arrays are positional, so the source columns are selected directly in
    # the order (image id, label, [confidence], x_min, x_max, y_min, y_max).
    ann = ann_df[['image_id', 'class_name', 'x_min', 'x_max', 'y_min', 'y_max']].values
    pred = pred_df[['image_id', 'class_name', 'rad_id', 'x_min', 'x_max', 'y_min', 'y_max']].values
    mean_ap, average_precisions = mean_average_precision_for_boxes(ann, pred, iou_threshold=iou_threshold, verbose=verbose)
    return mean_ap, average_precisions

In [3]:
annotations_file = '/home/semyon/data/VinBigData/custom_dfs/weighted_boxes_fusion_iou-0.20_fold-0.csv'
detections_file = '/home/semyon/projects/vinbigdata/eda/dfs/result.csv'

ann_df = pd.read_csv(annotations_file)
pred_df = pd.read_csv(detections_file)

meta_path = '/home/semyon/data/VinBigData/train_meta.csv'
meta_df = pd.read_csv(meta_path).set_index('image_id')

# No height/width columns are attached in this cell: norm_coordinates() adds
# them to each frame when it is called, and this cell defines no generic `df`
# to attach them to.

In [4]:
# Normalize annotations first, then predictions; both frames are modified in place.
for _frame in (ann_df, pred_df):
    norm_coordinates(_frame, meta_df)

In [5]:
# Expose each source column under the CamelCase name that is selected later
# when the evaluation arrays are built. The original columns are kept.

# annotations
ann_column_map = {
    'image_id': 'ImageID',
    'class_name': 'LabelName',
    'x_min': 'XMin',
    'x_max': 'XMax',
    'y_min': 'YMin',
    'y_max': 'YMax',
}
for old_col_name, new_col_name in ann_column_map.items():
    ann_df[new_col_name] = ann_df[old_col_name]

# predictions: same layout plus 'rad_id', which is copied to 'Conf'
pred_column_map = {
    'image_id': 'ImageID',
    'class_name': 'LabelName',
    'rad_id': 'Conf',
    'x_min': 'XMin',
    'x_max': 'XMax',
    'y_min': 'YMin',
    'y_max': 'YMax',
}
for old_col_name, new_col_name in pred_column_map.items():
    pred_df[new_col_name] = pred_df[old_col_name]

In [13]:
# Column order of the plain arrays handed to mean_average_precision_for_boxes.
ann_columns = ['ImageID', 'LabelName', 'XMin', 'XMax', 'YMin', 'YMax']
pred_columns = ['ImageID', 'LabelName', 'Conf', 'XMin', 'XMax', 'YMin', 'YMax']

ann = ann_df[ann_columns].values
pred = pred_df[pred_columns].values

# Score the predictions at an IoU threshold of 0.5.
mean_ap, average_precisions = mean_average_precision_for_boxes(
    ann,
    pred,
    iou_threshold=0.5,
)

Number of files in annotations: 885
Number of files in predictions: 885
Unique classes: 14
Detections length: 885
Annotations length: 885
Aortic enlargement             | 0.918060 |     620
Atelectasis                    | 0.150863 |      43
Calcification                  | 0.048550 |     142
Cardiomegaly                   | 0.931139 |     464
Consolidation                  | 0.222844 |      82
ILD                            | 0.195136 |     125
Infiltration                   | 0.159394 |     172
Lung Opacity                   | 0.181422 |     368
Nodule/Mass                    | 0.137577 |     388
Other lesion                   | 0.033874 |     323
Pleural effusion               | 0.353775 |     285
Pleural thickening             | 0.140186 |     679
Pneumothorax                   | 0.009797 |      21
Pulmonary fibrosis             | 0.168504 |     539
mAP: 0.260794


In [14]:
# Display the mean AP value assigned by the mean_average_precision_for_boxes call.
mean_ap

0.2607943737482015

In [11]:
# Display the per-class results assigned together with mean_ap.
# NOTE(review): this cell's execution count (11) is lower than that of the
# evaluation cell (13), and the values shown differ from the per-class table
# that cell printed, so the saved output appears to come from an earlier run.
# Re-run after the evaluation cell to refresh it.
average_precisions

{'Aortic enlargement': (0.9323038503045298, 620.0),
 'Atelectasis': (0.16313835078966057, 43.0),
 'Calcification': (0.06737020214309525, 142.0),
 'Cardiomegaly': (0.9386996911931025, 464.0),
 'Consolidation': (0.3344506276855171, 82.0),
 'ILD': (0.23406234899666, 125.0),
 'Infiltration': (0.23076311683549677, 172.0),
 'Lung Opacity': (0.23774434003489092, 368.0),
 'Nodule/Mass': (0.17146370596318275, 388.0),
 'Other lesion': (0.05206959399058348, 323.0),
 'Pleural effusion': (0.4635037116054489, 285.0),
 'Pleural thickening': (0.2123565519755393, 679.0),
 'Pneumothorax': (0.017444085686035674, 21.0),
 'Pulmonary fibrosis': (0.21927258454113693, 539.0)}