In [1]:
import numpy as np
import pandas as pd

import warnings
warnings.filterwarnings('ignore')

import imageio
import cv2 as cv
import scipy.ndimage as ndi
from sklearn.metrics import adjusted_rand_score, jaccard_similarity_score, f1_score, confusion_matrix

In [2]:
# Dataset directories: one folder per segmentation algorithm, plus the expert
# masks and the original images.
PATH_ALGO_1, PATH_ALGO_2, PATH_ALGO_3 = (
    'Dataset/sample_1/',
    'Dataset/sample_2/',
    'Dataset/sample_3/',
)
PATH_EXPERT, PATH_ORIGIN = 'Dataset/Expert/', 'Dataset/Origin/'
# PATH_LABELS = 'Dataset/OpenPart.csv'

In [4]:
# Load the previously computed per-image metrics and discard the stored
# 'index' column.
data = pd.read_parquet('predict_parquet_parquet.parquet').drop(columns='index')
data.head()

Unnamed: 0,mi,tp,hd,name,type
0,0.0,0.020072,0.0,00011827_003.png,1
1,0.0,0.019844,0.0,00011827_003.png,2
2,0.0,0.093728,0.0,00011827_003.png,3
3,0.131263,0.104455,3027.957232,00011925_072.png,1
4,0.148233,0.127502,4379.76883,00011925_072.png,2


In [5]:
def add_metric(df, metric_name, metric, to_flatten=True, metric_params=None):
    """Score every sample mask against its expert mask and store the result.

    For each row of ``df`` two PNG files are read, with paths built from the
    row's ``name`` and ``type`` columns:

    * ``Dataset/sample_{type}/{stem}_s{type}.png``
    * ``Dataset/Expert/{stem}_expert.png``

    where ``stem`` is ``name`` up to its first dot.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``name`` and ``type`` columns. It is modified in place:
        column ``metric_name`` is created or overwritten.
    metric_name : str
        Name of the column that receives the scores.
    metric : callable
        Called as ``metric(expert_image, sample_image, **metric_params)``.
    to_flatten : bool, default True
        If true, both images are flattened to 1-D before ``metric`` is called.
    metric_params : dict or None, default None
        Extra keyword arguments forwarded to ``metric``.

    Returns
    -------
    None
        The result is written into ``df``.

    Notes
    -----
    The scores are gathered in a list and assigned as one column, so the
    column dtype is inferred by pandas from the metric's return values and
    ``df`` is left untouched if any image read or metric call raises.
    """
    extra_kwargs = {} if metric_params is None else metric_params

    scores = []
    # Iterate over the frame that was passed in (not a notebook global) and do
    # not rely on the index being 0..n-1.
    for nm, v in zip(df['name'], df['type']):
        stem = nm.split(sep=".")[0]
        img_sample = imageio.imread(f'Dataset/sample_{v}/{stem}_s{v}.png')
        img_expert = imageio.imread(f'Dataset/Expert/{stem}_expert.png')
        if to_flatten:
            img_sample = img_sample.flatten()
            img_expert = img_expert.flatten()
        scores.append(metric(img_expert, img_sample, **extra_kwargs))

    # One whole-column assignment avoids chained indexing (df[col][i] = ...),
    # which pandas may apply to a temporary copy instead of df.
    df[metric_name] = scores

def center_mass_dist(y_true, y_pred):
    """Return the Euclidean distance between the centers of mass of two images.

    Both arguments must be 2-D arrays: each center of mass is unpacked into
    exactly two coordinates.
    """
    (row_true, col_true), (row_pred, col_pred) = (
        ndi.center_of_mass(y_true),
        ndi.center_of_mass(y_pred),
    )
    delta_row = row_true - row_pred
    delta_col = col_true - col_pred
    return np.sqrt(delta_row**2 + delta_col**2)

def volumetric_similarity(y_true, y_pred):
    """Return ``1 - |FN - FP| / (2*TP + FP + FN)`` for two label arrays.

    The counts are taken from the flattened confusion matrix of ``y_true``
    against ``y_pred``. When that matrix has a single entry the function
    returns 1; otherwise it must have exactly four entries, which are read in
    the order TN, FP, FN, TP.
    """
    counts = confusion_matrix(y_true, y_pred).flatten()
    if counts.shape[0] == 1:
        # Only one cell in the confusion matrix: nothing to compare.
        return 1

    _, false_pos, false_neg, true_pos = counts
    denominator = 2*true_pos + false_pos + false_neg
    return 1 - np.abs(false_neg - false_pos) / denominator

def _count_contours(mask):
    """Threshold ``mask`` at 127 and return the number of contours found."""
    _, binary = cv.threshold(mask, 127, 255, 0)
    # findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but
    # (image, contours, hierarchy) in 3.x; the contour list is always the
    # second-to-last element.
    contours = cv.findContours(binary, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE)[-2]
    return len(contours)


def obj_num_difference(y_true, y_pred):
    """Return the contour count of ``y_true`` minus the contour count of ``y_pred``.

    Each image is thresholded at 127 (values above become 255, threshold
    type 0) and its contours are retrieved with ``cv.RETR_TREE``, so nested
    contours are counted as well. An image with no contours counts as 0.

    Parameters
    ----------
    y_true, y_pred : numpy.ndarray
        Images accepted by ``cv.threshold`` / ``cv.findContours``
        (presumably single-channel uint8 masks - confirm against the data).

    Returns
    -------
    int
        ``n_true - n_pred``; negative when the prediction has more contours.
    """
    return _count_contours(y_true) - _count_contours(y_pred)

In [6]:
%%time
add_metric(data, 'center_dist', center_mass_dist, False)
add_metric(data, 'vs', volumetric_similarity)
add_metric(data, 'ari', adjusted_rand_score)
add_metric(data, 'jac', jaccard_similarity_score)
add_metric(data, 'f1', f1_score, False, 
           metric_params={'pos_label':255, 'average':'macro'})
add_metric(data, 'obj_num_diff', obj_num_difference, to_flatten=False)

Wall time: 4min 17s


In [7]:
# Preview the first rows only; rendering the whole frame bloats the notebook.
data.head(10)

Unnamed: 0,mi,tp,hd,name,type,center_dist,vs,ari,jac,f1,obj_num_diff
0,0.000000e+00,0.020072,0.000000,00011827_003.png,1,,0,0,0.979928,0,-3
1,0.000000e+00,0.019844,0.000000,00011827_003.png,2,,0,0,0.980156,0,-1
2,0.000000e+00,0.093728,0.000000,00011827_003.png,3,,0,0,0.906272,0,-1
3,1.312631e-01,0.104455,3027.957232,00011925_072.png,1,70.4206,0.972294,0.509685,0.895545,0.307248,-1
4,1.482327e-01,0.127502,4379.768830,00011925_072.png,2,116.731,0.830651,0.476244,0.872498,0.338614,-1
5,1.012925e-01,0.109610,3677.662301,00011925_072.png,3,120.819,0.819771,0.448463,0.89039,0.193447,1
6,0.000000e+00,0.002421,0.000000,00012045_019.png,1,,0,0,0.997579,0,-1
7,0.000000e+00,0.000000,0.000000,00012045_019.png,2,,1,1,1,0,0
8,1.778092e-15,0.002991,0.000000,00012045_019.png,3,,0,0,0.997009,0,-1
9,1.173234e-01,0.132353,3773.655390,00012094_040.png,1,22.3558,0.729556,0.422172,0.867647,0.294742,0


In [8]:
def feat(name, typ, results=None):
    """Sum, over the expert's marks, the distance to the nearest sample mark.

    Rows of the results table are selected by ``file_name == stem`` (``stem``
    is ``name`` up to its first dot). Rows whose `` user_name`` is ``'Expert'``
    are the reference marks; rows whose `` user_name`` is ``'sample_<typ>'``
    are the candidate marks. Mark positions are read from the `` xcenter`` and
    `` ycenter`` columns (the column names carry a leading space).

    Parameters
    ----------
    name : str
        Image file name, e.g. ``'00011827_003.png'``.
    typ : int
        1, 2 or 3; selects ``'sample_1'``, ``'sample_2'`` or ``'sample_3'``.
    results : pandas.DataFrame or None, default None
        Preloaded results table. When None the table is read from
        ``'Dataset/DX_TEST_RESULT_FULL.csv'`` on every call; pass a frame
        loaded once to avoid re-reading the file for each row.

    Returns
    -------
    int
        ``100000000`` if the image has no expert marks. Otherwise the
        truncated sum of nearest-mark distances, where an expert mark with no
        sample mark closer than ``100000000`` contributes ``100000000``.
    """
    NO_MATCH = 100000000  # sentinel distance for "nothing to match against"
    SAMPLE_USERS = ('sample_1', 'sample_2', 'sample_3')
    COORDS = [' xcenter', ' ycenter']

    sample_user = SAMPLE_USERS[typ - 1]
    if results is None:
        results = pd.read_csv('Dataset/DX_TEST_RESULT_FULL.csv')
    stem = name.split('.')[0]

    rows = results[results['file_name'] == stem]
    expert_xy = rows.loc[rows[' user_name'] == 'Expert', COORDS].to_numpy(dtype=float)
    sample_xy = rows.loc[rows[' user_name'] == sample_user, COORDS].to_numpy(dtype=float)

    if len(expert_xy) == 0:
        return NO_MATCH

    total = 0
    for point in expert_xy:
        nearest = NO_MATCH
        if len(sample_xy):
            dists = np.linalg.norm(sample_xy - point, axis=1)
            # Keep only distances that beat the sentinel; this also drops NaN,
            # matching a running-minimum search that starts at NO_MATCH.
            closer = dists[dists < NO_MATCH]
            if closer.size:
                nearest = closer.min()
        total += nearest
    return int(total)

In [9]:
# Build the whole column in a single assignment. Writing element by element
# through data['feat'][i] is chained indexing, which pandas may apply to a
# temporary copy instead of `data`.
data['feat'] = [feat(name, typ) for name, typ in zip(data['name'], data['type'])]

In [11]:
# Persist the enriched table without compression.
data.to_parquet(path='predict_data.parquet', compression=None)