In [1]:
import numpy as np
import pandas as pd

import warnings
warnings.filterwarnings('ignore')

import imageio
import cv2 as cv
import scipy.ndimage as ndi
from sklearn.metrics import adjusted_rand_score, jaccard_similarity_score, f1_score, confusion_matrix

In [2]:
# Locations of the dataset pieces, all expressed relative to one root folder.
_DATASET_ROOT = 'Dataset/'

# One folder of masks per segmentation algorithm.
PATH_ALGO_1, PATH_ALGO_2, PATH_ALGO_3 = (
    f'{_DATASET_ROOT}sample_{k}/' for k in (1, 2, 3)
)
PATH_EXPERT = _DATASET_ROOT + 'Expert/'
PATH_ORIGIN = _DATASET_ROOT + 'Origin/'
PATH_LABELS = _DATASET_ROOT + 'OpenPart.csv'

In [3]:
# Load the base table from parquet and preview its first rows
# (columns mi, tp, hd, name, Sample, type per the output below).
data = pd.read_parquet('Dataset/df_parquet.parquet')
data.head()

Unnamed: 0,mi,tp,hd,name,Sample,type
0,0.100797,0.080799,2758.246726,00010277_000.png,3,1
1,0.174395,0.026121,3324.793227,00010277_000.png,4,2
2,0.105645,0.11849,3833.490576,00010277_000.png,4,3
3,0.054925,0.011678,360.624458,00002583_014.png,4,1
4,0.024133,0.015931,1925.207781,00002583_014.png,4,2


In [4]:
def add_metric(df, metric_name, metric, to_flatten=True, metric_params=None):
    """Score every row's sample mask against its expert mask and store the result.

    For each row of ``df`` two images are read: the sample mask
    ``Dataset/sample_{type}/{stem}_s{type}.png`` and the expert mask
    ``Dataset/Expert/{stem}_expert.png``, where ``stem`` is the row's ``name``
    up to the first dot.  ``metric`` is then called as
    ``metric(expert, sample, **metric_params)`` and the per-row results are
    written to ``df[metric_name]``.  The frame is modified in place and
    nothing is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``name`` and ``type``.
    metric_name : str
        Column to create (or overwrite) with the metric values.
    metric : callable
        Called as ``metric(expert_img, sample_img, **metric_params)``.
    to_flatten : bool, default True
        Flatten both images to 1-D before handing them to ``metric``.
    metric_params : dict, optional
        Extra keyword arguments forwarded to ``metric``.
    """
    extra_kwargs = metric_params if metric_params is not None else {}
    scores = []
    # Walk the frame that was passed in (not a notebook-level global) and do it
    # positionally, so the function works for any index, not just 0..n-1.
    for nm, v in zip(df['name'], df['type']):
        stem = nm.split(sep=".")[0]
        img_sample = imageio.imread(f'Dataset/sample_{v}/{stem}_s{v}.png')
        img_expert = imageio.imread(f'Dataset/Expert/{stem}_expert.png')
        if to_flatten:
            img_sample = img_sample.flatten()
            img_expert = img_expert.flatten()
        scores.append(metric(img_expert, img_sample, **extra_kwargs))
    # A single whole-column assignment replaces the element-wise chained
    # indexing (df[col][i] = ...), which pandas does not guarantee to write
    # through, and gives the column a proper numeric dtype instead of object.
    df[metric_name] = scores

def center_mass_dist(y_true, y_pred):
    """Return the Euclidean distance between the centres of mass of two 2-D arrays."""
    (row_t, col_t), (row_p, col_p) = (
        ndi.center_of_mass(mask) for mask in (y_true, y_pred)
    )
    d_row = row_t - row_p
    d_col = col_t - col_p
    return np.sqrt(d_row**2 + d_col**2)

def volumetric_similarity(y_true, y_pred):
    """Volumetric similarity ``1 - |FN - FP| / (2*TP + FP + FN)`` of two label vectors.

    The counts come from the flattened confusion matrix of ``y_true`` against
    ``y_pred``.  When that matrix has a single cell, 1 is returned.
    """
    counts = confusion_matrix(y_true, y_pred).ravel()
    if counts.shape[0] == 1:
        return 1
    # Flattened 2x2 order is tn, fp, fn, tp; the true-negative count is unused.
    _, false_pos, false_neg, true_pos = counts
    imbalance = np.abs(false_neg - false_pos)
    denominator = 2*true_pos + false_pos + false_neg
    return 1 - imbalance / denominator

def _count_contours(mask):
    """Binarise ``mask`` at 127 and return how many contours OpenCV finds in it."""
    _, binary = cv.threshold(mask, 127, 255, cv.THRESH_BINARY)
    # findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but
    # (image, contours, hierarchy) in 3.x; the contour list is always the
    # second-to-last element, so index from the end to work on every version.
    contours = cv.findContours(binary, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE)[-2]
    return len(contours)


def obj_num_difference(y_true, y_pred):
    """Return the contour count of ``y_true`` minus the contour count of ``y_pred``.

    Both masks are thresholded at 127 and their contours are retrieved with
    ``cv.RETR_TREE``.  A mask with no contours counts as 0.

    Parameters
    ----------
    y_true, y_pred : array
        Images accepted by ``cv.threshold`` / ``cv.findContours``.

    Returns
    -------
    int
        Positive when the reference mask has more contours than the prediction.
    """
    return _count_contours(y_true) - _count_contours(y_pred)

In [5]:
%%time
add_metric(data, 'center_dist', center_mass_dist, False)
add_metric(data, 'vs', volumetric_similarity)
add_metric(data, 'ari', adjusted_rand_score)
add_metric(data, 'jac', jaccard_similarity_score)
add_metric(data, 'f1', f1_score, False, 
           metric_params={'pos_label':255, 'average':'macro'})
add_metric(data, 'obj_num_diff', obj_num_difference, to_flatten=False)

Wall time: 8min 57s


In [6]:
# Display the table, now including the metric columns added by add_metric.
data

Unnamed: 0,mi,tp,hd,name,Sample,type,center_dist,vs,ari,jac,f1,obj_num_diff
0,1.007972e-01,0.080799,2758.246726,00010277_000.png,3,1,121.687,0.840047,0.506098,0.919201,0.263547,-2
1,1.743953e-01,0.026121,3324.793227,00010277_000.png,4,2,24.5868,0.896461,0.793478,0.973879,0.283736,1
2,1.056451e-01,0.118490,3833.490576,00010277_000.png,4,3,148.194,0.650414,0.416935,0.88151,0.257423,2
3,5.492471e-02,0.011678,360.624458,00002583_014.png,4,1,5.52059,0.809711,0.7038,0.988322,0.122062,-1
4,2.413282e-02,0.015931,1925.207781,00002583_014.png,4,2,40.7814,0.940468,0.48087,0.984069,0.0961632,0
5,3.595378e-02,0.108039,2828.086809,00002583_014.png,3,3,49.967,0.236986,0.186959,0.891961,0.065013,1
6,8.323938e-02,0.097851,3344.293647,00010103_014.png,5,1,80.094,0.565519,0.39855,0.902149,0.208223,0
7,2.140252e-02,0.083890,3790.847531,00010103_014.png,3,2,301.173,0.89309,0.246974,0.91611,0.112272,1
8,1.062944e-04,0.056729,4618.246420,00010103_014.png,1,3,286.798,0.0663697,-0.00343379,0.943271,0,1
9,5.702377e-02,0.036033,1373.217026,00011355_011.png,4,1,34.2187,0.894448,0.531854,0.963967,0.116819,-3


In [7]:
# Persist the table with the metric columns as an uncompressed parquet file.
data.to_parquet('data_with_features.parquet', compression=None)

In [9]:
# Reload the saved table (this rebinds the in-memory `data`) and preview its first rows.
data = pd.read_parquet('data_with_features.parquet')
data.head()

Unnamed: 0,mi,tp,hd,name,Sample,type,center_dist,vs,ari,jac,f1,obj_num_diff
0,0.100797,0.080799,2758.246726,00010277_000.png,3,1,121.687337,0.840047,0.506098,0.919201,0.263547,-2
1,0.174395,0.026121,3324.793227,00010277_000.png,4,2,24.586807,0.896461,0.793478,0.973879,0.283736,1
2,0.105645,0.11849,3833.490576,00010277_000.png,4,3,148.19379,0.650414,0.416935,0.88151,0.257423,2
3,0.054925,0.011678,360.624458,00002583_014.png,4,1,5.520585,0.809711,0.7038,0.988322,0.122062,-1
4,0.024133,0.015931,1925.207781,00002583_014.png,4,2,40.781397,0.940468,0.48087,0.984069,0.096163,0


In [15]:
def feat(name, typ, markup=None):
    """Sum of nearest-centre distances from the expert's marks to one sample's marks.

    The annotation table is filtered to the rows whose ``file_name`` equals
    ``name`` up to its first dot.  For every row authored by ``'Expert'`` the
    Euclidean distance to the closest centre authored by ``'sample_{typ}'`` is
    taken, capped at 100000000; the distances are summed and truncated to int.

    Parameters
    ----------
    name : str
        Image file name; everything from the first dot on is dropped.
    typ : int
        1, 2 or 3; selects the ``sample_1`` / ``sample_2`` / ``sample_3`` user.
    markup : pandas.DataFrame, optional
        Already-loaded annotation table with the columns ``file_name``,
        ``' user_name'``, ``' xcenter'`` and ``' ycenter'`` (the leading spaces
        are part of the column names).  When omitted the table is read from
        ``Dataset/DX_TEST_RESULT_FULL.csv``; pass it explicitly when calling
        this function in a loop to avoid re-reading the file every time.

    Returns
    -------
    int
        100000000 when the image has no expert rows.  Otherwise the summed
        distance, where every expert centre without any sample centre to match
        contributes 100000000.
    """
    no_match = 100000000
    sample_user = ('sample_1', 'sample_2', 'sample_3')[typ - 1]
    if markup is None:
        markup = pd.read_csv('Dataset/DX_TEST_RESULT_FULL.csv')

    rows = markup[markup['file_name'] == name.split('.')[0]]
    author = rows[' user_name']
    centre_cols = [' xcenter', ' ycenter']
    expert_xy = rows.loc[author == 'Expert', centre_cols].to_numpy(dtype=float)
    sample_xy = rows.loc[author == sample_user, centre_cols].to_numpy(dtype=float)

    if len(expert_xy) == 0:
        return no_match

    # Pairwise distances, shape (n_expert, n_sample).
    dists = np.linalg.norm(expert_xy[:, None, :] - sample_xy[None, :, :], axis=2)
    # fmin skips NaN distances, and `initial` both caps each minimum at the
    # sentinel and supplies it when there are no sample centres at all.
    nearest = np.fmin.reduce(dists, axis=1, initial=no_match)
    return int(nearest.sum())

In [16]:
# Build the whole column in one assignment.  Writing element-wise through
# data['feat'][i] is chained indexing, which pandas does not guarantee to
# write back into the frame, and it leaves the column with object dtype.
data['feat'] = [feat(nm, tp) for nm, tp in zip(data['name'], data['type'])]

In [18]:
# Save the final table (metric columns plus `feat`) as an uncompressed parquet file.
data.to_parquet('dataset_parquet.parquet', compression=None)