In [4]:
from utils import data_loader
import numpy as np
import math
from skimage.io import imread
from tqdm import tqdm

In [5]:
def _count_objects(file_name, count_on_masks):
    """Return the object count encoded in a single annotation file.

    Args:
        file_name: Path to the annotation file. Read with ``imread`` when
            ``count_on_masks`` is true, otherwise with ``np.load``.
        count_on_masks: Selects which of the two file formats to read.

    Returns:
        The count as a built-in ``float`` so that statistics computed over
        many files are accumulated in double precision.
    """
    if count_on_masks:
        # NOTE(review): dividing by 255 presumes 8-bit single-channel masks
        # where every annotated pixel is 255 -- confirm that as_gray=True does
        # not already rescale these files to the [0, 1] range.
        mask = imread(file_name, as_gray=True) / 255.0
        return float(np.sum(mask))

    # The sum of the loaded array is rounded to the nearest whole number to
    # obtain the count for that file.
    density_map = np.load(file_name)
    return float(np.round(np.sum(density_map)))


def _report_split(split_name, file_names, count_on_masks):
    """Print size, max, min and average count for one dataset split.

    Nothing is printed when the split has no files; ``np.max``/``np.min``
    raise ``ValueError`` on an empty sequence.
    """
    counts = [
        _count_objects(file_name, count_on_masks)
        for file_name in tqdm(file_names, total=len(file_names))
    ]
    if not counts:
        return

    print('{} size:{} Max count:{} Min count:{} Average:{} '.format(
        split_name, len(file_names), np.max(counts), np.min(counts), np.average(counts)))


def do_analysis(dataset_name='trancos_v3', is_exist_validation=False, count_on_masks=False):
    """Print per-split object-count statistics for a dataset.

    The file lists come from ``data_loader.load_dataset_paths``. For each of
    the train, test and validation splits every annotation file is read, its
    count is computed, and the split size together with the maximum, minimum
    and average count is printed. Empty splits are skipped without output.

    Args:
        dataset_name: Forwarded to ``data_loader.load_dataset_paths``.
        is_exist_validation: Forwarded to ``data_loader.load_dataset_paths``.
        count_on_masks: When true, counts are read from the ``'masks'`` file
            lists; otherwise from the ``'density_maps'`` file lists.

    Returns:
        None. Results are written to standard output.
    """
    dataset_dict = data_loader.load_dataset_paths(
        dataset_name=dataset_name,
        is_exist_validation=is_exist_validation,
        validation_split_size=0.0,
        density_map_folder_name='density_maps',
    )
    count_type = 'masks' if count_on_masks else 'density_maps'

    # Report order and labels match the printed output: Train, Test, Validation.
    for split_name, split_key in (('Train', 'train'), ('Test', 'test'), ('Validation', 'validation')):
        _report_split(split_name, dataset_dict[split_key][count_type], count_on_masks)

In [7]:
# Count statistics for 'trancos_v3' with the default flags (density maps).
do_analysis(dataset_name='trancos_v3')

100%|██████████| 823/823 [00:03<00:00, 220.20it/s]


Train size:823 Max count:95.0 Min count:9.0 Average:36.53219985961914 


100%|██████████| 421/421 [00:01<00:00, 215.07it/s]


Test size:421 Max count:107.0 Min count:13.0 Average:39.560569763183594 


0it [00:00, ?it/s]


In [9]:
# Count statistics for 'final_data' with the default flags (density maps).
do_analysis(dataset_name='final_data')

100%|██████████| 8949/8949 [00:42<00:00, 212.83it/s]


Train size:8949 Max count:95.0 Min count:0.0 Average:12.331433296203613 


100%|██████████| 1301/1301 [00:06<00:00, 207.80it/s]


Test size:1301 Max count:107.0 Min count:1.0 Average:19.273635864257812 


0it [00:00, ?it/s]


In [8]:
# Count statistics for 'kiengiang_dataset', requesting its validation split too.
do_analysis(dataset_name='kiengiang_dataset', is_exist_validation=True)

100%|██████████| 1260/1260 [00:05<00:00, 218.43it/s]


Train size:1260 Max count:34.0 Min count:3.0 Average:14.781745910644531 


100%|██████████| 380/380 [00:01<00:00, 208.29it/s]


Test size:380 Max count:31.0 Min count:2.0 Average:10.5921049118042 


100%|██████████| 360/360 [00:01<00:00, 223.85it/s]

Validation size:360 Max count:31.0 Min count:4.0 Average:14.897222518920898 





In [6]:
# Count statistics for 'taydo_splitted_data', counting on masks instead of
# density maps; is_exist_validation is left at its default of False.
do_analysis(dataset_name='taydo_splitted_data', count_on_masks=True)

100%|██████████| 6006/6006 [00:18<00:00, 330.87it/s]


Train size:6006 Max count:76.0 Min count:0.0 Average:8.602730602730603 


100%|██████████| 500/500 [00:01<00:00, 316.04it/s]


Test size:500 Max count:62.0 Min count:1.0 Average:8.794 


0it [00:00, ?it/s]
