In [1]:
import torch
import os
from datetime import datetime
import time
import random
import cv2
import pandas as pd
import numpy as np
import albumentations as A
import matplotlib.pyplot as plt
from albumentations.pytorch.transforms import ToTensorV2
from sklearn.model_selection import StratifiedKFold
from torch.utils.data import Dataset,DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler

import pandas as pd
from glob import glob

import json
from pathlib import Path

import scipy
import scipy.io as io
import pickle
from scipy.ndimage.filters import gaussian_filter

from os.path import isfile
from joblib import Parallel, delayed
import psutil

from tqdm import tqdm

SEED = 42

def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's legacy global generator and
    PyTorch (CPU and all CUDA devices), and configures cuDNN for
    deterministic behaviour.

    Args:
        seed: Integer seed applied to every generator.
    """
    random.seed(seed)
    # NOTE: this only affects subprocesses started afterwards; hash
    # randomisation of the running interpreter is fixed at start-up.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current one (no-op without CUDA).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN auto-tuner may pick a different algorithm from run to run,
    # which defeats the deterministic flag above, so it must stay disabled.
    torch.backends.cudnn.benchmark = False

seed_everything(SEED)

In [2]:
# Root of the benchmark dataset; every other location below is derived from it.
path = '/home/heye0507/drone/drone_benchmark/data/benchmark'
# Training images, test images, .mat annotations and cached density maps.
train_images, test_images, anno, density_maps = (
    path + sub_dir
    for sub_dir in ('/images', '/test_images', '/annotation', '/dmaps')
)

In [3]:
def expand_path(p):
    """Map a file path to the ``.jpg`` image that shares its base name.

    The base name is the last ``/``-separated component of ``p`` up to its
    first dot. The training image directory is searched first, then the test
    image directory.

    Args:
        p: Path whose base name identifies the image.

    Returns:
        The path of the matching ``.jpg`` if one exists in either image
        directory, otherwise ``p`` unchanged.
    """
    stem = p.split('/')[-1].split('.')[0]
    for image_dir in (train_images, test_images):
        candidate = image_dir + '/' + stem + '.jpg'
        if isfile(candidate):
            return candidate
    # No image found in either directory: hand the input back untouched.
    return p

def open_image(p):
    """Read an image from disk as an RGB ``float32`` array scaled by 1/255.

    Args:
        p: Path of the image file.

    Returns:
        The image converted from OpenCV's BGR channel order to RGB, cast to
        ``np.float32`` and divided by 255.

    Raises:
        FileNotFoundError: If OpenCV cannot read the file at ``p``.
    """
    image = cv2.imread(p)
    # cv2.imread does not raise on a missing/unreadable file; it returns None,
    # which would otherwise surface as an opaque error inside cvtColor.
    if image is None:
        raise FileNotFoundError(f'could not read image: {p}')
    image = cv2.cvtColor(image,cv2.COLOR_BGR2RGB).astype(np.float32)
    image /= 255.
    return image

def get_density_map(p,test=False):
    """Build and cache the density map for one ``.mat`` annotation file.

    The matching image is located with ``expand_path``. If a density map for
    it is not already cached under ``density_maps``, one is computed with
    ``gaussian_filter_density`` (defined outside this cell) and saved as
    ``<image base name>.npy``.

    Args:
        p: Path of the ``.mat`` annotation file.
        test: When true, nothing is computed or saved.

    Returns:
        ``p`` when no image matching the annotation could be found (so the
        caller can collect annotations with missing images), otherwise
        ``None``.

    Raises:
        OSError: If the matching image exists but cannot be decoded.
    """
    image_file_p = expand_path(p)
    if image_file_p == p:
        # expand_path hands the input back when no image exists for it.
        return p

    stem = image_file_p.split('/')[-1].split('.')[0]
    dmap_p = density_maps + '/' + stem + '.npy'
    # Nothing to do in test mode or when the map is already cached; bail out
    # before paying for the annotation and image loads.
    if test or isfile(dmap_p):
        return None

    mat = io.loadmat(p)
    # presumably an array of annotated point coordinates -- confirm against
    # gaussian_filter_density's expectations.
    points = mat['annotation'].astype(int)
    image = cv2.imread(image_file_p)
    if image is None:
        raise OSError(f'could not read image: {image_file_p}')
    density_map = gaussian_filter_density(image.shape[:2],points)

    # Write to a temporary file and rename it into place, so an interrupted
    # run can never leave a truncated .npy behind that the isfile() check
    # above would then treat as a finished map. A file object is passed to
    # np.save so that no '.npy' suffix is appended to the temporary name.
    tmp_p = dmap_p + '.tmp'
    try:
        with open(tmp_p,'wb') as tmp_file:
            np.save(tmp_file,density_map)
        os.replace(tmp_p,dmap_p)
    finally:
        # Only still present if saving or renaming failed.
        if isfile(tmp_p):
            os.remove(tmp_p)
    return None

In [9]:
# Build density maps for every annotation file and collect the annotations
# whose image is missing (get_density_map returns the path for those).
# The directory is globbed once so the progress total always matches the
# list that is actually iterated.
anno_files = glob(anno+'/*/*.mat')
fns = []
for fp in tqdm(anno_files):
    fn = get_density_map(fp)
    if fn:
        fns.append(fn)

100%|██████████| 9818/9818 [00:00<00:00, 101972.55it/s]


In [21]:
# Record each collected path as '<parent dir>/<file name>', one per line.
with open(path + '/missing.txt', 'w') as f:
    for fn in fns:
        tail_parts = fn.split('/')[-2:]
        f.write('/'.join(tail_parts) + '\n')

In [20]:
# Preview of the line written to missing.txt for the current `fn`.
'/'.join(fn.split('/')[-2:]) + '\n'

's15/2541.mat\n'

In [4]:
def check_files(fp):
    """Check that a ``.npy`` file can be loaded.

    Args:
        fp: Path of the file to load with ``np.load``.

    Returns:
        ``None`` when the file loads, otherwise ``fp`` so the caller can
        collect the paths of unreadable files.
    """
    try:
        np.load(fp)
    except Exception:
        # Any load failure marks the file as bad. KeyboardInterrupt and
        # SystemExit are deliberately not caught, so a long run can still
        # be stopped.
        return fp
    return None

In [5]:
# Try to load every cached density map in parallel, one worker per CPU.
# The result list (None for a loadable file, the path otherwise) is stored as
# the single element of `fns`, so the per-file results live in fns[0].
fns = []
npy_paths = glob(density_maps+'/*.npy')
load_checks = (delayed(check_files)(npy_path) for npy_path in npy_paths)
runner = Parallel(n_jobs=psutil.cpu_count(),verbose=10)
fns.append(runner(load_checks))

[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done   1 tasks      | elapsed:    0.5s
[Parallel(n_jobs=2)]: Done   4 tasks      | elapsed:    0.6s
[Parallel(n_jobs=2)]: Done   9 tasks      | elapsed:    0.6s
[Parallel(n_jobs=2)]: Batch computation too fast (0.1685s.) Setting batch_size=2.
[Parallel(n_jobs=2)]: Done  14 tasks      | elapsed:    0.7s
[Parallel(n_jobs=2)]: Batch computation too fast (0.0385s.) Setting batch_size=4.
[Parallel(n_jobs=2)]: Batch computation too fast (0.0477s.) Setting batch_size=8.
[Parallel(n_jobs=2)]: Done  34 tasks      | elapsed:    0.8s
[Parallel(n_jobs=2)]: Batch computation too fast (0.0941s.) Setting batch_size=16.
[Parallel(n_jobs=2)]: Batch computation too fast (0.1992s.) Setting batch_size=32.
[Parallel(n_jobs=2)]: Done 102 tasks      | elapsed:    1.1s
[Parallel(n_jobs=2)]: Done 358 tasks      | elapsed:    2.1s
[Parallel(n_jobs=2)]: Batch computation too slow (2.6111s.) Setting batch_size=4.
[Par

[Parallel(n_jobs=2)]: Done 8483 tasks      | elapsed: 37.3min
[Parallel(n_jobs=2)]: Done 8608 tasks      | elapsed: 37.9min
[Parallel(n_jobs=2)]: Done 8735 tasks      | elapsed: 38.5min
[Parallel(n_jobs=2)]: Done 8862 tasks      | elapsed: 39.1min
[Parallel(n_jobs=2)]: Done 8991 tasks      | elapsed: 39.7min
[Parallel(n_jobs=2)]: Done 9000 out of 9000 | elapsed: 39.8min finished


In [None]:
# Sequential version of the density-map check: collect every .npy file that
# fails to load. The progress total comes from the globbed list rather than a
# hard-coded count, and the loaded array is not kept around.
npy_files = glob(density_maps+'/*.npy')
e_fns = []
for fp in tqdm(npy_files):
    try:
        np.load(fp)
    except Exception:
        # Load failures only; Ctrl-C must still be able to stop the loop.
        e_fns.append(fp)

  6%|▌         | 508/9000 [01:10<40:07,  3.53it/s] 

In [11]:
# Print every truthy entry of the first element of `fns`; falsy entries
# (e.g. None) are skipped.
for fn in fns[0]:
    if not fn:
        continue
    print(fn)

In [10]:
# NOTE(review): `fns` is rebound by more than one cell in this notebook, so
# this length reflects whichever of those cells ran last -- confirm which.
len(fns)

1