-
Notifications
You must be signed in to change notification settings - Fork 63
/
create_lmdb.py
36 lines (31 loc) · 1.03 KB
/
create_lmdb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import lmdb
import cv2
import numpy as np
import os
import hashlib
import functools
from glob import glob
from fire import Fire
from tqdm import tqdm
from multiprocessing import Pool
def worker(video_name):
    """Encode every readable image of one video directory as JPEG.

    Args:
        video_name: Path of a directory; every entry matched by
            ``video_name + '/*'`` is treated as a candidate image.

    Returns:
        A dict mapping the MD5 digest (raw bytes) of each image path, as
        returned by ``glob``, to the JPEG-encoded bytes of that image.
        Entries that OpenCV cannot decode or re-encode are skipped with a
        warning instead of aborting the whole directory.
    """
    import warnings

    kv = {}
    for image_name in glob(video_name + '/*'):
        img = cv2.imread(image_name)
        # cv2.imread reports failure by returning None rather than raising
        # (sub-directories, annotation files, corrupt images); handing that
        # None to imencode would raise and take down the pool job.
        if img is None:
            warnings.warn('skipping unreadable image: %s' % image_name)
            continue
        ok, img_encode = cv2.imencode('.jpg', img)
        if not ok:
            warnings.warn('skipping image that failed to encode: %s'
                          % image_name)
            continue
        kv[hashlib.md5(image_name.encode()).digest()] = img_encode.tobytes()
    return kv
def create_lmdb(data_dir, output_dir, num_threads):
    """Pack the images of every video directory under ``data_dir`` into LMDB.

    Args:
        data_dir: Directory whose immediate sub-directories are each
            handed to ``worker``; non-directory entries are ignored.
        output_dir: Path passed to ``lmdb.open`` for the database.
        num_threads: Number of processes in the multiprocessing pool.
    """
    video_names = [x for x in glob(data_dir + '/*') if os.path.isdir(x)]
    # Both the LMDB environment and the pool are context-managed so that
    # they are released on exit, including when a worker raises.
    # map_size is LMDB's upper bound on the database size (50e9 bytes).
    with lmdb.open(output_dir, map_size=int(50e9)) as db, \
            Pool(processes=num_threads) as pool:
        results = pool.imap_unordered(worker, video_names)
        for ret in tqdm(results, total=len(video_names)):
            # One write transaction per video: a video's images are
            # committed together as soon as its worker finishes.
            with db.begin(write=True) as txn:
                for k, v in ret.items():
                    txn.put(k, v)
# Script entry point: expose create_lmdb's parameters as a command-line
# interface through Fire when the file is executed directly.
if __name__ == "__main__":
    Fire(create_lmdb)