In [1]:
import os.path as osp
import pickle
import random

import cv2
import numpy as np
import torch

import lmdb
import os

import numpy as np
from torch.utils.data import Dataset
import glob



In [36]:
def create_lmdb(dataset, raw_dir, lmdb_dir, filter_file='', commit_freq=5):
    """Pack PNG frame sequences found under ``raw_dir`` into an LMDB database.

    Every sub-directory of ``raw_dir`` is treated as one sequence and every
    ``*.png`` inside it (sorted by file name) as one frame. Frames are read
    with ``cv2.IMREAD_UNCHANGED``, their last (channel) axis is reversed and
    the raw array buffer is stored under the key
    ``'{seq}_{n_frames}x{h}x{w}_{frame_idx:04d}'``. A ``meta_info.pkl`` file
    holding the dataset name, the colour order and the list of keys is written
    next to the database.

    Args:
        dataset: Name stored in the meta information and used in log output.
        raw_dir: Directory that contains one sub-directory per sequence.
        lmdb_dir: Directory of the LMDB environment; ``meta_info.pkl`` is
            written here as well.
        filter_file: Optional text file listing one sequence name per line.
            When given, only those sequences are used; blank lines are
            ignored. When empty, all sub-directories of ``raw_dir`` are used.
        commit_freq: Number of sequences written per LMDB transaction.

    Raises:
        ValueError: If ``commit_freq`` is smaller than 1, if no PNG frame is
            found at all, or if a frame is not a 3-dimensional (h, w, c) array.
        OSError: If a frame cannot be decoded by ``cv2.imread``.
    """
    if commit_freq < 1:
        raise ValueError(f'commit_freq must be >= 1, got {commit_freq}')

    print(f'>> Start to create lmdb for {dataset}')

    # scan dir
    if filter_file:  # use sequences specified by the filter_file
        with open(filter_file, 'r') as f:
            # skip blank lines (e.g. a trailing newline) so they are not
            # mistaken for a sequence named ''
            seq_idx_lst = sorted(line.strip() for line in f if line.strip())
    else:  # use all found sequences
        # only directories can be sequences; stray files are ignored
        seq_idx_lst = sorted(
            name for name in os.listdir(raw_dir)
            if osp.isdir(osp.join(raw_dir, name)))

    # list frames once per sequence and compute the space to be allocated
    nbytes = 0
    seq_frames = []  # (sequence name, sorted frame paths) of usable sequences
    for seq_idx in seq_idx_lst:
        frm_path_lst = sorted(glob.glob(osp.join(raw_dir, seq_idx, '*.png')))
        if not frm_path_lst:
            print(f'>> Warning: no PNG frames in sequence {seq_idx}, skipped')
            continue

        first_frm = cv2.imread(frm_path_lst[0], cv2.IMREAD_UNCHANGED)
        if first_frm is None:  # cv2.imread signals failure by returning None
            raise OSError(f'Failed to read image: {frm_path_lst[0]}')

        # estimate: all frames of a sequence are assumed to have the size of
        # its first frame
        nbytes += len(frm_path_lst) * first_frm.nbytes
        seq_frames.append((seq_idx, frm_path_lst))

    if not seq_frames:
        raise ValueError(f'No PNG frames found under {raw_dir}')

    num_seq = len(seq_frames)
    print(f'>> Number of sequences: {num_seq}')

    # reserve twice the raw size as headroom for keys and LMDB bookkeeping
    alloc_size = 2 * nbytes
    print(f'>> Space required for lmdb generation: {alloc_size / (1 << 30):.2f} GB')

    # create lmdb environment
    env = lmdb.open(lmdb_dir, map_size=alloc_size)

    # write data to lmdb
    keys = []
    try:
        txn = env.begin(write=True)
        for b, (seq_idx, frm_path_lst) in enumerate(seq_frames):
            # log
            print(f'   Processing sequence: {seq_idx} ({b + 1}/{num_seq})\r', end='')

            n_frm = len(frm_path_lst)

            # read frames
            for i, frm_path in enumerate(frm_path_lst):
                frm = cv2.imread(frm_path, cv2.IMREAD_UNCHANGED)
                if frm is None:
                    raise OSError(f'Failed to read image: {frm_path}')
                if frm.ndim != 3:
                    raise ValueError(
                        f'Expected an (h, w, c) image, got shape {frm.shape}: {frm_path}')

                # reverse the channel axis -> hwc|rgb|uint8
                # NOTE(review): for 4-channel PNGs this yields ARGB, not RGBA - confirm inputs
                frm = np.ascontiguousarray(frm[..., ::-1])

                h, w = frm.shape[:2]
                key = f'{seq_idx}_{n_frm}x{h}x{w}_{i:04d}'

                txn.put(key.encode('ascii'), frm)
                keys.append(key)

            # commit after every `commit_freq` sequences
            if (b + 1) % commit_freq == 0:
                txn.commit()
                txn = env.begin(write=True)

        txn.commit()
    finally:
        # always release the environment, also when reading/writing failed
        env.close()

    # terminate the '\r' progress line so the next message is not garbled
    print()

    # create meta information
    meta_info = {
        'name': dataset,
        'color': 'RGB',
        'keys': keys
    }
    with open(osp.join(lmdb_dir, 'meta_info.pkl'), 'wb') as f:
        pickle.dump(meta_info, f)

    print(f'>> Finished lmdb generation for {dataset}')



In [37]:
# Build the LMDB for the AMD dataset. The database and meta_info.pkl are
# written to the parent directory of the raw frame sequences.
create_lmdb(
    dataset='AMD',
    raw_dir='/media/moose/Moose/Dataset/AMD/outputNew/output/',
    lmdb_dir='/media/moose/Moose/Dataset/AMD/',
)

>> Start to create lmdb for AMD
>> Number of sequences: 298
>> Space required for lmdb generation: 174.89 GB
>> Finished lmdb generation for AMD8/298))8))
