In [8]:
import lmdb
import os
import pickle
from functools import lru_cache
import logging

logger = logging.getLogger(__name__)


class LMDBDataset:
    """Read-only, index-addressable view over a single-file LMDB database.

    Records are looked up under the ASCII encoding of ``str(idx)`` (for
    example ``b"0"``) and are expected to be pickled Python objects.

    The constructor only scans the keys and then closes its handle again;
    the handle used for reads (``self.env``) is opened lazily on the first
    ``__getitem__`` call.
    NOTE(review): presumably the lazy open exists so that every worker
    process ends up with its own handle -- confirm against the callers.

    Args:
        db_path: Path to the LMDB file (a plain file, not a directory).

    Raises:
        AssertionError: If ``db_path`` is not an existing file.
    """

    # Upper bound on decoded records kept per instance; same bound as the
    # ``lru_cache(maxsize=16)`` this replaces.
    _CACHE_SIZE = 16

    def __init__(self, db_path):
        self.db_path = db_path
        assert os.path.isfile(self.db_path), "{} not found".format(self.db_path)
        env = self.connect_db(self.db_path)
        try:
            with env.begin() as txn:
                self._keys = list(txn.cursor().iternext(values=False))
        finally:
            # Release the temporary handle so the same file is not held open
            # twice in this process once ``__getitem__`` reconnects.
            env.close()

    def connect_db(self, lmdb_path, save_to_self=False):
        """Open ``lmdb_path`` read-only and return the environment.

        Args:
            lmdb_path: Path to the LMDB file.
            save_to_self: When true, also store the handle on ``self.env``.

        Returns:
            The opened ``lmdb`` environment (returned in both modes).
        """
        env = lmdb.open(
            lmdb_path,
            subdir=False,
            readonly=True,
            lock=False,
            readahead=False,
            meminit=False,
            max_readers=256,
        )
        if save_to_self:
            self.env = env
        return env

    def __len__(self):
        """Return the number of keys found when the dataset was created."""
        return len(self._keys)

    def __getitem__(self, idx):
        """Return the unpickled record stored under ``str(idx)``.

        The most recently used ``_CACHE_SIZE`` records are cached per
        instance. A cache hit returns the very same object as before, so
        callers should not mutate the returned value.

        Args:
            idx: Record index; must be hashable and ASCII-encodable via
                ``f"{idx}"``.

        Returns:
            The unpickled record.

        Raises:
            IndexError: If no record is stored under ``idx``. This also lets
                plain ``for item in dataset`` iteration terminate cleanly.
        """
        # Per-instance cache, created on demand so that instances built
        # without ``__init__`` work too. A decorator-level ``lru_cache`` would
        # key on ``self`` and keep every dataset (and its handle) alive.
        cache = self.__dict__.setdefault("_cache", {})
        if idx in cache:
            # Re-insert to mark the entry as most recently used (dicts keep
            # insertion order).
            data = cache.pop(idx)
            cache[idx] = data
            return data

        if not hasattr(self, "env"):
            self.connect_db(self.db_path, save_to_self=True)
        # The context manager ends the read transaction instead of leaving it
        # open until garbage collection.
        with self.env.begin() as txn:
            datapoint_pickled = txn.get(f"{idx}".encode("ascii"))
        if datapoint_pickled is None:
            raise IndexError("index {} not found in {}".format(idx, self.db_path))

        # SECURITY: unpickling can execute arbitrary code; only open LMDB
        # files that come from a trusted source.
        data = pickle.loads(datapoint_pickled)

        cache[idx] = data
        if len(cache) > self._CACHE_SIZE:
            # Evict the least recently used entry (the first key in the dict).
            del cache[next(iter(cache))]
        return data

In [9]:
# Open the example pocket LMDB; constructing the dataset scans all keys once.
# NOTE(review): this absolute path is machine-specific -- consider deriving it
# from a configurable data directory before sharing the notebook.
unimol_pocket = LMDBDataset("/root/Generative-Models/Uni-Mol/example_data/pocket/valid.lmdb")

In [10]:
# Display the record stored under key b"0". The whole dict is echoed, so the
# output below is long.
unimol_pocket[0]

{'atoms': ['N',
  'CA',
  'C',
  'O',
  'CB',
  'H',
  'HA',
  'HB1',
  'HB2',
  'HB3',
  'N',
  'CA',
  'C',
  'O',
  'CB',
  'CG',
  'CD',
  'HA',
  'HB2',
  'HB3',
  'HG2',
  'HG3',
  'HD2',
  'HD3',
  'N',
  'CA',
  'C',
  'O',
  'CB',
  'SG',
  'H',
  'HA',
  'HB2',
  'HB3',
  'N',
  'CA',
  'C',
  'O',
  'CB',
  'CG',
  'OD1',
  'OD2',
  'H',
  'HA',
  'HB2',
  'HB3',
  'N',
  'CA',
  'C',
  'O',
  'CB',
  'CG1',
  'CG2',
  'H',
  'HA',
  'HB',
  'HG11',
  'HG12',
  'HG13',
  'HG21',
  'HG22',
  'HG23',
  'N',
  'CA',
  'C',
  'O',
  'CB',
  'OG',
  'H',
  'HA',
  'HB2',
  'HB3',
  'HG',
  'N',
  'CA',
  'C',
  'O',
  'CB',
  'CG',
  'CD',
  'NE',
  'CZ',
  'NH1',
  'NH2',
  'H',
  'HA',
  'HB2',
  'HB3',
  'HG2',
  'HG3',
  'HD2',
  'HD3',
  'HE',
  'HH11',
  'HH12',
  'HH21',
  'HH22',
  'N',
  'CA',
  'C',
  'O',
  'CB',
  'H',
  'HA',
  'HB1',
  'HB2',
  'HB3',
  'O',
  'H1',
  'H2',
  'O',
  'H1',
  'H2',
  'O',
  'H1',
  'H2',
  'O',
  'H1',
  'H2',
  'O',
  'H1',
  'H2',
 

In [11]:
# List the top-level fields of the first record.
unimol_pocket[0].keys()

dict_keys(['atoms', 'coordinates', 'meta_info', 'side', 'residue', 'pdbid'])