# LMDB database processing

1. Run the script to process the data (e.g. `python abflow/scripts/process_lmdb.py --path /scratch/hz362/datavol/data/sabdab`)
```bash
python abflow/scripts/process_lmdb.py --path <path_to_data_folder>
```

In [1]:
# Print one processed data example from the LMDB database.
import pickle

import lmdb

path = "/scratch/hz362/datavol/data/sabdab/processed_structures.lmdb"

# map_size is given in bytes: 250 GiB.
map_size = 250 * 1024**3
db_connection = lmdb.open(
    path,
    map_size=map_size,
    create=False,
    subdir=False,
    readonly=True,
    lock=False,
    readahead=False,
    meminit=False,
)

# try/finally guarantees the environment is closed even if reading or
# unpickling a record raises.
try:
    with db_connection.begin() as txn:
        cursor = txn.cursor()
        for key, value in cursor:
            # NOTE(review): pickle.loads can execute arbitrary code; only
            # open databases that come from a trusted source.
            data = pickle.loads(value)

            print(f"ID: {key.decode()}")
            print(f"Data: {data}")

            # Distinct names so the record's key/value are not shadowed.
            for field_name, field_value in data.items():
                print(f"Key: {field_name}")
                print(f"Value shape: {field_value.shape}")

            # Only the first record is needed as an example.
            break
finally:
    db_connection.close()

ID: 1a14_H_L_N
Data: {'res_type': tensor([13, 17, 13,  9, 13, 13,  1,  8,  0, 15,  5, 19, 16,  4, 16, 11, 19, 11,
        10, 19, 18, 17,  5,  7,  4, 19, 12,  2, 16, 15, 19,  1,  0, 14, 15,  5,
         5, 15, 19, 14, 19,  2,  5,  5,  4,  2, 19, 18,  5, 13,  5,  2,  7,  3,
         9, 16, 13, 16, 16,  7, 15,  1, 14,  0, 15, 13,  2,  7, 15, 11, 19,  9,
        11, 18, 19, 13, 13, 16, 17,  8,  9,  9,  7, 19, 19, 16, 15, 11,  9,  6,
        15,  3,  4, 15,  5, 15,  5, 15,  5, 16,  2, 19, 15,  9, 19,  4,  1, 13,
        13,  2,  4, 16,  9, 12,  4, 16,  4,  5,  5,  5, 13,  5, 15, 11,  1, 15,
        12, 17,  9, 16,  2, 11, 12, 14, 12, 11,  2, 12, 16, 17,  5,  8,  1,  2,
        12, 19, 12,  5, 11, 11, 11,  5, 17,  5, 14, 16,  7, 15,  7,  0, 15, 14,
        15,  5, 19,  3,  9,  2,  8,  8, 16, 17,  9, 11, 16,  2, 18, 15,  5,  8]), 'chain_type': tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

# Full atom reconstruction

In [None]:
from abflow.data.process_pdb import output_to_pdb

# Write the loaded example straight to a PDB file.
original_pdb_path = "original.pdb"
output_to_pdb(data, original_pdb_path)

In [None]:
from abflow.structure import get_frames_and_dihedrals, full_atom_reconstruction

# Indexing with None prepends a batch dimension of size 1.
pos_heavyatoms = data["pos_heavyatom"][None]
res_type = data["res_type"][None]

# Decompose the heavy-atom coordinates into frames and side-chain dihedrals.
frames_and_dihedrals = get_frames_and_dihedrals(pos_heavyatoms, res_type)
frame_rotations, frame_translations, sidechain_dihedrals = frames_and_dihedrals

# Shapes of the inputs and of the decomposed representation.
print(f"pos_heavyatoms shape: {pos_heavyatoms.shape}")
print(f"res_type shape: {res_type.shape}")
print(f"frame_rotations shape: {frame_rotations.shape}")
print(f"frame_translations shape: {frame_translations.shape}")
print(f"sidechain_dihedrals shape: {sidechain_dihedrals.shape}")

# Rebuild the heavy-atom coordinates from the decomposed representation.
pred_pos_heavyatom = full_atom_reconstruction(*frames_and_dihedrals, res_type)

# Shape of the reconstructed coordinates.
print(f"pos_heavyatom shape: {pred_pos_heavyatom.shape}")

# Shallow copy of the example with the coordinates swapped for the
# reconstruction (batch dimension removed again).
data_pred = {**data, "pos_heavyatom": pred_pos_heavyatom.squeeze(0)}

pos_heavyatoms shape: torch.Size([1, 180, 15, 3])
res_type shape: torch.Size([1, 180])
frame_rotations shape: torch.Size([1, 180, 3, 3])
frame_translations shape: torch.Size([1, 180, 3])
sidechain_dihedrals shape: torch.Size([1, 180, 4])
pos_heavyatom shape: torch.Size([1, 180, 15, 3])


In [None]:
# Save the reconstructed structure as a PDB file.
reconstruction_pdb_path = "reconstruction.pdb"
output_to_pdb(data_pred, reconstruction_pdb_path)