### Available datasets

In [2]:
# List the datasets litraj knows about; the call displays a mapping of
# dataset name -> download URL.
from litraj.data import available_datasets
available_datasets()

{'BVEL13k': 'https://bs3u.obs.ru-moscow-1.hc.sbercloud.ru/litraj/BVEL13k.zip',
 'MPLiTrj': 'https://bs3u.obs.ru-moscow-1.hc.sbercloud.ru/litraj/MPLiTrj.zip',
 'nebDFT2k': 'https://bs3u.obs.ru-moscow-1.hc.sbercloud.ru/litraj/nebDFT2k.zip',
 'nebBVSE122k': 'https://bs3u.obs.ru-moscow-1.hc.sbercloud.ru/litraj/nebBVSE122k.zip',
 'MPLiTrj_subsample': 'https://bs3u.obs.ru-moscow-1.hc.sbercloud.ru/litraj/MPLiTrj_subsample.zip',
 'MPLiTrj_raw': 'https://bs3u.obs.ru-moscow-1.hc.sbercloud.ru/litraj/MPLiTrj_raw.zip'}

### nebDFT2k

In [3]:
from litraj.data import download_dataset, load_data

# Fetch and unpack the nebDFT2k archive into the current folder.
download_dataset('nebDFT2k', folder='.')

Downloading ./nebDFT2k.zip: 100%|██████████| 65.4M/65.4M [00:01<00:00, 44.2MB/s]
Extracting: 100%|██████████| 3365/3365 [00:02<00:00, 1297.56it/s]


In [4]:
# Read the previously downloaded nebDFT2k dataset from the current folder,
# then show which columns the resulting table provides.
data = load_data('nebDFT2k', folder='.')
data.columns

loading trajectories: 100%|██████████| 1681/1681 [00:30<00:00, 55.99it/s]
loading centroids: 100%|██████████| 1681/1681 [00:00<00:00, 2281.68it/s]


Index(['material_id', 'edge_id', 'chemsys', 'has_specific_TM', 'em_bvse',
       'em_dft', '_split', 'trajectory_init', 'trajectory_relaxed',
       'centroid'],
      dtype='object')

In [None]:
# trajectories
# Keep only the rows assigned to the training split.
train_mask = data['_split'] == 'train'
data_train = data.loc[train_mask]
# Walk the initial and relaxed trajectories of each row side by side.
for traj_init, traj_relaxed in zip(data_train['trajectory_init'], data_train['trajectory_relaxed']):
    # One potential energy per frame of the relaxed trajectory.
    energy_profile = [frame.get_potential_energy() for frame in traj_relaxed]
    # do stuff

In [5]:
# centroids
# Filter to the training split first and read both columns from that subset.
# Reading them from the unfiltered `data` would silently mix validation and
# test rows into the variables named *_train.
data_train = data[data['_split'] == 'train']
centroids_train = data_train['centroid']
em_train = data_train['em_dft']

### MPLiTrj and MPLiTrj_subsample

In [9]:
from litraj.data import download_dataset, load_data

# Fetch and unpack the MPLiTrj_subsample archive into the current folder.
download_dataset('MPLiTrj_subsample', folder = '.')

Downloading ./MPLiTrj_subsample.zip: 100%|██████████| 492M/492M [00:14<00:00, 36.4MB/s] 
Extracting: 100%|██████████| 4/4 [00:08<00:00,  2.01s/it]


In [3]:
# For this dataset load_data returns three objects, unpacked here as the
# train / val / test splits.
train, val, test = load_data('MPLiTrj_subsample', folder = '.')

loading train: 100%|██████████| 94044/94044 [01:16<00:00, 1233.77it/s]
loading val: 100%|██████████| 12249/12249 [00:10<00:00, 1116.59it/s]
loading test: 100%|██████████| 11731/11731 [00:09<00:00, 1246.79it/s]


In [9]:
# Materialise the training structures once, then read energy, forces and
# stress for each structure through its attached calculator.
structures_train = list(train)
energies_train = [s.calc.get_potential_energy() for s in structures_train]
forces_train = [s.calc.get_forces().tolist() for s in structures_train]
stresses_train = [s.calc.get_stress().tolist() for s in structures_train]

### BVEL13k

In [1]:
from litraj.data import download_dataset, load_data

download_dataset('BVEL13k', folder = '.') # save to the current folder

Downloading ./BVEL13k.zip: 100%|██████████| 10.2M/10.2M [00:00<00:00, 20.8MB/s]
Extracting: 100%|██████████| 5/5 [00:00<00:00, 31.19it/s]


In [2]:
# load_data yields the three splits plus a fourth object bound to `index`.
atoms_list_train, atoms_list_val, atoms_list_test, index = load_data('BVEL13k', folder='.')

# Each entry is an ASE Atoms object; its labels are read from the `info` dict.
for atoms in atoms_list_train:
    info = atoms.info
    mp_id = info['material_id']
    e1d, e2d, e3d = info['E_1D'], info['E_2D'], info['E_3D']
    # do stuff

loading train: 100%|██████████| 10159/10159 [00:02<00:00, 3468.24it/s]
loading val  : 100%|██████████| 1331/1331 [00:00<00:00, 3526.68it/s]
loading test : 100%|██████████| 1317/1317 [00:00<00:00, 3743.25it/s]


### nebBVSE122k

In [3]:
from litraj.data import download_dataset, load_data

# Fetch and unpack the nebBVSE122k archive into the current folder.
download_dataset('nebBVSE122k', folder = '.')

Downloading ./nebBVSE122k.zip: 100%|██████████| 191M/191M [00:08<00:00, 24.2MB/s] 
Extracting: 100%|██████████| 5/5 [00:03<00:00,  1.55it/s]


In [6]:
from litraj.data import load_data
import numpy as np

# load_data yields the three splits plus a fourth object bound to `index`.
atoms_list_train, atoms_list_val, atoms_list_test, index = load_data('nebBVSE122k', folder='.')

for atoms_with_centroid in atoms_list_train:
    info = atoms_with_centroid.info
    edge_id = info['edge_id']   # mp-id_source_target_offsetx_offsety_offsetz
    mp_id = info['material_id']
    em = info['em']
    # Locate the entry whose symbol is 'X' (should be the last one).
    is_centroid = atoms_with_centroid.symbols == 'X'
    centroid_index = np.argwhere(is_centroid)
    # do stuff

loading train: 100%|██████████| 96849/96849 [00:42<00:00, 2257.13it/s]
loading val  : 100%|██████████| 12405/12405 [00:05<00:00, 2200.42it/s]
loading test : 100%|██████████| 13167/13167 [00:05<00:00, 2202.88it/s]
