In [3]:
%reload_ext autoreload
%autoreload 2

import h5py
from tqdm import tqdm

from qcm_ml.features.xtb import generate_xtb_matrices_fpsh

src_path = "/home/beom/orbnet_mhd/qcm_ml_b/qcm_ml/data/qm9/QM9_open_gen_10000.hdf5"
tgt_path = "/home/beom/orbnet_mhd/qcm_ml_b/qcm_ml/data/qm9/QM9_open_gen_10000_DXTB_test.hdf5"



with h5py.File(src_path, 'r') as src_file:
    with h5py.File(tgt_path, 'w') as tgt_file:
        for mode in src_file:
            for mol in tqdm(src_file[mode], desc=mode + " molecules"):
                singlet_group = src_file[mode][mol].get("singlet", None)
                if singlet_group is not None:
                    tgt_singlet_group = tgt_file.create_group(f"{mode}/{mol}/singlet")
                    for key, item in singlet_group.items():
                        if key in ["2body"]:
                            continue
                        else: 
                            if item.shape == ():
                                tgt_singlet_group.create_dataset(key, data=item[()])
                            else:
                                tgt_singlet_group.create_dataset(key, data=item[:])

                    # Calculation of 2body features
                    numbers = singlet_group["atomic_numbers"][:] # Repetitive but looks better
                    positions = singlet_group["geometry_bohr"][:]
                    T = generate_xtb_matrices_fpsh(
                        calculator="dxtb",
                        # calculator="tblite",
                        element_numbers=numbers,
                        coordinates=positions,
                        spin=0,
                        spin_pol=True,
                        # spin_pol=False
                        )
                    for M in ["F_a", "F_b", "H", "P_a", "P_b", "S"]:
                    # for M in ["F", "H", "P", "S"]:
                        tgt_singlet_group.create_dataset(f"2body/{M}", data=T[M])
                            
                    

train molecules:   0%|          | 0/10000 [00:00<?, ?it/s]

train molecules:   4%|▍         | 394/10000 [00:34<13:57, 11.47it/s]


KeyboardInterrupt: 

# Profiler

In [1]:
import h5py
from tqdm import tqdm
import cProfile
import pstats
from qcm_ml.features.xtb import generate_xtb_matrices_fpsh

src_path = "/home/beom/orbnet_mhd/qcm_ml_b/qcm_ml/data/qm9/QM9_open_gen_10000.hdf5"
tgt_path = "/home/beom/orbnet_mhd/qcm_ml_b/qcm_ml/data/qm9/QM9_open_gen_10000_DXTB_test.hdf5"

MAX_IDX = 100

def process_data():
    index = 0
    with h5py.File(src_path, 'r') as src_file:
        with h5py.File(tgt_path, 'w') as tgt_file:
            for mode in src_file:
                for mol in tqdm(src_file[mode], desc=mode + " molecules"):
                    if index >= MAX_IDX:
                        break
                    index += 1
                    singlet_group = src_file[mode][mol].get("singlet", None)
                    if singlet_group is not None:
                        tgt_singlet_group = tgt_file.create_group(f"{mode}/{mol}/singlet")
                        for key, item in singlet_group.items():
                            if key in ["2body"]:
                                continue
                            else: 
                                if item.shape == ():
                                    tgt_singlet_group.create_dataset(key, data=item[()])
                                else:
                                    tgt_singlet_group.create_dataset(key, data=item[:])

                        # Calculation of 2body features
                        numbers = singlet_group["atomic_numbers"][:]  # Repetitive but looks better
                        positions = singlet_group["geometry_bohr"][:]
                        T = generate_xtb_matrices_fpsh(
                            calculator="dxtb",
                            element_numbers=numbers,
                            coordinates=positions,
                            spin=0,
                            spin_pol=True,
                        )
                        for M in ["F_a", "F_b", "H", "P_a", "P_b", "S"]:
                            tgt_singlet_group.create_dataset(f"2body/{M}", data=T[M])

# Profiling the process_data function
if __name__ == '__main__':
    profiler = cProfile.Profile()
    profiler.enable()
    process_data() 
    profiler.disable()
    
    # Print the profiling results sorted by cumulative time
    stats = pstats.Stats(profiler).sort_stats('cumulative')
    stats.print_stats(20)  # Print the top 20 results


train molecules:   1%|          | 100/10000 [00:10<17:59,  9.17it/s]
val molecules:   0%|          | 0/5092 [00:00<?, ?it/s]

         3523470 function calls (3502007 primitive calls) in 10.937 seconds

   Ordered by: cumulative time
   List reduced from 1714 to 20 due to restriction <20>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.091    0.091   10.946   10.946 /tmp/ipykernel_5633/2789303748.py:12(process_data)
      100    0.103    0.001   10.252    0.103 /home/beom/orbnet_mhd/qcm_ml_b/qcm_ml/features/xtb.py:96(generate_xtb_matrices_fpsh)
      100    0.012    0.000   10.126    0.101 /home/beom/orbnet_mhd/qcm_ml_b/qcm_ml/features/xtb.py:202(generate_xtb_matrices_dxtb)
      300    0.003    0.000    7.238    0.024 /home/beom/anaconda3/envs/orbnet_tblite/lib/python3.9/site-packages/dxtb/_src/calculators/types/base.py:691(get_property)
      300    0.001    0.000    7.233    0.024 /home/beom/anaconda3/envs/orbnet_tblite/lib/python3.9/site-packages/dxtb/_src/calculators/base.py:51(calculate)
      300    0.003    0.000    7.233    0.024 /home/beom/anaconda3/envs/orbne




In [3]:
import torch

# Check if CUDA is available
if torch.cuda.is_available():
    print("CUDA is available. Devices:")
    # Print the number of available CUDA devices
    for i in range(torch.cuda.device_count()):
        print(f"Device {i}: {torch.cuda.get_device_name(i)}")
else:
    print("CUDA is not available.")


CUDA is available. Devices:
Device 0: TITAN RTX
