In [1]:
import numpy as np
from math import sqrt
from os.path import *
import time
import numba

In [2]:
import Cube
data_1 = Cube.CubeData(join('yld_data', 'monomer_99_wCh_trans1.cub'))
data_2 = Cube.CubeData(join('yld_data', 'monomer_100_wCh_trans1.cub'))
xyz_1 = data_1.coords
xyz_2 = data_2.coords
dens_1 = data_1.cube_data
dens_2 = data_2.cube_data
dV_12 = data_1.dV * data_2.dV

n_pts_12 = xyz_1.shape[0] * xyz_2.shape[0]
global_timers = {}

Reading cube file
Formatting
Done
Reading cube file
Formatting
Done


In [3]:

# data_1 = np.loadtxt(join('yld_data', 'monomer_99_wCh_trans1.fcub'))
# data_2 = np.loadtxt(join('yld_data', 'monomer_100_wCh_trans1.fcub'))
# xyz_1 = data_1[:, 0:3]
# xyz_2 = data_2[:, 0:3]
# dens_1 = data_1[:, 3]
# dens_2 = data_2[:, 3]
# dV_1 = np.prod(np.abs(xyz_1[0] - xyz_1[1]))
# dV_2 = np.prod(np.abs(xyz_2[0] - xyz_2[1]))
# dV_12 = dV_1*dV_2

# n_pts_12 = xyz_1.shape[0] * xyz_2.shape[0]
# global_timers = {}

# print(len(xyz_1))

In [4]:
# def calc_coulomb_pure_python(xyz_1: np.ndarray, dens_1: np.ndarray, xyz_2: np.ndarray, dens_2: np.ndarray):
def calc_coulomb_pure_python(pts_1, rho_1, pts_2, rho_2):
    print("Pure Python Coulomb Integral")

    total = 0.0
    count = 0
    n_pts_1 = len(pts_1)
    n_pts_2 = len(pts_2)
    n_pts_12 = n_pts_1*n_pts_2
    print_num = n_pts_12//5
    for i in range(n_pts_1):
        for j in range(n_pts_2):
            if count % print_num == 0:
                print(f"    Coulomb Integral {(count / n_pts_12*100):.1f} %")

            x1, y1, z1 = pts_1[i]
            x2, y2, z2 = pts_2[j]
            dx = x1 - x2
            dy = y1 - y2
            dz = z1 - z2
            r = sqrt(dx**2 + dy**2 + dz**2)
            total += rho_1[i]*rho_2[j]/r
            count += 1
    return total*dV_12

n_keep = 100
start = time.time()
total = calc_coulomb_pure_python(xyz_1[::n_keep], dens_1[::n_keep], xyz_2[::n_keep], dens_2[::n_keep])
total_time = (time.time() - start)*n_keep**2
global_timers['pure_python'] = (total, total_time)
print(f'pure_python: {total_time:.2f} s ({total} a.u.)')

Pure Python Coulomb Integral
    Coulomb Integral 0.0 %
    Coulomb Integral 20.0 %
    Coulomb Integral 40.0 %
    Coulomb Integral 60.0 %
    Coulomb Integral 80.0 %
    Coulomb Integral 100.0 %
pure_python: 22877.49 s (4.8197766672082645e-09 a.u.)


In [5]:
# def calc_coulomb_pure_python(xyz_1: np.ndarray, dens_1: np.ndarray, xyz_2: np.ndarray, dens_2: np.ndarray):
def calc_coulomb_pure_numpy_mix(pts_1, rho_1, pts_2, rho_2):
    print("Pure Python Coulomb Integral")

    total = 0.0
    count = 0
    n_pts_1 = len(pts_1)
    n_pts_2 = len(pts_2)
    n_pts_12 = n_pts_1*n_pts_2
    print_num = n_pts_12//5
    for i in range(n_pts_1):
        for j in range(n_pts_2):
            if count % print_num == 0:
                print(f"    Coulomb Integral {(count / n_pts_12*100):.1f} %")

            dr = pts_1[i] - pts_2[j]
            r = np.linalg.norm(dr)
            total += rho_1[i]*rho_2[j]/r
            count += 1
    return total*dV_12

n_keep = 100
start = time.time()
total = calc_coulomb_pure_numpy_mix(xyz_1[::n_keep], dens_1[::n_keep], xyz_2[::n_keep], dens_2[::n_keep])
total_time = (time.time() - start)*n_keep**2
global_timers['pure_numpy_mix'] = (total, total_time)
print(f'pure_numpy_mix: {total_time:.2f} s ({total} a.u.)')

Pure Python Coulomb Integral
    Coulomb Integral 0.0 %
    Coulomb Integral 20.0 %
    Coulomb Integral 40.0 %
    Coulomb Integral 60.0 %
    Coulomb Integral 80.0 %
    Coulomb Integral 100.0 %
pure_numpy_mix: 34023.75 s (4.819776667208263e-09 a.u.)


In [6]:
# def calc_coulomb_pure_python(xyz_1: np.ndarray, dens_1: np.ndarray, xyz_2: np.ndarray, dens_2: np.ndarray):
def calc_coulomb_numpy(pts_1, rho_1, pts_2, rho_2):
    print("Pure Python Coulomb Integral")

    total = 0.0
    count = 0
    n_pts_1 = len(pts_1)
    print_num = n_pts_1//5
    for i in range(n_pts_1):
        if count % print_num == 0:
            print(f"    Coulomb Integral {(count / n_pts_1*100):.1f} %")

        dr = pts_1[i] - pts_2
        r = np.linalg.norm(dr, axis=1)
        total += rho_1[i]*np.sum(rho_2/r)
        count += 1

    return total*dV_12

n_keep = 10
start = time.time()
total = calc_coulomb_numpy(xyz_1[::n_keep], dens_1[::n_keep], xyz_2[::n_keep], dens_2[::n_keep])
total_time = (time.time() - start)*n_keep**2
global_timers['numpy'] = (total, total_time)
print(f'numpy: {total_time:.2f} s ({total} a.u.)')

Pure Python Coulomb Integral
    Coulomb Integral 0.0 %
    Coulomb Integral 20.0 %
    Coulomb Integral 40.0 %
    Coulomb Integral 60.0 %
    Coulomb Integral 80.0 %
numpy: 321.96 s (2.7207775039991674e-06 a.u.)


In [7]:
@numba.jit()
def calc_coulomb_numba(pts_1, rho_1, pts_2, rho_2):
    print("Pure Python Coulomb Integral")

    total = 0.0
    n_pts_1 = len(pts_1)
    n_pts_2 = len(pts_2)
    for i in numba.prange(n_pts_1):
        for j in range(n_pts_2):

            x1, y1, z1 = pts_1[i]
            x2, y2, z2 = pts_2[j]
            dx = x1 - x2
            dy = y1 - y2
            dz = z1 - z2
            r = sqrt(dx**2 + dy**2 + dz**2)
            total += rho_1[i]*rho_2[j]/r
            
    return total*dV_12

n_keep = 1
start = time.time()
total = calc_coulomb_numba(xyz_1[::n_keep], dens_1[::n_keep], xyz_2[::n_keep], dens_2[::n_keep])
total_time = (time.time() - start)*n_keep**2
global_timers['numba'] = (total, total_time)
print(f'numba: {total_time:.2f} s ({total} a.u.)')

  @numba.jit()


Pure Python Coulomb Integral
numba: 24.81 s (8.56880814588704e-05 a.u.)


In [8]:
@numba.jit(parallel=True)
def calc_coulomb_numba_parallel(pts_1, rho_1, pts_2, rho_2):
    print("Pure Python Coulomb Integral")

    total = 0.0
    n_pts_1 = len(pts_1)
    n_pts_2 = len(pts_2)
    for i in numba.prange(n_pts_1):
        for j in range(n_pts_2):

            x1, y1, z1 = pts_1[i]
            x2, y2, z2 = pts_2[j]
            dx = x1 - x2
            dy = y1 - y2
            dz = z1 - z2
            r = sqrt(dx**2 + dy**2 + dz**2)
            total += rho_1[i]*rho_2[j]/r
            
    return total*dV_12

n_keep = 1
start = time.time()
numba.set_num_threads(8)
total = calc_coulomb_numba_parallel(xyz_1[::n_keep], dens_1[::n_keep], xyz_2[::n_keep], dens_2[::n_keep])
total_time = (time.time() - start)*n_keep**2
global_timers['numba_parallel'] = (total, total_time)
print(f'numba_parallel: {total_time:.2f} s ({total} a.u.)')

  @numba.jit(parallel=True)
OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


Pure Python Coulomb Integral
numba_parallel: 4.19 s (8.568808148407738e-05 a.u.)


In [10]:
@numba.jit(parallel=True, fastmath=True)
def calc_coulomb_numba_parallel_numpy(coords_1: np.ndarray, cube_data_1: np.ndarray, coords_2: np.ndarray, cube_data_2: np.ndarray):

    size_1 = coords_1.shape[0]
    total = 0.0
    count = 0.0
    n_threads = numba.get_num_threads()
    for i in numba.prange(size_1):
        # if count % 1000 == 0 and numba.get_thread_id() == 0:
        #     pct = np.round(100.0*count*n_threads/size_1, 2)
        #     print(count*n_threads, size_1, pct)

        dr = coords_2 - coords_1[i]
        dr2 = dr*dr
        r = np.sqrt(dr2[:, 0] + dr2[:, 1] + dr2[:, 2])
        total += cube_data_1[i]*np.sum(cube_data_2/r)
        count += 1

    return total*dV_12

n_keep = 1
start = time.time()
numba.set_num_threads(8)
total = calc_coulomb_numba_parallel_numpy(xyz_1[::n_keep], dens_1[::n_keep], xyz_2[::n_keep], dens_2[::n_keep])
total_time = (time.time() - start)*n_keep**2
global_timers['numba_parallel_numpy'] = (total, total_time)
print(f'numba_parallel_numpy: {total_time:.2f} s ({total} a.u.)')

  @numba.jit(parallel=True, fastmath=True)


numba_parallel_numpy: 20.76 s (8.568808148826523e-05 a.u.)


In [28]:
import threading
def _coulomb_by_indix(indicies, pts_1, rho_1, pts_2, rho_2):

    total = 0.0
    count = 0
    n_pts_1 = len(indicies)
    n_pts_2 = len(pts_2)
    n_pts_12 = n_pts_1*n_pts_2
    print_num = n_pts_12//5
    for i in indicies: # EDIT: loop over specified indicies only
        for j in range(n_pts_2):
            if count % print_num == 0:
                print(f"    Coulomb Integral {(count / n_pts_12*100):.1f} %")

            x1, y1, z1 = pts_1[i]
            x2, y2, z2 = pts_2[j]
            dx = x1 - x2
            dy = y1 - y2
            dz = z1 - z2
            
            r = sqrt(dx**2 + dy**2 + dz**2)
            total += rho_1[i]*rho_2[j]/r
            count += 1
    return total*dV_12

def calc_coulomb_thread(n_threads, pts_1, rho_1, pts_2, rho_2):
    total = 0.0
    all_threads = []
    for n in range(n_threads):
        #   these will be the indicies used by the inner Coulomb loop
        indicies = np.arange(n, len(pts_1), n_threads)
        print(f"Thread {n} using indicies ", *indicies[0:4], "...")
        
        thread = threading.Thread(target=_coulomb_by_indix, args=(indicies, pts_1, rho_1, pts_2, rho_2))
        all_threads.append(thread)
        thread.start()

    for thread in all_threads:
        thread.join()


n_keep = 100
start = time.time()
total = calc_coulomb_thread(4, xyz_1[::n_keep], dens_1[::n_keep], xyz_2[::n_keep], dens_2[::n_keep])
total_time = (time.time() - start)*n_keep**2
global_timers['threaded'] = (total, total_time)
print(f'threaded: {total_time:.2f} s ({total} a.u.)')

Thread 0 using indicies  0 4 8 12 ...
    Coulomb Integral 0.0 %
Thread 1 using indicies  1 5 9 13 ...
    Coulomb Integral 0.0 %
Thread 2 using indicies  2 6 10 14 ...
    Coulomb Integral 0.0 %
Thread 3 using indicies  3 7 11 15 ...
    Coulomb Integral 0.0 %
    Coulomb Integral 20.0 %
    Coulomb Integral 20.0 %
    Coulomb Integral 20.0 %
    Coulomb Integral 20.0 %
    Coulomb Integral 40.0 %    Coulomb Integral 40.0 %
    Coulomb Integral 40.0 %

    Coulomb Integral 40.0 %
    Coulomb Integral 60.0 %
    Coulomb Integral 60.0 %
    Coulomb Integral 60.0 %
    Coulomb Integral 60.0 %
    Coulomb Integral 80.0 %    Coulomb Integral 80.0 %

    Coulomb Integral 80.0 %
    Coulomb Integral 80.0 %
    Coulomb Integral 100.0 %
    Coulomb Integral 100.0 %
    Coulomb Integral 100.0 %
    Coulomb Integral 100.0 %
threaded: 23031.80 s (None a.u.)


In [31]:
import multiprocess as mp # use with Jupyter Notebooks
#import multiprocess as mp # use with traditional python files

def calc_coulomb_MP(n_process, pts_1, rho_1, pts_2, rho_2):
    pts_1_split = np.array_split(pts_1, n_process)
    rho_1_split = np.array_split(rho_1, n_process)
    pts_2_split = np.array_split(pts_2, n_process)
    rho_2_split = np.array_split(rho_2, n_process)

    with mp.Pool(n_process) as pool:
        func_params = zip(pts_1_split, rho_1_split, pts_2_split, rho_2_split)
        results = pool.starmap(calc_coulomb_pure_python, func_params)

n_keep = 10
start = time.time()
total = calc_coulomb_MP(4, xyz_1[::n_keep], dens_1[::n_keep], xyz_2[::n_keep], dens_2[::n_keep])
total_time = (time.time() - start)*n_keep**2
global_timers['multiprocessing'] = (total, total_time)
print(f'multiprocessing: {total_time:.2f} s ({total} a.u.)')

Pure Python Coulomb IntegralPure Python Coulomb IntegralPure Python Coulomb Integral
Pure Python Coulomb Integral
    Coulomb Integral 0.0 %

    Coulomb Integral 0.0 %


    Coulomb Integral 0.0 %    Coulomb Integral 0.0 %


    Coulomb Integral 20.0 %
    Coulomb Integral 20.0 %
    Coulomb Integral 20.0 %
    Coulomb Integral 20.0 %
    Coulomb Integral 40.0 %
    Coulomb Integral 40.0 %
    Coulomb Integral 40.0 %
    Coulomb Integral 40.0 %
    Coulomb Integral 60.0 %
    Coulomb Integral 60.0 %
    Coulomb Integral 60.0 %
    Coulomb Integral 60.0 %
    Coulomb Integral 80.0 %    Coulomb Integral 80.0 %

    Coulomb Integral 80.0 %
    Coulomb Integral 80.0 %
multiprocessing: 1460.25 s (None a.u.)


In [35]:
for name, val in global_timers.items():
    print(f'{name:25s} {val[1]:10.1f}')

pure_python                  22877.5
pure_numpy_mix               34023.7
numpy                          322.0
numba                           24.8
numba_parallel                   4.2
numba_parallel_numpy            20.8
threaded                     23031.8
multiprocessing               1460.3
