In [6]:
import numpy as np
from typing import Tuple
import scipy.constants as co
import h5py
from tqdm import tqdm

# import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.io as pio
# Make "plotly_dark" the default template for the plotly figures created in this notebook.
pio.templates.default = "plotly_dark"

## Startup
Be aware that the code below is quick and dirty. Do not copy it blindly into the main program.

The next code blocks only prepare the ingredients: the interparticle distances and the force arrays. They are used in detail later on.

In [15]:
def load_data(filename:str) -> Tuple[np.ndarray, np.ndarray, float, np.ndarray, np.ndarray, float]:
    """Loads the datafile indicated and returns basic properties

    Args:
        filename (str): path to the .hdf5 file

    Returns:
        Tuple:
            pos (np.ndarray): full system position array all timesteps, multiplied by L
                (presumably the stored positions are fractional -- TODO confirm)
            force (np.ndarray): full system force array all timesteps
            L (float): boxsize, read from element [0, 0] of the lattice vectors
                (assumes a cubic box -- TODO confirm)
            N (np.ndarray): Atom amounts per type
            t (np.ndarray): time array, frame index times POTIM
            T (float): mean over all timesteps of column 3 of the ion-dynamics
                energies (used as the temperature by the RDF routines below)
    """
    skips = 1  # or: df['/input/incar/ML_OUTBLOCK'][()]
    # Open read-only inside a context manager so the file handle is released
    # even when one of the dataset lookups below raises.
    with h5py.File(filename, "r") as df:
        L = df['results/positions/lattice_vectors'][()][0, 0]
        pos = df['intermediate/ion_dynamics/position_ions'][()]*L
        force = df['intermediate/ion_dynamics/forces'][()]
        N = df['results/positions/number_ion_types'][()]
        T = df['intermediate/ion_dynamics/energies'][()][:, 3].mean()
        dt = df['/input/incar/POTIM'][()]*skips
    t = np.arange(pos.shape[0])*dt
    return pos, force, L, N, t, T

def single_frame_rdf_like(pos_array,force_array,indicies,L,bins,return_conventional=False):
    """
    Single-frame, force-based radial distribution contribution of one species with itself.

    For every ordered pair (i, j) of the selected atoms the quantity
    F_j . r_ij / |r_ij|**3, with r_ij = r_j - r_i taken under the minimum
    image convention, is added to the radial bin that contains |r_ij|.

    args:
    pos_array (np.array(n,3)): An array with columns rx, ry, rz
    force_array (np.array(n,3)): An array with columns fx, fy, fz
    indicies (np.array): The row numbers for the species of interest
    L (float): The box length, used for the x, y and z direction alike
    bins (np.array(n)): The increasing positions in r for which the radial distribution function will be calculated
    kwargs:
    return_conventional (bool): UNDER CONSTRUCTION, currently ignored
    returns:
    A 1 dimensional np.longdouble array with one entry per element of bins.
    Entry l holds the summed contribution of all pairs with
    bins[l] <= r < bins[l+1]; pairs with r >= bins[-1] are discarded, so the
    last entry is always zero. Pairs at zero separation (every atom with
    itself) contribute zero.

    """
    pos_sel = np.asarray(pos_array, dtype=float)[indicies, :]
    force_sel = np.asarray(force_array, dtype=float)[indicies, :]
    n_bins = np.size(bins)
    half_box = L/2

    # Pairwise separations by broadcasting: entry [i, j] is r_j - r_i.
    rx = pos_sel[np.newaxis, :, 0] - pos_sel[:, np.newaxis, 0]
    ry = pos_sel[np.newaxis, :, 1] - pos_sel[:, np.newaxis, 1]
    rz = pos_sel[np.newaxis, :, 2] - pos_sel[:, np.newaxis, 2]
    # Force on atom j, broadcast along the i axis.
    fx = force_sel[np.newaxis, :, 0]
    fy = force_sel[np.newaxis, :, 1]
    fz = force_sel[np.newaxis, :, 2]

    # Minimum image convention: fold every component back into [-L/2, L/2].
    for comp in (rx, ry, rz):
        comp -= np.ceil((np.abs(comp) - half_box)/L)*L*np.sign(comp)

    r = (rx*rx + ry*ry + rz*rz)**.5
    with np.errstate(divide='ignore', invalid='ignore'):
        dot_prod = ((fz*rz) + (fy*ry) + (fx*rx))/r/r/r
    # Safety net: no component should exceed L/2 after the folding above.
    dot_prod[(rx > half_box) | (ry > half_box) | (rz > half_box)] = 0
    # Self-pairs (and coincident atoms) give 0/0 = NaN; without this line the
    # NaN would propagate into the whole of the first bin.
    dot_prod[r == 0] = 0

    weights = dot_prod.reshape(-1)
    bin_index = np.digitize(r.reshape(-1), bins) - 1
    # Keep pairs that fall inside [bins[0], bins[-1]); the last slot stays zero.
    in_range = (bin_index >= 0) & (bin_index < n_bins - 1)

    storage_array = np.zeros(n_bins, dtype=np.longdouble)
    # One pass over all pairs instead of one masked sum per bin.
    storage_array[:] = np.bincount(bin_index[in_range], weights=weights[in_range], minlength=n_bins)
    return storage_array

def single_frame_rdf_unlike(pos_array,force_array,indicies,L,bins,return_conventional=False):
    """
    Single-frame, force-based radial distribution contribution between two different species.

    For every pair made of atom j of the first species and atom x of the
    second species the quantity (F_j - F_x) . r / |r|**3, with r = r_j - r_x
    taken under the minimum image convention, is added to the radial bin that
    contains |r|.

    args:
    pos_array (np.array(n,3)): An array with columns rx, ry, rz
    force_array (np.array(n,3)): An array with columns fx, fy, fz
    indicies ([np.array,np.array]): The row numbers for the species of interest for the first and second species respectively
    L (float): The box length, used for the x, y and z direction alike
    bins (np.array(n)): The increasing positions in r for which the radial distribution function will be calculated
    kwargs:
    return_conventional (bool): UNDER CONSTRUCTION, currently ignored
    returns:
    A 1 dimensional np.longdouble array with one entry per element of bins.
    Entry l holds the summed contribution of all pairs with
    bins[l] <= r < bins[l+1]; pairs with r >= bins[-1] are discarded, so the
    last entry is always zero. Pairs at zero separation contribute zero.

    """
    pos_1 = np.asarray(pos_array, dtype=float)[indicies[0], :]
    force_1 = np.asarray(force_array, dtype=float)[indicies[0], :]
    pos_2 = np.asarray(pos_array, dtype=float)[indicies[1], :]
    force_2 = np.asarray(force_array, dtype=float)[indicies[1], :]
    n_bins = np.size(bins)
    half_box = L/2

    # Broadcasting gives (n2, n1) arrays: entry [x, j] is species-1 atom j
    # minus species-2 atom x, for both separations and force differences.
    rx = pos_1[np.newaxis, :, 0] - pos_2[:, np.newaxis, 0]
    ry = pos_1[np.newaxis, :, 1] - pos_2[:, np.newaxis, 1]
    rz = pos_1[np.newaxis, :, 2] - pos_2[:, np.newaxis, 2]
    fx = force_1[np.newaxis, :, 0] - force_2[:, np.newaxis, 0]
    fy = force_1[np.newaxis, :, 1] - force_2[:, np.newaxis, 1]
    fz = force_1[np.newaxis, :, 2] - force_2[:, np.newaxis, 2]

    # Minimum image convention: fold every component back into [-L/2, L/2].
    for comp in (rx, ry, rz):
        comp -= np.ceil((np.abs(comp) - half_box)/L)*L*np.sign(comp)

    r = (rx*rx + ry*ry + rz*rz)**.5
    with np.errstate(divide='ignore', invalid='ignore'):
        dot_prod = ((fz*rz) + (fy*ry) + (fx*rx))/r/r/r
    # Safety net: no component should exceed L/2 after the folding above.
    dot_prod[(rx > half_box) | (ry > half_box) | (rz > half_box)] = 0
    # Coincident atoms give 0/0 = NaN, which would otherwise propagate into
    # the whole of the first bin.
    dot_prod[r == 0] = 0

    weights = dot_prod.reshape(-1)
    bin_index = np.digitize(r.reshape(-1), bins) - 1
    # Keep pairs that fall inside [bins[0], bins[-1]); the last slot stays zero.
    in_range = (bin_index >= 0) & (bin_index < n_bins - 1)

    storage_array = np.zeros(n_bins, dtype=np.longdouble)
    # One pass over all pairs instead of one masked sum per bin.
    storage_array[:] = np.bincount(bin_index[in_range], weights=weights[in_range], minlength=n_bins)
    return storage_array

def single_frame_traditional(pos_array,force_array,indicies,L,bins):
    """
    Histogram of pair distances for one frame (conventional, counting-based RDF).

    args:
    pos_array (np.array(n,3)): An array with columns rx, ry, rz
    force_array (np.array(n,3)): Unused; kept so the signature matches the force-based single-frame functions
    indicies (np.array): The row numbers for the species of interest
    L (float): The box length, used for the x, y and z direction alike
    bins (np.array(n)): The bin edges in r
    returns:
    A 1 dimensional np.longdouble array of length len(bins)-1 holding the
    number of pairs per bin. Every pair is counted once (i < j).

    """
    rdf_state = np.zeros(bins.shape[0]-1, dtype=np.longdouble)
    pos = pos_array[indicies]

    for (i, pos_i) in enumerate(pos[:-1]):
        # Only partners j > i, so each pair is visited exactly once.
        d = (pos[i+1:] - pos_i + L/2) % L - L/2  # pbc and mic directly implemented
        r2 = np.sum(d*d, axis=1)
        # Accumulate over all atoms i; a plain assignment here would keep
        # only the histogram of the last pair.
        rdf_state += np.histogram(np.sqrt(r2), bins=bins)[0]
    return rdf_state

def run_rdf_traditional(pos, force, L, N, t, T,delr=.01,start=0,stop=-1,period=1, rmax=True):
    """
    Conventional (histogram based) RDF of the second atom type with itself.

    args:
    pos (np.ndarray): positions for all frames, indexed [frame, atom, xyz]
    force (np.ndarray): forces for all frames; only passed through to the single-frame function
    L (float): box length
    N (np.ndarray): atom amounts per type; atoms N[0] .. N[0]+N[1]-1 are analysed
    t, T: unused here; kept so the signature matches run_rdf
    kwargs:
    delr (float): bin width in r
    start (int): first frame to use
    stop (int): frame at which to stop (exclusive). Negative values count from
        the end, so the default -1 leaves out the final frame
    period (int): the jumps made between sampled frames
    rmax (float): maximum radial position; True (default) means L/2

    returns:
    A 2 dimensional numpy array: row 0 the bin centres, row 1 g(r).
    None is returned (after printing a message) when the frame selection is invalid.

    """
    single_frame_function = single_frame_traditional
    indicies = np.arange(N[0], N[0] + N[1])
    # V / (N (N - 1)) with N the number of selected atoms
    prefactor= float(L*L*L)/(float(len(indicies))*float(len(indicies)-1))

    if start > pos.shape[0]:
        print('First frame index exceeds frames in trajectory')
        return
    if stop > pos.shape[0]:
        print('Final frame index exceeds frames in trajectory')
        return

    # find how many steps to run
    to_run=range(int(start%pos.shape[0]),int(stop%pos.shape[0]),period)
    if len(to_run) == 0:
        print('No frames selected by start/stop/period')
        return

    # set the bins
    if rmax is True:
        bins= np.arange(0,L/2,delr)
    else:
        bins= np.arange(0,float(rmax),delr)

    accumulated_storage_array = np.zeros(bins.shape[0]-1, dtype=np.longdouble)
    for frame_count in tqdm(to_run):
        accumulated_storage_array += single_frame_function(pos[frame_count],force[frame_count],indicies,L,bins)
    # recompute bins to centerpoints
    centres = (bins[1:] + bins[:-1])/2
    # Shell volume 4*pi*r^2*dr. The single-frame histogram counts every pair
    # once, hence the factor 2 to obtain the ordered-pair count that goes with
    # N(N-1). Averaging uses the number of frames actually processed, not the
    # index of the last frame.
    shell_volume = 4*np.pi*centres**2*delr
    accumulated_storage_array *= 2*prefactor/(len(to_run)*shell_volume)
    return np.array([centres, accumulated_storage_array])

def run_rdf(pos, force, L, N, t, T,delr=.01,start=0,stop=-1,period=1, rmax=True, from_zero=True):
    """
    This is the master function for running a force RDF of the second atom type with itself.

    args:
    pos (np.ndarray): positions for all frames, indexed [frame, atom, xyz]
    force (np.ndarray): forces for all frames, same layout as pos
        (presumably in eV per length unit, since k_B is converted to eV/K below -- TODO confirm)
    L (float): box length
    N (np.ndarray): atom amounts per type; atoms N[0] .. N[0]+N[1]-1 are analysed
    t: unused
    T (float): Temperature of the system
    kwargs:
    delr (float): The spacing between radial points in an RDF (this is not a bin width as this is not a histogram but a heaviside)
    start (int): The first frame for which the radial distribution function will be calculated
    stop (int): The frame at which to stop (exclusive). Negative values count
        from the end, so the default -1 leaves out the final frame
    period (int): The jumps made between sampled frames
    rmax (float): The maximum radial position defaults to follow the minimum image convention
    from_zero (bool): If True the Heaviside is taken from zero, if False it is taken from rmax

    returns:
    A 2 dimensional numpy array: row 0 the r values, row 1 the accompanying rdf values.
    None is returned (after printing a message) when the frame selection is invalid.

    """

    single_frame_function = single_frame_rdf_like
    indicies = np.arange(N[0], N[0] + N[1])
    # V / (N (N - 1)) with N the number of selected ATOMS. pos.shape[0] is the
    # number of frames and must not be used here. The 1/(4*pi) factor is
    # applied exactly once, in the normalisation further down.
    prefactor= float(L*L*L)/(float(len(indicies))*float(len(indicies)-1))

    if start > pos.shape[0]:
        print('First frame index exceeds frames in trajectory')
        return
    if stop > pos.shape[0]:
        print('Final frame index exceeds frames in trajectory')
        return

    # find how many steps to run
    to_run=range(int(start%pos.shape[0]),int(stop%pos.shape[0]),period)
    if len(to_run) == 0:
        print('No frames selected by start/stop/period')
        return

    # set the bins
    if rmax is True:
        bins= np.arange(0,L/2,delr)
    else:
        bins= np.arange(0,float(rmax),delr)

    accumulated_storage_array=np.zeros(np.size(bins), dtype=np.longdouble)
    for frame_count in tqdm(to_run):
        accumulated_storage_array+=single_frame_function(pos[frame_count],force[frame_count],indicies,L,bins)

    accumulated_storage_array=np.nan_to_num(accumulated_storage_array)
    # Frame average, divided by 4*pi*k_B*T with k_B expressed in eV/K.
    accumulated_storage_array*=prefactor/(4*np.pi*len(to_run)*(co.k/co.eV)*T)
    if from_zero:
        # integrate outwards from r = 0
        return np.array([bins,np.cumsum(accumulated_storage_array)])
    # integrate inwards from the largest r, where the value is taken to be 1
    return np.array([bins,1-np.cumsum(accumulated_storage_array[::-1])[::-1]])

def run_rdf_better(pos, force, L, N, t, T,delr=.01,start=0,stop=-1,period=1, rmax=True, from_zero=True):
    """
    Force RDF of the second atom type with itself, combining the "from zero"
    and "from infinity" estimates per radial point with a variance-minimising weight.

    args:
    pos (np.ndarray): positions for all frames, indexed [frame, atom, xyz]
    force (np.ndarray): forces for all frames, same layout as pos
    L (float): box length
    N (np.ndarray): atom amounts per type; atoms N[0] .. N[0]+N[1]-1 are analysed
    t: unused
    T (float): Temperature of the system
    kwargs:
    delr (float): The spacing between radial points in an RDF (this is not a bin width as this is not a histogram but a heaviside)
    start (int): The first frame for which the radial distribution function will be calculated
    stop (int): The frame at which to stop (exclusive). Negative values count
        from the end, so the default -1 leaves out the final frame
    period (int): The jumps made between sampled frames
    rmax (float): The maximum radial position defaults to follow the minimum image convention
    from_zero (bool): unused here; kept so the signature matches run_rdf

    returns:
    A numpy array of shape (len(bins)-1, 3) with columns: r value, combined
    rdf value, weight given to the from-zero estimate.
    None is returned (after printing a message) when the frame selection is invalid.

    """

    single_frame_function = single_frame_rdf_like
    indicies = np.arange(N[0], N[0] + N[1])
    # V / (N (N - 1)) with N the number of selected atoms
    prefactor= float(L*L*L)/(float(len(indicies))*float(len(indicies)-1))

    if start > pos.shape[0]:
        print('First frame index exceeds frames in trajectory')
        return
    if stop > pos.shape[0]:
        print('Final frame index exceeds frames in trajectory')
        return

    # find how many steps to run
    to_run=range(int(start%pos.shape[0]),int(stop%pos.shape[0]),period)
    if len(to_run) == 0:
        # without frames the per-frame array below would be empty and the
        # axis=1 reductions would fail with an obscure error
        print('No frames selected by start/stop/period')
        return

    # set the bins
    if rmax is True:
        bins= np.arange(0,L/2,delr)
    else:
        bins= np.arange(0,float(rmax),delr)

    list_store = []
    accumulated_storage_array=np.zeros(np.size(bins), dtype=np.longdouble)
    for frame_count in tqdm(to_run):
        this_frame=single_frame_function(pos[frame_count],force[frame_count],indicies,L,bins)
        accumulated_storage_array+=this_frame
        list_store.append(this_frame)

    # per-frame contributions, shape (frames, bins)
    base_array=np.nan_to_num(np.array(list_store))
    base_array*=prefactor*co.eV/(4*np.pi*co.k*T)

    # frame-averaged contributions, shape (bins,)
    accumulated_storage_array=np.nan_to_num(accumulated_storage_array)
    accumulated_storage_array*=prefactor*co.eV/(4*np.pi*len(to_run)*co.k*T)

    # expectation values of the two estimators and of their difference
    exp_zero_rdf=np.array(np.cumsum(accumulated_storage_array)[:-1])
    exp_inf_rdf=np.array(1-np.cumsum(accumulated_storage_array[::-1])[::-1][1:])
    exp_delta=exp_inf_rdf-exp_zero_rdf
    # the same quantities frame by frame
    base_zero_rdf=np.array(np.cumsum(base_array,axis=1))[:,:-1]
    base_inf_rdf=np.array(1-np.cumsum(base_array[:,::-1],axis=1)[:,::-1][:,1:])
    base_delta = base_inf_rdf - base_zero_rdf
    var_del=np.mean((base_delta-exp_delta)**2,axis=0)
    cov_inf=np.mean((base_delta-exp_delta)*(base_inf_rdf-exp_inf_rdf),axis=0)
    # Weight of the from-zero estimate. Where the difference has no variance
    # (e.g. a single frame) the ratio is 0/0; fall back to weight 0, i.e. the
    # pure from-infinity estimate, instead of returning NaN.
    with np.errstate(divide='ignore', invalid='ignore'):
        combination = cov_inf/var_del
    combination = np.where(var_del > 0, combination, 0)
    return np.transpose(np.array([bins[1:],np.mean(base_inf_rdf*(1-combination)+(base_zero_rdf*combination),axis=0),combination]))


In [16]:
# HDF5 output file to analyse; the path is relative to the notebook's working directory.
file = r"test_output/combined_simulation/vaspout4.h5"
# Unpack, in order: positions, forces, box length, atom counts per type, time axis, mean temperature.
pos, force, L, N, t, T = load_data(filename=file)

## Set properties

In [17]:
i = 10  # investigate timestep 10

# Positions and forces of all atoms at frame i.
pos_i = pos[i, :, :]
force_i = force[i, :, :]
# Row indices of the second atom type: the N[1] atoms that follow the first N[0] atoms
# (presumably oxygen, going by the name -- TODO confirm).
O_i  = np.arange(N[0], N[0] + N[1])
# 250 evenly spaced radial points from 0 to half the box length.
bins = np.linspace(0, L/2, 250)

# Now we can test the force RDF code
As a first step we write the code out ourselves instead of calling a library. Let's start with oxygen-oxygen as a learning example; for reference, see this code: https://github.com/user200000/revelsmd/blob/main/revelsMD/revels_rdf.py

In [28]:
# Every call samples each 10th frame (period=10).
# rdf0: conventional histogram RDF; note the coarser delr=0.1 versus the default 0.01 used below.
rdf0 = run_rdf_traditional(pos, force, L, N, t, T, delr=0.1, period=10)
# rdf1 / rdf2: force-based RDF accumulated from r = 0 and from the largest r respectively.
rdf1 = run_rdf(pos, force, L, N, t, T, period=10)
rdf2 = run_rdf(pos, force, L, N, t, T, period=10, from_zero=False)
# rdf3: combination of both force-based estimates.
rdf3 = run_rdf_better(pos, force, L, N, t, T, period=10)

# Replace NaN/inf entries by finite numbers and convert to float64 before plotting.
rdf0 = np.nan_to_num(rdf0).astype(np.float64)
rdf1 = np.nan_to_num(rdf1).astype(np.float64)
rdf2 = np.nan_to_num(rdf2).astype(np.float64)
rdf3 = np.nan_to_num(rdf3).astype(np.float64)

100%|██████████| 500/500 [00:01<00:00, 316.15it/s]
100%|██████████| 500/500 [00:03<00:00, 135.31it/s]
100%|██████████| 500/500 [00:03<00:00, 133.88it/s]
100%|██████████| 500/500 [00:03<00:00, 135.48it/s]


In [29]:
# Overlay all four RDF estimates in a single figure.
f = go.FigureWidget()
# rdf0, rdf1 and rdf2 are stored row-wise: row 0 is r, row 1 is the rdf value.
f.add_scatter(x=rdf0[0, :], y=rdf0[1, :], name='traditional rdf')
f.add_scatter(x=rdf1[0, :], y=rdf1[1, :], name='from zero')
f.add_scatter(x=rdf2[0], y=rdf2[1], name='from infinity')
# rdf3 is stored column-wise: column 0 is r, column 1 is the combined rdf value.
f.add_scatter(x=rdf3[:, 0], y=rdf3[:, 1], name='combined variance reduced')
f.update_layout(
    title='force rdf',
    xaxis_title='radial distance r / [Angstrom]',
    yaxis_title='occurance g(r)',
)

f.show()