# 0. Normalize the `tildeR` for a specific center atom and its neighbor species (e.g. Li center with Li and Si neighbors)
## Using `TildeRPairNormalizer`

In [1]:
import numpy as np
from typing import List, Dict

from matersdk.io.pwmat.output.movement import Movement
from matersdk.data.deepmd.data_system import DpLabeledSystem
from matersdk.io.publicLayer.structure import DStructure
from matersdk.io.publicLayer.neigh import StructureNeighborsDescriptor
from matersdk.feature.deepmd.se_pair import DpseTildeRPairDescriptor

from matersdk.feature.deepmd.preprocess import (
                    TildeRPairNormalizer,
                    NormalizerPremise
)

# 1. Custom parameters

In [18]:
# Input locations. These are absolute paths on the original author's machine;
# point them at your own copies of the files before running this cell.
# NOTE(review): `atom_config_path` is not read by any cell visible in this
# notebook section — confirm whether it is still needed.
atom_config_path = "/data/home/liuhanyu/hyliu/code/matersdk/demo/feature/movement/LiSi.config"
movement_path = "/data/home/liuhanyu/hyliu/code/mlff/test/demo2/PWdata/data1/MOVEMENT"
# Load the trajectory stored in the MOVEMENT file.
movement = Movement(movement_path=movement_path)

# Build a labeled system from the trajectory object.
dpsys = DpLabeledSystem.from_trajectory_s(trajectory_object=movement)
# Bare last expression: the cell output is the system's summary repr.
dpsys

****************** LabeledSystem Summary *******************
	 * Images Number           : 550           
	 * Atoms Number            : 72            
	 * Virials Information     : True          
	 * Energy Deposition       : True          
	 * Elements List           :
		 - Li: 48              
		 - Si: 24              
************************************************************




# 2. `NormalizerPremise.concat_tildeRs()`: Generate the dataset used to calculate `davg` and `dstd`
1. Note: Only the first 10 frames are used to calculate `davg` and `dstd`.

In [19]:
# --- Frames that feed the davg/dstd statistics: indices 0..9 ---
structure_indices = list(range(10))

# --- Cutoff settings passed to the neighbor / descriptor builders ---
rcut = 6.5
rcut_smooth = 6.0

# --- Species selection (atomic numbers) ---
center_atomic_number = 3                # Li
nbr_atomic_numbers = [3, 14]            # Li, Si
# Keyed by the same atomic numbers as `nbr_atomic_numbers`.
# presumably the padded neighbor count per neighbor species — TODO confirm
max_num_nbrs_dict = {3: 100, 14: 80}

# --- Options forwarded to the structure-neighbors builder ---
scaling_matrix = [3, 3, 3]
reformat_mark = True
coords_are_cartesian = True

In [20]:
# Collect the environment matrices of the selected frames into one array;
# this array is what the normalizer statistics are computed from.
tildeRs_array = NormalizerPremise.concat_tildeRs(
    # data source
    dp_labeled_system=dpsys,
    structure_indices=structure_indices,
    # species selection
    center_atomic_number=center_atomic_number,
    nbr_atomic_numbers=nbr_atomic_numbers,
    max_num_nbrs_dict=max_num_nbrs_dict,
    # cutoffs and supercell expansion
    rcut=rcut,
    rcut_smooth=rcut_smooth,
    scaling_matrix=scaling_matrix,
)
# Displayed as the cell output so the array dimensions can be checked.
tildeRs_array.shape

(48, 1800, 4)

# 3. Initialize `TildeRPairNormalizer`

In [21]:
# Build the normalizer from the concatenated environment matrices, then show
# the two statistics it exposes (`davg` and `dstd`).
normalizer = TildeRPairNormalizer(tildeRs_array=tildeRs_array)

# `sep='\n\t'` puts each value on its own tab-indented line below its label.
print("Step 1.1. davg of Environment matrix : ", normalizer.davg, sep='\n\t')
print("Step 1.2. dstd of Environment matrix : ", normalizer.dstd, sep='\n\t')

Step 1.1. davg of Environment matrix : 
	[[0.06974313 0.         0.         0.        ]]
Step 1.2. dstd of Environment matrix : 
	[[0.11278804 0.07656205 0.07656205 0.07656205]]


# 4. Normalize the `tildeR` of a new `DStructure`

In [22]:
# Take a frame that was NOT part of the statistics set (those were frames
# 0-9) and build its neighbor information with the same cutoff and supercell
# settings that were used when collecting the statistics.
new_structure = movement.get_frame_structure(idx_frame=100)
struct_nbr = StructureNeighborsDescriptor.create(
    'v1',
    structure=new_structure,
    rcut=rcut,
    scaling_matrix=scaling_matrix,
    reformat_mark=reformat_mark,
    coords_are_cartesian=coords_are_cartesian,
)

# Environment matrix for the Li-center / Si-neighbor pair of the new frame.
# The center species and the Si neighbor cap are read from the configuration
# cell instead of being hard-coded, so this descriptor is built with the same
# settings as the data behind `davg`/`dstd`. (Previously the Si cap was
# hard-coded to 100, while `max_num_nbrs_dict` specifies 80 for Si.)
si_atomic_number = 14
new_tildeRs_array = DpseTildeRPairDescriptor.create(
    'v1',
    structure_neighbors=struct_nbr,
    center_atomic_number=center_atomic_number,
    nbr_atomic_number=si_atomic_number,
    rcut=rcut,
    rcut_smooth=rcut_smooth,
).get_tildeR(max_num_nbrs=max_num_nbrs_dict[si_atomic_number])

In [23]:
print("Step 2. Using a new environment matrix, after normalize...")

# Normalize once and reuse the result for both statistics; the original cell
# ran the same normalization twice on the same input.
normalized_new_tildeRs = normalizer.normalize(tildeRs_array=new_tildeRs_array)

print("Step 2.1. The max value of environment is : ", end="\n\t")
print(np.max(normalized_new_tildeRs))
print("Step 2.2. The min value of environment is : ", end="\n\t")
print(np.min(normalized_new_tildeRs))

Step 2. Using a new environment matrix, after normalize...
Step 2.1. The max value of environment is : 
	4.982843530922144
Step 2.2. The min value of environment is : 
	-4.953980917632865
