In [1]:
import atomdnn

# 'float32' is used for reading data and training by default; set data_type to 'float64' here to change it.
atomdnn.data_type = 'float64'

# Force and stress are evaluated by default;
# if only the potential energy is needed, set compute_force to False.
atomdnn.compute_force = True

# Default value is for converting eV/A^3 to GPa.
# Note: a positive predicted stress means tension and a negative one means compression.
# NOTE(review): this assigns a notebook-level variable, not an attribute of atomdnn, and no
# visible cell reads it -- confirm whether `atomdnn.stress_unit_convert` was intended.
stress_unit_convert = 160.2176 

# NOTE(review): the atomdnn settings above are assigned before the submodule imports below;
# presumably that order matters -- confirm before regrouping the imports.
import numpy as np
import tensorflow as tf
import pickle
from atomdnn import data
from atomdnn import network
from atomdnn.data import Data
# The wildcard import presumably supplies split_dataset, get_fingerprints_num and
# get_input_dict, which later cells call unqualified -- verify against atomdnn.data.
from atomdnn.data import *
from atomdnn.network import Network

# Report how many GPUs TensorFlow can see.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  4


## Load tensorflow dataset 

In [2]:
# Load a previously saved TensorFlow dataset from disk.
# NOTE(review): absolute, machine-specific path -- adjust it to your environment.
dataset = tf.data.experimental.load('/workspace/data/group_share/graphene_tfdataset')

2021-10-06 01:06:41.271850: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 9673 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 2080 Ti, pci bus id: 0000:19:00.0, compute capability: 7.5
2021-10-06 01:06:41.272862: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 9673 MB memory:  -> device: 1, name: NVIDIA GeForce RTX 2080 Ti, pci bus id: 0000:1a:00.0, compute capability: 7.5
2021-10-06 01:06:41.273735: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:2 with 9673 MB memory:  -> device: 2, name: NVIDIA GeForce RTX 2080 Ti, pci bus id: 0000:67:00.0, compute capability: 7.5
2021-10-06 01:06:41.274539: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:3 with 9673 MB memory:  -> device: 3, name: NVIDIA GeForce RTX

In [3]:
# Split the dataset into training / validation / test subsets with fractions 0.7 / 0.2 / 0.1,
# with shuffling enabled.
train_dataset, val_dataset, test_dataset = split_dataset(dataset,0.7,0.2,0.1,shuffle=True)

Traning data: 630 images
Validation data: 180 images
Test data: 90 images


## Build Network object from class Network 

### \_\_init\_\_(self, elements=None, num_fingerprints=None, arch=None,activation_function=None, data_type=None, import_dir=None)

- **elements:** element list, required

- **num_fingerprints:** number of fingerprints in data, required

- **arch:** architecture of the neural network, given as a list with one entry per layer (e.g. `[50,50]`)

- **activation_function:** if not set, default is 'tanh'

- **import_dir:** directory of a previously saved network to read from; if used, all other parameters are ignored

In [4]:
# Settings used to construct the network.
arch = [50, 50]                        # NN layers
nfp = get_fingerprints_num(dataset)    # number of fingerprints
act_fun = 'tanh'                       # activation function
elements = ['C']

# Build the Network object from the settings above.
model = Network(
    elements=elements,
    num_fingerprints=nfp,
    arch=arch,
    activation_function=act_fun,
)

## Train the model

### train(self, train_dataset, val_dataset, batch_size=None, epochs=None, loss_fn=None, optimizer=None, lr=None, loss_weights=None, shuffle=True)

- **train_dataset**: tensorflow dataset for training
    
- **val_dataset**: tensorflow dataset for validation
    
- **batch_size**: if not set, use 30

- **scaling**: scaling method applied to the datasets, either 'std' or 'norm' (with 'std', the datasets are standardized)
    
- **epochs**: if not set, use 1
    
- **optimizer**: if not set, use Adam
    
- **lr**: learning rate, if not set, use 0.01
    
- **loss_weights**: sets the weights for loss function, if not set, use {'pe':1,'force':0,'stress':0}
    
- **shuffle**: training dataset is shuffled during training

### The training and validation loss histories are stored in the following dictionaries, each with the keys 'pe_loss', 'force_loss', 'stress_loss' and 'total_loss':  
- **train_loss**  
- **val_loss** 



In [5]:
# Weights of the potential-energy, force and stress terms in the loss function.
loss_weights = dict(pe=0.1, force=1, stress=0.1)

# Train for 5 epochs on the training split, validating on the validation split.
model.train(
    train_dataset,
    val_dataset,
    batch_size=50,
    lr=0.01,
    epochs=5,
    scaling='std',
    loss_weights=loss_weights,
    shuffle=True,
)

optimizer is set to Adam by default.
loss_fn is set to mae by default.
Forces are used for training.
Stresses are used for training.


2021-10-06 01:06:48.440100: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Scaling factors are computed using training dataset.
Training dataset are standardized.
Validation dataset are standardized.
Training dataset will be shuffled during training.

===> Epoch 1/5 - 5.263s/epoch
     training_loss    - pe_loss: 122.062 - force_loss: 37.331 - stress_loss: 43.388 - total_loss: 53.876
     validation_loss  - pe_loss: 130.147 - force_loss: 21.431 - stress_loss: 32.828 - total_loss: 37.729

===> Epoch 2/5 - 4.216s/epoch
     training_loss    - pe_loss: 119.259 - force_loss: 21.189 - stress_loss: 26.423 - total_loss: 35.757
     validation_loss  - pe_loss: 117.162 - force_loss: 16.244 - stress_loss: 24.686 - total_loss: 30.428

===> Epoch 3/5 - 4.605s/epoch
     training_loss    - pe_loss: 112.796 - force_loss: 15.202 - stress_loss: 21.101 - total_loss: 28.592
     validation_loss  - pe_loss: 111.872 - force_loss: 13.079 - stress_loss: 17.964 - total_loss: 26.063

===> Epoch 4/5 - 4.678s/epoch
     training_loss    - pe_loss: 107.908 - force_loss: 11.855 - stress

# Evaluate dataset

### evaluate(self,dataset, batch_size=None, compute_force=atomdnn.compute_force):
- **dataset**: tensorflow dataset to be evaluated
- **batch_size**: if not set, batch_size equals the total size of the dataset
- **compute_force**: if forces are computed, derivative data are needed

In [12]:
# Evaluate the trained model on the test split (batch_size and compute_force left at their defaults).
model.evaluate(test_dataset)

Evaluation loss is:
        pe_loss:       1.1037e+02
     force_loss:       1.1732e+01
    stress_loss:       2.0113e+01
     total_loss:       2.4780e+01

The prediction is:


{'pe': array([ -94.81653037,   43.37780429, -134.29769855,   53.6709754 ,
        -101.34093082,   60.39999918,   62.75686244, -100.52176236,
        -104.041808  ,   25.74884672,   54.96260191,  -96.50545319,
         -96.83861711, -100.11138217,   59.93182155, -100.44345216,
          48.62745753,   22.26499645,  -97.88841275,  -95.96525047,
        -123.19242945,   -5.28853577,   59.61715824,   23.68488978,
          53.96003264, -125.65008495,  -99.39219429, -108.69475066,
          63.92839569,   25.03389981, -122.17798808, -133.81100608,
        -100.69527017,   -3.79152986,   54.81592737,  -98.92007427,
          23.54695288,  -99.47198284, -161.25573948,  -94.97156151,
        -134.33688607,   25.65591768,  -96.48223796,   21.36588882,
          59.85668465,  -97.58950124,   51.2488969 ,   55.25569786,
          23.55543475, -105.59382355,  -98.11283885,   42.69953041,
          40.35370683,  -97.16633472,   61.35779788,  -99.70468685,
        -123.72509623,   22.61688477,   24

# Prediction

### predict(self, input_dict, compute_force=atomdnn.compute_force, training=False)

- **input_dict**: input dictionary data, which could be data outside the dataset
- **training**: set to False 
- **compute_force**: if forces are computed, derivative data are needed

In [11]:
# Take two images from the test dataset, convert them to an input dictionary,
# and run the model's prediction on it.
input_dict = get_input_dict(test_dataset.take(2))
model.predict(input_dict)

{'pe': array([-94.81653037,  43.37780429]),
 'force': array([[[ 2.45228698e+00, -6.15761125e+00, -9.79223661e-01],
         [ 3.90010324e+00,  1.25073227e+01, -1.11988560e+00],
         [-9.00831977e-01,  3.27460173e+00,  5.24370941e-01],
         [ 1.53991870e-01,  1.62439574e+00, -3.95105609e-01],
         [-1.22665945e+00, -5.11508528e-01, -4.12692164e-01],
         [ 1.83915957e+00,  5.04951729e+00, -1.40719571e+00],
         [ 2.06925392e+00,  8.26307129e-01,  1.16236951e+00],
         [-3.76125907e+00,  1.22358620e+00,  1.01384105e+00],
         [-4.12636306e+00, -6.54880554e+00,  3.14062176e+00],
         [ 2.88815673e+00,  4.41297722e+00,  7.82873045e-01],
         [-1.26816258e+00, -1.72529498e+00,  3.37459235e-01],
         [-2.61883591e+00, -1.11258028e-01, -1.66068760e+00],
         [ 1.43655326e+00,  3.82264720e+00, -5.46182139e-01],
         [ 4.23581700e+00, -1.94632408e+00,  9.78609459e-01],
         [-3.95305921e+00, -7.00874301e+00, -6.15000238e-01],
         [ 9.4249

# Save trained model

### save(obj, model_dir, descriptor=None)

- **obj**: Network object

- **model_dir**: directory for saving the trained model

- **descriptor**: descriptor parameters used to generate fingerprints, if set, a parameters file is generated for LAMMPS simulation

In [13]:
# Descriptor parameters passed to network.save along with the trained model.
descriptor = {
    'name': 'acsf',
    'cutoff': 6.5001,
    'etaG2': [
        0.01, 0.025, 0.05, 0.075, 0.1, 0.15, 0.2, 0.3, 0.4,
        0.5, 0.6, 0.8, 1, 1.5, 2, 3, 5, 10,
    ],
    'etaG4': [0.01],
    'zeta': [
        0.08, 0.1, 0.15, 0.2, 0.3, 0.35, 0.5, 0.6, 0.8, 1.0,
        1.5, 2.0, 3.0, 4.0, 5.5, 7.0, 10.0, 25.0, 50.0, 100.0,
    ],
    'lambda': [1.0, -1.0],
}

# Directory that receives the saved model.
save_dir = '/workspace/data/weigao/machine_learning/graphene.tfdnn'
network.save(model, save_dir, descriptor=descriptor)

2021-10-06 01:30:49.139483: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


INFO:tensorflow:Assets written to: /workspace/data/weigao/machine_learning/graphene.tfdnn/assets
Network signatures and descriptor are written to /workspace/data/weigao/machine_learning/graphene.tfdnn/parameters for LAMMPS simulation.


In [14]:
# Print the signature (inputs and outputs) of the model saved in save_dir.
network.print_signature(save_dir)

The given SavedModel SignatureDef contains the following input(s):
  inputs['atom_type'] tensor_info:
      dtype: DT_INT32
      shape: (-1, -1)
      name: serving_default_atom_type:0
  inputs['center_atom_id'] tensor_info:
      dtype: DT_INT32
      shape: (-1, -1)
      name: serving_default_center_atom_id:0
  inputs['dgdr'] tensor_info:
      dtype: DT_DOUBLE
      shape: (-1, -1, -1, -1)
      name: serving_default_dgdr:0
  inputs['fingerprints'] tensor_info:
      dtype: DT_DOUBLE
      shape: (-1, -1, -1)
      name: serving_default_fingerprints:0
  inputs['neighbor_atom_coord'] tensor_info:
      dtype: DT_DOUBLE
      shape: (-1, -1, -1, -1)
      name: serving_default_neighbor_atom_coord:0
  inputs['neighbor_atom_id'] tensor_info:
      dtype: DT_INT32
      shape: (-1, -1)
      name: serving_default_neighbor_atom_id:0
The given SavedModel SignatureDef contains the following output(s):
  outputs['atom_pe'] tensor_info:
      dtype: DT_DOUBLE
      shape: (-1, -1, 1)
      

# Load the saved model for continued training and prediction

**load(model_dir)**

- **model_dir**: saved model directory

In [15]:
# save_dir is assigned here again, with the same path that was used when saving the model.
save_dir = '/workspace/data/weigao/machine_learning/graphene.tfdnn'
# Load the saved network from that directory.
imported_model = network.load(save_dir)

Network has been inflated! self.built: True


In [18]:
# Continue training the imported model; arguments match the first training run.
imported_model.train(
    train_dataset,
    val_dataset,
    batch_size=50,
    lr=0.01,
    epochs=5,
    scaling='std',
    loss_weights=loss_weights,
    shuffle=True,
)

Forces are used for training.
Stresses are used for training.
Scaling factors are computed using training dataset.
Training dataset are standardized.
Validation dataset are standardized.
Training dataset will be shuffled during training.

===> Epoch 1/5 - 4.524s/epoch
     training_loss    - pe_loss: 97.219 - force_loss: 10.387 - stress_loss: 21.564 - total_loss: 22.265
     validation_loss  - pe_loss: 94.341 - force_loss: 9.700 - stress_loss: 18.809 - total_loss: 21.015

===> Epoch 2/5 - 4.610s/epoch
     training_loss    - pe_loss: 92.244 - force_loss: 9.282 - stress_loss: 17.577 - total_loss: 20.265
     validation_loss  - pe_loss: 83.708 - force_loss: 8.882 - stress_loss: 16.631 - total_loss: 18.916

===> Epoch 3/5 - 4.578s/epoch
     training_loss    - pe_loss: 77.637 - force_loss: 8.572 - stress_loss: 15.748 - total_loss: 17.910
     validation_loss  - pe_loss: 74.852 - force_loss: 8.581 - stress_loss: 17.535 - total_loss: 17.819

===> Epoch 4/5 - 4.543s/epoch
     training_loss 

In [20]:
# Evaluate the imported model on the test split after the additional training.
imported_model.evaluate(test_dataset)

Evaluation loss is:
        pe_loss:       5.1532e+01
     force_loss:       8.2061e+00
    stress_loss:       1.3577e+01
     total_loss:       1.4717e+01

The prediction is:


{'pe': array([-110.06788596,  -62.24758699, -117.24405585,  -53.22510166,
        -116.73397093,  -54.99791559,  -54.07490486, -116.37246962,
        -133.07180596,  -79.54146797,  -45.02673581, -111.12527282,
        -111.07682357, -111.3969862 ,  -53.90228333, -116.01041089,
         -58.18068193,  -83.87487487, -111.48414933, -110.6337797 ,
        -137.53124453, -131.55346082,  -46.54854472,  -79.24946391,
         -57.37782435, -137.59670149, -118.10407237, -123.44279687,
         -47.48595971,  -79.18602774, -127.08138177, -140.36086454,
        -119.88000127, -134.14990133,  -57.62813657, -115.04026987,
         -79.2505217 , -113.16327324, -129.97940002, -109.85261186,
        -149.00323755,  -79.53219804, -110.94238024,  -83.26678912,
         -56.23327558, -114.7091523 ,  -57.12451571,  -52.31554002,
         -79.25026762, -115.62018069, -116.20419219,  -65.79426497,
         -59.47777076, -110.69829096,  -45.01347559, -165.5955542 ,
        -116.46461538,  -83.3073623 ,  -79

In [22]:
# Build an input dictionary from two images of the test dataset and predict with the imported model.
input_dict = get_input_dict(test_dataset.take(2))
imported_model.predict(input_dict)

{'pe': array([-110.06788596,  -62.24758699]),
 'force': array([[[ 1.97992773e+00, -4.16364746e+00, -7.56623826e-01],
         [ 3.42816629e+00,  9.50428644e+00, -7.58184836e-01],
         [ 6.23137641e-02,  2.60083810e+00,  3.24989986e-01],
         [-1.18007647e-01,  1.11654674e+00, -2.54472038e-01],
         [-9.60516863e-01, -1.25333526e+00, -2.95265974e-01],
         [ 9.74452117e-01,  4.18548762e+00, -8.50291918e-01],
         [ 1.22466379e+00,  7.14416680e-01,  5.93316210e-01],
         [-2.98358671e+00,  9.29497304e-01,  8.22813319e-01],
         [-2.99188761e+00, -5.47347663e+00,  2.03407582e+00],
         [ 1.80410199e+00,  3.45624309e+00,  5.67578537e-01],
         [-8.26742841e-01, -1.31908810e+00,  3.60459140e-01],
         [-2.05555441e+00, -4.34820093e-01, -1.23945927e+00],
         [ 6.88047183e-01,  2.72251184e+00, -4.40939965e-01],
         [ 2.89557608e+00, -2.03437436e+00,  8.81305258e-01],
         [-3.02195146e+00, -5.13389113e+00, -5.43209957e-01],
         [ 1.78