In [1]:
import atomdnn

# 'float32' is used by default for reading data and for training; set data_type to 'float64' here to override it.
atomdnn.data_type = 'float64'

# Force and stress are evaluated by default.
# If only the potential energy needs to be computed, set compute_force to False.
atomdnn.compute_force = True

import numpy as np
import tensorflow as tf
import pickle
from atomdnn import data
from atomdnn import network
from atomdnn.data import Data
from atomdnn.data import *
from atomdnn.network import Network
# import importlib
# importlib.reload(atomdnn.data)
# importlib.reload(atomdnn.network)

# Load data class from saved pickle file

In [2]:
# NOTE(security): pickle.load can execute arbitrary code, so only load pickle files from a trusted source.
# The context manager closes the file handle as soon as the data object has been loaded.
with open("/mnt/machine_learning/grdata.pickle", "rb") as pickle_file:
    grdata = pickle.load(pickle_file)

# Shuffle and then split the data into training, validation and test sets

### split(self, train_data_percent=None, val_data_percent=None, test_data_percent=None, data_size=None)

- **train_data_percent**: fraction of the data used for training (e.g. 0.7)

- **val_data_percent**: fraction of the data used for validation (e.g. 0.2)

- **test_data_percent**: fraction of the data used for testing (e.g. 0.1)

- **data_size**: if not set, use the whole data


In [3]:
# Shuffle the loaded data before it is split (the return value is discarded, so this presumably shuffles in place - confirm).
grdata.shuffel()

In [4]:
# Split the data into (input, output) pairs for training, validation and testing.
(x_train, y_train), (x_val, y_val), (x_test, y_test) = grdata.split(
    0.7,  # training fraction
    0.2,  # validation fraction
    0.1,  # test fraction
)

Traning data: 407 images
Validation data: 116 images
Test data: 59 images


# Build Network object from class Network 

__init__(self, elements=None, num_fingerprints=None, arch=None,activation_function=None, data_type=None, import_dir=None)

- **elements:** element list, required

- **num_fingerprints:** number of fingerprints in data, required

- **std**: = [mean, standard_deviation] of fingerprints; if set, standardize the fingerprints

- **norm**: = [min, max] of fingerprints, if set, normalize the fingerprints

- **arch:** network architecture given as a list, e.g. `[50,50]` (presumably the number of neurons in each hidden layer)

- **activation_function:** if not set, default is 'tanh'

- **import_dir:** read from the directory of a saved (imported) network, if used, all other parameters are disabled

In [5]:
# Build the network for a single element ('C'); passing `std` standardizes the
# fingerprints with the mean and deviation stored on the data object.
model = Network(
    elements=['C'],
    num_fingerprints=grdata.num_fingerprints,
    std=[grdata.mean_fp, grdata.dev_fp],
    arch=[50, 50],
)

activation function is set to tanh by default.


# Train the model

**train(self, train_input_dict, train_output_dict,
              batch_size=None, epochs=None, loss_fn=None, optimizer=None, lr=None, train_force=False, train_stress=False)**

- **train_input_dict**: input dictionary generated from build_dataset() for training
    
- **train_output_dict**: output dictionary generated from build_dataset() for training
    
- **batch_size**: if not set, use 30
    
- **epochs**: if not set, use 1
    
- **optimizer**: if not set, use Adam
    
- **lr**: learning rate, if not set, use 0.01
    
- **train_force**: whether forces are used for training
    
- **train_stress**: whether stresses are used for training

In [None]:
# Train on the potential energy only (train_force=False), reporting the
# validation loss after every epoch.
model.train(
    x_train,
    y_train,
    validation_data=[x_val, y_val],
    batch_size=30,
    epochs=500,
    train_force=False,
    pe_loss_weight=0.01,
    force_loss_weight=1,
)

Forces are not used for training.
Stresses are not used for training.

===> Epoch 1/500 - 1.308s/epoch
     training_loss    - pe: 1.558
     validation_loss  - pe: 3.447

===> Epoch 2/500 - 1.310s/epoch
     training_loss    - pe: 1.527
     validation_loss  - pe: 2.860

===> Epoch 3/500 - 1.276s/epoch
     training_loss    - pe: 1.289
     validation_loss  - pe: 2.612

===> Epoch 4/500 - 1.244s/epoch
     training_loss    - pe: 1.523
     validation_loss  - pe: 3.957

===> Epoch 5/500 - 1.276s/epoch
     training_loss    - pe: 1.551
     validation_loss  - pe: 2.400

===> Epoch 6/500 - 1.347s/epoch
     training_loss    - pe: 1.473
     validation_loss  - pe: 2.212

===> Epoch 7/500 - 1.276s/epoch
     training_loss    - pe: 1.462
     validation_loss  - pe: 1.611

===> Epoch 8/500 - 1.292s/epoch
     training_loss    - pe: 2.508
     validation_loss  - pe: 2.586

===> Epoch 9/500 - 1.388s/epoch
     training_loss    - pe: 1.422
     validation_loss  - pe: 2.914

===> Epoch 10/500 - 

In [None]:
# Inspect the 'dGdr' entry of the input dictionary.
# NOTE(review): this displays the whole object; consider showing only a slice.
grdata.input_dict['dGdr']

In [None]:
# Inspect the 'fingerprints' entry of the input dictionary.
# NOTE(review): this displays the whole object; consider showing only a slice.
grdata.input_dict['fingerprints']

In [None]:
# Scale 'dGdr' by the inverse of the fingerprint deviation.
# NOTE(review): tf.matmul needs two operands (a, b) but only one expression is passed here,
# so this cell raises as written. If an element-wise scaling was intended, the inner
# expression alone may be what is wanted - confirm the intended second operand.
tf.matmul(1/grdata.dev_fp*grdata.input_dict['dGdr'])

In [7]:
# Evaluate the trained model on the test set; the losses are printed below.
model.evaluate(x_test,y_test)

        pe_loss:       1.9471e+00
     force_loss:       2.1933e+01
    stress_loss:       3.6029e+05
     total_loss:       1.9471e+00


In [None]:
# Evaluate the trained model on the validation set.
model.evaluate(x_val,y_val)

# Prediction: compute potential energy, force and stress

**predict (self, input_dict, training=False,compute_force=True)**

- **input_dict**: input dictionary generated from build_dataset function
    
- **training**: set to False
    
- **compute_force**: whether to compute forces; if set, derivative data are needed

In [None]:
# Compute the predicted stress in this cell so it does not depend on a later cell
# having been run first, then convert it to a tensor.
stress_predict = tf.convert_to_tensor(model.predict(x_test)['stress'])
# Keep 6 of the 9 entries along axis 1 (entries 3, 6 and 7 are dropped;
# presumably the duplicate off-diagonal terms of a symmetric 3x3 tensor - confirm ordering).
mask = [True,True,True,False,True,True,False,False,True]
tf.reshape(tf.boolean_mask(stress_predict, mask,axis=1),[-1,6])

In [None]:
# Mean absolute error between predictions and reference values on the test set.
loss_fn = tf.keras.losses.get('mae')

# Run the network once and reuse the result for all three quantities.
prediction = model.predict(x_test)
pe_predict = prediction['pe']
force_predict = prediction['force']
stress_predict = prediction['stress']

print(loss_fn(pe_predict,y_test['pe']))
# Force and stress losses are further averaged with reduce_mean to a single number.
print(tf.reduce_mean(loss_fn(force_predict,y_test['force'])))
print(tf.reduce_mean(loss_fn(stress_predict,y_test['stress'])))

In [None]:
# Invoke the model's __call__ directly on the test inputs (as opposed to predict()).
model.__call__(x_test)

# Save trained model

**save(obj, model_dir, descriptor=None)**

- **obj**: Network object

- **model_dir**: directory for saving the trained model

- **descriptor**: descriptor parameters used to generate fingerprints, if set, a parameters file is generated for LAMMPS simulation

In [None]:
# Descriptor parameters used to generate the fingerprints; passing them to save()
# also produces a parameters file for LAMMPS simulation.
descriptor = {
    'name': 'acsf',
    'cutoff': 6.5001,
    'etaG2': [0.01, 0.025, 0.05, 0.075, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.6, 0.8, 1, 1.5, 2, 3, 5, 10],
    'etaG4': [0.01],
    'zeta': [0.08, 0.1, 0.15, 0.2, 0.3, 0.35, 0.5, 0.6, 0.8, 1., 1.5, 2., 3.0, 4., 5.5, 7.0, 10.0, 25.0, 50.0, 100.0],
    'lambda': [1.0, -1.0],
}

# Directory the trained model is written to.
save_dir = 'graphene_24atoms.tfdnn'
network.save(model, save_dir, descriptor=descriptor)

# Load the saved model for continued training and prediction

**load(model_dir)**

- **model_dir**: saved model directory

In [None]:
save_dir = 'graphene_24atoms.tfdnn'
# Bind the reloaded network to `new_model`, the name the following cells use;
# this also keeps the in-memory `model` built above from being overwritten.
new_model = network.load(save_dir)

In [None]:
# Print the signature of the model saved in `save_dir`.

network.print_signature(save_dir)

In [None]:
# Read a single configuration (batch_mode=False) from the LAMMPS fingerprint
# dump file and its matching derivative dump file.
onedata = data.read_inputdata_from_lmp(
    batch_mode=False,
    fp_filename='data_graphene_96atoms/dump_fingerprints.200',
    der_filename='data_graphene_96atoms/dump_fingerprints_der.200',
)

In [None]:
# Per-atom potential energy: invoke new_model's __call__ on the input dictionary read from the dump files.

new_model.__call__(onedata.input_dict)

In [None]:
# Compute per-atom stress.

centerid = onedata.input_dict['center_atom_id']

# One-hot encoding of the center-atom ids, used to combine the per-block stresses
# (presumably summing each block's stress onto its center atom - confirm shapes).
center_one_hot = tf.one_hot(centerid,depth=onedata.num_blocks,axis=1,dtype=onedata.data_type)

stress_block = new_model.__call__(onedata.input_dict)['stress']

stress_peratom = tf.matmul(center_one_hot,stress_block)

# Unit conversion factor applied to the printed stress
# (presumably eV/A^3 -> bar, 1 eV/A^3 ~ 1.602176e6 bar - confirm units).
evA2bar = 1602176

for i in range(0,onedata.num_atoms):
    # Use the constant defined above; the loop previously referenced an undefined name.
    print(i+1,"          ",stress_peratom[0][i].numpy()*evA2bar)
    print('\n')

In [None]:
# Per-atom potential energy of the first (only) configuration.
atom_pe = new_model.__call__(onedata.input_dict)['atom_pe'][0]

# Print "<atom id>:   <energy>"; the format string has exactly one placeholder
# per supplied value (atom id and energy).
for i in range(0,onedata.num_atoms):
    print("%d:   %.6g" % (i+1, atom_pe[i].numpy()))

In [None]:
# Stress output of the first (only) configuration.
stress = new_model.__call__(onedata.input_dict)['stress'][0]

# NOTE(review): `stress` is computed above but never used; the loop prints `atom_pe`
# (defined in another cell) instead. This looks like a copy-paste leftover - confirm
# whether the stress values were meant to be printed here.
for i in range(0,onedata.num_atoms):
    print("%d:   %.6g" % (i+1, atom_pe[i].numpy()))

In [None]:
# Run predict() on the single configuration and display the returned dictionary.
new_model.predict(onedata.input_dict)

In [None]:
# Predicted forces of the first (only) configuration, printed as a table
# with one row per atom (1-based atom id, then the x/y/z components).
force = new_model.predict(onedata.input_dict)['force'][0]

print ("%s %5s %15s %15s"%("atom_id","f_x","f_y","f_z"))
for atom_index in range(onedata.num_atoms):
    f_x, f_y, f_z = (force[atom_index][axis].numpy() for axis in range(3))
    print("%d %15.6f %15.6f %15.6f" % (atom_index + 1, f_x, f_y, f_z))

In [None]:
# Continue training new_model for 100 more epochs.
# NOTE(review): this reads train_input_dict / train_output_dict from grdata, whereas the
# earlier split() call returned the training data as (x_train, y_train) - confirm these
# attributes exist on the data object.

new_model.train(grdata.train_input_dict, grdata.train_output_dict, batch_size=30, epochs=100)

In [None]:
# Predict on the test inputs stored on the data object.
# NOTE(review): assumes grdata exposes test_input_dict - confirm.
new_model.predict(grdata.test_input_dict)

# Check C_inference

In [None]:
# Choose one image to test: the slice arguments are 0 and 1
# (presumably start/end, i.e. the first image rather than the second - confirm slice_dict semantics).
image = data.slice_dict (grdata.input_dict,0,1)
model.predict(image)

In [None]:
# Print the neighbor ids and neighbor coordinates stored at index 23 of the first image.
print(image['neighbor_atom_id'][0][23])
print(image['neighbor_atom_coord'][0][23])

In [None]:
# Display the sliced input dictionary.
image

In [None]:
# Invoke the model's __call__ directly on the sliced image.
model.__call__(image)

In [None]:
# Run the external inference_energy executable on a saved model directory and a fingerprint dump
# (the trailing 96 is presumably the number of atoms - confirm against the tool's usage).
!../c_inference/inference_energy "../example/graphene_energy.tfdnn/" "../example/data_graphene_96atoms/dump_fingerprints.200" 96  

# Save dataset

In [None]:
# Build a tf.data.Dataset whose elements are (input, output) pairs sliced from the two dictionaries.
tfdataset = tf.data.Dataset.from_tensor_slices((grdata.input_dict,grdata.output_dict))

In [None]:
# Write the dataset to the 'graphene_tfdataset' directory.
# NOTE(review): newer TensorFlow releases offer tf.data.Dataset.save in place of this
# experimental function - confirm against the installed version.
tf.data.experimental.save(tfdataset, 'graphene_tfdataset')

In [None]:
# Reload the saved dataset, reusing the element spec of the in-memory dataset.
# NOTE(review): newer TensorFlow releases offer tf.data.Dataset.load in place of this
# experimental function - confirm against the installed version.
newdataset = tf.data.experimental.load('graphene_tfdataset',element_spec=tfdataset.element_spec)

# Debugging

In [None]:
# Load the model saved in 'graphene_energy.tfdnn'.
pe_model = network.load('graphene_energy.tfdnn')

In [None]:
# Predict with force computation switched off (compute_force=False).
pe_model.predict(onedata.input_dict,compute_force=False)

In [None]:
# Read a single configuration (batch_mode=False) from the LAMMPS fingerprint
# dump file and its matching derivative dump file.
onedata = data.read_inputdata_from_lmp(
    batch_mode=False,
    fp_filename='data_graphene_96atoms/dump_fingerprints.200',
    der_filename='data_graphene_96atoms/dump_fingerprints_der.200',
)

In [None]:
# Scratch check: parse the string '2' as a float, then truncate it to an int.
# NOTE(review): unrelated to the rest of the notebook - candidate for removal.
int(float('2'))

In [None]:
# Predict on the single configuration with `model` and display the returned dictionary.
model.predict(onedata.input_dict)

In [None]:
# Run the prediction once, outside the loop, and reuse the force array for every atom.
force_first_image = model.predict(onedata.input_dict)['force'][0]

# Print "<1-based atom id> <force vector>" for each atom of the configuration.
for i in range(onedata.num_atoms):
    print(i+1, force_first_image[i].numpy())

In [None]:
# Inspect the fingerprints stored at index [0][1] (presumably first image, second atom - confirm layout).
onedata.input_dict['fingerprints'][0][1]

In [None]:
# Print a single fingerprint value in scientific notation with 8 decimal digits.
print('%.8e'%onedata.input_dict['fingerprints'][0][1][0].numpy())